From c45b4ac07ea39ecfe5323cc4bd25fd14c1d83887 Mon Sep 17 00:00:00 2001 From: Niti Goyal Date: Fri, 20 Feb 2026 10:23:51 -0500 Subject: [PATCH 1/5] Major pdd setup changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - No prompt files included - Adds support for many more LiteLLM-supported providers (Vertex AI, AWS Bedrock, Azure, etc.) - The api_key column now supports pipe-delimited fields (e.g. VERTEXAI_PROJECT|VERTEXAI_LOCATION|GOOGLE_APPLICATION_CREDENTIALS) for providers whose auth requires multiple credentials - Updated pdd setup documentation - Update llm_invoke api_key handling to support the new pipe-delimited credentials format and generalized to remove provider-specific logic --- README.md | 58 +- SETUP_WITH_GEMINI.md | 31 +- context/api_key_scanner_example.py | 50 + context/cli_detector_example.py | 48 + context/model_tester_example.py | 44 + context/pddrc_initializer_example.py | 42 + context/provider_manager_example.py | 78 ++ context/setup_tool_example.py | 99 ++ docs/ONBOARDING.md | 34 +- pdd/api_key_scanner.py | 202 ++++ pdd/cli_detector.py | 595 +++++++++++ pdd/data/llm_model.csv | 286 +++++- pdd/docs/prompting_guide.md | 222 ++++- pdd/generate_model_catalog.py | 711 +++++++++++++ pdd/llm_invoke.py | 229 ++--- pdd/model_tester.py | 420 ++++++++ pdd/pddrc_initializer.py | 192 ++++ pdd/provider_manager.py | 951 ++++++++++++++++++ pdd/setup_tool.py | 1381 +++++++++++++++----------- tests/test_api_key_scanner.py | 515 ++++++++++ tests/test_cli_detector.py | 774 +++++++++++++++ tests/test_model_tester.py | 490 +++++++++ tests/test_pddrc_initializer.py | 356 +++++++ tests/test_provider_manager.py | 754 ++++++++++++++ tests/test_setup_tool.py | 1276 ++++++++++++++---------- 25 files changed, 8511 insertions(+), 1327 deletions(-) create mode 100644 context/api_key_scanner_example.py create mode 100644 context/cli_detector_example.py create mode 100644 context/model_tester_example.py create mode 100644 
context/pddrc_initializer_example.py create mode 100644 context/provider_manager_example.py create mode 100644 context/setup_tool_example.py create mode 100644 pdd/api_key_scanner.py create mode 100644 pdd/cli_detector.py create mode 100644 pdd/generate_model_catalog.py create mode 100644 pdd/model_tester.py create mode 100644 pdd/pddrc_initializer.py create mode 100644 pdd/provider_manager.py create mode 100644 tests/test_api_key_scanner.py create mode 100644 tests/test_cli_detector.py create mode 100644 tests/test_model_tester.py create mode 100644 tests/test_pddrc_initializer.py create mode 100644 tests/test_provider_manager.py diff --git a/README.md b/README.md index 530884540..ab7f048f9 100644 --- a/README.md +++ b/README.md @@ -102,7 +102,7 @@ With the CLI on your `PATH`, continue with: ```bash pdd setup ``` -The command installs tab completion, walks you through API key entry, and seeds local configuration files. +The command detects agentic CLI tools, scans for API keys, configures models, and seeds local configuration files. If you postpone this step, the CLI detects the missing setup artifacts the first time you run another command and shows a reminder banner so you can complete it later (the banner is suppressed once `~/.pdd/api-env` exists or when your project already provides credentials via `.env` or `.pdd/`). ### Alternative: pip Installation @@ -167,7 +167,7 @@ For CLI enthusiasts, implement GitHub issues directly: 2. 
**One Agentic CLI** - Required to run the workflows (install at least one): - **Claude Code**: `npm install -g @anthropic-ai/claude-code` (requires `ANTHROPIC_API_KEY`) - - **Gemini CLI**: `npm install -g @google/gemini-cli` (requires `GOOGLE_API_KEY`) + - **Gemini CLI**: `npm install -g @google/gemini-cli` (requires `GOOGLE_API_KEY` or `GEMINI_API_KEY`) - **Codex CLI**: `npm install -g @openai/codex` (requires `OPENAI_API_KEY`) **Usage:** @@ -222,21 +222,28 @@ If you want to understand PDD fundamentals, follow this manual example to see it ### Post-Installation Setup (Required first step after installation) -Run the guided setup: +Run the comprehensive setup wizard: ```bash pdd setup ``` -This wraps the interactive bootstrap utility to install shell tab completion, capture your API keys, create ~/.pdd configuration files, and write the starter prompt. Re-run it any time to update keys or reinstall completion. +The setup wizard runs these steps: + 1. Detects agentic CLI tools (Claude, Gemini, Codex) and offers installation and API key configuration if needed + 2. Scans for API keys across `.env`, and `~/.pdd/api-env.*`, and the shell environment; prompts to add one if none are found + 3. Configures models from a reference CSV `data/llm_model.csv` of top models (ELO ≥ 1400) across all LiteLLM-supported providers based on your available keys + 4. Optionally creates a `.pddrc` project config + 5. Tests the first available model with a real LLM call + 6. Prints a structured summary (CLIs, keys, models, test result) -If you skip this step, the first regular pdd command you run will detect the missing setup files and print a reminder banner so you can finish onboarding later. +The wizard can be re-run at any time to update keys, add providers, or reconfigure settings. 
-Reload your shell so the new completion and environment hooks are available: -```bash -source ~/.zshrc # or source ~/.bashrc / fish equivalent -``` +> **Important:** After setup completes, source the API environment file so your keys take effect in the current terminal session: +> ```bash +> source ~/.pdd/api-env.zsh # or api-env.bash, depending on your shell +> ``` +> New terminal windows will load keys automatically. -👉 If you prefer to configure things manually, see [SETUP_WITH_GEMINI.md](SETUP_WITH_GEMINI.md) for full instructions on obtaining a Gemini API key and creating your own `~/.pdd/llm_model.csv`. +If you skip this step, the first regular pdd command you run will detect the missing setup files and print a reminder banner so you can finish onboarding later. 5. **Run Hello**: ```bash @@ -321,28 +328,6 @@ For a concrete, up-to-date reference of supported models and example rows, see t For proper model identifiers to use in your custom configuration, refer to the [LiteLLM Model List](https://docs.litellm.ai/docs/providers) documentation. LiteLLM typically uses model identifiers in the format `provider/model_name` (e.g., "openai/gpt-4", "anthropic/claude-3-opus-20240229"). -## Post-Installation Setup - -1. Run the guided setup (required unless you do this manually or use the cloud): -```bash -pdd setup -``` -This wraps the interactive bootstrap utility to install shell tab completion, capture your API keys, create `~/.pdd` configuration files, and write the starter prompt. Re-run it any time to update keys or reinstall completion. -If you skip this step, the first regular `pdd` command you run will detect the missing setup files and print a reminder banner so you can finish onboarding later (the banner is suppressed once `~/.pdd/api-env` exists or when your project already provides credentials via `.env` or `.pdd/`). - -2. 
Reload your shell so the new completion and environment hooks are available: -```bash -source ~/.zshrc # or source ~/.bashrc / fish equivalent -``` - -3. Configure environment variables (optional): -```bash -# Add to .bashrc, .zshrc, or equivalent -export PDD_AUTO_UPDATE=true -export PDD_GENERATE_OUTPUT_PATH=/path/to/generated/code/ -export PDD_TEST_OUTPUT_PATH=/path/to/tests/ -``` - ## Troubleshooting Common Installation Issues 1. **Command not found** @@ -1853,7 +1838,7 @@ For the agentic fallback to function, you need to have at least one of the suppo * Requires the `ANTHROPIC_API_KEY` environment variable to be set. 2. **Google Gemini:** * Requires the `gemini` CLI to be installed and in your `PATH`. - * Requires the `GOOGLE_API_KEY` environment variable to be set. + * Requires the `GOOGLE_API_KEY` or `GEMINI_API_KEY` environment variable to be set. 3. **OpenAI Codex/GPT:** * Requires the `codex` CLI to be installed and in your `PATH`. * Requires the `OPENAI_API_KEY` environment variable to be set. @@ -2799,13 +2784,18 @@ The `.pddrc` approach is recommended for team projects as it ensures consistent ### Model Configuration (`llm_model.csv`) -PDD uses a CSV file (`llm_model.csv`) to store information about available AI models, their costs, capabilities, and required API key names. When running commands locally (e.g., using the `update_model_costs.py` utility or potentially local execution modes if implemented), PDD determines which configuration file to use based on the following priority: +PDD uses a CSV file (`llm_model.csv`) to store information about available AI models, their costs, capabilities, and required API key names. + +When running commands locally, PDD determines which configuration file to use based on the following priority: 1. **User-specific:** `~/.pdd/llm_model.csv` - If this file exists, it takes precedence over any project-level configuration. This allows users to maintain a personal, system-wide model configuration. 2. 
**Project-specific:** `/.pdd/llm_model.csv` - If the user-specific file is not found, PDD looks for the file within the `.pdd` directory of the determined project root (based on `PDD_PATH` or auto-detection). 3. **Package default:** If neither of the above exist, PDD falls back to the default configuration bundled with the package installation. This tiered approach allows for both shared project configurations and individual user overrides, while ensuring PDD works out-of-the-box without requiring manual configuration. + +**Note:** You can manually edit this CSV, but running `pdd setup` again is the recommended way to add providers and update models. + *Note: This file-based configuration primarily affects local operations and utilities. Cloud execution modes likely rely on centrally managed configurations.* diff --git a/SETUP_WITH_GEMINI.md b/SETUP_WITH_GEMINI.md index b021fe8ee..31a0707ff 100644 --- a/SETUP_WITH_GEMINI.md +++ b/SETUP_WITH_GEMINI.md @@ -60,14 +60,29 @@ Right after installation, let PDD bootstrap its configuration: pdd setup ``` -During the wizard: -- Choose **Install tab completion** if you want shell helpers. -- Pick **Google Gemini** when asked which providers to configure. -- Paste your Gemini API key when prompted (you can create it in the next step if you haven’t already). - -The wizard writes your credentials to `~/.pdd/api-env`, seeds `~/.pdd/llm_model.csv` with Gemini entries, and reminds you to reload your shell (`source ~/.zshrc`, etc.) so completion and env hooks load. - -If you prefer to configure everything manually—or you’re on an offline machine—skip the wizard and follow the manual instructions below. +The setup wizard runs these steps: + 1. Detects agentic CLI tools (Claude, Gemini, Codex) and offers installation and API key configuration if needed + 2. Scans for API keys across `.env`, and `~/.pdd/api-env.*`, and the shell environment; prompts to add one if none are found + 3. 
Configures models from a reference CSV `data/llm_model.csv` of top models (ELO ≥ 1400) across all LiteLLM-supported providers based on your available keys + 4. Optionally creates a `.pddrc` project config + 5. Tests the first available model with a real LLM call + 6. Prints a structured summary (CLIs, keys, models, test result) + +When adding your Gemini API key: +- Select Gemini CLI as one of the agentic CLI tools +- The wizard will detect that `GEMINI_API_KEY` is missing +- Paste your API key when prompted (you can create it in the next step if you haven't already) +- The wizard tests it immediately and confirms it works + +The wizard writes your credentials to `~/.pdd/api-env.zsh` (or `.bash`) and updates `llm_model.csv` with your selected models. + +> **Important:** After setup completes, source the API environment file so your keys take effect in the current terminal session: +> ```bash +> source ~/.pdd/api-env.zsh # or api-env.bash, depending on your shell +> ``` +> New terminal windows will load keys automatically. + +If you prefer to configure everything manually—or you're on an offline machine—skip the wizard and follow the manual instructions below. --- diff --git a/context/api_key_scanner_example.py b/context/api_key_scanner_example.py new file mode 100644 index 000000000..687d858ca --- /dev/null +++ b/context/api_key_scanner_example.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +import sys +from pathlib import Path + +# Add the project root to sys.path +project_root = Path(__file__).resolve().parent.parent +sys.path.append(str(project_root)) + +from pdd.api_key_scanner import scan_environment, get_provider_key_names, KeyInfo + + +def main() -> None: + """ + Demonstrates how to use the api_key_scanner module to: + 1. Discover all API key variable names from the user's ~/.pdd/llm_model.csv + 2. Scan multiple sources (shell env, .env file, ~/.pdd/api-env.*) + 3. 
Report existence and source without storing key values + + Note: The scanner reads from the user's configured models, not a hardcoded + master list. If no models have been added via `pdd setup`, both functions + return empty results. + """ + + # Get all provider key names from the user's configured CSV + all_keys = get_provider_key_names() + print(f"Provider key names from user CSV: {all_keys}\n") + + if not all_keys: + print("No models configured yet. Use `pdd setup` to add providers.") + return + + # Scan the environment for all API keys + print("Scanning environment for API keys...\n") + scan_results = scan_environment() + + # Display results — note: KeyInfo only has source and is_set, no value + for key_name, key_info in scan_results.items(): + if key_info.is_set: + print(f" {key_name:25s} ✓ Found ({key_info.source})") + else: + print(f" {key_name:25s} — Not found") + + found = sum(1 for k in scan_results.values() if k.is_set) + missing = sum(1 for k in scan_results.values() if not k.is_set) + print(f"\nFound: {found} Missing: {missing}") + + +if __name__ == "__main__": + main() diff --git a/context/cli_detector_example.py b/context/cli_detector_example.py new file mode 100644 index 000000000..c6206506b --- /dev/null +++ b/context/cli_detector_example.py @@ -0,0 +1,48 @@ +from __future__ import annotations + +import sys +from pathlib import Path + +# Add the project root to sys.path +project_root = Path(__file__).resolve().parent.parent +sys.path.append(str(project_root)) + +from pdd.cli_detector import detect_and_bootstrap_cli, detect_cli_tools, CliBootstrapResult + + +def main() -> None: + """ + Demonstrates how to use the cli_detector module to: + 1. Bootstrap agentic CLIs for pdd setup (detect_and_bootstrap_cli) + 2. Detect installed CLI harnesses (claude, codex, gemini) + 3. Cross-reference with available API keys + 4. 
Offer installation for missing CLIs + """ + + # Primary entry point used by pdd setup Phase 1: + # results = detect_and_bootstrap_cli() # Returns List[CliBootstrapResult] + # for r in results: + # r.cli_name -> "claude" | "codex" | "gemini" | "" + # r.provider -> "anthropic" | "openai" | "google" | "" + # r.cli_path -> "/usr/local/bin/claude" | "" + # r.api_key_configured -> True | False + # r.skipped -> True | False + + # Legacy function for detection only: + # detect_cli_tools() # Uncomment to run interactively + + # Example flow (detect_and_bootstrap_cli with multi-select): + # Checking CLI tools... + # + # 1. Claude CLI ✓ Found at /usr/local/bin/claude ✓ ANTHROPIC_API_KEY is set + # 2. Codex CLI ✗ Not found ✗ OPENAI_API_KEY not set + # 3. Gemini CLI ✗ Not found ✓ GEMINI_API_KEY is set + # + # Select CLIs to use for pdd agentic tools (enter numbers separated by commas, e.g., 1,3): + # + # Returns [CliBootstrapResult(cli_name="claude", ...), CliBootstrapResult(cli_name="gemini", ...)] + pass + + +if __name__ == "__main__": + main() diff --git a/context/model_tester_example.py b/context/model_tester_example.py new file mode 100644 index 000000000..2c42c3126 --- /dev/null +++ b/context/model_tester_example.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +import sys +from pathlib import Path + +# Add the project root to sys.path +project_root = Path(__file__).resolve().parent.parent +sys.path.append(str(project_root)) + +from pdd.model_tester import test_model_interactive + + +def main() -> None: + """ + Demonstrates how to use the model_tester module to: + 1. List configured models from ~/.pdd/llm_model.csv + 2. Test a selected model via litellm.completion() + 3. Display diagnostics (API key status, timing, cost) + """ + + # Run the interactive tester + # test_model_interactive() # Uncomment to run interactively + + # Example flow: + # Configured models: + # 1. anthropic/claude-haiku-4-5-20251001 ANTHROPIC_API_KEY + # 2. gpt-5-nano OPENAI_API_KEY + # 3. 
lm_studio/openai-gpt-oss-120b-mlx-6 (local) + # + # Test which model? 1 + # Testing anthropic/claude-haiku-4-5-20251001... + # API key ANTHROPIC_API_KEY ✓ Found (shell environment) + # LLM call ✓ OK (0.3s, $0.0001) + # + # Test which model? 3 + # Testing lm_studio/openai-gpt-oss-120b-mlx-6... + # API key (local — no key required) + # Base URL http://localhost:1234/v1 + # LLM call ✗ Connection refused (localhost:1234) + pass + + +if __name__ == "__main__": + main() diff --git a/context/pddrc_initializer_example.py b/context/pddrc_initializer_example.py new file mode 100644 index 000000000..a5631eb64 --- /dev/null +++ b/context/pddrc_initializer_example.py @@ -0,0 +1,42 @@ +from __future__ import annotations + +import sys +from pathlib import Path + +# Add the project root to sys.path +project_root = Path(__file__).resolve().parent.parent +sys.path.append(str(project_root)) + +from pdd.pddrc_initializer import _build_pddrc_content, _detect_language + + +def main() -> None: + """ + Demonstrates how to use the pddrc_initializer module. + + The primary entry points are: + - _detect_language(cwd): returns "python", "typescript", "go", or None + - _build_pddrc_content(language): returns YAML string for .pddrc + - offer_pddrc_init(): interactive flow with YAML preview + confirmation + + In practice, `pdd setup` imports _detect_language and _build_pddrc_content + directly for a streamlined flow (no YAML preview). + """ + + # Detect language from marker files in cwd + from pathlib import Path + language = _detect_language(Path.cwd()) + print(f"Detected language: {language}") # e.g. 
"python" or None + + # Build .pddrc content for a given language + content = _build_pddrc_content(language or "python") + print(content) + + # Or use the full interactive flow (shows YAML preview, asks for confirmation): + # from pdd.pddrc_initializer import offer_pddrc_init + # was_created = offer_pddrc_init() + pass + + +if __name__ == "__main__": + main() diff --git a/context/provider_manager_example.py b/context/provider_manager_example.py new file mode 100644 index 000000000..a09690a36 --- /dev/null +++ b/context/provider_manager_example.py @@ -0,0 +1,78 @@ +from __future__ import annotations + +import sys +from pathlib import Path + +# Add the project root to sys.path +project_root = Path(__file__).resolve().parent.parent +sys.path.append(str(project_root)) + +from pdd.provider_manager import ( + add_provider_from_registry, + add_custom_provider, + remove_models_by_provider, + remove_individual_models, + parse_api_key_vars, + is_multi_credential, +) + + +def main() -> None: + """ + Demonstrates how to use the provider_manager module to: + 1. Browse the reference CSV to add a provider and its models + 2. Add a custom LiteLLM-compatible provider + 3. Remove all models for a provider (comments out the key) + 4. Remove individual models from the user CSV + 5. Parse pipe-delimited api_key fields + """ + + # Example 1: Browse providers from the bundled reference CSV + # Shows numbered provider list with model counts, enter API key + # add_provider_from_registry() # Uncomment to run interactively + + # Interactive flow: + # Add a provider + # + # 1. Anthropic (5 models) + # 2. Google Vertex AI (8 models) + # 3. OpenAI (12 models) + # ... + # Enter number (empty to cancel): 3 + # + # OPENAI_API_KEY: sk-proj-... + # ✓ Saved OPENAI_API_KEY to ~/.pdd/api-env.zsh + # ✓ Added source line to ~/.zshrc + # Key is available now for this session. 
+ # ✓ Added 12 model(s) for OpenAI to ~/.pdd/llm_model.csv + # + # NOTE: The API key is immediately available in the current session via os.environ, + # so you can test the model right away. New terminal sessions will also have the + # key automatically because `source ~/.pdd/api-env.zsh` was added to ~/.zshrc. + + # Example 2: Add a custom provider (Together AI, Deepinfra, etc.) + # Prompts for prefix, model name, API key var, base URL, costs + # add_custom_provider() # Uncomment to run interactively + + # Example 3: Remove all models for a provider + # Groups by api_key, removes CSV rows, comments out key in api-env + # remove_models_by_provider() # Uncomment to run interactively + + # Example 4: Remove individual models + # Lists all models, user picks by number, removes selected rows + # remove_individual_models() # Uncomment to run interactively + + # Example 5: Utility functions for api_key field parsing + # Useful when working with CSV rows that have pipe-delimited api_key fields + single = parse_api_key_vars("OPENAI_API_KEY") + print(f"Single key vars: {single}") # ['OPENAI_API_KEY'] + + multi = parse_api_key_vars("AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME") + print(f"Multi key vars: {multi}") # ['AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY', 'AWS_REGION_NAME'] + + print(f"Is multi-credential? {is_multi_credential('A|B')}") # True + print(f"Is multi-credential? 
{is_multi_credential('OPENAI_API_KEY')}") # False + + +if __name__ == "__main__": + main() diff --git a/context/setup_tool_example.py b/context/setup_tool_example.py new file mode 100644 index 000000000..ba2d76ff1 --- /dev/null +++ b/context/setup_tool_example.py @@ -0,0 +1,99 @@ +from __future__ import annotations + +import sys +from pathlib import Path + +# Add the project root to sys.path +project_root = Path(__file__).resolve().parent.parent +sys.path.append(str(project_root)) + +from pdd.setup_tool import run_setup + + +def main() -> None: + """ + Demonstrates how to use the setup_tool module to: + 1. Launch the two-phase pdd setup flow + 2. Phase 1: Bootstrap agentic CLIs (Claude/Gemini/Codex) + 3. Phase 2: Auto-configure API keys, models, and .pddrc + + The setup flow is mostly automatic. Phase 1 asks 0-2 questions + (which CLIs to use), then Phase 2 runs 3 deterministic steps + with "Press Enter" pauses between them. + """ + + # Run the setup flow + # run_setup() # Uncomment to run interactively + + # Example flow: + # (PDD ASCII logo in cyan) + # Let's get set up quickly with a solid basic configuration! + # + # Phase 1 -- CLI Bootstrap + # Detected: claude (Anthropic) + # API key: configured + # + # ──────────────────────────────────────── + # Scanning for API keys... + # ──────────────────────────────────────── + # ✓ ANTHROPIC_API_KEY shell environment + # ✓ GEMINI_API_KEY shell environment + # + # 2 API key(s) found. + # You can edit your global API keys in ~/.pdd/api-env.zsh + # + # Press Enter to continue to the next step... + # + # ──────────────────────────────────────── + # Configuring models... + # ──────────────────────────────────────── + # ✓ 3 new model(s) added to ~/.pdd/llm_model.csv + # ✓ 4 model(s) configured + # Anthropic: 3 models + # Google: 1 model + # ✓ .pddrc detected at /path/to/project/.pddrc + # + # Press Enter to continue to the next step... + # + # ──────────────────────────────────────── + # Testing and summarizing... 
+ # ──────────────────────────────────────── + # Testing anthropic/claude-sonnet-4-5-20250929...... + # ✓ claude-sonnet-4-5-20250929 responded OK (1.2s) + # + # PDD Setup Complete! + # + # CLI: ✓ claude configured + # API Keys: ✓ 2 found + # Models: 4 configured (Anthropic: 3, Google: 1) in ~/.pdd/llm_model.csv + # .pddrc: ✓ exists + # Test: ✓ claude-sonnet-4-5-20250929 responded OK (1.2s) + # + # Press Enter to finish, or 'm' for more options: + # + # (user presses Enter) + # + # ──────────────────────────────────────────────────────────────────────────────── + # QUICK START: + # 1. Generate code from the sample prompt: + # pdd generate success_python.prompt + # ──────────────────────────────────────────────────────────────────────────────── + # LEARN MORE: + # • PDD documentation: pdd --help + # • PDD website: https://promptdriven.ai/ + # • Discord community: https://discord.gg/Yp4RTh8bG7 + # + # Full summary saved to PDD-SETUP-SUMMARY.txt + # + # --- OR if user enters 'm': --- + # + # Options: + # 1. Add a provider + # 2. Test a model + # + # Select an option (Enter to finish): + pass + + +if __name__ == "__main__": + main() diff --git a/docs/ONBOARDING.md b/docs/ONBOARDING.md index 1c8e5eb94..485eb663b 100644 --- a/docs/ONBOARDING.md +++ b/docs/ONBOARDING.md @@ -85,7 +85,24 @@ To enable syntax highlighting for `.prompt` files in your editor, you'll need to ### 7. 
Set Up API Keys -Add your LLM API keys to a `.env` file in the project root: +**Recommended: Use the setup wizard** + +Run the interactive setup wizard to configure your API keys: + +```bash +pdd setup +``` + +The wizard will: +- **Scan your environment** for existing API keys from all sources (shell, .env, ~/.pdd files) +- **Present an interactive menu** to add/fix keys, configure local LLMs, or manage providers +- **Validate keys** with real test requests to ensure they work +- **Show cost transparency** for different model tiers +- **Create .pddrc** configuration for your project + +**Alternative: Manual configuration** + +If you prefer manual setup, add your LLM API keys to a `.env` file in the project root: ```bash # Required: At least one LLM provider @@ -93,7 +110,7 @@ OPENAI_API_KEY=sk-your-key-here # OR ANTHROPIC_API_KEY=sk-ant-your-key-here # OR -GOOGLE_API_KEY=your-google-api-key +GEMINI_API_KEY=your-google-api-key # Optional: For Vertex AI (Gemini via GCP) VERTEX_CREDENTIALS=/path/to/service-account.json @@ -841,6 +858,17 @@ rm -f ~/.pdd/llm_model.csv **Fix for "API key not found":** +**Recommended:** Run the setup wizard to detect and fix missing API keys: +```bash +pdd setup +``` + +The wizard will: +- Scan all sources (shell, .env, ~/.pdd files) and show which keys are missing +- Let you add missing keys with immediate validation +- Show exactly where each key is loaded from for transparency + +**Manual fixes:** - If using **Infisical**: Follow **"Step 7: Set Up Infisical for Secrets Management"** above to configure your API keys - If using **.env file**: Ensure your `.env` file in the project root contains your API keys (e.g., `OPENAI_API_KEY=sk-...`) @@ -850,6 +878,8 @@ rm -f ~/.pdd/llm_model.csv infisical run -- env | grep API_KEY # If using Infisical # OR env | grep API_KEY # If using .env +# OR +pdd setup # Shows scan of all keys with source transparency ``` **Note on API key requirements for testing:** diff --git a/pdd/api_key_scanner.py 
b/pdd/api_key_scanner.py new file mode 100644 index 000000000..6dcd1cbf9 --- /dev/null +++ b/pdd/api_key_scanner.py @@ -0,0 +1,202 @@ +""" +pdd/api_key_scanner.py + +Discovers API keys needed by the user's configured models, checking +existence across shell, .env, and PDD config with source transparency. +""" + +import csv +import logging +import os +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, List, Optional + +logger = logging.getLogger(__name__) + + +@dataclass +class KeyInfo: + """Information about an API key's availability.""" + source: str + is_set: bool + + +def _get_csv_path() -> Path: + """Return the path to the user's configured llm_model.csv. + + Reads from ``~/.pdd/llm_model.csv`` so the scan reflects which + API keys the user's configured models actually need, rather than + an arbitrary hardcoded list. + """ + return Path.home() / ".pdd" / "llm_model.csv" + + +def get_provider_key_names() -> List[str]: + """ + Returns a deduplicated, sorted list of all non-empty api_key values + from the user's configured CSV (~/.pdd/llm_model.csv). + + Returns an empty list if the CSV is missing, empty, or malformed. + """ + csv_path = _get_csv_path() + key_names: set = set() + + try: + if not csv_path.exists(): + logger.debug("User CSV not found at %s (no models configured yet).", csv_path) + return [] + + with open(csv_path, "r", newline="", encoding="utf-8") as f: + reader = csv.DictReader(f) + + if reader.fieldnames is None or "api_key" not in reader.fieldnames: + logger.warning( + "llm_model.csv at %s is missing the 'api_key' column.", csv_path + ) + return [] + + for row in reader: + api_key_field = row.get("api_key", "").strip() + if api_key_field: + # Support pipe-delimited multi-var fields (e.g. 
"VAR1|VAR2|VAR3") + for var in api_key_field.split("|"): + var = var.strip() + if var: + key_names.add(var) + + except Exception as e: + logger.error("Error reading llm_model.csv: %s", e) + return [] + + return sorted(key_names) + + +def _load_dotenv_values() -> Dict[str, str]: + """ + Load values from a .env file using python-dotenv's dotenv_values (read-only). + Returns an empty dict on any failure. + """ + try: + from dotenv import dotenv_values # type: ignore + + values = dotenv_values() + # dotenv_values returns an OrderedDict; values can be None for keys without values + return {k: v for k, v in values.items() if v is not None} + except ImportError: + logger.debug("python-dotenv not installed; skipping .env file check.") + return {} + except Exception as e: + logger.error("Error loading .env file: %s", e) + return {} + + +def _detect_shell() -> Optional[str]: + """ + Detect the current shell name from the SHELL environment variable. + Returns the shell name (e.g. 'zsh', 'bash') or None if not detectable. + """ + shell_path = os.environ.get("SHELL", "") + if shell_path: + return os.path.basename(shell_path) + return None + + +def _parse_api_env_file(file_path: Path) -> Dict[str, str]: + """ + Parse a ~/.pdd/api-env.{shell} file for uncommented `export KEY=value` lines. + Returns a dict of key names to values found. 
+ """ + result: Dict[str, str] = {} + + try: + if not file_path.exists(): + logger.debug("api-env file not found at %s", file_path) + return result + + with open(file_path, "r", encoding="utf-8") as f: + for line in f: + stripped = line.strip() + + # Skip empty lines and comments + if not stripped or stripped.startswith("#"): + continue + + # Match lines like: export KEY=value or export KEY="value" + if stripped.startswith("export "): + remainder = stripped[len("export "):].strip() + if "=" in remainder: + key, _, value = remainder.partition("=") + key = key.strip() + value = value.strip() + + # Remove surrounding quotes if present + if len(value) >= 2 and ( + (value.startswith('"') and value.endswith('"')) + or (value.startswith("'") and value.endswith("'")) + ): + value = value[1:-1] + + if key and value: + result[key] = value + + except Exception as e: + logger.error("Error parsing api-env file %s: %s", file_path, e) + + return result + + +def scan_environment() -> Dict[str, KeyInfo]: + """ + Scan for API key existence based on the user's configured models. + + Reads API key names from ``~/.pdd/llm_model.csv`` and checks their + existence in priority order: + 1. .env file (via python-dotenv dotenv_values, read-only) + 2. Shell environment (os.environ) + 3. ~/.pdd/api-env.{shell} file + + Returns a mapping of key name -> KeyInfo(source, is_set). + Returns an empty dict if no models are configured yet. + Never raises exceptions; returns best-effort results. 
+ """ + result: Dict[str, KeyInfo] = {} + + try: + key_names = get_provider_key_names() + + if not key_names: + logger.info("No API key names discovered from CSV.") + return result + + # Load all sources once + dotenv_vals = _load_dotenv_values() + shell_name = _detect_shell() + + api_env_file_path: Optional[Path] = None + api_env_vals: Dict[str, str] = {} + api_env_source_label = "" + + if shell_name: + api_env_file_path = Path.home() / ".pdd" / f"api-env.{shell_name}" + api_env_vals = _parse_api_env_file(api_env_file_path) + api_env_source_label = f"~/.pdd/api-env.{shell_name}" + + for key_name in key_names: + # Check in priority order + if key_name in dotenv_vals: + result[key_name] = KeyInfo(source=".env file", is_set=True) + elif key_name in os.environ: + result[key_name] = KeyInfo(source="shell environment", is_set=True) + elif key_name in api_env_vals: + result[key_name] = KeyInfo( + source=api_env_source_label, is_set=True + ) + else: + # Key not found in any source + result[key_name] = KeyInfo(source="", is_set=False) + + except Exception as e: + logger.error("Unexpected error during environment scan: %s", e) + + return result \ No newline at end of file diff --git a/pdd/cli_detector.py b/pdd/cli_detector.py new file mode 100644 index 000000000..ffadf909d --- /dev/null +++ b/pdd/cli_detector.py @@ -0,0 +1,595 @@ +from __future__ import annotations + +import os +import shutil +import subprocess +import sys +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, List, Optional, Tuple + +from rich.console import Console + +# Maps provider name -> CLI command name +_CLI_COMMANDS: dict[str, str] = { + "anthropic": "claude", + "google": "gemini", + "openai": "codex", +} + +# Maps provider name -> environment variable for API key +_API_KEY_ENV_VARS: dict[str, str] = { + "anthropic": "ANTHROPIC_API_KEY", + "google": "GEMINI_API_KEY", + "openai": "OPENAI_API_KEY", +} + +# Maps provider name -> npm install command for the CLI 
+_INSTALL_COMMANDS: dict[str, str] = { + "anthropic": "npm install -g @anthropic-ai/claude-code", + "google": "npm install -g @google/gemini-cli", + "openai": "npm install -g @openai/codex", +} + +# Maps provider name -> human-readable CLI name +_CLI_DISPLAY_NAMES: dict[str, str] = { + "anthropic": "Claude CLI", + "google": "Gemini CLI", + "openai": "Codex CLI", +} + +# Provider -> primary key env var name (used when saving) +PROVIDER_PRIMARY_KEY: Dict[str, str] = { + "anthropic": "ANTHROPIC_API_KEY", + "google": "GEMINI_API_KEY", + "openai": "OPENAI_API_KEY", +} + +# Provider -> display name +PROVIDER_DISPLAY: Dict[str, str] = { + "anthropic": "Anthropic", + "google": "Google (Gemini)", + "openai": "OpenAI", +} + +# CLI preference order (claude first because it supports subscription auth) +CLI_PREFERENCE: List[str] = ["gemini", "claude", "codex"] + +# Ordered list for the numbered selection table: (provider, cli_name, display_name) +_TABLE_ORDER: List[Tuple[str, str, str]] = [ + ("anthropic", "claude", "Claude CLI"), + ("openai", "codex", "Codex CLI"), + ("google", "gemini", "Gemini CLI"), +] + +# Shell -> RC file path (relative to home) +SHELL_RC_MAP: Dict[str, str] = { + "bash": ".bashrc", + "zsh": ".zshrc", + "fish": os.path.join(".config", "fish", "config.fish"), +} + +# Common installation paths for CLI tools (fallback) +_COMMON_CLI_PATHS: Dict[str, List[Path]] = { + "claude": [ + Path.home() / ".local" / "bin" / "claude", + Path("/usr/local/bin/claude"), + Path("/opt/homebrew/bin/claude"), + ], + "codex": [ + Path.home() / ".local" / "bin" / "codex", + Path("/usr/local/bin/codex"), + Path("/opt/homebrew/bin/codex"), + ], + "gemini": [ + Path.home() / ".local" / "bin" / "gemini", + Path("/usr/local/bin/gemini"), + Path("/opt/homebrew/bin/gemini"), + ], +} + +console = Console(highlight=False) + +@dataclass +class CliBootstrapResult: + """Result of CLI detection and bootstrapping.""" + cli_name: str = "" + provider: str = "" + cli_path: str = "" + 
api_key_configured: bool = False + skipped: bool = False # True when user explicitly skipped CLI setup + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _which(cmd: str) -> str | None: + """Return the full path to a command if found on PATH, else None.""" + if not cmd: + return None + return shutil.which(cmd) + +def _has_api_key(provider: str) -> bool: + """Check whether the API key environment variable is set for a provider.""" + env_var = _API_KEY_ENV_VARS.get(provider, "") + if not env_var: + # Also check fallback keys + if provider == "google": + val = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY") + return bool(val and val.strip()) + return False + val = os.environ.get(env_var) + if val and val.strip(): + return True + # Fallback for google: also check GOOGLE_API_KEY (Vertex AI convention) + if provider == "google": + val = os.environ.get("GOOGLE_API_KEY") + return bool(val and val.strip()) + return False + +def _get_display_key_name(provider: str) -> str: + """Return the key name to display for a provider, checking which is actually set.""" + if provider == "google": + # Prefer GEMINI_API_KEY for display if set, else GOOGLE_API_KEY if set, else GEMINI_API_KEY + if os.environ.get("GEMINI_API_KEY", "").strip(): + return "GEMINI_API_KEY" + if os.environ.get("GOOGLE_API_KEY", "").strip(): + return "GOOGLE_API_KEY" + return "GEMINI_API_KEY" + return _API_KEY_ENV_VARS.get(provider, "") + +def _npm_available() -> bool: + """Check whether npm is available on PATH.""" + return _which("npm") is not None + +def _prompt_input(prompt_text: str) -> str: + """Wrapper around input() for testability.""" + return input(prompt_text) + +def _prompt_yes_no(prompt: str) -> bool: + """Prompt the user with a yes/no question. 
Default is No.""" + try: + answer = _prompt_input(prompt).strip().lower() + except (EOFError, KeyboardInterrupt): + return False + return answer in ("y", "yes") + +def _run_install(install_cmd: str) -> bool: + """Run an installation command via subprocess. Returns True on success.""" + try: + result = subprocess.run( + install_cmd, + shell=True, + capture_output=True, + text=True, + timeout=120 + ) + return result.returncode == 0 + except Exception: + return False + +def _detect_shell() -> str: + """Detect the user's shell from the SHELL environment variable.""" + shell_path = os.environ.get("SHELL", "/bin/bash") + return os.path.basename(shell_path) + +def _get_rc_file_path(shell: str) -> Path: + """Return the absolute path to the shell's RC file.""" + rc_relative = SHELL_RC_MAP.get(shell, SHELL_RC_MAP["bash"]) + if shell == "fish": + return Path.home() / ".config" / "fish" / "config.fish" + return Path.home() / rc_relative + +def _get_api_env_file_path(shell: str) -> Path: + """Return the path to ~/.pdd/api-env.{shell}.""" + return Path.home() / ".pdd" / f"api-env.{shell}" + +def _find_cli_binary(cli_name: str) -> Optional[str]: + """Find a CLI binary by name, including fallbacks.""" + # Use shutil.which first + result = shutil.which(cli_name) + if result: + return result + + # Try common paths + paths = _COMMON_CLI_PATHS.get(cli_name, []) + for path in paths: + if path.exists() and os.access(path, os.X_OK): + return str(path) + + # Try nvm fallback for node-based CLIs + nvm_node = Path.home() / ".nvm" / "versions" / "node" + if nvm_node.exists(): + try: + for version_dir in sorted(nvm_node.iterdir(), reverse=True): + bin_candidate = version_dir / "bin" / cli_name + if bin_candidate.is_file() and os.access(bin_candidate, os.X_OK): + return str(bin_candidate) + except OSError: + pass + + return None + +def _format_export_line(key_name: str, key_value: str, shell: str) -> str: + """Return the shell-appropriate export line.""" + if shell == "fish": + return f"set 
-gx {key_name} {key_value}" + return f"export {key_name}={key_value}" + +def _format_source_line(api_env_path: Path, shell: str) -> str: + """Return the shell-appropriate source line.""" + path_str = str(api_env_path) + if shell == "fish": + return f"test -f {path_str} ; and source {path_str}" + return f"source {path_str}" + +def _save_api_key(key_name: str, key_value: str, shell: str) -> bool: + """Save API key and update shell RC.""" + pdd_dir = Path.home() / ".pdd" + api_env_path = _get_api_env_file_path(shell) + rc_path = _get_rc_file_path(shell) + + try: + pdd_dir.mkdir(parents=True, exist_ok=True) + + # Append or create api-env file + existing_content = "" + if api_env_path.exists(): + existing_content = api_env_path.read_text(encoding="utf-8") + + export_line = _format_export_line(key_name, key_value, shell) + lines = existing_content.splitlines() + # Filter out existing entries for this key + filtered = [ln for ln in lines if key_name not in ln] + filtered.append(export_line) + + api_env_path.write_text("\n".join(filtered) + "\n", encoding="utf-8") + + # Update RC file + source_line = _format_source_line(api_env_path, shell) + rc_content = "" + if rc_path.exists(): + rc_content = rc_path.read_text(encoding="utf-8") + + if source_line not in rc_content: + with open(rc_path, "a", encoding="utf-8") as f: + f.write(f"\n# pdd CLI API keys\n{source_line}\n") + + os.environ[key_name] = key_value + return True + except Exception as e: + console.print(f"[red]Error saving API key: {e}[/red]") + return False + +def _prompt_api_key(provider: str, shell: str) -> bool: + """Prompt user for API key and save it. 
Prints save location on success.""" + key_name = PROVIDER_PRIMARY_KEY.get(provider, "") + if not key_name: + return False + + display = PROVIDER_DISPLAY.get(provider, provider) + try: + key_value = _prompt_input(f" Enter your {display} API key (or press Enter to skip): ").strip() + except (EOFError, KeyboardInterrupt): + return False + + if not key_value: + if provider == "anthropic": + console.print(" [dim]Note: Claude CLI may still work with subscription auth.[/dim]") + return False + + api_env_path = _get_api_env_file_path(shell) + if _save_api_key(key_name, key_value, shell): + console.print(f" [green]\u2713[/green] {key_name} saved to {api_env_path}") + #console.print(f" [green]\u2713[/green] {key_name} loaded into current session") + return True + return False + + +def _test_cli(cli_name: str, cli_path: str) -> bool: + """Run a quick sanity-check invocation of the CLI. Returns True on success.""" + console.print(f"\n Testing {cli_name}...") + try: + result = subprocess.run( + [cli_path, "--version"], + capture_output=True, + text=True, + timeout=15, + ) + if result.returncode == 0: + version_line = (result.stdout or result.stderr or "").strip().splitlines()[0] if (result.stdout or result.stderr) else "" + console.print(f" [green]\u2713[/green] {cli_name} version {version_line or 'OK'}") + return True + else: + # Some CLIs exit non-zero for --version but still work; try --help + result2 = subprocess.run( + [cli_path, "--help"], + capture_output=True, + text=True, + timeout=15, + ) + if result2.returncode == 0: + console.print(f" [green]\u2713[/green] {cli_name} is responsive") + return True + console.print(f" [red]\u2717[/red] {cli_name} test failed (exit {result.returncode})") + return False + except FileNotFoundError: + console.print(f" [red]\u2717[/red] {cli_name} binary not found at {cli_path}") + return False + except subprocess.TimeoutExpired: + console.print(f" [red]\u2717[/red] {cli_name} test timed out") + return False + except Exception as exc: + 
console.print(f" [red]\u2717[/red] {cli_name} test error: {exc}") + return False + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + +def _bootstrap_single_cli( + cli_entry: Dict[str, object], + shell: str, +) -> CliBootstrapResult: + """Process install/key/test for a single CLI selection. + + Returns a populated CliBootstrapResult (skipped=True on failure). + """ + display_name = str(cli_entry["display_name"]) + sel_provider: str = str(cli_entry["provider"]) + sel_cli_name: str = str(cli_entry["cli_name"]) + sel_path: Optional[str] = str(cli_entry["path"]) if cli_entry["path"] else None + sel_has_key: bool = bool(cli_entry["has_key"]) + + console.print(f"\n [bold]Setting up {display_name}...[/bold]") + + def _cli_skip(reason: str = "") -> CliBootstrapResult: + if reason: + console.print(f" [red]\u2717 {reason}[/red]") + console.print(f" [red]\u2717 {display_name} not configured.[/red]") + return CliBootstrapResult(skipped=True) + + # Install step (if not installed) + if not sel_path: + install_cmd = _INSTALL_COMMANDS[sel_provider] + console.print(f" Install command: [bold]{install_cmd}[/bold]") + try: + install_answer = _prompt_input(" Install now? [y/N]: ").strip().lower() + except (EOFError, KeyboardInterrupt): + console.print() + return _cli_skip() + + if install_answer in ("y", "yes"): + if not _npm_available(): + console.print(" [red]\u2717[/red] npm is not installed. 
Please install Node.js/npm first.") + console.print(f" Then run: {install_cmd}") + return _cli_skip("npm not available — cannot install CLI") + + console.print(f" Installing {display_name}...") + if _run_install(install_cmd): + sel_path = _find_cli_binary(sel_cli_name) + if sel_path: + console.print(f" [green]\u2713[/green] Installed {sel_cli_name} at {sel_path}") + else: + console.print(" [yellow]Installation completed but CLI not found on PATH.[/yellow]") + return _cli_skip("CLI installed but not found on PATH") + else: + console.print(" [red]Installation failed. Try installing manually.[/red]") + return _cli_skip("installation failed") + else: + return _cli_skip() + + # API key step (if not set) + if not sel_has_key: + sel_has_key = _prompt_api_key(sel_provider, shell) + if not sel_has_key and sel_provider != "anthropic": + console.print(f" [dim]No API key set. {display_name} may have limited functionality.[/dim]") + + # Force CLI test (no option to skip) + _test_cli(sel_cli_name, sel_path or sel_cli_name) + + return CliBootstrapResult( + cli_name=sel_cli_name, + provider=sel_provider, + cli_path=sel_path or "", + api_key_configured=sel_has_key, + ) + + +def detect_and_bootstrap_cli() -> List[CliBootstrapResult]: + """Phase 1 entry point for pdd setup. + + Shows a numbered selection table of all three CLI options with their + install and API-key status, lets the user choose one or more via + comma-separated input, and walks through installation and key + configuration for each. + + Returns a list of CliBootstrapResult objects (one per selected CLI). + On full skip: returns [CliBootstrapResult(skipped=True)]. 
+ """ + # Import banner helper from setup_tool + from pdd.setup_tool import _print_step_banner + _print_step_banner("Checking CLI tools...") + shell = _detect_shell() + + def _skip_all(reason: str = "") -> List[CliBootstrapResult]: + """Print red CLI-not-configured warning and return a skipped result.""" + if reason: + console.print(f" [red]\u2717 {reason}[/red]") + console.print(" [red]\u2717 CLI not configured. Run `pdd setup` again to configure it.[/red]") + return [CliBootstrapResult(skipped=True)] + + # ------------------------------------------------------------------ + # 1. Gather status for each CLI in table order + # ------------------------------------------------------------------ + cli_info: List[Dict[str, object]] = [] + for provider, cli_name, display_name in _TABLE_ORDER: + path = _find_cli_binary(cli_name) + has_key = _has_api_key(provider) + key_display = _get_display_key_name(provider) + cli_info.append({ + "provider": provider, + "cli_name": cli_name, + "display_name": display_name, + "path": path, + "has_key": has_key, + "key_display": key_display, + }) + + # ------------------------------------------------------------------ + # 2. 
Print numbered selection table with aligned columns + # ------------------------------------------------------------------ + from rich.markup import escape as _escape + + # Compute column widths using plain strings (no markup) for measurement + max_name_len = max(len(str(c["display_name"])) for c in cli_info) + max_install_len = 0 + install_strs_plain: List[str] = [] + install_strs_display: List[str] = [] + for c in cli_info: + if c["path"]: + plain = f"\u2713 Found at {c['path']}" + display = f"[green]\u2713[/green] Found at {_escape(str(c['path']))}" + else: + plain = "\u2717 Not found" + display = "[red]\u2717[/red] Not found" + install_strs_plain.append(plain) + install_strs_display.append(display) + max_install_len = max(max_install_len, len(plain)) + + for idx, c in enumerate(cli_info): + num = idx + 1 + name_padded = str(c["display_name"]).ljust(max_name_len) + install_display = install_strs_display[idx] + install_padding = " " * (max_install_len - len(install_strs_plain[idx])) + if c["has_key"]: + key_str = f"[green]\u2713[/green] {c['key_display']} is set" + else: + key_str = f"[red]\u2717[/red] {c['key_display']} not set" + console.print(f" [blue]{num}[/blue]. {name_padded} {install_display}{install_padding} {key_str}") + + console.print() + + # ------------------------------------------------------------------ + # 3. Determine smart default + # ------------------------------------------------------------------ + default_idx = 0 # fallback: Claude (index 0 -> selection "1") + # Prefer installed + key + for i, c in enumerate(cli_info): + if c["path"] and c["has_key"]: + default_idx = i + break + else: + # Prefer installed only + for i, c in enumerate(cli_info): + if c["path"]: + default_idx = i + break + + # ------------------------------------------------------------------ + # 4. 
Prompt for selection (comma-separated) + # ------------------------------------------------------------------ + try: + console.print(r" Select CLIs to use for pdd agentic tools (enter numbers separated by commas, e.g., [blue]1[/blue],[blue]3[/blue]): ", end="") + raw = _prompt_input("").strip() + except (EOFError, KeyboardInterrupt): + console.print() + return _skip_all() + + if raw.lower() in ("q", "n"): + return _skip_all() + + # Parse comma-separated selections, deduplicate while preserving order + selected_indices: List[int] = [] + if raw == "": + selected_indices = [default_idx] + console.print(f" [dim]Defaulting to {cli_info[default_idx]['display_name']}[/dim]") + else: + seen: set[int] = set() + parts = [p.strip() for p in raw.split(",")] + for part in parts: + if part in ("1", "2", "3"): + idx = int(part) - 1 + if idx not in seen: + seen.add(idx) + selected_indices.append(idx) + if not selected_indices: + # No valid numbers found — treat as default + selected_indices = [default_idx] + console.print(f" [dim]Invalid input. Defaulting to {cli_info[default_idx]['display_name']}[/dim]") + + # ------------------------------------------------------------------ + # 5. 
Process each selected CLI + # ------------------------------------------------------------------ + results: List[CliBootstrapResult] = [] + for sel_idx in selected_indices: + try: + result = _bootstrap_single_cli(cli_info[sel_idx], shell) + results.append(result) + except KeyboardInterrupt: + console.print() + console.print(f" [red]\u2717 {cli_info[sel_idx]['display_name']} not configured.[/red]") + results.append(CliBootstrapResult(skipped=True)) + break # Stop processing remaining CLIs + + if not results: + return _skip_all() + + return results + + +def detect_cli_tools() -> None: + """Legacy detection function.""" + console.print("Agentic CLI Tool Detection") + console.print("(Required for: pdd fix, pdd change, pdd bug)") + console.print() + + found_any = False + all_with_keys_installed = True + + # Use ordered providers + for provider in ["anthropic", "google", "openai"]: + cli_cmd = _CLI_COMMANDS[provider] + display_name = _CLI_DISPLAY_NAMES[provider] + path = _which(cli_cmd) + has_key = _has_api_key(provider) + key_env = _API_KEY_ENV_VARS[provider] + + if path: + found_any = True + console.print(f" [green]\u2713[/green] {display_name} — Found at {path}") + if has_key: + console.print(f" [green]\u2713[/green] {key_env} is set") + else: + console.print(f" [yellow]\u2717[/yellow] {key_env} not set — CLI won't be usable for API calls") + else: + console.print(f" [red]\u2717[/red] {display_name} — Not found") + if has_key: + all_with_keys_installed = False + console.print(f" [yellow]You have {key_env} set but {display_name} is not installed.[/yellow]") + console.print(f" Install: {_INSTALL_COMMANDS[provider]} (install the CLI to use it)") + if _npm_available(): + if _prompt_yes_no(f" Install now? 
[y/N] "): + if _run_install(_INSTALL_COMMANDS[provider]): + new_path = _which(cli_cmd) + if new_path: + console.print(f" {display_name} installed successfully.") + else: + console.print(" completed but not found on PATH") + else: + console.print(" failed (try installing manually)") + else: + console.print(" Skipped (you can install later).") + else: + console.print(" npm is not installed.") + else: + console.print(f" API key ({key_env}): not set") + console.print() + + if all_with_keys_installed and found_any: + console.print("All CLI tools with matching API keys are installed") + elif not found_any: + console.print("Quick start: No CLI tools found. Install one of the supported CLIs and set its API key.") + +if __name__ == "__main__": + detect_cli_tools() diff --git a/pdd/data/llm_model.csv b/pdd/data/llm_model.csv index 26387089f..d3aafa406 100644 --- a/pdd/data/llm_model.csv +++ b/pdd/data/llm_model.csv @@ -1,20 +1,266 @@ -provider,model,input,output,coding_arena_elo,base_url,api_key,max_reasoning_tokens,structured_output,reasoning_type,location -OpenAI,gpt-5-nano,0.05,0.4,1249,,OPENAI_API_KEY,0,True,none, -Google,vertex_ai/gemini-3-flash-preview,0.5,3.0,1442,,VERTEX_CREDENTIALS,0,True,effort,global -Google,vertex_ai/claude-sonnet-4-6,3.0,15.0,1480,,VERTEX_CREDENTIALS,128000,True,budget,global -Google,vertex_ai/gemini-3.1-pro-preview,2.0,12.0,1495,,VERTEX_CREDENTIALS,0,True,effort,global -OpenAI,gpt-5.1-codex-mini,0.25,2.0,1325,,OPENAI_API_KEY,0,True,effort, -OpenAI,gpt-5.2,1.75,14.0,1472,,OPENAI_API_KEY,0,True,effort, -OpenAI,gpt-5.2-codex,1.75,14.0,1472,,OPENAI_API_KEY,0,True,effort, -Google,vertex_ai/deepseek-ai/deepseek-v3.2-maas,0.28,0.42,1450,,VERTEX_CREDENTIALS,0,True,effort,global -Fireworks,fireworks_ai/accounts/fireworks/models/qwen3-coder-480b-a35b-instruct,0.45,1.80,1281,,FIREWORKS_API_KEY,0,False,none, -Google,vertex_ai/claude-opus-4-6,5.0,25.0,1576,,VERTEX_CREDENTIALS,128000,True,budget,global 
-lm_studio,lm_studio/qwen3-coder-next,0,0,1040,http://localhost:1234/v1,,0,True,none, -lm_studio,lm_studio/openai-gpt-oss-120b-mlx-6,0.0001,0,1082,http://localhost:1234/v1,,0,True,effort, -Fireworks,fireworks_ai/accounts/fireworks/models/glm-5,1.00,3.20,1451,,FIREWORKS_API_KEY,0,False,none, -Fireworks,fireworks_ai/accounts/fireworks/models/kimi-k2p5,0.60,3.00,1449,,FIREWORKS_API_KEY,0,False,none, -Anthropic,anthropic/claude-sonnet-4-6,3.0,15.0,1480,,ANTHROPIC_API_KEY,128000,True,budget, -Anthropic,anthropic/claude-opus-4-6,5.0,25.0,1576,,ANTHROPIC_API_KEY,128000,True,budget, -Anthropic,anthropic/claude-haiku-4-5-20251001,1.0,5.0,1270,,ANTHROPIC_API_KEY,128000,True,budget, -xAI,xai/grok-4-0709,3.0,15.0,1467,,XAI_API_KEY,0,True,effort, -xAI,xai/grok-4-1-fast-reasoning,0.20,0.50,1402,,XAI_API_KEY,0,True,none, +provider,model,input,output,coding_arena_elo,base_url,api_key,max_reasoning_tokens,structured_output,reasoning_type,location +AWS Bedrock,anthropic.claude-opus-4-6-v1,5.0,25.0,1530,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +AWS Bedrock,au.anthropic.claude-opus-4-6-v1,5.5,27.5,1530,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +Azure AI,azure_ai/claude-opus-4-6,5.0,25.0,1530,,AZURE_AI_API_KEY,128000,True,budget, +Anthropic,claude-opus-4-6,5.0,25.0,1530,,ANTHROPIC_API_KEY,128000,True,budget, +Anthropic,claude-opus-4-6-20260205,5.0,25.0,1530,,ANTHROPIC_API_KEY,128000,True,budget, +AWS Bedrock,eu.anthropic.claude-opus-4-6-v1,5.5,27.5,1530,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +Anthropic,fast/claude-opus-4-6,30.0,150.0,1530,,ANTHROPIC_API_KEY,128000,True,budget, +Anthropic,fast/claude-opus-4-6-20260205,30.0,150.0,1530,,ANTHROPIC_API_KEY,128000,True,budget, +Anthropic,fast/us/claude-opus-4-6,30.0,150.0,1530,,ANTHROPIC_API_KEY,128000,True,budget, +AWS Bedrock,global.anthropic.claude-opus-4-6-v1,5.0,25.0,1530,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, 
+AWS Bedrock,us.anthropic.claude-opus-4-6-v1,5.5,27.5,1530,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +Anthropic,us/claude-opus-4-6,5.5,27.5,1530,,ANTHROPIC_API_KEY,128000,True,budget, +Anthropic,us/claude-opus-4-6-20260205,5.5,27.5,1530,,ANTHROPIC_API_KEY,128000,True,budget, +Vercel AI Gateway,vercel_ai_gateway/anthropic/claude-opus-4.6,5.0,25.0,1530,,VERCEL_AI_GATEWAY_API_KEY,0,True,effort, +Google Vertex AI,vertex_ai/claude-opus-4-6,5.0,25.0,1530,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global +Google Vertex AI,vertex_ai/claude-opus-4-6@default,5.0,25.0,1530,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global +Google Vertex AI,gemini-3-pro-preview,2.0,12.0,1501,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global +Google Gemini,gemini/gemini-3-pro-preview,2.0,12.0,1501,,GEMINI_API_KEY,0,True,effort, +Github Copilot,github_copilot/gemini-3-pro-preview,0.0,0.0,1501,,,0,True,none, +GMI Cloud,gmi/google/gemini-3-pro-preview,2.0,12.0,1501,,GMI_API_KEY,0,True,none, +OpenRouter,openrouter/google/gemini-3-pro-preview,2.0,12.0,1501,,OPENROUTER_API_KEY,0,True,effort, +Replicate,replicate/google/gemini-3-pro,2.0,12.0,1501,,REPLICATE_API_KEY,0,True,none, +Google Vertex AI,vertex_ai/gemini-3-pro-preview,2.0,12.0,1501,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global +AWS Bedrock,anthropic.claude-opus-4-5-20251101-v1:0,5.0,25.0,1496,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +Azure AI,azure_ai/claude-opus-4-5,5.0,25.0,1496,,AZURE_AI_API_KEY,128000,True,budget, +Anthropic,claude-opus-4-5,5.0,25.0,1496,,ANTHROPIC_API_KEY,128000,True,budget, +Anthropic,claude-opus-4-5-20251101,5.0,25.0,1496,,ANTHROPIC_API_KEY,128000,True,budget, +AWS Bedrock,eu.anthropic.claude-opus-4-5-20251101-v1:0,5.0,25.0,1496,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +Github 
Copilot,github_copilot/claude-opus-4.5,0.0,0.0,1496,,,0,True,none, +AWS Bedrock,global.anthropic.claude-opus-4-5-20251101-v1:0,5.0,25.0,1496,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +GMI Cloud,gmi/anthropic/claude-opus-4.5,5.0,25.0,1496,,GMI_API_KEY,0,True,none, +OpenRouter,openrouter/anthropic/claude-opus-4.5,5.0,25.0,1496,,OPENROUTER_API_KEY,0,True,effort, +AWS Bedrock,us.anthropic.claude-opus-4-5-20251101-v1:0,5.5,27.5,1496,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +Vercel AI Gateway,vercel_ai_gateway/anthropic/claude-opus-4.5,5.0,25.0,1496,,VERCEL_AI_GATEWAY_API_KEY,0,True,effort, +Google Vertex AI,vertex_ai/claude-opus-4-5,5.0,25.0,1496,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global +Google Vertex AI,vertex_ai/claude-opus-4-5@20251101,5.0,25.0,1496,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global +AWS Bedrock,anthropic.claude-sonnet-4-6,3.0,15.0,1485,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +AWS Bedrock,apac.anthropic.claude-sonnet-4-6,3.3,16.5,1485,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +Azure AI,azure_ai/claude-sonnet-4-6,3.0,15.0,1485,,AZURE_AI_API_KEY,128000,True,budget, +Anthropic,claude-sonnet-4-6,3.0,15.0,1485,,ANTHROPIC_API_KEY,128000,True,budget, +AWS Bedrock,eu.anthropic.claude-sonnet-4-6,3.3,16.5,1485,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +AWS Bedrock,global.anthropic.claude-sonnet-4-6,3.0,15.0,1485,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +AWS Bedrock,us.anthropic.claude-sonnet-4-6,3.3,16.5,1485,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +Anthropic,us/claude-sonnet-4-6,3.3,16.5,1485,,ANTHROPIC_API_KEY,128000,True,budget, +Google Vertex 
AI,vertex_ai/claude-sonnet-4-6,3.0,15.0,1485,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global +Google Vertex AI,vertex_ai/claude-sonnet-4-6@default,3.0,15.0,1485,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global +Azure AI,azure_ai/kimi-k2.5,0.6,3.0,1480,,AZURE_AI_API_KEY,0,True,none, +AWS Bedrock,bedrock/ap-northeast-1/moonshotai.kimi-k2.5,0.72,3.6,1480,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, +AWS Bedrock,bedrock/ap-south-1/moonshotai.kimi-k2.5,0.72,3.6,1480,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, +AWS Bedrock,bedrock/ap-southeast-3/moonshotai.kimi-k2.5,0.72,3.6,1480,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, +AWS Bedrock,bedrock/eu-north-1/moonshotai.kimi-k2.5,0.72,3.6,1480,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, +AWS Bedrock,bedrock/moonshotai.kimi-k2.5,0.6,3.03,1480,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +AWS Bedrock,bedrock/sa-east-1/moonshotai.kimi-k2.5,0.72,3.6,1480,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, +AWS Bedrock,bedrock/us-east-1/moonshotai.kimi-k2.5,0.6,3.0,1480,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, +AWS Bedrock,bedrock/us-east-2/moonshotai.kimi-k2.5,0.6,3.0,1480,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, +AWS Bedrock,bedrock/us-west-2/moonshotai.kimi-k2.5,0.6,3.0,1480,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, +Moonshot AI,moonshot/kimi-k2.5,0.6,3.0,1480,,MOONSHOT_API_KEY,0,True,none, +AWS Bedrock,moonshotai.kimi-k2.5,0.6,3.0,1480,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, +OpenRouter,openrouter/moonshotai/kimi-k2.5,0.6,3.0,1480,,OPENROUTER_API_KEY,0,True,none, +Together AI,together_ai/moonshotai/Kimi-K2.5,0.5,2.8,1480,,TOGETHERAI_API_KEY,0,True,effort, +AWS 
Bedrock,anthropic.claude-opus-4-1-20250805-v1:0,15.0,75.0,1475,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +Azure AI,azure_ai/claude-opus-4-1,15.0,75.0,1475,,AZURE_AI_API_KEY,128000,True,budget, +Anthropic,claude-opus-4-1,15.0,75.0,1475,,ANTHROPIC_API_KEY,128000,True,budget, +Anthropic,claude-opus-4-1-20250805,15.0,75.0,1475,,ANTHROPIC_API_KEY,128000,True,budget, +AWS Bedrock,eu.anthropic.claude-opus-4-1-20250805-v1:0,15.0,75.0,1475,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +OpenRouter,openrouter/anthropic/claude-opus-4.1,15.0,75.0,1475,,OPENROUTER_API_KEY,0,True,effort, +AWS Bedrock,us.anthropic.claude-opus-4-1-20250805-v1:0,15.0,75.0,1475,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +Vercel AI Gateway,vercel_ai_gateway/anthropic/claude-opus-4.1,15.0,75.0,1475,,VERCEL_AI_GATEWAY_API_KEY,0,True,effort, +Google Vertex AI,vertex_ai/claude-opus-4-1,15.0,75.0,1475,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,none,global +Google Vertex AI,vertex_ai/claude-opus-4-1@20250805,15.0,75.0,1475,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,none,global +Google Vertex AI,gemini-3-flash-preview,0.5,3.0,1469,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global +Google Gemini,gemini/gemini-3-flash-preview,0.5,3.0,1469,,GEMINI_API_KEY,0,True,effort, +GMI Cloud,gmi/google/gemini-3-flash-preview,0.5,3.0,1469,,GMI_API_KEY,0,True,none, +OpenRouter,openrouter/google/gemini-3-flash-preview,0.5,3.0,1469,,OPENROUTER_API_KEY,0,True,effort, +Google Vertex AI,vertex_ai/gemini-3-flash-preview,0.5,3.0,1469,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global +Dashscope,dashscope/qwen3-max,0.0,0.0,1468,,DASHSCOPE_API_KEY,0,True,effort, +Dashscope,dashscope/qwen3-max-preview,0.0,0.0,1468,,DASHSCOPE_API_KEY,0,True,effort, +Novita 
AI,novita/qwen/qwen3-max,2.11,8.45,1468,,NOVITA_API_KEY,0,True,none, +Azure OpenAI,azure/gpt-5.2,1.75,14.0,1465,,AZURE_API_KEY|AZURE_API_BASE|AZURE_API_VERSION,0,True,effort, +DeepInfra,deepinfra/google/gemini-2.5-pro,1.25,10.0,1465,,DEEPINFRA_API_KEY,0,False,none, +Google Vertex AI,gemini-2.5-pro,1.25,10.0,1465,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global +Google Gemini,gemini/gemini-2.5-pro,1.25,10.0,1465,,GEMINI_API_KEY,0,True,effort, +Github Copilot,github_copilot/gemini-2.5-pro,0.0,0.0,1465,,,0,True,none, +Github Copilot,github_copilot/gpt-5.2,0.0,0.0,1465,,,0,True,none, +GMI Cloud,gmi/openai/gpt-5.2,1.75,14.0,1465,,GMI_API_KEY,0,True,none, +OpenAI,gpt-5.2,1.75,14.0,1465,,OPENAI_API_KEY,0,True,effort, +OpenRouter,openrouter/google/gemini-2.5-pro,1.25,10.0,1465,,OPENROUTER_API_KEY,0,True,none, +OpenRouter,openrouter/openai/gpt-5.2,1.75,14.0,1465,,OPENROUTER_API_KEY,0,True,effort, +Vercel AI Gateway,vercel_ai_gateway/google/gemini-2.5-pro,2.5,10.0,1465,,VERCEL_AI_GATEWAY_API_KEY,0,True,none, +AWS Bedrock,anthropic.claude-sonnet-4-5-20250929-v1:0,3.0,15.0,1464,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +AWS Bedrock,au.anthropic.claude-sonnet-4-5-20250929-v1:0,3.3,16.5,1464,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +Azure AI,azure_ai/claude-sonnet-4-5,3.0,15.0,1464,,AZURE_AI_API_KEY,128000,True,budget, +Anthropic,claude-sonnet-4-5,3.0,15.0,1464,,ANTHROPIC_API_KEY,128000,True,budget, +Anthropic,claude-sonnet-4-5-20250929,3.0,15.0,1464,,ANTHROPIC_API_KEY,128000,True,budget, +AWS Bedrock,claude-sonnet-4-5-20250929-v1:0,3.0,15.0,1464,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +AWS Bedrock,eu.anthropic.claude-sonnet-4-5-20250929-v1:0,3.3,16.5,1464,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +Github Copilot,github_copilot/claude-sonnet-4.5,0.0,0.0,1464,,,0,True,none, +AWS 
Bedrock,global.anthropic.claude-sonnet-4-5-20250929-v1:0,3.0,15.0,1464,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +GMI Cloud,gmi/anthropic/claude-sonnet-4.5,3.0,15.0,1464,,GMI_API_KEY,0,True,none, +AWS Bedrock,jp.anthropic.claude-sonnet-4-5-20250929-v1:0,3.3,16.5,1464,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +OpenRouter,openrouter/anthropic/claude-sonnet-4.5,3.0,15.0,1464,,OPENROUTER_API_KEY,0,True,effort, +AWS Bedrock,us.anthropic.claude-sonnet-4-5-20250929-v1:0,3.3,16.5,1464,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +Vercel AI Gateway,vercel_ai_gateway/anthropic/claude-sonnet-4.5,3.0,15.0,1464,,VERCEL_AI_GATEWAY_API_KEY,0,True,effort, +Google Vertex AI,vertex_ai/claude-sonnet-4-5,3.0,15.0,1464,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global +Google Vertex AI,vertex_ai/claude-sonnet-4-5@20250929,3.0,15.0,1464,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global +Azure OpenAI,azure/gpt-5,1.25,10.0,1460,,AZURE_API_KEY|AZURE_API_BASE|AZURE_API_VERSION,0,True,effort, +Github Copilot,github_copilot/gpt-5,0.0,0.0,1460,,,0,True,none, +GMI Cloud,gmi/openai/gpt-5,1.25,10.0,1460,,GMI_API_KEY,0,True,none, +OpenAI,gpt-5,1.25,10.0,1460,,OPENAI_API_KEY,0,True,effort, +OpenRouter,openrouter/openai/gpt-5,1.25,10.0,1460,,OPENROUTER_API_KEY,0,False,effort, +Replicate,replicate/openai/gpt-5,1.25,10.0,1460,,REPLICATE_API_KEY,0,True,none, +AWS Bedrock,zai.glm-4.7,0.6,2.2,1460,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +DeepInfra,deepinfra/Qwen/Qwen3-235B-A22B-Instruct-2507,0.09,0.6,1457,,DEEPINFRA_API_KEY,0,False,none, +Fireworks AI,fireworks_ai/accounts/fireworks/models/qwen3-235b-a22b-instruct-2507,0.22,0.88,1457,,FIREWORKS_AI_API_KEY,0,False,none, +Novita AI,novita/qwen/qwen3-235b-a22b-instruct-2507,0.09,0.58,1457,,NOVITA_API_KEY,0,True,none, 
+Replicate,replicate/qwen/qwen3-235b-a22b-instruct-2507,0.264,1.06,1457,,REPLICATE_API_KEY,0,True,none, +Google Vertex AI,vertex_ai/qwen/qwen3-235b-a22b-instruct-2507-maas,0.25,1.0,1457,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,none,global +W&B Inference,wandb/Qwen/Qwen3-235B-A22B-Instruct-2507,10000.0,10000.0,1457,,WANDB_API_KEY,0,False,none, +Azure AI,azure_ai/grok-4,3.0,15.0,1453,,AZURE_AI_API_KEY,0,True,none, +Oci,oci/xai.grok-4,3.0,15.0,1453,,OCI_API_KEY,0,True,none, +OpenRouter,openrouter/x-ai/grok-4,3.0,15.0,1453,,OPENROUTER_API_KEY,0,True,effort, +Replicate,replicate/xai/grok-4,7.2,36.0,1453,,REPLICATE_API_KEY,0,True,none, +Vercel AI Gateway,vercel_ai_gateway/xai/grok-4,3.0,15.0,1453,,VERCEL_AI_GATEWAY_API_KEY,0,True,none, +xAI,xai/grok-4,3.0,15.0,1453,,XAI_API_KEY,0,True,none, +xAI,xai/grok-4-latest,3.0,15.0,1453,,XAI_API_KEY,0,True,none, +Azure OpenAI,azure/eu/gpt-5.1,1.38,11.0,1450,,AZURE_API_KEY|AZURE_API_BASE|AZURE_API_VERSION,0,True,effort, +Azure OpenAI,azure/global/gpt-5.1,1.25,10.0,1450,,AZURE_API_KEY|AZURE_API_BASE|AZURE_API_VERSION,0,True,effort, +Azure OpenAI,azure/gpt-5.1,1.25,10.0,1450,,AZURE_API_KEY|AZURE_API_BASE|AZURE_API_VERSION,0,True,effort, +Azure OpenAI,azure/mistral-large-latest,8.0,24.0,1450,,AZURE_API_KEY|AZURE_API_BASE|AZURE_API_VERSION,0,True,none, +Azure OpenAI,azure/us/gpt-5.1,1.38,11.0,1450,,AZURE_API_KEY|AZURE_API_BASE|AZURE_API_VERSION,0,True,effort, +Azure AI,azure_ai/mistral-large,4.0,12.0,1450,,AZURE_AI_API_KEY,0,True,none, +Azure AI,azure_ai/mistral-large-3,0.5,1.5,1450,,AZURE_AI_API_KEY,0,True,none, +Azure AI,azure_ai/mistral-large-latest,2.0,6.0,1450,,AZURE_AI_API_KEY,0,True,none, +AWS Bedrock,bedrock/ap-northeast-1/moonshotai.kimi-k2-thinking,0.73,3.03,1450,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +AWS Bedrock,bedrock/ap-south-1/moonshotai.kimi-k2-thinking,0.71,2.94,1450,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +AWS 
Bedrock,bedrock/moonshotai.kimi-k2-thinking,0.73,3.03,1450,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +AWS Bedrock,bedrock/sa-east-1/moonshotai.kimi-k2-thinking,0.73,3.03,1450,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +AWS Bedrock,bedrock/us-east-1/moonshotai.kimi-k2-thinking,0.6,2.5,1450,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +AWS Bedrock,bedrock/us-east-2/moonshotai.kimi-k2-thinking,0.6,2.5,1450,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +AWS Bedrock,bedrock/us-west-2/moonshotai.kimi-k2-thinking,0.6,2.5,1450,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +Fireworks AI,fireworks_ai/accounts/fireworks/models/kimi-k2-thinking,0.6,2.5,1450,,FIREWORKS_AI_API_KEY,0,True,none, +Github Copilot,github_copilot/gpt-5.1,0.0,0.0,1450,,,0,True,none, +GMI Cloud,gmi/moonshotai/Kimi-K2-Thinking,0.8,1.2,1450,,GMI_API_KEY,0,False,none, +GMI Cloud,gmi/openai/gpt-5.1,1.25,10.0,1450,,GMI_API_KEY,0,True,none, +OpenAI,gpt-5.1,1.25,10.0,1450,,OPENAI_API_KEY,0,True,effort, +Mistral AI,mistral/mistral-large-3,0.5,1.5,1450,,MISTRAL_API_KEY,0,True,none, +Mistral AI,mistral/mistral-large-latest,2.0,6.0,1450,,MISTRAL_API_KEY,0,True,none, +Moonshot AI,moonshot/kimi-k2-thinking,0.6,2.5,1450,,MOONSHOT_API_KEY,0,True,none, +Novita AI,novita/moonshotai/kimi-k2-thinking,0.6,2.5,1450,,NOVITA_API_KEY,0,True,effort, +OpenRouter,openrouter/mistralai/mistral-large,8.0,24.0,1450,,OPENROUTER_API_KEY,0,False,none, +Snowflake,snowflake/mistral-large,0.0,0.0,1450,,SNOWFLAKE_API_KEY,0,False,none, +Google Vertex AI,vertex_ai/mistral-large@2407,2.0,6.0,1450,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,none,global +Google Vertex AI,vertex_ai/mistral-large@2411-001,2.0,6.0,1450,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,none,global +Google Vertex 
AI,vertex_ai/mistral-large@latest,2.0,6.0,1450,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,none,global +Google Vertex AI,vertex_ai/moonshotai/kimi-k2-thinking-maas,0.6,2.5,1450,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,none,global +DeepInfra,deepinfra/Qwen/Qwen3-235B-A22B-Thinking-2507,0.3,2.9,1442,,DEEPINFRA_API_KEY,0,False,none, +Fireworks AI,fireworks_ai/accounts/fireworks/models/qwen3-235b-a22b-thinking-2507,0.22,0.88,1442,,FIREWORKS_AI_API_KEY,0,False,none, +Novita AI,novita/qwen/qwen3-235b-a22b-thinking-2507,0.3,3.0,1442,,NOVITA_API_KEY,0,True,effort, +OpenRouter,openrouter/qwen/qwen3-235b-a22b-thinking-2507,0.11,0.6,1442,,OPENROUTER_API_KEY,0,True,effort, +Together AI,together_ai/Qwen/Qwen3-235B-A22B-Thinking-2507,0.65,3.0,1442,,TOGETHERAI_API_KEY,0,True,none, +W&B Inference,wandb/Qwen/Qwen3-235B-A22B-Thinking-2507,10000.0,10000.0,1442,,WANDB_API_KEY,0,False,none, +Azure OpenAI,azure/o3,2.0,8.0,1441,,AZURE_API_KEY|AZURE_API_BASE|AZURE_API_VERSION,0,True,effort, +OpenAI,o3,2.0,8.0,1441,,OPENAI_API_KEY,0,True,effort, +Vercel AI Gateway,vercel_ai_gateway/openai/o3,2.0,8.0,1441,,VERCEL_AI_GATEWAY_API_KEY,0,True,none, +Azure AI,azure_ai/global/grok-3,3.0,15.0,1439,,AZURE_AI_API_KEY,0,True,none, +Azure AI,azure_ai/grok-3,3.0,15.0,1439,,AZURE_AI_API_KEY,0,True,none, +Oci,oci/xai.grok-3,3.0,15.0,1439,,OCI_API_KEY,0,True,none, +Vercel AI Gateway,vercel_ai_gateway/xai/grok-3,3.0,15.0,1439,,VERCEL_AI_GATEWAY_API_KEY,0,True,none, +xAI,xai/grok-3,3.0,15.0,1439,,XAI_API_KEY,0,True,none, +xAI,xai/grok-3-latest,3.0,15.0,1439,,XAI_API_KEY,0,True,none, +AWS Bedrock,anthropic.claude-haiku-4-5-20251001-v1:0,1.0,5.0,1436,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +AWS Bedrock,anthropic.claude-haiku-4-5@20251001,1.0,5.0,1436,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +AWS 
Bedrock,apac.anthropic.claude-haiku-4-5-20251001-v1:0,1.1,5.5,1436,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +AWS Bedrock,au.anthropic.claude-haiku-4-5-20251001-v1:0,1.1,5.5,1436,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +Azure AI,azure_ai/claude-haiku-4-5,1.0,5.0,1436,,AZURE_AI_API_KEY,128000,True,budget, +Anthropic,claude-haiku-4-5,1.0,5.0,1436,,ANTHROPIC_API_KEY,128000,True,budget, +Anthropic,claude-haiku-4-5-20251001,1.0,5.0,1436,,ANTHROPIC_API_KEY,128000,True,budget, +DeepInfra,deepinfra/deepseek-ai/DeepSeek-R1-0528,0.5,2.15,1436,,DEEPINFRA_API_KEY,0,False,none, +AWS Bedrock,eu.anthropic.claude-haiku-4-5-20251001-v1:0,1.1,5.5,1436,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +Fireworks AI,fireworks_ai/accounts/fireworks/models/deepseek-r1-0528,3.0,8.0,1436,,FIREWORKS_AI_API_KEY,0,True,none, +Github Copilot,github_copilot/claude-haiku-4.5,0.0,0.0,1436,,,0,True,none, +AWS Bedrock,global.anthropic.claude-haiku-4-5-20251001-v1:0,1.0,5.0,1436,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +Hyperbolic,hyperbolic/deepseek-ai/DeepSeek-R1-0528,0.25,0.25,1436,,HYPERBOLIC_API_KEY,0,True,none, +AWS Bedrock,jp.anthropic.claude-haiku-4-5-20251001-v1:0,1.1,5.5,1436,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +Lambda AI,lambda_ai/deepseek-r1-0528,0.2,0.6,1436,,LAMBDA_API_KEY,0,True,effort, +Novita AI,novita/deepseek/deepseek-r1-0528,0.7,2.5,1436,,NOVITA_API_KEY,0,True,effort, +OpenRouter,openrouter/anthropic/claude-haiku-4.5,1.0,5.0,1436,,OPENROUTER_API_KEY,0,True,effort, +OpenRouter,openrouter/deepseek/deepseek-r1-0528,0.5,2.15,1436,,OPENROUTER_API_KEY,0,True,effort, +AWS Bedrock,us.anthropic.claude-haiku-4-5-20251001-v1:0,1.1,5.5,1436,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +Vercel AI Gateway,vercel_ai_gateway/anthropic/claude-haiku-4.5,1.0,5.0,1436,,VERCEL_AI_GATEWAY_API_KEY,0,True,effort, +Google Vertex 
AI,vertex_ai/claude-haiku-4-5@20251001,1.0,5.0,1436,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global +Google Vertex AI,vertex_ai/deepseek-ai/deepseek-r1-0528-maas,1.35,5.4,1436,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global +W&B Inference,wandb/deepseek-ai/DeepSeek-R1-0528,135000.0,540000.0,1436,,WANDB_API_KEY,0,False,none, +Azure AI,azure_ai/deepseek-v3.2,0.58,1.68,1431,,AZURE_AI_API_KEY,128000,True,budget, +DeepSeek,deepseek/deepseek-v3.2,0.28,0.4,1431,,DEEPSEEK_API_KEY,0,True,effort, +GMI Cloud,gmi/deepseek-ai/DeepSeek-V3.2,0.28,0.4,1431,,GMI_API_KEY,0,True,none, +Novita AI,novita/deepseek/deepseek-v3.2,0.269,0.4,1431,,NOVITA_API_KEY,0,True,effort, +OpenRouter,openrouter/deepseek/deepseek-v3.2,0.28,0.4,1431,,OPENROUTER_API_KEY,0,True,effort, +Google Vertex AI,vertex_ai/deepseek-ai/deepseek-v3.2-maas,0.56,1.68,1431,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global +AWS Bedrock,bedrock/ap-northeast-1/minimax.minimax-m2.1,0.36,1.44,1430,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, +AWS Bedrock,bedrock/ap-south-1/minimax.minimax-m2.1,0.36,1.44,1430,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, +AWS Bedrock,bedrock/ap-southeast-3/minimax.minimax-m2.1,0.36,1.44,1430,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, +AWS Bedrock,bedrock/eu-central-1/minimax.minimax-m2.1,0.36,1.44,1430,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, +AWS Bedrock,bedrock/eu-north-1/minimax.minimax-m2.1,0.36,1.44,1430,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, +AWS Bedrock,bedrock/eu-south-1/minimax.minimax-m2.1,0.36,1.44,1430,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, +AWS Bedrock,bedrock/eu-west-1/minimax.minimax-m2.1,0.36,1.44,1430,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, +AWS 
Bedrock,bedrock/eu-west-2/minimax.minimax-m2.1,0.47,1.86,1430,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, +AWS Bedrock,bedrock/sa-east-1/minimax.minimax-m2.1,0.36,1.44,1430,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, +AWS Bedrock,bedrock/us-east-1/minimax.minimax-m2.1,0.3,1.2,1430,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, +AWS Bedrock,bedrock/us-east-2/minimax.minimax-m2.1,0.3,1.2,1430,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, +AWS Bedrock,bedrock/us-west-2/minimax.minimax-m2.1,0.3,1.2,1430,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, +DeepInfra,deepinfra/deepseek-ai/DeepSeek-V3.1,0.27,1.0,1430,,DEEPINFRA_API_KEY,0,False,effort, +Fireworks AI,fireworks_ai/accounts/fireworks/models/minimax-m2,0.3,1.2,1430,,FIREWORKS_AI_API_KEY,0,False,none, +GMI Cloud,gmi/MiniMaxAI/MiniMax-M2.1,0.3,1.2,1430,,GMI_API_KEY,0,False,none, +AWS Bedrock,minimax.minimax-m2,0.3,1.2,1430,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,False,none, +AWS Bedrock,minimax.minimax-m2.1,0.3,1.2,1430,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, +Novita AI,novita/deepseek/deepseek-v3.1,0.27,1.0,1430,,NOVITA_API_KEY,0,True,effort, +Replicate,replicate/deepseek-ai/deepseek-v3.1,0.672,2.016,1430,,REPLICATE_API_KEY,0,True,effort, +SambaNova,sambanova/DeepSeek-V3.1,3.0,4.5,1430,,SAMBANOVA_API_KEY,0,True,effort, +Together AI,together_ai/deepseek-ai/DeepSeek-V3.1,0.6,1.7,1430,,TOGETHERAI_API_KEY,0,True,effort, +Google Vertex AI,vertex_ai/deepseek-ai/deepseek-v3.1-maas,1.35,5.4,1430,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global +Google Vertex AI,vertex_ai/minimaxai/minimax-m2-maas,0.3,1.2,1430,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,none,global +W&B Inference,wandb/deepseek-ai/DeepSeek-V3.1,55000.0,165000.0,1430,,WANDB_API_KEY,0,False,none, 
+DeepInfra,deepinfra/google/gemini-2.5-flash,0.3,2.5,1420,,DEEPINFRA_API_KEY,0,False,none, +Google Vertex AI,gemini-2.5-flash,0.3,2.5,1420,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global +Google Gemini,gemini/gemini-2.5-flash,0.3,2.5,1420,,GEMINI_API_KEY,0,True,effort, +OpenRouter,openrouter/google/gemini-2.5-flash,0.3,2.5,1420,,OPENROUTER_API_KEY,0,True,none, +Replicate,replicate/google/gemini-2.5-flash,2.5,2.5,1420,,REPLICATE_API_KEY,0,True,none, +Vercel AI Gateway,vercel_ai_gateway/google/gemini-2.5-flash,0.3,2.5,1420,,VERCEL_AI_GATEWAY_API_KEY,0,True,none, +Azure OpenAI,azure/gpt-4.5-preview,75.0,150.0,1419,,AZURE_API_KEY|AZURE_API_BASE|AZURE_API_VERSION,0,True,none, +Azure OpenAI,azure/gpt-5-mini,0.25,2.0,1419,,AZURE_API_KEY|AZURE_API_BASE|AZURE_API_VERSION,0,True,effort, +Github Copilot,github_copilot/gpt-5-mini,0.0,0.0,1419,,,0,True,none, +OpenAI,gpt-4.5-preview,75.0,150.0,1419,,OPENAI_API_KEY,0,True,none, +OpenAI,gpt-5-mini,0.25,2.0,1419,,OPENAI_API_KEY,0,True,effort, +OpenRouter,openrouter/openai/gpt-5-mini,0.25,2.0,1419,,OPENROUTER_API_KEY,0,False,effort, +Replicate,replicate/openai/gpt-5-mini,0.25,2.0,1419,,REPLICATE_API_KEY,0,True,none, +DeepInfra,deepinfra/Qwen/Qwen3-Coder-480B-A35B-Instruct,0.4,1.6,1406,,DEEPINFRA_API_KEY,0,False,none, +DeepInfra,deepinfra/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo,0.29,1.2,1406,,DEEPINFRA_API_KEY,0,False,none, +Fireworks AI,fireworks_ai/accounts/fireworks/models/qwen3-coder-480b-a35b-instruct,0.45,1.8,1406,,FIREWORKS_AI_API_KEY,0,False,effort, +Novita AI,novita/qwen/qwen3-coder-480b-a35b-instruct,0.3,1.3,1406,,NOVITA_API_KEY,0,True,none, +AWS Bedrock,qwen.qwen3-coder-480b-a35b-v1:0,0.22,1.8,1406,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +Together AI,together_ai/Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8,2.0,2.0,1406,,TOGETHERAI_API_KEY,0,True,none, +Google Vertex 
AI,vertex_ai/qwen/qwen3-coder-480b-a35b-instruct-maas,1.0,4.0,1406,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,none,global +W&B Inference,wandb/Qwen/Qwen3-Coder-480B-A35B-Instruct,100000.0,150000.0,1406,,WANDB_API_KEY,0,False,none, +AWS Bedrock,anthropic.claude-opus-4-20250514-v1:0,15.0,75.0,1405,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +Anthropic,claude-4-opus-20250514,15.0,75.0,1405,,ANTHROPIC_API_KEY,128000,True,budget, +Anthropic,claude-opus-4-20250514,15.0,75.0,1405,,ANTHROPIC_API_KEY,128000,True,budget, +DeepInfra,deepinfra/anthropic/claude-4-opus,16.5,82.5,1405,,DEEPINFRA_API_KEY,0,False,none, +AWS Bedrock,eu.anthropic.claude-opus-4-20250514-v1:0,15.0,75.0,1405,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +GMI Cloud,gmi/anthropic/claude-opus-4,15.0,75.0,1405,,GMI_API_KEY,0,True,none, +OpenRouter,openrouter/anthropic/claude-opus-4,15.0,75.0,1405,,OPENROUTER_API_KEY,0,True,effort, +AWS Bedrock,us.anthropic.claude-opus-4-20250514-v1:0,15.0,75.0,1405,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +Vercel AI Gateway,vercel_ai_gateway/anthropic/claude-4-opus,15.0,75.0,1405,,VERCEL_AI_GATEWAY_API_KEY,0,True,none, +Vercel AI Gateway,vercel_ai_gateway/anthropic/claude-opus-4,15.0,75.0,1405,,VERCEL_AI_GATEWAY_API_KEY,0,True,effort, +Google Vertex AI,vertex_ai/claude-opus-4,15.0,75.0,1405,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global +Google Vertex AI,vertex_ai/claude-opus-4@20250514,15.0,75.0,1405,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global +Moonshot AI,moonshot/kimi-k2-0905-preview,0.6,2.5,1403,,MOONSHOT_API_KEY,0,True,none, +Novita AI,novita/moonshotai/kimi-k2-0905,0.6,2.5,1403,,NOVITA_API_KEY,0,True,none, +DeepInfra,deepinfra/moonshotai/Kimi-K2-Instruct,0.5,2.0,1402,,DEEPINFRA_API_KEY,0,False,none, +Fireworks 
AI,fireworks_ai/accounts/fireworks/models/kimi-k2-instruct,0.6,2.5,1402,,FIREWORKS_AI_API_KEY,0,True,none, +Hyperbolic,hyperbolic/moonshotai/Kimi-K2-Instruct,2.0,2.0,1402,,HYPERBOLIC_API_KEY,0,True,none, +Moonshot AI,moonshot/kimi-k2-0711-preview,0.6,2.5,1402,,MOONSHOT_API_KEY,0,True,none, +Novita AI,novita/moonshotai/kimi-k2-instruct,0.57,2.3,1402,,NOVITA_API_KEY,0,True,none, +Together AI,together_ai/moonshotai/Kimi-K2-Instruct,1.0,3.0,1402,,TOGETHERAI_API_KEY,0,True,none, +W&B Inference,wandb/moonshotai/Kimi-K2-Instruct,0.6,2.5,1402,,WANDB_API_KEY,0,False,none, diff --git a/pdd/docs/prompting_guide.md b/pdd/docs/prompting_guide.md index 80f7be992..b336cca34 100644 --- a/pdd/docs/prompting_guide.md +++ b/pdd/docs/prompting_guide.md @@ -195,10 +195,10 @@ Tip: Prefer small, named sections using XML‑style tags to make context scannab The PDD preprocessor supports additional XML‑style tags to keep prompts clean, reproducible, and self‑contained. Processing order (per spec) is: `pdd` → `include`/`include-many` → `shell` → `web`. When `recursive=True`, `` and `` are deferred until a non‑recursive pass. -- `` +- `` - Purpose: human‑only comment. Removed entirely during preprocessing. - Use: inline rationale or notes that should not reach the model. - - Example: `Before step X explain why we do this here` + - Example: `Before step X ` - `` - Purpose: run a shell command and inline stdout at that position. @@ -222,6 +222,194 @@ The PDD preprocessor supports additional XML‑style tags to keep prompts clean, Use these tags sparingly. When you must use them, prefer stable commands with bounded output (e.g., `head -n 20` in ``). +**`context_urls` in Architecture Entries:** + +When an architecture.json entry includes a `context_urls` array, the `generate_prompt` template automatically converts each entry into a `` tag in the generated prompt's Dependencies section. 
This enables the LLM to fetch relevant API documentation during code generation: + +```json +"context_urls": [ + {"url": "https://fastapi.tiangolo.com/tutorial/first-steps/", "purpose": "FastAPI routing patterns"} +] +``` + +Becomes in the generated prompt: +```xml + + https://fastapi.tiangolo.com/tutorial/first-steps/ + +``` + +The tag name is derived from the `purpose` field (lowercased, spaces replaced with underscores). This mechanism bridges architecture-level research with prompt-level context. + +--- + +## Architecture Metadata Tags + +PDD prompts can include optional XML metadata tags that sync with `architecture.json`. These tags enable bidirectional sync between prompt files and the architecture visualization, keeping your project's architecture documentation automatically up-to-date. + +### Tag Format + +Place architecture metadata tags at the **top of your prompt file** (after any `` directives but before the main content): + +```xml +Brief description of module's purpose (60-120 chars) + + +{{ + "type": "module", + "module": {{ + "functions": [ + {"name": "function_name", "signature": "(...)", "returns": "Type"} + ] + }} +}} + + +dependency_prompt_1.prompt +dependency_prompt_2.prompt +``` + +### Tag Reference + +**``** +- **Purpose**: One-line description of why this module exists +- **Maps to**: `architecture.json["reason"]` +- **Format**: Single line string (recommended 60-120 characters) +- **Example**: `Provides unified LLM invocation across all PDD operations.` + +**``** +- **Purpose**: JSON describing the module's public API (functions, commands, pages) +- **Maps to**: `architecture.json["interface"]` +- **Format**: Valid JSON matching one of four interface types (see below) +- **Example**: + ```xml + + {{ + "type": "module", + "module": {{ + "functions": [ + {"name": "llm_invoke", "signature": "(prompt, strength, ...)", "returns": "Dict"} + ] + }} + }} + + ``` + +**``** +- **Purpose**: References other prompt files this module depends on +- 
**Maps to**: `architecture.json["dependencies"]` array +- **Format**: Prompt filename (e.g., `llm_invoke_python.prompt`) +- **Multiple tags**: Use one `` tag per dependency +- **Example**: + ```xml + llm_invoke_python.prompt + path_resolution_python.prompt + ``` + +### Interface Types + +The `` tag supports four interface types, matching the architecture.json schema: + +**Module Interface** (Python modules with functions): +```json +{ + "type": "module", + "module": { + "functions": [ + {"name": "func_name", "signature": "(arg1, arg2)", "returns": "Type"} + ] + } +} +``` + +**CLI Interface** (Command-line interfaces): +```json +{ + "type": "cli", + "cli": { + "commands": [ + {"name": "cmd_name", "description": "What it does"} + ] + } +} +``` + +**Command Interface** (PDD commands): +```json +{ + "type": "command", + "command": { + "commands": [ + {"name": "cmd_name", "description": "What it does"} + ] + } +} +``` + +**Frontend Interface** (UI pages): +```json +{ + "type": "frontend", + "frontend": { + "pages": [ + {"name": "page_name", "route": "/path"} + ] + } +} +``` + +### Sync Workflow + +1. **Add/edit tags** in your prompt files using the format above +2. **Click "Sync from Prompt"** in the PDD Connect Architecture page (or call the API endpoint) +3. **Tags automatically update** `architecture.json` with your changes +4. **Architecture visualization** reflects the updated dependencies and interfaces + +Prompts are the **source of truth** - tags in prompt files override what's in `architecture.json`. This aligns with PDD's core philosophy that prompts, not code or documentation, are authoritative. 
+ +### Validation + +Validation is **lenient**: +- Missing tags are OK - only fields with tags get updated +- Malformed XML/JSON is skipped without blocking sync +- Circular dependencies are detected and prevent invalid updates +- Missing dependency files generate warnings but don't block sync + +### Best Practices + +**Keep `<reason>` concise** (60-120 chars) +- Good: "Provides unified LLM invocation across all PDD operations." +- Too long: "This module exists because we needed a way to call different LLM providers through a unified interface that supports both streaming and non-streaming modes while also handling rate limiting and retry logic..." + +**Use prompt filenames for dependencies**, not module names +- Correct: `llm_invoke_python.prompt` +- Wrong: `pdd.llm_invoke` +- Wrong: `context/example.py` + +**Validate interface JSON before committing** +- Use a JSON validator to check syntax +- Ensure `type` field matches one of: `module`, `cli`, `command`, `frontend` +- Include required nested keys (`functions`, `commands`, or `pages`) + +**Run "Sync All" after bulk prompt updates** +- If you've edited multiple prompts, sync all at once +- Review the validation results for circular dependencies +- Fix any warnings before committing changes + +### Relationship to Other Tags + +**`<dependency>` vs `<include>`**: +- `<dependency>`: Declares architectural dependency (updates `architecture.json`) +- `<include>`: Injects content into prompt for LLM context (does NOT affect architecture) +- Use both when appropriate - they serve different purposes + +**Architecture metadata tags vs `<pdd>`**: +- `<pdd>`: Human-only comments (removed by preprocessor, never reach LLM) +- Both are valid PDD directives with different purposes + +### Example: Complete Prompt with Metadata Tags + +See `docs/examples/prompt_with_metadata.prompt` for a full example showing all three metadata tags in context.
+ --- ## Advanced Tips @@ -544,7 +732,8 @@ Key practice: Code and examples are ephemeral (regenerated); Tests and Prompts a | Task Type | Where to Start | The Workflow | | :--- | :--- | :--- | | **New Feature** | **The Prompt** | 1. Add/Update Requirements in Prompt.
2. Regenerate Code (LLM sees existing tests).
3. Write new Tests to verify. | -| **Bug Fix** | **The Test File** | 1. Use `pdd bug` to create a failing test case (repro) in the Test file.
2. Clarify the Prompt to address the edge case if needed.
3. Run `pdd fix` (LLM sees the new test and must pass it). | +| **Bug Fix (Code)** | **The Test File** | 1. Use `pdd bug` to create a failing test case (repro) in the Test file.
2. Clarify the Prompt to address the edge case if needed.
3. Run `pdd fix` (LLM sees the new test and must pass it).
**Tip:** Use `pdd fix --protect-tests` if the tests from `pdd bug` are correct and you want to prevent the LLM from modifying them. | +| **Bug Fix (Prompt Defect)** | **The Prompt** | When `pdd bug` determines the prompt specification itself is wrong (Step 5.5), it auto-fixes the prompt file. The workflow then continues to generate tests based on the corrected prompt. | **Key insight:** When you run `pdd generate` after adding a test, the LLM sees that test as context. This means the generated code is constrained to pass it - the test acts as a specification, not just a verification. @@ -572,6 +761,31 @@ After a successful fix, ask: "Where should this knowledge live?" - "The code style was inconsistent" → Update preamble (not prompt) - "I prefer different variable names" → Update preamble/prompt +### Prompt Defects vs. Code Bugs + +In PDD, the prompt is the source of truth. However, prompts themselves can contain defects. The `pdd bug` agentic workflow (Step 5.5: Prompt Classification) distinguishes between two types of bugs: + +| Defect Type | Definition | Detection | Action | +|-------------|------------|-----------|--------| +| **Code Bug** | Code doesn't match the prompt specification | Tests fail because implementation diverges from requirements | Fix the code via `pdd fix` | +| **Prompt Defect** | Prompt doesn't match the intended behavior | User-reported expected behavior contradicts the prompt | Fix the prompt, then regenerate | + +**How Prompt Classification Works:** + +After root cause analysis (Step 5), the workflow examines whether: +1. The code correctly implements the prompt, but the prompt is wrong (→ Prompt Defect) +2. 
The code incorrectly implements the prompt (→ Code Bug) + +**Output markers** for automation: +- `DEFECT_TYPE: code` - Proceed with normal test generation +- `DEFECT_TYPE: prompt` - Auto-fix the prompt file first +- `PROMPT_FIXED: path/to/file.prompt` - Indicates which prompt was modified +- `PROMPT_REVIEW: reason` - Request human review for ambiguous cases + +**Default behavior:** When classification is uncertain, the workflow defaults to "code bug" to preserve backward compatibility. + +This classification prevents the "test oracle problem" - where tests generated from a flawed prompt would encode incorrect behavior, causing `pdd fix` to "fix" correct code to match the buggy specification. + --- ## PDD vs Interactive Agentic Coders (Claude Code, Cursor) @@ -690,4 +904,4 @@ Key differences: ## Final Notes -Think of prompts as your programming language. Keep them concise, explicit, and modular. Regenerate instead of patching, verify behavior with accumulating tests, and continuously back‑propagate implementation learnings into your prompts. That discipline is what converts maintenance from an endless patchwork into a compounding system of leverage. +Think of prompts as your programming language. Keep them concise, explicit, and modular. Regenerate instead of patching, verify behavior with accumulating tests, and continuously back‑propagate implementation learnings into your prompts. That discipline is what converts maintenance from an endless patchwork into a compounding system of leverage. \ No newline at end of file diff --git a/pdd/generate_model_catalog.py b/pdd/generate_model_catalog.py new file mode 100644 index 000000000..907d00725 --- /dev/null +++ b/pdd/generate_model_catalog.py @@ -0,0 +1,711 @@ +#!/usr/bin/env python3 +""" +scripts/generate_model_catalog.py + +Regenerates pdd/data/llm_model.csv from LiteLLM's bundled model registry. 
+ +Usage: + python pdd/generate_model_catalog.py [--output PATH] + +The script pulls from litellm.model_cost (local data, no network calls) and: + - Filters to chat-mode models only + - Skips deprecated models + - Skips placeholder/tier entries (e.g. together-ai-4.1b-8b) + - Converts per-token costs to per-million-token costs + - Looks up display provider names and API key env var names + - Applies curated ELO scores for known models; skips models below ELO_CUTOFF + - Infers structured_output, reasoning_type, max_reasoning_tokens + - Sorts by ELO descending, then model name ascending + +Re-run this script whenever you update the litellm package to pick up new models. +""" + +from __future__ import annotations + +import argparse +import csv +import re +import sys +from datetime import date +from pathlib import Path +from typing import Dict, List, Optional, Tuple + +# --------------------------------------------------------------------------- +# ELO cutoff — models below this score are excluded from the catalog. +# --------------------------------------------------------------------------- +ELO_CUTOFF = 1400 + +# --------------------------------------------------------------------------- +# ELO scores — canonical base model names mapped to coding arena ELO. +# All known models are listed here; ELO_CUTOFF controls which make the CSV. +# Keys are normalized base names (as produced by _extract_base_model). + +# Scores sourced from LM Arena *coding* leaderboard (Feb 2026): +# https://openlm.ai/chatbot-arena/ (Coding column) +# You should update these values every so often.
+# --------------------------------------------------------------------------- +ELO_SCORES: Dict[str, int] = { + # ----------------------------------------------------------------------- + # Anthropic Claude — dash-separated canonical form + # ----------------------------------------------------------------------- + "claude-opus-4-6": 1530, + "claude-opus-4-5": 1496, + "claude-opus-4": 1405, + "claude-opus-4-1": 1475, + "claude-sonnet-4-6": 1485, + "claude-sonnet-4-5": 1464, + "claude-sonnet-4": 1384, + "claude-3-7-sonnet": 1341, + "claude-3-5-sonnet-20241022": 1340, + "claude-3-5-sonnet-20240620": 1309, + "claude-3-5-sonnet": 1340, + "claude-haiku-4-5": 1436, + "claude-3-5-haiku": 1287, + "claude-3-opus": 1269, + "claude-3-haiku": 1208, + "claude-3-sonnet": 1232, + # Dot-separated variants (OpenRouter, GitHub Copilot, Vercel, GMI) + "claude-opus-4.6": 1530, + "claude-opus-4.5": 1496, + "claude-opus-4.1": 1475, + "claude-sonnet-4.6": 1485, + "claude-sonnet-4.5": 1464, + "claude-haiku-4.5": 1436, + "claude-3.5-sonnet": 1340, + "claude-3.5-haiku": 1287, + "claude-3.7-sonnet": 1341, + # Alternate naming: "claude-4-opus" / "claude-4-sonnet" + "claude-4-opus": 1405, + "claude-4-sonnet": 1384, + # ----------------------------------------------------------------------- + # OpenAI — GPT-5 family + # ----------------------------------------------------------------------- + "gpt-5": 1460, + "gpt-5.1": 1450, + "gpt-5.2": 1465, + "gpt-5-mini": 1419, + "gpt-5-nano": 1363, + # OpenAI — GPT-4.x + "gpt-4.5": 1419, + "gpt-4.1": 1396, + "gpt-4.1-mini": 1370, + "gpt-4.1-nano": 1312, + "gpt-4o": 1307, + "gpt-4o-2024-08-06": 1307, + "gpt-4o-2024-11-20": 1307, + "gpt-4o-mini": 1300, + "gpt-4-turbo": 1280, + "gpt-4-0125-preview": 1261, + "gpt-4-1106-preview": 1269, + # OpenAI — o-series + "o3": 1441, + "o4-mini": 1385, + "o3-mini": 1361, + "o1": 1378, + "o1-mini": 1366, + "o1-preview": 1378, + # OpenAI — gpt-oss + "gpt-oss-120b": 1398, + "gpt-oss-20b": 1371, + # 
----------------------------------------------------------------------- + # Google Gemini + # ----------------------------------------------------------------------- + "gemini-3-pro": 1501, + "gemini-3-pro-preview": 1501, + "gemini-3-flash": 1469, + "gemini-3-flash-preview": 1469, + "gemini-2.5-pro": 1465, + "gemini-2.5-flash": 1420, + "gemini-2.0-flash": 1371, + "gemini-2.0-flash-thinking": 1383, + "gemini-1.5-pro": 1311, + "gemini-1.5-flash": 1273, + # ----------------------------------------------------------------------- + # DeepSeek + # ----------------------------------------------------------------------- + "deepseek-r1": 1382, + "deepseek-r1-0528": 1436, + "deepseek-reasoner": 1382, + "deepseek-chat": 1337, + "deepseek-v3": 1337, + "deepseek-v3-0324": 1391, + "deepseek-v3.1": 1430, + "deepseek-v3.2": 1431, + # ----------------------------------------------------------------------- + # xAI / Grok + # ----------------------------------------------------------------------- + "grok-4.1": 1483, + "grok-4": 1453, + "grok-4-fast": 1441, + "grok-3": 1439, + "grok-3-mini": 1380, + "grok-2": 1298, + # ----------------------------------------------------------------------- + # Mistral + # ----------------------------------------------------------------------- + "mistral-large": 1450, + "mistral-large-3": 1450, + "mistral-medium-3": 1387, + "mistral-medium-3.1": 1412, + "magistral-medium": 1307, + "magistral-small": 1330, + "codestral": 1300, + "mistral-small-3.1": 1295, + "mistral-small-3.2": 1361, + "mistral-small-3": 1251, + # ----------------------------------------------------------------------- + # Moonshot / Kimi + # ----------------------------------------------------------------------- + "kimi-k2.5": 1480, + "kimi-k2-instruct": 1402, + "kimi-k2-thinking": 1450, + "kimi-k2-0905": 1403, + "kimi-k2-0711": 1402, + # ----------------------------------------------------------------------- + # Meta Llama + # 
----------------------------------------------------------------------- + "llama-4-maverick-17b-128e": 1312, + "llama-4-scout-17b-16e": 1290, + "llama-3.3-70b": 1279, + "llama-3.1-405b": 1299, + "llama-3.1-70b": 1268, + "llama-3.1-8b": 1203, + "llama-3-70b": 1216, + # ----------------------------------------------------------------------- + # Qwen / Alibaba + # ----------------------------------------------------------------------- + "qwen3-max": 1468, + "qwen3-235b-a22b": 1394, + "qwen3-235b-a22b-instruct-2507": 1457, + "qwen3-235b-a22b-thinking-2507": 1442, + "qwen3-32b": 1376, + "qwen3-30b-a3b": 1346, + "qwen3-coder-480b-a35b": 1406, + "qwq-32b": 1351, + "qwen2.5-72b": 1302, + "qwen2.5-max": 1373, + # ----------------------------------------------------------------------- + # GLM (Zhipu AI / ZAI) + # ----------------------------------------------------------------------- + "glm-5": 1461, + "glm-4.7": 1460, + "glm-4.6": 1458, + "glm-4.5": 1448, + "glm-4.5-air": 1410, + # ----------------------------------------------------------------------- + # Minimax + # ----------------------------------------------------------------------- + "minimax-m2.1": 1430, + "minimax-m1": 1369, + "minimax-m2": 1430, + # ----------------------------------------------------------------------- + # Amazon Nova + # ----------------------------------------------------------------------- + "nova-pro": 1282, + "nova-lite": 1253, + "nova-micro": 1228, + # ----------------------------------------------------------------------- + # MiMo (Xiaomi) + # ----------------------------------------------------------------------- + "mimo-v2-flash": 1411, + # ----------------------------------------------------------------------- + # Gemma (Google open) + # ----------------------------------------------------------------------- + "gemma-3-27b": 1350, + "gemma-3-12b": 1310, + "gemma-3-4b": 1265, + # ----------------------------------------------------------------------- + # NVIDIA Nemotron + # 
----------------------------------------------------------------------- + "llama-3.3-nemotron-super-49b": 1359, + "llama-3.1-nemotron-70b": 1289, + # ----------------------------------------------------------------------- + # Phi (Microsoft) + # ----------------------------------------------------------------------- + "phi-4": 1242, +} + +# --------------------------------------------------------------------------- +# Provider table — maps litellm provider ID to (display name, API key env var). +# --------------------------------------------------------------------------- +PROVIDERS: Dict[str, Tuple[str, str]] = { + "openai": ("OpenAI", "OPENAI_API_KEY"), + "anthropic": ("Anthropic", "ANTHROPIC_API_KEY"), + "gemini": ("Google Gemini", "GEMINI_API_KEY"), + "vertex_ai": ("Google Vertex AI", "GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION"), + "xai": ("xAI", "XAI_API_KEY"), + "deepseek": ("DeepSeek", "DEEPSEEK_API_KEY"), + "mistral": ("Mistral AI", "MISTRAL_API_KEY"), + "cohere": ("Cohere", "COHERE_API_KEY"), + "cohere_chat": ("Cohere", "COHERE_API_KEY"), + "moonshot": ("Moonshot AI", "MOONSHOT_API_KEY"), + "groq": ("Groq", "GROQ_API_KEY"), + "fireworks_ai": ("Fireworks AI", "FIREWORKS_AI_API_KEY"), + "together_ai": ("Together AI", "TOGETHERAI_API_KEY"), + "perplexity": ("Perplexity", "PERPLEXITYAI_API_KEY"), + "openrouter": ("OpenRouter", "OPENROUTER_API_KEY"), + "deepinfra": ("DeepInfra", "DEEPINFRA_API_KEY"), + "cerebras": ("Cerebras", "CEREBRAS_API_KEY"), + "replicate": ("Replicate", "REPLICATE_API_KEY"), + "anyscale": ("Anyscale", "ANYSCALE_API_KEY"), + "novita": ("Novita AI", "NOVITA_API_KEY"), + "sambanova": ("SambaNova", "SAMBANOVA_API_KEY"), + "nvidia_nim": ("NVIDIA NIM", "NVIDIA_NIM_API_KEY"), + "bedrock": ("AWS Bedrock", "AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME"), + "bedrock_converse": ("AWS Bedrock", "AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME"), + "sagemaker": ("AWS SageMaker", 
"AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME"), + "azure": ("Azure OpenAI", "AZURE_API_KEY|AZURE_API_BASE|AZURE_API_VERSION"), + "azure_ai": ("Azure AI", "AZURE_AI_API_KEY"), + "databricks": ("Databricks", "DATABRICKS_API_KEY"), + "watsonx": ("IBM watsonx", "WATSONX_APIKEY"), + "cloudflare": ("Cloudflare Workers AI", "CLOUDFLARE_API_KEY"), + "huggingface": ("Hugging Face", "HF_TOKEN"), + "ai21": ("AI21", "AI21_API_KEY"), + "nlp_cloud": ("NLP Cloud", "NLP_CLOUD_API_KEY"), + "aleph_alpha": ("Aleph Alpha", "ALEPHALPHA_API_KEY"), + "predibase": ("Predibase", "PREDIBASE_API_KEY"), + "friendliai": ("FriendliAI", "FRIENDLI_TOKEN"), + "github": ("GitHub Models", "GITHUB_API_KEY"), + "github_copilot": ("Github Copilot", ""), + "clarifai": ("Clarifai", "CLARIFAI_PAT"), + "voyage": ("Voyage", "VOYAGE_API_KEY"), + "codestral": ("Codestral", "CODESTRAL_API_KEY"), + "infinity": ("Infinity", "INFINITY_API_KEY"), + "nscale": ("Nscale", "NSCALE_API_KEY"), + "hyperbolic": ("Hyperbolic", "HYPERBOLIC_API_KEY"), + "lambda_ai": ("Lambda AI", "LAMBDA_API_KEY"), + "featherless_ai": ("Featherless AI", "FEATHERLESS_API_KEY"), + "gmi": ("GMI Cloud", "GMI_API_KEY"), + "wandb": ("W&B Inference", "WANDB_API_KEY"), + "vercel_ai_gateway": ("Vercel AI Gateway", "VERCEL_AI_GATEWAY_API_KEY"), + "ollama": ("Ollama", ""), + "ollama_chat": ("Ollama", ""), + "lm_studio": ("LM Studio", ""), +} + +# Anthropic provider IDs — these use "budget" reasoning +_ANTHROPIC_PROVIDERS = {"anthropic", "azure_ai"} # azure_ai hosts Claude models too + +# Model name patterns that signal reasoning (for providers not in the sets above) +_EFFORT_PATTERNS = re.compile( + r"o1|o3|o4|gemini.*thinking|deepseek.r1|deepseek.reasoner|" + r"qwen.*thinking|kimi.*thinking|magistral|" + r"gemini.*flash.*thinking", + re.IGNORECASE, +) + +# Placeholder tier entries in together_ai (not real model IDs) +_TIER_PATTERN = re.compile(r"^together-ai-[\d.]+b", re.IGNORECASE) + +# Models we never want in the catalog (sample spec, 
image-only, etc.)
+_SKIP_KEYS = {"sample_spec"}
+
+# Regex matching dated preview model names (after provider prefix is stripped).
+# Examples: gemini-2.5-flash-preview-04-17, gemini-2.5-pro-preview-06-05
+_DATED_PREVIEW = re.compile(
+    r"^(?P<base>gemini-[\d.]+-\w+)-preview-\d{2}-\d{2,4}$",
+    re.IGNORECASE,
+)
+
+CSV_FIELDNAMES = [
+    "provider", "model", "input", "output", "coding_arena_elo",
+    "base_url", "api_key", "max_reasoning_tokens", "structured_output",
+    "reasoning_type", "location",
+]
+
+# ---------------------------------------------------------------------------
+# Regex patterns for _extract_base_model() — stripping provider/region/version
+# ---------------------------------------------------------------------------
+
+# Known provider prefixes (simple provider/rest format)
+_SIMPLE_PREFIX_PROVIDERS = {
+    "vertex_ai", "azure_ai", "openrouter", "deepinfra", "together_ai",
+    "fireworks_ai", "vercel_ai_gateway", "github_copilot", "groq",
+    "cerebras", "hyperbolic", "novita", "sambanova", "replicate",
+    "lambda_ai", "nscale", "oci", "gmi", "wandb", "ovhcloud",
+    "llamagate", "gradient_ai", "moonshot", "snowflake", "heroku",
+    "publicai", "deepseek", "xai", "mistral", "gemini", "perplexity",
+    "cohere", "cohere_chat", "meta_llama", "dashscope",
+}
+
+# Bedrock region paths: us-east-1/, ap-northeast-1/, us-gov-west-1/, etc.
+# Also handles commitment and invoke prefixes.
+_BEDROCK_REGION_PATH = re.compile(
+    r"^(?:[a-z]{2}(?:-[a-z]+)+-\d+/)+"  # one or more region segments (incl. us-gov-west-1)
+    r"|^(?:\d+-month-commitment/)"
+    r"|^(?:invoke/)",
+    re.IGNORECASE,
+)
+
+# Azure sub-region paths: eu/, global/, global-standard/
+_AZURE_REGION_PREFIX = re.compile(
+    r"^(?:eu|global-standard|global|us)/",
+    re.IGNORECASE,
+)
+
+# Bedrock cross-region inference prefixes on bare IDs: us., eu., apac., au., jp., global.
+_BEDROCK_GEO_PREFIX = re.compile(
+    r"^(?:us|eu|apac|ap|au|jp|global)\.",
+    re.IGNORECASE,
+)
+
+# Vendor dot-namespace: anthropic., meta., moonshotai., deepseek., xai., etc.
+# Used by Bedrock (anthropic.claude-*) and OCI (xai.grok-3, meta.llama-*) +_VENDOR_DOT_PREFIX = re.compile( + r"^(?:anthropic|meta|amazon|cohere|ai21|mistral|moonshotai|deepseek|" + r"qwen|minimax|nvidia|openai|google|writer|twelvelabs|zai|xai)\.", + re.IGNORECASE, +) + +# HuggingFace-style org namespaces used by deepinfra, together_ai, openrouter, etc. +_ORG_NAMESPACE = re.compile( + r"^(?:deepseek-ai|deepseek|meta-llama|meta|anthropic|google|openai|" + r"moonshotai|mistralai|qwen|Qwen|x-ai|xai|cohere|microsoft|" + r"allenai|NousResearch|nvidia|MiniMaxAI)/", + re.IGNORECASE, +) + +# Fireworks account path: accounts/fireworks/models/ (or any account) +_FIREWORKS_ACCOUNT = re.compile( + r"^accounts/[^/]+/models/", + re.IGNORECASE, +) + +# Anthropic fast/us routing prefixes on bare IDs +_FAST_PREFIX = re.compile(r"^(?:fast/us/|fast/|us/)", re.IGNORECASE) + +# Vertex AI @version suffix: @20241022, @default, @001, @latest +_VERTEX_VERSION = re.compile(r"@[\w.-]+$") + +# Bedrock version suffix: -v1:0, -v2:0, :0 +_BEDROCK_VERSION = re.compile(r"(?:-v\d+:\d+|:\d+)$") + +# Special mapping for Bedrock deepseek after vendor prefix is stripped +# e.g. deepseek.v3.2 -> strips to "v3.2" or "v3-v1:0" -> "v3" +_BEDROCK_DEEPSEEK_REMAP: Dict[str, str] = { + "v3": "deepseek-v3", + "v3.2": "deepseek-v3", + "r1": "deepseek-r1", +} + +# Safe remainder patterns after a canonical prefix match. +# Only accept: empty, date suffixes (-20241022), version tags (-v1, -v2), +# preview/latest tags, or @version. +# This REJECTS things like -distill-*, -turbo, -mini, -fast. 
+_SAFE_REMAINDER = re.compile( + r"^(?:" + r"-\d{8}" # -20241022 (8-digit date) + r"|-v\d+" # -v1, -v2 + r"|-preview" # -preview + r"|-latest" # -latest + r"|-instruct" # -instruct (same weights, just instruction-tuned name) + r"|-versatile" # -versatile (Groq naming for same model) + r"|-\d{4}(?:0[1-9]|1[0-2])\d{2}" # -YYYYMMDD compact + r")(?:$|[-@])", # must be end-of-string or followed by another suffix + re.IGNORECASE, +) + + +def _extract_base_model(model_id: str) -> Optional[str]: + """ + Extract a canonical base model name from a litellm model ID by stripping + provider prefixes, regions, vendor namespaces, and version suffixes. + + Returns a key matching ELO_SCORES if confident, or None if the model + cannot be safely identified (conservative — prefers returning None over + a wrong match). + """ + s = model_id.strip() + + # Step 1: Strip known provider prefix + slash_pos = s.find("/") + if slash_pos > 0: + prefix = s[:slash_pos] + if prefix in _SIMPLE_PREFIX_PROVIDERS: + s = s[slash_pos + 1:] + # Azure AI and some providers also have region sub-paths (global/, etc.) + s = _AZURE_REGION_PREFIX.sub("", s) + elif prefix == "bedrock" or prefix == "bedrock_converse": + s = s[slash_pos + 1:] + # Strip region paths (may be multiple segments) + while _BEDROCK_REGION_PATH.match(s): + s = _BEDROCK_REGION_PATH.sub("", s, count=1) + elif prefix == "azure": + s = s[slash_pos + 1:] + s = _AZURE_REGION_PREFIX.sub("", s) + elif prefix == "openai": + s = s[slash_pos + 1:] + + # Step 2: Strip fast/us routing prefixes (bare Anthropic IDs) + s = _FAST_PREFIX.sub("", s) + + # Step 3: Strip Bedrock cross-region geo prefixes (us., eu., apac., etc.) + s = _BEDROCK_GEO_PREFIX.sub("", s) + + # Step 4: Strip vendor dot-namespace (anthropic., meta., moonshotai., xai., etc.) + # Only when there's no slash left (to avoid mangling org/model paths) + if "/" not in s and "." 
in s: + m = _VENDOR_DOT_PREFIX.match(s) + if m: + s = s[m.end():] + + # Step 5: Strip HuggingFace-style org namespace (deepseek-ai/, meta-llama/, etc.) + s = _ORG_NAMESPACE.sub("", s) + + # Step 6: Strip Fireworks account path + s = _FIREWORKS_ACCOUNT.sub("", s) + + # Step 7: Strip Vertex AI @version suffix + s = _VERTEX_VERSION.sub("", s) + + # Step 8: Strip Bedrock version suffix (-v1:0, :0) + s = _BEDROCK_VERSION.sub("", s) + + # Step 9: Lowercase for matching + s = s.lower() + + # Step 10: Handle Bedrock deepseek special naming (vendor-stripped leftovers) + if s in _BEDROCK_DEEPSEEK_REMAP: + s = _BEDROCK_DEEPSEEK_REMAP[s] + + # Step 11: Strip trailing -maas suffix (Vertex AI model-as-a-service) + if s.endswith("-maas"): + s = s[:-5] + + # Step 12: Exact match + if s in ELO_SCORES: + return s + + # Step 13: Longest-prefix match against ELO_SCORES keys. + # Sorted longest-first to prefer more specific matches + # (e.g. "claude-opus-4-1" over "claude-opus-4"). + for key in sorted(ELO_SCORES, key=len, reverse=True): + if s.startswith(key): + remainder = s[len(key):] + if not remainder: + return key + if _SAFE_REMAINDER.match(remainder): + return key + + return None + + +def _get_provider_root(litellm_provider: str) -> str: + """Return the root provider for compound provider strings like vertex_ai-anthropic_models.""" + return litellm_provider.split("-")[0].split("_models")[0] + + +def _infer_reasoning_type(model_id: str, litellm_provider: str, entry: dict) -> str: + supports_reasoning = entry.get("supports_reasoning", False) + if not supports_reasoning: + return "none" + root = _get_provider_root(litellm_provider) + # Anthropic (and Azure AI hosting Claude) use "budget" reasoning tokens + if root in _ANTHROPIC_PROVIDERS: + return "budget" + # All other providers use "effort" (low/medium/high string) + return "effort" + + +def _infer_max_reasoning_tokens(model_id: str, litellm_provider: str, entry: dict) -> int: + root = _get_provider_root(litellm_provider) + if not 
entry.get("supports_reasoning", False): + return 0 + if root in _ANTHROPIC_PROVIDERS: + return 128000 + return 0 + + +def _is_deprecated(entry: dict) -> bool: + dep = entry.get("deprecation_date") + if not dep or not isinstance(dep, str): + return False + try: + dep_date = date.fromisoformat(dep) + return dep_date <= date.today() + except ValueError: + return False + + +def _is_placeholder(model_id: str) -> bool: + """Filter out non-usable placeholder entries.""" + if model_id in _SKIP_KEYS: + return True + if _TIER_PATTERN.match(model_id): + return True + return False + + +def _is_superseded_preview(model_id: str, all_model_ids: set) -> bool: + """Return True if this is a dated Gemini preview whose stable GA version exists. + + Google routinely sunsets dated preview models (e.g. gemini-2.5-flash-preview-04-17) + once the stable GA version (gemini-2.5-flash) is available, but litellm's registry + often retains them without a deprecation_date. We skip these to avoid catalog + entries that fail at call time with a 404. + + The check is applied to both bare IDs (gemini-2.5-flash-preview-04-17) and + provider-prefixed IDs (gemini/gemini-2.5-flash-preview-04-17) — we strip the + provider prefix before matching. + """ + # Strip simple provider prefix (e.g. "gemini/", "vertex_ai/") + bare = model_id + slash = bare.find("/") + if slash > 0: + bare = bare[slash + 1:] + + m = _DATED_PREVIEW.match(bare) + if not m: + return False + + ga_name = m.group("base") # e.g. "gemini-2.5-flash" + + # Check whether the stable GA version exists in litellm's registry + # (either bare or under common provider prefixes) + if ga_name in all_model_ids: + return True + if f"gemini/{ga_name}" in all_model_ids: + return True + + return False + + +def _get_elo(model_id: str) -> int: + """Look up ELO for a model. + + Lookup order (stops at first hit): + 1. Exact match in ELO_SCORES + 2. _extract_base_model() -> ELO_SCORES lookup + 3. 
Return 0 + """ + if model_id in ELO_SCORES: + return ELO_SCORES[model_id] + canonical = _extract_base_model(model_id) + if canonical is not None: + return ELO_SCORES[canonical] + return 0 + + +def build_rows() -> List[dict]: + try: + import litellm + except ImportError: + print("ERROR: litellm is not installed. Run: pip install litellm", file=sys.stderr) + sys.exit(1) + + all_model_ids = set(litellm.model_cost.keys()) + rows = [] + skipped_previews = 0 + + for model_id, entry in litellm.model_cost.items(): + # Only chat mode + if entry.get("mode") != "chat": + continue + # Skip deprecated + if _is_deprecated(entry): + continue + # Skip placeholder/tier entries + if _is_placeholder(model_id): + continue + # Skip dated preview models superseded by a stable GA release + if _is_superseded_preview(model_id, all_model_ids): + skipped_previews += 1 + continue + # Skip models that cannot produce text output (e.g. TTS / audio-only) + output_modalities = entry.get("supported_output_modalities", []) + if output_modalities and "text" not in output_modalities: + continue + + litellm_provider: str = entry.get("litellm_provider", "") + root_provider = _get_provider_root(litellm_provider) + + # ELO — skip models below cutoff or with no known score + elo = _get_elo(model_id) + if elo < ELO_CUTOFF: + continue + + # Convert per-token costs to per-million + in_cost_token = entry.get("input_cost_per_token") or 0.0 + out_cost_token = entry.get("output_cost_per_token") or 0.0 + input_cost = round(in_cost_token * 1_000_000, 6) + output_cost = round(out_cost_token * 1_000_000, 6) + + # Provider display name and API key env var + display_name, api_key = PROVIDERS.get( + litellm_provider, + PROVIDERS.get( + root_provider, + (litellm_provider.replace("_", " ").title(), f"{root_provider.upper()}_API_KEY"), + ), + ) + + # Structured output + structured = bool( + entry.get("supports_function_calling") or + entry.get("supports_response_schema") + ) + + # Reasoning + reasoning_type = 
_infer_reasoning_type(model_id, litellm_provider, entry) + max_reasoning_tokens = _infer_max_reasoning_tokens(model_id, litellm_provider, entry) + + # Location (Vertex AI models default to global) + location = "global" if litellm_provider.startswith("vertex_ai") else "" + + rows.append({ + "provider": display_name, + "model": model_id, + "input": input_cost, + "output": output_cost, + "coding_arena_elo": elo, + "base_url": "", + "api_key": api_key, + "max_reasoning_tokens": max_reasoning_tokens, + "structured_output": structured, + "reasoning_type": reasoning_type, + "location": location, + }) + + if skipped_previews: + print(f" Skipped {skipped_previews} dated preview model(s) superseded by stable GA releases.") + + # Sort: ELO descending, then model name ascending + rows.sort(key=lambda r: (-r["coding_arena_elo"], r["model"])) + return rows + + +def main() -> None: + parser = argparse.ArgumentParser(description=__doc__) + default_output = Path(__file__).parent.parent / "pdd" / "data" / "llm_model.csv" + parser.add_argument( + "--output", "-o", + type=Path, + default=default_output, + help=f"Output CSV path (default: {default_output})", + ) + args = parser.parse_args() + + output_path: Path = args.output + output_path.parent.mkdir(parents=True, exist_ok=True) + + print("Building model catalog from litellm.model_cost...") + rows = build_rows() + print(f" Found {len(rows)} chat models across all providers.") + + with open(output_path, "w", newline="", encoding="utf-8") as f: + writer = csv.DictWriter(f, fieldnames=CSV_FIELDNAMES) + writer.writeheader() + writer.writerows(rows) + + print(f" Written to: {output_path}") + + # Print a quick summary by provider + from collections import Counter + providers = Counter(r["provider"] for r in rows) + print("\nTop providers by model count:") + for provider, count in providers.most_common(20): + print(f" {provider}: {count}") + + +if __name__ == "__main__": + main() diff --git a/pdd/llm_invoke.py b/pdd/llm_invoke.py index 
ad374f463..5d98c0969 100644 --- a/pdd/llm_invoke.py +++ b/pdd/llm_invoke.py @@ -1166,13 +1166,53 @@ def _save_key_to_env_file(key_name: str, value: str, env_path: Path) -> None: def _ensure_api_key(model_info: Dict[str, Any], newly_acquired_keys: Dict[str, bool], verbose: bool) -> bool: - """Checks for API key in env, prompts user if missing, and updates .env.""" - key_name = model_info.get('api_key') + """Checks for API key(s) in env, prompts user if missing, and updates .env. - if not key_name or key_name == "EXISTING_KEY": + Supports pipe-delimited api_key fields (e.g. ``VAR1|VAR2|VAR3``). + - Empty field → no auth needed (device flow / local model), always True. + - Single var → existing interactive-prompt behaviour for simple providers. + - Multi var → checks all vars; if any missing, directs user to ``pdd setup``. + """ + from pdd.provider_manager import parse_api_key_vars + + api_key_field = str(model_info.get('api_key', '') or '') + + if not api_key_field.strip() or api_key_field == "EXISTING_KEY": if verbose: - logger.info(f"Skipping API key check for model {model_info.get('model')} (key name: {key_name})") - return True # Assume key is handled elsewhere or not needed + logger.info(f"Skipping API key check for model {model_info.get('model')} (key field: {api_key_field!r})") + return True # Device flow, local model, or handled elsewhere + + env_vars = parse_api_key_vars(api_key_field) + + # --- Multi-credential provider (pipe-delimited) --- + if len(env_vars) > 1: + missing = [v for v in env_vars if not os.getenv(v)] + if not missing: + if verbose: + logger.info(f"All {len(env_vars)} env vars set for model {model_info.get('model')}.") + newly_acquired_keys[api_key_field] = False + return True + + # Vertex AI ADC fallback: GOOGLE_APPLICATION_CREDENTIALS may be unset + # if the user ran ``gcloud auth application-default login`` instead. 
+ if "GOOGLE_APPLICATION_CREDENTIALS" in env_vars and "GOOGLE_APPLICATION_CREDENTIALS" in missing: + project = os.getenv("VERTEXAI_PROJECT") or os.getenv("GOOGLE_CLOUD_PROJECT") + if project: + remaining = [v for v in missing if v != "GOOGLE_APPLICATION_CREDENTIALS"] + if not remaining: + logger.info(f"Using ADC for Vertex AI (project={project}).") + newly_acquired_keys[api_key_field] = False + return True + + logger.warning( + f"Multi-credential provider for model '{model_info.get('model')}' " + f"is missing env vars: {', '.join(missing)}. " + f"Run 'pdd setup' to configure." + ) + return False + + # --- Single-credential provider (original behaviour) --- + key_name = env_vars[0] key_value = os.getenv(key_name) if key_value: @@ -1181,58 +1221,50 @@ def _ensure_api_key(model_info: Dict[str, Any], newly_acquired_keys: Dict[str, b if key_value: if verbose: logger.info(f"API key '{key_name}' found in environment.") - newly_acquired_keys[key_name] = False # Mark as existing + newly_acquired_keys[key_name] = False # Mark as existing return True - else: - # For Vertex AI, allow ADC when project is available - if key_name == 'VERTEX_CREDENTIALS': - vertex_project = os.getenv("VERTEX_PROJECT") or os.getenv("GOOGLE_CLOUD_PROJECT") - if vertex_project: - logger.info(f"VERTEX_CREDENTIALS not set; using ADC (project={vertex_project}).") - newly_acquired_keys[key_name] = False - return True - logger.warning(f"API key environment variable '{key_name}' for model '{model_info.get('model')}' is not set.") + logger.warning(f"API key environment variable '{key_name}' for model '{model_info.get('model')}' is not set.") - # Skip prompting if --force flag is set (non-interactive mode) - if os.environ.get('PDD_FORCE'): - logger.error(f"API key '{key_name}' not set. In --force mode, skipping interactive prompt.") + # Skip prompting if --force flag is set (non-interactive mode) + if os.environ.get('PDD_FORCE'): + logger.error(f"API key '{key_name}' not set. 
In --force mode, skipping interactive prompt.") + return False + + try: + # Interactive prompt + user_provided_key = input(f"Please enter the API key for {key_name}: ").strip() + if not user_provided_key: + logger.error("No API key provided. Cannot proceed with this model.") return False - try: - # Interactive prompt - user_provided_key = input(f"Please enter the API key for {key_name}: ").strip() - if not user_provided_key: - logger.error("No API key provided. Cannot proceed with this model.") - return False - - # Sanitize the user-provided key - user_provided_key = _sanitize_api_key(user_provided_key) - - # Set environment variable for the current process - os.environ[key_name] = user_provided_key - logger.info(f"API key '{key_name}' set for the current session.") - newly_acquired_keys[key_name] = True # Mark as newly acquired + # Sanitize the user-provided key + user_provided_key = _sanitize_api_key(user_provided_key) - # Update .env file - try: - _save_key_to_env_file(key_name, user_provided_key, ENV_PATH) - logger.info(f"API key '{key_name}' saved to {ENV_PATH}.") - logger.warning("SECURITY WARNING: The API key has been saved to your .env file. " - "Ensure this file is kept secure and is included in your .gitignore.") + # Set environment variable for the current process + os.environ[key_name] = user_provided_key + logger.info(f"API key '{key_name}' set for the current session.") + newly_acquired_keys[key_name] = True # Mark as newly acquired - except IOError as e: - logger.error(f"Failed to update .env file at {ENV_PATH}: {e}") - # Continue since the key is set in the environment for this session + # Update .env file + try: + _save_key_to_env_file(key_name, user_provided_key, ENV_PATH) + logger.info(f"API key '{key_name}' saved to {ENV_PATH}.") + logger.warning("SECURITY WARNING: The API key has been saved to your .env file. 
" + "Ensure this file is kept secure and is included in your .gitignore.") - return True + except IOError as e: + logger.error(f"Failed to update .env file at {ENV_PATH}: {e}") + # Continue since the key is set in the environment for this session - except EOFError: # Handle non-interactive environments - logger.error(f"Cannot prompt for API key '{key_name}' in a non-interactive environment.") - return False - except Exception as e: - logger.error(f"An unexpected error occurred during API key acquisition: {e}") - return False + return True + + except EOFError: # Handle non-interactive environments + logger.error(f"Cannot prompt for API key '{key_name}' in a non-interactive environment.") + return False + except Exception as e: + logger.error(f"An unexpected error occurred during API key acquisition: {e}") + return False def _format_messages(prompt: str, input_data: Union[Dict[str, Any], List[Dict[str, Any]]], use_batch_mode: bool) -> Union[List[Dict[str, str]], List[List[Dict[str, str]]]]: @@ -1910,83 +1942,35 @@ def calc_strength(candidate): "num_retries": 2, } - api_key_name_from_csv = model_info.get('api_key') # From CSV - # Determine if it's a Vertex AI model for special handling - is_vertex_model = (provider.lower() == 'google') or \ - (provider.lower() == 'googlevertexai') or \ - (provider.lower() == 'vertex_ai') or \ - model_name_litellm.startswith('vertex_ai/') - - if is_vertex_model and api_key_name_from_csv == 'VERTEX_CREDENTIALS': - vertex_project_env = os.getenv("VERTEX_PROJECT") - # Resolve location: CSV override → env var fallback - model_location = model_info.get('location') - if pd.notna(model_location) and str(model_location).strip(): - vertex_location_env = str(model_location).strip() - if verbose: - logger.info(f"[INFO] Using per-model location override: '{vertex_location_env}' for model '{model_name_litellm}'") - else: - vertex_location_env = os.getenv("VERTEX_LOCATION") - - if vertex_project_env and vertex_location_env: - 
litellm_kwargs["vertex_project"] = vertex_project_env - litellm_kwargs["vertex_location"] = vertex_location_env - # Optionally load explicit credentials file - credentials_file_path = os.getenv("VERTEX_CREDENTIALS") - if credentials_file_path: - try: - with open(credentials_file_path, 'r') as f: - loaded_credentials = json.load(f) - litellm_kwargs["vertex_credentials"] = json.dumps(loaded_credentials) - if verbose: - logger.info(f"[INFO] For Vertex AI: using vertex_credentials from '{credentials_file_path}', project '{vertex_project_env}', location '{vertex_location_env}'.") - except (FileNotFoundError, json.JSONDecodeError) as e: - if verbose: - logger.info(f"[INFO] No credentials file ({e}); using ADC.") - except Exception as e: - if verbose: - logger.error(f"[ERROR] Failed to load Vertex credentials from '{credentials_file_path}': {e}. Using ADC.") - elif verbose: - logger.info(f"[INFO] Using ADC for Vertex AI (project={vertex_project_env}, location={vertex_location_env})") - else: - if verbose: - logger.warning(f"[WARN] Missing VERTEX_PROJECT or VERTEX_LOCATION for {model_name_litellm}.") - if not vertex_project_env: logger.warning(f" Reason: VERTEX_PROJECT env var not set or empty.") - if not vertex_location_env: logger.warning(f" Reason: VERTEX_LOCATION env var not set or empty.") - logger.warning(f" LiteLLM may attempt to use Application Default Credentials or the call may fail.") + # --- Resolve API key / credentials --- + # The CSV api_key field may be: + # - Single env var (e.g. "ANTHROPIC_API_KEY") → pass as api_key= + # - Pipe-delimited (e.g. 
"VAR1|VAR2|VAR3") → litellm reads from env + # - Empty (device flow / local) → no api_key needed + from pdd.provider_manager import parse_api_key_vars - elif api_key_name_from_csv: # For other api_key_names specified in CSV (e.g., OPENAI_API_KEY, or a direct VERTEX_AI_API_KEY string) - key_value = os.getenv(api_key_name_from_csv) + api_key_field = str(model_info.get('api_key', '') or '') + env_vars = parse_api_key_vars(api_key_field) + + if len(env_vars) == 1: + # Simple provider: pass env var value as api_key= + key_value = os.getenv(env_vars[0]) if key_value: key_value = _sanitize_api_key(key_value) litellm_kwargs["api_key"] = key_value if verbose: - logger.info(f"[INFO] Explicitly passing API key from env var '{api_key_name_from_csv}' as 'api_key' parameter to LiteLLM.") - - # If this model is Vertex AI AND uses a direct API key string (not VERTEX_CREDENTIALS from CSV), - # also pass project and location from env vars. - if is_vertex_model: - vertex_project_env = os.getenv("VERTEX_PROJECT") - # Check for per-model location override, fall back to env var - model_location = model_info.get('location') - if pd.notna(model_location) and str(model_location).strip(): - vertex_location_env = str(model_location).strip() - if verbose: - logger.info(f"[INFO] Using per-model location override: '{vertex_location_env}' for model '{model_name_litellm}'") - else: - vertex_location_env = os.getenv("VERTEX_LOCATION") - if vertex_project_env and vertex_location_env: - litellm_kwargs["vertex_project"] = vertex_project_env - litellm_kwargs["vertex_location"] = vertex_location_env - if verbose: - logger.info(f"[INFO] For Vertex AI model (using direct API key '{api_key_name_from_csv}'), also passing vertex_project='{vertex_project_env}' and vertex_location='{vertex_location_env}' from env vars.") - elif verbose: - logger.warning(f"[WARN] For Vertex AI model (using direct API key '{api_key_name_from_csv}'), VERTEX_PROJECT or VERTEX_LOCATION env vars not set. 
This might be required by LiteLLM.") - elif verbose: # api_key_name_from_csv was in CSV, but corresponding env var was not set/empty - logger.warning(f"[WARN] API key name '{api_key_name_from_csv}' found in CSV, but the environment variable '{api_key_name_from_csv}' is not set or empty. LiteLLM will use default authentication if applicable (e.g., other standard env vars or ADC).") - - elif verbose: # No api_key_name_from_csv in CSV for this model - logger.info(f"[INFO] No API key name specified in CSV for model '{model_name_litellm}'. LiteLLM will use its default authentication mechanisms (e.g., standard provider env vars or ADC for Vertex AI).") + logger.info(f"[INFO] Passing API key from '{env_vars[0]}' to LiteLLM.") + elif verbose: + logger.warning(f"[WARN] Env var '{env_vars[0]}' not set. LiteLLM will use default auth.") + elif len(env_vars) > 1: + # Multi-credential provider (Bedrock, Azure, Vertex AI, etc.) + # litellm reads these env vars from os.environ automatically. + if verbose: + logger.info(f"[INFO] Multi-credential provider; litellm reads env vars: {env_vars}") + else: + # Empty api_key — device flow (GitHub Copilot) or local model + if verbose: + logger.info(f"[INFO] No API key for '{model_name_litellm}'; using device flow or default auth.") # Add base_url/api_base override if present in CSV api_base = model_info.get('base_url') @@ -2412,10 +2396,9 @@ def calc_strength(candidate): logger.info(f"[SUCCESS] Invocation successful for {model_name_litellm} (took {end_time - start_time:.2f}s)") # Build retry kwargs with provider credentials from litellm_kwargs - # Issue #185: Retry calls were missing vertex_location, vertex_project, etc. retry_provider_kwargs = {k: v for k, v in litellm_kwargs.items() - if k in ('vertex_credentials', 'vertex_project', 'vertex_location', - 'api_key', 'base_url', 'api_base')} + if k in ('api_key', 'base_url', 'api_base', + 'api_version')} # --- 7. 
Process Response --- results = [] diff --git a/pdd/model_tester.py b/pdd/model_tester.py new file mode 100644 index 000000000..993cb92f4 --- /dev/null +++ b/pdd/model_tester.py @@ -0,0 +1,420 @@ +from __future__ import annotations + +import os +import sys +import threading +import time as time_module +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + +import pandas as pd +from rich.console import Console + +from rich.table import Table + +console = Console() + + +def _load_user_csv() -> Optional[pd.DataFrame]: + """Load the user's LLM model CSV from ~/.pdd/llm_model.csv. + + Returns: + DataFrame with model data, or None if file doesn't exist or is empty. + """ + csv_path = Path.home() / ".pdd" / "llm_model.csv" + if not csv_path.is_file(): + return None + + try: + df = pd.read_csv(csv_path) + except Exception as e: + console.print(f"[red]Failed to read {csv_path}: {e}[/red]") + return None + + if df.empty: + return None + + # Ensure expected columns exist + required_cols = {"provider", "model", "api_key"} + missing = required_cols - set(df.columns) + if missing: + console.print(f"[red]CSV is missing required columns: {missing}[/red]") + return None + + # Normalise nullable string columns + for col in ("api_key", "base_url", "location"): + if col in df.columns: + df[col] = df[col].fillna("").astype(str) + + # Normalise numeric cost columns + for col in ("input", "output"): + if col in df.columns: + df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0.0) + + return df + + +def _resolve_api_key(row: Dict[str, Any]) -> Tuple[Optional[str], str]: + """Resolve the API key for a model row. + + Returns: + (key_value_or_none, status_string) + status_string is a human-readable description like '✓ Found (OPENAI_API_KEY)'. 
+ """ + key_name: str = str(row.get("api_key", "")).strip() + + # No env var configured — litellm will use its own defaults + if not key_name: + return None, "(no key configured)" + + # Check environment + key_value = os.getenv(key_name, "") + if key_value: + return key_value.strip(), f"✓ Found ({key_name})" + + # Check if a .env file might have it (dotenv may not be loaded yet) + try: + from dotenv import dotenv_values + + env_path = Path.home() / ".pdd" / ".env" + if not env_path.is_file(): + env_path = Path.cwd() / ".env" + if env_path.is_file(): + vals = dotenv_values(env_path) + val = vals.get(key_name, "") + if val: + return val.strip(), f"✓ Found ({key_name} via .env)" + except ImportError: + pass + + return None, f"✗ Not found ({key_name})" + + +def _resolve_base_url(row: Dict[str, Any]) -> Optional[str]: + """Return the base_url for the model, if any.""" + base_url: str = str(row.get("base_url", "")).strip() + if base_url: + return base_url + + # LM Studio convention + model_name = str(row.get("model", "")).lower() + provider = str(row.get("provider", "")).lower() + if model_name.startswith("lm_studio/") or provider == "lm_studio": + return os.getenv("LM_STUDIO_API_BASE", "http://localhost:1234/v1") + + return None + + +def _resolve_provider_auth(row: Dict[str, Any]) -> List[Tuple[str, str, bool]]: + """Resolve all auth-related env vars for a model row. + + Returns a list of (label, status_string, is_ok) tuples. + Driven by the CSV api_key field (pipe-delimited for multi-credential providers). + """ + from pdd.provider_manager import parse_api_key_vars + + api_key_field = str(row.get("api_key", "")).strip() + env_vars = parse_api_key_vars(api_key_field) + + if not env_vars: + # Empty api_key — device flow (e.g. 
GitHub Copilot) or local model + return [("Auth", "Device flow / no key needed", True)] + + results: List[Tuple[str, str, bool]] = [] + for var in env_vars: + value = os.getenv(var, "") + if value: + # Extra validation for credential file paths + if var == "GOOGLE_APPLICATION_CREDENTIALS" and not Path(value).is_file(): + results.append((var, f"⚠ Path set but file not found ({var})", False)) + else: + results.append((var, f"✓ Found ({var})", True)) + else: + results.append((var, f"✗ Not found ({var})", False)) + + return results + + +def _calculate_cost( + prompt_tokens: int, + completion_tokens: int, + input_price_per_m: float, + output_price_per_m: float, +) -> float: + """Calculate cost from token counts and per-million-token prices.""" + return (prompt_tokens * input_price_per_m + completion_tokens * output_price_per_m) / 1_000_000.0 + + +def _classify_error(exc: Exception) -> str: + """Return a concise, user-friendly error description.""" + msg = str(exc).lower() + exc_type = type(exc).__name__ + + # Authentication errors + if "authentication" in msg or "401" in msg or "403" in msg or "invalid api key" in msg: + return f"Authentication error — check your API key ({exc_type})" + + # Connection refused (typically local servers) + if "connection refused" in msg or "connect" in msg and "refused" in msg: + return f"Connection refused — is the local server running? ({exc_type})" + + # Model not found + if "not found" in msg or "404" in msg or "does not exist" in msg: + return f"Model not found — check the model name ({exc_type})" + + # Timeout + if "timeout" in msg or "timed out" in msg: + return f"Request timed out ({exc_type})" + + # Rate limit + if "rate" in msg and "limit" in msg or "429" in msg: + return f"Rate limited — try again later ({exc_type})" + + # Generic + return f"{exc_type}: {exc}" + + +def _run_test(row: Dict[str, Any]) -> Dict[str, Any]: + """Run a single litellm.completion() test against the given model row. 
+ + Returns a dict with keys: success, duration_s, cost, error, tokens. + """ + import litellm + from pdd.provider_manager import parse_api_key_vars + + model_name: str = str(row.get("model", "")) + base_url = _resolve_base_url(row) + + kwargs: Dict[str, Any] = { + "model": model_name, + "messages": [{"role": "user", "content": "Say OK"}], + "timeout": 8, + } + + # Resolve API key using the pipe-delimited convention: + # Single var → pass as api_key= + # Multi var → litellm reads from os.environ (don't pass api_key=) + # Empty → device flow / local (don't pass api_key=) + api_key_field = str(row.get("api_key", "")).strip() + env_vars = parse_api_key_vars(api_key_field) + + if len(env_vars) == 1: + key_value = os.getenv(env_vars[0], "") + if key_value: + kwargs["api_key"] = key_value.strip() + # Multi-var and empty: litellm reads env vars automatically + + if base_url: + kwargs["base_url"] = base_url + kwargs["api_base"] = base_url + + start = time_module.time() + try: + response = litellm.completion(**kwargs) + duration = time_module.time() - start + + # Extract token usage + usage = getattr(response, "usage", None) + prompt_tokens = getattr(usage, "prompt_tokens", 0) or 0 + completion_tokens = getattr(usage, "completion_tokens", 0) or 0 + + input_price = float(row.get("input", 0.0)) + output_price = float(row.get("output", 0.0)) + cost = _calculate_cost(prompt_tokens, completion_tokens, input_price, output_price) + + return { + "success": True, + "duration_s": duration, + "cost": cost, + "error": None, + "tokens": {"prompt": prompt_tokens, "completion": completion_tokens}, + } + + except Exception as exc: + duration = time_module.time() - start + return { + "success": False, + "duration_s": duration, + "cost": 0.0, + "error": _classify_error(exc), + "tokens": None, + } + + +def _display_model_list( + df: pd.DataFrame, + results: Dict[int, Dict[str, Any]], +) -> None: + """Display the model list as a rich table with any persisted test results.""" + table = 
Table(title="Available Models", show_lines=False, pad_edge=True) + table.add_column("#", style="bold cyan", justify="right", width=4) + table.add_column("Provider", style="white", min_width=10) + table.add_column("Model", style="bright_white", min_width=30) + table.add_column("Input $/M", justify="right", min_width=8) + table.add_column("Output $/M", justify="right", min_width=8) + table.add_column("ELO", justify="right", min_width=6) + table.add_column("Last Test", min_width=25) + + for idx, row in df.iterrows(): + i = int(idx) + provider = str(row.get("provider", "")) + model = str(row.get("model", "")) + input_cost = row.get("input", 0.0) + output_cost = row.get("output", 0.0) + elo = row.get("coding_arena_elo", "") + + # Format costs + input_str = f"${float(input_cost):.2f}" if pd.notna(input_cost) else "—" + output_str = f"${float(output_cost):.2f}" if pd.notna(output_cost) else "—" + elo_str = str(int(elo)) if pd.notna(elo) and elo else "—" + + # Test result + if i in results: + r = results[i] + if r["success"]: + test_str = f"[green]✓ OK ({r['duration_s']:.1f}s, ${r['cost']:.4f})[/green]" + else: + # Truncate error for table display + err = r["error"] or "Unknown error" + if len(err) > 40: + err = err[:37] + "..." + test_str = f"[red]✗ {err}[/red]" + else: + test_str = "—" + + table.add_row( + str(i + 1), + provider, + model, + input_str, + output_str, + elo_str, + test_str, + ) + + console.print(table) + + +def test_model_interactive() -> None: + """Interactive model tester. + + Shows models from ~/.pdd/llm_model.csv, lets the user pick one to test, + runs a minimal litellm.completion() call, and displays diagnostics. + Loops until the user enters empty input or 'q'. 
+ """ + df = _load_user_csv() + if df is None: + console.print( + "[yellow]No user model CSV found at ~/.pdd/llm_model.csv or it is empty.[/yellow]" + ) + console.print( + "[dim]Run [bold]pdd setup[/bold] to configure your models first.[/dim]" + ) + return + + # Session-persisted test results: index -> result dict + results: Dict[int, Dict[str, Any]] = {} + + while True: + console.print() + _display_model_list(df, results) + console.print() + + try: + choice = console.input( + "[bold cyan]Enter model number to test (or empty to quit): [/bold cyan]" + ).strip() + except (EOFError, KeyboardInterrupt): + console.print("\n[dim]Exiting model tester.[/dim]") + return + + if not choice or choice.lower() == "q": + console.print("[dim]Exiting model tester.[/dim]") + return + + # Parse selection + try: + idx = int(choice) - 1 + except ValueError: + console.print(f"[red]Invalid input: '{choice}'. Enter a number or 'q'.[/red]") + continue + + if idx < 0 or idx >= len(df): + console.print(f"[red]Invalid selection. 
Choose 1–{len(df)}.[/red]") + continue + + row = df.iloc[idx].to_dict() + model_name = str(row.get("model", "")) + provider = str(row.get("provider", "")) + + console.print() + console.print(f"[bold]Testing: [bright_white]{model_name}[/bright_white] ({provider})[/bold]") + console.print("─" * 50) + + # Diagnostics: provider authentication + auth_checks = _resolve_provider_auth(row) + for label, status_str, is_ok in auth_checks: + color = "green" if is_ok else "red" + console.print(f" {label + ':':<13s}[{color}]{status_str}[/{color}]") + + # Diagnostics: base URL + base_url = _resolve_base_url(row) + if base_url: + console.print(f" Base URL: [dim]{base_url}[/dim]") + + console.print() + sys.stdout.write(" Sending test prompt...") + sys.stdout.flush() + + # Run the test in a thread, printing dots while waiting + test_result_holder: List[Optional[Dict[str, Any]]] = [None] + + def _do_test() -> None: + test_result_holder[0] = _run_test(row) + + t = threading.Thread(target=_do_test, daemon=True) + t.start() + + elapsed = 0.0 + while t.is_alive() and elapsed < 8.0: + t.join(timeout=1.0) + if t.is_alive(): + sys.stdout.write(".") + sys.stdout.flush() + elapsed += 1.0 + + if t.is_alive(): + # Timeout — thread is still running; don't wait further + sys.stdout.write("\n") + result = { + "success": False, + "duration_s": elapsed, + "cost": 0.0, + "error": "Request timed out (8s)", + "tokens": None, + } + else: + sys.stdout.write("\n") + result = test_result_holder[0] or { + "success": False, + "duration_s": 0.0, + "cost": 0.0, + "error": "Unknown error", + "tokens": None, + } + + results[idx] = result + + if result["success"]: + tokens = result.get("tokens") or {} + token_info = "" + if tokens: + token_info = f", {tokens.get('prompt', 0)}+{tokens.get('completion', 0)} tokens" + console.print( + f" LLM call [green]✓ OK[/green] " + f"({result['duration_s']:.1f}s, ${result['cost']:.4f}{token_info})" + ) + else: + console.print(f" LLM call [red]✗ {result['error']}[/red]") + + 
console.print() \ No newline at end of file diff --git a/pdd/pddrc_initializer.py b/pdd/pddrc_initializer.py new file mode 100644 index 000000000..9021ba205 --- /dev/null +++ b/pdd/pddrc_initializer.py @@ -0,0 +1,192 @@ +from __future__ import annotations + +import os +from pathlib import Path +from typing import Optional + +from rich.console import Console +from rich.syntax import Syntax + +console = Console() + +# Language detection markers +PYTHON_MARKERS = ("setup.py", "pyproject.toml", "setup.cfg", "Pipfile", "requirements.txt") +TYPESCRIPT_MARKERS = ("package.json",) +GO_MARKERS = ("go.mod",) + +# Path defaults per language +LANGUAGE_DEFAULTS: dict[str, dict[str, str]] = { + "python": { + "generate_output_path": "pdd/", + "test_output_path": "tests/", + "example_output_path": "context/", + }, + "typescript": { + "generate_output_path": "src/", + "test_output_path": "__tests__/", + "example_output_path": "examples/", + }, + "go": { + "generate_output_path": ".", + "test_output_path": ".", + "example_output_path": "examples/", + }, +} + +# Standard defaults +STANDARD_DEFAULTS: dict[str, float | int] = { + "strength": 0.818, + "temperature": 0.0, + "target_coverage": 80.0, + "budget": 10.0, + "max_attempts": 3, +} + +PDDRC_FILENAME = ".pddrc" + + +def _detect_language(cwd: Path) -> Optional[str]: + """Detect project language based on marker files in the current directory. + + Returns the detected language string or ``None`` if the project type + cannot be determined automatically. 
+ """ + # Check Python markers + for marker in PYTHON_MARKERS: + if (cwd / marker).exists(): + return "python" + + # Check TypeScript – look for typescript in package.json dependencies + package_json_path = cwd / "package.json" + if package_json_path.exists(): + try: + import json + + with open(package_json_path, "r", encoding="utf-8") as fh: + pkg = json.load(fh) + all_deps: dict[str, str] = {} + all_deps.update(pkg.get("dependencies", {})) + all_deps.update(pkg.get("devDependencies", {})) + if "typescript" in all_deps: + return "typescript" + except (json.JSONDecodeError, OSError): + pass + + # Check Go markers + for marker in GO_MARKERS: + if (cwd / marker).exists(): + return "go" + + return None + + +def _prompt_language() -> str: + """Interactively ask the user to choose a project language.""" + console.print("\n[warning]Could not auto-detect project language.[/warning]") + console.print(" [bold]1)[/bold] Python") + console.print(" [bold]2)[/bold] TypeScript") + console.print(" [bold]3)[/bold] Go") + + while True: + choice = console.input("\nSelect language [1/2/3]: ").strip() + if choice == "1": + return "python" + elif choice == "2": + return "typescript" + elif choice == "3": + return "go" + else: + console.print("[error]Invalid choice. Please enter 1, 2, or 3.[/error]") + + +def _build_pddrc_content(language: str) -> str: + """Build the YAML content for a ``.pddrc`` file. + + Parameters + ---------- + language: + One of ``"python"``, ``"typescript"``, or ``"go"``. + + Returns + ------- + str + The full YAML string ready to be written to disk. 
+ """ + paths = LANGUAGE_DEFAULTS.get(language, LANGUAGE_DEFAULTS["python"]) + + lines: list[str] = [ + 'version: "1.0"', + "", + "contexts:", + " default:", + " defaults:", + f' generate_output_path: "{paths["generate_output_path"]}"', + f' test_output_path: "{paths["test_output_path"]}"', + f' example_output_path: "{paths["example_output_path"]}"', + f' default_language: "{language}"', + ] + + for key, value in STANDARD_DEFAULTS.items(): + # Format integers without trailing .0, floats with one decimal + if isinstance(value, int): + lines.append(f" {key}: {value}") + else: + lines.append(f" {key}: {value}") + + lines.append("") # trailing newline + return "\n".join(lines) + + +def offer_pddrc_init() -> bool: + """Offer to create a ``.pddrc`` configuration file in the current directory. + + If a ``.pddrc`` already exists the user is informed and the function + returns ``False``. Otherwise a preview of sensible defaults is shown + and the user is prompted to confirm creation. + + Returns + ------- + bool + ``True`` if the file was created, ``False`` otherwise. + """ + cwd = Path.cwd() + pddrc_path = cwd / PDDRC_FILENAME + + # ── Already exists ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + if pddrc_path.exists(): + console.print( + f"[info]A {PDDRC_FILENAME} file already exists in {cwd}.[/info]" + ) + return False + + # ── Detect / prompt language ━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + language = _detect_language(cwd) + if language is None: + language = _prompt_language() + else: + console.print(f"\n[success]Detected project language: {language}[/success]") + + # ── Build & preview ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + content = _build_pddrc_content(language) + + console.print(f"\n[info]Proposed {PDDRC_FILENAME} contents:[/info]\n") + syntax = Syntax(content, "yaml", theme="monokai", line_numbers=False) + console.print(syntax) + + # ── Prompt for confirmation (Enter = yes) ━━━━━━━━━━━━━━━━━━━━━ + answer = console.input(f"\nCreate {PDDRC_FILENAME}? 
[Y/n] ").strip().lower() + if answer in ("", "y", "yes"): + try: + pddrc_path.write_text(content, encoding="utf-8") + console.print( + f"[success]Created {PDDRC_FILENAME} in {cwd}[/success]" + ) + return True + except OSError as exc: + console.print( + f"[error]Failed to write {PDDRC_FILENAME}: {exc}[/error]" + ) + return False + else: + console.print("[info]Skipped .pddrc creation.[/info]") + return False \ No newline at end of file diff --git a/pdd/provider_manager.py b/pdd/provider_manager.py new file mode 100644 index 000000000..ffa0f54c0 --- /dev/null +++ b/pdd/provider_manager.py @@ -0,0 +1,951 @@ +from __future__ import annotations + +import csv +import io +import os +import re +import shlex +import shutil +import tempfile +from datetime import datetime +from pathlib import Path +from typing import Any, Dict, List, Optional + +from rich.console import Console +from rich.table import Table +from rich.prompt import Prompt, Confirm + + +console = Console() + +# CSV column schema +CSV_FIELDNAMES = [ + "provider", "model", "input", "output", "coding_arena_elo", + "base_url", "api_key", "max_reasoning_tokens", "structured_output", + "reasoning_type", "location", +] + +# --------------------------------------------------------------------------- +# Pipe-delimited api_key helpers +# --------------------------------------------------------------------------- +# The CSV api_key column can contain multiple env var names separated by "|". +# Single var → pass as api_key= to litellm. Multi-var → litellm reads from +# os.environ automatically (Bedrock, Azure, Vertex AI). Empty → device flow +# or local model (GitHub Copilot, Ollama). + + +def parse_api_key_vars(api_key_field: str) -> List[str]: + """Split the pipe-delimited api_key CSV field into individual env var names. + + Returns an empty list if the field is empty/blank. 
+ """ + if not api_key_field or not api_key_field.strip(): + return [] + return [v.strip() for v in api_key_field.split("|") if v.strip()] + + +def is_multi_credential(api_key_field: str) -> bool: + """Return True if the api_key field contains multiple env vars (pipe-delimited).""" + return "|" in (api_key_field or "") + + +# --------------------------------------------------------------------------- +# Complex provider authentication registry +# --------------------------------------------------------------------------- +# Providers that require multi-variable auth (not just a single API key). +# Maps provider display name (as in CSV) -> list of env var configs. +# Used by _setup_complex_provider() for interactive credential prompting. + +COMPLEX_AUTH_PROVIDERS: Dict[str, List[Dict[str, Any]]] = { + "Google Vertex AI": [ + { + "env_var": "GOOGLE_APPLICATION_CREDENTIALS", + "label": "Credentials", + "required": True, + "default": None, + "hint": "Path to GCP service account JSON (or 'adc' for Application Default Credentials)", + }, + { + "env_var": "VERTEXAI_PROJECT", + "label": "GCP Project", + "required": True, + "default": None, + "hint": "Google Cloud project ID", + }, + { + "env_var": "VERTEXAI_LOCATION", + "label": "Location", + "required": True, + "default": "us-central1", + "hint": "GCP region (e.g. us-central1)", + }, + ], + "AWS Bedrock": [ + { + "env_var": "AWS_ACCESS_KEY_ID", + "label": "Access Key ID", + "required": True, + "default": None, + "hint": "AWS IAM access key ID", + }, + { + "env_var": "AWS_SECRET_ACCESS_KEY", + "label": "Secret Key", + "required": True, + "default": None, + "hint": "AWS IAM secret access key", + }, + { + "env_var": "AWS_REGION_NAME", + "label": "Region", + "required": True, + "default": "us-east-1", + "hint": "AWS region (e.g. 
us-east-1)", + }, + ], + "Azure OpenAI": [ + { + "env_var": "AZURE_API_KEY", + "label": "API Key", + "required": True, + "default": None, + "hint": "Azure OpenAI resource key", + }, + { + "env_var": "AZURE_API_BASE", + "label": "Endpoint", + "required": True, + "default": None, + "hint": "Azure OpenAI endpoint URL (e.g. https://myresource.openai.azure.com/)", + }, + { + "env_var": "AZURE_API_VERSION", + "label": "API Version", + "required": True, + "default": "2024-10-21", + "hint": "Azure API version string", + }, + ], + "Azure AI": [ + { + "env_var": "AZURE_AI_API_KEY", + "label": "API Key", + "required": True, + "default": None, + "hint": "Azure AI Foundry API key", + }, + { + "env_var": "AZURE_AI_API_BASE", + "label": "Endpoint", + "required": False, + "default": None, + "hint": "Optional: Azure AI endpoint URL", + }, + ], + "Github Copilot": [ + { + "env_var": "GITHUB_COPILOT_API_KEY", + "label": "API Key", + "required": False, + "default": None, + "hint": "Optional: GitHub Copilot uses device flow auth at runtime", + }, + ], +} + +# --------------------------------------------------------------------------- +# Path helpers +# --------------------------------------------------------------------------- + +def _get_shell_name() -> str: + """Detect shell from SHELL env var, default to bash.""" + shell_path = os.environ.get("SHELL", "/bin/bash") + shell = Path(shell_path).name + # Normalise common shells + if shell in ("bash", "zsh", "fish", "sh", "ksh", "csh", "tcsh"): + return shell + return "bash" + + +def _get_pdd_dir() -> Path: + """Return ~/.pdd, creating it if necessary.""" + pdd_dir = Path.home() / ".pdd" + pdd_dir.mkdir(parents=True, exist_ok=True) + return pdd_dir + + +def _get_api_env_path() -> Path: + """Return path to ~/.pdd/api-env.{shell}.""" + shell = _get_shell_name() + return _get_pdd_dir() / f"api-env.{shell}" + + +def _get_user_csv_path() -> Path: + """Return path to ~/.pdd/llm_model.csv.""" + return _get_pdd_dir() / "llm_model.csv" + + +def 
_get_shell_rc_path() -> Optional[Path]: + """Return the shell RC file path (~/.zshrc, ~/.bashrc, etc.).""" + shell = _get_shell_name() + home = Path.home() + shell_files = { + "zsh": home / ".zshrc", + "bash": home / ".bashrc", + "fish": home / ".config" / "fish" / "config.fish", + "csh": home / ".cshrc", + "tcsh": home / ".tcshrc", + "ksh": home / ".kshrc", + "sh": home / ".profile", + } + return shell_files.get(shell) + + +def _get_source_line_for_shell(api_env_path: Path) -> str: + """Return the appropriate source line syntax for the current shell.""" + shell = _get_shell_name() + path_str = str(api_env_path) + + if shell == "fish": + return f'test -f "{path_str}"; and source "{path_str}"' + elif shell in ("csh", "tcsh"): + return f'if ( -f "{path_str}" ) source "{path_str}"' + elif shell == "sh": + # sh uses . instead of source + return f'[ -f "{path_str}" ] && . "{path_str}"' + else: + # bash, zsh, ksh and others + return f'[ -f "{path_str}" ] && source "{path_str}"' + + +def _ensure_api_env_sourced_in_rc() -> bool: + """ + Ensure the api-env file is sourced in the user's shell RC file. + + Adds a shell-appropriate source line to ~/.zshrc (or equivalent) if not + already present. This ensures new terminal sessions automatically have + the API keys available. + + Returns True if the line was added, False if already present or unsupported. 
+ """ + rc_path = _get_shell_rc_path() + if rc_path is None: + return False + + api_env_path = _get_api_env_path() + + # Ensure parent directory exists (important for fish: ~/.config/fish/) + rc_path.parent.mkdir(parents=True, exist_ok=True) + + # Check if api-env path is already referenced in the RC file + if rc_path.exists(): + content = rc_path.read_text(encoding="utf-8") + # Check if the api-env file path is already mentioned (covers any syntax) + if str(api_env_path) in content: + return False + else: + content = "" + + # Build shell-appropriate source line + source_line = _get_source_line_for_shell(api_env_path) + + # Append the source line + with open(rc_path, "a", encoding="utf-8") as f: + f.write(f"\n# PDD API keys\n{source_line}\n") + + return True + + +# --------------------------------------------------------------------------- +# CSV I/O helpers +# --------------------------------------------------------------------------- + +def _read_csv(path: Path) -> List[Dict[str, str]]: + """Read a CSV file and return list of row dicts. 
Returns [] if missing.""" + if not path.exists(): + return [] + with open(path, "r", encoding="utf-8", newline="") as f: + reader = csv.DictReader(f) + return list(reader) + + +def _write_csv_atomic(path: Path, rows: List[Dict[str, str]]) -> None: + """Atomically write rows to a CSV file (temp file + rename).""" + path.parent.mkdir(parents=True, exist_ok=True) + fd, tmp_path = tempfile.mkstemp( + dir=str(path.parent), suffix=".tmp", prefix=".llm_model_" + ) + try: + with os.fdopen(fd, "w", encoding="utf-8", newline="") as f: + writer = csv.DictWriter(f, fieldnames=CSV_FIELDNAMES) + writer.writeheader() + for row in rows: + # Ensure every field is present + clean = {k: row.get(k, "") for k in CSV_FIELDNAMES} + writer.writerow(clean) + shutil.move(tmp_path, str(path)) + except Exception: + # Clean up temp file on failure + if os.path.exists(tmp_path): + os.unlink(tmp_path) + raise + + +# --------------------------------------------------------------------------- +# api-env file helpers +# --------------------------------------------------------------------------- + +def _read_api_env_lines(path: Path) -> List[str]: + """Read api-env file lines. Returns [] if missing.""" + if not path.exists(): + return [] + with open(path, "r", encoding="utf-8") as f: + return f.readlines() + + +def _write_api_env_atomic(path: Path, lines: List[str]) -> None: + """Atomically write lines to api-env file.""" + path.parent.mkdir(parents=True, exist_ok=True) + fd, tmp_path = tempfile.mkstemp( + dir=str(path.parent), suffix=".tmp", prefix=".api-env_" + ) + try: + with os.fdopen(fd, "w", encoding="utf-8") as f: + f.writelines(lines) + shutil.move(tmp_path, str(path)) + except Exception: + if os.path.exists(tmp_path): + os.unlink(tmp_path) + raise + + +def _quote_for_shell(value: str, shell: str) -> str: + """Quote a value for the given shell, handling shell-specific edge cases. + + - POSIX shells (bash/zsh/sh/ksh): shlex.quote() is fully correct. 
+ - fish: single quotes treat \\\\ and \\' as escape sequences (unlike POSIX), + so we must escape backslashes and single quotes within single quotes. + - csh/tcsh: single quotes DO prevent $ expansion, but ! (history expansion) + is never suppressed by any quoting. We backslash-escape ! outside quotes. + """ + if shell == "fish": + # fish single quotes recognise \\' and \\\\ as escapes + escaped = value.replace("\\", "\\\\").replace("'", "\\'") + return f"'{escaped}'" + elif shell in ("csh", "tcsh"): + # csh single quotes are mostly POSIX-like, but ! is never suppressed. + # Strategy: use shlex.quote() for the base quoting, then break out + # any ! characters so they can be backslash-escaped outside quotes. + if "!" not in value: + return shlex.quote(value) + # Split on !, quote each segment, rejoin with escaped ! + parts = value.split("!") + quoted_parts = [shlex.quote(p) for p in parts] + return "\\!".join(quoted_parts) + else: + # bash, zsh, ksh, sh — shlex.quote() is fully correct + return shlex.quote(value) + + +def _build_env_export_line(key_name: str, key_value: str) -> str: + """Build a shell-appropriate export line for the given key/value.""" + shell = _get_shell_name() + quoted_value = _quote_for_shell(key_value, shell) + + if shell == "fish": + return f"set -gx {key_name} {quoted_value}\n" + elif shell in ("csh", "tcsh"): + return f"setenv {key_name} {quoted_value}\n" + else: + # bash, zsh, ksh, sh and others + return f"export {key_name}={quoted_value}\n" + + +def _build_env_key_pattern(key_name: str) -> re.Pattern: + """Build a regex pattern to match any shell syntax for the given key.""" + # Match: export KEY=, setenv KEY , set -gx KEY (with optional comment prefix) + escaped_key = re.escape(key_name) + return re.compile( + rf"^(?:#\s*)?(?:export\s+{escaped_key}\s*=|setenv\s+{escaped_key}\s|set\s+-gx\s+{escaped_key}\s)", + re.MULTILINE, + ) + + +def _save_key_to_api_env(key_name: str, key_value: str) -> None: + """ + Add or update an export line in 
the api-env file. + If the key already exists (even commented out), replace it. + + Uses shell-appropriate syntax (export for bash/zsh, set -gx for fish, + setenv for csh/tcsh). + + Also sets the key in os.environ so it's immediately available + in the current session without requiring the user to source their shell. + """ + # Set in current process environment for immediate availability + os.environ[key_name] = key_value + + env_path = _get_api_env_path() + lines = _read_api_env_lines(env_path) + + export_line = _build_env_export_line(key_name, key_value) + pattern = _build_env_key_pattern(key_name) + + found = False + new_lines: List[str] = [] + for line in lines: + if pattern.match(line.strip()): + new_lines.append(export_line) + found = True + else: + new_lines.append(line) + + if not found: + # Ensure trailing newline before appending + if new_lines and not new_lines[-1].endswith("\n"): + new_lines[-1] += "\n" + new_lines.append(export_line) + + _write_api_env_atomic(env_path, new_lines) + + +def _comment_out_key_in_api_env(key_name: str) -> None: + """ + Comment out (never delete) a key in the api-env file. + Adds a comment with the date. Handles all shell syntaxes. 
+ """ + env_path = _get_api_env_path() + lines = _read_api_env_lines(env_path) + + # Match uncommented lines only (export, setenv, set -gx) + escaped_key = re.escape(key_name) + pattern = re.compile( + rf"^(?:export\s+{escaped_key}\s*=|setenv\s+{escaped_key}\s|set\s+-gx\s+{escaped_key}\s)", + re.MULTILINE, + ) + + today = datetime.now().strftime("%Y-%m-%d") + new_lines: List[str] = [] + for line in lines: + stripped = line.strip() + if pattern.match(stripped): + comment = f"# Commented out by pdd setup on {today}\n" + new_lines.append(comment) + new_lines.append(f"# {stripped}\n") + else: + new_lines.append(line) + + _write_api_env_atomic(env_path, new_lines) + + +# --------------------------------------------------------------------------- +# Key-existence check (used by add_provider_from_registry) +# --------------------------------------------------------------------------- + +def _is_key_set(key_name: str) -> Optional[str]: + """Return the source label if *key_name* is set, else ``None``. + + Checks .env (via python-dotenv), shell environment, and api-env file. 
+ """ + try: + from dotenv import dotenv_values # type: ignore + dotenv_vals = dotenv_values() + if key_name in dotenv_vals and dotenv_vals[key_name] is not None: + return ".env file" + except Exception: + pass + + if os.environ.get(key_name): + return "shell environment" + + env_path = _get_api_env_path() + if env_path.exists(): + from pdd.api_key_scanner import _parse_api_env_file + api_env_vals = _parse_api_env_file(env_path) + if key_name in api_env_vals: + return f"~/.pdd/{env_path.name}" + + return None + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + +def _get_ref_csv_path() -> Path: + """Return path to the bundled reference CSV.""" + return Path(__file__).parent / "data" / "llm_model.csv" + + +def _setup_complex_provider(provider_name: str) -> bool: + """Run interactive auth setup for a complex (multi-variable) provider. + + Prompts for each required env var and saves to api-env. + Returns True if at least one credential was configured, False if all skipped. 
+ """ + var_configs = COMPLEX_AUTH_PROVIDERS.get(provider_name) + if not var_configs: + return False + + required_names = [c["label"] for c in var_configs if c["required"]] + optional_names = [c["label"] for c in var_configs if not c["required"]] + print() + console.print(f" [bold]{provider_name} Setup[/bold]") + if required_names: + console.print(f" Required: {', '.join(required_names)}") + if optional_names: + console.print(f" Optional: {', '.join(optional_names)}") + + # GitHub Copilot: explain device flow before prompting + if provider_name == "Github Copilot": + console.print( + "\n [dim]GitHub Copilot authenticates via device flow at runtime.\n" + " You can paste an API key now, or skip and authenticate later.[/dim]" + ) + print() + + any_saved = False + for cfg in var_configs: + env_var = cfg["env_var"] + label = cfg["label"] + required = cfg["required"] + default = cfg["default"] + hint = cfg["hint"] + + existing_source = _is_key_set(env_var) + if existing_source: + console.print(f" [green]✓[/green] {label} already set ({existing_source})") + if not Confirm.ask(" Update?", default=False): + continue + + opt_tag = " [dim](optional)[/dim]" if not required else "" + if default: + value = Prompt.ask(f" {label}{opt_tag} [dim]{hint}[/dim]", default=default) + else: + value = Prompt.ask(f" {label}{opt_tag} [dim]{hint}[/dim]", default="") + + value = value.strip() + if not value: + if not required: + continue + console.print(f" [yellow]Skipped[/yellow]") + continue + + # Vertex AI: special handling for credentials path + if env_var == "GOOGLE_APPLICATION_CREDENTIALS": + if value.lower() == "adc": + console.print( + " [dim]Using Application Default Credentials.\n" + " Make sure you've run: gcloud auth application-default login[/dim]" + ) + continue + if not Path(value).exists(): + console.print(f" [yellow]Warning: file not found at {value}[/yellow]") + + _save_key_to_api_env(env_var, value) + console.print(f" [green]✓ Saved[/green]") + any_saved = True + + if 
any_saved: + _ensure_api_env_sourced_in_rc() + console.print("\n [dim]Credentials available for this session.[/dim]") + + return any_saved + + +def add_provider_from_registry() -> bool: + """ + Browse providers from the reference CSV, let the user pick one, + handle the API key, and add its models to the user CSV. + + Returns True if any models were added, False if cancelled. + """ + # ── Step 1: List providers from reference CSV ───────────────────── + + ref_rows = _read_csv(_get_ref_csv_path()) + if not ref_rows: + console.print("[yellow]No models found in reference CSV.[/yellow]") + return False + + # Build unique provider list with model counts and api_key + provider_info: Dict[str, Dict[str, object]] = {} + for row in ref_rows: + provider = row.get("provider", "").strip() + api_key = row.get("api_key", "").strip() + if not provider: + continue + if provider not in provider_info: + provider_info[provider] = {"api_key": api_key, "count": 0} + provider_info[provider]["count"] = int(provider_info[provider]["count"]) + 1 + + sorted_providers = sorted(provider_info.keys()) + + console.print("\n[bold]Add a provider[/bold]\n") + for idx, prov in enumerate(sorted_providers, 1): + info = provider_info[prov] + count = info["count"] + s = "s" if count != 1 else "" + console.print(f" {idx:>2}. 
{prov:25s} ({count} model{s})") + console.print() + + selection = Prompt.ask("Enter number (empty to cancel)") + if not selection.strip(): + console.print("[dim]Cancelled.[/dim]") + return False + + try: + choice = int(selection.strip()) + if choice < 1 or choice > len(sorted_providers): + console.print("[red]Invalid selection.[/red]") + return False + except ValueError: + console.print("[red]Invalid input.[/red]") + return False + + selected_provider = sorted_providers[choice - 1] + api_key_var = str(provider_info[selected_provider]["api_key"]) or None + + # ── Step 2: Provider authentication ────────────────────────────── + + if selected_provider in COMPLEX_AUTH_PROVIDERS: + _setup_complex_provider(selected_provider) + elif api_key_var: + existing_source = _is_key_set(api_key_var) + if existing_source: + console.print( + f" [green]{api_key_var} is already set ({existing_source}).[/green]" + ) + if Confirm.ask("Update the key?", default=False): + key_value = Prompt.ask(f"Enter new value for {api_key_var}") + if key_value.strip(): + _save_key_to_api_env(api_key_var, key_value.strip()) + console.print( + f"[green]Updated {api_key_var} in {_get_api_env_path()}[/green]" + ) + rc_updated = _ensure_api_env_sourced_in_rc() + if rc_updated: + console.print( + f"[green]Added source line to {_get_shell_rc_path()}[/green]" + ) + console.print( + "[dim]Key is available now for this session.[/dim]" + ) + else: + key_value = Prompt.ask( + f"Enter your {selected_provider} API key (or press Enter to skip)", + default="", + ) + if key_value.strip(): + _save_key_to_api_env(api_key_var, key_value.strip()) + console.print( + f"[green]Saved {api_key_var} to {_get_api_env_path()}[/green]" + ) + rc_updated = _ensure_api_env_sourced_in_rc() + if rc_updated: + console.print( + f"[green]Added source line to {_get_shell_rc_path()}[/green]" + ) + console.print( + "[dim]Key is available now for this session.[/dim]" + ) + else: + console.print( + f"[yellow]Note: No API key configured for 
{selected_provider}. " + f"The LLM may have limited capability.[/yellow]" + ) + + # ── Step 3: Add all models for this provider to user CSV ────────── + + provider_rows = [ + row for row in ref_rows + if row.get("provider", "").strip() == selected_provider + ] + + user_csv_path = _get_user_csv_path() + existing_rows = _read_csv(user_csv_path) + existing_models = {r.get("model", "").strip() for r in existing_rows} + + added_count = 0 + for row in provider_rows: + model = row.get("model", "").strip() + if model and model not in existing_models: + existing_rows.append(row) + existing_models.add(model) + added_count += 1 + + if added_count > 0: + _write_csv_atomic(user_csv_path, existing_rows) + console.print( + f"[green]Added {added_count} model(s) for {selected_provider} to {user_csv_path}[/green]" + ) + else: + console.print("[yellow]All models for this provider are already configured.[/yellow]") + + return added_count > 0 + + +def add_custom_provider() -> bool: + """ + Prompt for custom provider details and append a row to user CSV. + + Returns True if a provider was added, False if cancelled. + """ + console.print("\n[bold]Add a Custom LiteLLM-Compatible Provider[/bold]\n") + + # Provider prefix (e.g. "openai", "anthropic", "ollama", etc.) + provider = Prompt.ask("Provider prefix (e.g. openai, ollama, together_ai)") + if not provider.strip(): + console.print("[dim]Cancelled.[/dim]") + return False + provider = provider.strip() + + # Model name + model_name = Prompt.ask("Model name (e.g. my-model-v1)") + if not model_name.strip(): + console.print("[dim]Cancelled.[/dim]") + return False + model_name = model_name.strip() + + # Full model string for LiteLLM: provider/model + full_model = f"{provider}/{model_name}" + + # API key env var name + api_key_var = Prompt.ask("API key environment variable name (e.g. 
OPENAI_API_KEY)") + if not api_key_var.strip(): + console.print("[dim]Cancelled.[/dim]") + return False + api_key_var = api_key_var.strip() + + # Base URL (optional) + base_url = Prompt.ask("Base URL (optional, press Enter to skip)", default="") + base_url = base_url.strip() + + # Costs (optional) + input_cost = Prompt.ask("Input cost per 1M tokens (optional, press Enter for 0.0)", default="0.0") + output_cost = Prompt.ask("Output cost per 1M tokens (optional, press Enter for 0.0)", default="0.0") + + try: + input_cost_val = str(float(input_cost.strip())) + except ValueError: + input_cost_val = "0.0" + + try: + output_cost_val = str(float(output_cost.strip())) + except ValueError: + output_cost_val = "0.0" + + # Ask if user wants to provide the actual API key value now + provide_key = Confirm.ask( + f"Do you want to enter the value for {api_key_var} now?", default=True + ) + if provide_key: + key_value = Prompt.ask(f"Enter the value for {api_key_var}") + if key_value.strip(): + _save_key_to_api_env(api_key_var, key_value.strip()) + console.print( + f"[green]Saved {api_key_var} to {_get_api_env_path()}[/green]" + ) + rc_updated = _ensure_api_env_sourced_in_rc() + if rc_updated: + console.print( + f"[green]Added source line to {_get_shell_rc_path()}[/green]" + ) + console.print( + "[dim]Key is available now for this session.[/dim]" + ) + + # Build the row with sensible defaults + new_row: Dict[str, str] = { + "provider": provider, + "model": full_model, + "input": input_cost_val, + "output": output_cost_val, + "coding_arena_elo": "1000", + "base_url": base_url, + "api_key": api_key_var, + "max_reasoning_tokens": "0", + "structured_output": "True", + "reasoning_type": "", + "location": "", + } + + # Append to user CSV + user_csv_path = _get_user_csv_path() + existing_rows = _read_csv(user_csv_path) + existing_rows.append(new_row) + _write_csv_atomic(user_csv_path, existing_rows) + + console.print( + f"[green]Added custom model '{full_model}' to 
{user_csv_path}[/green]" + ) + return True + + +def remove_models_by_provider() -> bool: + """ + Group user CSV models by api_key, show numbered list with counts, + remove all rows for selected provider. Comment out the key in api-env. + + Returns True if models were removed, False if cancelled. + """ + user_csv_path = _get_user_csv_path() + rows = _read_csv(user_csv_path) + + if not rows: + console.print("[yellow]No models configured in user CSV.[/yellow]") + return False + + # Group by api_key + provider_groups: Dict[str, List[Dict[str, str]]] = {} + for row in rows: + key = row.get("api_key", "").strip() + if not key: + key = "(no api_key)" + provider_groups.setdefault(key, []).append(row) + + sorted_providers = sorted(provider_groups.keys()) + + # Display table + table = Table(title="Configured Providers") + table.add_column("#", style="bold") + table.add_column("API Key Variable") + table.add_column("Model Count", justify="right") + table.add_column("Sample Models") + + for idx, prov_key in enumerate(sorted_providers, 1): + prov_rows = provider_groups[prov_key] + sample = ", ".join( + r.get("model", "?") for r in prov_rows[:3] + ) + if len(prov_rows) > 3: + sample += ", ..." + table.add_row(str(idx), prov_key, str(len(prov_rows)), sample) + + console.print(table) + + selection = Prompt.ask( + "\nEnter the number of the provider to remove (or press Enter to cancel)" + ) + if not selection.strip(): + console.print("[dim]Cancelled.[/dim]") + return False + + try: + choice = int(selection.strip()) + if choice < 1 or choice > len(sorted_providers): + console.print("[red]Invalid selection.[/red]") + return False + except ValueError: + console.print("[red]Invalid input.[/red]") + return False + + selected_provider_key = sorted_providers[choice - 1] + remove_count = len(provider_groups[selected_provider_key]) + + # Confirm + if not Confirm.ask( + f"Remove all {remove_count} model(s) for '{selected_provider_key}'?" 
+ ): + console.print("[dim]Cancelled.[/dim]") + return False + + # Filter out the selected provider's rows + remaining_rows = [ + r for r in rows + if (r.get("api_key", "").strip() or "(no api_key)") != selected_provider_key + ] + + _write_csv_atomic(user_csv_path, remaining_rows) + console.print( + f"[green]Removed {remove_count} model(s) for '{selected_provider_key}'.[/green]" + ) + + # Comment out the key in api-env (only if it's a real key name) + if selected_provider_key != "(no api_key)": + _comment_out_key_in_api_env(selected_provider_key) + console.print( + f"[green]Commented out {selected_provider_key} in {_get_api_env_path()}[/green]" + ) + + return True + + +def remove_individual_models() -> bool: + """ + List all models from user CSV, let user select by comma-separated numbers, + remove selected rows. + + Returns True if models were removed, False if cancelled. + """ + user_csv_path = _get_user_csv_path() + rows = _read_csv(user_csv_path) + + if not rows: + console.print("[yellow]No models configured in user CSV.[/yellow]") + return False + + # Display all models + table = Table(title="Configured Models") + table.add_column("#", style="bold") + table.add_column("Provider") + table.add_column("Model") + table.add_column("API Key") + + for idx, row in enumerate(rows, 1): + table.add_row( + str(idx), + row.get("provider", ""), + row.get("model", ""), + row.get("api_key", ""), + ) + + console.print(table) + + selection = Prompt.ask( + "\nEnter model numbers to remove (comma-separated, or press Enter to cancel)" + ) + if not selection.strip(): + console.print("[dim]Cancelled.[/dim]") + return False + + # Parse comma-separated numbers + indices_to_remove: set[int] = set() + for part in selection.split(","): + part = part.strip() + if not part: + continue + try: + num = int(part) + if 1 <= num <= len(rows): + indices_to_remove.add(num) + else: + console.print(f"[yellow]Skipping invalid number: {num}[/yellow]") + except ValueError: + 
console.print(f"[yellow]Skipping invalid input: '{part}'[/yellow]") + + if not indices_to_remove: + console.print("[dim]No valid selections. Cancelled.[/dim]") + return False + + # Show what will be removed + console.print("\n[bold]Models to remove:[/bold]") + for idx in sorted(indices_to_remove): + row = rows[idx - 1] + console.print(f" {idx}. {row.get('model', '?')} ({row.get('api_key', '')})") + + if not Confirm.ask(f"Remove {len(indices_to_remove)} model(s)?"): + console.print("[dim]Cancelled.[/dim]") + return False + + # Filter out selected rows (convert to 0-based) + remaining_rows = [ + row for idx, row in enumerate(rows, 1) + if idx not in indices_to_remove + ] + + _write_csv_atomic(user_csv_path, remaining_rows) + console.print( + f"[green]Removed {len(indices_to_remove)} model(s) from {user_csv_path}[/green]" + ) + + return True \ No newline at end of file diff --git a/pdd/setup_tool.py b/pdd/setup_tool.py index 55a8677a5..25d7d5161 100644 --- a/pdd/setup_tool.py +++ b/pdd/setup_tool.py @@ -1,91 +1,46 @@ -#!/usr/bin/env python3 """ -PDD Setup Script - Post-install configuration tool for PDD (Prompt Driven Development) -Helps new users bootstrap their PDD configuration with LLM API keys and basic settings. +Main orchestrator for `pdd setup`. 
+ +Implements a two-phase flow designed for minimal user friction: + Phase 1 — Interactive CLI bootstrap (0–2 user inputs) + Phase 2 — Deterministic auto-configuration (pure Python, no LLM calls) """ +from __future__ import annotations +import getpass import os import sys -import subprocess -import json -import requests -import csv -import importlib.resources -import shlex from pathlib import Path -from typing import Dict, Optional, Tuple, List - -# Global variables for non-ASCII characters and colors -HEAVY_HORIZONTAL = "━" -LIGHT_HORIZONTAL = "─" -HEAVY_VERTICAL = "┃" -LIGHT_VERTICAL = "│" -TOP_LEFT_CORNER = "┏" -TOP_RIGHT_CORNER = "┓" -BOTTOM_LEFT_CORNER = "┗" -BOTTOM_RIGHT_CORNER = "┛" -CROSS = "┼" -TEE_DOWN = "┬" -TEE_UP = "┴" -TEE_RIGHT = "├" -TEE_LEFT = "┤" -BULLET = "•" -ARROW_RIGHT = "→" -CHECK_MARK = "✓" -CROSS_MARK = "✗" - -# Color codes -RESET = "\033[0m" -WHITE = "\033[97m" -CYAN = "\033[96m" -YELLOW = "\033[93m" +from typing import Dict, List, Optional, Tuple + +from rich.console import Console as _RichConsole +_console = _RichConsole(highlight=False) + +# ANSI escape codes for coloring (works without rich) +CYAN = "\033[36m" +WHITE = "\033[37m" BOLD = "\033[1m" +RESET = "\033[0m" +LIGHT_HORIZONTAL = "\u2500" -# Template content inline -SUCCESS_PYTHON_TEMPLATE = """ -Write a python script to print "You did it, !!!" to the console. -Do not write anything except that message. -Capitalize the username.""" +# Top providers shown when prompting for an API key (order = display order) +_PROMPT_PROVIDERS = [ + ("anthropic", "Anthropic", "ANTHROPIC_API_KEY"), + ("gemini", "Google Gemini", "GEMINI_API_KEY"), + ("openai", "OpenAI", "OPENAI_API_KEY"), + ("deepseek", "DeepSeek", "DEEPSEEK_API_KEY"), +] -def _read_packaged_llm_model_csv() -> Tuple[List[str], List[Dict[str, str]]]: - """Load the packaged CSV (pdd/data/llm_model.csv) and return header + rows. 
- Returns: - (header_fields, rows) where header_fields is the list of column names - and rows is a list of dictionaries for each CSV row. - """ - try: - csv_text = importlib.resources.files('pdd').joinpath('data/llm_model.csv').read_text() - except Exception as e: - raise FileNotFoundError(f"Failed to load default LLM model CSV from package: {e}") - - reader = csv.DictReader(csv_text.splitlines()) - header = reader.fieldnames or [] - rows = [row for row in reader] - return header, rows - -def print_colored(text: str, color: str = WHITE, bold: bool = False) -> None: - """Print colored text to console""" - style = BOLD + color if bold else color - print(f"{style}{text}{RESET}") - -def create_divider(char: str = LIGHT_HORIZONTAL, width: int = 80) -> str: - """Create a horizontal divider line""" - return char * width - -def create_fat_divider(width: int = 80) -> str: - """Create a fat horizontal divider line""" - return HEAVY_HORIZONTAL * width - -def print_pdd_logo(): - """Print the PDD logo in ASCII art""" +def _print_pdd_logo() -> None: + """Print the PDD logo in ASCII art with ANSI colors.""" logo = "\n".join( [ " +xxxxxxxxxxxxxxx+", "xxxxxxxxxxxxxxxxxxxxx+", "xxx +xx+ PROMPT", "xxx x+ xx+ DRIVEN", - "xxx x+ xxx DEVELOPMENT©", + "xxx x+ xxx DEVELOPMENT\u00a9", "xxx x+ xx+", "xxx x+ xx+ COMMAND LINE INTERFACE", "xxx x+ xxx", @@ -100,549 +55,793 @@ def print_pdd_logo(): ) print(f"{CYAN}{logo}{RESET}") print() - print_colored("Let's get set up quickly with a solid basic configuration!", WHITE, bold=True) - print() - print_colored("Supported: OpenAI, Google Gemini, and Anthropic Claude", WHITE) - print_colored("from their respective API endpoints (no third-parties, such as Azure)", WHITE) + print(f"{BOLD}{WHITE}Let's get set up quickly with a solid basic configuration!{RESET}") print() -def get_csv_variable_names() -> Dict[str, str]: - """Inspect packaged CSV to determine API key variable names per provider. 
- - Focus on direct providers only: OpenAI GPT models (model startswith 'gpt-'), - Google Gemini (model startswith 'gemini/'), and Anthropic (model startswith 'anthropic/'). - """ - header, rows = _read_packaged_llm_model_csv() - variable_names: Dict[str, str] = {} - for row in rows: - model = (row.get('model') or '').strip() - api_key = (row.get('api_key') or '').strip() - provider = (row.get('provider') or '').strip().upper() +def run_setup() -> None: + """Main entry point for pdd setup. Two-phase flow with post-setup menu.""" + from pdd.cli_detector import detect_and_bootstrap_cli, CliBootstrapResult - if not api_key: - continue + # ── Banner ──────────────────────────────────────────────────────────── + _print_pdd_logo() - if model.startswith('gpt-') and provider == 'OPENAI': - variable_names['OPENAI'] = api_key - elif model.startswith('gemini/') and provider == 'GOOGLE': - # Prefer direct Gemini key, not Vertex - variable_names['GOOGLE'] = api_key - elif model.startswith('anthropic/') and provider == 'ANTHROPIC': - variable_names['ANTHROPIC'] = api_key - - # Fallbacks if not detected (keep prior behavior) - variable_names.setdefault('OPENAI', 'OPENAI_API_KEY') - # Prefer GEMINI_API_KEY name for Google if present - variable_names.setdefault('GOOGLE', 'GEMINI_API_KEY') - variable_names.setdefault('ANTHROPIC', 'ANTHROPIC_API_KEY') - return variable_names - -def discover_api_keys() -> Dict[str, Optional[str]]: - """Discover API keys from environment variables""" - # Get the variable names actually used in CSV template - csv_vars = get_csv_variable_names() - - keys = { - 'OPENAI_API_KEY': os.getenv('OPENAI_API_KEY'), - 'ANTHROPIC_API_KEY': os.getenv('ANTHROPIC_API_KEY'), - } - - # For Google, check both possible environment variables but use CSV template's variable name - google_var_name = csv_vars.get('GOOGLE', 'GEMINI_API_KEY') # Default to GEMINI_API_KEY - google_api_key = os.getenv('GEMINI_API_KEY') or os.getenv('GOOGLE_API_KEY') - keys[google_var_name] = 
google_api_key - - return keys - -def test_openai_key(api_key: str) -> bool: - """Test OpenAI API key validity""" - if not api_key or not api_key.strip(): - return False - try: - headers = { - 'Authorization': f'Bearer {api_key.strip()}', - 'Content-Type': 'application/json' - } - response = requests.get( - 'https://api.openai.com/v1/models', - headers=headers, - timeout=10 - ) - return response.status_code == 200 - except Exception: - return False - -def test_google_key(api_key: str) -> bool: - """Test Google Gemini API key validity""" - if not api_key or not api_key.strip(): - return False - + # ── Phase 1 — CLI Bootstrap (interactive, 0–2 user inputs) ──────── + results: list[CliBootstrapResult] = detect_and_bootstrap_cli() + + for result in results: + if result.skipped: + pass + elif not result.api_key_configured: + _console.print( + f"[yellow]Note: No API key configured for {result.cli_name or 'the CLI'}. " + "The agent may have limited capability.[/yellow]" + ) + + # ── Phase 2 — Deterministic Auto-Configuration ──────────────────── + auto_result = _run_auto_phase(results) + + if auto_result: + found_keys, _model_summary = auto_result + # Offer post-setup menu before final summary + try: + choice = input( + "\n Press Enter to finish, or 'm' for more options: " + ).strip() + except (EOFError, KeyboardInterrupt): + choice = "" + + if choice: + _run_options_menu() + else: + found_keys: list[tuple[str, str]] = [] + _console.print("\n [yellow]Setup incomplete. 
Use the menu to configure manually.[/yellow]") + _run_options_menu() + + # ── Final summary (after menu, so it reflects any changes) ──────── + _print_exit_summary(found_keys, results) + + except KeyboardInterrupt: + print("\nSetup interrupted — exiting.") + return + + +# --------------------------------------------------------------------------- +# Phase 2 — Deterministic auto-configuration +# --------------------------------------------------------------------------- + +def _print_step_banner(title: str) -> None: + """Print a cyan banner for a setup step.""" + print(f"\n{CYAN}{LIGHT_HORIZONTAL * 40}{RESET}") + print(f"{CYAN}{BOLD}{title}{RESET}") + print(f"{CYAN}{LIGHT_HORIZONTAL * 40}{RESET}") + + +def _run_auto_phase(cli_results=None) -> Optional[Tuple[List[Tuple[str, str]], Dict[str, int]]]: + """Run 3 deterministic setup steps. + + Returns (found_keys, model_summary) on success, or None on failure. + """ try: - response = requests.get( - f'https://generativelanguage.googleapis.com/v1beta/models?key={api_key.strip()}', - timeout=10 - ) - return response.status_code == 200 - except Exception: - return False - -def test_anthropic_key(api_key: str) -> bool: - """Test Anthropic API key validity""" - if not api_key or not api_key.strip(): - return False - + # Step 1: Scan API keys + _print_step_banner("Scanning for API keys...") + found_keys = _step1_scan_keys() + print() + _console.print("[blue]Press Enter to continue to the next step...[/blue]", end="") + input() + + # Step 2: Configure models + .pddrc + _print_step_banner("Configuring models...") + model_summary = _step2_configure_models_and_pddrc(found_keys) + print() + _console.print("[blue]Press Enter to continue to the next step...[/blue]", end="") + input() + + # Step 3: Test + summary + _print_step_banner("Testing and summarizing...") + _step3_test_and_summary(found_keys, model_summary, cli_results) + + return (found_keys, model_summary) + + except Exception as exc: + 
_console.print(f"\n[yellow]Auto-configuration failed: {exc}[/yellow]") + return None + + +# --------------------------------------------------------------------------- +# Step 1 — Scan for API keys +# --------------------------------------------------------------------------- + +def _step1_scan_keys() -> List[Tuple[str, str]]: + """Scan API key env vars referenced in the reference CSV across all sources. + + Returns list of (key_name, source_label) for keys that were found. + Multi-credential providers (pipe-delimited api_key) are displayed as + grouped provider lines; single-var providers as individual lines. + """ + from pdd.provider_manager import _read_csv, parse_api_key_vars + from pdd.api_key_scanner import _parse_api_env_file, _detect_shell + + # Ensure ~/.pdd exists + pdd_dir = Path.home() / ".pdd" + pdd_dir.mkdir(parents=True, exist_ok=True) + + # Gather unique api_key field values from the reference CSV + ref_path = Path(__file__).parent / "data" / "llm_model.csv" + ref_rows = _read_csv(ref_path) + + # Build two sets: single-var keys and multi-var provider groups + single_var_keys: set = set() # e.g. {"ANTHROPIC_API_KEY", "OPENAI_API_KEY"} + multi_var_providers: Dict[str, List[str]] = {} # provider_name -> [var1, var2, ...] 
+ all_individual_vars: set = set() # every individual var across all providers + + for row in ref_rows: + api_key_field = row.get("api_key", "").strip() + if not api_key_field: + continue + env_vars = parse_api_key_vars(api_key_field) + if len(env_vars) == 1: + single_var_keys.add(env_vars[0]) + all_individual_vars.add(env_vars[0]) + elif len(env_vars) > 1: + provider = row.get("provider", "").strip() or api_key_field + if provider not in multi_var_providers: + multi_var_providers[provider] = env_vars + for v in env_vars: + all_individual_vars.add(v) + + # Load all credential sources once + dotenv_vals: Dict[str, str] = {} try: - headers = { - 'x-api-key': api_key.strip(), - 'Content-Type': 'application/json' - } - response = requests.get( - 'https://api.anthropic.com/v1/messages', - headers=headers, - timeout=10 - ) - # Anthropic returns 400 for invalid request structure but 401/403 for bad keys - return response.status_code != 401 and response.status_code != 403 - except Exception: - return False - -def test_api_keys(keys: Dict[str, Optional[str]]) -> Dict[str, bool]: - """Test all discovered API keys""" - results = {} - - print_colored(f"\n{LIGHT_HORIZONTAL * 40}", CYAN) - print_colored("Testing discovered API keys...", CYAN, bold=True) - print_colored(f"{LIGHT_HORIZONTAL * 40}", CYAN) - - for key_name, key_value in keys.items(): - if key_value: - print(f"Testing {key_name}...", end=" ", flush=True) - if key_name == 'OPENAI_API_KEY': - valid = test_openai_key(key_value) - elif key_name in ['GEMINI_API_KEY', 'GOOGLE_API_KEY']: - valid = test_google_key(key_value) - elif key_name == 'ANTHROPIC_API_KEY': - valid = test_anthropic_key(key_value) + from dotenv import dotenv_values + for env_path in [Path.cwd() / ".env", Path.home() / ".env"]: + if env_path.is_file(): + vals = dotenv_values(env_path) + for k, v in vals.items(): + if v is not None and k not in dotenv_vals: + dotenv_vals[k] = v + except ImportError: + pass + + shell_name = _detect_shell() + api_env_vals: 
Dict[str, str] = {} + api_env_label = "" + if shell_name: + api_env_path = pdd_dir / f"api-env.{shell_name}" + api_env_vals = _parse_api_env_file(api_env_path) + api_env_label = f"~/.pdd/api-env.{shell_name}" + + def _find_source(var: str) -> Optional[str]: + if var in os.environ: + return "shell environment" + if var in api_env_vals: + return api_env_label + if var in dotenv_vals: + return ".env file" + return None + + found_keys: List[Tuple[str, str]] = [] + + # --- Multi-var providers: grouped display --- + for provider_name, env_vars in sorted(multi_var_providers.items()): + found_vars = [] + missing_vars = [] + for var in env_vars: + source = _find_source(var) + if source: + found_vars.append(var) + found_keys.append((var, source)) else: - valid = False - - if valid: - print_colored(f"{CHECK_MARK} Valid", CYAN) - results[key_name] = True - else: - print_colored(f"{CROSS_MARK} Invalid", YELLOW) - results[key_name] = False - else: - print_colored(f"{key_name}: Not found", YELLOW) - results[key_name] = False - - return results - -def get_user_keys(current_keys: Dict[str, Optional[str]]) -> Dict[str, Optional[str]]: - """Interactive key entry/modification""" - print_colored(f"\n{create_fat_divider()}", YELLOW) - print_colored("API Key Configuration", YELLOW, bold=True) - print_colored(f"{create_fat_divider()}", YELLOW) - - print_colored("You need only one API key to get started", WHITE) - print() - print_colored("Get API keys here:", WHITE) - print_colored(f" OpenAI {ARROW_RIGHT} https://platform.openai.com/api-keys", CYAN) - print_colored(f" Google Gemini {ARROW_RIGHT} https://aistudio.google.com/app/apikey", CYAN) - print_colored(f" Anthropic {ARROW_RIGHT} https://console.anthropic.com/settings/keys", CYAN) - print() - print_colored("A free instant starter key is available from Google Gemini (above)", CYAN) - print() - - new_keys = current_keys.copy() - - # Get the actual key names from discovered keys - key_names = list(current_keys.keys()) - for key_name in 
key_names: - current_value = current_keys.get(key_name, "") - status = "found" if current_value else "not found" - - print_colored(f"{LIGHT_HORIZONTAL * 60}", CYAN) - print_colored(f"{key_name} (currently: {status})", WHITE, bold=True) - - if current_value: - prompt = f"Enter new key or press ENTER to keep existing: " + missing_vars.append(var) + + if not found_vars and not missing_vars: + continue + + total = len(env_vars) + found_count = len(found_vars) + if found_count == total: + _console.print(f" [green]✓[/green] {provider_name}: {found_count}/{total} vars set") + elif found_count > 0: + missing_str = ", ".join(missing_vars) + _console.print( + f" [yellow]![/yellow] {provider_name}: {found_count}/{total} vars set" + f" (missing: {missing_str})" + ) + # If found_count == 0, skip — nothing to show for this provider + + # --- Single-var providers: individual display --- + sorted_single = sorted(single_var_keys) + max_name_len = max((len(k) for k in sorted_single), default=20) if sorted_single else 20 + for key_name in sorted_single: + source = _find_source(key_name) + if source: + found_keys.append((key_name, source)) + _console.print(f" [green]✓[/green] {key_name:<{max_name_len}s} {source}") + + if not found_keys: + _console.print(" [yellow]✗ No API keys found.[/yellow]\n") + found_keys = _prompt_for_api_key() + + print(f"\n {len(found_keys)} API key(s) found.") + + api_env_path = pdd_dir / f"api-env.{shell_name}" if shell_name else pdd_dir / "api-env.bash" + _console.print(f" [dim]You can edit your global API keys in {api_env_path}[/dim]") + + return found_keys + + +def _prompt_for_api_key() -> List[Tuple[str, str]]: + """Interactively ask the user to add at least one API key. + + Called when no keys are found during scanning. Saves the key to + ~/.pdd/api-env.{shell} and loads it into the current session. + Returns list of (key_name, source_label) for newly added keys. 
+ """ + from pdd.provider_manager import _read_csv, _save_key_to_api_env + + added_keys: List[Tuple[str, str]] = [] + api_env_label = f"~/.pdd/api-env.{os.path.basename(os.environ.get('SHELL', 'bash'))}" + + # Build provider list from reference CSV + ref_path = Path(__file__).parent / "data" / "llm_model.csv" + ref_rows = _read_csv(ref_path) + # Collect unique (provider_display, api_key_env_var) pairs + seen = set() + all_providers: List[Tuple[str, str]] = [] + for row in ref_rows: + provider = row.get("provider", "").strip() + api_key = row.get("api_key", "").strip() + if provider and api_key and (provider, api_key) not in seen: + seen.add((provider, api_key)) + all_providers.append((provider, api_key)) + all_providers.sort(key=lambda x: x[0]) + + while True: + print(" To continue setup, add at least one API key.") + print(" Providers:") + for i, (display, env_var) in enumerate(all_providers, 1): + print(f" {i}) {display:<25s} ({env_var})") + skip_idx = len(all_providers) + 1 + print(f" {skip_idx}) Skip (continue without keys)") + + try: + choice = input(f"\n Select provider [1-{skip_idx}]: ").strip() + except (EOFError, KeyboardInterrupt): + print() + break + + # Parse choice + try: + choice_num = int(choice) + except ValueError: + _console.print(f" [yellow]Invalid input. Enter a number 1-{skip_idx}.[/yellow]\n") + continue + + if choice_num == skip_idx: + break + + if 1 <= choice_num <= len(all_providers): + display, env_var = all_providers[choice_num - 1] else: - prompt = f"Enter API key (or press ENTER to skip): " - + _console.print(f" [yellow]Invalid input. 
Enter a number 1-{skip_idx}.[/yellow]\n") + continue + + # Prompt for the key value (masked) try: - user_input = input(f"{WHITE}{prompt}{RESET}").strip() - if user_input: - new_keys[key_name] = user_input - elif not current_value: - new_keys[key_name] = None - except KeyboardInterrupt: - print_colored("\n\nSetup cancelled.", YELLOW) - sys.exit(0) - - return new_keys - -def detect_shell() -> str: - """Detect user's default shell""" - try: - shell_path = os.getenv('SHELL', '/bin/bash') - shell_name = os.path.basename(shell_path) - return shell_name - except: - return 'bash' - -def get_shell_init_file(shell: str) -> str: - """Get the appropriate shell initialization file""" - home = Path.home() - - shell_files = { - 'bash': home / '.bashrc', - 'zsh': home / '.zshrc', - 'fish': home / '.config/fish/config.fish', - 'csh': home / '.cshrc', - 'tcsh': home / '.tcshrc', - 'ksh': home / '.kshrc', - 'sh': home / '.profile' - } - - return str(shell_files.get(shell, home / '.bashrc')) - -def create_api_env_script(keys: Dict[str, str], shell: str) -> str: - """Create shell-appropriate environment script with proper escaping""" - valid_keys = {k: v for k, v in keys.items() if v} - lines = [] - - for key, value in valid_keys.items(): - # shlex.quote is designed for POSIX shells (sh, bash, zsh, ksh) - # It also works reasonably well for fish and csh for simple assignments - quoted_val = shlex.quote(value) - - if shell == 'fish': - lines.append(f'set -gx {key} {quoted_val}') - elif shell in ['csh', 'tcsh']: - lines.append(f'setenv {key} {quoted_val}') - else: # bash, zsh, ksh, sh and others - lines.append(f'export {key}={quoted_val}') - - return '\n'.join(lines) + '\n' - -def save_configuration(valid_keys: Dict[str, str]) -> Tuple[List[str], bool, Optional[str]]: - """Save configuration to ~/.pdd/ directory""" - home = Path.home() - pdd_dir = home / '.pdd' - created_pdd_dir = False - saved_files = [] - - # Create .pdd directory if it doesn't exist - if not pdd_dir.exists(): - 
pdd_dir.mkdir(mode=0o755) - created_pdd_dir = True - - # Detect shell and create api-env script - shell = detect_shell() - api_env_content = create_api_env_script(valid_keys, shell) - - # Write shell-specific api-env file - api_env_file = pdd_dir / f'api-env.{shell}' - api_env_file.write_text(api_env_content) - api_env_file.chmod(0o755) - saved_files.append(str(api_env_file)) - - # Create llm_model.csv with models from packaged CSV filtered by provider and available keys - header_fields, rows = _read_packaged_llm_model_csv() - - # Keep only direct Google Gemini (model startswith 'gemini/'), OpenAI GPT (gpt-*) and Anthropic (anthropic/*) - def _is_supported_model(row: Dict[str, str]) -> bool: - model = (row.get('model') or '').strip() - if model.startswith('gpt-'): - return True - if model.startswith('gemini/'): - return True - if model.startswith('anthropic/'): - return True - return False - - # Filter rows by supported models and by api_key presence in valid_keys - filtered_rows: List[Dict[str, str]] = [] - for row in rows: - if not _is_supported_model(row): + key_value = getpass.getpass(f" Paste your {env_var}: ").strip() + except (EOFError, KeyboardInterrupt): + print() + break + + if not key_value: + _console.print(" [yellow]No key entered, skipping.[/yellow]\n") + continue + + # Save to api-env file and load into current session + _save_key_to_api_env(env_var, key_value) + added_keys.append((env_var, api_env_label)) + _console.print(f" [green]✓[/green] {env_var} saved to {api_env_label}") + _console.print(f" [green]✓[/green] Loaded into current session\n") + + # Ask if they want to add another + try: + another = input(" Add another key? 
[y/N]: ").strip().lower() + except (EOFError, KeyboardInterrupt): + print() + break + + if another not in ("y", "yes"): + break + print() + + return added_keys + + +# --------------------------------------------------------------------------- +# Step 2 — Configure models + .pddrc +# --------------------------------------------------------------------------- + +def _step2_configure_models_and_pddrc( + found_keys: List[Tuple[str, str]], +) -> Dict[str, int]: + """Match found API keys to reference models, write user CSV, and ensure .pddrc. + + Returns {provider_display_name: model_count} for the summary. + """ + from pdd.provider_manager import ( + _read_csv, + _write_csv_atomic, + _get_user_csv_path, + ) + from pdd.pddrc_initializer import _detect_language, _build_pddrc_content + + found_key_names = {k for k, _ in found_keys} + + # Read reference CSV + ref_path = Path(__file__).parent / "data" / "llm_model.csv" + ref_rows = _read_csv(ref_path) + + # Filter reference rows to those whose api_key env vars are all found. + # Supports pipe-delimited multi-var fields (e.g. "VAR1|VAR2|VAR3"). + # Empty api_key (device flow / local) matches automatically. + # Skip local-only rows (lm_studio, ollama, localhost base_url). 
+ from pdd.provider_manager import parse_api_key_vars + + matching_rows: List[Dict[str, str]] = [] + for row in ref_rows: + api_key_col = row.get("api_key", "").strip() + provider = row.get("provider", "").strip().lower() + base_url = row.get("base_url", "").strip() + + # Skip local models + if provider in ("lm_studio", "ollama"): continue - api_key_name = (row.get('api_key') or '').strip() - # Include only if we have a validated key for this row - if api_key_name and api_key_name in valid_keys: - filtered_rows.append(row) - - # Write out the filtered CSV to ~/.pdd/llm_model.csv preserving column order - llm_model_file = pdd_dir / 'llm_model.csv' - with llm_model_file.open('w', newline='') as f: - writer = csv.DictWriter(f, fieldnames=header_fields) - writer.writeheader() - for row in filtered_rows: - writer.writerow({k: row.get(k, '') for k in header_fields}) - saved_files.append(str(llm_model_file)) - - # Update shell init file - init_file_path = get_shell_init_file(shell) - init_file = Path(init_file_path) - init_file_updated = None - - source_line = f'[ -f "{api_env_file}" ] && source "{api_env_file}"' - if shell == 'fish': - source_line = f'test -f "{api_env_file}"; and source "{api_env_file}"' - elif shell in ['csh', 'tcsh']: - source_line = f'if ( -f "{api_env_file}" ) source "{api_env_file}"' - elif shell == 'sh': - source_line = f'[ -f "{api_env_file}" ] && . 
"{api_env_file}"' - - # Ensure parent directory exists (important for fish shell) - init_file.parent.mkdir(parents=True, exist_ok=True) - - # Check if source line already exists - if init_file.exists(): - content = init_file.read_text() - if str(api_env_file) not in content: - with init_file.open('a') as f: - f.write(f'\n# PDD API environment\n{source_line}\n') - init_file_updated = str(init_file) + if base_url and ("localhost" in base_url or "127.0.0.1" in base_url): + continue + + # Match: all individual env vars must be in found_key_names + env_vars = parse_api_key_vars(api_key_col) + if not env_vars: + # Empty api_key = device flow (e.g. GitHub Copilot) — always match + matching_rows.append(row) + elif all(v in found_key_names for v in env_vars): + matching_rows.append(row) + + # Read existing user CSV and deduplicate (create if missing) + user_csv_path = _get_user_csv_path() + user_csv_path.parent.mkdir(parents=True, exist_ok=True) + existing_rows = _read_csv(user_csv_path) + existing_models = {r.get("model", "").strip() for r in existing_rows} + + new_rows: List[Dict[str, str]] = [] + for row in matching_rows: + if row.get("model", "").strip() not in existing_models: + new_rows.append(row) + + # Count by provider for display + provider_counts: Dict[str, int] = {} + all_rows = existing_rows + new_rows + for row in all_rows: + provider = row.get("provider", "Unknown").strip() + if row.get("api_key", "").strip(): + provider_counts[provider] = provider_counts.get(provider, 0) + 1 + + # Write merged result + if new_rows: + _write_csv_atomic(user_csv_path, all_rows) + _console.print(f" [green]✓[/green] {len(new_rows)} new model(s) added to {user_csv_path}") else: - init_file.write_text(f'# PDD API environment\n{source_line}\n') - init_file_updated = str(init_file) - - return saved_files, created_pdd_dir, init_file_updated - -def create_sample_prompt(): - """Create the sample prompt file""" - prompt_file = Path('success_python.prompt') - 
prompt_file.write_text(SUCCESS_PYTHON_TEMPLATE) - return str(prompt_file) + _console.print(f" [green]✓[/green] All matching models already loaded in {user_csv_path}") + + total = sum(provider_counts.values()) + _console.print(f" [green]✓[/green] {total} model(s) configured") + for provider, count in sorted(provider_counts.items()): + s = "s" if count != 1 else "" + print(f" {provider}: {count} model{s}") + + # ── Check .pddrc ───────────────────────────────────────────────────── + cwd = Path.cwd() + pddrc_path = cwd / ".pddrc" + if pddrc_path.exists(): + _console.print(f" [green]✓[/green] .pddrc detected at {pddrc_path}") + else: + print() + _console.print(" [bold].pddrc[/bold] configures where PDD puts generated code, tests, and examples.") + _console.print(" It lives in your project root and lets you define contexts for different") + _console.print(" parts of your codebase (e.g. frontend vs backend).") + print() + try: + answer = input(" Create .pddrc in this project? [y/Enter to skip] ").strip().lower() + except (EOFError, KeyboardInterrupt): + answer = "" 

-def show_menu(keys: Dict[str, Optional[str]], test_results: Dict[str, bool]) -> str: - """Show main menu and get user choice""" - print_colored(f"\n{create_divider()}", CYAN) - print_colored("Main Menu", CYAN, bold=True) - print_colored(f"{create_divider()}", CYAN) - - # Show current status - print_colored("Current API Key Status:", WHITE, bold=True) - # Get the actual key names from discovered keys - key_names = list(keys.keys()) - for key_name in key_names: - key_value = keys.get(key_name) - if key_value: - status = f"{CHECK_MARK} Valid" if test_results.get(key_name) else f"{CROSS_MARK} Invalid" - status_color = CYAN if test_results.get(key_name) else YELLOW + if answer in ("y", "yes"): + language = _detect_language(cwd) or "python" + content = _build_pddrc_content(language) + try: + pddrc_path.write_text(content, encoding="utf-8") + _console.print(f" [green]✓[/green] Created .pddrc at {pddrc_path} 
(detected: {language})") + except OSError as exc: + _console.print(f" [yellow]✗ Failed to create .pddrc: {exc}[/yellow]") else: - status = "Not configured" - status_color = YELLOW - - print(f" {key_name}: ", end="") - print_colored(status, status_color) - + _console.print(" [dim]Skipped .pddrc creation. You can create one later with pdd setup.") + + return provider_counts + + +# --------------------------------------------------------------------------- +# Step 3 — Test one model + print summary +# --------------------------------------------------------------------------- + +def _step3_test_and_summary( + found_keys: List[Tuple[str, str]], + model_summary: Dict[str, int], + cli_results=None, +) -> None: + """Test the first available cloud model and print the final summary.""" + from pdd.provider_manager import _read_csv, _get_user_csv_path, parse_api_key_vars + + user_csv_path = _get_user_csv_path() + rows = _read_csv(user_csv_path) + test_result = "Skipped (no models configured)" + + # Pick first cloud model that has all auth configured. + # Uses the pipe-delimited api_key convention: check every env var is set. + cloud_row = None + for row in rows: + api_key_field = row.get("api_key", "").strip() + env_vars = parse_api_key_vars(api_key_field) + if not env_vars: + # Empty = device flow (e.g. 
GitHub Copilot) — pick it + cloud_row = row + break + if all(os.getenv(v, "") for v in env_vars): + cloud_row = row + break + + if cloud_row: + test_model = cloud_row.get("model", "") + try: + import litellm # noqa: F401 + from pdd.model_tester import _run_test + import threading + import time as time_module + + sys.stdout.write(f" Testing {test_model}...") + sys.stdout.flush() + + # Run in a thread so we can print dots while waiting + test_result_holder: list = [None] + + def _do_test() -> None: + test_result_holder[0] = _run_test(cloud_row) + + t = threading.Thread(target=_do_test, daemon=True) + t.start() + + elapsed = 0.0 + while t.is_alive() and elapsed < 8.0: + t.join(timeout=1.0) + if t.is_alive(): + sys.stdout.write(".") + sys.stdout.flush() + elapsed += 1.0 + + sys.stdout.write("\n") + if t.is_alive(): + result = { + "success": False, + "duration_s": elapsed, + "cost": 0.0, + "error": "Request timed out (8s)", + "tokens": None, + } + else: + result = test_result_holder[0] or { + "success": False, + "duration_s": 0.0, + "cost": 0.0, + "error": "Unknown error", + "tokens": None, + } + + if result["success"]: + test_result = f"[green]✓[/green] {test_model} responded OK ({result['duration_s']:.1f}s)" + else: + test_result = f"[yellow]✗ {test_model} failed: {result['error']}[/yellow]" + except ImportError: + test_result = "[yellow]Skipped (litellm not installed)[/yellow]" + _console.print(f" {test_result}") + + # ── Summary ─────────────────────────────────────────────────────────── print() - print_colored("Options:", WHITE, bold=True) - print(f" 1. Re-enter API keys") - print(f" 2. Re-test current keys") - print(f" 3. Save configuration and exit") - print(f" 4. 
Exit without saving") + _console.print(" [bold green]PDD Setup Complete![/bold green]") print() - - while True: - try: - choice = input(f"{WHITE}Choose an option (1-4): {RESET}").strip() - if choice in ['1', '2', '3', '4']: - return choice + + # CLIs + if cli_results: + configured = [r for r in cli_results if not r.skipped and r.cli_name] + skipped = [r for r in cli_results if r.skipped] + if configured: + names = ", ".join(r.cli_name for r in configured) + no_key = [r for r in configured if not r.api_key_configured] + if no_key: + no_key_names = ", ".join(r.cli_name for r in no_key) + _console.print(f" CLI: [green]✓[/green] {names} configured ([yellow]{no_key_names} missing API key[/yellow])") else: - print_colored("Please enter 1, 2, 3, or 4", YELLOW) - except KeyboardInterrupt: - print_colored("\n\nSetup cancelled.", YELLOW) - sys.exit(0) - -def create_exit_summary(saved_files: List[str], created_pdd_dir: bool, sample_prompt_file: str, shell: str, valid_keys: Dict[str, str], init_file_updated: Optional[str] = None) -> str: - """Create comprehensive exit summary""" - summary_lines = [ - "\n\n\n\n\n", - create_fat_divider(), - "PDD Setup Complete!", - create_fat_divider(), - "", - "API Keys Configured:", - "" - ] - - # Add configured API keys information + _console.print(f" CLI: [green]✓[/green] {names} configured") + elif skipped: + _console.print(" CLI: [yellow]✗[/yellow] skipped") + else: + _console.print(" CLI: [dim]not configured[/dim]") + else: + _console.print(" CLI: [dim]not configured[/dim]") + + # API Keys + if found_keys: + _console.print(f" API Keys: [green]\u2713[/green] {len(found_keys)} found") + else: + _console.print(" API Keys: [red]\u2717[/red] 0 found") + + # Models + total_models = sum(model_summary.values()) + parts = ", ".join(f"{p}: {c}" for p, c in sorted(model_summary.items())) + if parts: + print(f" Models: {total_models} configured ({parts}) in {_get_user_csv_path()}") + else: + print(f" Models: {total_models} configured in 
{_get_user_csv_path()}") + + # .pddrc + pddrc_path = Path.cwd() / ".pddrc" + if pddrc_path.exists(): + _console.print(" .pddrc: [green]\u2713[/green] exists") + else: + _console.print(" .pddrc: [red]\u2717[/red] not created") + + # Test + _console.print(f" Test: {test_result}") + + # Exit summary is handled by run_setup after the options menu + + +# --------------------------------------------------------------------------- +# Exit summary — files, quick start, tips +# --------------------------------------------------------------------------- + +_FAT_DIVIDER = "\u2501" * 80 # ━ +_THIN_DIVIDER = "\u2500" * 80 # ─ +_BULLET = "\u2022" # • + +_SUCCESS_PYTHON_TEMPLATE = """\ +Write a python script to print "You did it, !!!" to the console. +Do not write anything except that message. +Capitalize the username.""" + + +def _create_sample_prompt() -> str: + """Create the sample prompt file if it doesn't exist. Returns the filename.""" + prompt_file = Path("success_python.prompt") + if not prompt_file.exists(): + prompt_file.write_text(_SUCCESS_PYTHON_TEMPLATE) + return str(prompt_file) + + +def _print_exit_summary(found_keys: List[Tuple[str, str]], cli_results=None) -> None: + """Write PDD-SETUP-SUMMARY.txt and print QUICK START + LEARN MORE to terminal.""" + from pdd.api_key_scanner import _detect_shell + + shell = _detect_shell() or "bash" + pdd_dir = Path.home() / ".pdd" + api_env_path = pdd_dir / f"api-env.{shell}" + user_csv_path = pdd_dir / "llm_model.csv" + sample_prompt = _create_sample_prompt() + + # Build valid_keys dict: key_name -> actual value + valid_keys: Dict[str, str] = {} + for key_name, _source in found_keys: + val = os.environ.get(key_name, "") + if val.strip(): + valid_keys[key_name] = val + + # Determine which files were created/configured + saved_files: List[str] = [] + if api_env_path.exists(): + saved_files.append(str(api_env_path)) + if user_csv_path.exists(): + saved_files.append(str(user_csv_path)) + + created_pdd_dir = pdd_dir.exists() + + # 
Check if shell init file was updated + from pdd.provider_manager import _get_shell_rc_path + rc_path = _get_shell_rc_path() + init_file_updated: Optional[str] = None + if rc_path and rc_path.exists(): + rc_content = rc_path.read_text(encoding="utf-8") + if "api-env" in rc_content: + init_file_updated = str(rc_path) + + # Source command + if shell == "sh": + source_cmd = f". {api_env_path}" + else: + source_cmd = f"source {api_env_path}" + + # ── Build full summary (saved to file) ─────────────────────────────── + lines: List[str] = [] + lines.append("") + lines.append("") + lines.append(_FAT_DIVIDER) + lines.append("PDD Setup Complete!") + lines.append(_FAT_DIVIDER) + lines.append("") + + # CLIs configured + lines.append("CLIs Configured:") + lines.append("") + if cli_results: + configured = [r for r in cli_results if not r.skipped and r.cli_name] + if configured: + for r in configured: + key_status = "API key set" if r.api_key_configured else "no API key" + lines.append(f" {r.cli_name} ({r.provider}) — {key_status}") + else: + lines.append(" None") + else: + lines.append(" None") + lines.append("") + + # API Keys configured + lines.append("API Keys Configured:") + lines.append("") if valid_keys: - for key_name, key_value in valid_keys.items(): - # Show just the first and last few characters for security - masked_key = f"{key_value[:8]}...{key_value[-4:]}" if len(key_value) > 12 else "***" - summary_lines.append(f" {key_name}: {masked_key}") - summary_lines.extend(["", "Files created and configured:", ""]) + for kn, kv in valid_keys.items(): + masked = f"{kv[:8]}...{kv[-4:]}" if len(kv) > 12 else "***" + lines.append(f" {kn}: {masked}") else: - summary_lines.extend([" None", "", "Files created and configured:", ""]) - - # File descriptions with alignment - file_descriptions = [] + lines.append(" None") + lines.append("") + + # Files created + lines.append("Files created and configured:") + lines.append("") + + file_descriptions: List[Tuple[str, str]] = [] if 
created_pdd_dir: file_descriptions.append(("~/.pdd/", "PDD configuration directory")) - - for file_path in saved_files: - if 'api-env.' in file_path: - file_descriptions.append((file_path, f"API environment variables ({shell} shell)")) - elif 'llm_model.csv' in file_path: - file_descriptions.append((file_path, "LLM model configuration")) - - file_descriptions.append((sample_prompt_file, "Sample prompt for testing")) - - # Add shell init file if it was updated + for fp in saved_files: + if "api-env." in fp: + file_descriptions.append((fp, f"API environment variables ({shell} shell)")) + elif "llm_model.csv" in fp: + file_descriptions.append((fp, "LLM model configuration")) + file_descriptions.append((sample_prompt, "Sample prompt for testing")) if init_file_updated: - file_descriptions.append((init_file_updated, f"Shell startup file (updated to source API environment)")) - + file_descriptions.append((init_file_updated, "Shell startup file (updated to source API environment)")) file_descriptions.append(("PDD-SETUP-SUMMARY.txt", "This summary")) - - # Find max file path length for alignment - max_path_len = max(len(path) for path, _ in file_descriptions) - - for file_path, description in file_descriptions: - summary_lines.append(f"{file_path:<{max_path_len + 2}}{description}") - - summary_lines.extend([ - "", - create_divider(), - "", - "QUICK START:", - "", - f"1. Reload your shell environment:" - ]) - - # Shell-specific source command for manual reloading - api_env_path = f"{Path.home()}/.pdd/api-env.{shell}" - # Use dot command for sh shell, source for others - if shell == 'sh': - source_cmd = f". {api_env_path}" - else: - source_cmd = f"source {api_env_path}" - - summary_lines.extend([ - f" {source_cmd}", - "", - f"2. 
Generate code from the sample prompt:", - f" pdd generate success_python.prompt", - "", - create_divider(), - "", - "LEARN MORE:", - "", - f"{BULLET} PDD documentation: pdd --help", - f"{BULLET} PDD website: https://promptdriven.ai/", - f"{BULLET} Discord community: https://discord.gg/Yp4RTh8bG7", - "", - "TIPS:", - "", - f"{BULLET} IMPORTANT: Reload your shell environment using the source command above", - "", - f"{BULLET} Start with simple prompts and gradually increase complexity", - f"{BULLET} Try out 'pdd test' with your prompt+code to create test(s) pdd can use to automatically verify and fix your output code", - f"{BULLET} Try out 'pdd example' with your prompt+code to create examples which help pdd do better", - "", - f"{BULLET} As you get comfortable, learn configuration settings, including the .pddrc file, PDD_GENERATE_OUTPUT_PATH, and PDD_TEST_OUTPUT_PATH", - f"{BULLET} For larger projects, use Makefiles and/or 'pdd sync'", - f"{BULLET} For ongoing substantial projects, learn about llm_model.csv and the --strength,", - f" --temperature, and --time options to optimize model cost, latency, and output quality", - "", - f"{BULLET} Use 'pdd --help' to explore all available commands", - "", - "Problems? Shout out on our Discord for help! 
https://discord.gg/Yp4RTh8bG7" - ]) - - return '\n'.join(summary_lines) - -def main(): - """Main setup workflow""" - # Initial greeting - print_pdd_logo() - - # Discover environment - print_colored(f"{create_divider()}", CYAN) - print_colored("Discovering local configuration...", CYAN, bold=True) - print_colored(f"{create_divider()}", CYAN) - - keys = discover_api_keys() - - # Test discovered keys - test_results = test_api_keys(keys) - - # Main interaction loop + + max_path_len = max(len(p) for p, _ in file_descriptions) if file_descriptions else 0 + for fp, desc in file_descriptions: + lines.append(f"{fp:<{max_path_len + 2}}{desc}") + + lines.append("") + lines.append(_THIN_DIVIDER) + lines.append("") + lines.append("QUICK START:") + lines.append("") + lines.append("1. Generate code from the sample prompt:") + lines.append(" pdd generate success_python.prompt") + lines.append("") + lines.append(_THIN_DIVIDER) + lines.append("") + lines.append("LEARN MORE:") + lines.append("") + lines.append(f"{_BULLET} PDD documentation: pdd --help") + lines.append(f"{_BULLET} PDD website: https://promptdriven.ai/") + lines.append(f"{_BULLET} Discord community: https://discord.gg/Yp4RTh8bG7") + lines.append("") + lines.append("TIPS:") + lines.append("") + lines.append(f"{_BULLET} Start with simple prompts and gradually increase complexity") + lines.append(f"{_BULLET} Try out 'pdd test' with your prompt+code to create test(s) pdd can use to automatically verify and fix your output code") + lines.append(f"{_BULLET} Try out 'pdd example' with your prompt+code to create examples which help pdd do better") + lines.append("") + lines.append(f"{_BULLET} As you get comfortable, learn configuration settings, including the .pddrc file, PDD_GENERATE_OUTPUT_PATH, and PDD_TEST_OUTPUT_PATH") + lines.append(f"{_BULLET} For larger projects, use Makefiles and/or 'pdd sync'") + lines.append(f"{_BULLET} For ongoing substantial projects, learn about llm_model.csv and the --strength,") + 
lines.append(f" --temperature, and --time options to optimize model cost, latency, and output quality") + lines.append("") + lines.append(f"{_BULLET} Use 'pdd --help' to explore all available commands") + lines.append("") + lines.append(f"Problems? Shout out on our Discord for help! https://discord.gg/Yp4RTh8bG7") + + if api_env_path.exists(): + lines.append("") + lines.append(_THIN_DIVIDER) + lines.append("") + lines.append("IMPORTANT: To use your API keys in this terminal session, run:") + lines.append(f" {source_cmd}") + lines.append("") + lines.append("New terminal windows will load keys automatically.") + + summary_text = "\n".join(lines) + + # Write PDD-SETUP-SUMMARY.txt + summary_path = Path("PDD-SETUP-SUMMARY.txt") + summary_path.write_text(summary_text, encoding="utf-8") + + # ── Print only QUICK START + LEARN MORE to terminal ────────────────── + print() + print() + _console.print("[bold green]Completed setup.[/bold green]") + print() + print(_THIN_DIVIDER) + print() + print("QUICK START:") + print() + print("1. 
Generate code from the sample prompt:") + print(" pdd generate success_python.prompt") + print() + print(_THIN_DIVIDER) + print() + print("LEARN MORE:") + print() + print(f"{_BULLET} PDD documentation: pdd --help") + print(f"{_BULLET} PDD website: https://promptdriven.ai/") + print(f"{_BULLET} Discord community: https://discord.gg/Yp4RTh8bG7") + print() + _console.print(f"[dim]Full summary saved to PDD-SETUP-SUMMARY.txt[/dim]") + print() + if api_env_path.exists(): + _console.print( + f"[bold yellow]Important:[/bold yellow] For updates to API keys in this terminal session, run:\n" + f"\n {source_cmd}\n\n" + f"[dim]New terminal windows will load updated keys automatically.[/dim]" + ) + print() + + +# --------------------------------------------------------------------------- +# Options menu (post-setup or fallback) +# --------------------------------------------------------------------------- + +def _run_options_menu() -> None: + """Menu loop for manual configuration options.""" + print() + + from pdd.provider_manager import add_provider_from_registry + from pdd.model_tester import test_model_interactive + while True: - choice = show_menu(keys, test_results) - - if choice == '1': - # Re-enter keys - keys = get_user_keys(keys) - test_results = test_api_keys(keys) - - elif choice == '2': - # Re-test keys - test_results = test_api_keys(keys) - - elif choice == '3': - # Save and exit - valid_keys = {k: v for k, v in keys.items() if v and test_results.get(k)} - - if not valid_keys: - print_colored("\nNo valid API keys to save!", YELLOW) - continue - - print_colored(f"\n{create_divider()}", CYAN) - print_colored("Saving configuration...", CYAN, bold=True) - print_colored(f"{create_divider()}", CYAN) - - try: - saved_files, created_pdd_dir, init_file_updated = save_configuration(valid_keys) - sample_prompt_file = create_sample_prompt() - shell = detect_shell() - - # Create and display summary - summary = create_exit_summary(saved_files, created_pdd_dir, sample_prompt_file, 
shell, valid_keys, init_file_updated) - - # Write summary to file - summary_file = Path('PDD-SETUP-SUMMARY.txt') - summary_file.write_text(summary) - - # Display summary with colors - lines = summary.split('\n') - for line in lines: - if line == create_fat_divider(): - print_colored(line, YELLOW, bold=True) - elif line == "PDD Setup Complete!": - print_colored(line, YELLOW, bold=True) - elif line == create_divider(): - print_colored(line, CYAN) - elif line.startswith("API Keys Configured:") or line.startswith("Files created and configured:"): - print_colored(line, CYAN, bold=True) - elif line.startswith("QUICK START:"): - print_colored(line, YELLOW, bold=True) - elif line.startswith("LEARN MORE:") or line.startswith("TIPS:"): - print_colored(line, CYAN, bold=True) - elif "IMPORTANT:" in line or "Problems?" in line: - print_colored(line, YELLOW, bold=True) - else: - print(line) - - break - - except Exception as e: - print_colored(f"Error saving configuration: {e}", YELLOW) - continue - - elif choice == '4': - # Exit without saving - print_colored("\nExiting without saving configuration.", YELLOW) + print(" Options:") + print(" 1. Add a provider") + print(" 2. Test a model") + print() + + try: + choice = input(" Select an option (Enter to finish): ").strip() + except (EOFError, KeyboardInterrupt): + print() break -if __name__ == '__main__': - try: - main() - except KeyboardInterrupt: - print_colored("\n\nSetup cancelled.", YELLOW) - sys.exit(0) \ No newline at end of file + if not choice: + break + + if choice == "1": + try: + add_provider_from_registry() + except Exception as exc: + print(f" Error adding provider: {exc}") + elif choice == "2": + try: + test_model_interactive() + except Exception as exc: + print(f" Error testing model: {exc}") + else: + _console.print(" [yellow]Invalid option. 
Please enter 1 or 2.[/yellow]") + + print() + + +if __name__ == "__main__": + run_setup() diff --git a/tests/test_api_key_scanner.py b/tests/test_api_key_scanner.py new file mode 100644 index 000000000..0d3e55c91 --- /dev/null +++ b/tests/test_api_key_scanner.py @@ -0,0 +1,515 @@ +# Test Plan: pdd/api_key_scanner.py +# +# Public API under test: +# - get_provider_key_names() → List[str] +# - scan_environment() → Dict[str, KeyInfo] +# - KeyInfo → dataclass(source, is_set) +# +# I. KeyInfo Data Model +# 1. test_keyinfo_fields: KeyInfo has source and is_set attributes. +# +# II. get_provider_key_names — CSV Parsing +# 2. test_key_names_csv_missing: No CSV → empty list. +# 3. test_key_names_csv_empty_file: Empty file → empty list. +# 4. test_key_names_csv_no_api_key_column: CSV without api_key header → empty list. +# 5. test_key_names_csv_all_empty_keys: All api_key values blank → empty list. +# 6. test_key_names_returns_sorted_unique: Normal CSV → sorted, deduplicated keys. +# 7. test_key_names_deduplicates_across_rows: Same key in multiple rows → single entry. +# 8. test_key_names_splits_pipe_delimited: Pipe-delimited api_key → individual keys. +# 9. test_key_names_pipe_dedup_across_rows: Pipe keys deduplicated across rows. +# 10. test_key_names_pipe_strips_whitespace: Whitespace around pipe segments stripped. +# 11. test_key_names_pipe_ignores_empty_segments: Empty pipe segments ignored. +# 12. test_key_names_malformed_csv: Malformed CSV → empty list, no crash. +# 13. test_key_names_permission_error: PermissionError → empty list, no crash. +# 14. test_key_names_unicode: Unicode in CSV → handled correctly. +# +# III. scan_environment — Early Exits +# 15. test_scan_no_models_configured: No CSV → empty dict. +# 16. test_scan_exception_returns_empty: Internal error → empty dict, no raise. +# +# IV. scan_environment — Source Detection +# 17. test_scan_detects_shell_env_key: Key in os.environ → source="shell environment". +# 18. 
test_scan_detects_api_env_file_key: Key in api-env.{shell} → source="~/.pdd/api-env.{shell}". +# 19. test_scan_detects_dotenv_key: Key in .env → source=".env file". +# 20. test_scan_missing_key_marked_not_set: Key absent everywhere → is_set=False. +# +# V. scan_environment — Priority Order +# 21. test_scan_dotenv_wins_over_shell: .env beats shell environment. +# 22. test_scan_shell_wins_over_api_env: Shell environment beats api-env file. +# +# VI. scan_environment — Shell-Specific Behavior +# 23. test_scan_bash_uses_bash_api_env: SHELL=/bin/bash → reads api-env.bash. +# 24. test_scan_zsh_uses_zsh_api_env: SHELL=/bin/zsh → reads api-env.zsh. +# +# VII. scan_environment — Pipe-Delimited Keys +# 25. test_scan_pipe_keys_scanned_individually: Each pipe-delimited key checked independently. +# +# VIII. scan_environment — Edge Cases +# 26. test_scan_special_chars_in_key_value: Key value with special chars → no crash. + +import csv +from pathlib import Path +from unittest import mock + +import pytest + +from pdd.api_key_scanner import ( + KeyInfo, + get_provider_key_names, + scan_environment, +) + + +# --------------------------------------------------------------------------- +# Module-level CSV fixtures +# --------------------------------------------------------------------------- + +_CSV_FIELDS = [ + "provider", "model", "input", "output", "coding_arena_elo", + "base_url", "api_key", "max_reasoning_tokens", "structured_output", + "reasoning_type", "location", +] + +SIMPLE_CSV_ROWS = [ + {"provider": "OpenAI", "model": "gpt-4", "input": "30.0", "output": "60.0", + "coding_arena_elo": "1000", "base_url": "", "api_key": "OPENAI_API_KEY", + "max_reasoning_tokens": "0", "structured_output": "True", + "reasoning_type": "", "location": ""}, + {"provider": "Anthropic", "model": "claude-3-opus", "input": "15.0", "output": "75.0", + "coding_arena_elo": "1000", "base_url": "", "api_key": "ANTHROPIC_API_KEY", + "max_reasoning_tokens": "0", "structured_output": "True", + 
"reasoning_type": "", "location": ""}, + {"provider": "Local", "model": "ollama/llama2", "input": "0.0", "output": "0.0", + "coding_arena_elo": "1000", "base_url": "http://localhost:11434", "api_key": "", + "max_reasoning_tokens": "0", "structured_output": "False", + "reasoning_type": "", "location": ""}, +] + +BEDROCK_CSV_ROWS = [ + {"provider": "AWS Bedrock", "model": "anthropic.claude-3", "input": "8.0", + "output": "24.0", "coding_arena_elo": "1000", "base_url": "", + "api_key": "AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME", + "max_reasoning_tokens": "0", "structured_output": "True", + "reasoning_type": "", "location": ""}, +] + +MIXED_CSV_ROWS = SIMPLE_CSV_ROWS + BEDROCK_CSV_ROWS + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _write_csv(path: Path, rows: list[dict], fieldnames: list[str] | None = None): + """Write rows to a CSV file at *path*.""" + fieldnames = fieldnames or _CSV_FIELDS + with open(path, "w", newline="", encoding="utf-8") as f: + writer = csv.DictWriter(f, fieldnames=fieldnames) + writer.writeheader() + writer.writerows(rows) + + +def _setup_home(tmp_path, monkeypatch, csv_rows=None, api_env_shell=None, + api_env_content=None): + """Set up a fake ~/.pdd directory with optional CSV and api-env file. + + Returns the tmp_path (acting as $HOME). + """ + pdd_dir = tmp_path / ".pdd" + pdd_dir.mkdir(parents=True, exist_ok=True) + monkeypatch.setattr(Path, "home", lambda: tmp_path) + + if csv_rows is not None: + _write_csv(pdd_dir / "llm_model.csv", csv_rows) + + if api_env_shell and api_env_content: + (pdd_dir / f"api-env.{api_env_shell}").write_text(api_env_content) + + return tmp_path + + +# --------------------------------------------------------------------------- +# I. 
KeyInfo Data Model +# --------------------------------------------------------------------------- + + +def test_keyinfo_fields(): + """KeyInfo dataclass should expose source and is_set.""" + ki = KeyInfo(source="shell environment", is_set=True) + assert ki.source == "shell environment" + assert ki.is_set is True + + ki_missing = KeyInfo(source="", is_set=False) + assert ki_missing.is_set is False + + +# --------------------------------------------------------------------------- +# II. get_provider_key_names — CSV Parsing +# --------------------------------------------------------------------------- + + +def test_key_names_csv_missing(tmp_path, monkeypatch): + """No CSV at all → empty list.""" + _setup_home(tmp_path, monkeypatch) + assert get_provider_key_names() == [] + + +def test_key_names_csv_empty_file(tmp_path, monkeypatch): + """CSV file exists but is empty → empty list.""" + home = _setup_home(tmp_path, monkeypatch) + (home / ".pdd" / "llm_model.csv").touch() + assert get_provider_key_names() == [] + + +def test_key_names_csv_no_api_key_column(tmp_path, monkeypatch): + """CSV lacks an api_key column → empty list.""" + home = _setup_home(tmp_path, monkeypatch) + csv_path = home / ".pdd" / "llm_model.csv" + with open(csv_path, "w", newline="", encoding="utf-8") as f: + writer = csv.DictWriter(f, fieldnames=["provider", "model"]) + writer.writeheader() + writer.writerow({"provider": "OpenAI", "model": "gpt-4"}) + assert get_provider_key_names() == [] + + +def test_key_names_csv_all_empty_keys(tmp_path, monkeypatch): + """All api_key values are blank → empty list.""" + home = _setup_home(tmp_path, monkeypatch) + csv_path = home / ".pdd" / "llm_model.csv" + with open(csv_path, "w", newline="", encoding="utf-8") as f: + writer = csv.DictWriter(f, fieldnames=["provider", "model", "api_key"]) + writer.writeheader() + writer.writerow({"provider": "Local", "model": "llama2", "api_key": ""}) + writer.writerow({"provider": "Local2", "model": "mistral", "api_key": " "}) 
+ assert get_provider_key_names() == [] + + +def test_key_names_returns_sorted_unique(tmp_path, monkeypatch): + """Normal CSV → sorted, deduplicated key names (local models with no key excluded).""" + _setup_home(tmp_path, monkeypatch, csv_rows=SIMPLE_CSV_ROWS) + assert get_provider_key_names() == ["ANTHROPIC_API_KEY", "OPENAI_API_KEY"] + + +def test_key_names_deduplicates_across_rows(tmp_path, monkeypatch): + """Same key used by multiple models → appears only once.""" + home = _setup_home(tmp_path, monkeypatch) + rows = [ + {"provider": "OpenAI", "model": "gpt-4", "api_key": "OPENAI_API_KEY"}, + {"provider": "OpenAI", "model": "gpt-3.5", "api_key": "OPENAI_API_KEY"}, + {"provider": "Together", "model": "llama", "api_key": "TOGETHER_API_KEY"}, + ] + _write_csv(home / ".pdd" / "llm_model.csv", rows, + fieldnames=["provider", "model", "api_key"]) + assert get_provider_key_names() == ["OPENAI_API_KEY", "TOGETHER_API_KEY"] + + +def test_key_names_splits_pipe_delimited(tmp_path, monkeypatch): + """Pipe-delimited api_key → individual key names.""" + _setup_home(tmp_path, monkeypatch, csv_rows=BEDROCK_CSV_ROWS) + assert get_provider_key_names() == [ + "AWS_ACCESS_KEY_ID", "AWS_REGION_NAME", "AWS_SECRET_ACCESS_KEY", + ] + + +def test_key_names_pipe_dedup_across_rows(tmp_path, monkeypatch): + """Pipe keys from multiple rows are deduplicated.""" + home = _setup_home(tmp_path, monkeypatch) + rows = [ + {"provider": "AWS Bedrock", "model": "claude-3", + "api_key": "AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME"}, + {"provider": "AWS Bedrock", "model": "claude-3.5", + "api_key": "AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME"}, + {"provider": "Anthropic", "model": "claude-3", "api_key": "ANTHROPIC_API_KEY"}, + ] + _write_csv(home / ".pdd" / "llm_model.csv", rows, + fieldnames=["provider", "model", "api_key"]) + assert get_provider_key_names() == [ + "ANTHROPIC_API_KEY", "AWS_ACCESS_KEY_ID", + "AWS_REGION_NAME", "AWS_SECRET_ACCESS_KEY", + ] + + 
+@pytest.mark.parametrize("raw_key,expected", [ + (" KEY_A | KEY_B | KEY_C ", ["KEY_A", "KEY_B", "KEY_C"]), +]) +def test_key_names_pipe_strips_whitespace(tmp_path, monkeypatch, raw_key, expected): + """Whitespace around pipe segments is stripped.""" + home = _setup_home(tmp_path, monkeypatch) + _write_csv( + home / ".pdd" / "llm_model.csv", + [{"provider": "Test", "model": "t", "api_key": raw_key}], + fieldnames=["provider", "model", "api_key"], + ) + assert get_provider_key_names() == expected + + +@pytest.mark.parametrize("raw_key,expected", [ + ("KEY_A||KEY_B|", ["KEY_A", "KEY_B"]), +]) +def test_key_names_pipe_ignores_empty_segments(tmp_path, monkeypatch, raw_key, expected): + """Empty segments in pipe-delimited values are ignored.""" + home = _setup_home(tmp_path, monkeypatch) + _write_csv( + home / ".pdd" / "llm_model.csv", + [{"provider": "Test", "model": "t", "api_key": raw_key}], + fieldnames=["provider", "model", "api_key"], + ) + assert get_provider_key_names() == expected + + +def test_key_names_malformed_csv(tmp_path, monkeypatch): + """Malformed CSV → empty list, no crash.""" + home = _setup_home(tmp_path, monkeypatch) + (home / ".pdd" / "llm_model.csv").write_text( + 'this is not,a valid\ncsv file with"broken quotes' + ) + result = get_provider_key_names() + assert isinstance(result, list) + + +def test_key_names_permission_error(tmp_path, monkeypatch): + """PermissionError reading CSV → empty list, no crash.""" + home = _setup_home(tmp_path, monkeypatch) + csv_path = home / ".pdd" / "llm_model.csv" + csv_path.write_text("provider,model,api_key\nTest,test,KEY\n") + + original_open = open + + def _raise_on_csv(file, *args, **kwargs): + if str(file) == str(csv_path): + raise PermissionError("Access denied") + return original_open(file, *args, **kwargs) + + with mock.patch("builtins.open", side_effect=_raise_on_csv): + assert get_provider_key_names() == [] + + +def test_key_names_unicode(tmp_path, monkeypatch): + """Unicode in CSV is handled without 
error.""" + home = _setup_home(tmp_path, monkeypatch) + _write_csv( + home / ".pdd" / "llm_model.csv", + [{"provider": "Tëst", "model": "模型", "api_key": "UNICODE_KEY_名前"}], + fieldnames=["provider", "model", "api_key"], + ) + assert "UNICODE_KEY_名前" in get_provider_key_names() + + +# --------------------------------------------------------------------------- +# III. scan_environment — Early Exits +# --------------------------------------------------------------------------- + + +def test_scan_no_models_configured(tmp_path, monkeypatch): + """No CSV → empty dict.""" + _setup_home(tmp_path, monkeypatch) + assert scan_environment() == {} + + +def test_scan_exception_returns_empty(tmp_path, monkeypatch): + """If get_provider_key_names raises, scan_environment returns {}.""" + _setup_home(tmp_path, monkeypatch) + with mock.patch( + "pdd.api_key_scanner.get_provider_key_names", + side_effect=Exception("boom"), + ): + assert scan_environment() == {} + + +# --------------------------------------------------------------------------- +# IV. 
scan_environment — Source Detection +# --------------------------------------------------------------------------- + + +def test_scan_detects_shell_env_key(tmp_path, monkeypatch): + """Key set in os.environ → source='shell environment', is_set=True.""" + _setup_home(tmp_path, monkeypatch, csv_rows=SIMPLE_CSV_ROWS) + monkeypatch.setenv("OPENAI_API_KEY", "sk-test123") + monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) + + result = scan_environment() + + assert result["OPENAI_API_KEY"].is_set is True + assert result["OPENAI_API_KEY"].source == "shell environment" + assert result["ANTHROPIC_API_KEY"].is_set is False + + +def test_scan_detects_api_env_file_key(tmp_path, monkeypatch): + """Key in api-env file → source='~/.pdd/api-env.bash', is_set=True.""" + _setup_home( + tmp_path, monkeypatch, + csv_rows=SIMPLE_CSV_ROWS, + api_env_shell="bash", + api_env_content="export OPENAI_API_KEY=sk-from-api-env\n", + ) + monkeypatch.setenv("SHELL", "/bin/bash") + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) + + result = scan_environment() + + assert result["OPENAI_API_KEY"].is_set is True + assert result["OPENAI_API_KEY"].source == "~/.pdd/api-env.bash" + assert result["ANTHROPIC_API_KEY"].is_set is False + + +def test_scan_detects_dotenv_key(tmp_path, monkeypatch): + """Key in .env file → source='.env file', is_set=True.""" + _setup_home(tmp_path, monkeypatch, csv_rows=SIMPLE_CSV_ROWS) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + + with mock.patch( + "pdd.api_key_scanner._load_dotenv_values", + return_value={"OPENAI_API_KEY": "sk-from-dotenv"}, + ): + result = scan_environment() + + assert result["OPENAI_API_KEY"].is_set is True + assert result["OPENAI_API_KEY"].source == ".env file" + + +def test_scan_missing_key_marked_not_set(tmp_path, monkeypatch): + """Key absent from all sources → is_set=False.""" + _setup_home(tmp_path, monkeypatch, csv_rows=SIMPLE_CSV_ROWS) + 
monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) + + with mock.patch("pdd.api_key_scanner._load_dotenv_values", return_value={}): + result = scan_environment() + + assert result["OPENAI_API_KEY"].is_set is False + assert result["ANTHROPIC_API_KEY"].is_set is False + + +# --------------------------------------------------------------------------- +# V. scan_environment — Priority Order +# --------------------------------------------------------------------------- + + +def test_scan_dotenv_wins_over_shell(tmp_path, monkeypatch): + """.env file has higher priority than shell environment.""" + _setup_home( + tmp_path, monkeypatch, + csv_rows=SIMPLE_CSV_ROWS, + api_env_shell="bash", + api_env_content="export OPENAI_API_KEY=sk-from-api-env\n", + ) + monkeypatch.setenv("SHELL", "/bin/bash") + monkeypatch.setenv("OPENAI_API_KEY", "sk-from-shell") + + with mock.patch( + "pdd.api_key_scanner._load_dotenv_values", + return_value={"OPENAI_API_KEY": "sk-from-dotenv"}, + ): + result = scan_environment() + + assert result["OPENAI_API_KEY"].source == ".env file" + + +def test_scan_shell_wins_over_api_env(tmp_path, monkeypatch): + """Shell environment has higher priority than api-env file.""" + _setup_home( + tmp_path, monkeypatch, + csv_rows=SIMPLE_CSV_ROWS, + api_env_shell="bash", + api_env_content="export OPENAI_API_KEY=sk-from-api-env\n", + ) + monkeypatch.setenv("SHELL", "/bin/bash") + monkeypatch.setenv("OPENAI_API_KEY", "sk-from-shell") + + with mock.patch("pdd.api_key_scanner._load_dotenv_values", return_value={}): + result = scan_environment() + + assert result["OPENAI_API_KEY"].source == "shell environment" + + +# --------------------------------------------------------------------------- +# VI. 
scan_environment — Shell-Specific Behavior +# --------------------------------------------------------------------------- + + +def test_scan_bash_uses_bash_api_env(tmp_path, monkeypatch): + """SHELL=/bin/bash → reads api-env.bash, not api-env.zsh.""" + home = _setup_home( + tmp_path, monkeypatch, + csv_rows=SIMPLE_CSV_ROWS, + api_env_shell="bash", + api_env_content="export OPENAI_API_KEY=sk-bash\n", + ) + # Also create a zsh file with a different key + (home / ".pdd" / "api-env.zsh").write_text( + "export ANTHROPIC_API_KEY=ant-zsh\n" + ) + monkeypatch.setenv("SHELL", "/bin/bash") + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) + + with mock.patch("pdd.api_key_scanner._load_dotenv_values", return_value={}): + result = scan_environment() + + assert result["OPENAI_API_KEY"].is_set is True + assert result["OPENAI_API_KEY"].source == "~/.pdd/api-env.bash" + # zsh file should NOT be consulted when shell is bash + assert result["ANTHROPIC_API_KEY"].is_set is False + + +def test_scan_zsh_uses_zsh_api_env(tmp_path, monkeypatch): + """SHELL=/bin/zsh → reads api-env.zsh.""" + _setup_home( + tmp_path, monkeypatch, + csv_rows=SIMPLE_CSV_ROWS, + api_env_shell="zsh", + api_env_content="export ANTHROPIC_API_KEY=ant-zsh\n", + ) + monkeypatch.setenv("SHELL", "/bin/zsh") + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) + + with mock.patch("pdd.api_key_scanner._load_dotenv_values", return_value={}): + result = scan_environment() + + assert result["ANTHROPIC_API_KEY"].is_set is True + assert result["ANTHROPIC_API_KEY"].source == "~/.pdd/api-env.zsh" + + +# --------------------------------------------------------------------------- +# VII. 
scan_environment — Pipe-Delimited Keys +# --------------------------------------------------------------------------- + + +def test_scan_pipe_keys_scanned_individually(tmp_path, monkeypatch): + """Each segment of a pipe-delimited api_key is checked independently.""" + _setup_home(tmp_path, monkeypatch, csv_rows=BEDROCK_CSV_ROWS) + monkeypatch.setenv("AWS_ACCESS_KEY_ID", "AKIA...") + monkeypatch.setenv("AWS_REGION_NAME", "us-east-1") + monkeypatch.delenv("AWS_SECRET_ACCESS_KEY", raising=False) + + with mock.patch("pdd.api_key_scanner._load_dotenv_values", return_value={}): + result = scan_environment() + + assert result["AWS_ACCESS_KEY_ID"].is_set is True + assert result["AWS_ACCESS_KEY_ID"].source == "shell environment" + assert result["AWS_REGION_NAME"].is_set is True + assert result["AWS_SECRET_ACCESS_KEY"].is_set is False + + +# --------------------------------------------------------------------------- +# VIII. scan_environment — Edge Cases +# --------------------------------------------------------------------------- + + +def test_scan_special_chars_in_key_value(tmp_path, monkeypatch): + """Keys with special-character values don't crash the scanner.""" + home = _setup_home(tmp_path, monkeypatch) + _write_csv( + home / ".pdd" / "llm_model.csv", + [{"provider": "Test", "model": "t", "api_key": "MY_SPECIAL_KEY"}], + fieldnames=["provider", "model", "api_key"], + ) + monkeypatch.setenv("MY_SPECIAL_KEY", "value_with_$pecial_chars") + monkeypatch.setenv("SHELL", "/bin/bash") + + with mock.patch("pdd.api_key_scanner._load_dotenv_values", return_value={}): + result = scan_environment() + + assert result["MY_SPECIAL_KEY"].is_set is True diff --git a/tests/test_cli_detector.py b/tests/test_cli_detector.py new file mode 100644 index 000000000..6158f0cd8 --- /dev/null +++ b/tests/test_cli_detector.py @@ -0,0 +1,774 @@ +"""Tests for pdd/cli_detector.py + +Behavioral tests driven through the two public entry points: + - detect_and_bootstrap_cli() + - detect_cli_tools() + 
+Test plan +--------- + +1. CliBootstrapResult data model + 1.1 Defaults to empty strings and False flags + 1.2 Skipped result has skipped=True, rest defaults + +2. detect_and_bootstrap_cli — Selection table & input parsing + 2.1 Table shows all three CLIs with install/key status + 2.2 Selecting "1" picks Claude CLI + 2.3 Comma-separated input "1,3" selects multiple CLIs + 2.4 Spaces in input "1, 3" are tolerated + 2.5 Duplicate input "1,1,3" is deduplicated + 2.6 Empty input defaults to best available (installed+key) + 2.7 Empty input defaults to installed-only when no keys set + 2.8 Invalid input falls back to default + 2.9 "q" quits with skipped result + 2.10 "n" quits with skipped result + +3. detect_and_bootstrap_cli — Install flow + 3.1 Already-installed CLI skips install prompt + 3.2 Not-installed CLI prompts for install, user accepts, npm succeeds + 3.3 Not-installed CLI, user accepts install but npm missing + 3.4 Not-installed CLI, install fails (non-zero exit) + 3.5 Not-installed CLI, user declines install → skipped + 3.6 Install succeeds but binary not found on PATH afterwards + +4. detect_and_bootstrap_cli — API key flow + 4.1 Key already set skips prompt + 4.2 Key not set, user provides key → saved to file and os.environ + 4.3 Key not set, user skips (Enter) → api_key_configured=False + 4.4 Anthropic skip shows subscription auth note + 4.5 Non-anthropic skip shows limited functionality note + 4.6 Google provider checks both GOOGLE_API_KEY and GEMINI_API_KEY + +5. detect_and_bootstrap_cli — CLI test step + 5.1 CLI test always runs after install+key steps + 5.2 --version success shows version output + 5.3 --version fails, falls back to --help + +6. detect_and_bootstrap_cli — Interrupt handling + 6.1 KeyboardInterrupt on selection prompt → skipped + 6.2 EOFError on selection prompt → skipped + 6.3 KeyboardInterrupt during per-CLI processing → stops remaining + +7. 
detect_and_bootstrap_cli — API key persistence + 7.1 Key saved to ~/.pdd/api-env.{shell} with correct export syntax + 7.2 Source line added to shell RC file + 7.3 Fish shell uses set -gx syntax and fish source line + 7.4 Duplicate keys are deduplicated in api-env file + +8. detect_cli_tools — Legacy detection + 8.1 Shows header with command context + 8.2 Found CLI shows checkmark and path + 8.3 Missing CLI shows X + 8.4 Key set but CLI missing → suggests install + 8.5 All CLIs installed with keys → success message + 8.6 No CLIs found → quick start message +""" + +from __future__ import annotations + +import os +import subprocess +from pathlib import Path +from unittest import mock + +import pytest + +from pdd.cli_detector import ( + CliBootstrapResult, + detect_and_bootstrap_cli, + detect_cli_tools, +) + + +# --------------------------------------------------------------------------- +# Module-level constants — realistic scenarios for test fixtures +# --------------------------------------------------------------------------- + +# Provider/CLI status: all three CLIs installed with keys +ALL_INSTALLED = { + "claude": "/usr/local/bin/claude", + "codex": "/usr/local/bin/codex", + "gemini": "/usr/local/bin/gemini", +} + +ALL_KEYS = { + "ANTHROPIC_API_KEY": "sk-ant-test", + "OPENAI_API_KEY": "sk-oai-test", + "GEMINI_API_KEY": "gm-test", + "GOOGLE_API_KEY": "gm-test", +} + +# Only Claude installed with key +CLAUDE_ONLY = {"claude": "/usr/local/bin/claude"} +CLAUDE_KEY = {"ANTHROPIC_API_KEY": "sk-ant-test"} + +# No CLIs installed, no keys +NOTHING = {} + + +# --------------------------------------------------------------------------- +# Helper: capture output from detect_and_bootstrap_cli +# --------------------------------------------------------------------------- + +def _run_bootstrap_capture( + monkeypatch, + tmp_path: Path, + user_inputs: list[str], + *, + cli_paths: dict[str, str] | None = None, + env_keys: dict[str, str] | None = None, + npm_available: bool = 
False, + install_succeeds: bool = False, + install_then_found: str | None = None, + version_output: str = "1.0.0", + version_returncode: int = 0, +) -> tuple[str, list[CliBootstrapResult]]: + """Run detect_and_bootstrap_cli with mocked boundaries. + + Args: + monkeypatch: pytest monkeypatch fixture + tmp_path: temporary directory for home + user_inputs: sequence of strings for _prompt_input + cli_paths: mapping of cli_name -> path (None = not found) + env_keys: environment variables to set + npm_available: whether npm is on PATH + install_succeeds: whether subprocess install returns 0 + install_then_found: path to return after install succeeds (None = not found) + version_output: stdout from --version + version_returncode: exit code from --version + + Returns: + (captured_output, results) tuple + """ + cli_paths = cli_paths or {} + env_keys = env_keys or {} + + # Clean environment + for var in ("ANTHROPIC_API_KEY", "OPENAI_API_KEY", "GOOGLE_API_KEY", + "GEMINI_API_KEY", "SHELL"): + monkeypatch.delenv(var, raising=False) + for k, v in env_keys.items(): + monkeypatch.setenv(k, v) + monkeypatch.setenv("SHELL", "/bin/bash") + + # Mock Path.home to tmp_path + monkeypatch.setattr(Path, "home", staticmethod(lambda: tmp_path)) + + # Create shell RC file + rc_file = tmp_path / ".bashrc" + if not rc_file.exists(): + rc_file.write_text("# existing\n") + + # Mock user input + input_iter = iter(user_inputs) + monkeypatch.setattr( + "pdd.cli_detector._prompt_input", + lambda _prompt="": next(input_iter), + ) + + # Track _find_cli_binary calls to simulate post-install discovery + find_call_count = {} + + def mock_find_cli_binary(name): + find_call_count[name] = find_call_count.get(name, 0) + 1 + if name in cli_paths: + return cli_paths[name] + # After install, return install_then_found for the CLI being installed + if install_then_found and find_call_count[name] > 1: + return install_then_found + return None + + # Mock subprocess.run for both install and --version/--help + def 
mock_subprocess_run(cmd, **kwargs): + result = mock.MagicMock() + if kwargs.get("shell"): + # Install command + result.returncode = 0 if install_succeeds else 1 + result.stdout = "" + result.stderr = "" + else: + # CLI test (--version or --help) + result.returncode = version_returncode + result.stdout = version_output + result.stderr = "" + return result + + # Mock npm availability + def mock_shutil_which(cmd): + if cmd == "npm": + return "/usr/bin/npm" if npm_available else None + return cli_paths.get(cmd) + + # Capture console output + printed = [] + + def capture_print(*args, **kwargs): + printed.append(" ".join(str(a) for a in args)) + + # Apply mocks + with mock.patch("pdd.cli_detector._find_cli_binary", side_effect=mock_find_cli_binary), \ + mock.patch("pdd.cli_detector.console") as mock_console, \ + mock.patch("subprocess.run", side_effect=mock_subprocess_run), \ + mock.patch("shutil.which", side_effect=mock_shutil_which), \ + mock.patch("pdd.setup_tool._print_step_banner"): + + mock_console.print.side_effect = capture_print + results = detect_and_bootstrap_cli() + + output = "\n".join(printed) + return output, results + + +# --------------------------------------------------------------------------- +# Helper: capture output from detect_cli_tools +# --------------------------------------------------------------------------- + +def _run_legacy_capture( + monkeypatch, + cli_paths: dict[str, str] | None = None, + env_keys: dict[str, str] | None = None, +) -> str: + """Run detect_cli_tools with mocked boundaries, return captured output.""" + cli_paths = cli_paths or {} + env_keys = env_keys or {} + + for var in ("ANTHROPIC_API_KEY", "OPENAI_API_KEY", "GOOGLE_API_KEY", + "GEMINI_API_KEY"): + monkeypatch.delenv(var, raising=False) + for k, v in env_keys.items(): + monkeypatch.setenv(k, v) + + def mock_which(cmd): + return cli_paths.get(cmd) + + printed = [] + + def capture_print(*args, **kwargs): + printed.append(" ".join(str(a) for a in args)) + + with 
mock.patch("pdd.cli_detector._which", side_effect=mock_which), \ + mock.patch("pdd.cli_detector.console") as mock_console, \ + mock.patch("pdd.cli_detector._npm_available", return_value=False): + mock_console.print.side_effect = capture_print + detect_cli_tools() + + return "\n".join(printed) + + +# =================================================================== +# 1. CliBootstrapResult data model +# =================================================================== + + +class TestCliBootstrapResult: + """Pure contract tests for the result dataclass.""" + + def test_defaults_to_empty(self): + r = CliBootstrapResult() + assert r.cli_name == "" + assert r.provider == "" + assert r.cli_path == "" + assert r.api_key_configured is False + assert r.skipped is False + + def test_skipped_result(self): + r = CliBootstrapResult(skipped=True) + assert r.skipped is True + assert r.cli_name == "" + + def test_populated_result(self): + r = CliBootstrapResult( + cli_name="claude", provider="anthropic", + cli_path="/usr/local/bin/claude", api_key_configured=True, + ) + assert r.cli_name == "claude" + assert r.provider == "anthropic" + assert r.cli_path == "/usr/local/bin/claude" + assert r.api_key_configured is True + assert r.skipped is False + + +# =================================================================== +# 2. 
detect_and_bootstrap_cli — Selection table & input parsing +# =================================================================== + + +class TestBootstrapSelectionTable: + """Tests for the numbered table display and user input parsing.""" + + def test_table_shows_all_three_clis(self, monkeypatch, tmp_path): + output, _ = _run_bootstrap_capture( + monkeypatch, tmp_path, ["1"], + cli_paths=CLAUDE_ONLY, env_keys=CLAUDE_KEY, + ) + assert "Claude CLI" in output + assert "Codex CLI" in output + assert "Gemini CLI" in output + + def test_table_shows_install_and_key_status(self, monkeypatch, tmp_path): + output, _ = _run_bootstrap_capture( + monkeypatch, tmp_path, ["1"], + cli_paths=CLAUDE_ONLY, env_keys=CLAUDE_KEY, + ) + # Claude is installed with key + assert "Found at" in output + assert "ANTHROPIC_API_KEY" in output + # Others are not installed + assert "Not found" in output + + def test_select_single_cli(self, monkeypatch, tmp_path): + _, results = _run_bootstrap_capture( + monkeypatch, tmp_path, ["1"], + cli_paths=CLAUDE_ONLY, env_keys=CLAUDE_KEY, + ) + assert len(results) == 1 + assert results[0].cli_name == "claude" + assert results[0].provider == "anthropic" + assert results[0].api_key_configured is True + + def test_multi_select_comma_separated(self, monkeypatch, tmp_path): + _, results = _run_bootstrap_capture( + monkeypatch, tmp_path, ["1,3"], + cli_paths=ALL_INSTALLED, env_keys=ALL_KEYS, + ) + assert len(results) == 2 + assert results[0].cli_name == "claude" + assert results[1].cli_name == "gemini" + + def test_multi_select_with_spaces(self, monkeypatch, tmp_path): + _, results = _run_bootstrap_capture( + monkeypatch, tmp_path, ["1, 3"], + cli_paths=ALL_INSTALLED, env_keys=ALL_KEYS, + ) + assert len(results) == 2 + assert results[0].cli_name == "claude" + assert results[1].cli_name == "gemini" + + def test_duplicate_input_deduplicated(self, monkeypatch, tmp_path): + _, results = _run_bootstrap_capture( + monkeypatch, tmp_path, ["1,1,3"], + 
cli_paths=ALL_INSTALLED, env_keys=ALL_KEYS, + ) + assert len(results) == 2 + assert results[0].cli_name == "claude" + assert results[1].cli_name == "gemini" + + def test_empty_input_defaults_to_installed_with_key(self, monkeypatch, tmp_path): + """Empty input → default to first CLI that is installed AND has a key.""" + output, results = _run_bootstrap_capture( + monkeypatch, tmp_path, [""], + cli_paths={"gemini": "/usr/bin/gemini"}, + env_keys={"GEMINI_API_KEY": "gm-test"}, + ) + assert len(results) == 1 + assert results[0].cli_name == "gemini" + assert "Defaulting" in output + + def test_empty_input_defaults_to_installed_when_no_keys(self, monkeypatch, tmp_path): + """No keys set → default to first installed CLI.""" + output, results = _run_bootstrap_capture( + monkeypatch, tmp_path, ["", ""], # selection + key prompt skip + cli_paths={"codex": "/usr/bin/codex"}, + ) + assert len(results) == 1 + assert results[0].cli_name == "codex" + + def test_invalid_input_falls_back_to_default(self, monkeypatch, tmp_path): + output, results = _run_bootstrap_capture( + monkeypatch, tmp_path, ["xyz"], + cli_paths=CLAUDE_ONLY, env_keys=CLAUDE_KEY, + ) + assert len(results) == 1 + assert "Invalid input" in output or "Defaulting" in output + + @pytest.mark.parametrize("quit_input", ["q", "n"]) + def test_quit_returns_skipped(self, monkeypatch, tmp_path, quit_input): + _, results = _run_bootstrap_capture( + monkeypatch, tmp_path, [quit_input], + cli_paths=CLAUDE_ONLY, env_keys=CLAUDE_KEY, + ) + assert len(results) == 1 + assert results[0].skipped is True + + +# =================================================================== +# 3. 
detect_and_bootstrap_cli — Install flow +# =================================================================== + + +class TestBootstrapInstallFlow: + """Tests for CLI installation behavior.""" + + def test_installed_cli_skips_install_prompt(self, monkeypatch, tmp_path): + """If CLI is already found, no install prompt is shown.""" + output, results = _run_bootstrap_capture( + monkeypatch, tmp_path, ["1"], + cli_paths=CLAUDE_ONLY, env_keys=CLAUDE_KEY, + ) + assert results[0].cli_name == "claude" + assert "Install now?" not in output + + def test_not_installed_user_accepts_npm_succeeds(self, monkeypatch, tmp_path): + """User accepts install, npm present, install succeeds.""" + _, results = _run_bootstrap_capture( + monkeypatch, tmp_path, + ["1", "y", ""], # select, accept install, skip key + npm_available=True, + install_succeeds=True, + install_then_found="/usr/local/bin/claude", + ) + assert len(results) == 1 + assert results[0].cli_name == "claude" + assert results[0].cli_path == "/usr/local/bin/claude" + assert results[0].skipped is False + + def test_not_installed_npm_missing(self, monkeypatch, tmp_path): + """User accepts install but npm is not available.""" + output, results = _run_bootstrap_capture( + monkeypatch, tmp_path, + ["1", "y"], # select, accept install + npm_available=False, + ) + assert results[0].skipped is True + assert "npm" in output.lower() + + def test_not_installed_install_fails(self, monkeypatch, tmp_path): + """Install command exits non-zero.""" + output, results = _run_bootstrap_capture( + monkeypatch, tmp_path, + ["1", "y"], # select, accept install + npm_available=True, + install_succeeds=False, + ) + assert results[0].skipped is True + assert "failed" in output.lower() or "manually" in output.lower() + + def test_not_installed_user_declines(self, monkeypatch, tmp_path): + """User declines install.""" + output, results = _run_bootstrap_capture( + monkeypatch, tmp_path, + ["1", "n"], # select, decline install + ) + assert 
results[0].skipped is True + assert "not configured" in output.lower() + + def test_install_succeeds_but_binary_not_found(self, monkeypatch, tmp_path): + """Install exits 0 but binary still not on PATH.""" + output, results = _run_bootstrap_capture( + monkeypatch, tmp_path, + ["1", "y"], # select, accept install + npm_available=True, + install_succeeds=True, + install_then_found=None, # not found after install + ) + assert results[0].skipped is True + assert "not found on PATH" in output or "not configured" in output.lower() + + +# =================================================================== +# 4. detect_and_bootstrap_cli — API key flow +# =================================================================== + + +class TestBootstrapApiKeyFlow: + """Tests for API key configuration behavior.""" + + def test_key_already_set_skips_prompt(self, monkeypatch, tmp_path): + """If key is already in env, no prompt for it.""" + output, results = _run_bootstrap_capture( + monkeypatch, tmp_path, ["1"], + cli_paths=CLAUDE_ONLY, env_keys=CLAUDE_KEY, + ) + assert results[0].api_key_configured is True + assert "Enter your" not in output + + def test_key_not_set_user_provides(self, monkeypatch, tmp_path): + """User provides key when prompted.""" + _, results = _run_bootstrap_capture( + monkeypatch, tmp_path, + ["1", "sk-new-key"], # select, provide key + cli_paths=CLAUDE_ONLY, + ) + assert results[0].api_key_configured is True + assert os.environ.get("ANTHROPIC_API_KEY") == "sk-new-key" + + def test_key_saved_to_file(self, monkeypatch, tmp_path): + """Provided key is written to ~/.pdd/api-env.bash.""" + _run_bootstrap_capture( + monkeypatch, tmp_path, + ["1", "sk-saved-key"], + cli_paths=CLAUDE_ONLY, + ) + api_env = tmp_path / ".pdd" / "api-env.bash" + assert api_env.exists() + content = api_env.read_text() + assert "export ANTHROPIC_API_KEY=sk-saved-key" in content + + def test_source_line_added_to_rc(self, monkeypatch, tmp_path): + """Source line is added to ~/.bashrc.""" + 
_run_bootstrap_capture( + monkeypatch, tmp_path, + ["1", "sk-test"], + cli_paths=CLAUDE_ONLY, + ) + rc_content = (tmp_path / ".bashrc").read_text() + api_env_path = str(tmp_path / ".pdd" / "api-env.bash") + assert f"source {api_env_path}" in rc_content + + def test_key_not_set_user_skips(self, monkeypatch, tmp_path): + """User presses Enter to skip key.""" + _, results = _run_bootstrap_capture( + monkeypatch, tmp_path, + ["1", ""], # select, skip key + cli_paths=CLAUDE_ONLY, + ) + assert results[0].api_key_configured is False + + def test_anthropic_skip_shows_subscription_note(self, monkeypatch, tmp_path): + """Skipping Anthropic key mentions subscription auth.""" + output, _ = _run_bootstrap_capture( + monkeypatch, tmp_path, + ["1", ""], # select, skip key + cli_paths=CLAUDE_ONLY, + ) + assert "subscription" in output.lower() + + def test_non_anthropic_skip_shows_limited_note(self, monkeypatch, tmp_path): + """Skipping non-Anthropic key mentions limited functionality.""" + output, _ = _run_bootstrap_capture( + monkeypatch, tmp_path, + ["2", ""], # select codex, skip key + cli_paths={"codex": "/usr/bin/codex"}, + ) + assert "limited functionality" in output.lower() + + def test_google_checks_gemini_key(self, monkeypatch, tmp_path): + """Google provider recognizes GEMINI_API_KEY.""" + _, results = _run_bootstrap_capture( + monkeypatch, tmp_path, ["3"], + cli_paths={"gemini": "/usr/bin/gemini"}, + env_keys={"GEMINI_API_KEY": "gm-test"}, + ) + assert results[0].api_key_configured is True + + def test_google_checks_google_api_key(self, monkeypatch, tmp_path): + """Google provider recognizes GOOGLE_API_KEY.""" + _, results = _run_bootstrap_capture( + monkeypatch, tmp_path, ["3"], + cli_paths={"gemini": "/usr/bin/gemini"}, + env_keys={"GOOGLE_API_KEY": "gm-test"}, + ) + assert results[0].api_key_configured is True + + +# =================================================================== +# 5. 
detect_and_bootstrap_cli — CLI test step +# =================================================================== + + +class TestBootstrapCliTest: + """Tests for the forced CLI verification step.""" + + def test_cli_test_runs_after_setup(self, monkeypatch, tmp_path): + """CLI test always runs, output includes version info.""" + output, _ = _run_bootstrap_capture( + monkeypatch, tmp_path, ["1"], + cli_paths=CLAUDE_ONLY, env_keys=CLAUDE_KEY, + version_output="2.5.0", + ) + assert "Testing" in output + assert "2.5.0" in output or "version" in output.lower() + + +# =================================================================== +# 6. detect_and_bootstrap_cli — Interrupt handling +# =================================================================== + + +class TestBootstrapInterrupts: + """Tests for graceful interrupt handling.""" + + def test_keyboard_interrupt_on_selection(self, monkeypatch, tmp_path): + """KeyboardInterrupt at selection prompt → skipped result.""" + for var in ("ANTHROPIC_API_KEY", "OPENAI_API_KEY", "GOOGLE_API_KEY", + "GEMINI_API_KEY"): + monkeypatch.delenv(var, raising=False) + monkeypatch.setenv("SHELL", "/bin/bash") + monkeypatch.setattr(Path, "home", staticmethod(lambda: tmp_path)) + + monkeypatch.setattr( + "pdd.cli_detector._prompt_input", + mock.MagicMock(side_effect=KeyboardInterrupt), + ) + + with mock.patch("pdd.cli_detector._find_cli_binary", return_value=None), \ + mock.patch("pdd.cli_detector.console"), \ + mock.patch("pdd.setup_tool._print_step_banner"): + results = detect_and_bootstrap_cli() + + assert len(results) == 1 + assert results[0].skipped is True + + def test_eof_on_selection(self, monkeypatch, tmp_path): + """EOFError at selection prompt → skipped result.""" + for var in ("ANTHROPIC_API_KEY", "OPENAI_API_KEY", "GOOGLE_API_KEY", + "GEMINI_API_KEY"): + monkeypatch.delenv(var, raising=False) + monkeypatch.setenv("SHELL", "/bin/bash") + monkeypatch.setattr(Path, "home", staticmethod(lambda: tmp_path)) + + monkeypatch.setattr( 
+ "pdd.cli_detector._prompt_input", + mock.MagicMock(side_effect=EOFError), + ) + + with mock.patch("pdd.cli_detector._find_cli_binary", return_value=None), \ + mock.patch("pdd.cli_detector.console"), \ + mock.patch("pdd.setup_tool._print_step_banner"): + results = detect_and_bootstrap_cli() + + assert len(results) == 1 + assert results[0].skipped is True + + +# =================================================================== +# 7. detect_and_bootstrap_cli — API key persistence (shell variants) +# =================================================================== + + +class TestApiKeyPersistence: + """Tests for key file format across shell types.""" + + def test_fish_shell_syntax(self, monkeypatch, tmp_path): + """Fish shell uses set -gx and fish source syntax.""" + monkeypatch.setenv("SHELL", "/usr/bin/fish") + monkeypatch.setattr(Path, "home", staticmethod(lambda: tmp_path)) + + # Create fish config + fish_config = tmp_path / ".config" / "fish" / "config.fish" + fish_config.parent.mkdir(parents=True) + fish_config.write_text("") + + for var in ("ANTHROPIC_API_KEY", "OPENAI_API_KEY", "GOOGLE_API_KEY", + "GEMINI_API_KEY"): + monkeypatch.delenv(var, raising=False) + + input_iter = iter(["1", "sk-fish-key"]) + monkeypatch.setattr( + "pdd.cli_detector._prompt_input", + lambda _prompt="": next(input_iter), + ) + + with mock.patch("pdd.cli_detector._find_cli_binary") as mock_find, \ + mock.patch("pdd.cli_detector.console"), \ + mock.patch("subprocess.run") as mock_run, \ + mock.patch("shutil.which", return_value=None), \ + mock.patch("pdd.setup_tool._print_step_banner"): + mock_find.side_effect = lambda n: "/usr/bin/claude" if n == "claude" else None + mock_run.return_value = mock.MagicMock(returncode=0, stdout="1.0", stderr="") + detect_and_bootstrap_cli() + + api_env = tmp_path / ".pdd" / "api-env.fish" + assert api_env.exists() + content = api_env.read_text() + assert "set -gx ANTHROPIC_API_KEY sk-fish-key" in content + + rc_content = fish_config.read_text() + 
assert "test -f" in rc_content + assert "and source" in rc_content + + def test_duplicate_key_deduplicated(self, monkeypatch, tmp_path): + """Saving the same key twice doesn't create duplicate lines.""" + monkeypatch.setenv("SHELL", "/bin/bash") + monkeypatch.setattr(Path, "home", staticmethod(lambda: tmp_path)) + (tmp_path / ".bashrc").write_text("") + + for var in ("ANTHROPIC_API_KEY", "OPENAI_API_KEY", "GOOGLE_API_KEY", + "GEMINI_API_KEY"): + monkeypatch.delenv(var, raising=False) + + # First save + input_iter = iter(["1", "sk-first"]) + monkeypatch.setattr( + "pdd.cli_detector._prompt_input", + lambda _prompt="": next(input_iter), + ) + with mock.patch("pdd.cli_detector._find_cli_binary") as mock_find, \ + mock.patch("pdd.cli_detector.console"), \ + mock.patch("subprocess.run") as mock_run, \ + mock.patch("shutil.which", return_value=None), \ + mock.patch("pdd.setup_tool._print_step_banner"): + mock_find.side_effect = lambda n: "/usr/bin/claude" if n == "claude" else None + mock_run.return_value = mock.MagicMock(returncode=0, stdout="1.0", stderr="") + detect_and_bootstrap_cli() + + # Second save (overwrite key) + monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) + input_iter2 = iter(["1", "sk-second"]) + monkeypatch.setattr( + "pdd.cli_detector._prompt_input", + lambda _prompt="": next(input_iter2), + ) + with mock.patch("pdd.cli_detector._find_cli_binary") as mock_find, \ + mock.patch("pdd.cli_detector.console"), \ + mock.patch("subprocess.run") as mock_run, \ + mock.patch("shutil.which", return_value=None), \ + mock.patch("pdd.setup_tool._print_step_banner"): + mock_find.side_effect = lambda n: "/usr/bin/claude" if n == "claude" else None + mock_run.return_value = mock.MagicMock(returncode=0, stdout="1.0", stderr="") + detect_and_bootstrap_cli() + + api_env = tmp_path / ".pdd" / "api-env.bash" + content = api_env.read_text() + # Should have only one export line for ANTHROPIC_API_KEY + export_lines = [l for l in content.splitlines() + if 
"ANTHROPIC_API_KEY" in l] + assert len(export_lines) == 1 + assert "sk-second" in export_lines[0] + + +# =================================================================== +# 8. detect_cli_tools — Legacy detection +# =================================================================== + + +class TestDetectCliToolsLegacy: + """Tests for the legacy detect_cli_tools function.""" + + def test_shows_header(self, monkeypatch): + output = _run_legacy_capture(monkeypatch) + assert "Agentic CLI Tool Detection" in output + assert "pdd fix" in output + + def test_found_cli_shows_checkmark_and_path(self, monkeypatch): + output = _run_legacy_capture( + monkeypatch, + cli_paths={"claude": "/usr/local/bin/claude"}, + env_keys=CLAUDE_KEY, + ) + assert "Claude CLI" in output + assert "Found at" in output or "/usr/local/bin/claude" in output + + def test_missing_cli_shows_not_found(self, monkeypatch): + output = _run_legacy_capture(monkeypatch) + assert "Not found" in output + + def test_key_set_but_cli_missing_suggests_install(self, monkeypatch): + output = _run_legacy_capture( + monkeypatch, + env_keys={"OPENAI_API_KEY": "sk-test"}, + ) + assert "OPENAI_API_KEY" in output + assert "not installed" in output.lower() or "install" in output.lower() + + def test_all_installed_with_keys_shows_success(self, monkeypatch): + output = _run_legacy_capture( + monkeypatch, + cli_paths=ALL_INSTALLED, + env_keys=ALL_KEYS, + ) + assert "All CLI tools" in output + + def test_no_clis_found_shows_quick_start(self, monkeypatch): + output = _run_legacy_capture(monkeypatch) + assert "No CLI tools found" in output or "Quick start" in output diff --git a/tests/test_model_tester.py b/tests/test_model_tester.py new file mode 100644 index 000000000..dd691308a --- /dev/null +++ b/tests/test_model_tester.py @@ -0,0 +1,490 @@ +# Test Plan: +# I. No-CSV Edge Cases (test_model_interactive exits early) +# 1. test_no_csv_file: No ~/.pdd/llm_model.csv → prints guidance message and returns. +# 2. 
test_empty_csv: CSV exists but has no data rows → same early exit. +# 3. test_csv_missing_required_columns: CSV exists but lacks provider/model/api_key → early exit. +# +# II. Interactive Flow — User Input Handling +# 4. test_quit_with_empty_input: User presses Enter immediately → exits cleanly. +# 5. test_quit_with_q: User types "q" → exits cleanly. +# 6. test_invalid_input_then_quit: User types "abc", sees error, then quits. +# 7. test_out_of_range_then_quit: User types "99" (out of range), sees error, then quits. +# 8. test_eof_exits_gracefully: EOFError during input → exits without crashing. +# +# III. Successful Model Test (end-to-end through test_model_interactive) +# 9. test_successful_test_shows_ok: User picks model 1, LLM returns OK → output shows ✓ OK with cost/tokens. +# 10. test_successful_test_passes_api_key_for_single_var: Single api_key var → passed as api_key= to litellm. +# 11. test_multi_var_provider_no_api_key_kwarg: Bedrock (pipe-delimited) → api_key= NOT passed to litellm. +# 12. test_device_flow_no_api_key_kwarg: Empty api_key → api_key= NOT passed to litellm. +# +# IV. Failed Model Test (end-to-end through test_model_interactive) +# 13. test_auth_error_shows_classified_message: LLM raises 401 → output shows "Authentication error". +# 14. test_connection_refused_shows_local_server_hint: LLM raises connection error → output suggests local server. +# +# V. Diagnostics Displayed Before Test +# 15. test_diagnostics_show_key_found: API key in env → output includes "✓ Found". +# 16. test_diagnostics_show_key_missing: API key not in env → output includes "✗ Not found". +# 17. test_diagnostics_show_base_url_for_lm_studio: LM Studio model → base URL shown in output. +# 18. test_diagnostics_bedrock_checks_all_vars: Bedrock model → all three env vars checked in output. +# 19. test_diagnostics_vertex_bad_creds_file: GOOGLE_APPLICATION_CREDENTIALS path invalid → warns in output. +# 20. 
test_diagnostics_device_flow_no_key_needed: Empty api_key → output indicates no key needed. +# +# VI. Session Persistence +# 21. test_results_persist_across_picks: User tests model 1 then model 2 → both results shown in table. +# +# VII. CSV Loading Normalization +# 22. test_csv_normalizes_nan_strings_and_bad_numerics: NaN strings → "", bad numbers → 0.0. +# +# VIII. Pure Function Contracts +# 23-28. _classify_error: auth, connection refused, not found, timeout, rate limit, generic. +# 29-30. _calculate_cost: basic math, zero tokens. + +"""Tests for model_tester.py — behavioral tests driven through test_model_interactive().""" + +import pytest +from unittest.mock import MagicMock, patch + +from pdd import model_tester + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _make_csv(tmp_path, content): + """Write a CSV file at the expected ~/.pdd/llm_model.csv location.""" + csv_file = tmp_path / ".pdd" / "llm_model.csv" + csv_file.parent.mkdir(parents=True, exist_ok=True) + csv_file.write_text(content) + return csv_file + + +def _mock_litellm_success(prompt_tokens=10, completion_tokens=5): + """Return a mock litellm response with token usage.""" + usage = MagicMock() + usage.prompt_tokens = prompt_tokens + usage.completion_tokens = completion_tokens + response = MagicMock() + response.usage = usage + return response + + +def _run_interactive(tmp_path, csv_content, user_inputs, monkeypatch, + mock_completion=None, env_vars=None): + """Run test_model_interactive with mocked CSV, user input, and litellm. + + Returns the captured console output as a string. 
+ """ + _make_csv(tmp_path, csv_content) + + for k, v in (env_vars or {}).items(): + monkeypatch.setenv(k, v) + + input_iter = iter(user_inputs) + mock_console_input = MagicMock(side_effect=input_iter) + + if mock_completion is None: + mock_completion = MagicMock(return_value=_mock_litellm_success()) + + with patch.object(model_tester.Path, "home", return_value=tmp_path), \ + patch.object(model_tester.console, "input", mock_console_input), \ + patch("litellm.completion", mock_completion), \ + patch("sys.stdout"): # suppress dot-printing from thread + model_tester.test_model_interactive() + + # Collect all console.print() calls into a single string for assertions. + # Each call may contain rich markup; we join them for substring matching. + output_parts = [] + for c in model_tester.console.print.call_args_list if hasattr(model_tester.console.print, "call_args_list") else []: + for arg in c.args: + output_parts.append(str(arg)) + return "\n".join(output_parts), mock_completion + + +def _run_interactive_capture(tmp_path, csv_content, user_inputs, monkeypatch, + mock_completion=None, env_vars=None): + """Like _run_interactive but patches console.print to capture output.""" + _make_csv(tmp_path, csv_content) + + for k, v in (env_vars or {}).items(): + monkeypatch.setenv(k, v) + + input_iter = iter(user_inputs) + + if mock_completion is None: + mock_completion = MagicMock(return_value=_mock_litellm_success()) + + captured = [] + + def _capture_print(*args, **kwargs): + for a in args: + captured.append(str(a)) + + with patch.object(model_tester.Path, "home", return_value=tmp_path), \ + patch.object(model_tester.console, "input", side_effect=input_iter), \ + patch.object(model_tester.console, "print", side_effect=_capture_print), \ + patch("litellm.completion", mock_completion), \ + patch("sys.stdout"): + model_tester.test_model_interactive() + + return "\n".join(captured), mock_completion + + +SIMPLE_CSV = 
"provider,model,api_key,input,output\nOpenAI,gpt-5,OPENAI_API_KEY,3.0,15.0\n" + +TWO_MODEL_CSV = ( + "provider,model,api_key,input,output\n" + "OpenAI,gpt-5,OPENAI_API_KEY,3.0,15.0\n" + "Anthropic,claude-sonnet,ANTHROPIC_API_KEY,3.0,15.0\n" +) + +BEDROCK_CSV = ( + "provider,model,api_key,input,output\n" + "AWS Bedrock,bedrock/anthropic.claude-v1," + "AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,1.0,5.0\n" +) + +DEVICE_FLOW_CSV = ( + "provider,model,api_key,input,output\n" + "Github Copilot,github_copilot/gpt-5,,0.0,0.0\n" +) + +LM_STUDIO_CSV = ( + "provider,model,api_key,input,output,base_url\n" + "lm_studio,lm_studio/local-model,,0.0,0.0,\n" +) + +VERTEX_CSV = ( + "provider,model,api_key,input,output\n" + "Google Vertex AI,vertex_ai/gemini-2.5-pro," + "GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,1.0,5.0\n" +) + + +# =========================================================================== +# I. No-CSV Edge Cases +# =========================================================================== + +def test_no_csv_file(tmp_path): + """No ~/.pdd/llm_model.csv → prints guidance and returns.""" + with patch.object(model_tester.Path, "home", return_value=tmp_path): + captured = [] + with patch.object(model_tester.console, "print", + side_effect=lambda *a, **kw: captured.extend(str(x) for x in a)): + model_tester.test_model_interactive() + output = "\n".join(captured) + assert "No user model CSV" in output + assert "pdd setup" in output + + +def test_empty_csv(tmp_path): + """CSV with headers but no rows → same early exit.""" + _make_csv(tmp_path, "provider,model,api_key,input,output\n") + with patch.object(model_tester.Path, "home", return_value=tmp_path): + captured = [] + with patch.object(model_tester.console, "print", + side_effect=lambda *a, **kw: captured.extend(str(x) for x in a)): + model_tester.test_model_interactive() + output = "\n".join(captured) + assert "No user model CSV" in output + + +def 
test_csv_missing_required_columns(tmp_path): + """CSV with wrong columns → early exit.""" + _make_csv(tmp_path, "name,value\nfoo,bar\n") + with patch.object(model_tester.Path, "home", return_value=tmp_path): + captured = [] + with patch.object(model_tester.console, "print", + side_effect=lambda *a, **kw: captured.extend(str(x) for x in a)): + model_tester.test_model_interactive() + output = "\n".join(captured) + assert "No user model CSV" in output or "missing required columns" in output + + +# =========================================================================== +# II. Interactive Flow — User Input Handling +# =========================================================================== + +def test_quit_with_empty_input(tmp_path, monkeypatch): + """User presses Enter → exits cleanly.""" + output, _ = _run_interactive_capture( + tmp_path, SIMPLE_CSV, [""], monkeypatch, + env_vars={"OPENAI_API_KEY": "sk-test"}, + ) + assert "Exiting" in output + + +def test_quit_with_q(tmp_path, monkeypatch): + """User types 'q' → exits cleanly.""" + output, _ = _run_interactive_capture( + tmp_path, SIMPLE_CSV, ["q"], monkeypatch, + env_vars={"OPENAI_API_KEY": "sk-test"}, + ) + assert "Exiting" in output + + +def test_invalid_input_then_quit(tmp_path, monkeypatch): + """User types 'abc' → error message, then quits.""" + output, _ = _run_interactive_capture( + tmp_path, SIMPLE_CSV, ["abc", "q"], monkeypatch, + env_vars={"OPENAI_API_KEY": "sk-test"}, + ) + assert "Invalid input" in output + + +def test_out_of_range_then_quit(tmp_path, monkeypatch): + """User types '99' → out-of-range error, then quits.""" + output, _ = _run_interactive_capture( + tmp_path, SIMPLE_CSV, ["99", "q"], monkeypatch, + env_vars={"OPENAI_API_KEY": "sk-test"}, + ) + assert "Invalid selection" in output + + +def test_eof_exits_gracefully(tmp_path, monkeypatch): + """EOFError during input → exits without crashing.""" + _make_csv(tmp_path, SIMPLE_CSV) + monkeypatch.setenv("OPENAI_API_KEY", "sk-test") + + with 
patch.object(model_tester.Path, "home", return_value=tmp_path), \ + patch.object(model_tester.console, "input", side_effect=EOFError), \ + patch.object(model_tester.console, "print"), \ + patch("sys.stdout"): + # Should not raise + model_tester.test_model_interactive() + + +# =========================================================================== +# III. Successful Model Test +# =========================================================================== + +def test_successful_test_shows_ok(tmp_path, monkeypatch): + """User picks model 1, LLM succeeds → output shows ✓ OK with cost info.""" + output, _ = _run_interactive_capture( + tmp_path, SIMPLE_CSV, ["1", "q"], monkeypatch, + mock_completion=MagicMock(return_value=_mock_litellm_success(10, 5)), + env_vars={"OPENAI_API_KEY": "sk-test"}, + ) + assert "✓ OK" in output + + +def test_successful_test_passes_api_key_for_single_var(tmp_path, monkeypatch): + """Single-var provider → api_key= passed to litellm.completion.""" + mock_comp = MagicMock(return_value=_mock_litellm_success()) + _run_interactive_capture( + tmp_path, SIMPLE_CSV, ["1", "q"], monkeypatch, + mock_completion=mock_comp, + env_vars={"OPENAI_API_KEY": "sk-test123"}, + ) + call_kwargs = mock_comp.call_args[1] + assert call_kwargs["api_key"] == "sk-test123" + + +def test_multi_var_provider_no_api_key_kwarg(tmp_path, monkeypatch): + """Bedrock (pipe-delimited api_key) → api_key= NOT passed to litellm.""" + mock_comp = MagicMock(return_value=_mock_litellm_success()) + _run_interactive_capture( + tmp_path, BEDROCK_CSV, ["1", "q"], monkeypatch, + mock_completion=mock_comp, + env_vars={ + "AWS_ACCESS_KEY_ID": "AKIAEXAMPLE", + "AWS_SECRET_ACCESS_KEY": "secret", + "AWS_REGION_NAME": "us-east-1", + }, + ) + call_kwargs = mock_comp.call_args[1] + assert "api_key" not in call_kwargs + + +def test_device_flow_no_api_key_kwarg(tmp_path, monkeypatch): + """Device flow (empty api_key) → api_key= NOT passed to litellm.""" + mock_comp = 
MagicMock(return_value=_mock_litellm_success()) + _run_interactive_capture( + tmp_path, DEVICE_FLOW_CSV, ["1", "q"], monkeypatch, + mock_completion=mock_comp, + ) + call_kwargs = mock_comp.call_args[1] + assert "api_key" not in call_kwargs + + +# =========================================================================== +# IV. Failed Model Test +# =========================================================================== + +def test_auth_error_shows_classified_message(tmp_path, monkeypatch): + """LLM raises 401 → output shows 'Authentication error'.""" + mock_comp = MagicMock(side_effect=Exception("401 Unauthorized")) + output, _ = _run_interactive_capture( + tmp_path, SIMPLE_CSV, ["1", "q"], monkeypatch, + mock_completion=mock_comp, + env_vars={"OPENAI_API_KEY": "sk-bad"}, + ) + assert "Authentication error" in output + + +def test_connection_refused_shows_local_server_hint(tmp_path, monkeypatch): + """LLM raises connection error → output suggests local server.""" + mock_comp = MagicMock(side_effect=ConnectionError("Connection refused")) + output, _ = _run_interactive_capture( + tmp_path, LM_STUDIO_CSV, ["1", "q"], monkeypatch, + mock_completion=mock_comp, + ) + assert "Connection refused" in output + assert "local server" in output + + +# =========================================================================== +# V. 
Diagnostics Displayed Before Test +# =========================================================================== + +def test_diagnostics_show_key_found(tmp_path, monkeypatch): + """API key in env → diagnostics show ✓ Found.""" + output, _ = _run_interactive_capture( + tmp_path, SIMPLE_CSV, ["1", "q"], monkeypatch, + env_vars={"OPENAI_API_KEY": "sk-test"}, + ) + assert "✓ Found" in output + assert "OPENAI_API_KEY" in output + + +def test_diagnostics_show_key_missing(tmp_path, monkeypatch): + """API key NOT in env → diagnostics show ✗ Not found.""" + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + output, _ = _run_interactive_capture( + tmp_path, SIMPLE_CSV, ["1", "q"], monkeypatch, + ) + assert "✗ Not found" in output + + +def test_diagnostics_show_base_url_for_lm_studio(tmp_path, monkeypatch): + """LM Studio model → base URL shown in diagnostics.""" + monkeypatch.delenv("LM_STUDIO_API_BASE", raising=False) + output, _ = _run_interactive_capture( + tmp_path, LM_STUDIO_CSV, ["1", "q"], monkeypatch, + ) + assert "localhost:1234" in output + + +def test_diagnostics_bedrock_checks_all_vars(tmp_path, monkeypatch): + """Bedrock model → all three env vars appear in diagnostics.""" + monkeypatch.setenv("AWS_ACCESS_KEY_ID", "AKIAEXAMPLE") + monkeypatch.delenv("AWS_SECRET_ACCESS_KEY", raising=False) + monkeypatch.setenv("AWS_REGION_NAME", "us-east-1") + output, _ = _run_interactive_capture( + tmp_path, BEDROCK_CSV, ["1", "q"], monkeypatch, + ) + assert "AWS_ACCESS_KEY_ID" in output + assert "AWS_SECRET_ACCESS_KEY" in output + assert "AWS_REGION_NAME" in output + # One should be found, one missing + assert "✓ Found" in output + assert "✗ Not found" in output + + +def test_diagnostics_vertex_bad_creds_file(tmp_path, monkeypatch): + """GOOGLE_APPLICATION_CREDENTIALS pointing to nonexistent file → warns.""" + monkeypatch.setenv("GOOGLE_APPLICATION_CREDENTIALS", "/nonexistent/creds.json") + monkeypatch.setenv("VERTEXAI_PROJECT", "my-project") + 
monkeypatch.setenv("VERTEXAI_LOCATION", "us-central1") + output, _ = _run_interactive_capture( + tmp_path, VERTEX_CSV, ["1", "q"], monkeypatch, + ) + assert "file not found" in output + + +def test_diagnostics_device_flow_no_key_needed(tmp_path, monkeypatch): + """Device flow provider → diagnostics say no key needed.""" + output, _ = _run_interactive_capture( + tmp_path, DEVICE_FLOW_CSV, ["1", "q"], monkeypatch, + ) + assert "Device flow" in output or "no key needed" in output + + +# =========================================================================== +# VI. Session Persistence +# =========================================================================== + +def test_results_persist_across_picks(tmp_path, monkeypatch): + """User tests model 1 then model 2 → second table render includes first result.""" + call_count = [0] + def _completion_side_effect(**kwargs): + call_count[0] += 1 + return _mock_litellm_success() + + mock_comp = MagicMock(side_effect=_completion_side_effect) + output, _ = _run_interactive_capture( + tmp_path, TWO_MODEL_CSV, ["1", "2", "q"], monkeypatch, + mock_completion=mock_comp, + env_vars={"OPENAI_API_KEY": "sk-test", "ANTHROPIC_API_KEY": "sk-test"}, + ) + # litellm.completion should have been called twice (once per model) + assert mock_comp.call_count == 2 + + +# =========================================================================== +# VII. 
CSV Loading Normalization +# =========================================================================== + +def test_csv_normalizes_nan_strings_and_bad_numerics(tmp_path): + """NaN string columns → empty string; non-numeric cost → 0.0.""" + csv_content = ( + "provider,model,api_key,base_url,location,input,output\n" + "OpenAI,gpt-5,,,us-east,bad,3.0\n" + ) + _make_csv(tmp_path, csv_content) + + with patch.object(model_tester.Path, "home", return_value=tmp_path): + df = model_tester._load_user_csv() + + assert df is not None + row = df.iloc[0] + assert row["api_key"] == "" + assert row["base_url"] == "" + assert row["input"] == 0.0 + assert row["output"] == 3.0 + + +# =========================================================================== +# VIII. Pure Function Contracts — _classify_error +# These are kept as direct tests because _classify_error is a pure function +# with clear sub-contract semantics (like ExtractedCode in test_postprocess.py). +# =========================================================================== + +@pytest.mark.parametrize("message,expected_fragment", [ + ("401 Unauthorized - invalid api key", "Authentication error"), + ("403 Forbidden - access denied", "Authentication error"), + ("Connection refused", "Connection refused"), + ("404 Model does not exist", "Model not found"), + ("Request timed out after 30s", "timed out"), + ("429 Rate limit exceeded", "Rate limited"), +]) +def test_classify_error_categories(message, expected_fragment): + """Error messages are classified into user-friendly categories.""" + exc = Exception(message) + result = model_tester._classify_error(exc) + assert expected_fragment in result + + +def test_classify_error_generic(): + """Unknown errors fall through to generic classification.""" + exc = ValueError("Something unexpected") + result = model_tester._classify_error(exc) + assert "ValueError" in result + assert "Something unexpected" in result + + +# 
=========================================================================== +# IX. Pure Function Contracts — _calculate_cost +# =========================================================================== + +def test_calculate_cost_basic(): + """Cost = (prompt_tokens * input_price + completion_tokens * output_price) / 1M.""" + cost = model_tester._calculate_cost(100, 50, 3.0, 15.0) + expected = (100 * 3.0 + 50 * 15.0) / 1_000_000.0 + assert abs(cost - expected) < 1e-10 + + +def test_calculate_cost_zero(): + """Zero tokens or zero prices produce zero cost.""" + assert model_tester._calculate_cost(0, 0, 3.0, 15.0) == 0.0 + assert model_tester._calculate_cost(100, 100, 0.0, 0.0) == 0.0 diff --git a/tests/test_pddrc_initializer.py b/tests/test_pddrc_initializer.py new file mode 100644 index 000000000..a05306fc7 --- /dev/null +++ b/tests/test_pddrc_initializer.py @@ -0,0 +1,356 @@ +# Test Plan: +# I. Early Exits +# 1. test_already_exists_returns_false: .pddrc exists → returns False, file untouched +# 2. test_already_exists_shows_message: .pddrc exists → output mentions "already exists" +# +# II. Language Detection (pure function contract — stable sub-contract) +# 3. test_detect_language_python_markers: pyproject.toml, setup.py, requirements.txt → "python" +# 4. test_detect_language_typescript: package.json with typescript dep → "typescript" +# 5. test_detect_language_not_typescript_without_dep: package.json without typescript → None +# 6. test_detect_language_go: go.mod → "go" +# 7. test_detect_language_none: empty dir → None +# 8. test_detect_language_python_priority: both pyproject.toml and go.mod → "python" +# +# III. Content Generation (pure function contract — stable sub-contract) +# 9. test_build_content_language_paths: each language gets correct output paths +# 10. test_build_content_standard_defaults: strength, temperature, etc. present +# 11. test_build_content_unknown_language_fallback: unknown lang falls back to Python paths +# 12. 
test_build_content_ends_with_newline: trailing newline +# +# IV. Success Path — File Created +# 13. test_creates_file_on_confirm_yes: user types "y" → file created, returns True +# 14. test_creates_file_on_enter: empty input (Enter) → file created, returns True +# 15. test_created_file_has_correct_content: created file contains detected language defaults +# 16. test_prompts_language_when_undetected: no markers → asks for language, then confirms +# +# V. User Declines +# 17. test_declined_returns_false: user types "n" → returns False, no file +# +# VI. Language Prompt with Invalid Input +# 18. test_language_prompt_retries_on_invalid: invalid then valid → correct language used +# +# VII. Output / Display +# 19. test_detected_language_shown: auto-detected language appears in output +# 20. test_preview_shown_before_confirmation: YAML preview shown before user asked to confirm +# 21. test_creation_success_message: "Created .pddrc" message appears after creation +# 22. test_skip_message_on_decline: "Skipped" message appears when user declines +# +# VIII. Filesystem Error +# 23. 
test_write_error_returns_false: OSError on write → returns False, error shown + +import json +import pytest +from unittest.mock import patch, MagicMock +from pathlib import Path +from io import StringIO + +from pdd import pddrc_initializer +from pdd.pddrc_initializer import _detect_language, _build_pddrc_content + + +# --------------------------------------------------------------------------- +# Module-level fixtures / constants +# --------------------------------------------------------------------------- + +PYTHON_PROJECT_MARKERS = ["pyproject.toml", "setup.py", "requirements.txt"] + +TS_PACKAGE_JSON = json.dumps({ + "devDependencies": {"typescript": "^5.0.0"} +}) + +NON_TS_PACKAGE_JSON = json.dumps({ + "dependencies": {"express": "^4.0.0"} +}) + + +# --------------------------------------------------------------------------- +# Helper: run offer_pddrc_init and capture output +# --------------------------------------------------------------------------- + +def _run_offer_capture(tmp_path, monkeypatch, user_inputs, *, marker_files=None): + """Run offer_pddrc_init() in tmp_path, capturing printed output. + + Parameters + ---------- + tmp_path : Path + Working directory for the test. + monkeypatch : pytest.MonkeyPatch + Used to patch cwd. + user_inputs : list[str] + Sequence of strings returned by console.input() calls. + marker_files : dict[str, str | None] | None + Files to create in tmp_path before running. Keys are filenames, + values are contents (None → touch). 
+ + Returns + ------- + tuple[bool, str] + (return_value, captured_output_text) + """ + # Set up marker files + if marker_files: + for name, content in marker_files.items(): + path = tmp_path / name + if content is not None: + path.write_text(content) + else: + path.touch() + + # Mock console.input to feed user inputs + input_iter = iter(user_inputs) + + # Capture console.print output + captured = [] + + original_print = pddrc_initializer.console.print + + def fake_print(*args, **kwargs): + # Convert to plain string for assertion + buf = StringIO() + temp_console = pddrc_initializer.Console(file=buf, force_terminal=False, no_color=True) + temp_console.print(*args, **kwargs) + captured.append(buf.getvalue()) + + with patch.object(Path, "cwd", return_value=tmp_path), \ + patch.object(pddrc_initializer.console, "input", side_effect=input_iter), \ + patch.object(pddrc_initializer.console, "print", side_effect=fake_print): + result = pddrc_initializer.offer_pddrc_init() + + output = "".join(captured) + return result, output + + +# --------------------------------------------------------------------------- +# I. Early Exits +# --------------------------------------------------------------------------- + +def test_already_exists_returns_false(tmp_path, monkeypatch): + """When .pddrc already exists, offer_pddrc_init returns False.""" + (tmp_path / ".pddrc").write_text("existing config") + result, _ = _run_offer_capture(tmp_path, monkeypatch, []) + assert result is False + assert (tmp_path / ".pddrc").read_text() == "existing config" + + +def test_already_exists_shows_message(tmp_path, monkeypatch): + """When .pddrc already exists, user sees 'already exists' message.""" + (tmp_path / ".pddrc").write_text("existing config") + _, output = _run_offer_capture(tmp_path, monkeypatch, []) + assert "already exists" in output + + +# --------------------------------------------------------------------------- +# II. 
Language Detection (pure function — stable sub-contract) +# --------------------------------------------------------------------------- + +@pytest.mark.parametrize("marker", PYTHON_PROJECT_MARKERS) +def test_detect_language_python_markers(tmp_path, marker): + """Python marker files are detected correctly.""" + (tmp_path / marker).touch() + assert _detect_language(tmp_path) == "python" + + +def test_detect_language_typescript(tmp_path): + """package.json with typescript dependency → 'typescript'.""" + (tmp_path / "package.json").write_text(TS_PACKAGE_JSON) + assert _detect_language(tmp_path) == "typescript" + + +def test_detect_language_not_typescript_without_dep(tmp_path): + """package.json without typescript dep → None.""" + (tmp_path / "package.json").write_text(NON_TS_PACKAGE_JSON) + assert _detect_language(tmp_path) is None + + +def test_detect_language_go(tmp_path): + """go.mod → 'go'.""" + (tmp_path / "go.mod").touch() + assert _detect_language(tmp_path) == "go" + + +def test_detect_language_none(tmp_path): + """Empty directory → None.""" + assert _detect_language(tmp_path) is None + + +def test_detect_language_python_priority(tmp_path): + """Python markers take priority over Go markers.""" + (tmp_path / "pyproject.toml").touch() + (tmp_path / "go.mod").touch() + assert _detect_language(tmp_path) == "python" + + +# --------------------------------------------------------------------------- +# III. 
Content Generation (pure function — stable sub-contract) +# --------------------------------------------------------------------------- + +@pytest.mark.parametrize("language, gen_path, test_path, example_path", [ + ("python", "pdd/", "tests/", "context/"), + ("typescript", "src/", "__tests__/", "examples/"), + ("go", ".", ".", "examples/"), +]) +def test_build_content_language_paths(language, gen_path, test_path, example_path): + """Each language gets correct output paths in generated content.""" + content = _build_pddrc_content(language) + assert f'generate_output_path: "{gen_path}"' in content + assert f'test_output_path: "{test_path}"' in content + assert f'example_output_path: "{example_path}"' in content + assert f'default_language: "{language}"' in content + + +def test_build_content_standard_defaults(): + """Generated content includes all standard defaults.""" + content = _build_pddrc_content("python") + assert "strength: 0.818" in content + assert "temperature: 0.0" in content + assert "target_coverage: 80.0" in content + assert "budget: 10.0" in content + assert "max_attempts: 3" in content + assert 'version: "1.0"' in content + + +def test_build_content_unknown_language_fallback(): + """Unknown language falls back to Python paths but uses given language name.""" + content = _build_pddrc_content("rust") + assert 'generate_output_path: "pdd/"' in content + assert 'default_language: "rust"' in content + + +def test_build_content_ends_with_newline(): + """Generated content ends with a trailing newline.""" + content = _build_pddrc_content("python") + assert content.endswith("\n") + + +# --------------------------------------------------------------------------- +# IV. 
Success Path — File Created +# --------------------------------------------------------------------------- + +def test_creates_file_on_confirm_yes(tmp_path, monkeypatch): + """User confirms with 'y' → .pddrc created, returns True.""" + result, _ = _run_offer_capture( + tmp_path, monkeypatch, ["y"], + marker_files={"pyproject.toml": None}, + ) + assert result is True + assert (tmp_path / ".pddrc").exists() + + +def test_creates_file_on_enter(tmp_path, monkeypatch): + """Empty input (Enter) means yes → .pddrc created, returns True.""" + result, _ = _run_offer_capture( + tmp_path, monkeypatch, [""], + marker_files={"pyproject.toml": None}, + ) + assert result is True + assert (tmp_path / ".pddrc").exists() + + +def test_created_file_has_correct_content(tmp_path, monkeypatch): + """Created .pddrc contains language-appropriate defaults.""" + _run_offer_capture( + tmp_path, monkeypatch, ["y"], + marker_files={"pyproject.toml": None}, + ) + content = (tmp_path / ".pddrc").read_text() + assert 'default_language: "python"' in content + assert 'generate_output_path: "pdd/"' in content + assert "strength: 0.818" in content + + +def test_prompts_language_when_undetected(tmp_path, monkeypatch): + """No markers → user prompted for language (1=Python), then confirms.""" + # First input: language choice, second: confirmation + result, _ = _run_offer_capture( + tmp_path, monkeypatch, ["1", "y"], + ) + assert result is True + content = (tmp_path / ".pddrc").read_text() + assert 'default_language: "python"' in content + + +# --------------------------------------------------------------------------- +# V. 
User Declines +# --------------------------------------------------------------------------- + +def test_declined_returns_false(tmp_path, monkeypatch): + """User types 'n' → returns False, no file created.""" + result, _ = _run_offer_capture( + tmp_path, monkeypatch, ["n"], + marker_files={"pyproject.toml": None}, + ) + assert result is False + assert not (tmp_path / ".pddrc").exists() + + +# --------------------------------------------------------------------------- +# VI. Language Prompt with Invalid Input +# --------------------------------------------------------------------------- + +def test_language_prompt_retries_on_invalid(tmp_path, monkeypatch): + """Invalid language choices cause retries until valid choice, then file created.""" + # "x" and "99" are invalid, "2" selects TypeScript, "y" confirms + result, output = _run_offer_capture( + tmp_path, monkeypatch, ["x", "99", "2", "y"], + ) + assert result is True + content = (tmp_path / ".pddrc").read_text() + assert 'default_language: "typescript"' in content + assert "Invalid choice" in output + + +# --------------------------------------------------------------------------- +# VII. 
Output / Display
# ---------------------------------------------------------------------------

def test_detected_language_shown(tmp_path, monkeypatch):
    """The auto-detected language is displayed to the user."""
    _, output = _run_offer_capture(
        tmp_path, monkeypatch, ["y"],
        marker_files={"pyproject.toml": None},
    )
    assert "python" in output.lower()


def test_preview_shown_before_confirmation(tmp_path, monkeypatch):
    """A YAML preview is printed before the confirmation prompt."""
    _, output = _run_offer_capture(
        tmp_path, monkeypatch, ["y"],
        marker_files={"pyproject.toml": None},
    )
    assert "Proposed" in output or "contents" in output
    assert "version" in output


def test_creation_success_message(tmp_path, monkeypatch):
    """'Created .pddrc' message appears after successful creation."""
    _, output = _run_offer_capture(
        tmp_path, monkeypatch, ["y"],
        marker_files={"pyproject.toml": None},
    )
    assert "Created" in output
    assert ".pddrc" in output


def test_skip_message_on_decline(tmp_path, monkeypatch):
    """Declining produces a 'Skipped' acknowledgement."""
    _, output = _run_offer_capture(
        tmp_path, monkeypatch, ["n"],
        marker_files={"pyproject.toml": None},
    )
    assert "Skipped" in output or "skipped" in output


# ---------------------------------------------------------------------------
# VIII. 
Filesystem Error +# --------------------------------------------------------------------------- + +def test_write_error_returns_false(tmp_path, monkeypatch): + """OSError during file write → returns False, error message shown.""" + with patch.object(Path, "write_text", side_effect=OSError("Permission denied")): + result, output = _run_offer_capture( + tmp_path, monkeypatch, ["y"], + marker_files={"pyproject.toml": None}, + ) + assert result is False + assert "Failed" in output or "error" in output.lower() diff --git a/tests/test_provider_manager.py b/tests/test_provider_manager.py new file mode 100644 index 000000000..d170f0a2b --- /dev/null +++ b/tests/test_provider_manager.py @@ -0,0 +1,754 @@ +"""Tests for pdd/provider_manager.py + +Organized by public API function. Tests verify user-observable behavior +through the public interface; private helpers are exercised indirectly. +Shell execution integration tests verify generated scripts actually work. +""" + +import csv +import os +import subprocess +import shutil +from pathlib import Path +from unittest import mock + +import pytest + +from pdd.provider_manager import ( + CSV_FIELDNAMES, + COMPLEX_AUTH_PROVIDERS, + _save_key_to_api_env, + _setup_complex_provider, + add_custom_provider, + add_provider_from_registry, + is_multi_credential, + parse_api_key_vars, + remove_models_by_provider, + remove_individual_models, +) + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def temp_home(tmp_path, monkeypatch): + """Create a temporary home directory with .pdd folder.""" + pdd_dir = tmp_path / ".pdd" + pdd_dir.mkdir(parents=True, exist_ok=True) + monkeypatch.setattr(Path, "home", lambda: tmp_path) + monkeypatch.setenv("SHELL", "/bin/bash") + return tmp_path + + +@pytest.fixture +def sample_csv(temp_home): + """Create a sample llm_model.csv with test data.""" + csv_path = 
temp_home / ".pdd" / "llm_model.csv" + rows = [ + { + "provider": "OpenAI", + "model": "gpt-4", + "input": "30.0", + "output": "60.0", + "coding_arena_elo": "1000", + "base_url": "", + "api_key": "OPENAI_API_KEY", + "max_reasoning_tokens": "0", + "structured_output": "True", + "reasoning_type": "", + "location": "", + }, + { + "provider": "OpenAI", + "model": "gpt-3.5-turbo", + "input": "0.5", + "output": "1.5", + "coding_arena_elo": "1000", + "base_url": "", + "api_key": "OPENAI_API_KEY", + "max_reasoning_tokens": "0", + "structured_output": "True", + "reasoning_type": "", + "location": "", + }, + { + "provider": "Anthropic", + "model": "claude-3-opus", + "input": "15.0", + "output": "75.0", + "coding_arena_elo": "1000", + "base_url": "", + "api_key": "ANTHROPIC_API_KEY", + "max_reasoning_tokens": "0", + "structured_output": "True", + "reasoning_type": "", + "location": "", + }, + ] + + with open(csv_path, "w", newline="", encoding="utf-8") as f: + writer = csv.DictWriter(f, fieldnames=CSV_FIELDNAMES) + writer.writeheader() + writer.writerows(rows) + + return csv_path + + +@pytest.fixture +def sample_api_env(temp_home): + """Create a sample api-env.bash file.""" + api_env_path = temp_home / ".pdd" / "api-env.bash" + api_env_path.write_text( + "export OPENAI_API_KEY=sk-test123\n" + "export ANTHROPIC_API_KEY=ant-test456\n" + ) + return api_env_path + + +def _read_user_csv(temp_home): + """Read the user CSV and return list of row dicts.""" + csv_path = temp_home / ".pdd" / "llm_model.csv" + if not csv_path.exists(): + return [] + with open(csv_path, "r", encoding="utf-8", newline="") as f: + return list(csv.DictReader(f)) + + +# --------------------------------------------------------------------------- +# I. 
parse_api_key_vars / is_multi_credential
# ---------------------------------------------------------------------------


class TestApiKeyParsing:
    """Contract tests for parse_api_key_vars and is_multi_credential."""

    def test_parse_single_var(self):
        assert parse_api_key_vars("OPENAI_API_KEY") == ["OPENAI_API_KEY"]

    def test_parse_multiple_vars(self):
        parsed = parse_api_key_vars("AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME")
        assert parsed == ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_REGION_NAME"]

    def test_parse_empty_and_none(self):
        # Blank, None, and whitespace-only inputs all yield no variables.
        for empty_input in ("", None, " "):
            assert parse_api_key_vars(empty_input) == []

    def test_parse_strips_whitespace_and_filters_empty(self):
        assert parse_api_key_vars(" KEY_A | KEY_B ") == ["KEY_A", "KEY_B"]
        assert parse_api_key_vars("KEY_A||KEY_B") == ["KEY_A", "KEY_B"]

    def test_is_multi_credential(self):
        assert is_multi_credential("A|B") is True
        assert is_multi_credential("OPENAI_API_KEY") is False
        assert is_multi_credential("") is False
        assert is_multi_credential(None) is False


# ---------------------------------------------------------------------------
# II. 
add_provider_from_registry
# ---------------------------------------------------------------------------


class TestAddProviderFromRegistry:
    """Tests for add_provider_from_registry — the main provider browsing flow."""

    def test_returns_false_on_empty_ref_csv(self, temp_home):
        """Should return False when reference CSV has no models."""
        with mock.patch("pdd.provider_manager._read_csv", return_value=[]), \
             mock.patch("pdd.provider_manager.console"):
            assert add_provider_from_registry() is False

    def test_returns_false_on_cancel(self, temp_home):
        """Empty input should cancel the flow."""
        registry_rows = [
            {"provider": "Anthropic", "model": "claude", "api_key": "ANTHROPIC_API_KEY"},
        ]
        with mock.patch("pdd.provider_manager._read_csv", return_value=registry_rows), \
             mock.patch("pdd.provider_manager.Prompt") as prompt_cls, \
             mock.patch("pdd.provider_manager.console"):
            prompt_cls.ask.return_value = ""
            assert add_provider_from_registry() is False

    @pytest.mark.parametrize("bad_input", ["99", "0", "abc", "-1"])
    def test_returns_false_on_invalid_selection(self, temp_home, bad_input):
        """Out-of-range or non-numeric input should return False."""
        registry_rows = [
            {"provider": "Anthropic", "model": "claude", "api_key": "ANTHROPIC_API_KEY"},
        ]
        with mock.patch("pdd.provider_manager._read_csv", return_value=registry_rows), \
             mock.patch("pdd.provider_manager.Prompt") as prompt_cls, \
             mock.patch("pdd.provider_manager.console"):
            prompt_cls.ask.return_value = bad_input
            assert add_provider_from_registry() is False

    def test_adds_models_to_csv(self, temp_home, monkeypatch):
        """Should add all models for the selected provider to user CSV."""
        monkeypatch.setenv("SHELL", "/bin/bash")

        registry_rows = [
            {"provider": "Anthropic", "model": "claude-sonnet", "api_key": "ANTHROPIC_API_KEY",
             "input": "3.0", "output": "15.0", "coding_arena_elo": "1400", "base_url": "",
             "max_reasoning_tokens": "0", "structured_output": "True", "reasoning_type": "", "location": ""},
            {"provider": "Anthropic", "model": "claude-opus", "api_key": "ANTHROPIC_API_KEY",
             "input": "5.0", "output": "25.0", "coding_arena_elo": "1500", "base_url": "",
             "max_reasoning_tokens": "0", "structured_output": "True", "reasoning_type": "", "location": ""},
            {"provider": "OpenAI", "model": "gpt-4", "api_key": "OPENAI_API_KEY",
             "input": "30.0", "output": "60.0", "coding_arena_elo": "1300", "base_url": "",
             "max_reasoning_tokens": "0", "structured_output": "True", "reasoning_type": "", "location": ""},
        ]

        # _read_csv is called twice: reference catalog first, then user CSV.
        with mock.patch("pdd.provider_manager._read_csv", side_effect=[registry_rows, []]), \
             mock.patch("pdd.provider_manager._write_csv_atomic") as write_csv, \
             mock.patch("pdd.provider_manager.Prompt") as prompt_cls, \
             mock.patch("pdd.provider_manager.Confirm") as confirm_cls, \
             mock.patch("pdd.provider_manager.console"), \
             mock.patch("pdd.provider_manager._is_key_set", return_value=None):
            prompt_cls.ask.side_effect = ["1", "test-api-key"]
            confirm_cls.ask.return_value = False
            result = add_provider_from_registry()

        assert result is True
        write_csv.assert_called_once()
        added_rows = write_csv.call_args[0][1]
        assert len(added_rows) == 2
        assert all(row["provider"] == "Anthropic" for row in added_rows)

    def test_skips_duplicate_models(self, temp_home, monkeypatch):
        """Should not add models that already exist in user CSV."""
        monkeypatch.setenv("SHELL", "/bin/bash")

        registry_rows = [
            {"provider": "Anthropic", "model": "claude-sonnet", "api_key": "ANTHROPIC_API_KEY",
             "input": "3.0", "output": "15.0", "coding_arena_elo": "1400", "base_url": "",
             "max_reasoning_tokens": "0", "structured_output": "True", "reasoning_type": "", "location": ""},
        ]
        existing_rows = [
            {"provider": "Anthropic", "model": "claude-sonnet", "api_key": "ANTHROPIC_API_KEY"},
        ]

        with mock.patch("pdd.provider_manager._read_csv", side_effect=[registry_rows, existing_rows]), \
             mock.patch("pdd.provider_manager._write_csv_atomic") as write_csv, \
             mock.patch("pdd.provider_manager.Prompt") as prompt_cls, \
             mock.patch("pdd.provider_manager.console"), \
             mock.patch("pdd.provider_manager._is_key_set", return_value="shell environment"), \
             mock.patch("pdd.provider_manager.Confirm") as confirm_cls:
            prompt_cls.ask.return_value = "1"
            confirm_cls.ask.return_value = False
            result = add_provider_from_registry()

        assert result is False
        write_csv.assert_not_called()

    def test_dispatches_to_complex_auth_for_vertex(self, temp_home):
        """Selecting a complex provider should delegate to _setup_complex_provider."""
        vertex_row = {"provider": "Google Vertex AI", "model": "vertex_ai/gemini-2.5-pro",
                      "api_key": "GOOGLE_APPLICATION_CREDENTIALS", "base_url": ""}
        with mock.patch("pdd.provider_manager._setup_complex_provider", return_value=True) as setup_complex, \
             mock.patch("pdd.provider_manager._write_csv_atomic"), \
             mock.patch("pdd.provider_manager._read_csv", side_effect=[[vertex_row], []]), \
             mock.patch("pdd.provider_manager.Prompt") as prompt_cls, \
             mock.patch("pdd.provider_manager.console"):
            prompt_cls.ask.return_value = "1"
            add_provider_from_registry()

        setup_complex.assert_called_once_with("Google Vertex AI")


# ---------------------------------------------------------------------------
# III. 
add_custom_provider
# ---------------------------------------------------------------------------


class TestAddCustomProvider:
    """Tests for add_custom_provider — the manual provider entry flow."""

    @mock.patch("pdd.provider_manager._ensure_api_env_sourced_in_rc")
    @mock.patch("pdd.provider_manager._save_key_to_api_env")
    @mock.patch("pdd.provider_manager._write_csv_atomic")
    @mock.patch("pdd.provider_manager._read_csv", return_value=[])
    @mock.patch("pdd.provider_manager.Confirm")
    @mock.patch("pdd.provider_manager.Prompt")
    @mock.patch("pdd.provider_manager.console")
    def test_adds_custom_model_with_correct_format(
        self, mock_console, mock_prompt, mock_confirm, mock_read, mock_write, mock_save, mock_rc
    ):
        """Should create provider/model formatted model name and sensible defaults."""
        # Inputs: provider, model, api_key var, base_url, input cost, output cost.
        mock_prompt.ask.side_effect = [
            "ollama", "llama3", "OLLAMA_API_KEY", "", "0.0", "0.0",
        ]
        mock_confirm.ask.return_value = False

        assert add_custom_provider() is True

        rows = mock_write.call_args[0][1]
        assert len(rows) == 1
        added = rows[0]
        assert added["model"] == "ollama/llama3"
        assert added["provider"] == "ollama"
        assert added["api_key"] == "OLLAMA_API_KEY"
        assert added["coding_arena_elo"] == "1000"
        assert added["structured_output"] == "True"

    @pytest.mark.parametrize("abort_at_step,inputs", [
        ("provider", [""]),
        ("model", ["ollama", ""]),
        ("api_key_var", ["ollama", "llama3", ""]),
    ])
    @mock.patch("pdd.provider_manager.Prompt")
    @mock.patch("pdd.provider_manager.console")
    def test_returns_false_on_empty_input_at_each_step(
        self, mock_console, mock_prompt, abort_at_step, inputs
    ):
        """Empty input at any required step should cancel."""
        mock_prompt.ask.side_effect = inputs
        assert add_custom_provider() is False

    @mock.patch("pdd.provider_manager._ensure_api_env_sourced_in_rc")
    @mock.patch("pdd.provider_manager._save_key_to_api_env")
    @mock.patch("pdd.provider_manager._write_csv_atomic")
    @mock.patch("pdd.provider_manager._read_csv", return_value=[])
    @mock.patch("pdd.provider_manager.Confirm")
    @mock.patch("pdd.provider_manager.Prompt")
    @mock.patch("pdd.provider_manager.console")
    def test_saves_api_key_when_user_provides_value(
        self, mock_console, mock_prompt, mock_confirm, mock_read, mock_write, mock_save, mock_rc
    ):
        """When user opts to provide key value, it should be saved to api-env."""
        # Final input is the key value itself, prompted after Confirm → True.
        mock_prompt.ask.side_effect = [
            "openai", "gpt-5", "MY_KEY", "", "0.0", "0.0", "sk-secret123",
        ]
        mock_confirm.ask.return_value = True

        assert add_custom_provider() is True
        mock_save.assert_called_once_with("MY_KEY", "sk-secret123")

    @mock.patch("pdd.provider_manager._ensure_api_env_sourced_in_rc")
    @mock.patch("pdd.provider_manager._save_key_to_api_env")
    @mock.patch("pdd.provider_manager._write_csv_atomic")
    @mock.patch("pdd.provider_manager._read_csv", return_value=[])
    @mock.patch("pdd.provider_manager.Confirm")
    @mock.patch("pdd.provider_manager.Prompt")
    @mock.patch("pdd.provider_manager.console")
    def test_invalid_costs_default_to_zero(
        self, mock_console, mock_prompt, mock_confirm, mock_read, mock_write, mock_save, mock_rc
    ):
        """Non-numeric cost values should default to 0.0."""
        mock_prompt.ask.side_effect = [
            "test", "model", "TEST_KEY", "", "not-a-number", "also-bad",
        ]
        mock_confirm.ask.return_value = False

        assert add_custom_provider() is True
        added = mock_write.call_args[0][1][0]
        assert added["input"] == "0.0"
        assert added["output"] == "0.0"


# ---------------------------------------------------------------------------
# IV. 
remove_models_by_provider
# ---------------------------------------------------------------------------


class TestRemoveModelsByProvider:
    """Tests for remove_models_by_provider — bulk removal by api_key group."""

    def test_returns_false_when_no_models(self, temp_home):
        with mock.patch("pdd.provider_manager.console"):
            assert remove_models_by_provider() is False

    def test_returns_false_on_cancel(self, sample_csv, temp_home):
        with mock.patch("pdd.provider_manager.Prompt") as prompt_cls, \
             mock.patch("pdd.provider_manager.console"):
            prompt_cls.ask.return_value = ""
            assert remove_models_by_provider() is False

    @pytest.mark.parametrize("bad_input", ["99", "abc"])
    def test_returns_false_on_invalid_selection(self, sample_csv, temp_home, bad_input):
        with mock.patch("pdd.provider_manager.Prompt") as prompt_cls, \
             mock.patch("pdd.provider_manager.console"):
            prompt_cls.ask.return_value = bad_input
            assert remove_models_by_provider() is False

    def test_returns_false_when_user_declines_confirm(self, sample_csv, temp_home):
        with mock.patch("pdd.provider_manager.Prompt") as prompt_cls, \
             mock.patch("pdd.provider_manager.Confirm") as confirm_cls, \
             mock.patch("pdd.provider_manager.console"):
            prompt_cls.ask.return_value = "1"
            confirm_cls.ask.return_value = False
            assert remove_models_by_provider() is False

    def test_removes_all_models_for_selected_provider(self, sample_csv, temp_home, monkeypatch):
        """Should remove all models sharing the selected api_key and comment it out."""
        monkeypatch.setenv("SHELL", "/bin/bash")

        with mock.patch("pdd.provider_manager.Prompt") as prompt_cls, \
             mock.patch("pdd.provider_manager.Confirm") as confirm_cls, \
             mock.patch("pdd.provider_manager.console"):
            prompt_cls.ask.return_value = "1"
            confirm_cls.ask.return_value = True
            result = remove_models_by_provider()

        assert result is True
        remaining = _read_user_csv(temp_home)
        assert len(remaining) < 3

+ +# --------------------------------------------------------------------------- +# V. remove_individual_models +# --------------------------------------------------------------------------- + + +class TestRemoveIndividualModels: + """Tests for remove_individual_models — selective model removal.""" + + def test_returns_false_when_no_models(self, temp_home): + with mock.patch("pdd.provider_manager.console"): + assert remove_individual_models() is False + + def test_returns_false_on_cancel(self, sample_csv, temp_home): + with mock.patch("pdd.provider_manager.Prompt") as mock_prompt: + mock_prompt.ask.return_value = "" + with mock.patch("pdd.provider_manager.console"): + assert remove_individual_models() is False + + def test_returns_false_on_all_invalid_numbers(self, sample_csv, temp_home): + """All-invalid comma-separated input should result in no selections.""" + with mock.patch("pdd.provider_manager.Prompt") as mock_prompt: + mock_prompt.ask.return_value = "99, abc, -1" + with mock.patch("pdd.provider_manager.console"): + assert remove_individual_models() is False + + def test_returns_false_when_user_declines_confirm(self, sample_csv, temp_home): + with mock.patch("pdd.provider_manager.Prompt") as mock_prompt: + mock_prompt.ask.return_value = "1" + with mock.patch("pdd.provider_manager.Confirm") as mock_confirm: + mock_confirm.ask.return_value = False + with mock.patch("pdd.provider_manager.console"): + assert remove_individual_models() is False + + def test_removes_single_model(self, sample_csv, temp_home): + with mock.patch("pdd.provider_manager.Prompt") as mock_prompt: + mock_prompt.ask.return_value = "1" + with mock.patch("pdd.provider_manager.Confirm") as mock_confirm: + mock_confirm.ask.return_value = True + with mock.patch("pdd.provider_manager.console"): + assert remove_individual_models() is True + + assert len(_read_user_csv(temp_home)) == 2 + + def test_removes_multiple_comma_separated(self, sample_csv, temp_home): + with 
mock.patch("pdd.provider_manager.Prompt") as mock_prompt: + mock_prompt.ask.return_value = "1, 2" + with mock.patch("pdd.provider_manager.Confirm") as mock_confirm: + mock_confirm.ask.return_value = True + with mock.patch("pdd.provider_manager.console"): + assert remove_individual_models() is True + + assert len(_read_user_csv(temp_home)) == 1 + + +# --------------------------------------------------------------------------- +# VI. Complex provider auth (_setup_complex_provider) +# --------------------------------------------------------------------------- + + +class TestComplexProviderAuth: + """Tests for complex (multi-variable) provider authentication flows. + + _setup_complex_provider is tested directly because it's the entry point + for a significant user-facing flow that add_provider_from_registry delegates to. + """ + + def test_registry_contains_expected_providers(self): + """Registry should contain the 5 known complex providers.""" + expected = {"Google Vertex AI", "AWS Bedrock", "Azure OpenAI", "Azure AI", "Github Copilot"} + assert expected == set(COMPLEX_AUTH_PROVIDERS.keys()) + + def test_simple_providers_not_in_registry(self): + for name in ["Anthropic", "OpenAI", "DeepSeek"]: + assert name not in COMPLEX_AUTH_PROVIDERS + + def test_registry_entries_have_required_fields(self): + required_keys = {"env_var", "label", "required", "default", "hint"} + for provider, configs in COMPLEX_AUTH_PROVIDERS.items(): + assert len(configs) > 0, f"{provider} has no configs" + for cfg in configs: + assert required_keys <= set(cfg.keys()), f"{provider} config missing keys" + + def test_unknown_provider_returns_false(self): + assert _setup_complex_provider("Unknown Provider") is False + + @mock.patch("pdd.provider_manager._ensure_api_env_sourced_in_rc") + @mock.patch("pdd.provider_manager._save_key_to_api_env") + @mock.patch("pdd.provider_manager._is_key_set", return_value=None) + @mock.patch("pdd.provider_manager.Confirm") + @mock.patch("pdd.provider_manager.Prompt") + 
@mock.patch("pdd.provider_manager.console") + def test_bedrock_saves_all_three_vars( + self, mock_console, mock_prompt, mock_confirm, mock_is_key, mock_save, mock_rc + ): + mock_prompt.ask.side_effect = ["AKIAEXAMPLE", "wJalrXSecret", "us-east-1"] + + assert _setup_complex_provider("AWS Bedrock") is True + assert mock_save.call_count == 3 + mock_save.assert_any_call("AWS_ACCESS_KEY_ID", "AKIAEXAMPLE") + mock_save.assert_any_call("AWS_SECRET_ACCESS_KEY", "wJalrXSecret") + mock_save.assert_any_call("AWS_REGION_NAME", "us-east-1") + mock_rc.assert_called_once() + + @mock.patch("pdd.provider_manager._ensure_api_env_sourced_in_rc") + @mock.patch("pdd.provider_manager._save_key_to_api_env") + @mock.patch("pdd.provider_manager._is_key_set", return_value=None) + @mock.patch("pdd.provider_manager.Confirm") + @mock.patch("pdd.provider_manager.Prompt") + @mock.patch("pdd.provider_manager.console") + def test_vertex_adc_skips_credentials_save( + self, mock_console, mock_prompt, mock_confirm, mock_is_key, mock_save, mock_rc + ): + """When user enters 'adc' for Vertex credentials, that var should not be saved.""" + mock_prompt.ask.side_effect = ["adc", "my-project-123", "us-central1"] + + assert _setup_complex_provider("Google Vertex AI") is True + assert mock_save.call_count == 2 + mock_save.assert_any_call("VERTEXAI_PROJECT", "my-project-123") + mock_save.assert_any_call("VERTEXAI_LOCATION", "us-central1") + + @mock.patch("pdd.provider_manager._ensure_api_env_sourced_in_rc") + @mock.patch("pdd.provider_manager._save_key_to_api_env") + @mock.patch("pdd.provider_manager._is_key_set", return_value=None) + @mock.patch("pdd.provider_manager.Confirm") + @mock.patch("pdd.provider_manager.Prompt") + @mock.patch("pdd.provider_manager.console") + def test_azure_openai_saves_three_vars( + self, mock_console, mock_prompt, mock_confirm, mock_is_key, mock_save, mock_rc + ): + mock_prompt.ask.side_effect = [ + "abc123key", "https://myresource.openai.azure.com/", "2024-10-21", + ] + + assert 
_setup_complex_provider("Azure OpenAI") is True + assert mock_save.call_count == 3 + mock_save.assert_any_call("AZURE_API_KEY", "abc123key") + mock_save.assert_any_call("AZURE_API_BASE", "https://myresource.openai.azure.com/") + mock_save.assert_any_call("AZURE_API_VERSION", "2024-10-21") + + @mock.patch("pdd.provider_manager._ensure_api_env_sourced_in_rc") + @mock.patch("pdd.provider_manager._save_key_to_api_env") + @mock.patch("pdd.provider_manager._is_key_set", return_value=None) + @mock.patch("pdd.provider_manager.Confirm") + @mock.patch("pdd.provider_manager.Prompt") + @mock.patch("pdd.provider_manager.console") + def test_skip_all_required_vars_returns_false( + self, mock_console, mock_prompt, mock_confirm, mock_is_key, mock_save, mock_rc + ): + """Skipping all vars should return False and save nothing.""" + mock_prompt.ask.side_effect = ["", "", ""] + + assert _setup_complex_provider("AWS Bedrock") is False + mock_save.assert_not_called() + mock_rc.assert_not_called() + + @mock.patch("pdd.provider_manager._ensure_api_env_sourced_in_rc") + @mock.patch("pdd.provider_manager._save_key_to_api_env") + @mock.patch("pdd.provider_manager._is_key_set", return_value="shell environment") + @mock.patch("pdd.provider_manager.Confirm") + @mock.patch("pdd.provider_manager.Prompt") + @mock.patch("pdd.provider_manager.console") + def test_existing_key_skipped_when_update_declined( + self, mock_console, mock_prompt, mock_confirm, mock_is_key, mock_save, mock_rc + ): + mock_confirm.ask.return_value = False + + assert _setup_complex_provider("Github Copilot") is False + mock_save.assert_not_called() + + +# --------------------------------------------------------------------------- +# VII. Shell execution integration tests +# +# These are the most valuable tests in this file. They verify that +# _save_key_to_api_env produces scripts that real shells can source, +# and that API key values survive the shell escaping roundtrip. 
+# --------------------------------------------------------------------------- + + +def _shell_available(shell: str) -> bool: + return shutil.which(shell) is not None + + +class TestShellExecution: + """ + Integration tests that actually execute generated api-env scripts + in real shells and verify key values are preserved exactly. + """ + + def test_bash_syntax_valid_with_special_chars(self, temp_home, monkeypatch): + """Generated api-env script should have valid bash syntax.""" + monkeypatch.setenv("SHELL", "/bin/bash") + _save_key_to_api_env("TEST_KEY", 'AIzaSyAbCdEf123456$var"quote\'backtick\\slash') + env_path = temp_home / ".pdd" / "api-env.bash" + + result = subprocess.run( + ["bash", "-n", str(env_path)], + capture_output=True, text=True, timeout=5, + ) + assert result.returncode == 0, ( + f"Bash syntax error: {result.stderr}\nScript:\n{env_path.read_text()}" + ) + + def test_zsh_syntax_valid_with_special_chars(self, temp_home, monkeypatch): + if not _shell_available("zsh"): + pytest.skip("zsh not available") + monkeypatch.setenv("SHELL", "/bin/zsh") + _save_key_to_api_env("TEST_KEY", 'AIzaSyAbCdEf123456$var"quote\'backtick\\slash') + env_path = temp_home / ".pdd" / "api-env.zsh" + + result = subprocess.run( + ["zsh", "-n", str(env_path)], + capture_output=True, text=True, timeout=5, + ) + assert result.returncode == 0, ( + f"Zsh syntax error: {result.stderr}\nScript:\n{env_path.read_text()}" + ) + + def test_key_value_preserved_bash(self, temp_home, monkeypatch): + """API key should survive bash source→read roundtrip exactly.""" + monkeypatch.setenv("SHELL", "/bin/bash") + original = 'AIzaSyAbCdEf123456$var"quote\'backtick\\slash' + _save_key_to_api_env("TEST_KEY", original) + env_path = temp_home / ".pdd" / "api-env.bash" + + result = subprocess.run( + ["bash", "-c", + f"source {env_path} && python3 -c \"import os; print(os.environ.get('TEST_KEY', ''))\""], + capture_output=True, text=True, timeout=5, + ) + assert result.returncode == 0, f"Source failed: 
{result.stderr}" + assert result.stdout.strip() == original + + def test_key_value_preserved_zsh(self, temp_home, monkeypatch): + if not _shell_available("zsh"): + pytest.skip("zsh not available") + monkeypatch.setenv("SHELL", "/bin/zsh") + original = 'AIzaSyAbCdEf123456$var"quote\'backtick\\slash' + _save_key_to_api_env("TEST_KEY", original) + env_path = temp_home / ".pdd" / "api-env.zsh" + + result = subprocess.run( + ["zsh", "-c", + f"source {env_path} && python3 -c \"import os; print(os.environ.get('TEST_KEY', ''))\""], + capture_output=True, text=True, timeout=5, + ) + assert result.returncode == 0, f"Source failed: {result.stderr}" + assert result.stdout.strip() == original + + @pytest.mark.parametrize("name,value", [ + ("dollar", "key$value"), + ("double_quote", 'key"value'), + ("single_quote", "key'value"), + ("backtick", "key`value"), + ("backslash", "key\\value"), + ("space", "key value"), + ("semicolon", "key;value"), + ("ampersand", "key&value"), + ("pipe", "key|value"), + ("newline", "key\nvalue"), + ("tab", "key\tvalue"), + ]) + def test_problematic_char_preserved_bash(self, temp_home, monkeypatch, name, value): + """Each problematic shell character should be preserved through bash roundtrip.""" + monkeypatch.setenv("SHELL", "/bin/bash") + key_name = f"TEST_{name.upper()}" + _save_key_to_api_env(key_name, value) + env_path = temp_home / ".pdd" / "api-env.bash" + + syntax = subprocess.run( + ["bash", "-n", str(env_path)], + capture_output=True, text=True, timeout=5, + ) + assert syntax.returncode == 0, f"Syntax error for '{name}': {syntax.stderr}" + + extract = subprocess.run( + ["bash", "-c", + f"source {env_path} && python3 -c \"import os; print(repr(os.environ.get('{key_name}', '')))\""], + capture_output=True, text=True, timeout=5, + ) + if extract.returncode == 0: + extracted = eval(extract.stdout.strip()) + assert extracted == value, ( + f"Value corrupted for '{name}': expected {repr(value)}, got {repr(extracted)}" + ) + + def 
test_multiple_keys_all_preserved(self, temp_home, monkeypatch): + """Multiple keys saved sequentially should all be preserved.""" + monkeypatch.setenv("SHELL", "/bin/bash") + keys = { + "OPENAI_API_KEY": "sk-test123", + "ANTHROPIC_API_KEY": "ant-key$special", + "GEMINI_API_KEY": 'gem"quoted\'key', + } + for k, v in keys.items(): + _save_key_to_api_env(k, v) + + env_path = temp_home / ".pdd" / "api-env.bash" + for key_name, expected in keys.items(): + result = subprocess.run( + ["bash", "-c", + f"source {env_path} && python3 -c \"import os; print(os.environ.get('{key_name}', ''))\""], + capture_output=True, text=True, timeout=5, + ) + assert result.returncode == 0 + assert result.stdout.strip() == expected + + def test_key_update_replaces_in_place(self, temp_home, monkeypatch): + """Updating an existing key should replace it, not duplicate it.""" + monkeypatch.setenv("SHELL", "/bin/bash") + _save_key_to_api_env("MY_KEY", "old-value") + _save_key_to_api_env("MY_KEY", "new-value") + + env_path = temp_home / ".pdd" / "api-env.bash" + content = env_path.read_text() + assert content.count("MY_KEY") == 1 + + result = subprocess.run( + ["bash", "-c", + f"source {env_path} && python3 -c \"import os; print(os.environ.get('MY_KEY', ''))\""], + capture_output=True, text=True, timeout=5, + ) + assert result.returncode == 0 + assert result.stdout.strip() == "new-value" + + def test_save_key_sets_os_environ_immediately(self, temp_home, monkeypatch): + """_save_key_to_api_env should set os.environ for immediate availability.""" + monkeypatch.setenv("SHELL", "/bin/bash") + monkeypatch.delenv("MY_IMMEDIATE_KEY", raising=False) + + _save_key_to_api_env("MY_IMMEDIATE_KEY", "test-value-abc") + + assert os.environ.get("MY_IMMEDIATE_KEY") == "test-value-abc" + + def test_commented_key_replaced_on_save(self, temp_home, monkeypatch): + """Saving a key that was previously commented out should uncomment/replace it.""" + monkeypatch.setenv("SHELL", "/bin/bash") + env_path = temp_home / ".pdd" 
/ "api-env.bash" + env_path.write_text("# export OLD_KEY=old-value\n") + + _save_key_to_api_env("OLD_KEY", "new-value") + + content = env_path.read_text() + assert "# export OLD_KEY" not in content + assert "new-value" in content + + result = subprocess.run( + ["bash", "-c", + f"source {env_path} && python3 -c \"import os; print(os.environ.get('OLD_KEY', ''))\""], + capture_output=True, text=True, timeout=5, + ) + assert result.returncode == 0 + assert result.stdout.strip() == "new-value" diff --git a/tests/test_setup_tool.py b/tests/test_setup_tool.py index b89710774..5c74b3eec 100644 --- a/tests/test_setup_tool.py +++ b/tests/test_setup_tool.py @@ -1,574 +1,760 @@ -"""Tests for setup_tool.py""" +# Test Plan: +# All tests drive through the public entry point `run_setup()` via the helper +# `_run_setup_capture()` which mocks only at true boundaries (user input, +# filesystem paths, LLM calls, CLI detection) and captures printed output. +# +# I. End-to-End Success Path +# 1. test_happy_path_enter_to_finish: CLI detected, auto-phase succeeds, +# user presses Enter → exit summary printed, no options menu. +# 2. test_happy_path_open_menu_then_exit: Auto-phase succeeds, user enters +# 'm' → options menu shown, then exit summary printed. +# 3. test_happy_path_skipped_cli: CLI skipped → auto-phase still runs, +# exit summary printed. +# +# II. CLI Bootstrap Warnings +# 4. test_no_api_key_warning_shown: CLI found but api_key_configured=False +# → yellow warning about limited capability appears in output. +# 5. test_multiple_cli_results: Multiple CLIs, one missing key → warning +# only for the one missing. +# +# III. Auto-Phase Failure / Fallback +# 6. test_auto_phase_failure_triggers_menu: _run_auto_phase returns None +# → "Setup incomplete" message, options menu shown. +# +# IV. Interrupt Handling +# 7. test_keyboard_interrupt_phase1: KeyboardInterrupt during CLI bootstrap +# → "Setup interrupted" message, clean exit. +# 8. 
test_keyboard_interrupt_phase2: KeyboardInterrupt during auto phase +# → "Setup interrupted" message, clean exit. +# +# V. Key Scanning (via run_setup) +# 9. test_scan_finds_env_keys: Keys in os.environ → found and displayed +# with source "shell environment". +# 10. test_scan_finds_multiple_keys: Multiple keys → all found, count correct. +# 11. test_scan_no_keys_prompts_user: No keys anywhere → interactive +# prompt is invoked; after adding one, flow continues. +# 12. test_scan_multi_var_provider_grouped: Pipe-delimited api_key → +# grouped display shows "N/N vars set". +# 13. test_scan_multi_var_provider_partial: Some vars missing → +# grouped display shows partial count and missing names. +# +# VI. Model Configuration (via run_setup) +# 14. test_models_added_from_reference_csv: Matching API keys → +# new models written to user CSV. +# 15. test_models_deduplicated: Existing models in user CSV → +# not duplicated. +# 16. test_local_models_skipped: ollama/lm_studio/localhost rows excluded. +# 17. test_device_flow_models_included: Empty api_key rows always included. +# +# VII. .pddrc Handling (via run_setup) +# 18. test_pddrc_exists_confirmed: .pddrc already exists → "detected". +# 19. test_pddrc_created_on_confirm: No .pddrc, user types 'y' → created. +# 20. test_pddrc_skipped_on_enter: No .pddrc, user presses Enter → skipped. +# +# VIII. Model Testing (via run_setup) +# 21. test_model_test_success: _run_test succeeds → "responded OK". +# 22. test_model_test_failure: _run_test fails → error shown. +# +# IX. Exit Summary +# 23. test_exit_summary_writes_file: PDD-SETUP-SUMMARY.txt created. +# 24. test_exit_summary_creates_sample_prompt: success_python.prompt created. +# 25. test_exit_summary_quick_start_printed: QUICK START in terminal output. +# +# X. Options Menu +# 26. test_menu_add_provider: User selects "1" → add_provider called. +# 27. test_menu_test_model: User selects "2" → test_model_interactive called. +# 28. 
test_menu_enter_exits: Enter → menu exits, no actions. +# 29. test_menu_invalid_option: "9" → "Invalid option" shown. -import subprocess -import tempfile -from pathlib import Path +import csv +import os import pytest -from pdd.setup_tool import create_api_env_script +from pathlib import Path +from unittest.mock import MagicMock, patch +from pdd import setup_tool -def test_create_api_env_script_with_special_characters_bash(): - """ - Test that API keys with special shell characters are properly escaped - when generating bash/zsh shell scripts. - - This test will fail with the current implementation (no escaping) and - pass after fixing with shlex.quote(). - """ - # Simulate a Gemini API key that might contain special characters - # These are realistic characters that could appear in API keys or be accidentally - # included when copy-pasting - test_keys = { - 'GEMINI_API_KEY': 'AIzaSyAbCdEf123456$var"quote\'backtick\\slash' - } - - # Generate the script - script_content = create_api_env_script(test_keys, 'bash') - - # Write to a temporary file - with tempfile.NamedTemporaryFile(mode='w', suffix='.sh', delete=False) as f: - f.write(script_content) - script_path = Path(f.name) - - try: - # Try to parse/validate the script by running it with bash -n (syntax check) - # This will fail if the script has parsing errors - result = subprocess.run( - ['bash', '-n', str(script_path)], - capture_output=True, - text=True, - timeout=5 - ) - - # The script should parse without errors - assert result.returncode == 0, ( - f"Generated script has syntax errors: {result.stderr}\n" - f"Script content:\n{script_content}" - ) - - # Additionally, try to source it in a subprocess to ensure it can be executed - # We'll check the exit code but not the actual env vars (since they're set in subprocess) - result = subprocess.run( - ['bash', '-c', f'source {script_path} && exit 0'], - capture_output=True, - text=True, - timeout=5 - ) - - assert result.returncode == 0, ( - f"Generated script cannot 
be sourced: {result.stderr}\n" - f"Script content:\n{script_content}" - ) - - finally: - # Clean up - script_path.unlink() +# --------------------------------------------------------------------------- +# Module-level test data constants +# --------------------------------------------------------------------------- -def test_create_api_env_script_with_special_characters_zsh(): - """Test that API keys with special characters work in zsh scripts.""" - test_keys = { - 'GEMINI_API_KEY': 'AIzaSyAbCdEf123456$var"quote\'backtick\\slash' - } - - script_content = create_api_env_script(test_keys, 'zsh') - - with tempfile.NamedTemporaryFile(mode='w', suffix='.sh', delete=False) as f: - f.write(script_content) - script_path = Path(f.name) - - try: - # Test zsh syntax - result = subprocess.run( - ['zsh', '-n', str(script_path)], - capture_output=True, - text=True, - timeout=5 - ) - - assert result.returncode == 0, ( - f"Generated zsh script has syntax errors: {result.stderr}\n" - f"Script content:\n{script_content}" - ) - finally: - script_path.unlink() +SIMPLE_REF_CSV = [ + {"provider": "Anthropic", "model": "claude-sonnet", "api_key": "ANTHROPIC_API_KEY", + "base_url": "", "input": "3", "output": "15", "coding_arena_elo": "1200", + "max_reasoning_tokens": "", "structured_output": "", "reasoning_type": "", "location": ""}, + {"provider": "OpenAI", "model": "gpt-4o", "api_key": "OPENAI_API_KEY", + "base_url": "", "input": "5", "output": "15", "coding_arena_elo": "1100", + "max_reasoning_tokens": "", "structured_output": "", "reasoning_type": "", "location": ""}, +] +BEDROCK_REF_CSV = [ + {"provider": "AWS Bedrock", "model": "bedrock/anthropic.claude-v1", + "api_key": "AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME", + "base_url": "", "input": "8", "output": "24", "coding_arena_elo": "1150", + "max_reasoning_tokens": "", "structured_output": "", "reasoning_type": "", "location": ""}, +] -def test_create_api_env_script_with_common_problematic_characters(): - """ - Test 
with various common problematic characters that might appear in API keys. - - Characters tested: - - Double quotes: " - - Single quotes: ' - - Dollar signs: $ (variable expansion) - - Backticks: ` (command substitution) - - Backslashes: \\ (escaping) - - Spaces: (should be handled) - - Parentheses: () (might be interpreted) +DEVICE_FLOW_CSV = [ + {"provider": "GitHub Copilot", "model": "copilot/gpt-4", "api_key": "", + "base_url": "", "input": "0", "output": "0", "coding_arena_elo": "1050", + "max_reasoning_tokens": "", "structured_output": "", "reasoning_type": "", "location": ""}, +] + +LOCAL_MODELS_CSV = [ + {"provider": "ollama", "model": "ollama/llama3", "api_key": "", + "base_url": "http://localhost:11434", "input": "0", "output": "0", + "coding_arena_elo": "", "max_reasoning_tokens": "", "structured_output": "", + "reasoning_type": "", "location": ""}, + {"provider": "lm_studio", "model": "lm/mistral", "api_key": "", + "base_url": "http://localhost:1234", "input": "0", "output": "0", + "coding_arena_elo": "", "max_reasoning_tokens": "", "structured_output": "", + "reasoning_type": "", "location": ""}, +] + +TEST_SUCCESS_RESULT = { + "success": True, "duration_s": 1.2, "cost": 0.001, + "error": None, "tokens": {"input": 10, "output": 20}, +} + +TEST_FAILURE_RESULT = { + "success": False, "duration_s": 0.5, "cost": 0.0, + "error": "Authentication error", "tokens": None, +} + +# Env vars to clean to prevent leakage from real environment +_ENV_VARS_TO_CLEAN = [ + "ANTHROPIC_API_KEY", "OPENAI_API_KEY", "GEMINI_API_KEY", + "DEEPSEEK_API_KEY", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", + "AWS_REGION_NAME", "GOOGLE_APPLICATION_CREDENTIALS", "VERTEXAI_PROJECT", + "VERTEXAI_LOCATION", "AZURE_API_KEY", "AZURE_API_BASE", + "AZURE_API_VERSION", +] + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _make_cli_result(cli_name="claude", 
provider="anthropic", + api_key_configured=True, skipped=False): + """Create a mock CliBootstrapResult.""" + result = MagicMock() + result.cli_name = cli_name + result.provider = provider + result.api_key_configured = api_key_configured + result.skipped = skipped + return result + + +def _write_csv_file(path, rows): + """Write a list of row dicts as a CSV file.""" + path.parent.mkdir(parents=True, exist_ok=True) + if not rows: + path.write_text("") + return + fieldnames = list(rows[0].keys()) + with open(path, "w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=fieldnames) + writer.writeheader() + writer.writerows(rows) + + +def _run_setup_capture(tmp_path, monkeypatch, ref_csv_rows=None, + user_csv_rows=None, env_keys=None, + input_sequence=None, cli_results=None, + test_result=None, create_pddrc=False): + """Run run_setup() with full environment control, capturing all output. + + Mocks at true boundaries only: CLI detection, user input, model testing, + menu delegates, filesystem paths, and shell detection. Lets all internal + logic (key scanning, model filtering, CSV I/O, .pddrc creation) run + naturally. + + Returns: + (output_str, mocks_dict) — output is all captured print/console text; + mocks contains mock objects for call-count assertions. 
""" - problematic_key = 'key"with\'many$special`characters\\and spaces(too)' - test_keys = { - 'GEMINI_API_KEY': problematic_key - } - - # Test all common shells - for shell in ['bash', 'zsh', 'sh']: - script_content = create_api_env_script(test_keys, shell) - - with tempfile.NamedTemporaryFile(mode='w', suffix='.sh', delete=False) as f: - f.write(script_content) - script_path = Path(f.name) - + if ref_csv_rows is None: + ref_csv_rows = SIMPLE_REF_CSV + if env_keys is None: + env_keys = {"ANTHROPIC_API_KEY": "sk-ant-test123"} + if input_sequence is None: + input_sequence = ["", "", ""] + if cli_results is None: + cli_results = [_make_cli_result()] + if test_result is None: + test_result = TEST_SUCCESS_RESULT + + # --- Filesystem isolation --- + pdd_home = tmp_path / "home" + pdd_dir = pdd_home / ".pdd" + pdd_dir.mkdir(parents=True) + project_dir = tmp_path / "project" + project_dir.mkdir() + + monkeypatch.setattr(Path, "home", lambda: pdd_home) + monkeypatch.chdir(project_dir) + + # Create reference CSV alongside a fake module path + fake_module_dir = tmp_path / "fake_pdd" + fake_module_dir.mkdir() + data_dir = fake_module_dir / "data" + data_dir.mkdir() + _write_csv_file(data_dir / "llm_model.csv", ref_csv_rows) + monkeypatch.setattr(setup_tool, "__file__", + str(fake_module_dir / "setup_tool.py")) + + # Pre-populate user CSV if needed + if user_csv_rows: + _write_csv_file(pdd_dir / "llm_model.csv", user_csv_rows) + + # Create .pddrc if requested + if create_pddrc: + (project_dir / ".pddrc").write_text("version: '1.0'\n") + + # --- Environment isolation --- + for var in _ENV_VARS_TO_CLEAN: + monkeypatch.delenv(var, raising=False) + for key, val in env_keys.items(): + monkeypatch.setenv(key, val) + + # Force shell detection to "bash" for deterministic api-env path + monkeypatch.setenv("SHELL", "/bin/bash") + + # --- Output capture --- + captured_lines = [] + + def capture_print(*args, **kwargs): + captured_lines.append(" ".join(str(a) for a in args)) + + 
mock_console = MagicMock() + mock_console.print = lambda *a, **kw: captured_lines.append( + " ".join(str(x) for x in a)) + + # --- Input mock --- + input_iter = iter(input_sequence) + + def mock_input(prompt=""): + captured_lines.append(str(prompt)) try: - # Use bash/sh for sh, bash for bash, zsh for zsh - shell_cmd = 'sh' if shell == 'sh' else shell - result = subprocess.run( - [shell_cmd, '-n', str(script_path)], - capture_output=True, - text=True, - timeout=5 - ) - - assert result.returncode == 0, ( - f"Generated {shell} script has syntax errors: {result.stderr}\n" - f"Script content:\n{script_content}" - ) - finally: - script_path.unlink() - - -def test_create_api_env_script_preserves_key_value(): - """ - Test that after proper escaping, the key value can still be correctly - extracted when the script is sourced. - """ - original_key = 'AIzaSyAbCdEf123456$var"quote\'backtick\\slash' - test_keys = { - 'GEMINI_API_KEY': original_key - } - - script_content = create_api_env_script(test_keys, 'bash') - - with tempfile.NamedTemporaryFile(mode='w', suffix='.sh', delete=False) as f: - f.write(script_content) - script_path = Path(f.name) - - try: - # Source the script and extract the value - # We'll use a Python subprocess to avoid shell escaping issues in our test - result = subprocess.run( - ['bash', '-c', f'source {script_path} && python3 -c "import os; print(os.environ.get(\'GEMINI_API_KEY\', \'\'))"'], - capture_output=True, - text=True, - timeout=5 - ) - - assert result.returncode == 0, ( - f"Failed to source script and read env var: {result.stderr}\n" - f"Script content:\n{script_content}" - ) - - extracted_key = result.stdout.strip() - assert extracted_key == original_key, ( - f"Key value was corrupted during escaping.\n" - f"Original: {repr(original_key)}\n" - f"Extracted: {repr(extracted_key)}\n" - f"Script content:\n{script_content}" - ) - finally: - script_path.unlink() + return next(input_iter) + except StopIteration: + return "" + # --- Boundary mocks --- 
+ mock_detect_cli = MagicMock(return_value=cli_results) + mock_run_test = MagicMock(return_value=test_result) + mock_add_provider = MagicMock() + mock_test_interactive = MagicMock() -def test_create_api_env_script_with_normal_key(): - """ - Test that normal keys (without special characters) still work correctly. - This ensures our fix doesn't break existing functionality. - """ - normal_key = 'AIzaSyAbCdEf1234567890_normal_key_value' - test_keys = { - 'OPENAI_API_KEY': normal_key, - 'GEMINI_API_KEY': normal_key - } - - script_content = create_api_env_script(test_keys, 'bash') - - with tempfile.NamedTemporaryFile(mode='w', suffix='.sh', delete=False) as f: - f.write(script_content) - script_path = Path(f.name) - - try: - result = subprocess.run( - ['bash', '-n', str(script_path)], - capture_output=True, - text=True, - timeout=5 - ) - - assert result.returncode == 0, ( - f"Normal key failed syntax check: {result.stderr}\n" - f"Script content:\n{script_content}" - ) - - # Verify values can be extracted - result = subprocess.run( - ['bash', '-c', f'source {script_path} && python3 -c "import os; print(os.environ.get(\'OPENAI_API_KEY\', \'\')); print(os.environ.get(\'GEMINI_API_KEY\', \'\'))"'], - capture_output=True, - text=True, - timeout=5 - ) - - assert result.returncode == 0 - extracted_keys = result.stdout.strip().split('\n') - assert extracted_keys[0] == normal_key - assert extracted_keys[1] == normal_key - finally: - script_path.unlink() + # Patch sys.stdout.write/flush used by the threaded test animation + mock_stdout_write = MagicMock( + side_effect=lambda s: captured_lines.append(s)) + patches = [ + patch("pdd.setup_tool._console", mock_console), + patch("builtins.print", capture_print), + patch("builtins.input", mock_input), + patch("pdd.cli_detector.detect_and_bootstrap_cli", mock_detect_cli), + patch("pdd.model_tester._run_test", mock_run_test), + patch("pdd.provider_manager.add_provider_from_registry", mock_add_provider), + 
patch("pdd.model_tester.test_model_interactive", mock_test_interactive), + patch("pdd.provider_manager._get_user_csv_path", + lambda: pdd_dir / "llm_model.csv"), + patch("pdd.provider_manager._get_shell_rc_path", lambda: None), + patch("sys.stdout"), + ] -def _shell_available(shell: str) -> bool: - """Check if a shell is available on the system""" - try: - result = subprocess.run( - ['which', shell], - capture_output=True, - timeout=2 - ) - return result.returncode == 0 - except (subprocess.TimeoutExpired, FileNotFoundError): - return False + for p in patches: + p.start() + # Re-enable stdout.write and flush for the test animation capture + import sys as _sys + _sys.stdout.write = mock_stdout_write + _sys.stdout.flush = MagicMock() -def test_create_api_env_script_with_special_characters_fish(): - """ - Test that API keys with special characters work in fish shell scripts. - - This test verifies that shlex.quote() works correctly with fish shell. - Fish is not POSIX-compliant, so there may be edge cases where POSIX-style - quoting doesn't work as expected. 
- """ - if not _shell_available('fish'): - pytest.skip("fish shell not available") - - test_keys = { - 'GEMINI_API_KEY': 'AIzaSyAbCdEf123456$var"quote\'backtick\\slash' - } - - script_content = create_api_env_script(test_keys, 'fish') - - with tempfile.NamedTemporaryFile(mode='w', suffix='.fish', delete=False) as f: - f.write(script_content) - script_path = Path(f.name) - try: - # Fish doesn't have a -n syntax check flag like bash/zsh - # So we'll try to source it and see if it works - result = subprocess.run( - ['fish', '-c', f'source {script_path}; exit 0'], - capture_output=True, - text=True, - timeout=5 - ) - - assert result.returncode == 0, ( - f"Generated fish script has syntax/execution errors: {result.stderr}\n" - f"Script content:\n{script_content}" - ) + setup_tool.run_setup() + except (SystemExit, StopIteration): + pass finally: - script_path.unlink() - + for p in patches: + p.stop() -def test_create_api_env_script_preserves_key_value_fish(): - """ - Test that fish shell correctly preserves key values with special characters. - - This is critical because fish has different quoting rules than POSIX shells, - and shlex.quote() may not handle all cases correctly. 
- """ - if not _shell_available('fish'): - pytest.skip("fish shell not available") - - original_key = 'AIzaSyAbCdEf123456$var"quote\'backtick\\slash' - test_keys = { - 'GEMINI_API_KEY': original_key + output = "\n".join(captured_lines) + mocks = { + "detect_cli": mock_detect_cli, + "run_test": mock_run_test, + "console": mock_console, + "add_provider": mock_add_provider, + "test_interactive": mock_test_interactive, } - - script_content = create_api_env_script(test_keys, 'fish') - - with tempfile.NamedTemporaryFile(mode='w', suffix='.fish', delete=False) as f: - f.write(script_content) - script_path = Path(f.name) - - try: - # Source the script and extract the value using fish - result = subprocess.run( - ['fish', '-c', f'source {script_path}; python3 -c "import os; print(os.environ.get(\'GEMINI_API_KEY\', \'\'))"'], - capture_output=True, - text=True, - timeout=5 - ) - - assert result.returncode == 0, ( - f"Failed to source fish script and read env var: {result.stderr}\n" - f"Script content:\n{script_content}" - ) - - extracted_key = result.stdout.strip() - assert extracted_key == original_key, ( - f"Key value was corrupted during escaping in fish shell.\n" - f"Original: {repr(original_key)}\n" - f"Extracted: {repr(extracted_key)}\n" - f"Script content:\n{script_content}\n" - f"This indicates shlex.quote() may not work correctly with fish shell." - ) - finally: - script_path.unlink() + return output, mocks -def test_create_api_env_script_with_special_characters_csh(): - """ - Test that API keys with special characters work in csh/tcsh shell scripts. - - WARNING: csh/tcsh have fundamentally different quoting rules than POSIX shells. - shlex.quote() uses POSIX single-quote syntax which may not work correctly - in csh/tcsh, especially with: - - Variables containing $ (variable expansion still occurs in single quotes) - - Complex backslash sequences - - Certain special characters - - This test will help identify if shlex.quote() works correctly with csh/tcsh. 
- """ - # Try csh first, then tcsh - shell_cmd = None - shell_name = None - for shell in ['csh', 'tcsh']: - if _shell_available(shell): - shell_cmd = shell - shell_name = shell - break - - if not shell_cmd: - pytest.skip("csh/tcsh not available") - - test_keys = { - 'GEMINI_API_KEY': 'AIzaSyAbCdEf123456$var"quote\'backtick\\slash' - } - - script_content = create_api_env_script(test_keys, shell_name) - - with tempfile.NamedTemporaryFile(mode='w', suffix='.csh', delete=False) as f: - f.write(script_content) - script_path = Path(f.name) - - try: - # csh/tcsh don't have a -n flag, so we'll try to source it - # Use -f to prevent reading .cshrc/.tcshrc which might interfere - result = subprocess.run( - [shell_cmd, '-f', '-c', f'source {script_path}; exit 0'], - capture_output=True, - text=True, - timeout=5 - ) - - assert result.returncode == 0, ( - f"Generated {shell_name} script has syntax/execution errors: {result.stderr}\n" - f"Script content:\n{script_content}\n" - f"This may indicate that shlex.quote() doesn't work correctly with {shell_name}." - ) - finally: - script_path.unlink() +# =========================================================================== +# I. End-to-End Success Path +# =========================================================================== +def test_happy_path_enter_to_finish(tmp_path, monkeypatch): + """Auto-phase succeeds, user presses Enter → exit summary, no menu.""" + output, mocks = _run_setup_capture( + tmp_path, monkeypatch, + ref_csv_rows=SIMPLE_REF_CSV, + env_keys={"ANTHROPIC_API_KEY": "sk-test"}, + create_pddrc=True, + # Inputs: Enter after step1, Enter after step2, Enter to finish + input_sequence=["", "", ""], + ) + assert "PDD Setup Complete" in output + mocks["detect_cli"].assert_called_once() + mocks["add_provider"].assert_not_called() -def test_create_api_env_script_preserves_key_value_csh(): - """ - Test that csh/tcsh correctly preserves key values with special characters. 
- - This is critical because csh/tcsh have fundamentally different quoting rules: - - Single quotes in csh do NOT prevent variable expansion ($var still expands) - - Backslash escaping works differently - - The quoting mechanism is incompatible with POSIX - - This test will likely reveal issues with using shlex.quote() for csh/tcsh. - """ - # Try csh first, then tcsh - shell_cmd = None - shell_name = None - for shell in ['csh', 'tcsh']: - if _shell_available(shell): - shell_cmd = shell - shell_name = shell - break - - if not shell_cmd: - pytest.skip("csh/tcsh not available") - - original_key = 'AIzaSyAbCdEf123456$var"quote\'backtick\\slash' - test_keys = { - 'GEMINI_API_KEY': original_key - } - - script_content = create_api_env_script(test_keys, shell_name) - - with tempfile.NamedTemporaryFile(mode='w', suffix='.csh', delete=False) as f: - f.write(script_content) - script_path = Path(f.name) - - try: - # Source the script and extract the value using csh/tcsh - # Use -f to prevent reading .cshrc/.tcshrc - result = subprocess.run( - [shell_cmd, '-f', '-c', f'source {script_path}; python3 -c "import os; print(os.environ.get(\'GEMINI_API_KEY\', \'\'))"'], - capture_output=True, - text=True, - timeout=5 - ) - - assert result.returncode == 0, ( - f"Failed to source {shell_name} script and read env var: {result.stderr}\n" - f"Script content:\n{script_content}" - ) - - extracted_key = result.stdout.strip() - assert extracted_key == original_key, ( - f"Key value was corrupted during escaping in {shell_name} shell.\n" - f"Original: {repr(original_key)}\n" - f"Extracted: {repr(extracted_key)}\n" - f"Script content:\n{script_content}\n" - f"This indicates shlex.quote() does NOT work correctly with {shell_name}.\n" - f"csh/tcsh have different quoting rules than POSIX shells." 
- ) - finally: - script_path.unlink() +def test_happy_path_open_menu_then_exit(tmp_path, monkeypatch): + """Auto-phase succeeds, user enters 'm' → menu shown, then exit.""" + output, mocks = _run_setup_capture( + tmp_path, monkeypatch, + ref_csv_rows=SIMPLE_REF_CSV, + env_keys={"ANTHROPIC_API_KEY": "sk-test"}, + create_pddrc=True, + # Inputs: Enter step1, Enter step2, 'm' for menu, Enter to exit menu + input_sequence=["", "", "m", ""], + ) + assert "PDD Setup Complete" in output + assert "Options" in output -def test_create_api_env_script_csh_variable_expansion_issue(): - """ - Test a specific csh/tcsh issue: variable expansion in single quotes. - - In csh/tcsh, single quotes do NOT prevent variable expansion. - This means a key containing $HOME will expand to the actual home directory - path, which is incorrect behavior. - - This test demonstrates the fundamental incompatibility between - POSIX-style quoting (shlex.quote) and csh/tcsh. - """ - # Try csh first, then tcsh - shell_cmd = None - shell_name = None - for shell in ['csh', 'tcsh']: - if _shell_available(shell): - shell_cmd = shell - shell_name = shell - break - - if not shell_cmd: - pytest.skip("csh/tcsh not available") - - # Create a key that contains $HOME to test variable expansion - # In POSIX shells, this should be preserved as-is - # In csh/tcsh, this might expand to the actual home directory - test_key = 'api_key_with_$HOME_in_it' - test_keys = { - 'GEMINI_API_KEY': test_key - } - - script_content = create_api_env_script(test_keys, shell_name) - - with tempfile.NamedTemporaryFile(mode='w', suffix='.csh', delete=False) as f: - f.write(script_content) - script_path = Path(f.name) - - try: - # Source the script and extract the value - result = subprocess.run( - [shell_cmd, '-f', '-c', f'source {script_path}; python3 -c "import os; print(os.environ.get(\'GEMINI_API_KEY\', \'\'))"'], - capture_output=True, - text=True, - timeout=5 - ) - - assert result.returncode == 0, ( - f"Failed to source {shell_name} 
script: {result.stderr}\n" - f"Script content:\n{script_content}" - ) - - extracted_key = result.stdout.strip() - # This test will likely FAIL, demonstrating the issue - assert extracted_key == test_key, ( - f"Variable expansion occurred in {shell_name} despite single quotes!\n" - f"Expected: {repr(test_key)}\n" - f"Got: {repr(extracted_key)}\n" - f"Script content:\n{script_content}\n" - f"This proves that shlex.quote() (POSIX single quotes) does NOT work\n" - f"correctly with csh/tcsh, which expand variables even in single quotes." - ) - finally: - script_path.unlink() +def test_happy_path_skipped_cli(tmp_path, monkeypatch): + """CLI skipped → auto-phase still runs.""" + output, mocks = _run_setup_capture( + tmp_path, monkeypatch, + ref_csv_rows=SIMPLE_REF_CSV, + env_keys={"ANTHROPIC_API_KEY": "sk-test"}, + cli_results=[_make_cli_result(skipped=True, cli_name="")], + create_pddrc=True, + input_sequence=["", "", ""], + ) + assert "PDD Setup Complete" in output + assert "No API key configured" not in output + + +# =========================================================================== +# II. 
CLI Bootstrap Warnings +# =========================================================================== + +def test_no_api_key_warning_shown(tmp_path, monkeypatch): + """CLI found but no API key → warning appears.""" + output, _ = _run_setup_capture( + tmp_path, monkeypatch, + ref_csv_rows=SIMPLE_REF_CSV, + env_keys={"ANTHROPIC_API_KEY": "sk-test"}, + cli_results=[_make_cli_result(api_key_configured=False)], + create_pddrc=True, + input_sequence=["", "", ""], + ) + assert "No API key configured" in output + + +def test_multiple_cli_results_warning_only_for_missing(tmp_path, monkeypatch): + """Multiple CLIs, warning only for the one missing API key.""" + output, _ = _run_setup_capture( + tmp_path, monkeypatch, + ref_csv_rows=SIMPLE_REF_CSV, + env_keys={"ANTHROPIC_API_KEY": "sk-test"}, + cli_results=[ + _make_cli_result(cli_name="claude", api_key_configured=True), + _make_cli_result(cli_name="codex", api_key_configured=False), + ], + create_pddrc=True, + input_sequence=["", "", ""], + ) + assert "No API key configured" in output + + +# =========================================================================== +# III. 
Auto-Phase Failure / Fallback +# =========================================================================== + +def test_auto_phase_failure_triggers_menu(tmp_path, monkeypatch): + """Auto-phase fails → 'Setup incomplete' and options menu shown.""" + captured = [] + mock_console = MagicMock() + mock_console.print = lambda *a, **kw: captured.append( + " ".join(str(x) for x in a)) + + with patch("pdd.setup_tool._run_auto_phase", return_value=None), \ + patch("pdd.setup_tool._run_options_menu") as mock_menu, \ + patch("pdd.setup_tool._print_exit_summary"), \ + patch("pdd.setup_tool._print_pdd_logo"), \ + patch("pdd.setup_tool._console", mock_console), \ + patch("pdd.cli_detector.detect_and_bootstrap_cli", + return_value=[_make_cli_result()]): + setup_tool.run_setup() + + output = "\n".join(captured) + assert "Setup incomplete" in output + mock_menu.assert_called_once() + + +# =========================================================================== +# IV. Interrupt Handling +# =========================================================================== + +def test_keyboard_interrupt_phase1(): + """KeyboardInterrupt during CLI bootstrap → clean exit.""" + captured = [] + with patch("pdd.cli_detector.detect_and_bootstrap_cli", + side_effect=KeyboardInterrupt), \ + patch("pdd.setup_tool._print_pdd_logo"), \ + patch("builtins.print", lambda *a, **kw: captured.append( + " ".join(str(x) for x in a))): + setup_tool.run_setup() + assert any("Setup interrupted" in line for line in captured) + + +def test_keyboard_interrupt_phase2(): + """KeyboardInterrupt during auto phase → clean exit.""" + captured = [] + with patch("pdd.cli_detector.detect_and_bootstrap_cli", + return_value=[_make_cli_result()]), \ + patch("pdd.setup_tool._run_auto_phase", + side_effect=KeyboardInterrupt), \ + patch("pdd.setup_tool._print_pdd_logo"), \ + patch("pdd.setup_tool._console", MagicMock()), \ + patch("builtins.print", lambda *a, **kw: captured.append( + " ".join(str(x) for x in a))): + 
setup_tool.run_setup() + assert any("Setup interrupted" in line for line in captured) + + +# =========================================================================== +# V. Key Scanning (via run_setup) +# =========================================================================== + +def test_scan_finds_env_keys(tmp_path, monkeypatch): + """Keys in os.environ → found with 'shell environment' source.""" + output, _ = _run_setup_capture( + tmp_path, monkeypatch, + ref_csv_rows=SIMPLE_REF_CSV, + env_keys={"ANTHROPIC_API_KEY": "sk-test"}, + create_pddrc=True, + input_sequence=["", "", ""], + ) + assert "ANTHROPIC_API_KEY" in output + assert "shell environment" in output + assert "1 API key" in output + + +def test_scan_finds_multiple_keys(tmp_path, monkeypatch): + """Multiple keys in os.environ → all found.""" + output, _ = _run_setup_capture( + tmp_path, monkeypatch, + ref_csv_rows=SIMPLE_REF_CSV, + env_keys={"ANTHROPIC_API_KEY": "sk-test", "OPENAI_API_KEY": "sk-openai"}, + create_pddrc=True, + input_sequence=["", "", ""], + ) + assert "ANTHROPIC_API_KEY" in output + assert "OPENAI_API_KEY" in output + assert "2 API key" in output + + +def test_scan_no_keys_prompts_user(tmp_path, monkeypatch): + """No keys found → interactive key prompt is triggered.""" + # Use only the single-row ref CSV so skip is option "2" + ref_rows = [SIMPLE_REF_CSV[0]] + + captured = [] + mock_console = MagicMock() + mock_console.print = lambda *a, **kw: captured.append( + " ".join(str(x) for x in a)) + + with patch("pdd.setup_tool._run_auto_phase", return_value=None), \ + patch("pdd.setup_tool._print_exit_summary"), \ + patch("pdd.setup_tool._print_pdd_logo"), \ + patch("pdd.setup_tool._run_options_menu"), \ + patch("pdd.setup_tool._console", mock_console), \ + patch("pdd.cli_detector.detect_and_bootstrap_cli", + return_value=[_make_cli_result(skipped=True, cli_name="")]), \ + patch("builtins.input", return_value=""), \ + patch("builtins.print", + lambda *a, **kw: captured.append(" 
".join(str(x) for x in a))): + setup_tool.run_setup() + + output = "\n".join(captured) + # Verify auto-phase failure path was hit (keys couldn't be found) + assert "Setup incomplete" in output + + +def test_scan_multi_var_provider_grouped(tmp_path, monkeypatch): + """Pipe-delimited api_key → grouped display with var counts.""" + output, _ = _run_setup_capture( + tmp_path, monkeypatch, + ref_csv_rows=BEDROCK_REF_CSV, + env_keys={ + "AWS_ACCESS_KEY_ID": "AKIAEXAMPLE", + "AWS_SECRET_ACCESS_KEY": "secret123", + "AWS_REGION_NAME": "us-east-1", + }, + create_pddrc=True, + input_sequence=["", "", ""], + ) + assert "3/3" in output + assert "AWS Bedrock" in output + + +def test_scan_multi_var_provider_partial(tmp_path, monkeypatch): + """Partial multi-var credentials → missing vars shown.""" + output, _ = _run_setup_capture( + tmp_path, monkeypatch, + ref_csv_rows=BEDROCK_REF_CSV, + env_keys={"AWS_ACCESS_KEY_ID": "AKIAEXAMPLE"}, + create_pddrc=True, + input_sequence=["", "", ""], + ) + assert "1/3" in output + assert "missing" in output.lower() + + +# =========================================================================== +# VI. 
Model Configuration (via run_setup) +# =========================================================================== + +def test_models_added_from_reference_csv(tmp_path, monkeypatch): + """Matching API keys → models written to user CSV.""" + output, _ = _run_setup_capture( + tmp_path, monkeypatch, + ref_csv_rows=SIMPLE_REF_CSV, + env_keys={"ANTHROPIC_API_KEY": "sk-test"}, + create_pddrc=True, + input_sequence=["", "", ""], + ) + # Verify user CSV was created with the matching model + user_csv = tmp_path / "home" / ".pdd" / "llm_model.csv" + assert user_csv.exists() + content = user_csv.read_text() + assert "claude-sonnet" in content + # OpenAI should NOT be present (no key set) + assert "gpt-4o" not in content + + +def test_models_deduplicated(tmp_path, monkeypatch): + """Existing models not duplicated.""" + existing = [SIMPLE_REF_CSV[0].copy()] + output, _ = _run_setup_capture( + tmp_path, monkeypatch, + ref_csv_rows=SIMPLE_REF_CSV, + user_csv_rows=existing, + env_keys={"ANTHROPIC_API_KEY": "sk-test"}, + create_pddrc=True, + input_sequence=["", "", ""], + ) + # Should mention "already" loaded rather than new additions + assert "already" in output.lower() or "All matching" in output + + +def test_local_models_skipped(tmp_path, monkeypatch): + """ollama/lm_studio/localhost models excluded from user CSV.""" + combined = SIMPLE_REF_CSV + LOCAL_MODELS_CSV + output, _ = _run_setup_capture( + tmp_path, monkeypatch, + ref_csv_rows=combined, + env_keys={"ANTHROPIC_API_KEY": "sk-test"}, + create_pddrc=True, + input_sequence=["", "", ""], + ) + user_csv = tmp_path / "home" / ".pdd" / "llm_model.csv" + assert user_csv.exists() + content = user_csv.read_text() + assert "ollama" not in content + assert "lm_studio" not in content + + +def test_device_flow_models_included(tmp_path, monkeypatch): + """Empty api_key (device flow) models always included.""" + combined = SIMPLE_REF_CSV + DEVICE_FLOW_CSV + output, _ = _run_setup_capture( + tmp_path, monkeypatch, + 
ref_csv_rows=combined, + env_keys={"ANTHROPIC_API_KEY": "sk-test"}, + create_pddrc=True, + input_sequence=["", "", ""], + ) + user_csv = tmp_path / "home" / ".pdd" / "llm_model.csv" + assert user_csv.exists() + content = user_csv.read_text() + assert "copilot" in content.lower() + + +# =========================================================================== +# VII. .pddrc Handling (via run_setup) +# =========================================================================== + +def test_pddrc_exists_confirmed(tmp_path, monkeypatch): + """.pddrc already exists → 'detected' message shown.""" + output, _ = _run_setup_capture( + tmp_path, monkeypatch, + ref_csv_rows=SIMPLE_REF_CSV, + env_keys={"ANTHROPIC_API_KEY": "sk-test"}, + create_pddrc=True, + input_sequence=["", "", ""], + ) + assert "pddrc" in output.lower() + assert "detected" in output.lower() + + +def test_pddrc_created_on_confirm(tmp_path, monkeypatch): + """No .pddrc, user types 'y' → file created.""" + _run_setup_capture( + tmp_path, monkeypatch, + ref_csv_rows=SIMPLE_REF_CSV, + env_keys={"ANTHROPIC_API_KEY": "sk-test"}, + create_pddrc=False, + # step1 Enter, pddrc "y", step2 Enter, finish Enter + input_sequence=["", "y", "", ""], + ) + assert (tmp_path / "project" / ".pddrc").exists() + + +def test_pddrc_skipped_on_enter(tmp_path, monkeypatch): + """No .pddrc, user presses Enter → file not created.""" + output, _ = _run_setup_capture( + tmp_path, monkeypatch, + ref_csv_rows=SIMPLE_REF_CSV, + env_keys={"ANTHROPIC_API_KEY": "sk-test"}, + create_pddrc=False, + # step1 Enter, pddrc skip Enter, step2 Enter, finish Enter + input_sequence=["", "", "", ""], + ) + assert not (tmp_path / "project" / ".pddrc").exists() + + +# =========================================================================== +# VIII. 
Model Testing (via run_setup) +# =========================================================================== + +def test_model_test_success(tmp_path, monkeypatch): + """Model test succeeds → 'responded OK' in output.""" + output, _ = _run_setup_capture( + tmp_path, monkeypatch, + ref_csv_rows=SIMPLE_REF_CSV, + env_keys={"ANTHROPIC_API_KEY": "sk-test"}, + create_pddrc=True, + test_result=TEST_SUCCESS_RESULT, + input_sequence=["", "", ""], + ) + assert "responded OK" in output or "OK" in output + + +def test_model_test_failure(tmp_path, monkeypatch): + """Model test fails → error message in output.""" + output, _ = _run_setup_capture( + tmp_path, monkeypatch, + ref_csv_rows=SIMPLE_REF_CSV, + env_keys={"ANTHROPIC_API_KEY": "sk-test"}, + create_pddrc=True, + test_result=TEST_FAILURE_RESULT, + input_sequence=["", "", ""], + ) + assert "Authentication error" in output or "failed" in output.lower() + + +# =========================================================================== +# IX. Exit Summary +# =========================================================================== + +def test_exit_summary_writes_file(tmp_path, monkeypatch): + """PDD-SETUP-SUMMARY.txt created after setup.""" + _run_setup_capture( + tmp_path, monkeypatch, + ref_csv_rows=SIMPLE_REF_CSV, + env_keys={"ANTHROPIC_API_KEY": "sk-test"}, + create_pddrc=True, + input_sequence=["", "", ""], + ) + summary = tmp_path / "project" / "PDD-SETUP-SUMMARY.txt" + assert summary.exists() + content = summary.read_text() + assert "PDD Setup Complete" in content + assert "QUICK START" in content + + +def test_exit_summary_creates_sample_prompt(tmp_path, monkeypatch): + """success_python.prompt created if not existing.""" + _run_setup_capture( + tmp_path, monkeypatch, + ref_csv_rows=SIMPLE_REF_CSV, + env_keys={"ANTHROPIC_API_KEY": "sk-test"}, + create_pddrc=True, + input_sequence=["", "", ""], + ) + assert (tmp_path / "project" / "success_python.prompt").exists() + + +def 
test_exit_summary_quick_start_printed(tmp_path, monkeypatch): + """QUICK START section appears in terminal output.""" + output, _ = _run_setup_capture( + tmp_path, monkeypatch, + ref_csv_rows=SIMPLE_REF_CSV, + env_keys={"ANTHROPIC_API_KEY": "sk-test"}, + create_pddrc=True, + input_sequence=["", "", ""], + ) + assert "QUICK START" in output + assert "pdd generate" in output + + +# =========================================================================== +# X. Options Menu (via run_setup with 'm' input) +# =========================================================================== + +def test_menu_add_provider(tmp_path, monkeypatch): + """User selects '1' in menu → add_provider_from_registry called.""" + _, mocks = _run_setup_capture( + tmp_path, monkeypatch, + ref_csv_rows=SIMPLE_REF_CSV, + env_keys={"ANTHROPIC_API_KEY": "sk-test"}, + create_pddrc=True, + input_sequence=["", "", "m", "1", ""], + ) + mocks["add_provider"].assert_called_once() + + +def test_menu_test_model(tmp_path, monkeypatch): + """User selects '2' in menu → test_model_interactive called.""" + _, mocks = _run_setup_capture( + tmp_path, monkeypatch, + ref_csv_rows=SIMPLE_REF_CSV, + env_keys={"ANTHROPIC_API_KEY": "sk-test"}, + create_pddrc=True, + input_sequence=["", "", "m", "2", ""], + ) + mocks["test_interactive"].assert_called_once() + + +def test_menu_enter_exits(tmp_path, monkeypatch): + """User presses Enter in menu → exits, no actions.""" + _, mocks = _run_setup_capture( + tmp_path, monkeypatch, + ref_csv_rows=SIMPLE_REF_CSV, + env_keys={"ANTHROPIC_API_KEY": "sk-test"}, + create_pddrc=True, + input_sequence=["", "", "m", ""], + ) + mocks["add_provider"].assert_not_called() + mocks["test_interactive"].assert_not_called() -def test_create_api_env_script_fish_edge_cases(): - """ - Test fish shell with various edge cases that might reveal quoting issues. - - Fish shell, while often compatible with POSIX-style quoting, may have - edge cases with certain character combinations. 
- """ - if not _shell_available('fish'): - pytest.skip("fish shell not available") - - edge_cases = [ - 'key with spaces', - "key'with'single'quotes", - 'key"with"double"quotes', - 'key$with$dollars', - 'key\\with\\backslashes', - 'key`with`backticks', - 'key(with)parentheses', - 'key[with]brackets', - 'key{with}braces', - 'key;with;semicolons', - 'key|with|pipes', - 'key&with&ersands', - 'keyredirects', - 'key\nwith\nnewlines', - 'key\twith\ttabs', - ] - - for i, test_key in enumerate(edge_cases): - test_keys = { - 'TEST_API_KEY': test_key - } - - script_content = create_api_env_script(test_keys, 'fish') - - with tempfile.NamedTemporaryFile(mode='w', suffix=f'.fish', delete=False) as f: - f.write(script_content) - script_path = Path(f.name) - - try: - # Try to source it - result = subprocess.run( - ['fish', '-c', f'source {script_path}; python3 -c "import os; print(os.environ.get(\'TEST_API_KEY\', \'\'))"'], - capture_output=True, - text=True, - timeout=5 - ) - - if result.returncode != 0: - pytest.fail( - f"Fish shell failed with edge case {i+1}: {repr(test_key)}\n" - f"Error: {result.stderr}\n" - f"Script content:\n{script_content}" - ) - - extracted_key = result.stdout.strip() - if extracted_key != test_key: - pytest.fail( - f"Fish shell corrupted value for edge case {i+1}: {repr(test_key)}\n" - f"Expected: {repr(test_key)}\n" - f"Got: {repr(extracted_key)}\n" - f"Script content:\n{script_content}" - ) - finally: - script_path.unlink() +def test_menu_invalid_option(tmp_path, monkeypatch): + """User enters invalid option → 'Invalid option' message.""" + output, mocks = _run_setup_capture( + tmp_path, monkeypatch, + ref_csv_rows=SIMPLE_REF_CSV, + env_keys={"ANTHROPIC_API_KEY": "sk-test"}, + create_pddrc=True, + input_sequence=["", "", "m", "9", ""], + ) + assert "Invalid" in output or "invalid" in output.lower() + mocks["add_provider"].assert_not_called() From cb148fe790af1df8f9aa63265c68bee7b18d078f Mon Sep 17 00:00:00 2001 From: Niti Goyal Date: Fri, 20 Feb 
2026 10:32:31 -0500 Subject: [PATCH 2/5] Update test llm invoke files to support new model api_key column MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit test_llm_invoke.py: - The tests were setting `VERTEX_CREDENTIALS` as the mock env var, but the new CSV `api_key` column for Vertex AI models specifies `GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION`. The `_ensure_api_key` function checked those exact env vars (because it previously only needed to look for one var), found them missing, returned `False`, and skipped the model — causing "All candidate models failed." - **Fix**: Changed the tests to set the correct env vars (`GOOGLE_APPLICATION_CREDENTIALS`, `VERTEXAI_PROJECT`, `VERTEXAI_LOCATION`) that match what the CSV actually declares. test_llm_invoke_vertex_retry.py: - The old tests verified that `llm_invoke` **explicitly passed** Vertex AI credentials on retry. The new tests verify that `llm_invoke` **does NOT** pass them, because under the multi-credential convention, LiteLLM reads from `os.environ` directly — both on initial calls and retries. These tests all pass now. 
--- tests/test_llm_invoke.py | 130 ++++++------- tests/test_llm_invoke_vertex_retry.py | 256 ++++++++------------------ 2 files changed, 127 insertions(+), 259 deletions(-) diff --git a/tests/test_llm_invoke.py b/tests/test_llm_invoke.py index bdbe2c425..d222e7884 100644 --- a/tests/test_llm_invoke.py +++ b/tests/test_llm_invoke.py @@ -1200,142 +1200,100 @@ def test_llm_invoke_responses_api_valid_json_parses_correctly(mock_load_models, assert response['result'].field2 == 42 -# --- Tests for Per-Model Vertex AI Location Override --- +# --- Tests for Multi-Credential Provider (Vertex AI) --- -def test_vertex_location_override_from_csv(mock_set_llm_cache): - """Test that per-model location in CSV overrides VERTEX_LOCATION env var.""" +def test_vertex_multi_credential_no_api_key_passed(mock_set_llm_cache): + """Test that Vertex AI (pipe-delimited api_key) does NOT pass api_key= to litellm.""" with patch('pdd.llm_invoke._load_model_data') as mock_load_data: - # Create mock model with location='us-central1' mock_data = [{ - 'provider': 'Google', - 'model': 'vertex_ai/deepseek-ai/deepseek-r1-0528-maas', - 'input': 0.55, 'output': 2.19, - 'coding_arena_elo': 1391, - 'structured_output': False, - 'base_url': '', - 'api_key': 'VERTEX_CREDENTIALS', - 'reasoning_type': 'none', - 'max_reasoning_tokens': 0, - 'location': 'us-central1' # Per-model location override - }] - mock_df = pd.DataFrame(mock_data) - mock_df['avg_cost'] = (mock_df['input'] + mock_df['output']) / 2 - mock_load_data.return_value = mock_df - - # Set env vars - VERTEX_LOCATION is 'global' but should be overridden - env_vars = { - 'VERTEX_CREDENTIALS': '/fake/path.json', - 'VERTEX_PROJECT': 'test-project', - 'VERTEX_LOCATION': 'global' # This should be overridden by CSV - } - - with patch.dict(os.environ, env_vars): - with patch('pdd.llm_invoke.litellm.completion') as mock_completion: - mock_completion.return_value = create_mock_litellm_response("test") - # Use mock_open for proper file context manager behavior 
- m = mock_open(read_data='{}') - with patch('builtins.open', m): - with patch('pdd.llm_invoke.json.load', return_value={}): - llm_invoke("test {x}", {"x": "y"}, 0.5, 0.7, True) - - # Assert vertex_location was set to 'us-central1', not 'global' - call_kwargs = mock_completion.call_args[1] - assert call_kwargs.get('vertex_location') == 'us-central1' - - -def test_vertex_location_fallback_when_empty(mock_set_llm_cache): - """Test that empty location in CSV falls back to VERTEX_LOCATION env var.""" - with patch('pdd.llm_invoke._load_model_data') as mock_load_data: - # Create mock model with NO location (empty string) - mock_data = [{ - 'provider': 'Google', + 'provider': 'Google Vertex AI', 'model': 'vertex_ai/gemini-3-flash-preview', 'input': 0.15, 'output': 0.6, 'coding_arena_elo': 1290, 'structured_output': True, 'base_url': '', - 'api_key': 'VERTEX_CREDENTIALS', + 'api_key': 'GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION', 'reasoning_type': 'effort', 'max_reasoning_tokens': 0, - 'location': '' # Empty - should fall back to env var + 'location': '' }] mock_df = pd.DataFrame(mock_data) mock_df['avg_cost'] = (mock_df['input'] + mock_df['output']) / 2 mock_load_data.return_value = mock_df env_vars = { - 'VERTEX_CREDENTIALS': '/fake/path.json', - 'VERTEX_PROJECT': 'test-project', - 'VERTEX_LOCATION': 'global' # Should use this when CSV location is empty + 'GOOGLE_APPLICATION_CREDENTIALS': '/fake/path.json', + 'VERTEXAI_PROJECT': 'test-project', + 'VERTEXAI_LOCATION': 'us-east4', } with patch.dict(os.environ, env_vars): with patch('pdd.llm_invoke.litellm.completion') as mock_completion: mock_completion.return_value = create_mock_litellm_response("test") - m = mock_open(read_data='{}') - with patch('builtins.open', m): - with patch('pdd.llm_invoke.json.load', return_value={}): - llm_invoke("test {x}", {"x": "y"}, 0.5, 0.7, True) + llm_invoke("test {x}", {"x": "y"}, 0.5, 0.7, True) - # Assert vertex_location falls back to env var 'global' + # 
Multi-credential: litellm reads from env, no api_key= passed call_kwargs = mock_completion.call_args[1] - assert call_kwargs.get('vertex_location') == 'global' + assert 'api_key' not in call_kwargs + assert 'vertex_credentials' not in call_kwargs + assert 'vertex_project' not in call_kwargs + assert 'vertex_location' not in call_kwargs def test_vertex_adc_without_credentials_file(mock_set_llm_cache): - """Test that Vertex AI works via ADC when VERTEX_CREDENTIALS is not set.""" + """Test that _ensure_api_key allows ADC when GOOGLE_APPLICATION_CREDENTIALS is missing but VERTEXAI_PROJECT is set.""" with patch('pdd.llm_invoke._load_model_data') as mock_load_data: mock_data = [{ - 'provider': 'Google', + 'provider': 'Google Vertex AI', 'model': 'vertex_ai/gemini-3-flash-preview', 'input': 0.15, 'output': 0.6, 'coding_arena_elo': 1290, 'structured_output': True, 'base_url': '', - 'api_key': 'VERTEX_CREDENTIALS', + 'api_key': 'GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION', 'reasoning_type': 'effort', 'max_reasoning_tokens': 0, - 'location': 'global' + 'location': '' }] mock_df = pd.DataFrame(mock_data) mock_df['avg_cost'] = (mock_df['input'] + mock_df['output']) / 2 mock_load_data.return_value = mock_df - # Set project and location but NOT VERTEX_CREDENTIALS + # Set project and location but NOT GOOGLE_APPLICATION_CREDENTIALS (ADC) env_vars = { - 'VERTEX_PROJECT': 'test-project', - 'VERTEX_LOCATION': 'global', + 'VERTEXAI_PROJECT': 'test-project', + 'VERTEXAI_LOCATION': 'global', } with patch.dict(os.environ, env_vars, clear=False): - # Ensure VERTEX_CREDENTIALS is not set - os.environ.pop('VERTEX_CREDENTIALS', None) + os.environ.pop('GOOGLE_APPLICATION_CREDENTIALS', None) with patch('pdd.llm_invoke.litellm.completion') as mock_completion: mock_completion.return_value = create_mock_litellm_response("test") llm_invoke("test {x}", {"x": "y"}, 0.5, 0.7, True) + # Multi-credential: no api_key or vertex-specific kwargs call_kwargs = 
mock_completion.call_args[1] - assert call_kwargs.get('vertex_project') == 'test-project' - assert call_kwargs.get('vertex_location') == 'global' - assert 'vertex_credentials' not in call_kwargs + assert 'api_key' not in call_kwargs def test_ensure_api_key_allows_adc_for_vertex(mock_set_llm_cache): - """Test that _ensure_api_key returns True for VERTEX_CREDENTIALS when VERTEX_PROJECT is set.""" + """Test that _ensure_api_key returns True for Vertex AI ADC when VERTEXAI_PROJECT is set.""" from pdd.llm_invoke import _ensure_api_key model_info = { 'model': 'vertex_ai/gemini-3-flash-preview', - 'api_key': 'VERTEX_CREDENTIALS' + 'api_key': 'GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION' } newly_acquired_keys = {} - with patch.dict(os.environ, {'VERTEX_PROJECT': 'test-project'}, clear=False): - os.environ.pop('VERTEX_CREDENTIALS', None) + with patch.dict(os.environ, { + 'VERTEXAI_PROJECT': 'test-project', + 'VERTEXAI_LOCATION': 'global', + }, clear=False): + os.environ.pop('GOOGLE_APPLICATION_CREDENTIALS', None) result = _ensure_api_key(model_info, newly_acquired_keys, verbose=True) assert result is True - assert newly_acquired_keys.get('VERTEX_CREDENTIALS') is False # ============================================================================== @@ -1694,7 +1652,13 @@ def test_deepseek_maas_passes_response_format_for_structured_output(mock_set_llm assert len(deepseek_data) == 1, "DeepSeek MaaS model not found in CSV" with patch('pdd.llm_invoke._load_model_data', return_value=deepseek_data): - with patch.dict(os.environ, {'VERTEX_CREDENTIALS': 'fake_creds'}): + # Set the actual env vars that the CSV api_key column requires for Vertex AI models + vertex_env = { + 'GOOGLE_APPLICATION_CREDENTIALS': '/fake/path/creds.json', + 'VERTEXAI_PROJECT': 'fake-project', + 'VERTEXAI_LOCATION': 'us-central1', + } + with patch.dict(os.environ, vertex_env): with patch('pdd.llm_invoke.litellm.completion') as mock_completion: # Return valid JSON that matches 
SampleOutputModel json_response = '{"field1": "test_value", "field2": 42}' @@ -1761,7 +1725,13 @@ def test_vertex_ai_claude_opus_passes_response_format_for_structured_output(mock "vertex_ai/claude-opus-4-6 should have structured_output=True in CSV" with patch('pdd.llm_invoke._load_model_data', return_value=opus_data): - with patch.dict(os.environ, {'VERTEX_CREDENTIALS': 'fake_creds'}): + # Set the actual env vars that the CSV api_key column requires for Vertex AI models + vertex_env = { + 'GOOGLE_APPLICATION_CREDENTIALS': '/fake/path/creds.json', + 'VERTEXAI_PROJECT': 'fake-project', + 'VERTEXAI_LOCATION': 'us-central1', + } + with patch.dict(os.environ, vertex_env): with patch('pdd.llm_invoke.litellm.completion') as mock_completion: # Return valid JSON that matches SampleOutputModel json_response = '{"field1": "test_value", "field2": 42}' @@ -1819,7 +1789,13 @@ def test_structured_output_uses_strict_json_schema_mode(mock_set_llm_cache): assert len(opus_data) == 1, "Vertex AI Claude Opus model not found in CSV" with patch('pdd.llm_invoke._load_model_data', return_value=opus_data): - with patch.dict(os.environ, {'VERTEX_CREDENTIALS': 'fake_creds'}): + # Set the actual env vars that the CSV api_key column requires for Vertex AI models + vertex_env = { + 'GOOGLE_APPLICATION_CREDENTIALS': '/fake/path/creds.json', + 'VERTEXAI_PROJECT': 'fake-project', + 'VERTEXAI_LOCATION': 'us-central1', + } + with patch.dict(os.environ, vertex_env): with patch('pdd.llm_invoke.litellm.completion') as mock_completion: # Return valid JSON matching SampleOutputModel json_response = '{"field1": "test_value", "field2": 42}' diff --git a/tests/test_llm_invoke_vertex_retry.py b/tests/test_llm_invoke_vertex_retry.py index eb0ee94d2..d6c836263 100644 --- a/tests/test_llm_invoke_vertex_retry.py +++ b/tests/test_llm_invoke_vertex_retry.py @@ -1,12 +1,15 @@ """ -Tests for Issue #185: Vertex AI location not passed in retry code paths. +Tests for Issue #185: Vertex AI retry code paths. 
-These tests verify that when a retry is triggered (None content, malformed JSON, -or invalid Python), the retry call to litellm.completion includes the Vertex AI -credentials (vertex_location, vertex_project, vertex_credentials). +With the pipe-delimited api_key convention, Vertex AI models use +multi-credential auth (GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION). +LiteLLM reads these from os.environ automatically — no api_key= or +vertex_credentials/vertex_project/vertex_location kwargs are passed. -Bug: Retry calls only pass **time_kwargs, missing Vertex AI credentials. -Result: LiteLLM defaults to us-central1, causing failures for models not available there. +These tests verify that retry calls for Vertex AI models: +1. Do NOT pass api_key= (multi-credential convention) +2. Still succeed (litellm reads env vars on retry too) +3. Preserve only standard kwargs (model, messages, temperature, etc.) """ import pytest @@ -52,19 +55,19 @@ def create_mock_litellm_response(content, model_name="test-model", prompt_tokens def create_vertex_model_dataframe(): - """Create a mock DataFrame with a Vertex AI model configuration.""" + """Create a mock DataFrame with a Vertex AI model using pipe-delimited api_key.""" mock_data = [{ - 'provider': 'Google', + 'provider': 'Google Vertex AI', 'model': 'vertex_ai/claude-opus-4-6', 'input': 5.0, 'output': 25.0, 'coding_arena_elo': 1465, 'structured_output': True, 'base_url': '', - 'api_key': 'VERTEX_CREDENTIALS', + 'api_key': 'GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION', 'reasoning_type': 'budget', 'max_reasoning_tokens': 128000, - 'location': '' # Empty = use VERTEX_LOCATION env var + 'location': '' # Empty = litellm reads VERTEXAI_LOCATION from env }] mock_df = pd.DataFrame(mock_data) mock_df['avg_cost'] = (mock_df['input'] + mock_df['output']) / 2 @@ -80,30 +83,25 @@ def mock_set_llm_cache(): yield mock_cache_class -class TestVertexRetryPassesCredentials: +class 
TestVertexRetryMultiCredential: """ - Tests that verify retry calls include Vertex AI credentials. - - Issue #185: Retry code paths only pass **time_kwargs, missing: - - vertex_credentials - - vertex_project - - vertex_location + Tests that verify retry calls for Vertex AI use the multi-credential + convention: no api_key= passed, litellm reads from os.environ. """ - def test_vertex_kwargs_passed_on_none_content_retry(self, mock_set_llm_cache): + def test_vertex_retry_no_api_key_on_none_content(self, mock_set_llm_cache): """ - Test that retry for None content includes vertex_location. + Test that retry for None content does NOT pass api_key for Vertex AI. - Trigger: raw_result is None (line 2021) - Bug: Retry at line 2031-2037 only passes **time_kwargs + Multi-credential provider: litellm reads env vars automatically. """ mock_df = create_vertex_model_dataframe() env_vars = { - 'VERTEX_CREDENTIALS': '/fake/path.json', - 'VERTEX_PROJECT': 'test-project', - 'VERTEX_LOCATION': 'us-east4', # NOT us-central1 - 'PDD_FORCE_LOCAL': '1', # Force local execution to prevent cloud routing + 'GOOGLE_APPLICATION_CREDENTIALS': '/fake/path.json', + 'VERTEXAI_PROJECT': 'test-project', + 'VERTEXAI_LOCATION': 'us-east4', + 'PDD_FORCE_LOCAL': '1', } with patch('pdd.llm_invoke._load_model_data', return_value=mock_df): @@ -115,86 +113,61 @@ def test_vertex_kwargs_passed_on_none_content_retry(self, mock_set_llm_cache): second_response = create_mock_litellm_response("Valid response") mock_completion.side_effect = [first_response, second_response] - # Mock file open for vertex credentials - m = mock_open(read_data='{}') - with patch('builtins.open', m): - with patch('pdd.llm_invoke.json.load', return_value={}): - llm_invoke("test {x}", {"x": "y"}, 0.5, 0.7, True) + llm_invoke("test {x}", {"x": "y"}, 0.5, 0.7, True) # Assert both calls were made assert mock_completion.call_count == 2 - # Get the retry call kwargs (second call) - retry_kwargs = mock_completion.call_args_list[1][1] + # 
Neither call should have api_key (multi-credential) + for call in mock_completion.call_args_list: + call_kwargs = call[1] + assert 'api_key' not in call_kwargs, \ + "Multi-credential Vertex AI should NOT pass api_key=" + assert 'vertex_credentials' not in call_kwargs + assert 'vertex_project' not in call_kwargs + assert 'vertex_location' not in call_kwargs - # CRITICAL ASSERTIONS - These FAIL before fix, PASS after fix - assert retry_kwargs.get('vertex_location') == 'us-east4', \ - "Retry call missing vertex_location - bug #185" - assert retry_kwargs.get('vertex_project') == 'test-project', \ - "Retry call missing vertex_project - bug #185" - assert 'vertex_credentials' in retry_kwargs, \ - "Retry call missing vertex_credentials - bug #185" - - def test_vertex_kwargs_passed_on_malformed_json_retry(self, mock_set_llm_cache): + def test_vertex_retry_no_api_key_on_malformed_json(self, mock_set_llm_cache): """ - Test that retry for malformed JSON includes vertex_location. - - Trigger: _is_malformed_json_response() returns True (line 2060) - Bug: Retry at line 2070-2076 only passes **time_kwargs + Test that retry for malformed JSON does NOT pass api_key for Vertex AI. 
""" mock_df = create_vertex_model_dataframe() env_vars = { - 'VERTEX_CREDENTIALS': '/fake/path.json', - 'VERTEX_PROJECT': 'test-project', - 'VERTEX_LOCATION': 'us-east4', - 'PDD_FORCE_LOCAL': '1', # Force local execution to prevent cloud routing + 'GOOGLE_APPLICATION_CREDENTIALS': '/fake/path.json', + 'VERTEXAI_PROJECT': 'test-project', + 'VERTEXAI_LOCATION': 'us-east4', + 'PDD_FORCE_LOCAL': '1', } with patch('pdd.llm_invoke._load_model_data', return_value=mock_df): with patch.dict(os.environ, env_vars, clear=False): with patch('pdd.llm_invoke.litellm.completion') as mock_completion: # First call returns malformed JSON (triggers retry) - # Create content that triggers _is_malformed_json_response - # (starts with '{', doesn't end with '}', has 100+ trailing \n) malformed_content = '{"field": "value' + '\n' * 150 - first_response = create_mock_litellm_response(malformed_content) second_response = create_mock_litellm_response("Valid response") mock_completion.side_effect = [first_response, second_response] - m = mock_open(read_data='{}') - with patch('builtins.open', m): - with patch('pdd.llm_invoke.json.load', return_value={}): - # This should trigger malformed JSON retry - llm_invoke("test {x}", {"x": "y"}, 0.5, 0.7, True) - - # Check if retry was triggered (may be 1 or 2 calls depending on detection) - if mock_completion.call_count >= 2: - retry_kwargs = mock_completion.call_args_list[1][1] + llm_invoke("test {x}", {"x": "y"}, 0.5, 0.7, True) - # CRITICAL ASSERTIONS - assert retry_kwargs.get('vertex_location') == 'us-east4', \ - "Retry call missing vertex_location - bug #185" - assert retry_kwargs.get('vertex_project') == 'test-project', \ - "Retry call missing vertex_project - bug #185" + # Check that no call passes api_key + for call in mock_completion.call_args_list: + call_kwargs = call[1] + assert 'api_key' not in call_kwargs, \ + "Multi-credential Vertex AI should NOT pass api_key=" - def test_vertex_kwargs_passed_on_invalid_python_retry(self, 
mock_set_llm_cache): + def test_vertex_retry_no_api_key_on_invalid_python(self, mock_set_llm_cache): """ - Test that retry for invalid Python includes vertex_location. - - Trigger: _has_invalid_python_code() returns True (line 2296) - Bug: Retry at line 2306-2312 only passes **time_kwargs - - This is the most common retry path based on production logs. + Test that retry for invalid Python does NOT pass api_key for Vertex AI. """ mock_df = create_vertex_model_dataframe() env_vars = { - 'VERTEX_CREDENTIALS': '/fake/path.json', - 'VERTEX_PROJECT': 'test-project', - 'VERTEX_LOCATION': 'us-east4', - 'PDD_FORCE_LOCAL': '1', # Force local execution to prevent cloud routing + 'GOOGLE_APPLICATION_CREDENTIALS': '/fake/path.json', + 'VERTEXAI_PROJECT': 'test-project', + 'VERTEXAI_LOCATION': 'us-east4', + 'PDD_FORCE_LOCAL': '1', } # Force retry by patching _has_invalid_python_code @@ -207,7 +180,6 @@ def force_retry_once(obj): with patch.dict(os.environ, env_vars, clear=False): with patch('pdd.llm_invoke.litellm.completion') as mock_completion: with patch('pdd.llm_invoke._has_invalid_python_code', side_effect=force_retry_once): - # Both calls return valid JSON that parses to CodeOutputModel valid_json = json.dumps({ "explanation": "Test explanation", "code": "def foo(): pass" @@ -216,54 +188,43 @@ def force_retry_once(obj): second_response = create_mock_litellm_response(valid_json) mock_completion.side_effect = [first_response, second_response] - m = mock_open(read_data='{}') - with patch('builtins.open', m): - with patch('pdd.llm_invoke.json.load', return_value={}): - llm_invoke( - "test {x}", - {"x": "y"}, - 0.5, - 0.7, - True, - output_pydantic=CodeOutputModel - ) + llm_invoke( + "test {x}", + {"x": "y"}, + 0.5, + 0.7, + True, + output_pydantic=CodeOutputModel + ) # Assert both calls were made (main + retry) assert mock_completion.call_count == 2, \ f"Expected 2 calls (main + retry), got {mock_completion.call_count}" - # Get the retry call kwargs (second call) - 
retry_kwargs = mock_completion.call_args_list[1][1] - - # CRITICAL ASSERTIONS - These FAIL before fix, PASS after fix - assert retry_kwargs.get('vertex_location') == 'us-east4', \ - f"Retry call missing vertex_location - bug #185. Got: {retry_kwargs.get('vertex_location')}" - assert retry_kwargs.get('vertex_project') == 'test-project', \ - f"Retry call missing vertex_project - bug #185. Got: {retry_kwargs.get('vertex_project')}" - assert 'vertex_credentials' in retry_kwargs, \ - f"Retry call missing vertex_credentials - bug #185. Keys: {list(retry_kwargs.keys())}" - + # Neither call should have api_key (multi-credential) + for call in mock_completion.call_args_list: + call_kwargs = call[1] + assert 'api_key' not in call_kwargs, \ + "Multi-credential Vertex AI should NOT pass api_key=" - def test_vertex_kwargs_passed_on_retry_with_adc(self, mock_set_llm_cache): + def test_vertex_retry_with_adc(self, mock_set_llm_cache): """ - Test that retry calls include vertex_project and vertex_location - when using ADC (no VERTEX_CREDENTIALS file). + Test that retry calls work with ADC (no GOOGLE_APPLICATION_CREDENTIALS). 
""" mock_df = create_vertex_model_dataframe() env_vars = { - 'VERTEX_PROJECT': 'test-project', - 'VERTEX_LOCATION': 'us-east4', + 'VERTEXAI_PROJECT': 'test-project', + 'VERTEXAI_LOCATION': 'us-east4', 'PDD_FORCE_LOCAL': '1', } with patch('pdd.llm_invoke._load_model_data', return_value=mock_df): with patch.dict(os.environ, env_vars, clear=False): - # Ensure VERTEX_CREDENTIALS is not set - os.environ.pop('VERTEX_CREDENTIALS', None) + # Ensure GOOGLE_APPLICATION_CREDENTIALS is not set + os.environ.pop('GOOGLE_APPLICATION_CREDENTIALS', None) with patch('pdd.llm_invoke.litellm.completion') as mock_completion: # First call returns None content (triggers retry) - # Second call returns valid content first_response = create_mock_litellm_response(None) second_response = create_mock_litellm_response("Valid response") mock_completion.side_effect = [first_response, second_response] @@ -273,77 +234,8 @@ def test_vertex_kwargs_passed_on_retry_with_adc(self, mock_set_llm_cache): # Assert both calls were made assert mock_completion.call_count == 2 - # Get the retry call kwargs (second call) - retry_kwargs = mock_completion.call_args_list[1][1] - - # ADC path: project and location must be set, credentials must NOT - assert retry_kwargs.get('vertex_location') == 'us-east4', \ - "Retry call missing vertex_location with ADC" - assert retry_kwargs.get('vertex_project') == 'test-project', \ - "Retry call missing vertex_project with ADC" - assert 'vertex_credentials' not in retry_kwargs, \ - "ADC retry should NOT have vertex_credentials" - - -class TestVertexRetryIntegration: - """ - Integration tests that verify no us-central1 errors in retry logs. - - These tests require VERTEX_CREDENTIALS to be set and make real API calls. 
- """ - - @pytest.mark.skipif( - not os.getenv('VERTEX_CREDENTIALS'), - reason="VERTEX_CREDENTIALS not set - skipping integration test" - ) - def test_vertex_retry_no_location_error_in_logs(self, mock_set_llm_cache, caplog): - """ - Test that retry calls don't produce us-central1 location errors. - - This test mocks litellm.completion to avoid real API calls while still - verifying that the retry logic doesn't produce location-related errors. - """ - import logging - import json - - mock_df = create_vertex_model_dataframe() - - # Force retry by patching _has_invalid_python_code - call_count = [0] - def force_retry_once(obj): - call_count[0] += 1 - return call_count[0] == 1 - - with caplog.at_level(logging.WARNING): - with patch('pdd.llm_invoke._load_model_data', return_value=mock_df): - with patch('pdd.llm_invoke.litellm.completion') as mock_completion: - # Return valid JSON response - valid_json = json.dumps({ - "explanation": "Test explanation", - "code": "def hello(): pass" - }) - mock_response = create_mock_litellm_response(valid_json) - mock_completion.return_value = mock_response - - with patch('pdd.llm_invoke._has_invalid_python_code', side_effect=force_retry_once): - with patch('pdd.llm_invoke._LAST_CALLBACK_DATA', {"cost": 0.001, "input_tokens": 10, "output_tokens": 10}): - m = mock_open(read_data='{}') - with patch('builtins.open', m): - with patch('pdd.llm_invoke.json.load', return_value={}): - try: - llm_invoke( - "Generate a simple Python function: {x}", - {"x": "hello world"}, - 0.5, - 0.7, - True, - output_pydantic=CodeOutputModel - ) - except Exception: - pass # We're checking logs, not success - - # FAIL if us-central1 error appears in logs - assert 'us-central1' not in caplog.text, \ - f"Found us-central1 error in logs - bug #185: {caplog.text}" - assert 'not servable in region' not in caplog.text, \ - f"Found 'not servable in region' error - bug #185: {caplog.text}" + # No api_key or vertex-specific kwargs on any call + for call in 
mock_completion.call_args_list: + call_kwargs = call[1] + assert 'api_key' not in call_kwargs + assert 'vertex_credentials' not in call_kwargs From 90dac50a564c70987e80517a95c53a8b3a64d8b4 Mon Sep 17 00:00:00 2001 From: Niti Goyal Date: Fri, 20 Feb 2026 11:54:25 -0500 Subject: [PATCH 3/5] Improve GitHub Copilot OAuth flow - Skip GitHub Copilot models in --force/CI mode when no OAuth token exists, preventing litellm from hanging on an interactive device flow login - Respect litellm's GITHUB_COPILOT_TOKEN_DIR and GITHUB_COPILOT_API_KEY_FILE env vars when checking for the token - In pdd setup, trigger the actual OAuth device flow when a user adds GitHub Copilot as a provider, instead of just showing a "authenticate later" message - Update provider_manager_example to show the new GitHub Copilot provider flow - Tests/test_update_command.py now passes --- context/provider_manager_example.py | 35 +++++++++++++++++++++++++++-- pdd/llm_invoke.py | 17 ++++++++++++++ pdd/provider_manager.py | 31 ++++++++++++++++++++++--- 3 files changed, 78 insertions(+), 5 deletions(-) diff --git a/context/provider_manager_example.py b/context/provider_manager_example.py index a09690a36..d2d23f894 100644 --- a/context/provider_manager_example.py +++ b/context/provider_manager_example.py @@ -24,7 +24,8 @@ def main() -> None: 2. Add a custom LiteLLM-compatible provider 3. Remove all models for a provider (comments out the key) 4. Remove individual models from the user CSV - 5. Parse pipe-delimited api_key fields + 5. Add GitHub Copilot provider (OAuth device flow) + 6. 
Parse pipe-delimited api_key fields """ # Example 1: Browse providers from the bundled reference CSV @@ -62,7 +63,37 @@ def main() -> None: # Lists all models, user picks by number, removes selected rows # remove_individual_models() # Uncomment to run interactively - # Example 5: Utility functions for api_key field parsing + # Example 5: Add GitHub Copilot provider (OAuth device flow) + # When selecting "Github Copilot" from the registry, pdd setup triggers + # litellm's interactive OAuth device flow instead of prompting for an API key. + # add_provider_from_registry() # Uncomment to run interactively + + # Interactive flow: + # Add a provider + # + # ... + # 5. Github Copilot (9 models) + # ... + # Enter number (empty to cancel): 5 + # + # Github Copilot Setup + # + # GitHub Copilot authenticates via OAuth device flow. + # This will open a browser to authenticate with GitHub. + # + # Authenticate now? [Y/n]: Y + # Starting device flow authentication... + # + # + # ✓ GitHub Copilot authenticated successfully! + # ✓ Added 9 model(s) for Github Copilot to ~/.pdd/llm_model.csv + # + # NOTE: The OAuth token is cached by litellm at + # ~/.config/litellm/github_copilot/api-key.json (customizable via + # GITHUB_COPILOT_TOKEN_DIR and GITHUB_COPILOT_API_KEY_FILE env vars). + # In --force/CI mode, GitHub Copilot models are skipped if no token exists. 
+ + # Example 6: Utility functions for api_key field parsing # Useful when working with CSV rows that have pipe-delimited api_key fields single = parse_api_key_vars("OPENAI_API_KEY") print(f"Single key vars: {single}") # ['OPENAI_API_KEY'] diff --git a/pdd/llm_invoke.py b/pdd/llm_invoke.py index 5d98c0969..2f3440057 100644 --- a/pdd/llm_invoke.py +++ b/pdd/llm_invoke.py @@ -1178,6 +1178,23 @@ def _ensure_api_key(model_info: Dict[str, Any], newly_acquired_keys: Dict[str, b api_key_field = str(model_info.get('api_key', '') or '') if not api_key_field.strip() or api_key_field == "EXISTING_KEY": + # GitHub Copilot models use an interactive OAuth device flow managed by + # litellm. In non-interactive (--force) mode we must skip the model + # unless the user has already authenticated (token file exists). + model_name = str(model_info.get('model', '')) + if model_name.startswith("github_copilot/") and os.environ.get('PDD_FORCE'): + token_dir = Path(os.environ.get( + 'GITHUB_COPILOT_TOKEN_DIR', + str(Path.home() / ".config" / "litellm" / "github_copilot"), + )) + api_key_file = os.environ.get('GITHUB_COPILOT_API_KEY_FILE', 'api-key.json') + token_path = token_dir / api_key_file + if not token_path.exists(): + logger.warning( + f"Skipping GitHub Copilot model '{model_name}' in --force mode: " + f"no OAuth token found at {token_path}. Run 'pdd setup' to authenticate." 
+ ) + return False if verbose: logger.info(f"Skipping API key check for model {model_info.get('model')} (key field: {api_key_field!r})") return True # Device flow, local model, or handled elsewhere diff --git a/pdd/provider_manager.py b/pdd/provider_manager.py index ffa0f54c0..061190b6e 100644 --- a/pdd/provider_manager.py +++ b/pdd/provider_manager.py @@ -496,12 +496,37 @@ def _setup_complex_provider(provider_name: str) -> bool: if optional_names: console.print(f" Optional: {', '.join(optional_names)}") - # GitHub Copilot: explain device flow before prompting + # GitHub Copilot: trigger interactive OAuth device flow via litellm if provider_name == "Github Copilot": console.print( - "\n [dim]GitHub Copilot authenticates via device flow at runtime.\n" - " You can paste an API key now, or skip and authenticate later.[/dim]" + "\n [dim]GitHub Copilot authenticates via OAuth device flow.\n" + " This will open a browser to authenticate with GitHub.[/dim]\n" ) + if Confirm.ask(" Authenticate now?", default=True): + try: + import litellm + console.print(" [dim]Starting device flow authentication...[/dim]") + # A simple completion call triggers litellm's GitHub Copilot + # OAuth device flow, which prompts the user to visit a URL + # and enter a code. The resulting token is cached by litellm + # at ~/.config/litellm/github_copilot/api-key.json. + litellm.completion( + model="github_copilot/gpt-4o", + messages=[{"role": "user", "content": "Say OK"}], + timeout=120, + ) + console.print(" [green]✓ GitHub Copilot authenticated successfully![/green]") + return True + except KeyboardInterrupt: + console.print("\n [yellow]Authentication cancelled.[/yellow]") + return False + except Exception as e: + console.print(f" [red]Authentication failed: {e}[/red]") + console.print(" [dim]You can try again later with 'pdd setup'.[/dim]") + return False + else: + console.print(" [dim]Skipped. 
You can authenticate later with 'pdd setup'.[/dim]") + return False print() any_saved = False From 78cdd4079082b784863a7c52ffe38705f9642b93 Mon Sep 17 00:00:00 2001 From: Niti Goyal Date: Fri, 20 Feb 2026 18:23:59 -0500 Subject: [PATCH 4/5] Curate llm_model.csv, reducing size from 265 to 92 models - Use a Pareto filter to remove models that are strictly dominated (higher cost AND lower ELO) by another model from the same provider - Remove models with designated regions or 'fast' versions - Override buggy LiteLLM model costs with data from the internet - Update ELO scores to Coding Arena ELO scores (from Text ELO scores) --- pdd/data/llm_model.csv | 357 +++++++-------------------- pdd/generate_model_catalog.py | 448 ++++++++++++++++++++++------------ 2 files changed, 390 insertions(+), 415 deletions(-) diff --git a/pdd/data/llm_model.csv b/pdd/data/llm_model.csv index d3aafa406..204b40542 100644 --- a/pdd/data/llm_model.csv +++ b/pdd/data/llm_model.csv @@ -1,266 +1,93 @@ provider,model,input,output,coding_arena_elo,base_url,api_key,max_reasoning_tokens,structured_output,reasoning_type,location -AWS Bedrock,anthropic.claude-opus-4-6-v1,5.0,25.0,1530,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -AWS Bedrock,au.anthropic.claude-opus-4-6-v1,5.5,27.5,1530,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -Azure AI,azure_ai/claude-opus-4-6,5.0,25.0,1530,,AZURE_AI_API_KEY,128000,True,budget, -Anthropic,claude-opus-4-6,5.0,25.0,1530,,ANTHROPIC_API_KEY,128000,True,budget, -Anthropic,claude-opus-4-6-20260205,5.0,25.0,1530,,ANTHROPIC_API_KEY,128000,True,budget, -AWS Bedrock,eu.anthropic.claude-opus-4-6-v1,5.5,27.5,1530,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -Anthropic,fast/claude-opus-4-6,30.0,150.0,1530,,ANTHROPIC_API_KEY,128000,True,budget, -Anthropic,fast/claude-opus-4-6-20260205,30.0,150.0,1530,,ANTHROPIC_API_KEY,128000,True,budget, 
-Anthropic,fast/us/claude-opus-4-6,30.0,150.0,1530,,ANTHROPIC_API_KEY,128000,True,budget, -AWS Bedrock,global.anthropic.claude-opus-4-6-v1,5.0,25.0,1530,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -AWS Bedrock,us.anthropic.claude-opus-4-6-v1,5.5,27.5,1530,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -Anthropic,us/claude-opus-4-6,5.5,27.5,1530,,ANTHROPIC_API_KEY,128000,True,budget, -Anthropic,us/claude-opus-4-6-20260205,5.5,27.5,1530,,ANTHROPIC_API_KEY,128000,True,budget, -Vercel AI Gateway,vercel_ai_gateway/anthropic/claude-opus-4.6,5.0,25.0,1530,,VERCEL_AI_GATEWAY_API_KEY,0,True,effort, -Google Vertex AI,vertex_ai/claude-opus-4-6,5.0,25.0,1530,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global -Google Vertex AI,vertex_ai/claude-opus-4-6@default,5.0,25.0,1530,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global -Google Vertex AI,gemini-3-pro-preview,2.0,12.0,1501,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global -Google Gemini,gemini/gemini-3-pro-preview,2.0,12.0,1501,,GEMINI_API_KEY,0,True,effort, -Github Copilot,github_copilot/gemini-3-pro-preview,0.0,0.0,1501,,,0,True,none, -GMI Cloud,gmi/google/gemini-3-pro-preview,2.0,12.0,1501,,GMI_API_KEY,0,True,none, -OpenRouter,openrouter/google/gemini-3-pro-preview,2.0,12.0,1501,,OPENROUTER_API_KEY,0,True,effort, -Replicate,replicate/google/gemini-3-pro,2.0,12.0,1501,,REPLICATE_API_KEY,0,True,none, -Google Vertex AI,vertex_ai/gemini-3-pro-preview,2.0,12.0,1501,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global -AWS Bedrock,anthropic.claude-opus-4-5-20251101-v1:0,5.0,25.0,1496,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -Azure AI,azure_ai/claude-opus-4-5,5.0,25.0,1496,,AZURE_AI_API_KEY,128000,True,budget, -Anthropic,claude-opus-4-5,5.0,25.0,1496,,ANTHROPIC_API_KEY,128000,True,budget, 
-Anthropic,claude-opus-4-5-20251101,5.0,25.0,1496,,ANTHROPIC_API_KEY,128000,True,budget, -AWS Bedrock,eu.anthropic.claude-opus-4-5-20251101-v1:0,5.0,25.0,1496,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -Github Copilot,github_copilot/claude-opus-4.5,0.0,0.0,1496,,,0,True,none, -AWS Bedrock,global.anthropic.claude-opus-4-5-20251101-v1:0,5.0,25.0,1496,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -GMI Cloud,gmi/anthropic/claude-opus-4.5,5.0,25.0,1496,,GMI_API_KEY,0,True,none, -OpenRouter,openrouter/anthropic/claude-opus-4.5,5.0,25.0,1496,,OPENROUTER_API_KEY,0,True,effort, -AWS Bedrock,us.anthropic.claude-opus-4-5-20251101-v1:0,5.5,27.5,1496,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -Vercel AI Gateway,vercel_ai_gateway/anthropic/claude-opus-4.5,5.0,25.0,1496,,VERCEL_AI_GATEWAY_API_KEY,0,True,effort, -Google Vertex AI,vertex_ai/claude-opus-4-5,5.0,25.0,1496,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global -Google Vertex AI,vertex_ai/claude-opus-4-5@20251101,5.0,25.0,1496,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global -AWS Bedrock,anthropic.claude-sonnet-4-6,3.0,15.0,1485,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -AWS Bedrock,apac.anthropic.claude-sonnet-4-6,3.3,16.5,1485,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -Azure AI,azure_ai/claude-sonnet-4-6,3.0,15.0,1485,,AZURE_AI_API_KEY,128000,True,budget, -Anthropic,claude-sonnet-4-6,3.0,15.0,1485,,ANTHROPIC_API_KEY,128000,True,budget, -AWS Bedrock,eu.anthropic.claude-sonnet-4-6,3.3,16.5,1485,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -AWS Bedrock,global.anthropic.claude-sonnet-4-6,3.0,15.0,1485,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -AWS 
Bedrock,us.anthropic.claude-sonnet-4-6,3.3,16.5,1485,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -Anthropic,us/claude-sonnet-4-6,3.3,16.5,1485,,ANTHROPIC_API_KEY,128000,True,budget, -Google Vertex AI,vertex_ai/claude-sonnet-4-6,3.0,15.0,1485,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global -Google Vertex AI,vertex_ai/claude-sonnet-4-6@default,3.0,15.0,1485,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global -Azure AI,azure_ai/kimi-k2.5,0.6,3.0,1480,,AZURE_AI_API_KEY,0,True,none, -AWS Bedrock,bedrock/ap-northeast-1/moonshotai.kimi-k2.5,0.72,3.6,1480,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, -AWS Bedrock,bedrock/ap-south-1/moonshotai.kimi-k2.5,0.72,3.6,1480,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, -AWS Bedrock,bedrock/ap-southeast-3/moonshotai.kimi-k2.5,0.72,3.6,1480,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, -AWS Bedrock,bedrock/eu-north-1/moonshotai.kimi-k2.5,0.72,3.6,1480,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, -AWS Bedrock,bedrock/moonshotai.kimi-k2.5,0.6,3.03,1480,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -AWS Bedrock,bedrock/sa-east-1/moonshotai.kimi-k2.5,0.72,3.6,1480,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, -AWS Bedrock,bedrock/us-east-1/moonshotai.kimi-k2.5,0.6,3.0,1480,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, -AWS Bedrock,bedrock/us-east-2/moonshotai.kimi-k2.5,0.6,3.0,1480,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, -AWS Bedrock,bedrock/us-west-2/moonshotai.kimi-k2.5,0.6,3.0,1480,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, -Moonshot AI,moonshot/kimi-k2.5,0.6,3.0,1480,,MOONSHOT_API_KEY,0,True,none, -AWS 
Bedrock,moonshotai.kimi-k2.5,0.6,3.0,1480,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, -OpenRouter,openrouter/moonshotai/kimi-k2.5,0.6,3.0,1480,,OPENROUTER_API_KEY,0,True,none, -Together AI,together_ai/moonshotai/Kimi-K2.5,0.5,2.8,1480,,TOGETHERAI_API_KEY,0,True,effort, -AWS Bedrock,anthropic.claude-opus-4-1-20250805-v1:0,15.0,75.0,1475,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -Azure AI,azure_ai/claude-opus-4-1,15.0,75.0,1475,,AZURE_AI_API_KEY,128000,True,budget, -Anthropic,claude-opus-4-1,15.0,75.0,1475,,ANTHROPIC_API_KEY,128000,True,budget, -Anthropic,claude-opus-4-1-20250805,15.0,75.0,1475,,ANTHROPIC_API_KEY,128000,True,budget, -AWS Bedrock,eu.anthropic.claude-opus-4-1-20250805-v1:0,15.0,75.0,1475,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -OpenRouter,openrouter/anthropic/claude-opus-4.1,15.0,75.0,1475,,OPENROUTER_API_KEY,0,True,effort, -AWS Bedrock,us.anthropic.claude-opus-4-1-20250805-v1:0,15.0,75.0,1475,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -Vercel AI Gateway,vercel_ai_gateway/anthropic/claude-opus-4.1,15.0,75.0,1475,,VERCEL_AI_GATEWAY_API_KEY,0,True,effort, -Google Vertex AI,vertex_ai/claude-opus-4-1,15.0,75.0,1475,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,none,global -Google Vertex AI,vertex_ai/claude-opus-4-1@20250805,15.0,75.0,1475,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,none,global -Google Vertex AI,gemini-3-flash-preview,0.5,3.0,1469,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global -Google Gemini,gemini/gemini-3-flash-preview,0.5,3.0,1469,,GEMINI_API_KEY,0,True,effort, -GMI Cloud,gmi/google/gemini-3-flash-preview,0.5,3.0,1469,,GMI_API_KEY,0,True,none, -OpenRouter,openrouter/google/gemini-3-flash-preview,0.5,3.0,1469,,OPENROUTER_API_KEY,0,True,effort, -Google Vertex 
AI,vertex_ai/gemini-3-flash-preview,0.5,3.0,1469,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global -Dashscope,dashscope/qwen3-max,0.0,0.0,1468,,DASHSCOPE_API_KEY,0,True,effort, -Dashscope,dashscope/qwen3-max-preview,0.0,0.0,1468,,DASHSCOPE_API_KEY,0,True,effort, -Novita AI,novita/qwen/qwen3-max,2.11,8.45,1468,,NOVITA_API_KEY,0,True,none, -Azure OpenAI,azure/gpt-5.2,1.75,14.0,1465,,AZURE_API_KEY|AZURE_API_BASE|AZURE_API_VERSION,0,True,effort, -DeepInfra,deepinfra/google/gemini-2.5-pro,1.25,10.0,1465,,DEEPINFRA_API_KEY,0,False,none, -Google Vertex AI,gemini-2.5-pro,1.25,10.0,1465,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global -Google Gemini,gemini/gemini-2.5-pro,1.25,10.0,1465,,GEMINI_API_KEY,0,True,effort, -Github Copilot,github_copilot/gemini-2.5-pro,0.0,0.0,1465,,,0,True,none, -Github Copilot,github_copilot/gpt-5.2,0.0,0.0,1465,,,0,True,none, -GMI Cloud,gmi/openai/gpt-5.2,1.75,14.0,1465,,GMI_API_KEY,0,True,none, -OpenAI,gpt-5.2,1.75,14.0,1465,,OPENAI_API_KEY,0,True,effort, -OpenRouter,openrouter/google/gemini-2.5-pro,1.25,10.0,1465,,OPENROUTER_API_KEY,0,True,none, -OpenRouter,openrouter/openai/gpt-5.2,1.75,14.0,1465,,OPENROUTER_API_KEY,0,True,effort, -Vercel AI Gateway,vercel_ai_gateway/google/gemini-2.5-pro,2.5,10.0,1465,,VERCEL_AI_GATEWAY_API_KEY,0,True,none, -AWS Bedrock,anthropic.claude-sonnet-4-5-20250929-v1:0,3.0,15.0,1464,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -AWS Bedrock,au.anthropic.claude-sonnet-4-5-20250929-v1:0,3.3,16.5,1464,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -Azure AI,azure_ai/claude-sonnet-4-5,3.0,15.0,1464,,AZURE_AI_API_KEY,128000,True,budget, -Anthropic,claude-sonnet-4-5,3.0,15.0,1464,,ANTHROPIC_API_KEY,128000,True,budget, -Anthropic,claude-sonnet-4-5-20250929,3.0,15.0,1464,,ANTHROPIC_API_KEY,128000,True,budget, -AWS 
Bedrock,claude-sonnet-4-5-20250929-v1:0,3.0,15.0,1464,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -AWS Bedrock,eu.anthropic.claude-sonnet-4-5-20250929-v1:0,3.3,16.5,1464,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -Github Copilot,github_copilot/claude-sonnet-4.5,0.0,0.0,1464,,,0,True,none, -AWS Bedrock,global.anthropic.claude-sonnet-4-5-20250929-v1:0,3.0,15.0,1464,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -GMI Cloud,gmi/anthropic/claude-sonnet-4.5,3.0,15.0,1464,,GMI_API_KEY,0,True,none, -AWS Bedrock,jp.anthropic.claude-sonnet-4-5-20250929-v1:0,3.3,16.5,1464,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -OpenRouter,openrouter/anthropic/claude-sonnet-4.5,3.0,15.0,1464,,OPENROUTER_API_KEY,0,True,effort, -AWS Bedrock,us.anthropic.claude-sonnet-4-5-20250929-v1:0,3.3,16.5,1464,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -Vercel AI Gateway,vercel_ai_gateway/anthropic/claude-sonnet-4.5,3.0,15.0,1464,,VERCEL_AI_GATEWAY_API_KEY,0,True,effort, -Google Vertex AI,vertex_ai/claude-sonnet-4-5,3.0,15.0,1464,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global -Google Vertex AI,vertex_ai/claude-sonnet-4-5@20250929,3.0,15.0,1464,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global -Azure OpenAI,azure/gpt-5,1.25,10.0,1460,,AZURE_API_KEY|AZURE_API_BASE|AZURE_API_VERSION,0,True,effort, -Github Copilot,github_copilot/gpt-5,0.0,0.0,1460,,,0,True,none, -GMI Cloud,gmi/openai/gpt-5,1.25,10.0,1460,,GMI_API_KEY,0,True,none, -OpenAI,gpt-5,1.25,10.0,1460,,OPENAI_API_KEY,0,True,effort, -OpenRouter,openrouter/openai/gpt-5,1.25,10.0,1460,,OPENROUTER_API_KEY,0,False,effort, -Replicate,replicate/openai/gpt-5,1.25,10.0,1460,,REPLICATE_API_KEY,0,True,none, -AWS Bedrock,zai.glm-4.7,0.6,2.2,1460,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, 
-DeepInfra,deepinfra/Qwen/Qwen3-235B-A22B-Instruct-2507,0.09,0.6,1457,,DEEPINFRA_API_KEY,0,False,none, -Fireworks AI,fireworks_ai/accounts/fireworks/models/qwen3-235b-a22b-instruct-2507,0.22,0.88,1457,,FIREWORKS_AI_API_KEY,0,False,none, -Novita AI,novita/qwen/qwen3-235b-a22b-instruct-2507,0.09,0.58,1457,,NOVITA_API_KEY,0,True,none, -Replicate,replicate/qwen/qwen3-235b-a22b-instruct-2507,0.264,1.06,1457,,REPLICATE_API_KEY,0,True,none, -Google Vertex AI,vertex_ai/qwen/qwen3-235b-a22b-instruct-2507-maas,0.25,1.0,1457,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,none,global -W&B Inference,wandb/Qwen/Qwen3-235B-A22B-Instruct-2507,10000.0,10000.0,1457,,WANDB_API_KEY,0,False,none, -Azure AI,azure_ai/grok-4,3.0,15.0,1453,,AZURE_AI_API_KEY,0,True,none, -Oci,oci/xai.grok-4,3.0,15.0,1453,,OCI_API_KEY,0,True,none, -OpenRouter,openrouter/x-ai/grok-4,3.0,15.0,1453,,OPENROUTER_API_KEY,0,True,effort, -Replicate,replicate/xai/grok-4,7.2,36.0,1453,,REPLICATE_API_KEY,0,True,none, -Vercel AI Gateway,vercel_ai_gateway/xai/grok-4,3.0,15.0,1453,,VERCEL_AI_GATEWAY_API_KEY,0,True,none, -xAI,xai/grok-4,3.0,15.0,1453,,XAI_API_KEY,0,True,none, -xAI,xai/grok-4-latest,3.0,15.0,1453,,XAI_API_KEY,0,True,none, -Azure OpenAI,azure/eu/gpt-5.1,1.38,11.0,1450,,AZURE_API_KEY|AZURE_API_BASE|AZURE_API_VERSION,0,True,effort, -Azure OpenAI,azure/global/gpt-5.1,1.25,10.0,1450,,AZURE_API_KEY|AZURE_API_BASE|AZURE_API_VERSION,0,True,effort, -Azure OpenAI,azure/gpt-5.1,1.25,10.0,1450,,AZURE_API_KEY|AZURE_API_BASE|AZURE_API_VERSION,0,True,effort, -Azure OpenAI,azure/mistral-large-latest,8.0,24.0,1450,,AZURE_API_KEY|AZURE_API_BASE|AZURE_API_VERSION,0,True,none, -Azure OpenAI,azure/us/gpt-5.1,1.38,11.0,1450,,AZURE_API_KEY|AZURE_API_BASE|AZURE_API_VERSION,0,True,effort, -Azure AI,azure_ai/mistral-large,4.0,12.0,1450,,AZURE_AI_API_KEY,0,True,none, -Azure AI,azure_ai/mistral-large-3,0.5,1.5,1450,,AZURE_AI_API_KEY,0,True,none, -Azure 
AI,azure_ai/mistral-large-latest,2.0,6.0,1450,,AZURE_AI_API_KEY,0,True,none, -AWS Bedrock,bedrock/ap-northeast-1/moonshotai.kimi-k2-thinking,0.73,3.03,1450,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -AWS Bedrock,bedrock/ap-south-1/moonshotai.kimi-k2-thinking,0.71,2.94,1450,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -AWS Bedrock,bedrock/moonshotai.kimi-k2-thinking,0.73,3.03,1450,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -AWS Bedrock,bedrock/sa-east-1/moonshotai.kimi-k2-thinking,0.73,3.03,1450,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -AWS Bedrock,bedrock/us-east-1/moonshotai.kimi-k2-thinking,0.6,2.5,1450,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -AWS Bedrock,bedrock/us-east-2/moonshotai.kimi-k2-thinking,0.6,2.5,1450,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -AWS Bedrock,bedrock/us-west-2/moonshotai.kimi-k2-thinking,0.6,2.5,1450,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -Fireworks AI,fireworks_ai/accounts/fireworks/models/kimi-k2-thinking,0.6,2.5,1450,,FIREWORKS_AI_API_KEY,0,True,none, -Github Copilot,github_copilot/gpt-5.1,0.0,0.0,1450,,,0,True,none, -GMI Cloud,gmi/moonshotai/Kimi-K2-Thinking,0.8,1.2,1450,,GMI_API_KEY,0,False,none, -GMI Cloud,gmi/openai/gpt-5.1,1.25,10.0,1450,,GMI_API_KEY,0,True,none, -OpenAI,gpt-5.1,1.25,10.0,1450,,OPENAI_API_KEY,0,True,effort, -Mistral AI,mistral/mistral-large-3,0.5,1.5,1450,,MISTRAL_API_KEY,0,True,none, -Mistral AI,mistral/mistral-large-latest,2.0,6.0,1450,,MISTRAL_API_KEY,0,True,none, -Moonshot AI,moonshot/kimi-k2-thinking,0.6,2.5,1450,,MOONSHOT_API_KEY,0,True,none, -Novita AI,novita/moonshotai/kimi-k2-thinking,0.6,2.5,1450,,NOVITA_API_KEY,0,True,effort, -OpenRouter,openrouter/mistralai/mistral-large,8.0,24.0,1450,,OPENROUTER_API_KEY,0,False,none, 
-Snowflake,snowflake/mistral-large,0.0,0.0,1450,,SNOWFLAKE_API_KEY,0,False,none, -Google Vertex AI,vertex_ai/mistral-large@2407,2.0,6.0,1450,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,none,global -Google Vertex AI,vertex_ai/mistral-large@2411-001,2.0,6.0,1450,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,none,global -Google Vertex AI,vertex_ai/mistral-large@latest,2.0,6.0,1450,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,none,global -Google Vertex AI,vertex_ai/moonshotai/kimi-k2-thinking-maas,0.6,2.5,1450,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,none,global -DeepInfra,deepinfra/Qwen/Qwen3-235B-A22B-Thinking-2507,0.3,2.9,1442,,DEEPINFRA_API_KEY,0,False,none, -Fireworks AI,fireworks_ai/accounts/fireworks/models/qwen3-235b-a22b-thinking-2507,0.22,0.88,1442,,FIREWORKS_AI_API_KEY,0,False,none, -Novita AI,novita/qwen/qwen3-235b-a22b-thinking-2507,0.3,3.0,1442,,NOVITA_API_KEY,0,True,effort, -OpenRouter,openrouter/qwen/qwen3-235b-a22b-thinking-2507,0.11,0.6,1442,,OPENROUTER_API_KEY,0,True,effort, -Together AI,together_ai/Qwen/Qwen3-235B-A22B-Thinking-2507,0.65,3.0,1442,,TOGETHERAI_API_KEY,0,True,none, -W&B Inference,wandb/Qwen/Qwen3-235B-A22B-Thinking-2507,10000.0,10000.0,1442,,WANDB_API_KEY,0,False,none, -Azure OpenAI,azure/o3,2.0,8.0,1441,,AZURE_API_KEY|AZURE_API_BASE|AZURE_API_VERSION,0,True,effort, -OpenAI,o3,2.0,8.0,1441,,OPENAI_API_KEY,0,True,effort, -Vercel AI Gateway,vercel_ai_gateway/openai/o3,2.0,8.0,1441,,VERCEL_AI_GATEWAY_API_KEY,0,True,none, -Azure AI,azure_ai/global/grok-3,3.0,15.0,1439,,AZURE_AI_API_KEY,0,True,none, -Azure AI,azure_ai/grok-3,3.0,15.0,1439,,AZURE_AI_API_KEY,0,True,none, -Oci,oci/xai.grok-3,3.0,15.0,1439,,OCI_API_KEY,0,True,none, -Vercel AI Gateway,vercel_ai_gateway/xai/grok-3,3.0,15.0,1439,,VERCEL_AI_GATEWAY_API_KEY,0,True,none, -xAI,xai/grok-3,3.0,15.0,1439,,XAI_API_KEY,0,True,none, 
-xAI,xai/grok-3-latest,3.0,15.0,1439,,XAI_API_KEY,0,True,none, -AWS Bedrock,anthropic.claude-haiku-4-5-20251001-v1:0,1.0,5.0,1436,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -AWS Bedrock,anthropic.claude-haiku-4-5@20251001,1.0,5.0,1436,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -AWS Bedrock,apac.anthropic.claude-haiku-4-5-20251001-v1:0,1.1,5.5,1436,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -AWS Bedrock,au.anthropic.claude-haiku-4-5-20251001-v1:0,1.1,5.5,1436,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -Azure AI,azure_ai/claude-haiku-4-5,1.0,5.0,1436,,AZURE_AI_API_KEY,128000,True,budget, -Anthropic,claude-haiku-4-5,1.0,5.0,1436,,ANTHROPIC_API_KEY,128000,True,budget, -Anthropic,claude-haiku-4-5-20251001,1.0,5.0,1436,,ANTHROPIC_API_KEY,128000,True,budget, -DeepInfra,deepinfra/deepseek-ai/DeepSeek-R1-0528,0.5,2.15,1436,,DEEPINFRA_API_KEY,0,False,none, -AWS Bedrock,eu.anthropic.claude-haiku-4-5-20251001-v1:0,1.1,5.5,1436,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -Fireworks AI,fireworks_ai/accounts/fireworks/models/deepseek-r1-0528,3.0,8.0,1436,,FIREWORKS_AI_API_KEY,0,True,none, -Github Copilot,github_copilot/claude-haiku-4.5,0.0,0.0,1436,,,0,True,none, -AWS Bedrock,global.anthropic.claude-haiku-4-5-20251001-v1:0,1.0,5.0,1436,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -Hyperbolic,hyperbolic/deepseek-ai/DeepSeek-R1-0528,0.25,0.25,1436,,HYPERBOLIC_API_KEY,0,True,none, -AWS Bedrock,jp.anthropic.claude-haiku-4-5-20251001-v1:0,1.1,5.5,1436,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -Lambda AI,lambda_ai/deepseek-r1-0528,0.2,0.6,1436,,LAMBDA_API_KEY,0,True,effort, -Novita AI,novita/deepseek/deepseek-r1-0528,0.7,2.5,1436,,NOVITA_API_KEY,0,True,effort, -OpenRouter,openrouter/anthropic/claude-haiku-4.5,1.0,5.0,1436,,OPENROUTER_API_KEY,0,True,effort, 
-OpenRouter,openrouter/deepseek/deepseek-r1-0528,0.5,2.15,1436,,OPENROUTER_API_KEY,0,True,effort, -AWS Bedrock,us.anthropic.claude-haiku-4-5-20251001-v1:0,1.1,5.5,1436,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -Vercel AI Gateway,vercel_ai_gateway/anthropic/claude-haiku-4.5,1.0,5.0,1436,,VERCEL_AI_GATEWAY_API_KEY,0,True,effort, -Google Vertex AI,vertex_ai/claude-haiku-4-5@20251001,1.0,5.0,1436,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global -Google Vertex AI,vertex_ai/deepseek-ai/deepseek-r1-0528-maas,1.35,5.4,1436,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global -W&B Inference,wandb/deepseek-ai/DeepSeek-R1-0528,135000.0,540000.0,1436,,WANDB_API_KEY,0,False,none, -Azure AI,azure_ai/deepseek-v3.2,0.58,1.68,1431,,AZURE_AI_API_KEY,128000,True,budget, -DeepSeek,deepseek/deepseek-v3.2,0.28,0.4,1431,,DEEPSEEK_API_KEY,0,True,effort, -GMI Cloud,gmi/deepseek-ai/DeepSeek-V3.2,0.28,0.4,1431,,GMI_API_KEY,0,True,none, -Novita AI,novita/deepseek/deepseek-v3.2,0.269,0.4,1431,,NOVITA_API_KEY,0,True,effort, -OpenRouter,openrouter/deepseek/deepseek-v3.2,0.28,0.4,1431,,OPENROUTER_API_KEY,0,True,effort, -Google Vertex AI,vertex_ai/deepseek-ai/deepseek-v3.2-maas,0.56,1.68,1431,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global -AWS Bedrock,bedrock/ap-northeast-1/minimax.minimax-m2.1,0.36,1.44,1430,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, -AWS Bedrock,bedrock/ap-south-1/minimax.minimax-m2.1,0.36,1.44,1430,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, -AWS Bedrock,bedrock/ap-southeast-3/minimax.minimax-m2.1,0.36,1.44,1430,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, -AWS Bedrock,bedrock/eu-central-1/minimax.minimax-m2.1,0.36,1.44,1430,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, -AWS 
Bedrock,bedrock/eu-north-1/minimax.minimax-m2.1,0.36,1.44,1430,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, -AWS Bedrock,bedrock/eu-south-1/minimax.minimax-m2.1,0.36,1.44,1430,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, -AWS Bedrock,bedrock/eu-west-1/minimax.minimax-m2.1,0.36,1.44,1430,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, -AWS Bedrock,bedrock/eu-west-2/minimax.minimax-m2.1,0.47,1.86,1430,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, -AWS Bedrock,bedrock/sa-east-1/minimax.minimax-m2.1,0.36,1.44,1430,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, -AWS Bedrock,bedrock/us-east-1/minimax.minimax-m2.1,0.3,1.2,1430,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, -AWS Bedrock,bedrock/us-east-2/minimax.minimax-m2.1,0.3,1.2,1430,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, -AWS Bedrock,bedrock/us-west-2/minimax.minimax-m2.1,0.3,1.2,1430,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, -DeepInfra,deepinfra/deepseek-ai/DeepSeek-V3.1,0.27,1.0,1430,,DEEPINFRA_API_KEY,0,False,effort, -Fireworks AI,fireworks_ai/accounts/fireworks/models/minimax-m2,0.3,1.2,1430,,FIREWORKS_AI_API_KEY,0,False,none, -GMI Cloud,gmi/MiniMaxAI/MiniMax-M2.1,0.3,1.2,1430,,GMI_API_KEY,0,False,none, -AWS Bedrock,minimax.minimax-m2,0.3,1.2,1430,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,False,none, -AWS Bedrock,minimax.minimax-m2.1,0.3,1.2,1430,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, -Novita AI,novita/deepseek/deepseek-v3.1,0.27,1.0,1430,,NOVITA_API_KEY,0,True,effort, -Replicate,replicate/deepseek-ai/deepseek-v3.1,0.672,2.016,1430,,REPLICATE_API_KEY,0,True,effort, -SambaNova,sambanova/DeepSeek-V3.1,3.0,4.5,1430,,SAMBANOVA_API_KEY,0,True,effort, -Together AI,together_ai/deepseek-ai/DeepSeek-V3.1,0.6,1.7,1430,,TOGETHERAI_API_KEY,0,True,effort, -Google Vertex 
AI,vertex_ai/deepseek-ai/deepseek-v3.1-maas,1.35,5.4,1430,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global -Google Vertex AI,vertex_ai/minimaxai/minimax-m2-maas,0.3,1.2,1430,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,none,global -W&B Inference,wandb/deepseek-ai/DeepSeek-V3.1,55000.0,165000.0,1430,,WANDB_API_KEY,0,False,none, -DeepInfra,deepinfra/google/gemini-2.5-flash,0.3,2.5,1420,,DEEPINFRA_API_KEY,0,False,none, -Google Vertex AI,gemini-2.5-flash,0.3,2.5,1420,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global -Google Gemini,gemini/gemini-2.5-flash,0.3,2.5,1420,,GEMINI_API_KEY,0,True,effort, -OpenRouter,openrouter/google/gemini-2.5-flash,0.3,2.5,1420,,OPENROUTER_API_KEY,0,True,none, -Replicate,replicate/google/gemini-2.5-flash,2.5,2.5,1420,,REPLICATE_API_KEY,0,True,none, -Vercel AI Gateway,vercel_ai_gateway/google/gemini-2.5-flash,0.3,2.5,1420,,VERCEL_AI_GATEWAY_API_KEY,0,True,none, -Azure OpenAI,azure/gpt-4.5-preview,75.0,150.0,1419,,AZURE_API_KEY|AZURE_API_BASE|AZURE_API_VERSION,0,True,none, -Azure OpenAI,azure/gpt-5-mini,0.25,2.0,1419,,AZURE_API_KEY|AZURE_API_BASE|AZURE_API_VERSION,0,True,effort, -Github Copilot,github_copilot/gpt-5-mini,0.0,0.0,1419,,,0,True,none, -OpenAI,gpt-4.5-preview,75.0,150.0,1419,,OPENAI_API_KEY,0,True,none, -OpenAI,gpt-5-mini,0.25,2.0,1419,,OPENAI_API_KEY,0,True,effort, -OpenRouter,openrouter/openai/gpt-5-mini,0.25,2.0,1419,,OPENROUTER_API_KEY,0,False,effort, -Replicate,replicate/openai/gpt-5-mini,0.25,2.0,1419,,REPLICATE_API_KEY,0,True,none, -DeepInfra,deepinfra/Qwen/Qwen3-Coder-480B-A35B-Instruct,0.4,1.6,1406,,DEEPINFRA_API_KEY,0,False,none, -DeepInfra,deepinfra/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo,0.29,1.2,1406,,DEEPINFRA_API_KEY,0,False,none, -Fireworks AI,fireworks_ai/accounts/fireworks/models/qwen3-coder-480b-a35b-instruct,0.45,1.8,1406,,FIREWORKS_AI_API_KEY,0,False,effort, -Novita 
AI,novita/qwen/qwen3-coder-480b-a35b-instruct,0.3,1.3,1406,,NOVITA_API_KEY,0,True,none, -AWS Bedrock,qwen.qwen3-coder-480b-a35b-v1:0,0.22,1.8,1406,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -Together AI,together_ai/Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8,2.0,2.0,1406,,TOGETHERAI_API_KEY,0,True,none, -Google Vertex AI,vertex_ai/qwen/qwen3-coder-480b-a35b-instruct-maas,1.0,4.0,1406,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,none,global -W&B Inference,wandb/Qwen/Qwen3-Coder-480B-A35B-Instruct,100000.0,150000.0,1406,,WANDB_API_KEY,0,False,none, -AWS Bedrock,anthropic.claude-opus-4-20250514-v1:0,15.0,75.0,1405,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -Anthropic,claude-4-opus-20250514,15.0,75.0,1405,,ANTHROPIC_API_KEY,128000,True,budget, -Anthropic,claude-opus-4-20250514,15.0,75.0,1405,,ANTHROPIC_API_KEY,128000,True,budget, -DeepInfra,deepinfra/anthropic/claude-4-opus,16.5,82.5,1405,,DEEPINFRA_API_KEY,0,False,none, -AWS Bedrock,eu.anthropic.claude-opus-4-20250514-v1:0,15.0,75.0,1405,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -GMI Cloud,gmi/anthropic/claude-opus-4,15.0,75.0,1405,,GMI_API_KEY,0,True,none, -OpenRouter,openrouter/anthropic/claude-opus-4,15.0,75.0,1405,,OPENROUTER_API_KEY,0,True,effort, -AWS Bedrock,us.anthropic.claude-opus-4-20250514-v1:0,15.0,75.0,1405,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, -Vercel AI Gateway,vercel_ai_gateway/anthropic/claude-4-opus,15.0,75.0,1405,,VERCEL_AI_GATEWAY_API_KEY,0,True,none, -Vercel AI Gateway,vercel_ai_gateway/anthropic/claude-opus-4,15.0,75.0,1405,,VERCEL_AI_GATEWAY_API_KEY,0,True,effort, -Google Vertex AI,vertex_ai/claude-opus-4,15.0,75.0,1405,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global -Google Vertex 
AI,vertex_ai/claude-opus-4@20250514,15.0,75.0,1405,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global -Moonshot AI,moonshot/kimi-k2-0905-preview,0.6,2.5,1403,,MOONSHOT_API_KEY,0,True,none, -Novita AI,novita/moonshotai/kimi-k2-0905,0.6,2.5,1403,,NOVITA_API_KEY,0,True,none, -DeepInfra,deepinfra/moonshotai/Kimi-K2-Instruct,0.5,2.0,1402,,DEEPINFRA_API_KEY,0,False,none, -Fireworks AI,fireworks_ai/accounts/fireworks/models/kimi-k2-instruct,0.6,2.5,1402,,FIREWORKS_AI_API_KEY,0,True,none, -Hyperbolic,hyperbolic/moonshotai/Kimi-K2-Instruct,2.0,2.0,1402,,HYPERBOLIC_API_KEY,0,True,none, -Moonshot AI,moonshot/kimi-k2-0711-preview,0.6,2.5,1402,,MOONSHOT_API_KEY,0,True,none, -Novita AI,novita/moonshotai/kimi-k2-instruct,0.57,2.3,1402,,NOVITA_API_KEY,0,True,none, -Together AI,together_ai/moonshotai/Kimi-K2-Instruct,1.0,3.0,1402,,TOGETHERAI_API_KEY,0,True,none, -W&B Inference,wandb/moonshotai/Kimi-K2-Instruct,0.6,2.5,1402,,WANDB_API_KEY,0,False,none, +AWS Bedrock,anthropic.claude-opus-4-6-v1,5.0,25.0,1569,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +AWS Bedrock,moonshotai.kimi-k2.5,0.6,3.0,1446,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, +AWS Bedrock,zai.glm-4.7,0.6,2.2,1441,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,effort, +AWS Bedrock,minimax.minimax-m2.1,0.3,1.2,1405,,AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_REGION_NAME,0,True,none, +Anthropic,claude-opus-4-6,5.0,25.0,1569,,ANTHROPIC_API_KEY,128000,True,budget, +Anthropic,claude-sonnet-4-6,3.0,15.0,1388,,ANTHROPIC_API_KEY,128000,True,budget, +Anthropic,claude-haiku-4-5,1.0,5.0,1303,,ANTHROPIC_API_KEY,128000,True,budget, +Azure AI,azure_ai/claude-opus-4-6,5.0,25.0,1569,,AZURE_AI_API_KEY,128000,True,budget, +Azure AI,azure_ai/kimi-k2.5,0.6,3.0,1446,,AZURE_AI_API_KEY,0,True,none, +Azure AI,azure_ai/deepseek-v3.2,0.58,1.68,1310,,AZURE_AI_API_KEY,128000,True,budget, +Azure 
OpenAI,azure/gpt-5.2,1.75,14.0,1397,,AZURE_API_KEY|AZURE_API_BASE|AZURE_API_VERSION,0,True,effort, +Azure OpenAI,azure/gpt-5,1.25,10.0,1394,,AZURE_API_KEY|AZURE_API_BASE|AZURE_API_VERSION,0,True,effort, +Azure OpenAI,azure/o3,2.0,8.0,1370,,AZURE_API_KEY|AZURE_API_BASE|AZURE_API_VERSION,0,True,effort, +Azure OpenAI,azure/o4-mini,1.1,4.4,1330,,AZURE_API_KEY|AZURE_API_BASE|AZURE_API_VERSION,0,True,effort, +Azure OpenAI,azure/gpt-4.1-mini,0.4,1.6,1310,,AZURE_API_KEY|AZURE_API_BASE|AZURE_API_VERSION,0,True,none, +Dashscope,dashscope/qwen3-max-preview,0.0,0.0,1310,,DASHSCOPE_API_KEY,0,True,effort, +DeepInfra,deepinfra/deepseek-ai/DeepSeek-R1-0528,0.5,2.15,1370,,DEEPINFRA_API_KEY,0,False,none, +DeepInfra,deepinfra/moonshotai/Kimi-K2-Instruct,0.5,2.0,1310,,DEEPINFRA_API_KEY,0,False,none, +DeepInfra,deepinfra/deepseek-ai/DeepSeek-V3-0324,0.25,0.88,1300,,DEEPINFRA_API_KEY,0,False,none, +DeepSeek,deepseek-reasoner,0.28,0.42,1340,,DEEPSEEK_API_KEY,0,True,effort, +DeepSeek,deepseek/deepseek-v3.2,0.28,0.4,1310,,DEEPSEEK_API_KEY,0,True,effort, +Fireworks AI,fireworks_ai/accounts/fireworks/models/deepseek-r1-0528,3.0,8.0,1370,,FIREWORKS_AI_API_KEY,0,True,none, +Fireworks AI,fireworks_ai/accounts/fireworks/models/kimi-k2-thinking,0.6,2.5,1333,,FIREWORKS_AI_API_KEY,0,True,none, +Fireworks AI,fireworks_ai/accounts/fireworks/models/minimax-m2,0.3,1.2,1313,,FIREWORKS_AI_API_KEY,0,False,none, +Fireworks AI,fireworks_ai/accounts/fireworks/models/qwen3-235b-a22b-thinking-2507,0.22,0.88,1300,,FIREWORKS_AI_API_KEY,0,False,none, +GMI Cloud,gmi/anthropic/claude-opus-4.5,5.0,25.0,1471,,GMI_API_KEY,0,True,none, +GMI Cloud,gmi/google/gemini-3-pro-preview,2.0,12.0,1449,,GMI_API_KEY,0,True,none, +GMI Cloud,gmi/google/gemini-3-flash-preview,0.5,3.0,1443,,GMI_API_KEY,0,True,none, +GMI Cloud,gmi/MiniMaxAI/MiniMax-M2.1,0.3,1.2,1405,,GMI_API_KEY,0,False,none, +GMI Cloud,gmi/deepseek-ai/DeepSeek-V3.2,0.28,0.4,1310,,GMI_API_KEY,0,True,none, +Github 
Copilot,github_copilot/claude-opus-4.5,0.0,0.0,1471,,,0,True,none, +Github Copilot,github_copilot/gemini-3-pro-preview,0.0,0.0,1449,,,0,True,none, +Github Copilot,github_copilot/gpt-5.2,0.0,0.0,1397,,,0,True,none, +Github Copilot,github_copilot/gpt-5,0.0,0.0,1394,,,0,True,none, +Github Copilot,github_copilot/claude-sonnet-4.5,0.0,0.0,1386,,,0,True,none, +Github Copilot,github_copilot/gpt-4.1,0.0,0.0,1355,,,0,True,none, +Github Copilot,github_copilot/claude-sonnet-4,0.0,0.0,1350,,,0,True,none, +Github Copilot,github_copilot/gpt-5.1,0.0,0.0,1348,,,0,True,none, +Github Copilot,github_copilot/gpt-5-mini,0.0,0.0,1310,,,0,True,none, +Github Copilot,github_copilot/claude-haiku-4.5,0.0,0.0,1303,,,0,True,none, +Github Copilot,github_copilot/gpt-4o,0.0,0.0,1300,,,0,True,none, +Google Gemini,gemini/gemini-3-pro-preview,2.0,12.0,1449,,GEMINI_API_KEY,0,True,effort, +Google Gemini,gemini/gemini-3-flash-preview,0.5,3.0,1443,,GEMINI_API_KEY,0,True,effort, +Google Gemini,gemini/gemini-2.5-flash,0.3,2.5,1300,,GEMINI_API_KEY,0,True,effort, +Google Vertex AI,vertex_ai/claude-opus-4-6,5.0,25.0,1569,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global +Google Vertex AI,gemini-3-pro-preview,2.0,12.0,1449,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global +Google Vertex AI,vertex_ai/gemini-3-flash-preview,0.5,3.0,1443,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,effort,global +Google Vertex AI,vertex_ai/moonshotai/kimi-k2-thinking-maas,0.6,2.5,1333,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,none,global +Google Vertex AI,vertex_ai/minimaxai/minimax-m2-maas,0.3,1.2,1313,,GOOGLE_APPLICATION_CREDENTIALS|VERTEXAI_PROJECT|VERTEXAI_LOCATION,0,True,none,global +Heroku,heroku/claude-4-sonnet,3.0,15.0,1350,,HEROKU_API_KEY,0,True,none, +Hyperbolic,hyperbolic/deepseek-ai/DeepSeek-R1-0528,3.0,3.0,1370,,HYPERBOLIC_API_KEY,0,True,none, 
+Hyperbolic,hyperbolic/deepseek-ai/DeepSeek-R1,2.0,2.0,1340,,HYPERBOLIC_API_KEY,0,True,none, +Hyperbolic,hyperbolic/deepseek-ai/DeepSeek-V3,0.25,0.25,1300,,HYPERBOLIC_API_KEY,0,True,none, +Lambda AI,lambda_ai/deepseek-r1-0528,0.2,0.6,1370,,LAMBDA_API_KEY,0,True,effort, +Moonshot AI,moonshot/kimi-k2.5,0.6,3.0,1446,,MOONSHOT_API_KEY,0,True,none, +Moonshot AI,moonshot/kimi-k2-thinking,0.6,2.5,1333,,MOONSHOT_API_KEY,0,True,none, +Novita AI,novita/deepseek/deepseek-r1-0528,0.7,2.5,1370,,NOVITA_API_KEY,0,True,effort, +Novita AI,novita/moonshotai/kimi-k2-thinking,0.6,2.5,1333,,NOVITA_API_KEY,0,True,effort, +Novita AI,novita/deepseek/deepseek-v3.2,0.269,0.4,1310,,NOVITA_API_KEY,0,True,effort, +OpenAI,gpt-5.2,1.75,14.0,1397,,OPENAI_API_KEY,0,True,effort, +OpenAI,gpt-5,1.25,10.0,1394,,OPENAI_API_KEY,0,True,effort, +OpenAI,o3,2.0,8.0,1370,,OPENAI_API_KEY,0,True,effort, +OpenAI,o4-mini,1.1,4.4,1330,,OPENAI_API_KEY,0,True,effort, +OpenAI,gpt-4.1-mini,0.4,1.6,1310,,OPENAI_API_KEY,0,True,none, +OpenRouter,openrouter/anthropic/claude-opus-4.5,5.0,25.0,1471,,OPENROUTER_API_KEY,0,True,effort, +OpenRouter,openrouter/google/gemini-3-pro-preview,2.0,12.0,1449,,OPENROUTER_API_KEY,0,True,effort, +OpenRouter,openrouter/moonshotai/kimi-k2.5,0.6,3.0,1446,,OPENROUTER_API_KEY,0,True,none, +OpenRouter,openrouter/google/gemini-3-flash-preview,0.5,3.0,1443,,OPENROUTER_API_KEY,0,True,effort, +OpenRouter,openrouter/deepseek/deepseek-r1-0528,0.5,2.15,1370,,OPENROUTER_API_KEY,0,True,effort, +OpenRouter,openrouter/deepseek/deepseek-v3.2,0.28,0.4,1310,,OPENROUTER_API_KEY,0,True,effort, +OpenRouter,openrouter/deepseek/deepseek-chat,0.14,0.28,1300,,OPENROUTER_API_KEY,0,False,none, +Replicate,replicate/google/gemini-3-pro,2.0,12.0,1449,,REPLICATE_API_KEY,0,True,none, +Replicate,replicate/openai/gpt-5,1.25,10.0,1394,,REPLICATE_API_KEY,0,True,none, +Replicate,replicate/openai/gpt-4.1,2.0,8.0,1355,,REPLICATE_API_KEY,0,True,none, 
+Replicate,replicate/openai/o4-mini,1.0,4.0,1330,,REPLICATE_API_KEY,0,False,effort, +Replicate,replicate/openai/gpt-4.1-mini,0.4,1.6,1310,,REPLICATE_API_KEY,0,True,none, +SambaNova,sambanova/DeepSeek-R1,5.0,7.0,1340,,SAMBANOVA_API_KEY,0,False,none, +SambaNova,sambanova/DeepSeek-V3-0324,3.0,4.5,1300,,SAMBANOVA_API_KEY,0,True,effort, +SambaNova,sambanova/DeepSeek-V3.1,3.0,4.5,1300,,SAMBANOVA_API_KEY,0,True,effort, +Snowflake,snowflake/deepseek-r1,0.0,0.0,1340,,SNOWFLAKE_API_KEY,0,False,effort, +Snowflake,snowflake/claude-3-5-sonnet,0.0,0.0,1310,,SNOWFLAKE_API_KEY,0,False,none, +Together AI,together_ai/moonshotai/Kimi-K2.5,0.5,2.8,1446,,TOGETHERAI_API_KEY,0,True,effort, +Together AI,together_ai/deepseek-ai/DeepSeek-V3.1,0.6,1.7,1300,,TOGETHERAI_API_KEY,0,True,effort, +Vercel AI Gateway,vercel_ai_gateway/anthropic/claude-opus-4.6,5.0,25.0,1569,,VERCEL_AI_GATEWAY_API_KEY,0,True,effort, +Vercel AI Gateway,vercel_ai_gateway/anthropic/claude-sonnet-4.5,3.0,15.0,1386,,VERCEL_AI_GATEWAY_API_KEY,0,True,effort, +Vercel AI Gateway,vercel_ai_gateway/openai/o3,2.0,8.0,1370,,VERCEL_AI_GATEWAY_API_KEY,0,True,none, +Vercel AI Gateway,vercel_ai_gateway/deepseek/deepseek-r1,0.55,2.19,1340,,VERCEL_AI_GATEWAY_API_KEY,0,False,none, +Vercel AI Gateway,vercel_ai_gateway/openai/gpt-4.1-mini,0.4,1.6,1310,,VERCEL_AI_GATEWAY_API_KEY,0,True,none, +Vercel AI Gateway,vercel_ai_gateway/deepseek/deepseek-v3,0.9,0.9,1300,,VERCEL_AI_GATEWAY_API_KEY,0,False,none, +W&B Inference,wandb/deepseek-ai/DeepSeek-R1-0528,1.35,5.4,1370,,WANDB_API_KEY,0,False,none, +W&B Inference,wandb/moonshotai/Kimi-K2-Instruct,0.6,2.5,1310,,WANDB_API_KEY,0,False,none, +W&B Inference,wandb/Qwen/Qwen3-235B-A22B-Thinking-2507,0.1,0.1,1300,,WANDB_API_KEY,0,False,none, diff --git a/pdd/generate_model_catalog.py b/pdd/generate_model_catalog.py index 907d00725..4e1459051 100644 --- a/pdd/generate_model_catalog.py +++ b/pdd/generate_model_catalog.py @@ -26,6 +26,7 @@ import csv import re import sys +from collections import 
defaultdict from datetime import date from pathlib import Path from typing import Dict, List, Optional, Tuple @@ -33,7 +34,8 @@ # --------------------------------------------------------------------------- # ELO cutoff — models below this score are excluded from the catalog. # --------------------------------------------------------------------------- -ELO_CUTOFF = 1400 +ELO_CUTOFF = 1300 +MAX_COST_PER_MTOK = 100.0 # Sanity cap — drop rows with absurd pricing (LiteLLM bugs) # --------------------------------------------------------------------------- # ELO scores — canonical base model names mapped to coding arena ELO. @@ -46,183 +48,159 @@ # --------------------------------------------------------------------------- ELO_SCORES: Dict[str, int] = { # ----------------------------------------------------------------------- - # Anthropic Claude — dash-separated canonical form - # ----------------------------------------------------------------------- - "claude-opus-4-6": 1530, - "claude-opus-4-5": 1496, - "claude-opus-4": 1405, - "claude-opus-4-1": 1475, - "claude-sonnet-4-6": 1485, - "claude-sonnet-4-5": 1464, - "claude-sonnet-4": 1384, - "claude-3-7-sonnet": 1341, - "claude-3-5-sonnet-20241022": 1340, - "claude-3-5-sonnet-20240620": 1309, - "claude-3-5-sonnet": 1340, - "claude-haiku-4-5": 1436, - "claude-3-5-haiku": 1287, - "claude-3-opus": 1269, - "claude-3-haiku": 1208, - "claude-3-sonnet": 1232, - # Dot-separated variants (OpenRouter, GitHub Copilot, Vercel, GMI) - "claude-opus-4.6": 1530, - "claude-opus-4.5": 1496, - "claude-opus-4.1": 1475, - "claude-sonnet-4.6": 1485, - "claude-sonnet-4.5": 1464, - "claude-haiku-4.5": 1436, - "claude-3.5-sonnet": 1340, - "claude-3.5-haiku": 1287, - "claude-3.7-sonnet": 1341, - # Alternate naming: "claude-4-opus" / "claude-4-sonnet" - "claude-4-opus": 1405, - "claude-4-sonnet": 1384, + # Source: LMArena CODE Arena leaderboard, scraped Feb 20, 2026. + # - Scores marked [CODE] are directly from the Code Arena. 
+ # - Scores marked [EST] are estimated from Text Arena scores, + # discounted by ~40-60 pts based on observed Text→Code deltas + # for similar-tier models. + # - Only 40 models have Code Arena scores; many popular API models + # (o3, gpt-5-mini, deepseek-r1, etc.) are not on the Code Arena. + # ----------------------------------------------------------------------- + + # ----------------------------------------------------------------------- + # Anthropic Claude + # ----------------------------------------------------------------------- + "claude-opus-4-6": 1569, # [CODE] #2, 1824 votes + "claude-opus-4-5": 1471, # [CODE] #5, non-thinking variant + "claude-opus-4-1": 1389, # [CODE] #15 + "claude-opus-4": 1370, # [EST] from Text 1413, delta ~-43 + "claude-sonnet-4-6": 1388, # [EST] same tier as sonnet-4-5 (not yet on Code Arena) + "claude-sonnet-4-5": 1386, # [CODE] #18 + "claude-sonnet-4": 1350, # [EST] from Text 1397, delta ~-47 + "claude-3-7-sonnet": 1310, # [EST] from Text 1341 + "claude-3-5-sonnet-20241022": 1310, # [EST] from Text 1340 + "claude-3-5-sonnet": 1310, # [EST] + "claude-haiku-4-5": 1303, # [CODE] #28 + # Dot-separated aliases + "claude-opus-4.6": 1569, + "claude-opus-4.5": 1471, + "claude-opus-4.1": 1389, + "claude-sonnet-4.6": 1388, + "claude-sonnet-4.5": 1386, + "claude-haiku-4.5": 1303, + "claude-3.5-sonnet": 1310, + "claude-3.7-sonnet": 1310, + # Alternate naming + "claude-4-opus": 1370, + "claude-4-sonnet": 1350, + # ----------------------------------------------------------------------- # OpenAI — GPT-5 family # ----------------------------------------------------------------------- - "gpt-5": 1460, - "gpt-5.1": 1450, - "gpt-5.2": 1465, - "gpt-5-mini": 1419, - "gpt-5-nano": 1363, + "gpt-5.2": 1397, # [CODE] #13 (default reasoning) + "gpt-5.1": 1348, # [CODE] #21 (default); gpt-5.1-medium = 1389 + "gpt-5": 1394, # [CODE] #14 as gpt-5-medium + "gpt-5-mini": 1310, # [EST] from Text ~1385, heavy code discount # OpenAI — GPT-4.x - "gpt-4.5": 
1419, - "gpt-4.1": 1396, - "gpt-4.1-mini": 1370, - "gpt-4.1-nano": 1312, - "gpt-4o": 1307, - "gpt-4o-2024-08-06": 1307, - "gpt-4o-2024-11-20": 1307, - "gpt-4o-mini": 1300, - "gpt-4-turbo": 1280, - "gpt-4-0125-preview": 1261, - "gpt-4-1106-preview": 1269, + "gpt-4.5": 1380, # [EST] from Text 1444, delta ~-64 + "gpt-4.1": 1355, # [EST] from Text 1413, delta ~-58 + "gpt-4.1-mini": 1310, # [EST] from Text 1370 + "gpt-4o": 1300, # [EST] # OpenAI — o-series - "o3": 1441, - "o4-mini": 1385, - "o3-mini": 1361, - "o1": 1378, - "o1-mini": 1366, - "o1-preview": 1378, - # OpenAI — gpt-oss - "gpt-oss-120b": 1398, - "gpt-oss-20b": 1371, + "o3": 1370, # [EST] from Text 1432, delta ~-62 + "o4-mini": 1330, # [EST] from Text 1385 + "o3-mini": 1310, # [EST] from Text 1361 + "o1": 1340, # [EST] from Text 1402, delta ~-62 + "o1-mini": 1315, # [EST] from Text 1366 + # ----------------------------------------------------------------------- # Google Gemini # ----------------------------------------------------------------------- - "gemini-3-pro": 1501, - "gemini-3-pro-preview": 1501, - "gemini-3-flash": 1469, - "gemini-3-flash-preview": 1469, - "gemini-2.5-pro": 1465, - "gemini-2.5-flash": 1420, - "gemini-2.0-flash": 1371, - "gemini-2.0-flash-thinking": 1383, - "gemini-1.5-pro": 1311, - "gemini-1.5-flash": 1273, + "gemini-3-pro": 1449, # [CODE] #6 + "gemini-3-pro-preview": 1449, + "gemini-3-flash": 1443, # [CODE] #8 + "gemini-3-flash-preview": 1443, + "gemini-2.5-pro": 1206, # [CODE] #35 ← huge Text→Code drop! 
+ "gemini-2.5-flash": 1300, # [EST] from Text 1411, delta ~-111 (like 2.5-pro) + # ----------------------------------------------------------------------- # DeepSeek # ----------------------------------------------------------------------- - "deepseek-r1": 1382, - "deepseek-r1-0528": 1436, - "deepseek-reasoner": 1382, - "deepseek-chat": 1337, - "deepseek-v3": 1337, - "deepseek-v3-0324": 1391, - "deepseek-v3.1": 1430, - "deepseek-v3.2": 1431, + "deepseek-v3.2": 1310, # [CODE] #27 + "deepseek-v3.1": 1300, # [EST] from Text 1418, similar to v3.2 code perf + "deepseek-r1-0528": 1370, # [EST] from Text 1419; r1 models reason well + "deepseek-r1": 1340, # [EST] from Text 1382 + "deepseek-reasoner": 1340, # alias for deepseek-r1 + "deepseek-v3-0324": 1300, # [EST] from Text 1391 + "deepseek-v3": 1300, # [EST] + "deepseek-chat": 1300, # alias + # ----------------------------------------------------------------------- # xAI / Grok # ----------------------------------------------------------------------- - "grok-4.1": 1483, - "grok-4": 1453, - "grok-4-fast": 1441, - "grok-3": 1439, - "grok-3-mini": 1380, - "grok-2": 1298, + "grok-4.1-thinking": 1204, # [CODE] #36 — very low code score despite high text + "grok-4.1": 1200, # [EST] non-thinking likely similar or lower + "grok-4": 1200, # [EST] not on Code Arena; grok-4-fast-reasoning = 1153 + "grok-4-fast": 1153, # [CODE] #38 + "grok-3": 1200, # [EST] + # ----------------------------------------------------------------------- # Mistral # ----------------------------------------------------------------------- - "mistral-large": 1450, - "mistral-large-3": 1450, - "mistral-medium-3": 1387, - "mistral-medium-3.1": 1412, - "magistral-medium": 1307, - "magistral-small": 1330, - "codestral": 1300, - "mistral-small-3.1": 1295, - "mistral-small-3.2": 1361, - "mistral-small-3": 1251, + "mistral-large": 1223, # [CODE] #34 + "mistral-large-3": 1223, # [CODE] #34 + # ----------------------------------------------------------------------- # 
Moonshot / Kimi # ----------------------------------------------------------------------- - "kimi-k2.5": 1480, - "kimi-k2-instruct": 1402, - "kimi-k2-thinking": 1450, - "kimi-k2-0905": 1403, - "kimi-k2-0711": 1402, - # ----------------------------------------------------------------------- - # Meta Llama - # ----------------------------------------------------------------------- - "llama-4-maverick-17b-128e": 1312, - "llama-4-scout-17b-16e": 1290, - "llama-3.3-70b": 1279, - "llama-3.1-405b": 1299, - "llama-3.1-70b": 1268, - "llama-3.1-8b": 1203, - "llama-3-70b": 1216, + "kimi-k2.5": 1446, # [CODE] #7, kimi-k2.5-thinking + "kimi-k2.5-instant": 1421, # [CODE] #10 + "kimi-k2-thinking": 1333, # [CODE] #23, kimi-k2-thinking-turbo + "kimi-k2-instruct": 1310, # [EST] + "kimi-k2-0905": 1310, # [EST] + "kimi-k2-0711": 1310, # [EST] + # ----------------------------------------------------------------------- # Qwen / Alibaba # ----------------------------------------------------------------------- - "qwen3-max": 1468, - "qwen3-235b-a22b": 1394, - "qwen3-235b-a22b-instruct-2507": 1457, - "qwen3-235b-a22b-thinking-2507": 1442, - "qwen3-32b": 1376, - "qwen3-30b-a3b": 1346, - "qwen3-coder-480b-a35b": 1406, - "qwq-32b": 1351, - "qwen2.5-72b": 1302, - "qwen2.5-max": 1373, + "qwen3-coder-480b-a35b": 1280, # [CODE] #30 + "qwen3-235b-a22b-instruct-2507": 1280, # [EST] similar to qwen3-coder code perf + "qwen3-235b-a22b-thinking-2507": 1300, # [EST] + "qwen3-max": 1310, # [EST] from Text 1434 + "qwen3-235b-a22b": 1280, # [EST] + "qwen3-32b": 1260, # [EST] + # ----------------------------------------------------------------------- # GLM (Zhipu AI / ZAI) # ----------------------------------------------------------------------- - "glm-5": 1461, - "glm-4.7": 1460, - "glm-4.6": 1458, - "glm-4.5": 1448, - "glm-4.5-air": 1410, + "glm-4.7": 1441, # [CODE] #9 + "glm-4.6": 1357, # [CODE] #20 + "glm-5": 1420, # [EST] added to Code Arena Feb 12, not in my scrape + # 
----------------------------------------------------------------------- # Minimax # ----------------------------------------------------------------------- - "minimax-m2.1": 1430, - "minimax-m1": 1369, - "minimax-m2": 1430, - # ----------------------------------------------------------------------- - # Amazon Nova - # ----------------------------------------------------------------------- - "nova-pro": 1282, - "nova-lite": 1253, - "nova-micro": 1228, + "minimax-m2.1": 1405, # [CODE] #12 + "minimax-m2": 1313, # [CODE] #26 + # ----------------------------------------------------------------------- # MiMo (Xiaomi) # ----------------------------------------------------------------------- - "mimo-v2-flash": 1411, - # ----------------------------------------------------------------------- - # Gemma (Google open) - # ----------------------------------------------------------------------- - "gemma-3-27b": 1350, - "gemma-3-12b": 1310, - "gemma-3-4b": 1265, - # ----------------------------------------------------------------------- - # NVIDIA Nemotron - # ----------------------------------------------------------------------- - "llama-3.3-nemotron-super-49b": 1359, - "llama-3.1-nemotron-70b": 1289, - # ----------------------------------------------------------------------- - # Phi (Microsoft) - # ----------------------------------------------------------------------- - "phi-4": 1242, + "mimo-v2-flash": 1340, # [CODE] #22 +} + +# --------------------------------------------------------------------------- +# Price overrides — (input_per_mtok, output_per_mtok). +# Use this to correct known LiteLLM pricing bugs or supply missing prices. 
+# ---------------------------------------------------------------------------
+PRICE_OVERRIDES: Dict[str, Tuple[float, float]] = {
+    # Hyperbolic uses unified pricing; LiteLLM has V3 price for R1-0528
+    "hyperbolic/deepseek-ai/DeepSeek-R1-0528": (3.0, 3.0),
+    # W&B prices are off by ~100,000x in LiteLLM (github.com/BerriAI/litellm/issues/17417)
+    "wandb/Qwen/Qwen3-235B-A22B-Instruct-2507": (0.10, 0.10),
+    "wandb/Qwen/Qwen3-235B-A22B-Thinking-2507": (0.10, 0.10),
+    "wandb/deepseek-ai/DeepSeek-R1-0528": (1.35, 5.40),
+    "wandb/deepseek-ai/DeepSeek-V3.1": (0.55, 1.65),
+    "wandb/Qwen/Qwen3-Coder-480B-A35B-Instruct": (1.0, 1.5),
+    # Vercel has long-context rate ($2.50) instead of standard ($1.25) for Gemini 2.5 Pro
+    "vercel_ai_gateway/google/gemini-2.5-pro": (1.25, 10.0),
+    # Hyperbolic unified pricing — LiteLLM has wrong values for R1 and V3
+    "hyperbolic/deepseek-ai/DeepSeek-R1": (2.0, 2.0),
+    "hyperbolic/deepseek-ai/DeepSeek-V3": (0.25, 0.25),
+    # Heroku reports $0 in LiteLLM but actually charges per-token
+    "heroku/claude-4-sonnet": (3.0, 15.0),
+    "heroku/claude-3-5-sonnet-latest": (3.0, 15.0),
+    "heroku/claude-3-7-sonnet": (3.0, 15.0),
 }
 
 # ---------------------------------------------------------------------------
@@ -374,6 +352,18 @@
 # Anthropic fast/us routing prefixes on bare IDs
 _FAST_PREFIX = re.compile(r"^(?:fast/us/|fast/|us/)", re.IGNORECASE)
 
+# Dated Anthropic model IDs: claude-opus-4-6-20260205, claude-sonnet-4-5-20250929, etc.
+_DATED_ANTHROPIC = re.compile(
+    r"^(?P<base>claude-[\w-]+)-\d{8}$",
+    re.IGNORECASE,
+)
+
+# Bedrock region-specific model IDs (bare geo-prefix form: us., eu., apac., au., jp., ap.)
+_BEDROCK_GEO_MODEL = re.compile( + r"^(?:us|eu|apac|ap|au|jp)\.", + re.IGNORECASE, +) + # Vertex AI @version suffix: @20241022, @default, @001, @latest _VERTEX_VERSION = re.compile(r"@[\w.-]+$") @@ -495,6 +485,19 @@ def _get_provider_root(litellm_provider: str) -> str: return litellm_provider.split("-")[0].split("_models")[0] +# Regex matching region-specific Bedrock model IDs, e.g.: +# bedrock/us-east-1/... bedrock/eu-north-1/... +# us.anthropic.... eu.anthropic.... +_REGION_RE = re.compile( + r"^(bedrock/[a-z]{2}-[a-z]+-\d+/|[a-z]{2}\.)" +) + + +def _has_region(model_id: str) -> bool: + """Return True if model_id is pinned to a specific cloud region.""" + return bool(_REGION_RE.match(model_id)) + + def _infer_reasoning_type(model_id: str, litellm_provider: str, entry: dict) -> str: supports_reasoning = entry.get("supports_reasoning", False) if not supports_reasoning: @@ -607,6 +610,12 @@ def build_rows() -> List[dict]: # Skip placeholder/tier entries if _is_placeholder(model_id): continue + # Fix B: Skip fast/ and us/ routing prefix variants entirely. + # These are LiteLLM routing hints, not separate models. fast/ has 6x + # inflated pricing; us/ has 10% regional surcharge. Both resolve to + # the same underlying model at the same endpoint. 
+ if _FAST_PREFIX.match(model_id): + continue # Skip dated preview models superseded by a stable GA release if _is_superseded_preview(model_id, all_model_ids): skipped_previews += 1 @@ -630,6 +639,10 @@ def build_rows() -> List[dict]: input_cost = round(in_cost_token * 1_000_000, 6) output_cost = round(out_cost_token * 1_000_000, 6) + # Apply manual price overrides for known LiteLLM pricing bugs + if model_id in PRICE_OVERRIDES: + input_cost, output_cost = PRICE_OVERRIDES[model_id] + # Provider display name and API key env var display_name, api_key = PROVIDERS.get( litellm_provider, @@ -669,8 +682,143 @@ def build_rows() -> List[dict]: if skipped_previews: print(f" Skipped {skipped_previews} dated preview model(s) superseded by stable GA releases.") - # Sort: ELO descending, then model name ascending - rows.sort(key=lambda r: (-r["coding_arena_elo"], r["model"])) + initial_count = len(rows) + + # ------------------------------------------------------------------ + # Fix C: Skip dated variants when the undated version exists. + # e.g. drop "claude-opus-4-6-20260205" if "claude-opus-4-6" is present, + # and drop "vertex_ai/claude-opus-4-5@20250929" if the unversioned + # "vertex_ai/claude-opus-4-5" is present. + # ------------------------------------------------------------------ + model_ids_present = {r["model"] for r in rows} + kept_after_c: List[dict] = [] + skipped_dated = 0 + for row in rows: + mid = row["model"] + # Check bare Anthropic dated IDs: claude-*-YYYYMMDD + m = _DATED_ANTHROPIC.match(mid) + if m and m.group("base") in model_ids_present: + skipped_dated += 1 + continue + # Check Vertex AI @version suffixed IDs + if "@" in mid: + base_no_version = _VERTEX_VERSION.sub("", mid) + if base_no_version != mid and base_no_version in model_ids_present: + skipped_dated += 1 + continue + # Check Bedrock versioned IDs (e.g. 
anthropic.claude-opus-4-5-20251101-v1:0) + stripped = _BEDROCK_VERSION.sub("", mid) + if stripped != mid and stripped in model_ids_present: + skipped_dated += 1 + continue + kept_after_c.append(row) + rows = kept_after_c + if skipped_dated: + print(f" Fix C: Removed {skipped_dated} dated/versioned variant(s).") + + # ------------------------------------------------------------------ + # Fix A: Deduplicate per (provider_display, canonical_base_model). + # For each provider × base model, keep only the cheapest non-regional + # variant. This collapses e.g. 14 Anthropic claude-opus-4-6 rows into 1. + # ------------------------------------------------------------------ + # Also handles Bedrock region dedup: for Bedrock, multiple region-specific + # model IDs (bedrock/us-east-1/..., bedrock/eu-north-1/..., us.anthropic..., + # eu.anthropic...) resolve to the same model. We keep only the cheapest + # (typically the regionless/global variant). + dedup_buckets: Dict[Tuple[str, str], List[dict]] = defaultdict(list) + no_canonical = 0 + for row in rows: + canonical = _extract_base_model(row["model"]) + if canonical is None: + # Can't canonicalize — keep it as-is (use model ID as its own key) + canonical = row["model"] + no_canonical += 1 + dedup_buckets[(row["provider"], canonical)].append(row) + + rows_deduped: List[dict] = [] + dedup_removed = 0 + for (_provider, _base), bucket in dedup_buckets.items(): + if len(bucket) == 1: + rows_deduped.append(bucket[0]) + else: + # Keep the cheapest variant (by avg cost = (input + output) / 2). + # Tiebreaker: prefer regionless model IDs so Bedrock users aren't + # locked to a specific region (e.g. "bedrock/moonshotai.kimi-k2.5" + # over "bedrock/us-east-1/moonshotai.kimi-k2.5"). 
+ bucket.sort(key=lambda r: ( + (r["input"] + r["output"]) / 2, + _has_region(r["model"]), + )) + rows_deduped.append(bucket[0]) + dedup_removed += len(bucket) - 1 + rows = rows_deduped + if dedup_removed: + print(f" Fix A: Deduplicated {dedup_removed} provider×model variant(s).") + + # ------------------------------------------------------------------ + # Sanity filter — drop rows where input or output cost exceeds the cap. + # Catches LiteLLM pricing bugs (e.g. values off by 100,000×). + # ------------------------------------------------------------------ + pre_sanity = len(rows) + rows = [ + r for r in rows + if r["input"] <= MAX_COST_PER_MTOK and r["output"] <= MAX_COST_PER_MTOK + ] + sanity_removed = pre_sanity - len(rows) + if sanity_removed: + print(f" Sanity filter: Removed {sanity_removed} row(s) with cost > ${MAX_COST_PER_MTOK}/Mtok.") + + # ------------------------------------------------------------------ + # Fix D: Pareto filter — remove models that are strictly dominated + # (higher cost AND lower ELO) by another model *from the same provider*. + # + # A model X is dominated if there exists model Y (same provider) where: + # Y.elo >= X.elo AND Y.avg_cost <= X.avg_cost AND (strictly better on >= 1) + # + # Scoped per-provider so that free-tier providers (GitHub Copilot) don't + # wipe out paid providers that users with different API keys still need. + # This prunes e.g. Opus 4/4.1 ($15/$75, ELO 1405/1475) within Anthropic + # since Opus 4.5/4.6 ($5/$25, ELO 1496/1530) strictly dominate them. + # ------------------------------------------------------------------ + provider_groups: Dict[str, List[dict]] = defaultdict(list) + for row in rows: + provider_groups[row["provider"]].append(row) + + pareto_removed = 0 + pareto_kept: List[dict] = [] + for provider, group in provider_groups.items(): + # Skip Pareto filtering for zero-cost providers (e.g. GitHub Copilot, + # Snowflake, Dashscope). 
All their models report $0, so cost isn't a + # meaningful differentiator and only the highest-ELO model would survive. + all_zero = all((r["input"] + r["output"]) == 0 for r in group) + if all_zero: + pareto_kept.extend(group) + continue + for candidate in group: + c_elo = candidate["coding_arena_elo"] + c_avg = (candidate["input"] + candidate["output"]) / 2 + dominated = False + for other in group: + if other is candidate: + continue + o_elo = other["coding_arena_elo"] + o_avg = (other["input"] + other["output"]) / 2 + if o_elo >= c_elo and o_avg <= c_avg: + if o_elo > c_elo or o_avg < c_avg: + dominated = True + break + if dominated: + pareto_removed += 1 + else: + pareto_kept.append(candidate) + rows = pareto_kept + if pareto_removed: + print(f" Fix D: Removed {pareto_removed} Pareto-dominated model(s).") + + print(f" Post-processing: {initial_count} -> {len(rows)} rows.") + + # Sort: provider ascending, then ELO descending within each provider + rows.sort(key=lambda r: (r["provider"], -r["coding_arena_elo"], r["model"])) return rows @@ -702,8 +850,8 @@ def main() -> None: # Print a quick summary by provider from collections import Counter providers = Counter(r["provider"] for r in rows) - print("\nTop providers by model count:") - for provider, count in providers.most_common(20): + print("\nProviders by model count:") + for provider, count in providers.most_common(): print(f" {provider}: {count}") From fae9a0855189b52b186df069b2748b4ea7418838 Mon Sep 17 00:00:00 2001 From: Niti Goyal Date: Sat, 21 Feb 2026 15:31:28 -0500 Subject: [PATCH 5/5] Test fixes - test_llm_invoke.py: Switched the Vertex AI MaaS structured output test from using deepseek-v3.2-maas (which was removed from the model catalog by the Pareto filter) to minimax-m2-maas (which is still in the catalog). The test no longer fails due to a missing CSV row. 
- In API key scanner, treat empty API key values as "not set" across all sources and fix test isolation to prevent false failures when a developer or test suite has local .env files --- pdd/api_key_scanner.py | 15 +++++---- tests/test_api_key_scanner.py | 58 +++++++++++++++++++++++++++++++++-- tests/test_llm_invoke.py | 33 +++++++++++--------- 3 files changed, 83 insertions(+), 23 deletions(-) diff --git a/pdd/api_key_scanner.py b/pdd/api_key_scanner.py index 6dcd1cbf9..f4d52c96a 100644 --- a/pdd/api_key_scanner.py +++ b/pdd/api_key_scanner.py @@ -76,13 +76,15 @@ def _load_dotenv_values() -> Dict[str, str]: """ Load values from a .env file using python-dotenv's dotenv_values (read-only). Returns an empty dict on any failure. + Only includes keys with non-empty values. """ try: from dotenv import dotenv_values # type: ignore values = dotenv_values() # dotenv_values returns an OrderedDict; values can be None for keys without values - return {k: v for k, v in values.items() if v is not None} + # Filter out None and empty string values + return {k: v for k, v in values.items() if v and v.strip()} except ImportError: logger.debug("python-dotenv not installed; skipping .env file check.") return {} @@ -105,7 +107,7 @@ def _detect_shell() -> Optional[str]: def _parse_api_env_file(file_path: Path) -> Dict[str, str]: """ Parse a ~/.pdd/api-env.{shell} file for uncommented `export KEY=value` lines. - Returns a dict of key names to values found. + Returns a dict of key names to non-empty values found. 
""" result: Dict[str, str] = {} @@ -137,7 +139,8 @@ def _parse_api_env_file(file_path: Path) -> Dict[str, str]: ): value = value[1:-1] - if key and value: + # Only include keys with non-empty values + if key and value and value.strip(): result[key] = value except Exception as e: @@ -183,17 +186,17 @@ def scan_environment() -> Dict[str, KeyInfo]: api_env_source_label = f"~/.pdd/api-env.{shell_name}" for key_name in key_names: - # Check in priority order + # Check in priority order, ensuring values are non-empty if key_name in dotenv_vals: result[key_name] = KeyInfo(source=".env file", is_set=True) - elif key_name in os.environ: + elif key_name in os.environ and os.environ[key_name].strip(): result[key_name] = KeyInfo(source="shell environment", is_set=True) elif key_name in api_env_vals: result[key_name] = KeyInfo( source=api_env_source_label, is_set=True ) else: - # Key not found in any source + # Key not found in any source or has empty value result[key_name] = KeyInfo(source="", is_set=False) except Exception as e: diff --git a/tests/test_api_key_scanner.py b/tests/test_api_key_scanner.py index 0d3e55c91..60eb0712a 100644 --- a/tests/test_api_key_scanner.py +++ b/tests/test_api_key_scanner.py @@ -46,6 +46,9 @@ # # VIII. scan_environment — Edge Cases # 26. test_scan_special_chars_in_key_value: Key value with special chars → no crash. +# 27. test_scan_empty_string_values_not_set: Keys with empty values → is_set=False. +# 28. test_scan_empty_dotenv_falls_through_to_shell: Empty .env value → shell environment wins. +# 29. test_scan_empty_shell_falls_through_to_dotenv: Empty shell value → .env file wins. 
import csv from pathlib import Path @@ -327,7 +330,8 @@ def test_scan_detects_shell_env_key(tmp_path, monkeypatch): monkeypatch.setenv("OPENAI_API_KEY", "sk-test123") monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) - result = scan_environment() + with mock.patch("pdd.api_key_scanner._load_dotenv_values", return_value={}): + result = scan_environment() assert result["OPENAI_API_KEY"].is_set is True assert result["OPENAI_API_KEY"].source == "shell environment" @@ -346,7 +350,8 @@ def test_scan_detects_api_env_file_key(tmp_path, monkeypatch): monkeypatch.delenv("OPENAI_API_KEY", raising=False) monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) - result = scan_environment() + with mock.patch("pdd.api_key_scanner._load_dotenv_values", return_value={}): + result = scan_environment() assert result["OPENAI_API_KEY"].is_set is True assert result["OPENAI_API_KEY"].source == "~/.pdd/api-env.bash" @@ -513,3 +518,52 @@ def test_scan_special_chars_in_key_value(tmp_path, monkeypatch): result = scan_environment() assert result["MY_SPECIAL_KEY"].is_set is True + + +def test_scan_empty_string_values_not_set(tmp_path, monkeypatch): + """Keys with empty string values are treated as not set from all sources.""" + _setup_home(tmp_path, monkeypatch, csv_rows=SIMPLE_CSV_ROWS) + + # Set one key to empty string in shell environment + monkeypatch.setenv("OPENAI_API_KEY", "") + # Set another to whitespace-only + monkeypatch.setenv("ANTHROPIC_API_KEY", " ") + monkeypatch.setenv("SHELL", "/bin/bash") + + # .env returns empty since _load_dotenv_values filters empty strings + with mock.patch("pdd.api_key_scanner._load_dotenv_values", return_value={}): + result = scan_environment() + + # Both keys should be marked as not set since they have empty/whitespace values + assert result["OPENAI_API_KEY"].is_set is False + assert result["ANTHROPIC_API_KEY"].is_set is False + + +def test_scan_empty_dotenv_falls_through_to_shell(tmp_path, monkeypatch): + """Empty .env value is filtered out; shell 
environment wins.""" + _setup_home(tmp_path, monkeypatch, csv_rows=SIMPLE_CSV_ROWS) + monkeypatch.setenv("OPENAI_API_KEY", "sk-from-shell") + + # .env has the key but with empty value (gets filtered by _load_dotenv_values) + # So the implementation should skip .env and find the shell value + with mock.patch("pdd.api_key_scanner._load_dotenv_values", return_value={}): + result = scan_environment() + + assert result["OPENAI_API_KEY"].is_set is True + assert result["OPENAI_API_KEY"].source == "shell environment" + + +def test_scan_empty_shell_falls_through_to_dotenv(tmp_path, monkeypatch): + """Empty shell value is skipped; .env file wins.""" + _setup_home(tmp_path, monkeypatch, csv_rows=SIMPLE_CSV_ROWS) + monkeypatch.setenv("OPENAI_API_KEY", "") # Empty in shell + + # .env has a real value + with mock.patch( + "pdd.api_key_scanner._load_dotenv_values", + return_value={"OPENAI_API_KEY": "sk-from-dotenv"}, + ): + result = scan_environment() + + assert result["OPENAI_API_KEY"].is_set is True + assert result["OPENAI_API_KEY"].source == ".env file" diff --git a/tests/test_llm_invoke.py b/tests/test_llm_invoke.py index d222e7884..729bd8328 100644 --- a/tests/test_llm_invoke.py +++ b/tests/test_llm_invoke.py @@ -1633,25 +1633,28 @@ def test_llm_invoke_dict_response_missing_field_triggers_fallback(mock_load_mode # --- Tests for structured_output CSV flag behavior --- -def test_deepseek_maas_passes_response_format_for_structured_output(mock_set_llm_cache): - """Verify that DeepSeek MaaS model passes response_format when output_pydantic is requested. +def test_vertex_ai_maas_passes_response_format_for_structured_output(mock_set_llm_cache): + """Verify that Vertex AI MaaS models pass response_format when output_pydantic is requested. - According to Google Cloud documentation, all Vertex AI MaaS models (including DeepSeek) - support structured output. This test verifies the CSV has structured_output=True for DeepSeek. 
+ According to Google Cloud documentation, all Vertex AI MaaS models + support structured output. This test uses the MiniMax MaaS model to verify + the CSV has structured_output=True and that response_format is correctly passed. This test will: - FAIL if structured_output=False in CSV (the bug) - PASS if structured_output=True in CSV (after fix) """ - # Read the REAL CSV to get DeepSeek's actual structured_output value + maas_model = 'vertex_ai/minimaxai/minimax-m2-maas' + + # Read the REAL CSV to get the MaaS model's actual structured_output value from pdd.llm_invoke import _load_model_data real_data = _load_model_data(None) # None uses package default CSV path - # Filter to only include DeepSeek MaaS model - deepseek_data = real_data[real_data['model'] == 'vertex_ai/deepseek-ai/deepseek-v3.2-maas'].copy() - assert len(deepseek_data) == 1, "DeepSeek MaaS model not found in CSV" + # Filter to only include the MaaS model + maas_data = real_data[real_data['model'] == maas_model].copy() + assert len(maas_data) == 1, f"MaaS model {maas_model} not found in CSV" - with patch('pdd.llm_invoke._load_model_data', return_value=deepseek_data): + with patch('pdd.llm_invoke._load_model_data', return_value=maas_data): # Set the actual env vars that the CSV api_key column requires for Vertex AI models vertex_env = { 'GOOGLE_APPLICATION_CREDENTIALS': '/fake/path/creds.json', @@ -1664,7 +1667,7 @@ def test_deepseek_maas_passes_response_format_for_structured_output(mock_set_llm json_response = '{"field1": "test_value", "field2": 42}' mock_response = create_mock_litellm_response( json_response, - model_name='vertex_ai/deepseek-ai/deepseek-v3.2-maas' + model_name=maas_model ) mock_completion.return_value = mock_response @@ -1679,16 +1682,16 @@ def test_deepseek_maas_passes_response_format_for_structured_output(mock_set_llm verbose=True ) - # Verify DeepSeek was called + # Verify the MaaS model was called mock_completion.assert_called_once() call_args, call_kwargs = 
mock_completion.call_args - assert call_kwargs['model'] == 'vertex_ai/deepseek-ai/deepseek-v3.2-maas', \ - f"Expected DeepSeek model, got {call_kwargs['model']}" + assert call_kwargs['model'] == maas_model, \ + f"Expected MaaS model, got {call_kwargs['model']}" - # EXPECTED: DeepSeek MaaS should have response_format passed + # EXPECTED: MaaS model should have response_format passed # because it supports structured output (per Google Cloud docs) assert 'response_format' in call_kwargs, \ - "DeepSeek MaaS should have response_format passed - check that structured_output=True in CSV" + "Vertex AI MaaS model should have response_format passed - check that structured_output=True in CSV" response_format = call_kwargs['response_format'] assert response_format['type'] == 'json_schema', \