diff --git a/.agents/skills/find-skills/SKILL.md b/.agents/skills/find-skills/SKILL.md new file mode 100644 index 0000000..114c663 --- /dev/null +++ b/.agents/skills/find-skills/SKILL.md @@ -0,0 +1,142 @@ +--- +name: find-skills +description: Helps users discover and install agent skills when they ask questions like "how do I do X", "find a skill for X", "is there a skill that can...", or express interest in extending capabilities. This skill should be used when the user is looking for functionality that might exist as an installable skill. +--- + +# Find Skills + +This skill helps you discover and install skills from the open agent skills ecosystem. + +## When to Use This Skill + +Use this skill when the user: + +- Asks "how do I do X" where X might be a common task with an existing skill +- Says "find a skill for X" or "is there a skill for X" +- Asks "can you do X" where X is a specialized capability +- Expresses interest in extending agent capabilities +- Wants to search for tools, templates, or workflows +- Mentions they wish they had help with a specific domain (design, testing, deployment, etc.) + +## What is the Skills CLI? + +The Skills CLI (`npx skills`) is the package manager for the open agent skills ecosystem. Skills are modular packages that extend agent capabilities with specialized knowledge, workflows, and tools. + +**Key commands:** + +- `npx skills find [query]` - Search for skills interactively or by keyword +- `npx skills add <package>` - Install a skill from GitHub or other sources +- `npx skills check` - Check for skill updates +- `npx skills update` - Update all installed skills + +**Browse skills at:** https://skills.sh/ + +## How to Help Users Find Skills + +### Step 1: Understand What They Need + +When a user asks for help with something, identify: + +1. The domain (e.g., React, testing, design, deployment) +2. The specific task (e.g., writing tests, creating animations, reviewing PRs) +3. 
Whether this is a common enough task that a skill likely exists + +### Step 2: Check the Leaderboard First + +Before running a CLI search, check the [skills.sh leaderboard](https://skills.sh/) to see if a well-known skill already exists for the domain. The leaderboard ranks skills by total installs, surfacing the most popular and battle-tested options. + +For example, top skills for web development include: +- `vercel-labs/agent-skills` — React, Next.js, web design (100K+ installs each) +- `anthropics/skills` — Frontend design, document processing (100K+ installs) + +### Step 3: Search for Skills + +If the leaderboard doesn't cover the user's need, run the find command: + +```bash +npx skills find [query] +``` + +For example: + +- User asks "how do I make my React app faster?" → `npx skills find react performance` +- User asks "can you help me with PR reviews?" → `npx skills find pr review` +- User asks "I need to create a changelog" → `npx skills find changelog` + +### Step 4: Verify Quality Before Recommending + +**Do not recommend a skill based solely on search results.** Always verify: + +1. **Install count** — Prefer skills with 1K+ installs. Be cautious with anything under 100. +2. **Source reputation** — Official sources (`vercel-labs`, `anthropics`, `microsoft`) are more trustworthy than unknown authors. +3. **GitHub stars** — Check the source repository. A skill from a repo with <100 stars should be treated with skepticism. + +### Step 5: Present Options to the User + +When you find relevant skills, present them to the user with: + +1. The skill name and what it does +2. The install count and source +3. The install command they can run +4. A link to learn more at skills.sh + +Example response: + +``` +I found a skill that might help! The "react-best-practices" skill provides +React and Next.js performance optimization guidelines from Vercel Engineering. 
+(185K installs) + +To install it: +npx skills add vercel-labs/agent-skills@react-best-practices + +Learn more: https://skills.sh/vercel-labs/agent-skills/react-best-practices +``` + +### Step 6: Offer to Install + +If the user wants to proceed, you can install the skill for them: + +```bash +npx skills add <owner/repo@skill> -g -y +``` + +The `-g` flag installs globally (user-level) and `-y` skips confirmation prompts. + +## Common Skill Categories + +When searching, consider these common categories: + +| Category | Example Queries | +| --------------- | ---------------------------------------- | +| Web Development | react, nextjs, typescript, css, tailwind | +| Testing | testing, jest, playwright, e2e | +| DevOps | deploy, docker, kubernetes, ci-cd | +| Documentation | docs, readme, changelog, api-docs | +| Code Quality | review, lint, refactor, best-practices | +| Design | ui, ux, design-system, accessibility | +| Productivity | workflow, automation, git | + +## Tips for Effective Searches + +1. **Use specific keywords**: "react testing" is better than just "testing" +2. **Try alternative terms**: If "deploy" doesn't work, try "deployment" or "ci-cd" +3. **Check popular sources**: Many skills come from `vercel-labs/agent-skills` or `ComposioHQ/awesome-claude-skills` + +## When No Skills Are Found + +If no relevant skills exist: + +1. Acknowledge that no existing skill was found +2. Offer to help with the task directly using your general capabilities +3. Suggest the user could create their own skill with `npx skills init` + +Example: + +``` +I searched for skills related to "xyz" but didn't find any matches. +I can still help you with this task directly! Would you like me to proceed? 
+ +If this is something you do often, you could create your own skill: +npx skills init my-xyz-skill +``` diff --git a/.eslintrc.js b/.eslintrc.js index c19fecd..4103977 100644 --- a/.eslintrc.js +++ b/.eslintrc.js @@ -29,5 +29,16 @@ module.exports = { 'eqeqeq': ['error', 'always'], 'prettier/prettier': 'error' }, - ignorePatterns: ['out', 'node_modules', '**/*.d.ts', 'webpack.config.js', '.eslintrc.js'] + ignorePatterns: [ + 'out', + 'node_modules', + 'coverage', + '**/*.d.ts', + 'webpack.config.js', + 'jest.config.js', + '.eslintrc.js', + '__tests__/**', + 'src/__tests__/**', + '**/*.test.ts', + ], }; diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 19a9cbc..1553c8e 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -1,5 +1,5 @@ name: 🐛 Bug Report -description: Report a bug on cursor whisper +description: Report a bug on promptimize title: '' labels: ['bug', 'bugfix', 'priority: high'] body: @@ -23,9 +23,14 @@ body: label: Which actions are affected? multiple: true options: - - Issue - - Pull Request - - Commits + - Audio recording / microphone + - Transcription (Whisper) + - Prompt optimization + - Text insertion (editor / chat) + - Configuration / setup wizard + - Configuration panel (webview) + - Keyboard shortcuts / commands + - Other - type: dropdown id: platforms @@ -36,6 +41,8 @@ body: - macOS - Windows - Linux + - VS Code + - Cursor - type: markdown attributes: @@ -68,8 +75,8 @@ body: - type: input attributes: - label: cursor whisper version - description: What version of cursor whisper is being used? + label: promptimize version + description: What version of promptimize is being used? 
placeholder: 'master' validations: required: true diff --git a/.github/ISSUE_TEMPLATE/chore_task.yml b/.github/ISSUE_TEMPLATE/chore_task.yml index c50e534..e12b95d 100644 --- a/.github/ISSUE_TEMPLATE/chore_task.yml +++ b/.github/ISSUE_TEMPLATE/chore_task.yml @@ -25,8 +25,10 @@ body: options: - CI/CD - Dependencies - - Code Refactoring - - Repository Configuration + - Code refactoring + - Tests + - Documentation maintenance + - Repository configuration - Other - type: markdown diff --git a/.github/ISSUE_TEMPLATE/doc_update.yml b/.github/ISSUE_TEMPLATE/doc_update.yml index a207b07..5e70087 100644 --- a/.github/ISSUE_TEMPLATE/doc_update.yml +++ b/.github/ISSUE_TEMPLATE/doc_update.yml @@ -24,9 +24,12 @@ body: multiple: false options: - README.md - - Wiki - - API Documentation - - Inline Code Comments + - Quick start (docs/quickstart.md) + - User guide (recording, shortcuts, troubleshooting) + - Configuration docs + - Architecture / ADRs + - Developer docs (testing, standards, release) + - Inline code comments - Other - type: markdown diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml index 8321746..17df8fa 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.yml +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -1,5 +1,5 @@ name: ✨ Feature Request -description: Propose an idea or improvement for cursor whisper +description: Propose an idea or improvement for promptimize title: '' labels: ['enhancement', 'feature', 'priority: low'] body: @@ -23,9 +23,13 @@ body: label: What area does this improvement affect? multiple: false options: - - User Interface + - Audio recording / microphone + - Transcription (Whisper) + - Prompt optimization + - Text insertion (editor / chat) + - Configuration / setup + - User interface (status bar, webview, notifications) - Performance - - New Feature - Documentation - Other @@ -35,9 +39,13 @@ body: label: What actions does this improvement affect? 
multiple: true options: - - Issue - - Pull Request - - Commit + - Transcribe recording + - Promptimize recording + - Setup wizard + - Configure API key / provider + - Open configuration panel + - Test transformation + - Other - type: markdown attributes: @@ -92,9 +100,9 @@ body: - type: input attributes: - label: Version of cursor whisper + label: Version of promptimize description: | - What version of cursor whisper are you using, or does this proposal apply to all versions? + What version of promptimize are you using, or does this proposal apply to all versions? placeholder: 'master' validations: required: false diff --git a/.github/ISSUE_TEMPLATE/help_request.yml b/.github/ISSUE_TEMPLATE/help_request.yml index 9f9a3dd..4a48e64 100644 --- a/.github/ISSUE_TEMPLATE/help_request.yml +++ b/.github/ISSUE_TEMPLATE/help_request.yml @@ -23,8 +23,12 @@ body: label: What area do you need help with? multiple: false options: - - Code or implementation - - Build or setup + - Setup and configuration (API keys, wizard) + - Recording (microphone, permissions) + - Transcription (Whisper) + - Prompt optimization / providers + - Text insertion (editor / Cursor chat) + - Build from source / development - Documentation or usage - Workflow or CI/CD - Other diff --git a/.github/ISSUE_TEMPLATE/hotfix.yml b/.github/ISSUE_TEMPLATE/hotfix.yml index e16be8e..27070ac 100644 --- a/.github/ISSUE_TEMPLATE/hotfix.yml +++ b/.github/ISSUE_TEMPLATE/hotfix.yml @@ -1,6 +1,6 @@ name: 🔥 Hotfix Issue -description: Request a new hotfix for copilot (only team members) +description: Request a new hotfix for promptimize (only team members) title: '' labels: ['hotfix', 'branched', 'priority: high'] body: @@ -8,7 +8,7 @@ body: attributes: value: | ### ⚠️ Disclaimer - > **Only members of the copilot team can create hotfix issues.** + > **Only members of the promptimize team can create hotfix issues.** > Any hotfix issue created by someone outside the team will be closed automatically. 
--- diff --git a/.github/ISSUE_TEMPLATE/release.yml b/.github/ISSUE_TEMPLATE/release.yml index da4884d..9f41adf 100644 --- a/.github/ISSUE_TEMPLATE/release.yml +++ b/.github/ISSUE_TEMPLATE/release.yml @@ -1,5 +1,5 @@ name: 🚀 Release Issue -description: Request a new release for copilot (only team members) +description: Request a new release for promptimize (only team members) title: "" labels: ["release", "branched", "priority: medium"] body: @@ -7,7 +7,7 @@ body: attributes: value: | ### ⚠️ Disclaimer - > **Only members of the copilot team can create release issues.** + > **Only members of the promptimize team can create release issues.** > Any release issue created by someone outside the team will be closed automatically. --- diff --git a/.vscodeignore b/.vscodeignore index 13fcc93..f89e41e 100644 --- a/.vscodeignore +++ b/.vscodeignore @@ -9,8 +9,12 @@ webpack.config.js **/*.map **/*.ts !out/**/*.js -node_modules/** -!node_modules/@kstonekuan/** +!out/presentation/webview/** +node_modules/**/test/** +node_modules/**/tests/** +node_modules/**/*.md +node_modules/**/.*.yml +node_modules/**/.*.yaml coverage/** **/__tests__/** **/*.test.ts diff --git a/PROGRESS.md b/PROGRESS.md index 3469a17..da1faa0 100644 --- a/PROGRESS.md +++ b/PROGRESS.md @@ -8,13 +8,13 @@ ## Current Status -Cursor Whisper is a working VSCode/Cursor extension that records voice, transcribes with OpenAI Whisper, optionally transforms prompts with GPT-4, and inserts text into the active editor or chat. +Promptimize is a working VSCode/Cursor extension that records voice, transcribes with OpenAI Whisper, optionally transforms prompts with GPT-4, and inserts text into the active editor or chat. 
| Phase | Status | Progress | |-------|--------|----------| | Documentation | Complete | 100% | | MVP Implementation | Complete | 100% | -| Testing | In progress | Strategy defined; automated tests pending | +| Testing | In progress | 12+ automated tests; expanding coverage | | Publication | Pending | Not published | **Build**: Successful (`out/extension.js`, ~579 KB) @@ -38,11 +38,11 @@ Cursor Whisper is a working VSCode/Cursor extension that records voice, transcri | Category | Files | Location | |----------|-------|----------| -| ADRs | 13 + template | `docs/adr/` | -| Architecture, domain, flows, UX, etc. | 13 | `docs/*/` | +| ADRs | 14 + template | `docs/adr/` | +| User guides, architecture, ops | 12 | `docs/*/` | | Documentation index | 1 | `docs/README.md` | -Layer-specific implementation details live in `src/` with TypeScript types and JSDoc comments. See [`docs/api/README.md`](docs/api/README.md) and [`docs/application/ports.md`](docs/application/ports.md) for interface contracts. +Layer-specific implementation details live in `src/` with TypeScript types and JSDoc comments. Port interfaces: [`src/application/ports/`](src/application/ports/). 
--- @@ -50,10 +50,8 @@ Layer-specific implementation details live in `src/` with TypeScript types and J ### Phase 1 — Documentation -- Architecture overview and Clean Architecture guide -- 13 Architecture Decision Records (ADRs) -- Domain, application, flows, UX, security, testing, roadmap, deployment, and research docs -- API reference structure +- Architecture overview, configuration guide, and ADRs +- Flows, UX, security, testing, deployment, and research docs ### Phase 2 — MVP Implementation @@ -127,7 +125,7 @@ Layer-specific implementation details live in `src/` with TypeScript types and J | `audioQuality` | Yes | Loaded only — recorder always uses 16 kHz mono | | `maxRecordingDuration` | Yes | Loaded only — not enforced in `NativeAudioRecorder` | | `showNotifications` | Yes | Loaded only — commands always show notifications | -| `transcriptionHint` | Partial | Read-only; not writable via settings UI | +| `transcriptionHint` | Partial | Read/write via VS Code Settings; not in webview | These options are exposed in `package.json` and documented in README for forward compatibility. @@ -154,7 +152,7 @@ pnpm run compile 1. Open the project in VSCode/Cursor 2. Press `F5` to launch the Extension Development Host -3. Run **Cursor Whisper: Configure API Key** from the Command Palette +3. Run **Promptimize: Configure API Key** from the Command Palette 4. Press `Cmd/Ctrl+Alt+V` to start recording, speak, then stop 5. Wait for transcription and insertion into the active editor diff --git a/README.md b/README.md index e3008c5..a899a7b 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Cursor Whisper +# Promptimize > **Transform your voice into optimized prompts with AI-powered speech-to-text** @@ -11,11 +11,41 @@ A professional VSCode/Cursor extension that captures audio from your microphone, --- +## Quick Start + +1. **Install** the extension (VSIX or Marketplace when available) +2. **Run Setup Wizard** — Command Palette → `Promptimize: Setup Wizard` +3. 
**Configure OpenAI API key** — Required for Whisper voice-to-text +4. **Optionally choose optimization provider** — OpenAI, Anthropic, Google, Azure, Ollama, OpenCode, OpenRouter, or Cursor +5. **Press `Cmd+Alt+V`** (Transcribe) or **`Cmd+Alt+P`** (Promptimize) and speak + +See the full [Quick Start Guide](docs/quickstart.md) and [Recording Modes](docs/user-guide/recording-modes.md). + +### Two Services, Clear Roles + +| Service | Provider | Required | Credentials | +| ----------------------- | -------------- | -------- | ------------------------- | +| **Transcription** | OpenAI Whisper | Yes | OpenAI API key | +| **Prompt optimization** | Your choice | No | Provider-specific API key | + +```mermaid +graph LR + Voice[Your Voice] --> Whisper[OpenAI Whisper<br/>Transcription] + Whisper --> RawText[Raw Text] + RawText --> Choice{Optimization<br/>Enabled?} + Choice -->|No| Editor[Insert to Editor] + Choice -->|Yes| Provider[Your Chosen Provider] + Provider --> OptimizedText[Optimized Prompt] + OptimizedText --> Editor +``` + +--- + ## 🎯 Vision **Eliminate the friction between thinking and coding.** -Developers often have complex architectural ideas, detailed requirements, or intricate technical explanations that are tedious to type but natural to speak. Cursor Whisper bridges this gap by: +Developers often have complex architectural ideas, detailed requirements, or intricate technical explanations that are tedious to type but natural to speak. Promptimize bridges this gap by: - **Capturing** your spoken thoughts in real-time - **Transcribing** them with high accuracy using OpenAI Whisper @@ -26,7 +56,8 @@ Developers often have complex architectural ideas, detailed requirements, or int ## 🔥 The Problem We Solve -### Before Cursor Whisper: +### Before Promptimize: + ``` 1. Think about complex architecture requirements 2. Struggle to type everything out @@ -35,7 +66,8 @@ Developers often have complex architectural ideas, detailed requirements, or int 5. LLM misunderstands due to poor formatting ``` -### With Cursor Whisper: +### With Promptimize: + ``` 1. Press Cmd+Alt+V 2. 
Speak naturally about your requirements @@ -48,29 +80,31 @@ Developers often have complex architectural ideas, detailed requirements, or int ## ✨ Features -### Current (v0.1.0-alpha) +### Current (v0.1.0) -- ✅ **One-Click Recording** - Status bar button or keyboard shortcut -- ✅ **High-Quality Transcription** - OpenAI Whisper API integration -- ✅ **Smart Insertion** - Automatically inserts into active editor -- ✅ **Visual Feedback** - Clear state indicators (idle, recording, processing) -- ✅ **Secure Configuration** - API keys stored securely in VSCode SecretStorage -- ✅ **Cross-Platform** - Works on macOS, Windows, and Linux +- ✅ **Two Recording Modes** — Transcribe (raw text) and Promptimize (optimized prompts) +- ✅ **One-Click Recording** — Dual status bar buttons or keyboard shortcuts +- ✅ **High-Quality Transcription** — OpenAI Whisper API integration +- ✅ **Prompt Transformation** — AI-powered optimization via 8 providers +- ✅ **Multiple AI Providers** — OpenAI, Anthropic, Google, Azure, Ollama, OpenCode, OpenRouter, and Cursor +- ✅ **Configuration Webview** — Interactive setup panel with provider comparison and system prompt editor +- ✅ **Smart Insertion** — Chat → editor → clipboard fallback chain +- ✅ **Visual Feedback** — Status bar states and progress notifications +- ✅ **Secure Configuration** — API keys stored in VSCode SecretStorage +- ✅ **Cross-Platform** — Works on macOS, Windows, and Linux ### Coming Soon -- 🔄 **Prompt Transformation** - AI-powered optimization of transcribed text -- 🔄 **Chat Integration** - Direct insertion into Cursor chat input -- 🔄 **Real-time Streaming** - See transcription as you speak -- 🔄 **Multi-language Support** - Auto-detect or manually configure language -- 🔄 **Custom Vocabulary** - Project-specific terms and acronyms -- 🔄 **Recording History** - Review and re-use past transcriptions +- 🔄 **Real-time Streaming** — See transcription as you speak +- 🔄 **Custom Vocabulary UI** — Project-specific terms in configuration 
webview +- 🔄 **Recording History** — Review and re-use past transcriptions +- 🔄 **Planned settings** — `audioQuality`, `maxRecordingDuration`, `showNotifications` (defined but not yet applied) --- ## 🏗️ Architecture -Cursor Whisper follows **Clean/Hexagonal Architecture** for maximum maintainability, testability, and scalability. +Promptimize follows **Clean/Hexagonal Architecture** for maximum maintainability, testability, and scalability. ``` ┌─────────────────────────────────────────────────────┐ @@ -101,17 +135,20 @@ See [`docs/architecture/`](docs/architecture/) for detailed architecture documen ## 🛠️ Technology Stack ### Core + - **TypeScript 5.4+** - Type-safe development - **VSCode Extension API 1.120+** - Extension foundation - **Node.js 22 LTS** - Runtime environment - **Webpack 5** - Bundling and optimization ### Integrations + - **OpenAI API** - Whisper for transcription, GPT-4 for prompt transformation - **@kstonekuan/audio-capture** - Native cross-platform microphone capture - **VSCode SecretStorage** - Secure credential management ### Quality + - **Jest** - Unit testing - **ESLint + Prettier** - Code quality and formatting - **Husky** - Git hooks for pre-commit checks @@ -124,7 +161,7 @@ See [`docs/architecture/`](docs/architecture/) for detailed architecture documen 1. Open VSCode/Cursor 2. Go to Extensions (`Cmd+Shift+X` / `Ctrl+Shift+X`) -3. Search for "Cursor Whisper" +3. Search for "Promptimize" 4. Click Install ### Manual Installation (Current) @@ -135,40 +172,86 @@ See [`docs/architecture/`](docs/architecture/) for detailed architecture documen 4. Click "..." menu → "Install from VSIX..." 5. Select the downloaded file +### Upgrading from Cursor Whisper + +The extension was renamed to **Promptimize** (`promptimize` publisher). If you previously installed `cursor-whisper`: + +1. Uninstall the old **Cursor Whisper** extension +2. Install `promptimize-*.vsix` (or the new Marketplace listing when available) +3. 
Re-enter API keys (SecretStorage keys changed to `promptimize.apiKey.*`) +4. Update `settings.json`: replace `cursorWhisper.*` with `promptimize.*` +5. Update custom keybindings that reference `cursor-whisper.*` commands + --- ## ⚙️ Configuration ### First-Time Setup -1. After installation, you'll be prompted to configure your OpenAI API Key -2. Click "Configure API Key" -3. Enter your API key (starts with `sk-`) -4. The extension will verify the key and save it securely +1. After installation, run **Promptimize: Setup Wizard** (opens automatically on first launch) +2. Enter your **OpenAI API key** — required for Whisper transcription +3. Choose whether to enable **prompt optimization** and select a provider +4. Provide provider credentials when prompted (Anthropic, Google, Azure, etc.) +5. Test your configuration with **Promptimize: Test Configuration** + +**Note:** Whisper transcription always uses OpenAI. Prompt optimization is optional and can use a different provider with its own API key. ### Manual Configuration -Open Settings (`Cmd+,` / `Ctrl+,`) and search for "Cursor Whisper": +Open Settings (`Cmd+,` / `Ctrl+,`) and search for "Promptimize": ```json { - "cursorWhisper.transcriptionLanguage": "en", - "cursorWhisper.enablePromptTransformation": true, - "cursorWhisper.audioQuality": "high", - "cursorWhisper.maxRecordingDuration": 120, - "cursorWhisper.showNotifications": true + "promptimize.transcriptionLanguage": "en", + "promptimize.enablePromptTransformation": true, + "promptimize.transformationProvider": "openai", + "promptimize.transformationModel": "gpt-4o", + "promptimize.audioQuality": "high", + "promptimize.maxRecordingDuration": 120, + "promptimize.showNotifications": true } ``` +### Transcription (Required — OpenAI Whisper) + +| Setting | Description | +| ----------------------- | ---------------------------------------------------------------------------------------------------- | +| OpenAI API key | Required for voice-to-text. 
Configure via **Setup Wizard** or **Configure OpenAI API Key (Whisper)** | +| `transcriptionLanguage` | Language for transcription (`en`, `es`, `auto`, etc.) | + +**Cost:** ~$0.006/minute of audio + +### Prompt Optimization (Optional) + +Prompt optimization converts transcribed speech into structured prompts. Choose a provider and supply credentials when required. + +| Setting | Description | +| ----------------------------------- | -------------------------------------------------------------------------------------- | +| `enablePromptTransformation` | Enable/disable optimization | +| `transformationProvider` | `openai`, `anthropic`, `google`, `azure`, `ollama`, `opencode`, `openrouter`, `cursor` | +| `transformationModel` | OpenAI model (when provider is `openai`) | +| `anthropicModel` | Claude model (when provider is `anthropic`) | +| `googleModel` | Gemini model (when provider is `google`) | +| `azureEndpoint` / `azureDeployment` | Azure OpenAI resource settings | +| `ollamaBaseUrl` / `ollamaModel` | Local Ollama server settings | +| `openCodeBaseUrl` / `openCodeModel` | Local OpenCode proxy settings | +| `openRouterModel` | OpenRouter model (when provider is `openrouter`) | +| `cursorModel` | Cursor model (when provider is `cursor`) | + +Use **Promptimize: Configure Prompt Optimization Provider** to set up interactively. See [`docs/configuration/`](docs/configuration/) for provider setup. 
+ ### Configuration Options -| Setting | Type | Default | Description | -|---------|------|---------|-------------| -| `transcriptionLanguage` | string | `"en"` | Language for transcription (`en`, `es`, `fr`, `de`, `auto`) | -| `enablePromptTransformation` | boolean | `true` | Transform transcription into optimized prompts | -| `audioQuality` | string | `"high"` | Audio recording quality (`low`, `medium`, `high`) | -| `maxRecordingDuration` | number | `120` | Maximum recording duration in seconds | -| `showNotifications` | boolean | `true` | Show status notifications | +| Setting | Type | Default | Description | +| ---------------------------- | ------- | ---------- | ------------------------------------------------------------------------------------------------------------------------ | +| `transcriptionLanguage` | string | `"auto"` | Language for transcription (`en`, `es`, `fr`, `de`, `auto`) | +| `enablePromptTransformation` | boolean | `true` | Transform transcription into optimized prompts | +| `transformationProvider` | string | `"openai"` | LLM provider for transformation (`openai`, `anthropic`, `google`, `azure`, `ollama`, `opencode`, `openrouter`, `cursor`) | +| `transformationModel` | string | `"gpt-4o"` | OpenAI model for transformation | +| `transcriptionHint` | string | `""` | Optional Whisper vocabulary hint (Settings only) | +| `audioQuality` | string | `"high"` | Planned — not yet applied (always 16 kHz mono) | +| `maxRecordingDuration` | number | `120` | Planned — not yet applied | +| `showNotifications` | boolean | `true` | Planned — not yet applied | --- @@ -205,7 +288,7 @@ pnpm run compile 1. In the Extension Development Host window: - Open Command Palette (`Cmd/Ctrl+Shift+P`) - - Type: "Cursor Whisper: Configure API Key" + - Type: "Promptimize: Configure API Key" - Paste your OpenAI API key (starts with `sk-...`) - The key is securely stored in your system's Keychain/Credential Manager @@ -262,7 +345,7 @@ This will: 1. 
Install all platform-specific native binaries (`darwin-arm64`, `darwin-x64`, `linux-x64-gnu`, `win32-x64-msvc`) 2. Bundle them into the VSIX (~2.5MB total) -3. Create `cursor-whisper-X.X.X.vsix` +3. Create `promptimize-X.X.X.vsix` To verify all binaries are included: @@ -283,50 +366,71 @@ Expected output: ## 🚀 Usage +### Recording Modes + +Promptimize has two modes — see [Recording Modes](docs/user-guide/recording-modes.md) for full details. + +| Mode | Shortcut | Output | +| --------------- | ---------------- | --------------------------- | +| **Transcribe** | `Cmd/Ctrl+Alt+V` | Raw Whisper transcription | +| **Promptimize** | `Cmd/Ctrl+Alt+P` | Optimized structured prompt | + ### Quick Start 1. **Open your editor or Cursor chat** -2. **Press `Cmd+Alt+V` (macOS) or `Ctrl+Alt+V` (Windows/Linux)** +2. **Press `Cmd+Alt+V`** (Transcribe) or **`Cmd+Alt+P`** (Promptimize) 3. **Speak naturally about your requirements** -4. **Press the same shortcut or click the status bar to stop** -5. **Transcribed and optimized text appears automatically** +4. **Click the status bar** (Recording...) to stop +5. **Transcribed or optimized text appears automatically** + +### Status Bar -### Alternative: Status Bar Button +Three items appear in the status bar (right side): -Click the microphone icon (🎤) in the status bar: -- **Idle state**: Click to start recording -- **Recording**: Click to stop -- **Processing**: Wait for transcription +| Item | Idle | Recording | +| --------------- | ---------------------- | -------------------------------------- | +| **Transcribe** | $(mic) Transcribe | $(record) Recording... (click to stop) | +| **Promptimize** | $(sparkle) Promptimize | $(record) Recording... (click to stop) | +| **Settings** | $(gear) Settings | Available during recording | + +During processing, progress appears in **notifications** (Transcribing..., Optimizing..., Inserting...). 
### Example Workflow **Spoken Input:** + > "I need to refactor the authentication service to support JWT tokens instead of sessions. We should maintain backward compatibility with existing session-based auth for 6 months. Also need unit tests for the new JWT validation logic and integration tests for the auth flow." **Optimized Output:** + ```markdown ## Refactor Authentication Service to JWT ### Context + - Current implementation: session-based authentication - Target implementation: JWT tokens ### Objectives + 1. Implement JWT token generation and validation 2. Maintain backward compatibility with session-based auth 3. Provide 6-month deprecation period for sessions ### Technical Requirements + - JWT library integration - Token validation middleware - Session-to-JWT migration path ### Testing Requirements + - Unit tests for JWT validation logic - Integration tests for complete auth flow - Backward compatibility tests for sessions ### Timeline + - 6-month deprecation period for session-based auth ``` @@ -336,24 +440,44 @@ Click the microphone icon (🎤) in the status bar: ### Visual States -The extension provides clear visual feedback through the status bar: +The status bar reflects recorder states; fine-grained progress (Transcribing, Optimizing) appears in notifications. + +| State | Status Bar | Description | +| -------------- | ------------------------------------------ | ---------------------------------- | +| **Idle** | $(mic) Transcribe / $(sparkle) Promptimize | Ready to record | +| **Recording** | $(record) Recording... | Actively recording (click to stop) | +| **Processing** | $(sync~spin) Processing... | Preparing audio after stop | +| **Error** | Error styling | Something went wrong | -| State | Icon | Description | -|-------|------|-------------| -| **Idle** | 🎤 Voice | Ready to record | -| **Recording** | 🔴 Recording... | Actively recording audio | -| **Processing** | ⏳ Transcribing... 
| Sending audio to Whisper API | -| **Transforming** | ⏳ Optimizing... | Enhancing prompt with GPT-4 | -| **Error** | ❌ Error | Something went wrong | +See [UX States](docs/ux/states.md) for the full state reference. ### Keyboard Shortcuts -| Shortcut | Action | -|----------|--------| -| `Cmd+Alt+V` / `Ctrl+Alt+V` | Toggle recording | -| `Cmd+Shift+P` → "Cursor Whisper: Start Recording" | Start recording | -| `Cmd+Shift+P` → "Cursor Whisper: Stop Recording" | Stop recording | -| `Cmd+Shift+P` → "Cursor Whisper: Configure API Key" | Configure API key | +| Shortcut | Action | +| -------------------------- | ---------------------------------- | +| `Cmd+Alt+V` / `Ctrl+Alt+V` | Start Transcribe recording | +| `Cmd+Alt+P` / `Ctrl+Alt+P` | Start Promptimize recording | +| `Escape` | Cancel recording (while recording) | + +Shortcuts **start** recording only — stop by clicking the status bar. See [Keyboard Shortcuts](docs/user-guide/keyboard-shortcuts.md). + +### Commands (Command Palette) + +| Command | Purpose | +| ----------------------------------------------------- | --------------------------------- | +| `Promptimize: Start Transcribe Recording` | Start raw transcription | +| `Promptimize: Stop Transcribe Recording` | Stop and process Transcribe | +| `Promptimize: Start Promptimize Recording` | Start optimized prompt | +| `Promptimize: Stop Promptimize Recording` | Stop and process Promptimize | +| `Promptimize: Cancel Recording` | Discard recording | +| `Promptimize: Open Configuration` | Configuration webview | +| `Promptimize: Configure OpenAI API Key (Whisper)` | Set Whisper API key | +| `Promptimize: Configure Prompt Optimization Provider` | Provider setup wizard | +| `Promptimize: Configure OpenAI Optimization Model` | Pick GPT model (OpenAI only) | +| `Promptimize: Test Configuration` | Test setup; opens results webview | +| `Promptimize: Setup Wizard` | Opens configuration panel | + +**Deprecated:** `(Deprecated) Start Recording` and `(Deprecated) Stop 
Recording` — use mode-specific commands instead. --- @@ -370,6 +494,7 @@ The extension provides clear visual feedback through the status bar: ### API Key Security Your OpenAI API key is: + 1. Stored in VSCode's secure credential storage (SecretStorage) 2. Never exposed in logs or error messages 3. Never sent anywhere except OpenAI's official API @@ -378,6 +503,7 @@ Your OpenAI API key is: ### Microphone Permissions The extension requests microphone access: + - **macOS**: System Settings → Privacy & Security → Microphone - **Windows**: Settings → Privacy → Microphone - **Linux**: System-dependent, usually automatic @@ -415,7 +541,7 @@ pnpm run watch ### Project Structure ``` -cursor-whisper/ +promptimize/ ├── src/ │ ├── application/ # Use cases and ports │ ├── domain/ # Business entities @@ -441,16 +567,12 @@ See [`docs/architecture/`](docs/architecture/) for detailed structure documentat ## 🧪 Testing -Automated tests are **pending** — see [`PROGRESS.md`](PROGRESS.md) and [`docs/testing/strategy.md`](docs/testing/strategy.md) for the focused test plan. +Automated tests cover use cases, transformers, and UI components — see [`docs/testing/strategy.md`](docs/testing/strategy.md). 
-### Run Tests (when implemented) +### Run Tests ```bash -# Unit tests -pnpm test - -# Watch mode -pnpm run test:watch +source scripts/ensure-node.sh && pnpm test ``` ### Test Strategy @@ -464,39 +586,44 @@ See [`docs/testing/strategy.md`](docs/testing/strategy.md) for critical test pri ## 📈 Roadmap -### v0.1.0 (Current - Alpha) -- ✅ Basic audio recording +### v0.1.0 (Current) + +- ✅ Dual recording modes (Transcribe + Promptimize) - ✅ Whisper transcription -- ✅ Editor insertion +- ✅ Prompt transformation (8 providers) +- ✅ Configuration webview +- ✅ Chat / editor / clipboard insertion - ✅ API key configuration -### v0.2.0 (Next - Beta) -- 🔄 GPT-4 prompt transformation -- 🔄 Transformation preview -- 🔄 Custom transformation styles +### v0.2.0 (Next) + +- 🔄 Apply planned settings (`audioQuality`, `maxRecordingDuration`, `showNotifications`) +- 🔄 Transformation preview before insert +- 🔄 Transcription language in configuration webview ### v0.3.0 -- 🔄 Cursor chat integration -- 🔄 Chat Participant API -- 🔄 Context-aware insertion + +- 🔄 Context-aware insertion improvements +- 🔄 Push-to-talk mode ### v0.4.0 + - 🔄 Real-time streaming transcription - 🔄 Recording history - 🔄 Edit before insert ### v0.5.0 -- 🔄 Multi-language auto-detection -- 🔄 Custom vocabulary + +- 🔄 Custom vocabulary UI - 🔄 Technical term correction ### v1.0.0 (Stable) + - 🔄 Full production release -- 🔄 Complete documentation - 🔄 Performance optimization - 🔄 Extensive testing -See [`docs/roadmap/`](docs/roadmap/) for detailed roadmap. +See [`PROGRESS.md`](PROGRESS.md) for current project status. --- @@ -541,17 +668,22 @@ We welcome contributions! See [`docs/standards/coding-conventions.md`](docs/stan ## 🐛 Troubleshooting +See the full [Troubleshooting Guide](docs/user-guide/troubleshooting.md) with decision trees. + ### Microphone not working **macOS:** + 1. Go to System Settings → Privacy & Security → Microphone 2. Ensure VSCode/Cursor is enabled **Windows:** + 1. Go to Settings → Privacy → Microphone 2. 
Ensure VSCode/Cursor has permission **Linux:** + - Permissions are usually automatic - Check `pavucontrol` if using PulseAudio @@ -570,13 +702,14 @@ We welcome contributions! See [`docs/standards/coding-conventions.md`](docs/stan ### Cursor Agents Window issues -Cursor Whisper works best in: +Promptimize works best in: + - **Classic Mode** (`cursor --classic`) - **Editor Window** ### Debug output and privacy -Transcriptions and optimized prompts are **never written to logs**. For troubleshooting, use the status bar, progress notifications, and error dialogs. Enable the **Cursor Whisper** output channel only for operational messages (timestamps, durations, error types)—not user speech content. +Transcriptions and optimized prompts are **never written to logs**. For troubleshooting, use the status bar, progress notifications, and error dialogs. Enable the **Promptimize** output channel only for operational messages (timestamps, durations, error types)—not user speech content. MIT License - see [LICENSE](LICENSE) file for details. @@ -601,10 +734,12 @@ MIT License - see [LICENSE](LICENSE) file for details. 
## 🔗 Links - [Documentation](docs/) +- [Recording Modes](docs/user-guide/recording-modes.md) +- [Configuration Webview Guide](docs/configuration/webview-guide.md) - [Architecture Docs](docs/architecture/) -- [API Reference](docs/api/) +- [Configuration Guide](docs/configuration/) +- [Troubleshooting](docs/user-guide/troubleshooting.md) - [Project Progress](PROGRESS.md) -- [Roadmap](docs/roadmap/versions.md) --- diff --git a/__tests__/__mocks__/token-costs.ts b/__tests__/__mocks__/token-costs.ts new file mode 100644 index 0000000..b9c9884 --- /dev/null +++ b/__tests__/__mocks__/token-costs.ts @@ -0,0 +1,12 @@ +export class CostClient { + calculateCost = jest.fn(); + getModelPricing = jest.fn(); + getModelPricingOrNull = jest.fn(); + getProviderModels = jest.fn(); + listModels = jest.fn(); + getRawProviderData = jest.fn(); + getCachedDate = jest.fn(); + clearCache = jest.fn(); +} + +export class ClockMismatchError extends Error {} diff --git a/__tests__/setup.ts b/__tests__/setup.ts index 3c98520..7218f31 100644 --- a/__tests__/setup.ts +++ b/__tests__/setup.ts @@ -22,14 +22,14 @@ jest.mock('vscode', () => ({ createWebviewPanel: jest.fn(), }, commands: { - registerCommand: jest.fn((_, handler) => ({ + registerCommand: jest.fn((_, _handler) => ({ dispose: jest.fn(), })), executeCommand: jest.fn(), }, workspace: { getConfiguration: jest.fn(() => ({ - get: jest.fn((key, defaultValue) => defaultValue), + get: jest.fn((_key, defaultValue) => defaultValue), update: jest.fn(), has: jest.fn(() => true), })), @@ -39,6 +39,10 @@ jest.mock('vscode', () => ({ Left: 1, Right: 2, }, + ThemeColor: jest.fn((id: string) => ({ id })), + ProgressLocation: { + Notification: 15, + }, ViewColumn: { One: 1, Two: 2, diff --git a/cursor-whisper-0.1.0.vsix b/cursor-whisper-0.1.0.vsix index 9bd6e34..a7f1573 100644 Binary files a/cursor-whisper-0.1.0.vsix and b/cursor-whisper-0.1.0.vsix differ diff --git a/docs/README.md b/docs/README.md index 5f9d31d..6f536c2 100644 --- a/docs/README.md +++ 
b/docs/README.md @@ -1,289 +1,93 @@ -# Cursor Whisper Documentation +# Promptimize Documentation -Welcome to the comprehensive documentation for Cursor Whisper, a professional VSCode/Cursor extension for voice-to-prompt transformation. +Documentation for the Promptimize VSCode/Cursor extension. --- -## 📚 Documentation Structure +## Documentation Structure -This documentation is organized into the following sections: +### User guides -### [`architecture/`](architecture/) -Complete architectural documentation including: -- System overview and layers -- Component diagrams -- Dependency graphs -- Architectural patterns -- Design decisions +| Document | Purpose | +|----------|---------| +| [Quick Start](quickstart.md) | Install, first recording, troubleshooting | +| [Recording Modes](user-guide/recording-modes.md) | Transcribe vs Promptimize workflows | +| [Keyboard Shortcuts](user-guide/keyboard-shortcuts.md) | Keybindings and Command Palette reference | +| [Troubleshooting](user-guide/troubleshooting.md) | Decision trees for common issues | +| [Configuration Guide](configuration/README.md) | Whisper setup, optimization providers, settings | +| [Configuration Webview](configuration/webview-guide.md) | Interactive setup panel features | +| [Provider Selection](configuration/provider-selection.md) | When to use which provider | +| [Advanced Settings](configuration/advanced-settings.md) | Transcription hints, planned settings, test output | -### [`adr/`](adr/) -Architecture Decision Records (ADRs) documenting: -- Key technical decisions -- Rationale and context -- Alternatives considered -- Consequences and trade-offs +### Architecture & design -### [`api/`](api/) -API reference documentation for: -- Public interfaces -- Use cases -- Domain entities -- Infrastructure services -- Presentation components +| Document | Purpose | +|----------|---------| +| [Architecture Overview](architecture/overview.md) | Layers, components, data flow | +| [Architecture Decision 
Records](adr/) | Why key technical choices were made | +| [Complete Flow](flows/complete-flow.md) | End-to-end runtime behavior | -### [`application/`](application/) -Application layer documentation: -- Use case detailed specifications -- Port/interface definitions -- Data Transfer Objects (DTOs) -- Application workflows +### Operations -### [`domain/`](domain/) -Domain layer documentation: -- Entity definitions -- Value objects -- Business rules -- Domain events -- Error types +| Document | Purpose | +|----------|---------| +| [Testing Strategy](testing/strategy.md) | Test priorities and manual smoke checklist | +| [Release Process](deployment/release-process.md) | Build, package, and publish | +| [Coding Conventions](standards/coding-conventions.md) | Naming, structure, review checklist | -Infrastructure and presentation layer implementations are documented in [`src/`](../src/) (TypeScript types and JSDoc). See also [`api/README.md`](api/README.md) and [`application/ports.md`](application/ports.md) for interface contracts. 
+### Other -### [`flows/`](flows/) -Complete workflow documentation: -- User interaction flows -- System process flows -- Error handling flows -- Sequence diagrams +| Document | Purpose | +|----------|---------| +| [UX States](ux/states.md) | Status bar states, notifications, error copy | +| [Security & Privacy](security/privacy.md) | Data handling, API keys, threat model | +| [Technical Research](research/technical-investigation.md) | API constraints, Cursor compatibility findings | -### [`ux/`](ux/) -User experience documentation: -- UI states and transitions -- Interaction patterns -- Visual feedback -- Accessibility -- Performance targets - -### [`security/`](security/) -Security and privacy documentation: -- API key management -- Data handling policies -- Microphone permissions -- Privacy guarantees -- Threat model - -### [`testing/`](testing/) -Testing strategy and guidelines: -- Unit testing approach -- Integration testing -- E2E testing scenarios -- Test coverage targets -- Testing tools and setup - -### [`deployment/`](deployment/) -Deployment and distribution documentation: -- Build process -- Packaging for VSCode Marketplace -- CI/CD pipelines -- Release process -- Version management - -### [`roadmap/`](roadmap/) -Product roadmap and planning: -- MVP definition -- Version milestones -- Feature priorities -- Technical debt tracking -- Future enhancements - -### [`research/`](research/) -Technical research and investigations: -- Cursor compatibility findings -- VSCode API limitations -- Whisper API capabilities -- Audio processing techniques -- Performance benchmarks - -Cursor compatibility notes are covered in [`adr/0007-cursor-compatibility.md`](adr/0007-cursor-compatibility.md) and [`research/technical-investigation.md`](research/technical-investigation.md). - ---- - -## 🎯 Quick Navigation - -### For New Contributors -1. Start with [`architecture/overview.md`](architecture/overview.md) -2. 
Read [`architecture/clean-architecture.md`](architecture/clean-architecture.md) -3. Review [`adr/`](adr/) for key decisions -4. Check project status in [`PROGRESS.md`](../PROGRESS.md) - -### For Implementers -1. Read [`api/README.md`](api/README.md) for interface definitions -2. Check [`application/ports.md`](application/ports.md) for ports and DTOs -3. Review [`flows/complete-flow.md`](flows/complete-flow.md) for workflows -4. Consult [`testing/strategy.md`](testing/strategy.md) for testing approach -5. Browse [`src/`](../src/) for layer implementations - -### For Users -1. Read the main [`README.md`](../README.md) -2. Review [`ux/states.md`](ux/states.md) for UI states -3. See [`security/privacy.md`](security/privacy.md) for privacy details - -### For Maintainers -1. Review [`roadmap/versions.md`](roadmap/versions.md) -2. Check [`deployment/release-process.md`](deployment/release-process.md) -3. Monitor [`research/technical-investigation.md`](research/technical-investigation.md) for technical findings -4. Update [`adr/`](adr/) when making architectural changes - ---- - -## 📖 Documentation Principles - -This documentation follows these principles: - -### 1. **Completeness** -Every module, interface, and decision is documented with: -- Purpose and responsibilities -- Interfaces and contracts -- Examples and usage -- Edge cases and limitations - -### 2. **Clarity** -Documentation is: -- Written in clear, concise language -- Accompanied by diagrams where helpful -- Organized logically -- Easy to navigate - -### 3. **Maintainability** -Documentation is: -- Version-controlled alongside code -- Updated with code changes -- Reviewed in pull requests -- Living and evolving - -### 4. 
**Actionability** -Documentation provides: -- Clear next steps -- Code examples -- Decision frameworks -- Practical guidance - ---- - -## 🔄 Keeping Documentation Updated - -### When to Update Documentation - -- **Architecture changes**: Update `architecture/` and create ADR in `adr/` -- **New features**: Update `api/`, `flows/`, and `roadmap/` -- **API changes**: Update `api/` and relevant layer docs -- **Bug fixes**: Update [`README.md`](../README.md) troubleshooting if user-facing -- **Performance improvements**: Update [`research/technical-investigation.md`](research/technical-investigation.md) - -### Documentation Review Process - -1. Documentation changes should be part of every PR -2. Reviewers check both code and docs -3. Breaking changes require ADR -4. Major features require flow diagrams +Implementation details live in [`src/`](../src/) with TypeScript types and JSDoc. --- -## 🎨 Diagram Conventions - -We use **Mermaid** for all diagrams. Common diagram types: - -- **Sequence Diagrams**: For flows and interactions -- **Class Diagrams**: For domain models and relationships -- **Flowcharts**: For decision trees and processes -- **State Diagrams**: For UI states and transitions - -See [`architecture/overview.md`](architecture/overview.md) and [`flows/complete-flow.md`](flows/complete-flow.md) for diagram examples. - ---- +## Quick Navigation -## 📝 Writing Guidelines +### For users -### Code Examples +1. [Quick Start](quickstart.md) +2. [Recording Modes](user-guide/recording-modes.md) +3. [Configuration Webview](configuration/webview-guide.md) +4. [Configuration Guide](configuration/README.md) +5. [Troubleshooting](user-guide/troubleshooting.md) -- Use TypeScript with full type annotations -- Include imports for context -- Show realistic, production-ready code -- Comment non-obvious decisions +### For contributors -### Diagrams +1. [Architecture Overview](architecture/overview.md) +2. [ADRs](adr/) for key decisions +3. 
[Complete Flow](flows/complete-flow.md) +4. [Testing Strategy](testing/strategy.md) +5. Browse [`src/`](../src/) for implementation -- Keep diagrams focused on one concept -- Use consistent naming with codebase -- Include legends for symbols -- Export as Mermaid code +### For maintainers -### ADRs - -- Follow ADR template in `adr/template.md` -- Number sequentially: `0001-title.md` -- Include context, decision, and consequences -- Link to related ADRs +1. [Release Process](deployment/release-process.md) +2. [Project Progress](../PROGRESS.md) +3. Update [ADRs](adr/) when making architectural changes --- -## 🤝 Contributing to Documentation - -Documentation contributions are as valuable as code contributions! - -### How to Contribute +## Keeping Documentation Updated -1. **Identify gaps**: Missing or outdated docs -2. **Create issue**: Describe what needs documenting -3. **Write docs**: Follow structure and conventions -4. **Submit PR**: Documentation changes like code changes +| Change type | Update | +|-------------|--------| +| Architecture | `architecture/overview.md` + new ADR | +| User-facing features | `user-guide/`, `configuration/`, `quickstart.md`, root `README.md` | +| Bug fixes (user-facing) | `user-guide/troubleshooting.md`, root `README.md` | +| Release process | `deployment/release-process.md` | -### Documentation Standards - -- Use Markdown for all documentation -- Follow existing structure and organization -- Include diagrams for complex concepts -- Provide code examples where relevant -- Link related documentation -- Keep language clear and concise +Documentation changes belong in the same PR as code changes when behavior changes. 
--- -## 🔗 External Resources +## External Resources -### VSCode Extension Development - [VSCode Extension API](https://code.visualstudio.com/api) -- [Extension Guides](https://code.visualstudio.com/api/extension-guides/overview) -- [VSCode Extension Samples](https://github.com/microsoft/vscode-extension-samples) - -### OpenAI APIs -- [Whisper API Documentation](https://platform.openai.com/docs/guides/speech-to-text) -- [GPT-4 API Documentation](https://platform.openai.com/docs/guides/gpt) -- [OpenAI Node.js SDK](https://github.com/openai/openai-node) - -### Architecture Patterns -- [Clean Architecture by Robert C. Martin](https://blog.cleancoder.com/uncle-bob/2012/08/13/the-clean-architecture.html) -- [Hexagonal Architecture](https://herbertograca.com/2017/11/16/explicit-architecture-01-ddd-hexagonal-onion-clean-cqrs-how-i-put-it-all-together/) -- [Dependency Injection Patterns](https://martinfowler.com/articles/injection.html) - ---- - -## 📧 Documentation Questions? - -If you have questions about the documentation: - -1. Check existing documentation first -2. Search [GitHub Discussions](https://github.com/vypdev/cursor-whisper/discussions) -3. Create a new discussion with tag `documentation` -4. For specific issues, open a [GitHub Issue](https://github.com/vypdev/cursor-whisper/issues) - ---- - -## 📅 Last Updated - -This documentation index was last updated: **2026-05-23** - -Check individual document headers for specific update dates. - ---- - -**Happy documenting! 📚** +- [OpenAI Whisper API](https://platform.openai.com/docs/guides/speech-to-text) +- [Clean Architecture (Robert C. Martin)](https://blog.cleancoder.com/uncle-bob/2012/08/13/the-clean-architecture.html) diff --git a/docs/adr/0001-use-typescript.md b/docs/adr/0001-use-typescript.md index c20902f..f4ef979 100644 --- a/docs/adr/0001-use-typescript.md +++ b/docs/adr/0001-use-typescript.md @@ -12,7 +12,7 @@ ## Context -VSCode extensions can be written in JavaScript or TypeScript. 
We need to decide which language to use for Cursor Whisper. +VSCode extensions can be written in JavaScript or TypeScript. We need to decide which language to use for Promptimize. Key considerations: - **Type safety**: Complex business logic around audio, transcription, and transformations diff --git a/docs/adr/0002-clean-architecture.md b/docs/adr/0002-clean-architecture.md index 7dc39f0..acb64bd 100644 --- a/docs/adr/0002-clean-architecture.md +++ b/docs/adr/0002-clean-architecture.md @@ -12,7 +12,7 @@ ## Context -We need to decide on the architectural pattern for Cursor Whisper. The extension has significant complexity: +We need to decide on the architectural pattern for Promptimize. The extension has significant complexity: - **Multiple external integrations**: OpenAI Whisper, GPT-4, VSCode APIs, Audio APIs - **Complex business logic**: Recording state management, transcription, transformation @@ -182,7 +182,7 @@ Application Layer (StartRecordingUseCase) ↓ depends on Application Layer Port (IAudioRecorder interface) ↑ implemented by -Infrastructure Layer (WebviewAudioRecorder) +Infrastructure Layer (NativeAudioRecorder) ``` ### Port/Adapter Pattern @@ -195,13 +195,7 @@ export interface IAudioRecorder { } // Infrastructure layer provides adapter (implementation) -export class WebviewAudioRecorder implements IAudioRecorder { - async startRecording(): Promise { /* ... */ } - async stopRecording(): Promise { /* ... */ } -} - -// Alternative implementation can be swapped -export class NodeAudioRecorder implements IAudioRecorder { +export class NativeAudioRecorder implements IAudioRecorder { async startRecording(): Promise { /* ... */ } async stopRecording(): Promise { /* ... 
*/ } } diff --git a/docs/adr/0004-dependency-injection.md b/docs/adr/0004-dependency-injection.md index fdf4527..a19787b 100644 --- a/docs/adr/0004-dependency-injection.md +++ b/docs/adr/0004-dependency-injection.md @@ -80,7 +80,7 @@ export function activate(context: vscode.ExtensionContext) { // Register command context.subscriptions.push( vscode.commands.registerCommand( - 'cursor-whisper.startRecording', + 'promptimize.startRecording', () => startRecordingUseCase.execute() ) ); diff --git a/docs/adr/0005-webview-audio-recording.md b/docs/adr/0005-webview-audio-recording.md index db823af..6a9af37 100644 --- a/docs/adr/0005-webview-audio-recording.md +++ b/docs/adr/0005-webview-audio-recording.md @@ -220,7 +220,7 @@ export class RecordingPanel { } const panel = vscode.window.createWebviewPanel( - 'cursorWhisperRecording', + 'promptimizeRecording', 'Voice Recording', vscode.ViewColumn.Beside, { enableScripts: true } diff --git a/docs/adr/0007-cursor-compatibility.md b/docs/adr/0007-cursor-compatibility.md index e6c1f39..86d4699 100644 --- a/docs/adr/0007-cursor-compatibility.md +++ b/docs/adr/0007-cursor-compatibility.md @@ -62,7 +62,7 @@ export class CursorCompatibilityChecker { static async showCompatibilityWarning(): Promise { if (await this.isAgentsWindow()) { const selection = await vscode.window.showWarningMessage( - 'Cursor Whisper works best in Classic Mode or Editor Window. ' + + 'Promptimize works best in Classic Mode or Editor Window. 
' + 'The Agents Window has limited extension support.', 'Open in Editor Window', 'Continue Anyway' @@ -117,7 +117,7 @@ export class CursorCompatibilityChecker { - **Cons**: - Misses primary use case (Cursor chat integration) - Less compelling value proposition - - Name "Cursor Whisper" is misleading + - Former name "Cursor Whisper" was misleading - Cursor users are target audience - **Why not chosen**: Cursor integration is a key differentiator @@ -216,7 +216,7 @@ From README.md: ```markdown ## Compatibility -Cursor Whisper works best in: +Promptimize works best in: - ✅ **Cursor Classic Mode** (`cursor --classic`) - ✅ **Cursor Editor Window** - ✅ **Standard VSCode** diff --git a/docs/adr/0008-secret-storage.md b/docs/adr/0008-secret-storage.md index 61bc48c..483908c 100644 --- a/docs/adr/0008-secret-storage.md +++ b/docs/adr/0008-secret-storage.md @@ -50,7 +50,7 @@ Key aspects: ```typescript export class SecretStorage { - private static readonly API_KEY_KEY = 'cursor-whisper.openai.apiKey'; + private static readonly API_KEY_KEY = 'promptimize.openai.apiKey'; constructor(private context: vscode.ExtensionContext) {} diff --git a/docs/adr/0009-no-persistent-audio.md b/docs/adr/0009-no-persistent-audio.md index d4cc33b..e6a9ead 100644 --- a/docs/adr/0009-no-persistent-audio.md +++ b/docs/adr/0009-no-persistent-audio.md @@ -247,7 +247,7 @@ Add to [`docs/security/privacy.md`](../security/privacy.md): ### No Persistent Storage -Cursor Whisper does **NOT** store your audio recordings: +Promptimize does **NOT** store your audio recordings: - Audio exists only in memory during processing - Never written to disk diff --git a/docs/adr/0014-multiple-transformation-providers.md b/docs/adr/0014-multiple-transformation-providers.md new file mode 100644 index 0000000..415014a --- /dev/null +++ b/docs/adr/0014-multiple-transformation-providers.md @@ -0,0 +1,111 @@ +# ADR-0014: Multiple Transformation Providers + +**Status**: Accepted + +**Date**: 2026-05-23 + +**Deciders**: Core 
Team + +**Related**: [ADR-0011](0011-gpt4-transformation.md), [ADR-0003](0003-openai-whisper.md) + +--- + +## Context + +Promptimize originally used OpenAI GPT models exclusively for prompt transformation after Whisper transcription. Users requested flexibility to choose alternative LLM providers based on cost, privacy, availability, or organizational requirements. + +Requirements: + +- Support multiple LLM providers for prompt transformation +- Keep OpenAI Whisper as the sole transcription provider +- Maintain backward compatibility with existing OpenAI-only configurations +- Follow Clean Architecture with swappable adapters behind `IPromptTransformer` + +--- + +## Decision + +**We will support multiple transformation providers via a factory pattern:** + +| Provider | Use Case | +|----------|----------| +| OpenAI | Default; same API key as Whisper | +| Anthropic | Claude models for high-quality structuring | +| Google Gemini | Cost-effective cloud alternative | +| Azure OpenAI | Enterprise deployments on Azure | +| Ollama | Local/offline inference | +| OpenCode | Local multi-provider gateway via opencode-llm-proxy | +| OpenRouter | Cloud gateway to 200+ models with one API key | +| Cursor | Native Cursor AI models via Cursor SDK | + +Key aspects: + +- `PromptTransformerFactory` resolves the active provider from configuration +- Provider-specific API keys stored in VSCode SecretStorage (`promptimize.apiKey.{provider}`) +- Provider selection via settings (`promptimize.transformationProvider`) and command palette +- Shared system prompt and improvement heuristics across providers +- No automatic fallback to another provider on failure (an opt-in fallback setting may be added in the future) + +--- + +## Alternatives Considered + +### Alternative 1: OpenAI Only + +- **Pros**: Simplest, single API key, consistent quality +- **Cons**: Vendor lock-in, no local option, limits user choice +- **Why not chosen**: Does not address user request (Issue #1) + +### Alternative 2: Unified API
Gateway (LiteLLM, etc.) + +- **Pros**: Single integration point for many providers +- **Cons**: Additional dependency, harder to debug, overkill for a VSCode extension +- **Why not chosen**: Direct SDK integrations are clearer and more maintainable + +### Alternative 3: Multiple Transcription Providers + +- **Pros**: Full provider flexibility +- **Cons**: Out of scope for Issue #1; Whisper quality is sufficient +- **Why not chosen**: Explicitly deferred; transcription stays on Whisper + +--- + +## Consequences + +### Positive + +- Users can choose providers matching their budget and privacy needs +- The existing Clean Architecture ports already accommodated adding providers +- Ollama enables fully local transformation (no cloud calls for the optimization step) +- Provider-specific keys allow easy switching without reconfiguration + +### Negative + +- Increased codebase complexity (8 provider adapters) +- More configuration surface area for users +- Quality and latency vary by provider +- Additional SDK dependencies to maintain + +### Risks + +- **Provider API changes**: Pin SDK versions, add unit tests with mocks +- **Configuration confusion**: Mitigated by `Configure Transformation Provider` command and docs +- **Azure setup complexity**: Document endpoint/deployment requirements clearly + +--- + +## Implementation Notes + +- Factory: `src/infrastructure/transformation/PromptTransformerFactory.ts` +- Value object: `src/domain/value-objects/TransformationProvider.ts` +- Config: `promptimize.transformationProvider` and provider-specific model settings +- OpenCode and OpenRouter use the OpenAI SDK with custom `baseURL` (OpenAI-compatible APIs) +- Cursor uses `@cursor/sdk` with `Agent.prompt()` for one-shot transformations +- Commands: `promptimize.configureTransformationProvider`, `promptimize.testTransformation` + +--- + +## References + +- [GitHub Issue #1](https://github.com/vypdev/cursor-whisper/issues/1) +- [Configuration guide](../configuration/README.md) diff --git a/docs/adr/README.md
b/docs/adr/README.md index acc664e..c801dd5 100644 --- a/docs/adr/README.md +++ b/docs/adr/README.md @@ -1,6 +1,6 @@ # Architecture Decision Records (ADRs) -This directory contains Architecture Decision Records for Cursor Whisper. +This directory contains Architecture Decision Records for Promptimize. ## What is an ADR? @@ -40,6 +40,7 @@ Each ADR follows this structure: - [ADR-0011](0011-gpt4-transformation.md) - Use GPT-4 for Prompt Transformation - [ADR-0012](0012-mono-audio-16khz.md) - Use Mono Audio at 16kHz Sample Rate - [ADR-0013](0013-native-audio-capture.md) - Use Native Audio Capture with @kstonekuan/audio-capture +- [ADR-0014](0014-multiple-transformation-providers.md) - Support Multiple Transformation Providers ### Proposed ADRs diff --git a/docs/api/README.md b/docs/api/README.md deleted file mode 100644 index 0f8bce2..0000000 --- a/docs/api/README.md +++ /dev/null @@ -1,508 +0,0 @@ -# API Reference - -**Last Updated**: 2026-05-23 - ---- - -## Overview - -This section contains detailed API documentation for all modules in Cursor Whisper. 
- ---- - -## Structure - -``` -docs/api/ -├── README.md (this file) -├── domain/ -│ ├── entities.md # Recording, Transcription, Prompt -│ ├── value-objects.md # AudioData, AudioFormat, RecordingState -│ └── errors.md # All domain errors -│ -├── application/ -│ ├── use-cases.md # All use cases -│ ├── ports.md # All interface definitions -│ └── dto.md # Data transfer objects -│ -├── infrastructure/ -│ ├── audio/ -│ │ └── NativeAudioRecorder (see src/infrastructure/audio/) -│ ├── transcription/ -│ │ └── OpenAIWhisperService.md -│ ├── transformation/ -│ │ └── OpenAIPromptTransformer.md -│ ├── insertion/ -│ │ ├── ChatParticipantInserter.md -│ │ ├── EditorTextInserter.md -│ │ └── FallbackTextInserter.md -│ └── configuration/ -│ └── VSCodeConfigRepository.md -│ -└── presentation/ - ├── commands.md # All command handlers - ├── status-bar.md # Status bar item - └── status-bar.md # Status bar item -``` - -> **Note**: Layer implementation details live in [`src/`](../../src/). The tree above is the documentation target structure; not every file exists yet. 
- ---- - -## Quick Reference - -### Domain Layer - -#### Entities - -| Class | Purpose | Key Methods | -|-------|---------|-------------| -| `Recording` | Audio recording session (not yet wired in pipeline) | `isLongRecording()`, `getFileSizeInMB()` | -| `Transcription` | Transcription result | `hasLowConfidence()`, `getWordCount()` | -| `Prompt` | Transformed prompt | `wasTransformed()`, `getCompressionRatio()` | - -#### Value Objects - -| Class | Purpose | Key Methods | -|-------|---------|-------------| -| `AudioData` | Audio binary data | `getSizeInMB()`, `getDurationInSeconds()` | -| `AudioFormat` | Audio format enum | - | -| `RecordingState` | State enum | - | -| `ApiKey` | Secure API key | `getMasked()`, `equals()` | - -#### Errors - -| Error | Parent | When Thrown | -|-------|--------|-------------| -| `RecordingError` | `Error` | Recording failures | -| `TranscriptionError` | `Error` | Transcription failures | -| `ValidationError` | `Error` | Validation failures | -| `ConfigError` | `Error` | Configuration issues | -| `PermissionError` | `Error` | Permission denied | - ---- - -### Application Layer - -#### Use Cases - -| Use Case | Purpose | Dependencies | -|----------|---------|--------------| -| `StartRecordingUseCase` | Start audio recording | `IAudioRecorder`, `IConfigRepository` | -| `StopRecordingUseCase` | Stop recording & process | `IAudioRecorder`, `ITranscriptionService` | -| `TranscribeAudioUseCase` | Transcribe audio to text | `ITranscriptionService` | -| `TransformPromptUseCase` | Transform text to prompt | `IPromptTransformer` | -| `InsertTextUseCase` | Insert text to target | `ITextInserter[]` | - -#### Ports (Interfaces) - -| Port | Purpose | Implementations | -|------|---------|----------------| -| `IAudioRecorder` | Audio recording | `NativeAudioRecorder` (primary), `WebviewAudioRecorder` (deprecated) | -| `ITranscriptionService` | Speech-to-text | `OpenAIWhisperService` | -| `IPromptTransformer` | Prompt optimization | 
`OpenAIPromptTransformer` | -| `ITextInserter` | Text insertion | `ChatParticipantInserter`, `EditorTextInserter`, `FallbackTextInserter` | -| `IConfigRepository` | Configuration | `VSCodeConfigRepository` | -| `ILogger` | Logging | `VSCodeOutputChannelLogger` (production), `ConsoleLogger` (unused) | - ---- - -### Infrastructure Layer - -#### Audio - -| Class | Purpose | External Dependencies | -|-------|---------|---------------------| -| `NativeAudioRecorder` | Native PCM capture in extension host | `@kstonekuan/audio-capture` | -| `WebviewAudioRecorder` | Deprecated browser-based recording | MediaRecorder API (not wired) | - -#### External Services - -| Class | Purpose | External Dependencies | -|-------|---------|---------------------| -| `OpenAIWhisperService` | Whisper API integration | OpenAI SDK | -| `OpenAIPromptTransformer` | GPT-4 API integration | OpenAI SDK | - -#### Text Insertion - -| Class | Purpose | Priority | -|-------|---------|----------| -| `ChatParticipantInserter` | Insert into Cursor chat | 1 (highest) | -| `EditorTextInserter` | Insert into active editor | 2 | -| `FallbackTextInserter` | Copy to clipboard | 3 (lowest) | - ---- - -### Presentation Layer - -#### Commands - -| Command ID | Handler | Purpose | -|-----------|---------|---------| -| `cursor-whisper.startRecording` | `StartRecordingCommand` | Start recording | -| `cursor-whisper.stopRecording` | `StopRecordingCommand` | Stop recording | -| `cursor-whisper.cancelRecording` | `CancelRecordingCommand` | Cancel recording | -| `cursor-whisper.configureApiKey` | `ConfigureApiKeyCommand` | Set API key | - -#### UI Components - -| Component | Purpose | -|-----------|---------| -| `RecordingStatusBarItem` | Status bar indicator | -| `RecordingWebview` | Optional recording panel | -| `MicrophoneButton` | Main recording button | - ---- - -## Usage Examples - -### Recording Workflow - -```typescript -// 1. 
Start recording -const startUseCase = new StartRecordingUseCase( - audioRecorder, - configRepository, - logger -); - -await startUseCase.execute(); - -// 2. Stop recording and transcribe -const stopUseCase = new StopRecordingUseCase( - audioRecorder, - transcriptionService, - logger -); - -const transcription = await stopUseCase.execute(); - -// 3. Transform prompt -const transformUseCase = new TransformPromptUseCase( - promptTransformer, - logger -); - -const prompt = await transformUseCase.execute(transcription.text); - -// 4. Insert text -const insertUseCase = new InsertTextUseCase( - [chatInserter, editorInserter, fallbackInserter], - logger -); - -await insertUseCase.execute(prompt.transformedText); -``` - -### Direct Adapter Usage - -```typescript -// Audio recording -const recorder = new WebviewAudioRecorder( - permissionManager, - logger -); - -await recorder.startRecording(); -const audioData = await recorder.stopRecording(); - -// Transcription -const whisperService = new OpenAIWhisperService( - secretStorage, - logger -); - -const result = await whisperService.transcribe(audioData, { - language: 'en', - prompt: 'Technical programming terminology' -}); - -console.log(result.text); -``` - ---- - -## Type Definitions - -### Common Types - -```typescript -// Configuration -interface Config { - apiKey?: string; - transcriptionLanguage: string; - enablePromptTransformation: boolean; - audioQuality: 'low' | 'medium' | 'high'; - maxRecordingDuration: number; - showNotifications: boolean; -} - -// Transcription Options -interface TranscriptionOptions { - language?: string; - prompt?: string; - temperature?: number; -} - -// Transcription Result -interface TranscriptionResult { - text: string; - language: string; - duration: number; - confidence?: number; - metadata?: Record; -} - -// Transformed Prompt -interface TransformedPrompt { - originalText: string; - transformedText: string; - improvements: string[]; - sections?: { - context?: string; - objectives?: 
string[]; - requirements?: string[]; - constraints?: string[]; - }; -} - -// Prompt Context -interface PromptContext { - editorLanguage?: string; - projectType?: string; - previousPrompts?: string[]; -} -``` - ---- - -## Error Handling - -### Error Hierarchy - -``` -Error -├── RecordingError -│ ├── InvalidRecordingError -│ └── RecordingTimeoutError -├── TranscriptionError -│ ├── TranscriptionTimeoutError -│ └── AudioTooLargeError -├── ValidationError -├── ConfigError -│ ├── MissingApiKeyError -│ └── InvalidConfigError -└── PermissionError -``` - -### Error Response Pattern - -```typescript -try { - await useCase.execute(); -} catch (error) { - if (error instanceof ConfigError) { - // Handle configuration error - showConfigDialog(); - } else if (error instanceof PermissionError) { - // Handle permission error - showPermissionInstructions(); - } else if (error instanceof RecordingError) { - // Handle recording error - showRetryOption(); - } else { - // Handle unexpected error - logger.error('Unexpected error', error); - throw error; - } -} -``` - ---- - -## Events and State - -### Recording State Machine - -```typescript -enum RecordingState { - IDLE = 'idle', - RECORDING = 'recording', - PROCESSING = 'processing', - TRANSCRIBING = 'transcribing', - TRANSFORMING = 'transforming', - INSERTING = 'inserting', - COMPLETED = 'completed', - ERROR = 'error', - CANCELLED = 'cancelled' -} -``` - -### State Change Events - -```typescript -interface StateChangeEvent { - previousState: RecordingState; - currentState: RecordingState; - timestamp: Date; - error?: Error; -} - -// Subscribe to state changes -audioRecorder.onStateChange((state: RecordingState) => { - updateUI(state); -}); -``` - ---- - -## Configuration Schema - -### VSCode Settings - -```json -{ - "cursorWhisper.transcriptionLanguage": { - "type": "string", - "default": "auto", - "enum": ["auto", "en", "es", "fr", "de", "it", "pt", "ja", "ko", "zh"], - "description": "Language for transcription" - }, - 
"cursorWhisper.enablePromptTransformation": { - "type": "boolean", - "default": true, - "description": "Enable AI-powered prompt transformation" - }, - "cursorWhisper.audioQuality": { - "type": "string", - "enum": ["low", "medium", "high"], - "default": "high", - "description": "Audio recording quality" - }, - "cursorWhisper.maxRecordingDuration": { - "type": "number", - "default": 120, - "minimum": 10, - "maximum": 300, - "description": "Maximum recording duration in seconds" - }, - "cursorWhisper.showNotifications": { - "type": "boolean", - "default": true, - "description": "Show status notifications" - } -} -``` - ---- - -## Extension Points - -### Adding New Adapters - -**Example: Add Google Speech-to-Text** - -```typescript -// 1. Implement the port -export class GoogleSpeechService implements ITranscriptionService { - async transcribe( - audio: AudioData, - options?: TranscriptionOptions - ): Promise { - // Google Speech API implementation - } - - validateAudioFile(audio: AudioData): boolean { - // Validation logic - } -} - -// 2. Register in composition root -const transcriptionService = config.provider === 'google' - ? new GoogleSpeechService(credentials, logger) - : new OpenAIWhisperService(secretStorage, logger); - -// 3. Use in use cases (no changes needed!) 
-const useCase = new TranscribeAudioUseCase( - transcriptionService, - configRepository, - logger -); -``` - ---- - -## Performance Considerations - -### Async Operations - -All I/O operations are async: -- `startRecording()`: ~500ms -- `stopRecording()`: ~1-2s (audio conversion) -- `transcribe()`: ~3-8s (API call) -- `transform()`: ~2-4s (API call) -- `insert()`: ~100ms - -### Memory Management - -- Audio data cleaned up immediately after transcription -- No persistent audio storage -- Webview disposed when not needed -- Event listeners unregistered on disposal - ---- - -## Testing Guidelines - -### Mocking Ports - -```typescript -// Mock audio recorder -const mockRecorder: jest.Mocked<IAudioRecorder> = { - startRecording: jest.fn().mockResolvedValue(undefined), - stopRecording: jest.fn().mockResolvedValue(mockAudioData), - cancelRecording: jest.fn(), - isRecording: jest.fn().mockReturnValue(false), - getState: jest.fn().mockReturnValue(RecordingState.IDLE), - onStateChange: jest.fn() -}; - -// Use in tests -const useCase = new StartRecordingUseCase( - mockRecorder, - mockConfig, - mockLogger -); - -await useCase.execute(); - -expect(mockRecorder.startRecording).toHaveBeenCalled(); -``` - ---- - -## Versioning - -**Current API Version**: v0.1.0 - -**Compatibility**: -- Semantic versioning (MAJOR.MINOR.PATCH) -- Breaking changes increment MAJOR -- New features increment MINOR -- Bug fixes increment PATCH - ---- - -## Support - -For detailed documentation on specific modules, see: -- [Domain Layer](../domain/README.md) -- [Application Ports](../application/ports.md) -- [Source: Infrastructure](../../src/infrastructure/) -- [Source: Presentation](../../src/presentation/) - ---- - -**Last Updated**: 2026-05-23 -**API Version**: 0.1.0 -**VSCode API**: 1.120+ diff --git a/docs/application/ports.md b/docs/application/ports.md deleted file mode 100644 index 71e8489..0000000 --- a/docs/application/ports.md +++ /dev/null @@ -1,460 +0,0 @@ -# Application Layer - Ports & Interfaces - -**Last 
Updated**: 2026-05-23 - ---- - -## Overview - -This document defines all **ports** (interfaces) in the Application Layer. These are contracts that infrastructure adapters must implement. - -**Location**: `src/application/ports/` - ---- - -## Audio Recording - -### IAudioRecorder - -**Purpose**: Contract for audio recording implementations. - -**File**: `src/application/ports/IAudioRecorder.ts` - -```typescript -import { AudioData } from '../../domain/value-objects/AudioData'; -import { RecordingState } from '../../domain/value-objects/RecordingState'; - -/** - * Port for audio recording functionality. - * - * Implementations: - * - NativeAudioRecorder (primary): Uses @kstonekuan/audio-capture in the extension host - * - WebviewAudioRecorder (deprecated): Uses browser MediaRecorder — not wired in extension.ts - */ -export interface IAudioRecorder { - /** - * Start recording audio from microphone. - * - * @throws PermissionError if microphone access denied - * @throws RecordingError if recording fails to start - */ - startRecording(): Promise<void>; - - /** - * Stop recording and return audio data. - * - * @returns AudioData object with recorded audio - * @throws RecordingError if no active recording - */ - stopRecording(): Promise<AudioData>; - - /** - * Cancel current recording without returning data. - * Cleans up resources immediately. - */ - cancelRecording(): void; - - /** - * Check if currently recording. - * - * @returns true if recording is active - */ - isRecording(): boolean; - - /** - * Get current recording state. - * - * @returns Current RecordingState - */ - getState(): RecordingState; - - /** - * Register callback for state changes. - * - * @param callback Function called when state changes - */ - onStateChange(callback: (state: RecordingState) => void): void; -} -``` - ---- - -## Transcription - -### ITranscriptionService - -**Purpose**: Contract for audio-to-text transcription. 
- -**File**: `src/application/ports/ITranscriptionService.ts` - -```typescript -import { AudioData } from '../../domain/value-objects/AudioData'; -import { TranscriptionResult } from '../dto/TranscriptionResult'; - -export interface TranscriptionOptions { - /** - * Language of the audio (ISO 639-1 code). - * If undefined, service will auto-detect. - */ - language?: string; - - /** - * Optional prompt to guide transcription. - * Useful for technical terminology. - */ - prompt?: string; - - /** - * Temperature for sampling (0.0 - 1.0). - * Lower = more deterministic. - */ - temperature?: number; -} - -/** - * Port for audio transcription functionality. - * - * Implementations: - * - OpenAIWhisperService (primary): Uses OpenAI Whisper API - * - GoogleSpeechService (future): Uses Google Cloud Speech-to-Text - */ -export interface ITranscriptionService { - /** - * Transcribe audio to text. - * - * @param audio Audio data to transcribe - * @param options Optional transcription options - * @returns Transcription result with text - * @throws TranscriptionError if transcription fails - * @throws AudioTooLargeError if audio exceeds size limit - */ - transcribe( - audio: AudioData, - options?: TranscriptionOptions - ): Promise<TranscriptionResult>; - - /** - * Validate audio file meets service requirements. - * - * @param audio Audio data to validate - * @returns true if valid, throws error otherwise - * @throws ValidationError if audio is invalid - */ - validateAudioFile(audio: AudioData): boolean; -} -``` - ---- - -## Prompt Transformation - -### IPromptTransformer - -**Purpose**: Contract for transforming transcription into optimized prompts. - -**File**: `src/application/ports/IPromptTransformer.ts` - -```typescript -import { TransformedPrompt } from '../dto/TransformedPrompt'; - -export interface PromptContext { - /** - * Programming language of active editor. - */ - editorLanguage?: string; - - /** - * Detected project type (Node.js, Python, etc.). 
- */ - projectType?: string; - - /** - * Previous prompts for context (future). - */ - previousPrompts?: string[]; -} - -/** - * Port for prompt transformation functionality. - * - * Implementations: - * - OpenAIPromptTransformer (primary): Uses GPT-4 - * - RuleBasedTransformer (future): Uses regex/NLP rules - */ -export interface IPromptTransformer { - /** - * Transform raw transcription into structured prompt. - * - * @param transcription Raw transcription text - * @param context Optional context for transformation - * @returns Transformed prompt with improvements - * @throws TransformationError if transformation fails - */ - transform( - transcription: string, - context?: PromptContext - ): Promise<TransformedPrompt>; -} -``` - ---- - -## Text Insertion - -### ITextInserter - -**Purpose**: Contract for inserting text into various targets. - -**File**: `src/application/ports/ITextInserter.ts` - -```typescript -/** - * Port for text insertion functionality. - * - * Implementations (in priority order): - * - ChatParticipantInserter: Insert into Cursor chat - * - EditorTextInserter: Insert into active editor - * - FallbackTextInserter: Copy to clipboard + notify - */ -export interface ITextInserter { - /** - * Check if this inserter can handle current context. - * - * @returns true if can insert in current context - */ - canInsert(): boolean; - - /** - * Insert text using this strategy. - * - * @param text Text to insert - * @returns true if successful - * @throws InsertionError if insertion fails - */ - insert(text: string): Promise<boolean>; - - /** - * Get priority of this inserter. - * Higher-priority inserters (those with a lower number) are tried first. - * - * @returns Priority number (1 = highest) - */ - getPriority(): number; -} -``` - ---- - -## Configuration - -### IConfigRepository - -**Purpose**: Contract for configuration management. - -**File**: `src/application/ports/IConfigRepository.ts` - -```typescript -export interface Config { - /** - * OpenAI API key (stored separately in SecretStorage). 
- */ - apiKey?: string; - - /** - * Language for transcription (ISO 639-1 code or 'auto'). - */ - transcriptionLanguage: string; - - /** - * Enable prompt transformation via GPT-4. - */ - enablePromptTransformation: boolean; - - /** - * Audio recording quality ('low' | 'medium' | 'high'). - */ - audioQuality: 'low' | 'medium' | 'high'; - - /** - * Maximum recording duration in seconds. - */ - maxRecordingDuration: number; - - /** - * Show status notifications. - */ - showNotifications: boolean; - - /** - * Transcription hint for technical terms (future). - */ - transcriptionHint?: string; -} - -/** - * Port for configuration repository. - * - * Implementations: - * - VSCodeConfigRepository: Uses VSCode workspace configuration - */ -export interface IConfigRepository { - /** - * Get current configuration. - * - * @returns Current config with defaults applied - */ - getConfig(): Promise<Config>; - - /** - * Update configuration. - * - * @param config Partial config to update - */ - updateConfig(config: Partial<Config>): Promise<void>; - - /** - * Watch for configuration changes. - * - * @param callback Function called when config changes - */ - onConfigChange(callback: (config: Config) => void): void; -} -``` - ---- - -## Logging - -### ILogger - -**Purpose**: Contract for logging functionality. - -**File**: `src/application/ports/ILogger.ts` - -```typescript -export enum LogLevel { - DEBUG = 'debug', - INFO = 'info', - WARN = 'warn', - ERROR = 'error' -} - -/** - * Port for logging functionality. - * - * Implementations: - * - ConsoleLogger: Logs to console - * - VSCodeOutputChannelLogger: Logs to VSCode output channel - * - FileLogger (future): Logs to file - */ -export interface ILogger { - /** - * Log debug message. - */ - debug(message: string, data?: any): void; - - /** - * Log info message. - */ - info(message: string, data?: any): void; - - /** - * Log warning message. - */ - warn(message: string, data?: any): void; - - /** - * Log error message. 
- */ - error(message: string, error?: Error): void; - - /** - * Set minimum log level. - */ - setLevel(level: LogLevel): void; -} -``` - ---- - -## Data Transfer Objects (DTOs) - -### TranscriptionResult - -**File**: `src/application/dto/TranscriptionResult.ts` - -```typescript -export interface TranscriptionResult { - /** - * Transcribed text. - */ - text: string; - - /** - * Detected or specified language. - */ - language: string; - - /** - * Audio duration in seconds. - */ - duration: number; - - /** - * Confidence score (0.0 - 1.0), if available. - */ - confidence?: number; - - /** - * Additional metadata from transcription service. - */ - metadata?: Record; -} -``` - -### TransformedPrompt - -**File**: `src/application/dto/TransformedPrompt.ts` - -```typescript -export interface TransformedPrompt { - /** - * Original transcribed text. - */ - originalText: string; - - /** - * Transformed/optimized text. - */ - transformedText: string; - - /** - * List of improvements made. - */ - improvements: string[]; - - /** - * Optional sections extracted by transformer. - */ - sections?: { - context?: string; - objectives?: string[]; - requirements?: string[]; - constraints?: string[]; - }; -} -``` - ---- - -## Summary - -All ports are: -- ✅ Well-documented with JSDoc -- ✅ Type-safe with TypeScript -- ✅ Framework-agnostic -- ✅ Easy to mock for testing -- ✅ Clear single responsibility -- ✅ Versioned for future changes - -**Implementation**: See [`src/infrastructure/`](../../src/infrastructure/) for adapter implementations and [`api/README.md`](../api/README.md) for the API overview. 
diff --git a/docs/architecture/clean-architecture.md b/docs/architecture/clean-architecture.md deleted file mode 100644 index 9d5b118..0000000 --- a/docs/architecture/clean-architecture.md +++ /dev/null @@ -1,732 +0,0 @@ -# Clean Architecture in Cursor Whisper - -**Last Updated**: 2026-05-23 - ---- - -## Overview - -Cursor Whisper implements Clean Architecture (also known as Hexagonal Architecture or Ports & Adapters). This document explains what that means and how it's applied in our codebase. - ---- - -## What is Clean Architecture? - -Clean Architecture is an architectural pattern created by Robert C. Martin (Uncle Bob) that emphasizes: - -1. **Independence of Frameworks**: Business logic doesn't depend on libraries -2. **Testability**: Business rules testable without UI, database, web server -3. **Independence of UI**: UI can change without changing business rules -4. **Independence of Database**: Can swap databases without affecting business logic -5. **Independence of External Agencies**: Business rules don't know about the outside world - ---- - -## The Dependency Rule - -**Source code dependencies must point only inward, toward higher-level policies.** - -``` - ┌──────────────┐ - │ Entities │ ← Domain Layer (Inner) - └──────┬───────┘ - │ - ┌──────▼───────┐ - │ Use Cases │ ← Application Layer - └──────┬───────┘ - │ - ┌───────────────┴───────────────┐ - │ │ -┌───────▼────────┐ ┌────────▼──────┐ -│ Controllers │ │ Gateways │ ← Outer Layers -│ Presenters │ │ Adapters │ -└────────────────┘ └───────────────┘ -``` - -**Key Point**: Outer layers can depend on inner layers, but NEVER the reverse. - ---- - -## Layers in Cursor Whisper - -### 1. Domain Layer (Innermost) - -**What it is**: The heart of the application - pure business logic. 
- -**Contains**: -- Entities (core business objects) -- Value Objects (immutable values) -- Business Rules (domain logic) -- Domain Errors (business exceptions) - -**What it DOESN'T contain**: -- No framework imports (VSCode, React, etc.) -- No infrastructure code -- No I/O operations -- No external service calls - -**Example - Domain Entity**: - -```typescript -// src/domain/entities/Recording.ts -import { AudioData } from '../value-objects/AudioData'; -import { RecordingState } from '../value-objects/RecordingState'; - -export class Recording { - private state: RecordingState; - - constructor( - public readonly id: string, - public readonly audioData: AudioData, - public readonly timestamp: Date, - public readonly duration: number - ) { - this.state = RecordingState.COMPLETED; - this.validate(); - } - - private validate(): void { - if (this.duration <= 0) { - throw new InvalidRecordingError('Duration must be positive'); - } - - if (this.duration > 300) { - throw new InvalidRecordingError('Duration exceeds maximum (5 minutes)'); - } - - if (this.audioData.buffer.length === 0) { - throw new InvalidRecordingError('Audio data is empty'); - } - } - - isLongRecording(): boolean { - return this.duration > 60; - } - - getFileSizeInMB(): number { - return this.audioData.buffer.length / (1024 * 1024); - } -} -``` - -**Example - Value Object**: - -```typescript -// src/domain/value-objects/AudioFormat.ts -export enum AudioFormat { - WAV = 'wav', - MP3 = 'mp3', - WEBM = 'webm', - OGG = 'ogg' -} - -export class AudioData { - constructor( - public readonly buffer: Buffer, - public readonly format: AudioFormat, - public readonly sampleRate: number, - public readonly channels: number - ) { - if (buffer.length === 0) { - throw new Error('Audio buffer cannot be empty'); - } - - if (sampleRate <= 0) { - throw new Error('Sample rate must be positive'); - } - - if (channels < 1 || channels > 2) { - throw new Error('Channels must be 1 (mono) or 2 (stereo)'); - } - } - - 
getSizeInBytes(): number { - return this.buffer.length; - } - - getDurationInSeconds(bitDepth: number = 16): number { - const bytesPerSample = bitDepth / 8; - const samplesCount = this.buffer.length / (bytesPerSample * this.channels); - return samplesCount / this.sampleRate; - } -} -``` - -### 2. Application Layer - -**What it is**: Application-specific business rules and orchestration. - -**Contains**: -- Use Cases (application operations) -- Ports/Interfaces (contracts for dependencies) -- DTOs (data transfer objects) -- Application-specific errors - -**What it DOESN'T contain**: -- No framework-specific code -- No direct external service usage -- No UI code -- No infrastructure implementations - -**Example - Port (Interface)**: - -```typescript -// src/application/ports/IAudioRecorder.ts -import { AudioData } from '../../domain/value-objects/AudioData'; -import { RecordingState } from '../../domain/value-objects/RecordingState'; - -export interface IAudioRecorder { - /** - * Start recording audio from microphone - * @throws PermissionError if microphone access denied - * @throws RecordingError if recording fails to start - */ - startRecording(): Promise; - - /** - * Stop recording and return audio data - * @returns AudioData object with recorded audio - * @throws RecordingError if no active recording - */ - stopRecording(): Promise; - - /** - * Cancel current recording without returning data - */ - cancelRecording(): void; - - /** - * Check if currently recording - */ - isRecording(): boolean; - - /** - * Get current recording state - */ - getState(): RecordingState; - - /** - * Register callback for state changes - */ - onStateChange(callback: (state: RecordingState) => void): void; -} -``` - -**Example - Use Case**: - -```typescript -// src/application/use-cases/StartRecordingUseCase.ts -import { IAudioRecorder } from '../ports/IAudioRecorder'; -import { IConfigRepository } from '../ports/IConfigRepository'; -import { ILogger } from '../ports/ILogger'; -import 
{ PermissionError } from '../../domain/errors/PermissionError'; -import { ConfigError } from '../../domain/errors/ConfigError'; - -export class StartRecordingUseCase { - constructor( - private readonly audioRecorder: IAudioRecorder, - private readonly configRepo: IConfigRepository, - private readonly logger: ILogger - ) {} - - async execute(): Promise { - this.logger.info('Starting recording use case'); - - // 1. Validate configuration - const config = await this.configRepo.getConfig(); - if (!config.apiKey) { - throw new ConfigError('OpenAI API Key not configured'); - } - - // 2. Check if already recording - if (this.audioRecorder.isRecording()) { - throw new RecordingError('Already recording'); - } - - // 3. Check microphone permission - const hasPermission = await this.checkMicrophonePermission(); - if (!hasPermission) { - throw new PermissionError('Microphone permission denied'); - } - - // 4. Start recording - try { - await this.audioRecorder.startRecording(); - this.logger.info('Recording started successfully'); - } catch (error) { - this.logger.error('Failed to start recording', error); - throw new RecordingError('Failed to start recording', error); - } - } - - private async checkMicrophonePermission(): Promise { - // Implementation depends on platform - // This is abstracted away from the use case - return true; - } -} -``` - -### 3. Infrastructure Layer - -**What it is**: Implementations of application ports, external service integrations. 
- -**Contains**: -- Adapters (implement ports) -- External service clients (OpenAI, VSCode APIs) -- Repositories (config, storage) -- File management -- Network communication - -**What it DOESN'T contain**: -- No business logic (that's in Domain) -- No use case orchestration (that's in Application) -- No UI code (that's in Presentation) - -**Example - Adapter**: - -```typescript -// src/infrastructure/audio/WebviewAudioRecorder.ts -import { IAudioRecorder } from '../../application/ports/IAudioRecorder'; -import { AudioData } from '../../domain/value-objects/AudioData'; -import { AudioFormat } from '../../domain/value-objects/AudioFormat'; -import { RecordingState } from '../../domain/value-objects/RecordingState'; -import { MicrophonePermissionManager } from './MicrophonePermissionManager'; -import { ILogger } from '../../application/ports/ILogger'; - -export class WebviewAudioRecorder implements IAudioRecorder { - private mediaRecorder: MediaRecorder | null = null; - private audioChunks: Blob[] = []; - private stream: MediaStream | null = null; - private state: RecordingState = RecordingState.IDLE; - private stateListeners: Array<(state: RecordingState) => void> = []; - private startTime: number = 0; - - constructor( - private readonly permissionManager: MicrophonePermissionManager, - private readonly logger: ILogger - ) {} - - async startRecording(): Promise { - // Check permission first - const hasPermission = await this.permissionManager.requestPermission(); - if (!hasPermission) { - throw new PermissionError('Microphone permission denied'); - } - - // Get user media - this.stream = await navigator.mediaDevices.getUserMedia({ - audio: { - channelCount: 1, - sampleRate: 16000, - echoCancellation: true, - noiseSuppression: true, - autoGainControl: true - } - }); - - // Create MediaRecorder - const mimeType = this.getSupportedMimeType(); - this.mediaRecorder = new MediaRecorder(this.stream, { - mimeType, - audioBitsPerSecond: 128000 - }); - - // Setup handlers - 
this.setupMediaRecorderHandlers(); - - // Start recording - this.audioChunks = []; - this.startTime = Date.now(); - this.mediaRecorder.start(100); - this.setState(RecordingState.RECORDING); - - this.logger.info('Recording started'); - } - - async stopRecording(): Promise { - if (!this.mediaRecorder || this.state !== RecordingState.RECORDING) { - throw new RecordingError('No active recording to stop'); - } - - return new Promise((resolve, reject) => { - this.mediaRecorder!.onstop = async () => { - try { - const duration = (Date.now() - this.startTime) / 1000; - const audioBlob = new Blob(this.audioChunks, { - type: this.mediaRecorder!.mimeType - }); - - // Convert to WAV - const wavBlob = await this.convertToWav(audioBlob); - const arrayBuffer = await wavBlob.arrayBuffer(); - const buffer = Buffer.from(arrayBuffer); - - this.cleanup(); - - const audioData = new AudioData( - buffer, - AudioFormat.WAV, - 16000, - 1 - ); - - this.logger.info('Recording stopped', { - duration, - size: buffer.length - }); - - resolve(audioData); - } catch (error) { - reject(error); - } - }; - - this.mediaRecorder!.stop(); - this.setState(RecordingState.IDLE); - }); - } - - // ... other methods -} -``` - -### 4. Presentation Layer - -**What it is**: User interface and framework-specific code. 
- -**Contains**: -- Commands (VSCode command handlers) -- UI Components (React for webview) -- Status Bar items -- State management for UI -- VSCode-specific integrations - -**What it DOESN'T contain**: -- No business logic (call use cases instead) -- No direct external service calls (use infrastructure through application) -- No domain entities (use DTOs) - -**Example - Command**: - -```typescript -// src/presentation/commands/StartRecordingCommand.ts -import * as vscode from 'vscode'; -import { StartRecordingUseCase } from '../../application/use-cases/StartRecordingUseCase'; -import { PermissionError } from '../../domain/errors/PermissionError'; -import { ConfigError } from '../../domain/errors/ConfigError'; -import { RecordingError } from '../../domain/errors/RecordingError'; - -export function registerStartRecordingCommand( - context: vscode.ExtensionContext, - useCase: StartRecordingUseCase -): vscode.Disposable { - return vscode.commands.registerCommand( - 'cursor-whisper.startRecording', - async () => { - try { - // Just call the use case - no business logic here - await useCase.execute(); - - vscode.window.showInformationMessage('Recording started'); - } catch (error) { - // Handle different error types with appropriate UI response - if (error instanceof ConfigError) { - const selection = await vscode.window.showErrorMessage( - 'OpenAI API Key not configured', - 'Configure Now' - ); - - if (selection === 'Configure Now') { - await vscode.commands.executeCommand('cursor-whisper.configureApiKey'); - } - } else if (error instanceof PermissionError) { - await vscode.window.showErrorMessage( - 'Microphone permission denied. 
Please check system settings.', - 'Open Settings' - ); - } else if (error instanceof RecordingError) { - await vscode.window.showErrorMessage( - `Recording failed: ${error.message}` - ); - } else { - await vscode.window.showErrorMessage( - `Unexpected error: ${error.message}` - ); - } - } - } - ); -} -``` - ---- - -## Ports and Adapters - -### What are Ports? - -**Ports** are interfaces that define contracts. They live in the Application layer. - -```typescript -// Application layer defines the port -export interface ITranscriptionService { - transcribe(audio: AudioData, options?: TranscriptionOptions): Promise; - validateAudioFile(audio: AudioData): boolean; -} -``` - -### What are Adapters? - -**Adapters** are implementations of ports. They live in the Infrastructure layer. - -```typescript -// Infrastructure layer provides the adapter -export class OpenAIWhisperService implements ITranscriptionService { - async transcribe(audio: AudioData, options?: TranscriptionOptions): Promise { - // Implementation using OpenAI SDK - } - - validateAudioFile(audio: AudioData): boolean { - // Validation logic - } -} -``` - -### Why This Pattern? - -1. **Application doesn't care about implementation**: Use case uses `ITranscriptionService`, not `OpenAIWhisperService` -2. **Easy to swap**: Can replace OpenAI with Google, Azure, etc. without touching use cases -3. **Easy to test**: Mock the port, test the use case -4. **Clear contracts**: Interface documents what's expected - ---- - -## Dependency Inversion - -### The Problem (Without DI) - -```typescript -// ❌ BAD: Use case depends on concrete implementation -export class TranscribeAudioUseCase { - private whisperService = new OpenAIWhisperService(); // Tight coupling! 
- - async execute(audio: AudioData) { - return await this.whisperService.transcribe(audio); - } -} -``` - -**Problems**: -- Can't test without calling real OpenAI API -- Can't swap to different provider -- Changes to OpenAIWhisperService break use case - -### The Solution (With DI) - -```typescript -// ✅ GOOD: Use case depends on abstraction -export class TranscribeAudioUseCase { - constructor( - private transcriptionService: ITranscriptionService // Interface! - ) {} - - async execute(audio: AudioData) { - return await this.transcriptionService.transcribe(audio); - } -} -``` - -**Benefits**: -- Test with mock implementation -- Swap providers easily -- Use case doesn't know about OpenAI - ---- - -## Composition Root - -All dependencies are wired together in ONE place: `extension.ts` - -```typescript -// src/extension.ts - The Composition Root -export function activate(context: vscode.ExtensionContext) { - // 1. Create infrastructure instances - const logger = new ConsoleLogger(); - const secretStorage = new SecretStorage(context); - const configRepo = new VSCodeConfigRepository(context); - - const whisperService = new OpenAIWhisperService(secretStorage, logger); - const audioRecorder = new WebviewAudioRecorder(permissionManager, logger); - - // 2. Create use cases with dependencies injected - const startRecordingUseCase = new StartRecordingUseCase( - audioRecorder, - configRepo, - logger - ); - - const transcribeUseCase = new TranscribeAudioUseCase( - whisperService, - configRepo, - logger - ); - - // 3. 
Create presentation layer with use cases - const startRecordingCommand = registerStartRecordingCommand( - context, - startRecordingUseCase - ); - - context.subscriptions.push(startRecordingCommand); -} -``` - ---- - -## Testing Strategy - -### Domain Layer Tests - -```typescript -describe('Recording Entity', () => { - it('should throw error for negative duration', () => { - expect(() => { - new Recording('id', audioData, new Date(), -5); - }).toThrow(InvalidRecordingError); - }); - - it('should identify long recordings', () => { - const recording = new Recording('id', audioData, new Date(), 90); - expect(recording.isLongRecording()).toBe(true); - }); -}); -``` - -### Application Layer Tests (with Mocks) - -```typescript -describe('StartRecordingUseCase', () => { - it('should start recording when config is valid', async () => { - // Arrange: Create mocks - const mockAudioRecorder: IAudioRecorder = { - startRecording: jest.fn().mockResolvedValue(undefined), - stopRecording: jest.fn(), - cancelRecording: jest.fn(), - isRecording: jest.fn().mockReturnValue(false), - getState: jest.fn(), - onStateChange: jest.fn() - }; - - const mockConfig: IConfigRepository = { - getConfig: jest.fn().mockResolvedValue({ apiKey: 'test-key' }) - }; - - const useCase = new StartRecordingUseCase( - mockAudioRecorder, - mockConfig, - mockLogger - ); - - // Act - await useCase.execute(); - - // Assert - expect(mockAudioRecorder.startRecording).toHaveBeenCalled(); - }); -}); -``` - -### Infrastructure Layer Tests (Integration) - -```typescript -describe('OpenAIWhisperService', () => { - it('should transcribe audio successfully', async () => { - const service = new OpenAIWhisperService(secretStorage, logger); - const audio = createTestAudioData(); - - const result = await service.transcribe(audio); - - expect(result.text).toBeDefined(); - expect(result.text.length).toBeGreaterThan(0); - }); -}); -``` - ---- - -## Benefits We Get - -1. **Testability**: Each layer testable independently -2. 
**Flexibility**: Easy to swap implementations -3. **Maintainability**: Clear structure, easy to find code -4. **Scalability**: Can grow without tangling -5. **Team Collaboration**: Clear boundaries reduce conflicts -6. **Documentation**: Architecture IS documentation - ---- - -## Common Pitfalls to Avoid - -### ❌ Don't: Bypass Layers - -```typescript -// BAD: Presentation calling Infrastructure directly -export class SomeCommand { - async execute() { - const whisper = new OpenAIWhisperService(); // NO! - await whisper.transcribe(audio); - } -} -``` - -### ✅ Do: Go Through Application - -```typescript -// GOOD: Presentation calls Use Case -export class SomeCommand { - constructor(private useCase: TranscribeAudioUseCase) {} - - async execute() { - await this.useCase.execute(audio); // YES! - } -} -``` - -### ❌ Don't: Put Business Logic in Infrastructure - -```typescript -// BAD: Business logic in adapter -export class OpenAIWhisperService { - async transcribe(audio: AudioData) { - // Business logic here - NO! - if (audio.duration > 300) { - throw new Error('Too long'); - } - // ... - } -} -``` - -### ✅ Do: Keep Business Logic in Domain - -```typescript -// GOOD: Business logic in entity -export class Recording { - validate() { - if (this.duration > 300) { - throw new InvalidRecordingError('Duration exceeds maximum'); - } - } -} -``` - ---- - -## References - -- [Clean Architecture by Robert C. 
Martin](https://blog.cleancoder.com/uncle-bob/2012/08/13/the-clean-architecture.html) -- [Hexagonal Architecture](https://herbertograca.com/2017/11/16/explicit-architecture-01-ddd-hexagonal-onion-clean-cqrs-how-i-put-it-all-together/) -- [Dependency Inversion Principle](https://en.wikipedia.org/wiki/Dependency_inversion_principle) -- [SOLID Principles](https://en.wikipedia.org/wiki/SOLID) -- [ADR-0002: Adopt Clean Architecture](../adr/0002-clean-architecture.md) diff --git a/docs/architecture/overview.md b/docs/architecture/overview.md index f4702fb..97ce9a8 100644 --- a/docs/architecture/overview.md +++ b/docs/architecture/overview.md @@ -1,68 +1,58 @@ # Architecture Overview -**Last Updated**: 2026-05-23 - ---- - -## Table of Contents - -1. [System Overview](#system-overview) -2. [Architectural Style](#architectural-style) -3. [Layer Architecture](#layer-architecture) -4. [Component Diagram](#component-diagram) -5. [Dependency Rules](#dependency-rules) -6. [Data Flow](#data-flow) -7. [Technology Stack](#technology-stack) -8. [Design Patterns](#design-patterns) +**Last Updated**: 2026-05-24 --- ## System Overview -Cursor Whisper is a VSCode/Cursor extension that transforms voice into optimized prompts through: +Promptimize is a VSCode/Cursor extension that: + +1. **Captures audio** via native `@kstonekuan/audio-capture` +2. **Transcribes** with OpenAI Whisper (always required) +3. **Transforms** transcribed speech into structured prompts via a configurable provider ([ADR-0014](../adr/0014-multiple-transformation-providers.md)) +4. **Inserts** the result into the editor, chat, or clipboard -1. **Audio Capture** - Record user speech via native `@kstonekuan/audio-capture` -2. **Transcription** - Convert audio to text using OpenAI Whisper -3. **Transformation** - Optimize text into structured prompts using GPT-4 -4. 
**Insertion** - Insert result into editor or chat intelligently +For setup and provider selection, see the [Configuration Guide](../configuration/README.md). ### High-Level Architecture ```mermaid flowchart TB User[User] - + subgraph Presentation["Presentation Layer"] StatusBar[Status Bar UI] Commands[VSCode Commands] + ConfigPanel[Configuration Webview] end - + subgraph Application["Application Layer"] UseCases[Use Cases] Ports[Ports/Interfaces] end - + subgraph Domain["Domain Layer"] Entities[Entities] ValueObjects[Value Objects] BusinessRules[Business Rules] end - + subgraph Infrastructure["Infrastructure Layer"] - AudioAdapter[Audio Recorder] + AudioAdapter[Native Audio Recorder] WhisperAdapter[Whisper Service] - GPTAdapter[GPT-4 Service] + ProviderFactory[Prompt Transformer Factory] InsertionAdapters[Text Inserters] ConfigAdapter[Config Repository] StorageAdapter[Secret Storage] end - + subgraph External["External Services"] OpenAI[OpenAI API] + OtherLLMs[Anthropic / Google / Azure / Ollama / OpenCode / OpenRouter / Cursor] VSCodeAPI[VSCode API] - NativeAudio[Native Audio APIs] end - + User -->|Interact| Presentation Presentation -->|Execute| Application Application -->|Use| Domain @@ -74,223 +64,51 @@ flowchart TB ## Architectural Style -### Clean/Hexagonal Architecture - -Cursor Whisper follows **Clean Architecture** (also known as Hexagonal Architecture or Ports & Adapters): - -**Core Principles**: -1. **Independence**: Business logic independent of frameworks -2. **Testability**: Core logic testable without external dependencies -3. **Flexibility**: Easy to swap implementations -4. **Maintainability**: Clear separation of concerns +Promptimize follows **Clean/Hexagonal Architecture** (ports and adapters). Rationale and alternatives are documented in [ADR-0002](../adr/0002-clean-architecture.md). -**Why This Architecture?** -- Extension will evolve significantly (MVP → v1.0+) -- Multiple integration points (OpenAI, VSCode, Audio, etc.) 
-- Need to support alternative providers in future -- High testability requirement -- Team unfamiliar with codebase needs clear structure - -See [ADR-0002](../adr/0002-clean-architecture.md) for detailed rationale. +**Why this matters for this project:** +- Multiple external integrations (Whisper, several LLM providers, VSCode APIs, native audio) +- Swappable transformation providers without changing use cases +- Business logic testable with mocked ports --- ## Layer Architecture -### The Four Layers - ``` -┌─────────────────────────────────────────────┐ -│ Presentation Layer │ -│ Commands, UI, Status Bar │ -└────────────┬────────────────────────────────┘ - │ depends on - ▼ -┌─────────────────────────────────────────────┐ -│ Application Layer │ -│ Use Cases, Ports (Interfaces), DTOs │ -└────────────┬────────────────────────────────┘ - │ depends on - ▼ -┌─────────────────────────────────────────────┐ -│ Domain Layer │ -│ Entities, Value Objects, Business Logic │ -└─────────────────────────────────────────────┘ - ▲ - │ implemented by - │ -┌────────────┴────────────────────────────────┐ -│ Infrastructure Layer │ -│ Adapters, External Service Integrations │ -└─────────────────────────────────────────────┘ -``` - -### 1. 
Domain Layer - -**Purpose**: Pure business logic, no external dependencies - -**Contains**: -- **Entities**: Core business objects (`Recording`, `Transcription`, `Prompt`) -- **Value Objects**: Immutable values (`AudioFormat`, `RecordingState`, `ApiKey`) -- **Business Rules**: Core validation and logic -- **Domain Errors**: Business exception types - -**Rules**: -- NO imports from other layers -- NO framework dependencies -- Pure TypeScript/JavaScript -- Fully unit testable - -**Example**: -```typescript -// domain/entities/Recording.ts -export class Recording { - constructor( - public readonly id: string, - public readonly audioData: AudioData, - public readonly timestamp: Date, - public readonly duration: number - ) { - if (duration <= 0) { - throw new InvalidRecordingError('Duration must be positive'); - } - } - - isLongRecording(): boolean { - return this.duration > 60; // 60 seconds - } -} -``` - -### 2. Application Layer - -**Purpose**: Orchestrate business logic, define contracts - -**Contains**: -- **Use Cases**: Application-specific business operations -- **Ports (Interfaces)**: Contracts for external dependencies -- **DTOs**: Data transfer objects for layer communication - -**Rules**: -- Can import from Domain layer -- CANNOT import from Infrastructure or Presentation -- Depends on abstractions (ports), not implementations -- Framework-agnostic - -**Example**: -```typescript -// application/use-cases/StartRecordingUseCase.ts -export class StartRecordingUseCase { - constructor( - private audioRecorder: IAudioRecorder, // Port - private configRepo: IConfigRepository, // Port - private logger: ILogger // Port - ) {} - - async execute(): Promise { - const config = await this.configRepo.getConfig(); - - if (!config.apiKey) { - throw new ConfigError('API key not configured'); - } - - await this.audioRecorder.startRecording(); - } -} +Presentation ──→ Application ──→ Domain + ▲ +Infrastructure ───────┘ ``` -### 3. 
Infrastructure Layer - -**Purpose**: Implement ports, integrate with external systems - -**Contains**: -- **Adapters**: Implementations of application ports -- **External Service Clients**: OpenAI, VSCode API wrappers -- **Repositories**: Configuration, storage implementations -- **Utilities**: File management, logging - -**Rules**: -- Can import from Application and Domain -- Implements ports defined in Application -- Contains framework/library dependencies -- Isolated from Presentation - -**Example**: -```typescript -// infrastructure/transcription/OpenAIWhisperService.ts -export class OpenAIWhisperService implements ITranscriptionService { - private client: OpenAI; - - constructor( - private secretStorage: SecretStorage, - private logger: ILogger - ) { - this.initializeClient(); - } - - async transcribe(audio: AudioData): Promise { - // Implementation using OpenAI SDK - } -} -``` +| Layer | Location | Responsibility | +|-------|----------|----------------| +| **Domain** | [`src/domain/`](../../src/domain/) | Entities, value objects, domain errors — no framework imports | +| **Application** | [`src/application/`](../../src/application/) | Use cases, port interfaces, DTOs | +| **Infrastructure** | [`src/infrastructure/`](../../src/infrastructure/) | Port implementations (audio, Whisper, transformers, config, storage) | +| **Presentation** | [`src/presentation/`](../../src/presentation/) | Commands, status bar, configuration webview | -### 4. 
Presentation Layer - -**Purpose**: User interface and VSCode integration - -**Contains**: -- **Commands**: VSCode command handlers -- **Status Bar**: Status bar item and updates -- **State Management**: UI state coordination - -**Rules**: -- Can import from Application and Domain -- Orchestrates use case execution -- Handles VSCode-specific APIs -- Contains VSCode-specific APIs - -**Example**: -```typescript -// presentation/commands/StartRecordingCommand.ts -export function registerStartRecordingCommand( - context: vscode.ExtensionContext, - useCase: StartRecordingUseCase -): vscode.Disposable { - return vscode.commands.registerCommand( - 'cursor-whisper.startRecording', - async () => { - try { - await useCase.execute(); - vscode.window.showInformationMessage('Recording started'); - } catch (error) { - vscode.window.showErrorMessage(`Failed: ${error.message}`); - } - } - ); -} -``` +**Dependency rule:** Inner layers never depend on outer layers. Presentation orchestrates use cases; it does not call infrastructure directly. 
--- ## Component Diagram -### Complete Component View - ```mermaid graph TB - subgraph Presentation["🎨 Presentation Layer"] + subgraph Presentation["Presentation Layer"] CMD[Commands] SB[StatusBarItem] - STATE[StateManager] + CFG[ConfigurationPanel] end - subgraph Application["🔧 Application Layer"] + subgraph Application["Application Layer"] UC1[StartRecordingUseCase] UC2[StopRecordingUseCase] UC3[TranscribeAudioUseCase] UC4[TransformPromptUseCase] UC5[InsertTextUseCase] - + PORT1[IAudioRecorder] PORT2[ITranscriptionService] PORT3[IPromptTransformer] @@ -298,284 +116,132 @@ graph TB PORT5[IConfigRepository] end - subgraph Domain["💎 Domain Layer"] - ENT1[Recording] - ENT2[Transcription] - ENT3[Prompt] - VO1[AudioFormat] - VO2[RecordingState] - VO3[ApiKey] - end - - subgraph Infrastructure["⚙️ Infrastructure Layer"] + subgraph Infrastructure["Infrastructure Layer"] AUDIO[NativeAudioRecorder] WHISPER[OpenAIWhisperService] + FACTORY[PromptTransformerFactory] GPT[OpenAIPromptTransformer] + CLAUDE[AnthropicPromptTransformer] + GEMINI[GooglePromptTransformer] + AZURE[AzureOpenAIPromptTransformer] + OLLAMA[OllamaPromptTransformer] + OPENCODE[OpenCodePromptTransformer] + OPENROUTER[OpenRouterPromptTransformer] + CURSOR[CursorPromptTransformer] INSERT1[ChatParticipantInserter] INSERT2[EditorTextInserter] INSERT3[FallbackTextInserter] CONFIG[VSCodeConfigRepository] - SECRET[SecretStorage] end - CMD -->|executes| UC1 - CMD -->|executes| UC2 - SB -->|observes| STATE - WV -->|sends messages| CMD - - UC1 -->|uses| PORT1 - UC2 -->|uses| PORT1 - UC2 -->|uses| UC3 - UC3 -->|uses| PORT2 - UC4 -->|uses| PORT3 - UC5 -->|uses| PORT4 - - UC1 -->|creates| ENT1 - UC3 -->|creates| ENT2 - UC4 -->|creates| ENT3 - - AUDIO -->|implements| PORT1 - WHISPER -->|implements| PORT2 - GPT -->|implements| PORT3 - INSERT1 -->|implements| PORT4 - INSERT2 -->|implements| PORT4 - INSERT3 -->|implements| PORT4 - CONFIG -->|implements| PORT5 -``` - ---- - -## Dependency Rules - -### The Dependency Rule - 
-**Dependencies point inward**. Inner layers NEVER depend on outer layers. - -``` -Presentation ──→ Application ──→ Domain - ▲ -Infrastructure ───────┘ -``` - -### What Each Layer Can Import - -| Layer | Can Import From | Cannot Import From | -|-------|----------------|-------------------| -| Domain | Nothing | Everything | -| Application | Domain | Infrastructure, Presentation | -| Infrastructure | Application, Domain | Presentation | -| Presentation | Application, Domain | Infrastructure (directly) | - -### Why This Matters - -1. **Domain stays pure**: Business logic has no framework coupling -2. **Application is portable**: Use cases work anywhere -3. **Infrastructure is swappable**: Change OpenAI to Google without touching business logic -4. **Presentation is replaceable**: Could build CLI, web UI, etc. - -### Enforcing Dependencies - -Use ESLint rules to enforce: - -```javascript -// .eslintrc.js -module.exports = { - rules: { - 'no-restricted-imports': ['error', { - patterns: [ - { - group: ['**/infrastructure/**'], - message: 'Domain and Application cannot import Infrastructure' - }, - { - group: ['**/presentation/**'], - message: 'Domain, Application, and Infrastructure cannot import Presentation' - } - ] - }] - } -}; + CMD --> UC1 + CMD --> UC2 + SB --> CMD + CFG --> CONFIG + + UC1 --> PORT1 + UC3 --> PORT2 + UC4 --> PORT3 + UC5 --> PORT4 + + AUDIO --> PORT1 + WHISPER --> PORT2 + FACTORY --> GPT + FACTORY --> CLAUDE + FACTORY --> GEMINI + FACTORY --> AZURE + FACTORY --> OLLAMA + FACTORY --> OPENCODE + FACTORY --> OPENROUTER + FACTORY --> CURSOR + GPT --> PORT3 + CLAUDE --> PORT3 + GEMINI --> PORT3 + AZURE --> PORT3 + OLLAMA --> PORT3 + OPENCODE --> PORT3 + OPENROUTER --> PORT3 + CURSOR --> PORT3 + INSERT1 --> PORT4 + INSERT2 --> PORT4 + INSERT3 --> PORT4 + CONFIG --> PORT5 ``` --- ## Data Flow -### Complete Recording Flow - -```mermaid -sequenceDiagram - actor User - participant UI as Presentation
(Status Bar) - participant CMD as Commands - participant UC1 as StartRecording
UseCase - participant UC2 as StopRecording
UseCase - participant Audio as Audio
Recorder - participant Whisper as Whisper
Service - participant GPT as Prompt
Transformer - participant Insert as Text
Inserter - participant API as OpenAI API - - User->>UI: Click Mic Button - UI->>CMD: Execute Command - CMD->>UC1: execute() - UC1->>Audio: startRecording() - Audio-->>UC1: Recording Started - UC1-->>CMD: Success - CMD-->>UI: Update State - UI-->>User: Show Recording - - User->>UI: Click Stop - UI->>CMD: Execute Command - CMD->>UC2: execute() - UC2->>Audio: stopRecording() - Audio-->>UC2: AudioData - - UC2->>Whisper: transcribe(audioData) - Whisper->>API: POST /audio/transcriptions - API-->>Whisper: { text: "..." } - Whisper-->>UC2: TranscriptionResult - - UC2->>GPT: transform(transcription) - GPT->>API: POST /chat/completions - API-->>GPT: { content: "..." } - GPT-->>UC2: TransformedPrompt - - UC2->>Insert: insert(transformedText) - Insert-->>UC2: Success - - UC2-->>CMD: Success - CMD-->>UI: Update State - UI-->>User: Show Success -``` - -### Error Flow +End-to-end recording flow with error branches: see [Complete Flow](../flows/complete-flow.md). ```mermaid sequenceDiagram actor User - participant UC as Use Case - participant Service as External Service - participant Error as Error Handler - participant UI as User Interface - - User->>UC: Request Action - UC->>Service: Call External API - Service--xUC: API Error - UC->>Error: Handle Error - Error->>Error: Log Error - Error->>Error: Transform to User Message - Error-->>UI: User-Friendly Message - UI-->>User: Show Error + Actions - - alt Retry Available - User->>UC: Retry - else Fallback Available - UC->>UC: Use Fallback Strategy - else Fatal Error - UI-->>User: Show Instructions + participant UI as Status Bar + participant UC as StopRecording Pipeline + participant Audio as NativeAudioRecorder + participant Whisper as OpenAI Whisper + participant Transform as PromptTransformer + participant Insert as TextInserter + + User->>UI: Toggle recording (stop) + UI->>UC: execute() + UC->>Audio: stopRecording() + Audio-->>UC: AudioData + UC->>Whisper: transcribe(audioData) + Whisper-->>UC: TranscriptionResult + opt 
Optimization enabled + UC->>Transform: transform(transcription) + Transform-->>UC: TransformedPrompt end + UC->>Insert: insert(text) + Insert-->>UC: Success + UC-->>UI: Update state ``` --- ## Technology Stack -### Core Technologies +| Component | Technology | Purpose | +|-----------|-----------|---------| +| Language | TypeScript 5.4+ | Type-safe development | +| Runtime | Node.js 22 LTS | Extension host | +| Framework | VSCode Extension API 1.120+ | Extension foundation | +| Bundler | Webpack 5 | Module bundling | +| Audio | @kstonekuan/audio-capture | Native microphone capture | +| Transcription | OpenAI Whisper | Speech-to-text | +| Transformation | OpenAI, Anthropic, Google, Azure, Ollama, OpenCode, OpenRouter, Cursor | Prompt optimization | +| Testing | Jest | Unit and integration tests | -| Component | Technology | Version | Purpose | -|-----------|-----------|---------|---------| -| Language | TypeScript | 5.4+ | Type-safe development | -| Runtime | Node.js | 20 LTS | Extension host | -| Framework | VSCode Extension API | 1.120+ | Extension foundation | -| Bundler | Webpack | 5.x | Module bundling | -| Audio Capture | @kstonekuan/audio-capture | 0.0.3+ | Native microphone capture | - -### External Services - -| Service | Purpose | Cost | -|---------|---------|------| -| OpenAI Whisper | Audio transcription | $0.006/minute | -| OpenAI GPT-4o | Prompt transformation | $15/1M tokens | - -### Development Tools - -| Tool | Purpose | -|------|---------| -| Jest | Unit testing | -| @vscode/test-electron | Integration testing | -| ESLint | Code linting | -| Prettier | Code formatting | -| Husky | Git hooks | - -See [ADR-0001](../adr/0001-use-typescript.md) for technology decisions. +Technology decisions: [ADR-0001](../adr/0001-use-typescript.md), [ADR-0013](../adr/0013-native-audio-capture.md). --- ## Design Patterns -### Patterns Used - -1. 
**Clean Architecture** (overall structure) - - Separation of concerns - - Dependency inversion - - See [ADR-0002](../adr/0002-clean-architecture.md) - -2. **Dependency Injection** (throughout) - - Constructor injection - - Manual wiring in composition root - - See [ADR-0004](../adr/0004-dependency-injection.md) - -3. **Chain of Responsibility** (text insertion) - - Multiple insertion strategies - - Automatic fallback - - See [ADR-0006](../adr/0006-text-insertion-strategy.md) - -4. **Adapter Pattern** (infrastructure) - - Wrap external APIs - - Implement application ports - - Isolate external dependencies - -5. **Strategy Pattern** (multiple implementations) - - Different audio recorders - - Different text inserters - - Swappable at runtime - -6. **Observer Pattern** (state management) - - UI observes state changes - - Reactive updates - - Event-driven architecture - -7. **Repository Pattern** (configuration) - - Abstract configuration access - - Consistent interface - - Easy to test - -8. 
**Factory Pattern** (object creation) - - Complex object construction - - Composition root - - Dependency wiring - -### Pattern Benefits - -- **Maintainability**: Clear structure, easy to modify -- **Testability**: Each component testable in isolation -- **Flexibility**: Easy to add new implementations -- **Scalability**: Patterns support growth -- **Understandability**: Standard patterns are familiar +| Pattern | Where | ADR | +|---------|-------|-----| +| Clean Architecture | Layer structure | [0002](../adr/0002-clean-architecture.md) | +| Dependency Injection | Constructor injection in use cases | [0004](../adr/0004-dependency-injection.md) | +| Chain of Responsibility | Text insertion (chat → editor → clipboard) | [0006](../adr/0006-text-insertion-strategy.md) | +| Strategy / Factory | Swappable prompt transformers | [0014](../adr/0014-multiple-transformation-providers.md) | +| Adapter | Infrastructure wraps external APIs | — | +| Repository | Configuration access abstraction | — | --- -## Next Steps - -For more detailed documentation, see: +## Source Code Map -- [Domain Layer](../domain/README.md) -- [Application Layer](../application/ports.md) -- [Infrastructure Layer (source)](../../src/infrastructure/) -- [Presentation Layer (source)](../../src/presentation/) -- [API Reference](../api/README.md) +| Concern | Start here | +|---------|------------| +| Entry point & DI wiring | [`src/extension.ts`](../../src/extension.ts) | +| Use cases | [`src/application/use-cases/`](../../src/application/use-cases/) | +| Port interfaces | [`src/application/ports/`](../../src/application/ports/) | +| Domain model | [`src/domain/`](../../src/domain/) | +| External integrations | [`src/infrastructure/`](../../src/infrastructure/) | +| UI & commands | [`src/presentation/`](../../src/presentation/) | --- -**This architecture is designed to last and evolve from MVP through v1.0 and beyond.** +**Related:** [Complete Flow](../flows/complete-flow.md) · [ADRs](../adr/) · [Testing 
Strategy](../testing/strategy.md) diff --git a/docs/configuration/README.md b/docs/configuration/README.md new file mode 100644 index 0000000..909354e --- /dev/null +++ b/docs/configuration/README.md @@ -0,0 +1,277 @@ +# Configuration Guide + +Complete reference for installing, configuring, and using Promptimize. + +--- + +## Service Architecture + +Promptimize uses two independent AI services: + +| Service | Provider | Required | Credentials | +|---------|----------|----------|-------------| +| **Transcription** | OpenAI Whisper | Yes | OpenAI API key | +| **Prompt optimization** | User choice | No | Provider-specific (see below) | + +```mermaid +flowchart TB + subgraph required [Required: Transcription] + Mic[Microphone] --> Whisper[OpenAI Whisper API] + Whisper --> Text[Raw transcription] + end + + subgraph optional [Optional: Optimization] + Text --> Enabled{enabled?} + Enabled -->|No| Insert[Insert raw text] + Enabled -->|Yes| Provider[Selected provider] + Provider --> Optimized[Structured prompt] + Optimized --> Insert + end +``` + +--- + +## Step 1: OpenAI API Key (Whisper) + +**Always required** for voice-to-text. + +1. Create a key at https://platform.openai.com/api-keys +2. Run **Promptimize: Configure OpenAI API Key (Whisper)** or use the setup wizard +3. Paste your key (stored securely in VSCode SecretStorage) + +**Cost:** ~$0.006 per minute of audio + +The same OpenAI key can be reused for OpenAI prompt optimization (Step 2, Option A). + +--- + +## Step 2: Prompt Optimization (Optional) + +Enable in settings: `promptimize.enablePromptTransformation` + +Or run **Promptimize: Configure Prompt Optimization Provider**. 
+ +### Provider comparison + +| Provider | Cost/Transform* | Speed | Privacy | Quality | Best For | +|----------|-----------------|-------|---------|---------|----------| +| OpenAI GPT-4o | ~$0.01 | Fast | Cloud | High | General use; reuse Whisper key | +| Anthropic Claude | ~$0.01–0.02 | Fast | Cloud | Very High | Complex reasoning | +| Google Gemini | ~$0.001 | Very Fast | Cloud | Good | Cost-sensitive usage | +| Azure OpenAI | Varies | Fast | Private Cloud | High | Enterprise deployments | +| Ollama | Free | Medium | Local | Good | Privacy-first, offline | +| OpenCode | Free | Medium | Local | High | Reuse OpenCode multi-provider setup | +| OpenRouter | Varies | Fast | Cloud | High | 200+ models with one API key | +| Cursor | ~$0.01 | Fast | Cloud | High | Cursor Composer and frontier models with one API key | + +\*Plus Whisper transcription cost (~$0.006/min, always OpenAI) + +API keys are stored per provider (`promptimize.apiKey.{provider}`). Switching providers does not delete saved keys. + +--- + +### Option A: OpenAI (default) + +```json +{ + "promptimize.enablePromptTransformation": true, + "promptimize.transformationProvider": "openai", + "promptimize.transformationModel": "gpt-4o" +} +``` + +**Setup:** Get a key from [OpenAI Platform](https://platform.openai.com/api-keys). Run the setup wizard or **Configure Prompt Optimization Provider** and select OpenAI. The same key used for Whisper works for optimization. + +**Recommended models:** `gpt-4o` (default), `gpt-4o-mini`, `gpt-4-turbo` + +**Pitfalls:** Keys must start with `sk-`. Whisper and GPT share the same OpenAI account balance. Whisper key is required even if you use another provider for optimization. + +--- + +### Option B: Anthropic + +```json +{ + "promptimize.transformationProvider": "anthropic", + "promptimize.anthropicModel": "claude-3-5-sonnet-20241022" +} +``` + +**Setup:** Configure OpenAI for Whisper first. Get an Anthropic key from [Anthropic Console](https://console.anthropic.com/). 
Run **Configure Prompt Optimization Provider**, select Anthropic, and enter your key. + +**Recommended models:** `claude-3-5-sonnet-20241022` (default), `claude-3-5-haiku-20241022`, `claude-3-opus-20240229` + +**Pitfalls:** Anthropic only handles optimization — Whisper still needs OpenAI. Use Anthropic keys from console.anthropic.com, not OpenAI keys. + +--- + +### Option C: Google Gemini + +```json +{ + "promptimize.transformationProvider": "google", + "promptimize.googleModel": "gemini-1.5-pro" +} +``` + +**Setup:** Configure OpenAI for Whisper first. Get a key from [Google AI Studio](https://aistudio.google.com/app/apikey). Run **Configure Prompt Optimization Provider**, select Google Gemini, and enter your key. + +**Recommended models:** `gemini-1.5-pro`, `gemini-1.5-flash`, `gemini-2.0-flash` + +**Pitfalls:** Gemini only handles optimization. Use Google AI Studio keys, not GCP service account keys unless configured for the Generative Language API. + +--- + +### Option D: Azure OpenAI + +```json +{ + "promptimize.transformationProvider": "azure", + "promptimize.azureEndpoint": "https://my-resource.openai.azure.com", + "promptimize.azureDeployment": "gpt-4o-deployment" +} +``` + +**Setup:** Configure OpenAI for Whisper first. Create an Azure OpenAI resource and deploy a chat model. Run **Configure Prompt Optimization Provider**, select Azure OpenAI, and enter your Azure API key, endpoint URL, and deployment name. + +**Notes:** The **deployment name** (not the model name) is used for API calls. Endpoint should be the resource URL without a trailing slash. Azure API key is stored separately from your OpenAI Whisper key. + +**Pitfalls:** Azure cannot be used for Whisper — transcription uses the public OpenAI API only. Use the deployment name from the Azure portal, not the model ID. 
+ +--- + +### Option E: Ollama (local) + +```json +{ + "promptimize.transformationProvider": "ollama", + "promptimize.ollamaBaseUrl": "http://localhost:11434", + "promptimize.ollamaModel": "llama3.1:8b" +} +``` + +**Setup:** Configure OpenAI for Whisper first. Install [Ollama](https://ollama.com/), pull a model (`ollama pull llama3.1:8b`), ensure Ollama is running, then select Ollama in **Configure Prompt Optimization Provider**. No API key required for Ollama. + +**Recommended models:** `llama3.1:8b` (default), `mistral:latest`, `codellama:latest` + +**Troubleshooting:** Confirm Ollama is reachable at the configured base URL. Run `ollama pull <model>` if the model is missing. Whisper still sends audio to OpenAI — only optimization runs locally. + +--- + +### Option F: OpenCode (local multi-provider) + +```json +{ + "promptimize.transformationProvider": "opencode", + "promptimize.openCodeBaseUrl": "http://127.0.0.1:4010/v1", + "promptimize.openCodeModel": "anthropic/claude-sonnet-4-5" +} +``` + +**Setup:** + +1. Install [OpenCode](https://opencode.ai/) and configure providers in `~/.config/opencode/opencode.json` +2. Install the [opencode-llm-proxy](https://github.com/KochC/opencode-llm-proxy) plugin: `opencode plugin add opencode-llm-proxy` +3. Start OpenCode (the proxy listens on `http://127.0.0.1:4010/v1` by default) +4. Run **Configure Prompt Optimization Provider**, select OpenCode, set the base URL, and pick a model + +**Notes:** OpenCode acts as a local gateway to providers you have already configured (Anthropic, OpenAI, Ollama, GitHub Copilot, etc.). Model IDs use `provider/model` format (e.g. `ollama/qwen2.5-coder`). Optional proxy authentication token is stored in SecretStorage if `OPENCODE_LLM_PROXY_TOKEN` is enabled on the proxy. + +**Optional token authentication:** If your opencode-llm-proxy requires a token, the extension stores it in SecretStorage when configured. Most local setups do not require a token. 
+ +**Troubleshooting:** Ensure opencode-llm-proxy is running and reachable. List available models with `GET http://127.0.0.1:4010/v1/models`. + +--- + +### Option G: OpenRouter + +```json +{ + "promptimize.transformationProvider": "openrouter", + "promptimize.openRouterModel": "openai/gpt-4o" +} +``` + +**Setup:** Configure OpenAI for Whisper first. Get an API key from [OpenRouter](https://openrouter.ai/settings/keys). Run **Configure Prompt Optimization Provider**, select OpenRouter, enter your key, and choose a model. + +**Recommended models:** `openai/gpt-4o` (default), `anthropic/claude-3.5-sonnet`, `google/gemini-2.0-flash-001` + +**Pitfalls:** OpenRouter only handles optimization — Whisper still needs OpenAI. Ensure your OpenRouter account has sufficient credits. + +--- + +### Option H: Cursor (SDK) + +```json +{ + "promptimize.transformationProvider": "cursor", + "promptimize.cursorModel": "composer-2.5" +} +``` + +**Setup:** Configure OpenAI for Whisper first. Get a Cursor API key from [Cursor Dashboard → Integrations](https://cursor.com/dashboard/integrations). Run **Configure Prompt Optimization Provider**, select Cursor, enter your key, and choose a model. + +**Recommended models:** `composer-2.5` (default), `composer-2.5-fast`, `claude-4.5-sonnet`, `gpt-5.1`, `gpt-5.2-codex` + +**Notes:** Works in any editor (VSCode, Cursor, VSCodium, etc.). Uses the `@cursor/sdk` package to connect to Cursor's agent API. No Cursor IDE installation required — only a Cursor API key and internet access. + +**Pitfalls:** Cursor only handles optimization — Whisper still needs OpenAI. Ensure your Cursor account has sufficient credits. 
+ +--- + +## Step 3: Verify Configuration + +Run **Promptimize: Test Configuration** + +Expected toast: + +``` +✓ Whisper: Working | ✓ Optimization (Provider): Working +``` + +When optimization is enabled, a **Configuration Test Result** webview opens showing: + +- Original sample transcription (developer ramble about JWT refactor) +- Transformed prompt from your configured provider +- **Improvements** list — heuristic analysis (filler removal, conciseness, structure) + +See [Advanced Settings — Test Configuration](advanced-settings.md#test-configuration-output) for improvement heuristics. + +You can also test inline in the [Configuration Webview](webview-guide.md) using **Test** and **Test optimization** buttons. + +--- + +## Advanced Settings + +| Setting | Default | Applied | Description | +|---------|---------|---------|-------------| +| `transcriptionLanguage` | `auto` | Yes | Whisper language (ISO 639-1 or auto) | +| `transcriptionHint` | `""` | Yes | Optional vocabulary hint for Whisper | +| `transformationSystemPrompt` | (built-in) | Yes | Custom transformation instructions | +| `audioQuality` | `high` | **Planned** | Recording quality — not yet applied (always 16 kHz mono) | +| `maxRecordingDuration` | `120` | **Planned** | Auto-stop — not yet applied | +| `showNotifications` | `true` | **Planned** | Hide toasts — not yet applied | + +Settings marked **Planned** appear in VS Code Settings but do not change runtime behavior yet. + +Full reference: [Advanced Settings](advanced-settings.md) + +--- + +## Common Questions + +### Do I need two OpenAI keys? + +No. One OpenAI key powers Whisper transcription. The same key can power OpenAI optimization. + +### Can I use Anthropic for optimization and OpenAI for transcription? + +Yes. That is the intended design. Whisper always uses OpenAI; optimization uses your chosen provider. + +### Can I disable optimization? + +Yes. 
Set `enablePromptTransformation` to `false` or choose **transcription only** in the setup wizard. + +--- + +**See also:** [Quick Start](../quickstart.md) · [Configuration Webview Guide](webview-guide.md) · [Provider Selection](provider-selection.md) · [Advanced Settings](advanced-settings.md) diff --git a/docs/configuration/advanced-settings.md b/docs/configuration/advanced-settings.md new file mode 100644 index 0000000..b44cb37 --- /dev/null +++ b/docs/configuration/advanced-settings.md @@ -0,0 +1,154 @@ +# Advanced Settings + +Settings available in VS Code Settings (`Cmd/Ctrl+,` → search **Promptimize**) that are not exposed in the configuration webview, plus details on planned settings and test output. + +--- + +## Transcription + +### `promptimize.transcriptionLanguage` + +| | | +|---|---| +| **Default** | `auto` | +| **Values** | `auto`, `en`, `es`, `fr`, `de`, `it`, `pt`, `ja`, `ko`, `zh` | +| **Applied** | Yes — passed to OpenAI Whisper | +| **Webview** | Not available — configure in Settings | + +When set to `auto`, Whisper auto-detects the spoken language. + +```json +{ + "promptimize.transcriptionLanguage": "en" +} +``` + +--- + +### `promptimize.transcriptionHint` + +| | | +|---|---| +| **Default** | (empty) | +| **Applied** | Yes — passed to Whisper as the `prompt` parameter | +| **Webview** | Not available — configure in Settings | + +Optional hint text to improve Whisper accuracy for domain-specific vocabulary, acronyms, or technical terms. Whisper uses this as context, not as content to transcribe. 
+ +**Example:** Project uses unusual terms or stack-specific jargon: + +```json +{ + "promptimize.transcriptionHint": "Kubernetes, Helm, ArgoCD, TypeScript, NestJS" +} +``` + +**Tips:** + +- List proper nouns, product names, and acronyms you expect to say +- Keep hints concise (Whisper has a prompt length limit) +- Does not replace `transcriptionLanguage` — use both when needed + +--- + +## Transformation + +### `promptimize.transformationSystemPrompt` + +| | | +|---|---| +| **Default** | Built-in prompt engineer template | +| **Applied** | Yes — sent to all optimization providers | +| **Webview** | Editable in configuration panel | + +Customize transformation style, structure, and rules. See [Configuration Webview Guide](webview-guide.md#3-transformation-system-prompt). + +--- + +## Context-Aware Optimization + +When using **Promptimize** mode, the extension passes editor context to the optimization provider: + +| Context | Source | Purpose | +|---------|--------|---------| +| `editorLanguage` | Active editor's `document.languageId` | Tailor prompt terminology (e.g. `typescript`, `python`) | +| `projectType` | Derived from language | Additional framing for the transformer | + +**Requirements:** + +- An editor tab must be active when you stop recording +- If no editor is open, optimization still runs without language context + +This is automatic — no setting required. 
+ +--- + +## Dynamic Model Loading + +Some providers fetch model lists at configuration time: + +| Provider | Model list source | +|----------|-------------------| +| OpenAI | Live API (`listGptModels`) | +| Ollama | `GET /api/tags` on configured base URL | +| OpenCode | `GET /v1/models` on proxy base URL | +| OpenRouter | OpenRouter models API | +| Anthropic | Static curated list | +| Google | Static curated list | +| Azure | Deployment name from settings | +| Cursor | Static curated list | + +Use **Refresh models** in the configuration webview or re-run provider configuration commands after adding new local models. + +--- + +## Planned Settings (Not Yet Applied) + +These settings appear in VS Code Settings and are loaded by the extension, but **do not change runtime behavior yet**: + +| Setting | Default | Planned behavior | +|---------|---------|------------------| +| `promptimize.audioQuality` | `high` | Adjust recording sample rate / encoding | +| `promptimize.maxRecordingDuration` | `120` | Auto-stop recording after N seconds | +| `promptimize.showNotifications` | `true` | Suppress progress toasts when `false` | + +**Current behavior:** + +- Audio is always captured at **16 kHz mono** (optimal for Whisper) +- Recording stops only when you click stop or cancel +- Progress notifications always appear during stop/processing + +Configure these in Settings for forward compatibility; they will take effect in a future release. + +--- + +## Test Configuration Output + +**Promptimize: Test Configuration** validates Whisper and optimization, then opens a **Configuration Test Result** webview when optimization succeeds. 
+ +### Webview contents + +| Section | Description | +|---------|-------------| +| **Status lines** | `✓ Whisper (OpenAI): Working` and `✓ Optimization (Provider): Working` | +| **Cost note** | Reminder that Whisper charges apply; optimization test uses sample text only | +| **Original** | Sample developer ramble sent to the transformer | +| **Transformed** | Optimized prompt from your configured provider | +| **Improvements** | Heuristic list of detected enhancements (see below) | + +### Improvements heuristics + +The improvements list is computed locally (not from the LLM). Possible entries: + +| Improvement | Trigger | +|-------------|---------| +| **Removed filler words** | Original contained fillers (`um`, `uh`, `like`, etc.) and transformed text has fewer | +| **Made more concise** | Transformed text is at least 10% shorter | +| **Added clear structure** | Transformed text includes `Context:`, `Objective:`, or `Requirements:` sections | +| **Improved sentence structure** | Transformed text has more sentences than the original | + +If optimization is disabled, the command tests Whisper only and shows an information toast. + +--- + +**See also:** [Configuration Guide](README.md) · [Configuration Webview Guide](webview-guide.md) diff --git a/docs/configuration/provider-selection.md b/docs/configuration/provider-selection.md new file mode 100644 index 0000000..e969b1d --- /dev/null +++ b/docs/configuration/provider-selection.md @@ -0,0 +1,81 @@ +# Provider Selection Guide + +Choose the right prompt optimization provider for your workflow. **Whisper transcription always uses OpenAI** regardless of this choice. + +--- + +## Quick Decision Tree + +```mermaid +flowchart TD + Start[Need prompt optimization?] -->|No| TranscribeOnly[Disable optimization
Use Transcribe mode only] + Start -->|Yes| Privacy{Privacy priority?} + Privacy -->|Local / offline| Local[Ollama or OpenCode] + Privacy -->|Cloud OK| Existing{Already have API keys?} + Existing -->|OpenAI| OpenAI[OpenAI GPT-4o] + Existing -->|Anthropic| Anthropic[Anthropic Claude] + Existing -->|Google| Google[Google Gemini] + Existing -->|Cursor account| Cursor[Cursor SDK] + Existing -->|Many models / one key| OpenRouter[OpenRouter] + Existing -->|Enterprise Azure| Azure[Azure OpenAI] + Local --> OllamaChoice[Ollama: simple local models] + Local --> OpenCodeChoice[OpenCode: multi-provider proxy] +``` + +--- + +## Provider Comparison + +| Provider | Cost/Transform* | Speed | Privacy | Quality | Best For | +|----------|-----------------|-------|---------|---------|----------| +| OpenAI GPT-4o | ~$0.01 | Fast | Cloud | High | General use; reuse Whisper key | +| Anthropic Claude | ~$0.01–0.02 | Fast | Cloud | Very High | Complex reasoning | +| Google Gemini | ~$0.001 | Very Fast | Cloud | Good | Cost-sensitive usage | +| Azure OpenAI | Varies | Fast | Private Cloud | High | Enterprise deployments | +| Ollama | Free | Medium | Local | Good | Privacy-first, offline | +| OpenCode | Free | Medium | Local | High | Reuse OpenCode multi-provider setup | +| OpenRouter | Varies | Fast | Cloud | High | 200+ models with one API key | +| Cursor | ~$0.01 | Fast | Cloud | High | Cursor Composer and frontier models | + +\*Plus Whisper transcription (~$0.006/min, always OpenAI) + +--- + +## Recommendations by Use Case + +### Default / simplest setup +**OpenAI** — Same API key as Whisper, `gpt-4o` default, fast and reliable. + +### Best quality for complex prompts +**Anthropic Claude 3.5 Sonnet** — Strong reasoning and structured output. + +### Lowest optimization cost +**Google Gemini Flash** — Very fast, low per-request cost. + +### Privacy / no cloud LLM for optimization +**Ollama** — Run `llama3.1:8b` or similar locally. Whisper still sends audio to OpenAI. 
+ +### Already use OpenCode +**OpenCode** — Route through your existing `opencode-llm-proxy` with `provider/model` identifiers. + +### One key, many models +**OpenRouter** — Access OpenAI, Anthropic, Google, and more through a single gateway. + +### Cursor ecosystem +**Cursor SDK** — Use Cursor API key and models (`composer-2.5`, etc.) from any editor. + +### Enterprise compliance +**Azure OpenAI** — Private cloud deployment with your own Azure resource. + +--- + +## Switching Providers + +- API keys are stored per provider (`promptimize.apiKey.{provider}`) +- Switching providers does **not** delete saved keys +- Change provider in the configuration webview or via **Configure Prompt Optimization Provider** +- Run **Test Configuration** after switching + +--- + +**See also:** [Configuration Guide](README.md) · [Configuration Webview Guide](webview-guide.md) diff --git a/docs/configuration/webview-guide.md b/docs/configuration/webview-guide.md new file mode 100644 index 0000000..0fdf0d6 --- /dev/null +++ b/docs/configuration/webview-guide.md @@ -0,0 +1,134 @@ +# Configuration Webview Guide + +The **Promptimize Configuration** panel is the primary way to set up and manage the extension. It opens automatically on first launch and is available anytime via **Promptimize: Open Configuration** or the status bar gear icon. + +--- + +## Opening the Panel + +| Method | Action | +|--------|--------| +| First launch | Welcome notification → **Open Configuration** | +| Status bar | Click $(gear) **Settings** (or $(warning) **Setup** if incomplete) | +| Command Palette | `Promptimize: Open Configuration` or `Promptimize: Setup Wizard` | + +Both **Setup Wizard** and **Open Configuration** open the same webview panel. + +--- + +## Panel Sections + +### 1. Transcription (Required) + +Configure OpenAI Whisper for voice-to-text. 
+ +| Control | Purpose | +|---------|---------| +| **OpenAI API Key** | Password field for your Whisper API key (`sk-...`) | +| **Test** | Verifies the key against the OpenAI API | +| **Badge** | Shows configured / not configured status | + +Keys are stored in VSCode SecretStorage (Keychain / Credential Manager). Saved keys appear masked (e.g. `sk-abc...xyz9`). + +--- + +### 2. Prompt Optimization (Optional) + +Convert transcribed speech into structured prompts using your chosen provider. + +| Control | Purpose | +|---------|---------| +| **Enable prompt optimization** | Toggle optimization on/off | +| **Optimization provider** | Dropdown: OpenAI, Anthropic, Google, Azure, Ollama, OpenCode, OpenRouter, Cursor | +| **Provider API Key** | Shown when the selected provider requires credentials | +| **Azure / Ollama / OpenCode fields** | Provider-specific endpoint and deployment settings | +| **Model** | Model dropdown with **Refresh models** for dynamic lists | +| **Test optimization** | Runs a sample transformation against your provider | + +**Dynamic model loading:** OpenAI, Ollama, OpenCode, and OpenRouter fetch live model lists. Anthropic, Google, and Cursor use curated static lists. + +--- + +### 3. Transformation System Prompt + +Customize how the AI rewrites your transcriptions. + +| Control | Purpose | +|---------|---------| +| **System Prompt** | Multiline editor for the transformation instructions | +| **Reset to default** | Restores the built-in prompt engineer template | +| **Save prompt** | Persists to `promptimize.transformationSystemPrompt` | + +Changes apply to all optimization providers. + +--- + +### 4. Provider Comparison + +Expandable table comparing all eight providers by cost, speed, privacy, and best use case. Helps choose a provider without leaving the panel. 
+ +--- + +### Footer Actions + +| Button | Purpose | +|--------|---------| +| **Save & Close** | Validates Whisper key (+ provider if optimization enabled), marks setup complete, closes panel | +| **Open documentation** | Opens the GitHub docs in your browser | + +--- + +## Configuration UI vs Command Palette + +| Task | Webview | Command Palette | +|------|---------|-----------------| +| Set OpenAI key | ✅ Password field + test | `Configure OpenAI API Key (Whisper)` | +| Choose provider | ✅ Dropdown + comparison table | `Configure Prompt Optimization Provider` (step wizard) | +| Pick OpenAI model | ✅ Dynamic dropdown | `Configure OpenAI Optimization Model` | +| Edit system prompt | ✅ Full editor + reset | VS Code Settings only | +| Test Whisper | ✅ Inline test button | Part of `Test Configuration` | +| Test optimization | ✅ Inline test button | `Test Configuration` (opens results webview) | +| Transcription language | ❌ Use VS Code Settings | Settings → `promptimize.transcriptionLanguage` | +| Transcription hint | ❌ Use VS Code Settings | Settings → `promptimize.transcriptionHint` | +| Advanced settings (planned) | ❌ Use VS Code Settings | See [Advanced Settings](advanced-settings.md) | + +--- + +## Inline Feedback + +The webview provides real-time validation without reloading: + +- **Badges** — Whisper and optimization configuration status +- **Status text** — Contextual hints below each section +- **Notifications** — Success/error banners at the top after save or test +- **Test results** — Whisper and optimization test pass/fail inline + +--- + +## Complete Setup Flow + +1. Enter OpenAI API key → click **Test** (optional) +2. Enable optimization if desired → select provider → enter credentials +3. Choose model → click **Test optimization** (optional) +4. Customize system prompt if needed (optional) +5. 
Click **Save & Close** + +If Whisper is missing or optimization is enabled but the provider is incomplete, **Save & Close** shows an error and keeps the panel open. + +--- + +## Test Configuration Command (Full Results) + +For a detailed before/after comparison, run **Promptimize: Test Configuration** from the Command Palette. This opens a separate webview panel showing: + +- Whisper connection status +- Optimization provider status +- Original sample transcription +- Transformed prompt +- **Improvements** list (heuristic analysis) + +See [Advanced Settings — Test Configuration](advanced-settings.md#test-configuration-output) for details on the improvements list. + +--- + +**See also:** [Configuration Guide](README.md) · [Provider Selection](provider-selection.md) · [Advanced Settings](advanced-settings.md) diff --git a/docs/deployment/release-process.md b/docs/deployment/release-process.md index 8f93ff0..6932391 100644 --- a/docs/deployment/release-process.md +++ b/docs/deployment/release-process.md @@ -1,239 +1,34 @@ # Deployment and Release Process -**Last Updated**: 2026-05-23 +**Last Updated**: 2026-05-24 --- -## Build Process - -### Local Build +## Local Build ```bash -# Install dependencies -pnpm install - -# Run linter +source scripts/ensure-node.sh && pnpm install pnpm run lint - -# Run tests pnpm test - -# Build extension pnpm run compile - -# Package VSIX pnpm run package +pnpm run package:verify # confirm all platform native binaries in VSIX ``` -### Build Scripts - -```json -// package.json -{ - "scripts": { - "compile": "webpack --mode production", - "compile:dev": "webpack --mode development --watch", - "lint": "eslint src --ext ts", - "lint:fix": "eslint src --ext ts --fix", - "test": "jest --coverage", - "test:watch": "jest --watch", - "package": "vsce package", - "publish": "vsce publish", - "publish:ovsx": "ovsx publish" - } -} -``` - ---- - -## Package Configuration - -### Extension Manifest - -```json -// package.json (extension fields) -{ - 
"name": "cursor-whisper", - "displayName": "Cursor Whisper - Voice to Optimized Prompt", - "description": "Transform voice into optimized prompts using OpenAI Whisper and GPT-4", - "version": "0.1.0", - "publisher": "cursor-whisper", - "icon": "assets/icon.png", - "repository": { - "type": "git", - "url": "https://github.com/vypdev/cursor-whisper" - }, - "engines": { - "vscode": "^1.120.0" - }, - "categories": [ - "Other", - "Machine Learning" - ], - "keywords": [ - "voice", - "speech-to-text", - "whisper", - "ai", - "cursor", - "productivity" - ], - "activationEvents": [ - "onStartupFinished" - ], - "main": "./out/extension.js", - "contributes": { - "commands": [ - { - "command": "cursor-whisper.startRecording", - "title": "Cursor Whisper: Start Recording" - }, - { - "command": "cursor-whisper.stopRecording", - "title": "Cursor Whisper: Stop Recording" - }, - { - "command": "cursor-whisper.configureApiKey", - "title": "Cursor Whisper: Configure API Key" - } - ], - "keybindings": [ - { - "command": "cursor-whisper.startRecording", - "key": "ctrl+alt+v", - "mac": "cmd+alt+v" - } - ], - "configuration": { - "title": "Cursor Whisper", - "properties": { - "cursorWhisper.transcriptionLanguage": { - "type": "string", - "default": "auto", - "description": "Language for transcription (ISO 639-1 code or 'auto')" - }, - "cursorWhisper.enablePromptTransformation": { - "type": "boolean", - "default": true, - "description": "Enable AI-powered prompt transformation" - }, - "cursorWhisper.audioQuality": { - "type": "string", - "enum": ["low", "medium", "high"], - "default": "high", - "description": "Audio recording quality" - }, - "cursorWhisper.maxRecordingDuration": { - "type": "number", - "default": 120, - "description": "Maximum recording duration in seconds" - } - } - } - } -} -``` +Scripts and manifest fields are defined in [`package.json`](../../package.json). Webpack config: [`webpack.config.js`](../../webpack.config.js). 
--- -## Webpack Configuration +## CI/CD -```javascript -// webpack.config.js -const path = require('path'); +Release workflow: [`.github/workflows/release_workflow.yml`](../../.github/workflows/release_workflow.yml) -module.exports = { - target: 'node', - entry: './src/extension.ts', - output: { - path: path.resolve(__dirname, 'out'), - filename: 'extension.js', - libraryTarget: 'commonjs2' - }, - externals: { - vscode: 'commonjs vscode' - }, - resolve: { - extensions: ['.ts', '.js'] - }, - module: { - rules: [ - { - test: /\.ts$/, - exclude: /node_modules/, - use: 'ts-loader' - } - ] - }, - mode: 'production', - devtool: 'source-map' -}; -``` - ---- - -## CI/CD Pipeline - -### GitHub Actions - -```yaml -# .github/workflows/release.yml -name: Release - -on: - push: - tags: - - 'v*' - -jobs: - build: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v3 - - - name: Setup pnpm - uses: pnpm/action-setup@v4 - with: - version: 9 - - - name: Setup Node.js - uses: actions/setup-node@v3 - with: - node-version: 22 - cache: pnpm - - - name: Install dependencies - run: pnpm install --frozen-lockfile - - - name: Lint - run: pnpm run lint - - - name: Test - run: pnpm test - - - name: Build - run: pnpm run compile - - - name: Package - run: pnpm run package - - - name: Publish to VSCode Marketplace - run: pnpm run publish - env: - VSCE_PAT: ${{ secrets.VSCE_PAT }} - - - name: Publish to Open VSX - run: pnpm run publish:ovsx - env: - OVSX_PAT: ${{ secrets.OVSX_PAT }} - - - name: Create GitHub Release - uses: softprops/action-gh-release@v1 - with: - files: '*.vsix' - body_path: CHANGELOG.md -``` +Typical pipeline on version tag: +1. Install dependencies (Node 22, pnpm) +2. Lint and test +3. Compile and package VSIX +4. Publish to VSCode Marketplace / Open VSX (when configured) +5. 
Attach VSIX to GitHub Release --- @@ -241,192 +36,59 @@ jobs: ### Pre-Release -- [ ] All tests passing -- [ ] Coverage >80% -- [ ] No linter errors -- [ ] No TypeScript errors -- [ ] Manual testing complete +- [ ] All tests passing (`pnpm test`) +- [ ] No linter errors (`pnpm run lint`) +- [ ] Manual smoke tests complete (see [Testing Strategy](../testing/strategy.md)) - [ ] Cross-platform tested (macOS, Windows, Linux) - [ ] Documentation updated -- [ ] CHANGELOG.md updated - [ ] Version bumped in `package.json` - [ ] Git tag created ### Release -1. **Version Bump**: - ```bash - pnpm version patch # 0.1.0 -> 0.1.1 - pnpm version minor # 0.1.1 -> 0.2.0 - pnpm version major # 0.2.0 -> 1.0.0 - ``` - -2. **Create Tag**: - ```bash - git tag v0.1.0 - git push origin v0.1.0 - ``` - -3. **Build & Test**: - ```bash - pnpm run compile - pnpm test - pnpm run package - ``` - -4. **Test VSIX Locally**: - ```bash - code --install-extension cursor-whisper-0.1.0.vsix - ``` - -5. **Publish**: - ```bash - # VSCode Marketplace - vsce publish - - # Open VSX - ovsx publish cursor-whisper-0.1.0.vsix - ``` - -6. **Create GitHub Release**: - - Go to GitHub Releases - - Create new release from tag - - Copy CHANGELOG content - - Attach .vsix file +1. Bump version: `pnpm version patch|minor|major` +2. Push tag: `git push origin vX.Y.Z` +3. Verify CI produces VSIX +4. Test VSIX locally: `code --install-extension promptimize-X.Y.Z.vsix` +5. 
Confirm marketplace listing (if published) ### Post-Release -- [ ] Verify extension appears on VSCode Marketplace -- [ ] Verify extension appears on Open VSX -- [ ] Test installation from marketplace -- [ ] Update documentation site (if any) -- [ ] Announce on social media -- [ ] Monitor for issues +- [ ] Verify installation from marketplace or GitHub Releases +- [ ] Monitor GitHub Issues for regressions --- ## Version Management -### Semantic Versioning - -Format: `MAJOR.MINOR.PATCH` +Semantic versioning: `MAJOR.MINOR.PATCH` - **MAJOR**: Breaking changes - **MINOR**: New features (backward compatible) -- **PATCH**: Bug fixes (backward compatible) - -**Examples**: -- `0.1.0` → `0.1.1`: Bug fix -- `0.1.1` → `0.2.0`: New feature (prompt transformation) -- `0.2.0` → `1.0.0`: First stable release +- **PATCH**: Bug fixes --- ## Distribution -### Channels - -1. **VSCode Marketplace**: Primary distribution -2. **Open VSX**: For VSCodium and other forks -3. **GitHub Releases**: Direct VSIX download - -### Installation Methods - -**From Marketplace**: -``` -1. Open VSCode/Cursor -2. Extensions → Search "Cursor Whisper" -3. Click Install -``` - -**From VSIX**: -```bash -code --install-extension cursor-whisper-0.1.0.vsix -``` - -**From Source**: -```bash -git clone https://github.com/vypdev/cursor-whisper -cd extension -pnpm install -pnpm run compile -code --extensionDevelopmentPath=$PWD -``` - ---- - -## Rollback Procedure - -### If Critical Bug Found - -1. **Unpublish broken version** (if possible): - ```bash - vsce unpublish cursor-whisper@0.1.1 - ``` - -2. **Fix bug**: - ```bash - # Create hotfix branch - git checkout -b hotfix/critical-bug +| Channel | Purpose | +|---------|---------| +| VSCode Marketplace | Primary (when available) | +| Open VSX | VSCodium and forks | +| GitHub Releases | Direct VSIX download | - # Fix issue - # ... 
- - # Test thoroughly - pnpm test - - # Merge and release patch - git checkout main - git merge hotfix/critical-bug - pnpm version patch - git push --tags - ``` - -3. **Communicate**: - - Update GitHub issue - - Add comment to marketplace - - Notify users via release notes +**Install from VSIX:** Extensions → `...` → Install from VSIX, or `code --install-extension promptimize-X.Y.Z.vsix` --- -## Monitoring - -### Post-Release Metrics - -Track: -- Download count -- Active installations -- Ratings and reviews -- GitHub issues opened -- Error reports (if telemetry added) - -### Health Checks - -- Extension loads without errors -- Commands are registered -- Configuration works -- API calls succeed -- Cross-platform compatibility - ---- - -## Summary - -**Release Flow**: -1. ✅ Code complete & tested -2. ✅ Version bumped & tagged -3. ✅ Built & packaged -4. ✅ Published to marketplaces -5. ✅ GitHub release created -6. ✅ Announced & monitored +## Rollback -**Distribution**: -- VSCode Marketplace (primary) -- Open VSX (secondary) -- GitHub Releases (direct download) +If a critical bug ships: -**Versioning**: Semantic (MAJOR.MINOR.PATCH) +1. Unpublish broken version if possible (`vsce unpublish promptimize@X.Y.Z`) +2. Fix on a hotfix branch, test, release patch version +3. Communicate via GitHub Release notes and Issues --- -**Next**: See [Roadmap](../roadmap/versions.md) for planned releases. +**Related:** [Testing Strategy](../testing/strategy.md) · [Project Progress](../../PROGRESS.md) diff --git a/docs/domain/README.md b/docs/domain/README.md deleted file mode 100644 index 4fb7cb3..0000000 --- a/docs/domain/README.md +++ /dev/null @@ -1,700 +0,0 @@ -# Domain Layer Documentation - -**Last Updated**: 2026-05-23 - ---- - -## Overview - -The Domain Layer contains the core business logic of Cursor Whisper. It is the heart of the application and has ZERO external dependencies. 
- -**Location**: `src/domain/` - -**Principles**: -- Pure business logic -- No framework dependencies -- No I/O operations -- Fully unit testable -- Framework-agnostic - ---- - -## Structure - -``` -src/domain/ -├── entities/ # Core business objects -│ ├── Recording.ts -│ ├── Transcription.ts -│ └── Prompt.ts -│ -├── value-objects/ # Immutable values -│ ├── AudioData.ts -│ ├── AudioFormat.ts -│ ├── RecordingState.ts -│ └── ApiKey.ts -│ -├── errors/ # Domain exceptions -│ ├── RecordingError.ts -│ ├── TranscriptionError.ts -│ ├── ValidationError.ts -│ └── ConfigError.ts -│ -└── services/ # Domain services (optional, not yet implemented) -``` - -> **Note**: Audio validation is implemented in `AudioData`, `Recording`, and `TranscribeAudioUseCase` rather than a separate `AudioValidator` service. - ---- - -## Entities - -### Recording Entity - -**Purpose**: Represents a single audio recording session. - -**File**: `src/domain/entities/Recording.ts` - -```typescript -import { AudioData } from '../value-objects/AudioData'; -import { RecordingState } from '../value-objects/RecordingState'; -import { InvalidRecordingError } from '../errors/RecordingError'; - -export class Recording { - private state: RecordingState; - - constructor( - public readonly id: string, - public readonly audioData: AudioData, - public readonly timestamp: Date, - public readonly duration: number - ) { - this.state = RecordingState.COMPLETED; - this.validate(); - } - - private validate(): void { - if (this.duration <= 0) { - throw new InvalidRecordingError('Duration must be positive'); - } - - if (this.duration > 300) { - throw new InvalidRecordingError('Duration exceeds maximum (5 minutes)'); - } - - if (this.audioData.buffer.length === 0) { - throw new InvalidRecordingError('Audio data is empty'); - } - - const calculatedDuration = this.audioData.getDurationInSeconds(); - const durationDiff = Math.abs(calculatedDuration - this.duration); - - if (durationDiff > 1) { - throw new InvalidRecordingError( 
- `Duration mismatch: recorded ${this.duration}s, actual ${calculatedDuration}s` - ); - } - } - - isLongRecording(): boolean { - return this.duration > 60; - } - - isShortRecording(): boolean { - return this.duration < 3; - } - - getFileSizeInMB(): number { - return this.audioData.getSizeInBytes() / (1024 * 1024); - } - - exceedsSizeLimit(limitMB: number = 25): boolean { - return this.getFileSizeInMB() > limitMB; - } - - getState(): RecordingState { - return this.state; - } - - setState(newState: RecordingState): void { - this.state = newState; - } -} -``` - -**Business Rules**: -- Duration must be positive -- Duration cannot exceed 5 minutes (300s) -- Audio data cannot be empty -- Calculated duration must match recorded duration (within 1s tolerance) - -### Transcription Entity - -**Purpose**: Represents the result of transcribing audio to text. - -**File**: `src/domain/entities/Transcription.ts` - -```typescript -export class Transcription { - constructor( - public readonly id: string, - public readonly recordingId: string, - public readonly text: string, - public readonly language: string, - public readonly confidence: number | undefined, - public readonly timestamp: Date - ) { - this.validate(); - } - - private validate(): void { - if (!this.text || this.text.trim().length === 0) { - throw new TranscriptionError('Transcription text cannot be empty'); - } - - if (this.text.length > 100000) { - throw new TranscriptionError('Transcription text too long'); - } - - if (this.confidence !== undefined) { - if (this.confidence < 0 || this.confidence > 1) { - throw new TranscriptionError('Confidence must be between 0 and 1'); - } - } - } - - hasLowConfidence(): boolean { - return this.confidence !== undefined && this.confidence < 0.7; - } - - getWordCount(): number { - return this.text.trim().split(/\s+/).length; - } - - getCharacterCount(): number { - return this.text.length; - } - - isEmpty(): boolean { - return this.text.trim().length === 0; - } -} -``` - -### Prompt 
Entity - -**Purpose**: Represents a transformed prompt ready for insertion. - -**File**: `src/domain/entities/Prompt.ts` - -```typescript -export class Prompt { - constructor( - public readonly id: string, - public readonly transcriptionId: string, - public readonly originalText: string, - public readonly transformedText: string, - public readonly improvements: string[], - public readonly timestamp: Date - ) { - this.validate(); - } - - private validate(): void { - if (!this.originalText || this.originalText.trim().length === 0) { - throw new ValidationError('Original text cannot be empty'); - } - - if (!this.transformedText || this.transformedText.trim().length === 0) { - throw new ValidationError('Transformed text cannot be empty'); - } - } - - wasTransformed(): boolean { - return this.originalText !== this.transformedText; - } - - getCompressionRatio(): number { - return this.transformedText.length / this.originalText.length; - } - - hasImprovements(): boolean { - return this.improvements.length > 0; - } - - getSummary(): string { - return `Prompt (${this.transformedText.length} chars, ${this.improvements.length} improvements)`; - } -} -``` - ---- - -## Value Objects - -### AudioData Value Object - -**Purpose**: Immutable representation of audio binary data. 
- -**File**: `src/domain/value-objects/AudioData.ts` - -```typescript -import { AudioFormat } from './AudioFormat'; - -export class AudioData { - constructor( - public readonly buffer: Buffer, - public readonly format: AudioFormat, - public readonly sampleRate: number, - public readonly channels: number - ) { - this.validate(); - } - - private validate(): void { - if (buffer.length === 0) { - throw new ValidationError('Audio buffer cannot be empty'); - } - - if (sampleRate <= 0) { - throw new ValidationError('Sample rate must be positive'); - } - - if (channels < 1 || channels > 2) { - throw new ValidationError('Channels must be 1 (mono) or 2 (stereo)'); - } - } - - getSizeInBytes(): number { - return this.buffer.length; - } - - getSizeInKB(): number { - return this.getSizeInBytes() / 1024; - } - - getSizeInMB(): number { - return this.getSizeInKB() / 1024; - } - - getDurationInSeconds(bitDepth: number = 16): number { - const bytesPerSample = bitDepth / 8; - const samplesCount = this.buffer.length / (bytesPerSample * this.channels); - return samplesCount / this.sampleRate; - } - - isMono(): boolean { - return this.channels === 1; - } - - isStereo(): boolean { - return this.channels === 2; - } -} -``` - -### AudioFormat Enum - -**File**: `src/domain/value-objects/AudioFormat.ts` - -```typescript -export enum AudioFormat { - WAV = 'wav', - MP3 = 'mp3', - WEBM = 'webm', - OGG = 'ogg', - M4A = 'm4a' -} - -export function getAudioFormatFromMimeType(mimeType: string): AudioFormat { - if (mimeType.includes('wav')) return AudioFormat.WAV; - if (mimeType.includes('mp3') || mimeType.includes('mpeg')) return AudioFormat.MP3; - if (mimeType.includes('webm')) return AudioFormat.WEBM; - if (mimeType.includes('ogg')) return AudioFormat.OGG; - if (mimeType.includes('m4a')) return AudioFormat.M4A; - - throw new ValidationError(`Unsupported MIME type: ${mimeType}`); -} - -export function getMimeTypeFromFormat(format: AudioFormat): string { - switch (format) { - case AudioFormat.WAV: 
- return 'audio/wav'; - case AudioFormat.MP3: - return 'audio/mpeg'; - case AudioFormat.WEBM: - return 'audio/webm'; - case AudioFormat.OGG: - return 'audio/ogg'; - case AudioFormat.M4A: - return 'audio/mp4'; - } -} -``` - -### RecordingState Enum - -**File**: `src/domain/value-objects/RecordingState.ts` - -```typescript -export enum RecordingState { - IDLE = 'idle', - RECORDING = 'recording', - PROCESSING = 'processing', - TRANSCRIBING = 'transcribing', - TRANSFORMING = 'transforming', - INSERTING = 'inserting', - COMPLETED = 'completed', - ERROR = 'error', - CANCELLED = 'cancelled' -} - -export function isActiveState(state: RecordingState): boolean { - return state === RecordingState.RECORDING || - state === RecordingState.PROCESSING || - state === RecordingState.TRANSCRIBING || - state === RecordingState.TRANSFORMING || - state === RecordingState.INSERTING; -} - -export function isTerminalState(state: RecordingState): boolean { - return state === RecordingState.COMPLETED || - state === RecordingState.ERROR || - state === RecordingState.CANCELLED; -} -``` - -### ApiKey Value Object - -**File**: `src/domain/value-objects/ApiKey.ts` - -```typescript -export class ApiKey { - private readonly value: string; - - constructor(key: string) { - this.validate(key); - this.value = key; - } - - private validate(key: string): void { - if (!key || key.trim().length === 0) { - throw new ValidationError('API key cannot be empty'); - } - - if (!key.startsWith('sk-')) { - throw new ValidationError('API key must start with sk-'); - } - - if (key.length < 20) { - throw new ValidationError('API key seems too short'); - } - } - - toString(): string { - return this.value; - } - - getMasked(): string { - return `${this.value.substring(0, 7)}...${this.value.substring(this.value.length - 4)}`; - } - - equals(other: ApiKey): boolean { - return this.value === other.value; - } -} -``` - ---- - -## Domain Errors - -### RecordingError - -**File**: `src/domain/errors/RecordingError.ts` - 
-```typescript -export class RecordingError extends Error { - constructor( - message: string, - public readonly cause?: Error - ) { - super(message); - this.name = 'RecordingError'; - - if (cause) { - this.stack = `${this.stack}\nCaused by: ${cause.stack}`; - } - } -} - -export class InvalidRecordingError extends RecordingError { - constructor(message: string) { - super(message); - this.name = 'InvalidRecordingError'; - } -} - -export class RecordingTimeoutError extends RecordingError { - constructor(timeoutSeconds: number) { - super(`Recording timeout after ${timeoutSeconds} seconds`); - this.name = 'RecordingTimeoutError'; - } -} -``` - -### TranscriptionError - -**File**: `src/domain/errors/TranscriptionError.ts` - -```typescript -export class TranscriptionError extends Error { - constructor( - message: string, - public readonly statusCode?: number, - public readonly cause?: Error - ) { - super(message); - this.name = 'TranscriptionError'; - } -} - -export class TranscriptionTimeoutError extends TranscriptionError { - constructor() { - super('Transcription request timed out'); - this.name = 'TranscriptionTimeoutError'; - } -} - -export class AudioTooLargeError extends TranscriptionError { - constructor(sizeInMB: number) { - super(`Audio file too large: ${sizeInMB.toFixed(2)}MB (max 25MB)`); - this.name = 'AudioTooLargeError'; - } -} -``` - -### ValidationError - -**File**: `src/domain/errors/ValidationError.ts` - -```typescript -export class ValidationError extends Error { - constructor( - message: string, - public readonly field?: string - ) { - super(message); - this.name = 'ValidationError'; - } -} -``` - -### ConfigError - -**File**: `src/domain/errors/ConfigError.ts` - -```typescript -export class ConfigError extends Error { - constructor(message: string) { - super(message); - this.name = 'ConfigError'; - } -} - -export class MissingApiKeyError extends ConfigError { - constructor() { - super('OpenAI API Key not configured'); - this.name = 
'MissingApiKeyError'; - } -} - -export class InvalidConfigError extends ConfigError { - constructor(field: string, reason: string) { - super(`Invalid configuration for ${field}: ${reason}`); - this.name = 'InvalidConfigError'; - } -} -``` - ---- - -## Domain Services - -> **Planned, not implemented**: A dedicated `AudioValidator` domain service was documented during design but validation is currently handled by: -> - `AudioData` value object (buffer size, format) -> - `Recording` entity (duration and size limits) -> - `TranscribeAudioUseCase` and `OpenAIWhisperService.validateAudioFile()` (Whisper API constraints) - -See [`src/domain/value-objects/AudioData.ts`](../../src/domain/value-objects/AudioData.ts) and [`src/application/use-cases/TranscribeAudioUseCase.ts`](../../src/application/use-cases/TranscribeAudioUseCase.ts). - ---- - -## Testing Domain Logic - -### Example Domain Tests - -```typescript -// __tests__/domain/entities/Recording.test.ts -describe('Recording Entity', () => { - describe('construction', () => { - it('should create valid recording', () => { - const audioData = createMockAudioData(); - const recording = new Recording( - 'rec-123', - audioData, - new Date(), - 5.2 - ); - - expect(recording.id).toBe('rec-123'); - expect(recording.duration).toBe(5.2); - }); - - it('should throw error for negative duration', () => { - expect(() => { - new Recording('rec-123', audioData, new Date(), -5); - }).toThrow(InvalidRecordingError); - }); - - it('should throw error for excessive duration', () => { - expect(() => { - new Recording('rec-123', audioData, new Date(), 400); - }).toThrow(InvalidRecordingError); - }); - - it('should throw error for empty audio', () => { - const emptyAudio = new AudioData( - Buffer.alloc(0), - AudioFormat.WAV, - 16000, - 1 - ); - - expect(() => { - new Recording('rec-123', emptyAudio, new Date(), 5); - }).toThrow(InvalidRecordingError); - }); - }); - - describe('business logic', () => { - it('should identify long recordings', () 
=> { - const recording = new Recording('id', audioData, new Date(), 90); - expect(recording.isLongRecording()).toBe(true); - }); - - it('should identify short recordings', () => { - const recording = new Recording('id', audioData, new Date(), 2); - expect(recording.isShortRecording()).toBe(true); - }); - - it('should calculate file size correctly', () => { - const audioData = createMockAudioData(1024 * 1024); // 1MB - const recording = new Recording('id', audioData, new Date(), 5); - - expect(recording.getFileSizeInMB()).toBeCloseTo(1.0, 2); - }); - - it('should detect size limit exceeded', () => { - const largeAudio = createMockAudioData(30 * 1024 * 1024); // 30MB - const recording = new Recording('id', largeAudio, new Date(), 5); - - expect(recording.exceedsSizeLimit()).toBe(true); - }); - }); -}); -``` - ---- - -## Key Principles - -### 1. No External Dependencies - -Domain entities should NEVER import from: -- `vscode` -- `react` -- `openai` -- Any infrastructure code -- Any framework - -### 2. Immutability Where Possible - -Value objects are immutable: -```typescript -// ✅ GOOD - Immutable -export class AudioData { - constructor( - public readonly buffer: Buffer, - public readonly format: AudioFormat - ) {} -} - -// ❌ BAD - Mutable -export class AudioData { - public buffer: Buffer; - public format: AudioFormat; - - setBuffer(buffer: Buffer) { - this.buffer = buffer; - } -} -``` - -### 3. Business Rules in Domain - -```typescript -// ✅ GOOD - Business rule in entity -export class Recording { - isLongRecording(): boolean { - return this.duration > 60; // Business rule - } -} - -// ❌ BAD - Business rule in use case -export class SomeUseCase { - execute(recording: Recording) { - if (recording.duration > 60) { // Business rule leaked! - // ... - } - } -} -``` - -### 4. 
Rich Domain Model - -```typescript -// ✅ GOOD - Rich with behavior -export class Transcription { - getWordCount(): number { - return this.text.trim().split(/\s+/).length; - } - - hasLowConfidence(): boolean { - return this.confidence !== undefined && this.confidence < 0.7; - } -} - -// ❌ BAD - Anemic domain model -export interface Transcription { - text: string; - confidence: number; -} -``` - ---- - -## Summary - -The Domain Layer: -- ✅ Contains pure business logic -- ✅ Has zero external dependencies -- ✅ Is fully unit testable -- ✅ Defines core entities and value objects -- ✅ Encapsulates business rules -- ✅ Provides clear, type-safe interfaces -- ✅ Is the foundation of the application - ---- - -**Next**: See [Application Layer](../application/ports.md) for use cases and ports. diff --git a/docs/flows/complete-flow.md b/docs/flows/complete-flow.md index 420ce43..9938e8d 100644 --- a/docs/flows/complete-flow.md +++ b/docs/flows/complete-flow.md @@ -6,7 +6,7 @@ ## Overview -This document details all workflows in Cursor Whisper with sequence diagrams and step-by-step explanations. +This document details all workflows in Promptimize with sequence diagrams and step-by-step explanations. --- @@ -263,6 +263,17 @@ sequenceDiagram ## 3. Alternative Flows +### Transcribe vs Promptimize + +Two recording modes share the same audio capture and Whisper transcription but differ after transcription: + +| Mode | Start | Stop pipeline | +|------|-------|---------------| +| **Transcribe** | `Cmd/Ctrl+Alt+V` or status bar | Stop → Whisper → insert **raw** text | +| **Promptimize** | `Cmd/Ctrl+Alt+P` or status bar | Stop → Whisper → transform → insert **optimized** text | + +See [Recording Modes](../user-guide/recording-modes.md). 
+ ### Skip Transformation (Direct Transcription) User can disable prompt transformation in settings: @@ -343,7 +354,7 @@ stateDiagram-v2 ```mermaid flowchart TB - subgraph Extension["Cursor Whisper Extension"] + subgraph Extension["Promptimize Extension"] UC[Use Cases] Adapters[Infrastructure Adapters] end diff --git a/docs/quickstart.md b/docs/quickstart.md new file mode 100644 index 0000000..34e2c41 --- /dev/null +++ b/docs/quickstart.md @@ -0,0 +1,177 @@ +# Quick Start Guide + +Get Promptimize running in a few minutes. + +--- + +## What Promptimize Does + +Promptimize has **two separate services** and **two recording modes**: + +1. **Voice-to-text (required)** — Always uses **OpenAI Whisper**. Requires an **OpenAI API key**. +2. **Prompt optimization (optional)** — Converts transcribed speech into structured prompts. Choose from 8 providers. + +| Mode | Shortcut | Pipeline | +|------|----------|----------| +| **Transcribe** | `Cmd/Ctrl+Alt+V` | Record → Whisper → insert raw text | +| **Promptimize** | `Cmd/Ctrl+Alt+P` | Record → Whisper → optimize → insert | + +See [Recording Modes](user-guide/recording-modes.md) for when to use each mode. + +```mermaid +graph LR + Voice[Your Voice] --> Whisper[OpenAI Whisper
Transcription] + Whisper --> RawText[Raw Text] + RawText --> Choice{Mode?} + Choice -->|Transcribe| Editor[Insert raw text] + Choice -->|Promptimize| Provider[Optimization Provider] + Provider --> OptimizedText[Optimized Prompt] + OptimizedText --> Editor +``` + +--- + +## Installation + +### From VSIX (current) + +1. Download the latest `.vsix` from [Releases](https://github.com/vypdev/cursor-whisper/releases) +2. Open VSCode or Cursor +3. Extensions → `...` menu → **Install from VSIX...** +4. Select the downloaded file +5. Reload the window if prompted + +### From Marketplace (coming soon) + +Search for **Promptimize** in the Extensions view. + +--- + +## First-Time Setup + +On first launch, Promptimize prompts you to open the **Configuration panel**. You can also open it anytime: + +**Command Palette** → `Promptimize: Open Configuration` or click $(gear) **Settings** in the status bar. + +### Configuration panel overview + +The panel is a single webview (not a multi-step wizard) with these sections: + +1. **Transcription** — Enter OpenAI API key, test connection +2. **Prompt Optimization** — Enable/disable, choose provider, enter credentials, pick model +3. **System Prompt** — Customize transformation style (optional) +4. **Provider Comparison** — Compare all 8 providers +5. **Save & Close** — Validates and completes setup + +Full details: [Configuration Webview Guide](configuration/webview-guide.md) + +### Minimum configuration (transcription only) + +1. Open the configuration panel +2. Enter your OpenAI API key → **Test** (optional) +3. Leave **Enable prompt optimization** unchecked +4. Click **Save & Close** + +--- + +## First Recording + +### Transcribe (raw text) + +1. Open an editor or Cursor chat input +2. Press `Cmd+Alt+V` (macOS) or `Ctrl+Alt+V` (Windows/Linux) +3. Speak clearly +4. Click **Recording...** in the status bar to stop +5. Raw transcription appears in your editor or chat + +### Promptimize (optimized prompt) + +1. 
Ensure optimization is enabled in the configuration panel +2. Press `Cmd+Alt+P` (macOS) or `Ctrl+Alt+P` (Windows/Linux) +3. Speak clearly +4. Click **Recording...** in the status bar to stop +5. Optimized prompt appears after Whisper + transformation + +**Note:** Keyboard shortcuts start recording only. Stop via the status bar or stop commands. + +--- + +## Configuration Commands + +| Command | Purpose | +|---------|---------| +| `Promptimize: Open Configuration` | Configuration webview (primary) | +| `Promptimize: Setup Wizard` | Same as Open Configuration | +| `Promptimize: Configure OpenAI API Key (Whisper)` | Set or update OpenAI key | +| `Promptimize: Configure Prompt Optimization Provider` | Command Palette provider wizard | +| `Promptimize: Configure OpenAI Optimization Model` | Pick GPT model (OpenAI provider) | +| `Promptimize: Test Configuration` | Test setup; opens before/after webview | + +--- + +## Status Bar + +Three items in the status bar (right side): + +| Indicator | Meaning | +|-----------|---------| +| $(mic) **Transcribe** | Start/stop raw transcription mode | +| $(sparkle) **Promptimize** | Start/stop optimized prompt mode | +| $(gear) **Settings** / $(warning) **Setup** | Open configuration panel | + +While recording, the active mode shows **$(record) Recording...** (click to stop). + +During processing, notifications show: Transcribing... → Optimizing... → Inserting... + +Tooltip when idle: `Transcription: OpenAI Whisper | Optimization: [Provider]` + +--- + +## Test Configuration + +Run **Promptimize: Test Configuration** to validate your setup. When optimization is enabled, a webview opens showing: + +- Original sample transcription +- Transformed prompt from your provider +- **Improvements** list (filler removal, conciseness, structure) + +See [Advanced Settings — Test Configuration](configuration/advanced-settings.md#test-configuration-output). 
+ +--- + +## Troubleshooting + +See the full [Troubleshooting Guide](user-guide/troubleshooting.md) with decision trees. + +### OpenAI API key errors + +- Confirm the key starts with `sk-` +- Check credits at https://platform.openai.com/account/billing +- Run **Test** in the configuration panel or **Test Configuration** + +### Optimization provider errors + +- Each provider needs its own API key (except Ollama and OpenCode) +- OpenAI for Whisper and OpenAI for optimization can use the **same key** +- Reconfigure via **Open Configuration** panel + +### Microphone not working + +**macOS:** System Settings → Privacy & Security → Microphone → enable Cursor/VSCode + +**Windows:** Settings → Privacy → Microphone → enable Cursor/VSCode + +### Text not inserting + +- Focus an editor or chat input before recording +- Check status bar for errors +- Text may fall back to clipboard — paste manually + +### Need more help? + +- [Configuration guide](configuration/README.md) +- [GitHub Issues](https://github.com/vypdev/cursor-whisper/issues) + +--- + +**Next:** [Configuration Guide](configuration/README.md) · [Recording Modes](user-guide/recording-modes.md) diff --git a/docs/research/technical-investigation.md b/docs/research/technical-investigation.md index efe6f4b..1be5192 100644 --- a/docs/research/technical-investigation.md +++ b/docs/research/technical-investigation.md @@ -258,7 +258,7 @@ import * as vscode from 'vscode'; // Register command const disposable = vscode.commands.registerCommand( - 'cursor-whisper.startRecording', + 'promptimize.startRecording', async () => { // Command logic } @@ -276,7 +276,7 @@ const statusBar = vscode.window.createStatusBarItem( ); statusBar.text = '$(mic) Voice'; -statusBar.command = 'cursor-whisper.startRecording'; +statusBar.command = 'promptimize.startRecording'; statusBar.show(); ``` @@ -284,13 +284,13 @@ statusBar.show(); ```typescript // Store API key -await context.secrets.store('cursor-whisper.openai.apiKey', apiKey); +await 
context.secrets.store('promptimize.openai.apiKey', apiKey); // Retrieve API key -const apiKey = await context.secrets.get('cursor-whisper.openai.apiKey'); +const apiKey = await context.secrets.get('promptimize.openai.apiKey'); // Delete API key -await context.secrets.delete('cursor-whisper.openai.apiKey'); +await context.secrets.delete('promptimize.openai.apiKey'); ``` **Platform Storage**: @@ -302,7 +302,7 @@ await context.secrets.delete('cursor-whisper.openai.apiKey'); ```typescript // Get configuration -const config = vscode.workspace.getConfiguration('cursorWhisper'); +const config = vscode.workspace.getConfiguration('promptimize'); const language = config.get('transcriptionLanguage', 'auto'); // Update configuration @@ -314,7 +314,7 @@ await config.update( // Watch for changes vscode.workspace.onDidChangeConfiguration(event => { - if (event.affectsConfiguration('cursorWhisper')) { + if (event.affectsConfiguration('promptimize')) { // Configuration changed } }); @@ -347,8 +347,8 @@ if (editor) { ```typescript // Create webview panel const panel = vscode.window.createWebviewPanel( - 'cursorWhisperRecorder', - 'Cursor Whisper', + 'promptimizeRecorder', + 'Promptimize', vscode.ViewColumn.One, { enableScripts: true, @@ -376,105 +376,6 @@ panel.webview.postMessage({ --- -## Browser APIs (Webview) - -### MediaRecorder API - -```typescript -// Request microphone permission -const stream = await navigator.mediaDevices.getUserMedia({ - audio: { - channelCount: 1, - sampleRate: 16000, - echoCancellation: true, - noiseSuppression: true, - autoGainControl: true - } -}); - -// Create MediaRecorder -const mimeType = MediaRecorder.isTypeSupported('audio/webm') - ? 
'audio/webm' - : 'audio/ogg'; - -const recorder = new MediaRecorder(stream, { - mimeType, - audioBitsPerSecond: 128000 -}); - -// Collect audio chunks -const chunks: Blob[] = []; - -recorder.ondataavailable = (event) => { - if (event.data.size > 0) { - chunks.push(event.data); - } -}; - -recorder.onstop = () => { - const audioBlob = new Blob(chunks, { type: mimeType }); - // Convert to WAV... -}; - -// Start recording -recorder.start(100); // Chunk every 100ms - -// Stop recording -recorder.stop(); - -// Release microphone -stream.getTracks().forEach(track => track.stop()); -``` - -### Web Audio API (WAV Conversion) - -```typescript -async function convertToWav(blob: Blob): Promise { - // Decode audio - const arrayBuffer = await blob.arrayBuffer(); - const audioContext = new AudioContext(); - const audioBuffer = await audioContext.decodeAudioData(arrayBuffer); - - // Convert to WAV - const wavBuffer = audioBufferToWav(audioBuffer); - return new Blob([wavBuffer], { type: 'audio/wav' }); -} - -function audioBufferToWav(buffer: AudioBuffer): ArrayBuffer { - const length = buffer.length * buffer.numberOfChannels * 2; - const wavBuffer = new ArrayBuffer(44 + length); - const view = new DataView(wavBuffer); - - // WAV header - writeString(view, 0, 'RIFF'); - view.setUint32(4, 36 + length, true); - writeString(view, 8, 'WAVE'); - writeString(view, 12, 'fmt '); - view.setUint32(16, 16, true); // Subchunk1Size - view.setUint16(20, 1, true); // AudioFormat (PCM) - view.setUint16(22, buffer.numberOfChannels, true); - view.setUint32(24, buffer.sampleRate, true); - view.setUint32(28, buffer.sampleRate * buffer.numberOfChannels * 2, true); - view.setUint16(32, buffer.numberOfChannels * 2, true); - view.setUint16(34, 16, true); // BitsPerSample - writeString(view, 36, 'data'); - view.setUint32(40, length, true); - - // Write audio data - const offset = 44; - const channelData = buffer.getChannelData(0); - - for (let i = 0; i < channelData.length; i++) { - const sample = 
Math.max(-1, Math.min(1, channelData[i])); - view.setInt16(offset + i * 2, sample < 0 ? sample * 0x8000 : sample * 0x7FFF, true); - } - - return wavBuffer; -} -``` - ---- - ## Cursor Compatibility Research ### Cursor Modes @@ -590,14 +491,14 @@ Size = (16000 * 16 * 1 * 60) / 8 / 1024 / 1024 **Key Research Findings**: 1. ✅ **Whisper API**: Reliable, fast, accurate for English speech -2. ✅ **GPT-4o**: Cost-effective for transformation (~$0.01 per use) +2. ✅ **LLM providers**: Multiple cloud and local options for transformation 3. ✅ **VSCode API**: Mature, well-documented, stable 4. ⚠️ **Cursor Chat**: Limited extension API access -5. ✅ **Browser APIs**: MediaRecorder works reliably -6. ✅ **Security**: SecretStorage is best option +5. ✅ **Native audio capture**: Cross-platform via `@kstonekuan/audio-capture` ([ADR-0013](../adr/0013-native-audio-capture.md)) +6. ✅ **Security**: SecretStorage is the best option for API keys -**Technical Feasibility**: ✅ All MVP features are feasible +**Technical Feasibility**: ✅ All core features are feasible --- -**Next**: See [API Reference](../api/README.md). 
+**Related:** [Cursor Compatibility ADR](../adr/0007-cursor-compatibility.md) · [Configuration Guide](../configuration/README.md) diff --git a/docs/roadmap/versions.md b/docs/roadmap/versions.md deleted file mode 100644 index 56d76d6..0000000 --- a/docs/roadmap/versions.md +++ /dev/null @@ -1,427 +0,0 @@ -# MVP and Roadmap - -**Last Updated**: 2026-05-23 - ---- - -## MVP Definition (v0.1.0) - -### Scope - -**What's IN the MVP**: -- ✅ Audio recording from microphone (webview) -- ✅ Transcription with OpenAI Whisper -- ✅ Insertion into active text editor -- ✅ API key configuration (SecretStorage) -- ✅ Basic visual feedback (status bar) -- ✅ Error handling and notifications -- ✅ Cross-platform support (macOS, Windows, Linux) - -**What's OUT of the MVP**: -- ❌ Prompt transformation (GPT-4 optimization) → v0.2.0 -- ❌ Chat integration (Cursor chat input) → v0.3.0 -- ❌ Real-time streaming transcription → v0.4.0 -- ❌ Recording history → v0.4.0 -- ❌ Multi-language auto-detection → v0.5.0 -- ❌ Custom vocabulary → v0.5.0 - -### Success Criteria - -**MVP is successful if**: -1. ✅ Users can record → transcribe → insert in <30 seconds -2. ✅ Transcription accuracy >90% for clear English speech -3. ✅ Works reliably on all three platforms -4. ✅ No data loss (audio properly processed or error shown) -5. ✅ Zero crashes in normal operation -6. ✅ API key configuration is clear and secure -7. 
✅ 10+ users actively using it for 1 week - ---- - -## Release Timeline - -```mermaid -timeline - title Cursor Whisper Roadmap - section MVP - v0.1.0 (Week 1-3) : Basic recording + transcription - Alpha Testing (Week 4) : Internal testing - section Beta - v0.2.0 (Week 5-6) : Add prompt transformation - v0.3.0 (Week 7-8) : Cursor chat integration - Beta Testing (Week 9) : Public beta - section Stable - v0.4.0 (Week 10-11) : Streaming + history - v0.5.0 (Week 12-13) : Multi-language + vocab - v1.0.0 (Week 14-15) : Production release -``` - ---- - -## Version Roadmap - -### v0.1.0 - MVP (Weeks 1-3) - -**Goal**: Prove core concept works - -**Features**: -- Basic audio recording (16kHz mono) -- OpenAI Whisper transcription -- Insert into active editor -- Status bar UI -- API key configuration -- Error handling - -**Technical**: -- Clean Architecture foundation -- Domain/Application/Infrastructure layers -- Unit test coverage >70% -- Basic documentation - -**Non-Goals**: -- Prompt optimization -- Chat integration -- Advanced UI - -**Release Criteria**: -- ✅ All features working -- ✅ Tests passing -- ✅ No critical bugs -- ✅ Basic documentation complete - ---- - -### v0.2.0 - Prompt Transformation (Weeks 5-6) - -**Goal**: Make transcriptions more useful - -**New Features**: -- ✅ GPT-4 prompt transformation -- ✅ Structured output (sections) -- ✅ Before/after comparison -- ✅ Toggle transformation on/off - -**Improvements**: -- Better error messages -- Faster audio processing -- Improved status indicators - -**Technical**: -- `OpenAIPromptTransformer` implementation -- `TransformPromptUseCase` -- Context gathering (editor language, project type) -- Transformation caching (future optimization) - -**Release Criteria**: -- ✅ Transformation works >90% of time -- ✅ Clear improvements visible -- ✅ Option to disable -- ✅ Tests for transformation logic - ---- - -### v0.3.0 - Chat Integration (Weeks 7-8) - -**Goal**: Seamless Cursor chat integration - -**New Features**: -- ✅ Chat 
Participant API integration -- ✅ Auto-detect chat context -- ✅ Direct insertion into chat input -- ✅ Cursor compatibility warnings - -**Improvements**: -- Chain of Responsibility for insertion -- Fallback strategies (chat → editor → clipboard) -- Better state management - -**Technical**: -- `ChatParticipantInserter` -- Chat context detection -- Cursor Classic vs Glass detection -- Compatibility checker - -**Release Criteria**: -- ✅ Works in Cursor Classic mode -- ✅ Works in Editor Window -- ✅ Graceful fallback in Agents Window -- ✅ Clear user feedback - ---- - -### v0.4.0 - Advanced Features (Weeks 10-11) - -**Goal**: Power user features - -**New Features**: -- ✅ Real-time streaming transcription -- ✅ Recording history (optional) -- ✅ Edit before insert -- ✅ Push-to-talk mode -- ✅ Customizable shortcuts - -**Improvements**: -- Faster transcription start -- Progress indicators -- Better UX polish - -**Technical**: -- Streaming audio chunks -- Local storage for history -- Preview modal/panel -- Keyboard shortcut configuration - -**Release Criteria**: -- ✅ Streaming works without latency issues -- ✅ History is opt-in and secure -- ✅ All features well-tested - ---- - -### v0.5.0 - Multi-Language (Weeks 12-13) - -**Goal**: International support - -**New Features**: -- ✅ Auto-detect language -- ✅ Support 10+ languages -- ✅ Custom vocabulary per project -- ✅ Technical term correction -- ✅ Acronym glossary - -**Improvements**: -- Better transcription accuracy -- Context-aware hints -- Project-specific settings - -**Technical**: -- Language detection -- Vocabulary management -- Per-project configuration -- Whisper prompt optimization - -**Release Criteria**: -- ✅ Accurate transcription in 5+ languages -- ✅ Custom vocabulary works -- ✅ Performance not degraded - ---- - -### v1.0.0 - Production Release (Weeks 14-15) - -**Goal**: Production-ready, stable, well-documented - -**Features** (all previous + polish): -- ✅ All features from v0.1-v0.5 -- ✅ Complete documentation -- ✅ 
Comprehensive testing -- ✅ Performance optimization -- ✅ Accessibility compliance - -**Improvements**: -- Final UX polish -- Performance benchmarks met -- All edge cases handled -- Security audit complete - -**Technical**: -- 90%+ test coverage -- Zero known critical bugs -- Performance optimization -- Telemetry (optional, opt-in) - -**Release Criteria**: -- ✅ 100+ active users -- ✅ <5% error rate -- ✅ <5s average transcription time -- ✅ Full documentation -- ✅ Security audit passed -- ✅ All platforms tested - ---- - -## Post-1.0 Features (Future) - -### v1.1+ - Enterprise Features -- Team API key sharing -- Usage analytics dashboard -- Cost tracking and budgets -- Admin controls - -### v1.2+ - Advanced AI -- Multiple STT providers (Google, Azure) -- Local Whisper option (privacy) -- Custom transformation styles -- Context from recent code - -### v1.3+ - Collaboration -- Share prompt templates -- Team vocabulary -- Prompt library -- Best practices integration - -### v2.0+ - Major Evolution -- Voice commands ("Cursor, refactor this") -- Multi-modal (voice + screen context) -- Automated code generation from voice -- Voice-driven debugging - ---- - -## Feature Priority Matrix - -### Priority Scoring - -| Feature | User Value | Effort | Risk | Priority | -|---------|-----------|--------|------|----------| -| **MVP Features** ||||| -| Basic Recording | 10 | 5 | 3 | **P0** | -| Whisper Transcription | 10 | 3 | 2 | **P0** | -| Editor Insertion | 10 | 2 | 1 | **P0** | -| API Key Config | 10 | 2 | 1 | **P0** | -| **v0.2 Features** ||||| -| Prompt Transformation | 9 | 3 | 2 | **P1** | -| Before/After Preview | 6 | 4 | 1 | **P2** | -| **v0.3 Features** ||||| -| Chat Integration | 9 | 6 | 5 | **P1** | -| Fallback Strategies | 8 | 3 | 2 | **P1** | -| **v0.4 Features** ||||| -| Streaming | 7 | 8 | 6 | **P2** | -| History | 6 | 4 | 3 | **P2** | -| Edit Before Insert | 7 | 3 | 1 | **P2** | -| **v0.5 Features** ||||| -| Multi-Language | 8 | 5 | 3 | **P2** | -| Custom Vocabulary | 7 
| 6 | 3 | **P2** | - -**Priority Levels**: -- **P0**: Must have for MVP -- **P1**: Critical for beta -- **P2**: Important for v1.0 -- **P3**: Nice to have (post-1.0) - ---- - -## Risk Assessment - -### High-Risk Items - -| Risk | Impact | Probability | Mitigation | -|------|--------|-------------|-----------| -| Cursor Agents Window incompatible | High | High | Detect + warn, fallback to clipboard | -| Whisper API slow/unreliable | High | Medium | Retry logic, error handling, user feedback | -| Audio permission blocked | High | Medium | Clear instructions, platform-specific guides | -| GPT-4 cost too high | Medium | Low | Make optional, cache results | -| Cross-platform audio issues | Medium | Medium | Test early on all platforms | - -### Medium-Risk Items - -| Risk | Impact | Probability | Mitigation | -|------|--------|-------------|-----------| -| VSCode API changes | Medium | Low | Version lock, test before updating | -| OpenAI rate limiting | Medium | Medium | Backoff strategy, user communication | -| Large audio files | Low | Medium | Duration limits, size validation | - ---- - -## Development Phases - -### Phase 1: Foundation (Weeks 1-3) -- ✅ Set up project structure -- ✅ Implement domain layer -- ✅ Implement application ports -- ✅ Basic infrastructure adapters -- ✅ Unit tests - -### Phase 2: Integration (Weeks 4-5) -- ✅ Webview audio recording -- ✅ OpenAI Whisper integration -- ✅ Editor insertion -- ✅ Integration tests - -### Phase 3: UX & Polish (Week 6) -- ✅ Status bar UI -- ✅ Error handling -- ✅ Notifications -- ✅ Documentation - -### Phase 4: Testing & Release (Week 7-8) -- ✅ Manual testing -- ✅ Bug fixes -- ✅ Performance tuning -- ✅ Alpha release - ---- - -## Metrics & Success Indicators - -### Technical Metrics - -- **Test Coverage**: >80% -- **Build Time**: <30 seconds -- **Extension Size**: <5MB -- **Memory Usage**: <50MB idle -- **Transcription Time**: <10s for 30s audio - -### User Metrics - -- **Active Users**: 100+ by v1.0 -- **Retention**: >50% 
weekly active -- **Error Rate**: <5% -- **Average Session**: 5+ recordings/week -- **User Satisfaction**: >4/5 stars - -### Performance Benchmarks - -| Metric | Target | Acceptable | Poor | -|--------|--------|------------|------| -| Recording start | <500ms | <1s | >2s | -| Transcription | <8s | <15s | >30s | -| Transformation | <4s | <8s | >15s | -| Total (30s audio) | <15s | <25s | >45s | - ---- - -## Release Process - -### Pre-Release Checklist - -- [ ] All tests passing -- [ ] Coverage >80% -- [ ] No linter errors -- [ ] Documentation updated -- [ ] CHANGELOG updated -- [ ] Version bumped -- [ ] Manual testing complete -- [ ] Cross-platform tested -- [ ] Security review done - -### Release Steps - -1. **Tag release**: `git tag v0.1.0` -2. **Build**: `pnpm run package` -3. **Test .vsix**: Install and test manually -4. **Publish to VSCode**: `vsce publish` -5. **Publish to Open VSX**: `ovsx publish` -6. **Create GitHub release**: With changelog -7. **Announce**: Twitter, Reddit, Discord - ---- - -## Summary - -**Roadmap Overview**: -- 🎯 **MVP (v0.1.0)**: 3 weeks - Basic recording + transcription -- 🚀 **Beta (v0.2-v0.3)**: 4 weeks - Transformation + chat -- ⭐ **Stable (v0.4-v0.5)**: 4 weeks - Advanced features -- 🎉 **v1.0**: Week 15 - Production release - -**Total Timeline**: ~15 weeks from start to v1.0 - -**Key Milestones**: -1. Week 3: MVP release -2. Week 9: Public beta -3. Week 15: v1.0 production - ---- - -**Next**: See [Development Standards](../standards/coding-conventions.md). diff --git a/docs/security/privacy.md b/docs/security/privacy.md index d3365e3..3ff46e9 100644 --- a/docs/security/privacy.md +++ b/docs/security/privacy.md @@ -6,7 +6,7 @@ ## Overview -Cursor Whisper takes security and privacy seriously. This document outlines our security model, data handling practices, and privacy guarantees. +Promptimize takes security and privacy seriously. This document outlines our security model, data handling practices, and privacy guarantees. 
--- @@ -275,7 +275,7 @@ For EU users: **Immediate actions**: 1. Revoke compromised key in [OpenAI dashboard](https://platform.openai.com/api-keys) 2. Generate new API key -3. Update key in Cursor Whisper settings +3. Update key in Promptimize settings 4. Review OpenAI usage logs for unauthorized activity 5. Consider reporting to OpenAI if abuse detected diff --git a/docs/standards/coding-conventions.md b/docs/standards/coding-conventions.md index dc7758f..fee541b 100644 --- a/docs/standards/coding-conventions.md +++ b/docs/standards/coding-conventions.md @@ -1,632 +1,89 @@ # Coding Standards and Conventions -**Last Updated**: 2026-05-23 +**Last Updated**: 2026-05-24 ---- - -## Code Style - -### TypeScript Configuration - -```json -// tsconfig.json -{ - "compilerOptions": { - "target": "ES2022", - "module": "commonjs", - "lib": ["ES2022"], - "strict": true, - "esModuleInterop": true, - "skipLibCheck": true, - "forceConsistentCasingInFileNames": true, - "resolveJsonModule": true, - "declaration": true, - "declarationMap": true, - "sourceMap": true, - "outDir": "./out", - "rootDir": "./src", - "noUnusedLocals": true, - "noUnusedParameters": true, - "noImplicitReturns": true, - "noFallthroughCasesInSwitch": true - } -} -``` - -### ESLint Configuration - -```javascript -// .eslintrc.js -module.exports = { - parser: '@typescript-eslint/parser', - extends: [ - 'eslint:recommended', - 'plugin:@typescript-eslint/recommended', - 'prettier' - ], - rules: { - '@typescript-eslint/explicit-function-return-type': 'error', - '@typescript-eslint/no-explicit-any': 'error', - '@typescript-eslint/no-unused-vars': ['error', { - argsIgnorePattern: '^_' - }], - 'no-console': 'warn', - 'prefer-const': 'error', - 'eqeqeq': ['error', 'always'] - } -}; -``` +Tooling configuration lives in the repo root — do not duplicate it here: -### Prettier Configuration - -```json -// .prettierrc -{ - "semi": true, - "trailingComma": "es5", - "singleQuote": true, - "printWidth": 100, - "tabWidth": 2, - 
"useTabs": false, - "arrowParens": "avoid" -} -``` +- TypeScript: [`tsconfig.json`](../../tsconfig.json) +- ESLint: [`.eslintrc.js`](../../.eslintrc.js) +- Prettier: [`.prettierrc`](../../.prettierrc) +- Jest: [`jest.config.js`](../../jest.config.js) --- ## Naming Conventions -### Files and Directories - -**Pattern**: `PascalCase` for classes, `camelCase` for others - -``` -✅ GOOD: -src/domain/entities/Recording.ts -src/application/use-cases/StartRecordingUseCase.ts -src/infrastructure/audio/WebviewAudioRecorder.ts -src/shared/utils/generateId.ts - -❌ BAD: -src/domain/entities/recording.ts -src/application/use-cases/start-recording-use-case.ts -src/infrastructure/audio/webview_audio_recorder.ts -``` - -### TypeScript Naming - | Type | Convention | Example | |------|-----------|---------| -| **Interfaces** | PascalCase, prefix `I` | `IAudioRecorder`, `ILogger` | +| **Interfaces (ports)** | PascalCase, prefix `I` | `IAudioRecorder`, `ILogger` | | **Classes** | PascalCase | `Recording`, `OpenAIWhisperService` | -| **Types** | PascalCase | `RecordingState`, `AudioFormat` | -| **Enums** | PascalCase | `RecordingState`, `LogLevel` | -| **Functions** | camelCase | `startRecording`, `validateAudioFile` | -| **Variables** | camelCase | `audioData`, `transcriptionResult` | +| **Types / Enums** | PascalCase | `RecordingState`, `TransformationProvider` | +| **Functions / variables** | camelCase | `startRecording`, `audioData` | | **Constants** | SCREAMING_SNAKE_CASE | `MAX_RECORDING_DURATION` | -| **Private fields** | camelCase, prefix `_` | `_mediaRecorder`, `_logger` | - -### Examples - -```typescript -// ✅ GOOD -export interface IAudioRecorder { - startRecording(): Promise; -} - -export class WebviewAudioRecorder implements IAudioRecorder { - private _mediaRecorder: MediaRecorder | null = null; - private readonly MAX_DURATION_SECONDS = 300; - - public async startRecording(): Promise { - // ... 
- } -} - -export enum RecordingState { - IDLE = 'idle', - RECORDING = 'recording' -} - -// ❌ BAD -export interface audioRecorder { - StartRecording(): Promise; -} - -export class webview_audio_recorder implements audioRecorder { - private MediaRecorder: MediaRecorder | null = null; - private readonly maxDurationSeconds = 300; - - public async StartRecording(): Promise { - // ... - } -} -``` +| **Files** | PascalCase for classes | `StartRecordingUseCase.ts` | --- ## Code Organization -### File Structure - -**Every file should have**: -1. Import statements (grouped and ordered) -2. Type definitions -3. Constants -4. Main implementation -5. Helper functions -6. Exports - -**Example**: -```typescript -// 1. External imports -import * as vscode from 'vscode'; -import { OpenAI } from 'openai'; - -// 2. Internal imports (grouped by layer) -import { IAudioRecorder } from '../../application/ports/IAudioRecorder'; -import { AudioData } from '../../domain/value-objects/AudioData'; -import { RecordingError } from '../../domain/errors/RecordingError'; - -// 3. Type definitions -interface MediaRecorderOptions { - mimeType: string; - audioBitsPerSecond: number; -} +Follow Clean Architecture layer boundaries. See [Architecture Overview](../architecture/overview.md). -// 4. Constants -const MAX_RECORDING_DURATION = 300; -const SUPPORTED_MIME_TYPES = ['audio/webm', 'audio/ogg']; - -// 5. Main class -export class WebviewAudioRecorder implements IAudioRecorder { - // Implementation -} - -// 6. Helper functions -function getSupportedMimeType(): string { - // Helper logic -} +``` +src/ +├── domain/ # Entities, value objects, errors — no framework imports +├── application/ # Use cases, ports, DTOs +├── infrastructure/ # Port implementations, external integrations +├── presentation/ # Commands, status bar, webviews +└── shared/ # Constants, utilities ``` -### Import Grouping Order - -```typescript -// 1. 
Node built-ins -import { Buffer } from 'buffer'; -import * as path from 'path'; - -// 2. External packages -import * as vscode from 'vscode'; -import { OpenAI } from 'openai'; - -// 3. Internal - Domain -import { Recording } from '../../domain/entities/Recording'; - -// 4. Internal - Application -import { IAudioRecorder } from '../../application/ports/IAudioRecorder'; - -// 5. Internal - Infrastructure -import { Logger } from '../logging/Logger'; - -// 6. Internal - Shared -import { generateId } from '../../shared/utils/generateId'; +**Import order:** Node built-ins → external packages → domain → application → infrastructure → shared → types -// 7. Types -import type { Config } from '../../application/dto/Config'; -``` +**Dependency rule:** Domain and application must not import infrastructure or presentation. Enforced via ESLint `no-restricted-imports`. --- ## Documentation -### JSDoc Comments - -**Required for**: -- All public interfaces -- All public methods -- Complex algorithms -- Non-obvious behavior - -**Template**: -```typescript -/** - * Brief one-line description. - * - * More detailed explanation if needed. - * Can span multiple lines. - * - * @param paramName - Parameter description - * @returns Return value description - * @throws ErrorType - When this error is thrown - * - * @example - * ```typescript - * const recorder = new WebviewAudioRecorder(); - * await recorder.startRecording(); - * ``` - */ -public async startRecording(): Promise { - // Implementation -} -``` - -**Examples**: - -```typescript -/** - * Port for audio recording functionality. - * - * Implementations: - * - WebviewAudioRecorder (primary): Uses browser MediaRecorder - * - NodeAudioRecorder (fallback): Uses Node.js libraries - */ -export interface IAudioRecorder { - /** - * Start recording audio from microphone. - * - * Requests microphone permission if not already granted. - * Initializes MediaRecorder with optimal settings (16kHz mono). 
- * - * @throws PermissionError if microphone access denied - * @throws RecordingError if recording fails to start - * - * @example - * ```typescript - * const recorder = new WebviewAudioRecorder(); - * await recorder.startRecording(); - * console.log('Recording started'); - * ``` - */ - startRecording(): Promise; - - /** - * Stop recording and return audio data. - * - * Stops MediaRecorder, converts audio to WAV format (16kHz mono), - * and returns AudioData object. Audio is immediately cleared from memory - * after this method returns. - * - * @returns AudioData object with recorded audio - * @throws RecordingError if no active recording - */ - stopRecording(): Promise; -} -``` +Add JSDoc to public interfaces and non-obvious behavior. Implementation details belong in code comments, not separate markdown mirrors. --- ## Error Handling -### Custom Errors - -**Always extend base Error**: -```typescript -// ✅ GOOD -export class RecordingError extends Error { - constructor( - message: string, - public readonly cause?: Error - ) { - super(message); - this.name = 'RecordingError'; - - // Preserve stack trace - if (cause) { - this.stack = `${this.stack}\nCaused by: ${cause.stack}`; - } - } -} - -// ❌ BAD -export class RecordingError { - constructor(public message: string) {} -} -``` - -### Error Handling Pattern - -```typescript -// ✅ GOOD: Specific error types -try { - await recorder.startRecording(); -} catch (error) { - if (error instanceof PermissionError) { - // Handle permission denied - } else if (error instanceof RecordingError) { - // Handle recording error - } else { - // Handle unexpected error - throw error; - } -} - -// ❌ BAD: Generic catch-all -try { - await recorder.startRecording(); -} catch (error) { - console.error('Error:', error); -} -``` - -### Logging Errors - -```typescript -// ✅ GOOD: Structured logging -this.logger.error('Failed to start recording', { - error: error.message, - stack: error.stack, - context: { userId, timestamp } -}); - -// ❌ BAD: 
Console logging -console.error('Error:', error); -``` - ---- - -## Testing Conventions - -### Test File Naming - -``` -✅ GOOD: -__tests__/domain/entities/Recording.test.ts -__tests__/application/use-cases/StartRecordingUseCase.test.ts - -❌ BAD: -__tests__/Recording.spec.ts -tests/recording-test.ts -``` - -### Test Structure - -```typescript -describe('ClassName or Feature', () => { - // Setup - let instance: ClassName; - let mockDependency: jest.Mocked; - - beforeEach(() => { - mockDependency = { - method: jest.fn() - }; - instance = new ClassName(mockDependency); - }); - - describe('methodName', () => { - it('should do something when condition', () => { - // Arrange - const input = 'test'; - mockDependency.method.mockResolvedValue('result'); - - // Act - const result = instance.methodName(input); - - // Assert - expect(result).toBe('expected'); - expect(mockDependency.method).toHaveBeenCalledWith(input); - }); - - it('should throw error when invalid input', () => { - // Arrange - const invalidInput = null; - - // Act & Assert - expect(() => instance.methodName(invalidInput)).toThrow(ValidationError); - }); - }); -}); -``` +- Extend `Error` for domain-specific errors; preserve `cause` and stack traces +- Catch specific error types in use cases; map to user-facing messages in presentation +- Use `ILogger` — no `console.log` in production code --- -## TypeScript Best Practices - -### Type Safety - -```typescript -// ✅ GOOD: Explicit types -function processRecording(recording: Recording): TranscriptionResult { - return { - text: recording.audioData.toString(), - duration: recording.duration - }; -} - -// ❌ BAD: Implicit any -function processRecording(recording) { - return { - text: recording.audioData.toString(), - duration: recording.duration - }; -} -``` - -### Avoid `any` - -```typescript -// ✅ GOOD: Use generics or specific types -function parseResponse(response: Response): T { - return response.json() as T; -} - -// ❌ BAD: Using any -function 
parseResponse(response: any): any { - return response.json(); -} -``` - -### Null Checks - -```typescript -// ✅ GOOD: Optional chaining and nullish coalescing -const duration = recording?.audioData?.getDurationInSeconds() ?? 0; - -// ❌ BAD: Manual null checks -const duration = recording && recording.audioData - ? recording.audioData.getDurationInSeconds() - : 0; -``` - -### Enums vs Union Types - -```typescript -// ✅ GOOD: Use enums for closed sets -export enum RecordingState { - IDLE = 'idle', - RECORDING = 'recording', - PROCESSING = 'processing' -} - -// ✅ ALSO GOOD: Use union types for simple cases -export type AudioQuality = 'low' | 'medium' | 'high'; +## Testing -// ❌ BAD: String literals everywhere -function setState(state: 'idle' | 'recording' | 'processing') { - // Repeated everywhere -} -``` +- Test files: `src/__tests__/` mirroring source structure +- Naming: `FeatureName.test.ts` +- Pattern: Arrange-Act-Assert; mock ports, not internals +- See [Testing Strategy](../testing/strategy.md) --- ## Git Conventions -### Commit Messages - -**Format**: `(): ` - -**Types**: -- `feat`: New feature -- `fix`: Bug fix -- `docs`: Documentation only -- `style`: Code style (formatting, semicolons) -- `refactor`: Code refactoring -- `test`: Adding tests -- `chore`: Build process, dependencies - -**Examples**: -``` -✅ GOOD: -feat(recording): add push-to-talk mode -fix(transcription): handle rate limit errors -docs(adr): add decision record for audio format -refactor(domain): extract AudioValidator service -test(use-cases): add StartRecordingUseCase tests - -❌ BAD: -updated code -fixes -WIP -asdfasdf -``` - -### Branch Naming - -**Format**: `/-` - -``` -✅ GOOD: -feat/123-prompt-transformation -fix/456-audio-permission-error -docs/789-architecture-diagrams - -❌ BAD: -my-feature -update -branch-1 -``` - ---- - -## Performance Guidelines - -### Async/Await - -```typescript -// ✅ GOOD: Parallel execution -const [transcription, transformation] = await Promise.all([ - 
whisperService.transcribe(audio), - gptService.transform(text) -]); - -// ❌ BAD: Sequential execution (slower) -const transcription = await whisperService.transcribe(audio); -const transformation = await gptService.transform(text); -``` - -### Memory Management +**Commits:** `<type>(<scope>): <subject>` — e.g. `feat(recording): add push-to-talk mode` -```typescript -// ✅ GOOD: Clean up resources -public async stopRecording(): Promise { - const audioData = await this.getAudioData(); - - // Clean up - this.audioChunks = []; - this.stream?.getTracks().forEach(track => track.stop()); - this.stream = null; - - return audioData; -} +**Types:** `feat`, `fix`, `docs`, `refactor`, `test`, `chore` -// ❌ BAD: Memory leaks -public async stopRecording(): Promise { - return await this.getAudioData(); - // Stream and chunks still in memory -} -``` +**Branches:** `<type>/<short-description>` — e.g. `feat/prompt-transformation` --- ## Code Review Checklist -### Before Creating PR - -- [ ] All tests pass -- [ ] No linter errors -- [ ] Code formatted with Prettier -- [ ] JSDoc comments added -- [ ] No console.log statements -- [ ] Error handling implemented +- [ ] Correct layer; dependencies point inward - [ ] Types explicit (no `any`) -- [ ] Tests added for new code -- [ ] Documentation updated - -### During Code Review - -**Look for**: -- [ ] Correct layer (domain/application/infrastructure/presentation) -- [ ] Dependencies point inward -- [ ] Business logic in domain -- [ ] Adapters implement ports -- [ ] Error handling complete -- [ ] Type safety maintained -- [ ] Tests cover edge cases -- [ ] No performance issues -- [ ] Security considerations - ---- - -## Summary - -**Core Principles**: -1. ✅ **Type Safety**: Explicit types, no `any` -2. ✅ **Clean Architecture**: Respect layer boundaries -3. ✅ **Documentation**: JSDoc for public APIs -4. ✅ **Testing**: Test behavior, not implementation -5. ✅ **Error Handling**: Specific error types -6. ✅ **Consistency**: Follow conventions -7. 
✅ **Performance**: Async parallel execution -8. ✅ **Security**: No secrets in code - -**Tools**: -- TypeScript 5.4+ (strict mode) -- ESLint (recommended + TypeScript rules) -- Prettier (opinionated formatting) -- Jest (testing) -- Husky (pre-commit hooks) +- [ ] Error handling and tests for new behavior +- [ ] No secrets or transcription content in logs +- [ ] Documentation updated if user-facing behavior changed --- -**Next**: See [Deployment Documentation](../deployment/release-process.md). +**Related:** [Architecture Overview](../architecture/overview.md) · [Testing Strategy](../testing/strategy.md) diff --git a/docs/testing/strategy.md b/docs/testing/strategy.md index f0b753f..501ca4d 100644 --- a/docs/testing/strategy.md +++ b/docs/testing/strategy.md @@ -1,16 +1,16 @@ # Testing Strategy -**Last Updated**: 2026-05-23 +**Last Updated**: 2026-05-24 --- ## Overview -Cursor Whisper uses a focused testing strategy aligned with the **stop → transcribe → transform → insert** pipeline. +Promptimize uses a focused testing strategy aligned with the **stop → transcribe → transform → insert** pipeline. **Testing philosophy**: Test behavior at integration boundaries, not implementation details. -**Current status**: Jest is configured (`jest.config.js`, `__tests__/setup.ts`) but automated tests are not yet implemented. See [`PROGRESS.md`](../../PROGRESS.md). +**Current status**: Jest is configured with 12+ test files under `src/__tests__/`, covering use cases, transformers, native audio, and provider switching. Run `pnpm test` from the repo root (Node 22 via nvm). 
--- @@ -32,16 +32,30 @@ Unit Tests (75%) --- +## Existing Test Coverage + +| Area | Test files | +|------|------------| +| Use cases | `StartRecordingUseCase.test.ts` | +| Configuration | `ConfigurationValidationService.test.ts` | +| Transformers | `Anthropic`, `Azure`, `Google`, `Ollama`, `OpenAI`, `PromptTransformerFactory`, `transformationUtils` | +| Integration | `providerSwitching.test.ts` | +| Audio | `NativeAudioRecorder.test.ts` | +| Presentation | `RecordingStatusBarItem.test.ts` | +| Domain | `TransformationProvider.test.ts` | + +--- + ## Critical Test Priorities -These tests protect the core value of the extension. Implement in this order. +These tests protect the core value of the extension. Expand coverage in this order. ### Tier 1 — Pipeline use cases (highest ROI) | Target | What to verify | |--------|----------------| | `InsertTextUseCase` | Chain of responsibility: chat → editor → clipboard; `InsertionError` when all fail | -| `TransformPromptUseCase` | Transformation disabled → passthrough; GPT failure → fallback to raw text | +| `TransformPromptUseCase` | Transformation disabled → passthrough; failure → fallback to raw text | | `TranscribeAudioUseCase` | Config options passed to service; audio > 25 MB → `AudioTooLargeError` | | `StopRecordingUseCase` | Returns `AudioData`; errors when not recording | | `StartRecordingUseCase` | `MissingApiKeyError`; no start when already recording; permission/recorder errors | @@ -53,72 +67,19 @@ These tests protect the core value of the extension. Implement in this order. 
|--------|----------------| | `OpenAIWhisperService.validateAudioFile()` | Empty buffer, duration < 0.1s, oversized file | | `OpenAIWhisperService.transcribe()` | Mock OpenAI client; error mapping (401, 429) | -| `OpenAIPromptTransformer.transform()` | Mock completion; context (`editorLanguage`) in prompt | +| Prompt transformers | Mock completion; context in system prompt | | Text inserters | `EditorTextInserter`, `FallbackTextInserter`, `ChatParticipantInserter` with mocked `vscode` | ### Tier 3 — Audio encoding | Target | What to verify | |--------|----------------| -| `NativeAudioRecorder` | WAV encoding from PCM; permission error classification (mock `@kstonekuan/audio-capture`) | +| `NativeAudioRecorder` | WAV encoding from PCM; permission error classification | -### Deferred (low priority for MVP) +### Deferred -- **`Recording` entity** — not used in the live pipeline today -- **`WebviewAudioRecorder`** — deprecated; kept as fallback only ([ADR-0013](../adr/0013-native-audio-capture.md)) - **Full E2E in VS Code/Cursor** — manual smoke before release -- **Cross-platform CI matrix** — after unit tests exist - ---- - -## Example: Use Case Test - -```typescript -// __tests__/application/use-cases/InsertTextUseCase.test.ts -import { InsertTextUseCase, InsertionError } from '../../../src/application/use-cases/InsertTextUseCase'; -import { ITextInserter } from '../../../src/application/ports/ITextInserter'; -import { ILogger } from '../../../src/application/ports/ILogger'; - -describe('InsertTextUseCase', () => { - const mockLogger: ILogger = { - debug: jest.fn(), - info: jest.fn(), - warn: jest.fn(), - error: jest.fn(), - setLevel: jest.fn(), - }; - - it('should try inserters in priority order until one succeeds', async () => { - const chat: jest.Mocked = { - canInsert: jest.fn().mockReturnValue(true), - insert: jest.fn().mockResolvedValue(false), - getPriority: jest.fn().mockReturnValue(1), - }; - const editor: jest.Mocked = { - canInsert: 
jest.fn().mockReturnValue(true), - insert: jest.fn().mockResolvedValue(true), - getPriority: jest.fn().mockReturnValue(2), - }; - - const useCase = new InsertTextUseCase([editor, chat], mockLogger); - await useCase.execute('Hello world'); - - expect(chat.insert).toHaveBeenCalled(); - expect(editor.insert).toHaveBeenCalled(); - }); - - it('should throw InsertionError when all inserters fail', async () => { - const inserter: jest.Mocked = { - canInsert: jest.fn().mockReturnValue(true), - insert: jest.fn().mockResolvedValue(false), - getPriority: jest.fn().mockReturnValue(1), - }; - - const useCase = new InsertTextUseCase([inserter], mockLogger); - await expect(useCase.execute('text')).rejects.toThrow(InsertionError); - }); -}); -``` +- **Cross-platform CI matrix** — after core unit coverage is solid --- @@ -131,16 +92,15 @@ Run these manually with a real OpenAI API key: 3. **Fallback insertion**: Close all editors → record → stop → text on clipboard 4. **Cancellation**: Start recording → press Escape → returns to idle 5. **Short recording**: Record < 0.5s → appropriate error message +6. 
**Provider switching**: Change optimization provider → verify transformation uses new provider --- -## Jest Configuration - -Existing config: [`jest.config.js`](../../jest.config.js) +## Configuration -- Preset: `ts-jest`, environment: `node` -- VSCode APIs mocked in [`__tests__/setup.ts`](../../__tests__/setup.ts) -- Coverage thresholds defined but not yet met (tests pending) +- Jest config: [`jest.config.js`](../../jest.config.js) +- VSCode API mocks: [`__tests__/setup.ts`](../../__tests__/setup.ts) +- Run: `source scripts/ensure-node.sh && pnpm test` --- @@ -162,7 +122,7 @@ Existing config: [`jest.config.js`](../../jest.config.js) --- -## Coverage Goals (When Tests Exist) +## Coverage Goals | Layer | Line Coverage | Branch Coverage | |-------|--------------|----------------| @@ -176,4 +136,4 @@ Focus coverage on Tier 1 use cases before chasing presentation-layer percentages --- -**Next**: See [Deployment Documentation](../deployment/release-process.md). +**Next**: [Release Process](../deployment/release-process.md) diff --git a/docs/user-guide/keyboard-shortcuts.md b/docs/user-guide/keyboard-shortcuts.md new file mode 100644 index 0000000..0b8e981 --- /dev/null +++ b/docs/user-guide/keyboard-shortcuts.md @@ -0,0 +1,86 @@ +# Keyboard Shortcuts + +Complete reference for Promptimize keyboard shortcuts and Command Palette commands. + +--- + +## Default Keybindings + +| Shortcut | Command | Behavior | +|----------|---------|----------| +| `Cmd+Alt+V` (macOS) / `Ctrl+Alt+V` (Windows/Linux) | Start Transcribe Recording | Starts **Transcribe** mode (raw transcription) | +| `Cmd+Alt+P` (macOS) / `Ctrl+Alt+P` (Windows/Linux) | Start Promptimize Recording | Starts **Promptimize** mode (optimized prompt) | +| `Escape` | Cancel Recording | Cancels active recording (while `promptimize.isRecording` is true) | + +### Important: Start-only shortcuts + +`Cmd/Ctrl+Alt+V` and `Cmd/Ctrl+Alt+P` **start** recording only. They do **not** toggle or stop recording. 
+ +To **stop** recording: + +1. Click the status bar item showing **Recording...**, or +2. Run the matching stop command from the Command Palette + +--- + +## Command Palette Reference + +Open with `Cmd/Ctrl+Shift+P`, then search for **Promptimize**. + +### Recording + +| Command | Purpose | +|---------|---------| +| `Promptimize: Start Transcribe Recording` | Start raw transcription mode | +| `Promptimize: Stop Transcribe Recording` | Stop and process Transcribe recording | +| `Promptimize: Start Promptimize Recording` | Start optimized prompt mode | +| `Promptimize: Stop Promptimize Recording` | Stop and process Promptimize recording | +| `Promptimize: Cancel Recording` | Discard recording without processing | + +### Configuration + +| Command | Purpose | +|---------|---------| +| `Promptimize: Setup Wizard` | Opens the configuration panel | +| `Promptimize: Open Configuration` | Opens the configuration webview | +| `Promptimize: Configure OpenAI API Key (Whisper)` | Set OpenAI key for Whisper | +| `Promptimize: Configure Prompt Optimization Provider` | Interactive provider setup wizard | +| `Promptimize: Configure OpenAI Optimization Model` | Pick GPT model (OpenAI provider only) | +| `Promptimize: Test Configuration` | Test Whisper + optimization; opens results webview | + +### Deprecated (compatibility) + +| Command | Replacement | +|---------|-------------| +| `Promptimize: (Deprecated) Start Recording` | Start Transcribe or Start Promptimize | +| `Promptimize: (Deprecated) Stop Recording` | Stop Transcribe or Stop Promptimize | + +--- + +## Customizing Keybindings + +1. Open **Keyboard Shortcuts** (`Cmd/Ctrl+K Cmd/Ctrl+S`) +2. Search for **Promptimize** +3. 
Click the pencil icon to rebind + +Example `keybindings.json` override: + +```json +{ + "key": "ctrl+shift+v", + "command": "promptimize.startTranscribeRecording", + "when": "editorTextFocus" +} +``` + +--- + +## Context Keys + +| Context key | When true | Used by | +|-------------|-----------|---------| +| `promptimize.isRecording` | Microphone is actively recording | `Escape` → Cancel Recording | + +--- + +**See also:** [Recording Modes](recording-modes.md) · [Quick Start](../quickstart.md) diff --git a/docs/user-guide/recording-modes.md b/docs/user-guide/recording-modes.md new file mode 100644 index 0000000..d8022e9 --- /dev/null +++ b/docs/user-guide/recording-modes.md @@ -0,0 +1,123 @@ +# Recording Modes + +Promptimize provides **two distinct recording modes**. Each mode uses the same microphone capture and OpenAI Whisper transcription, but differs in what happens after transcription and which keyboard shortcut starts recording. + +--- + +## Transcribe vs Promptimize + +| | **Transcribe** | **Promptimize** | +|---|----------------|-----------------| +| **Purpose** | Voice-to-text only | Voice → optimized prompt | +| **Start shortcut** | `Cmd+Alt+V` / `Ctrl+Alt+V` | `Cmd+Alt+P` / `Ctrl+Alt+P` | +| **Status bar** | $(mic) Transcribe | $(sparkle) Promptimize | +| **Pipeline** | Record → Whisper → insert raw text | Record → Whisper → optimize → insert | +| **Requires optimization** | No | Yes (`enablePromptTransformation`) | +| **API cost** | Whisper only (~$0.006/min) | Whisper + optimization provider | + +--- + +## When to Use Each Mode + +### Use Transcribe when you want: + +- Raw speech-to-text with minimal processing +- Exact wording preserved (meeting notes, quotes, dictation) +- Lower cost (no optimization API call) +- Quick capture without LLM rewrite + +### Use Promptimize when you want: + +- Structured prompts for AI coding assistants +- Filler words removed and intent clarified +- Technical requirements organized into sections +- The extension's primary
"voice → optimized prompt" workflow + +--- + +## Workflows + +### Transcribe mode + +```mermaid +flowchart LR + Start["Cmd/Ctrl+Alt+V<br/>or Transcribe button"] --> Record[Recording] + Record --> Stop[Click status bar<br/>to stop] + Stop --> Whisper[OpenAI Whisper] + Whisper --> Insert[Insert raw text] +``` + +### Promptimize mode + +```mermaid +flowchart LR + Start["Cmd/Ctrl+Alt+P<br/>or Promptimize button"] --> Record[Recording] + Record --> Stop[Click status bar<br/>to stop] + Stop --> Whisper[OpenAI Whisper] + Whisper --> Transform[Optimization provider] + Transform --> Insert[Insert optimized prompt] +``` + +--- + +## Starting and Stopping + +### Start recording + +| Action | Transcribe | Promptimize | +|--------|------------|-------------| +| Keyboard | `Cmd/Ctrl+Alt+V` | `Cmd/Ctrl+Alt+P` | +| Status bar | Click **Transcribe** | Click **Promptimize** | +| Command Palette | `Promptimize: Start Transcribe Recording` | `Promptimize: Start Promptimize Recording` | + +**Note:** Keyboard shortcuts **start** recording only. They do not stop recording. + +### Stop recording + +- Click the active mode's status bar item while it shows **Recording...** +- Run `Promptimize: Stop Transcribe Recording` or `Promptimize: Stop Promptimize Recording` from the Command Palette + +### Cancel recording + +- Press `Escape` while recording (discards audio without transcribing) +- Run `Promptimize: Cancel Recording` from the Command Palette + +--- + +## Status Bar During Recording + +While recording, the active mode shows **$(record) Recording...** (click to stop). The other mode is disabled until recording finishes or is cancelled. + +| Idle state | Recording state | +|------------|-----------------| +| $(mic) Transcribe | $(record) Recording... (Transcribe active) | +| $(sparkle) Promptimize | $(record) Recording... (Promptimize active) | +| $(gear) Settings | Settings remains available | + +Tooltip when idle: `Transcription: OpenAI Whisper | Optimization: [Provider]` + +--- + +## Configuration Requirements + +| Mode | OpenAI API key (Whisper) | Optimization enabled | Provider credentials | +|------|--------------------------|----------------------|----------------------| +| Transcribe | Required | Not required | Not required | +| Promptimize | Required | Required | Required when provider needs API key | + +If Promptimize is disabled in settings, the Promptimize button shows a warning and opens configuration when clicked. 
+ +--- + +## Deprecated Commands + +These legacy commands still work but are superseded by mode-specific commands: + +| Deprecated | Use instead | +|------------|-------------| +| `Promptimize: (Deprecated) Start Recording` | Start Transcribe or Start Promptimize | +| `Promptimize: (Deprecated) Stop Recording` | Stop Transcribe or Stop Promptimize (routes by active session) | + +--- + +**See also:** [Keyboard Shortcuts](keyboard-shortcuts.md) · [Quick Start](../quickstart.md) · [Configuration Guide](../configuration/README.md) diff --git a/docs/user-guide/troubleshooting.md b/docs/user-guide/troubleshooting.md new file mode 100644 index 0000000..00aaea3 --- /dev/null +++ b/docs/user-guide/troubleshooting.md @@ -0,0 +1,184 @@ +# Troubleshooting + +Decision trees and fixes for common Promptimize issues. + +--- + +## Recording Won't Start + +```mermaid +flowchart TD + A[Recording won't start] --> B{OpenAI key configured?} + B -->|No| C[Open Configuration panel
Enter sk-... key] + B -->|Yes| D{Which mode?} + D -->|Promptimize| E{Optimization enabled?} + E -->|No| F[Enable in config panel
or use Transcribe mode] + E -->|Yes| G{Provider configured?} + G -->|No| H[Complete provider setup
in config panel] + G -->|Yes| I{Microphone permission?} + D -->|Transcribe| I + I -->|Denied| J[Enable mic for VS Code/Cursor
in OS settings] + I -->|Granted| K[Check Output channel
Promptimize for errors] +``` + +### Quick checks + +1. Status bar shows $(warning) **Setup** → click to open configuration +2. **Transcribe** requires OpenAI key only +3. **Promptimize** requires OpenAI key + optimization enabled + provider credentials + +--- + +## Transcription Fails + +```mermaid +flowchart TD + A[Transcription failed] --> B{API key valid?} + B -->|Unknown| C[Run Test Configuration
or Test in config panel] + B -->|Invalid| D[Re-enter key at platform.openai.com] + B -->|Valid| E{OpenAI credits?} + E -->|No| F[Add billing at OpenAI dashboard] + E -->|Yes| G{Audio length?} + G -->|Too short| H[Record at least 0.5 seconds] + G -->|Too long| I[Keep under 5 minutes / 25 MB] + G -->|OK| J{Rate limit 429?} + J -->|Yes| K[Wait and click Retry] + J -->|No| L[Check network / firewall] +``` + +### Error actions + +- Transcription errors show a **Retry** button in the notification +- Verify key starts with `sk-` +- Check credits: https://platform.openai.com/account/billing + +--- + +## Optimization Fails (Promptimize) + +```mermaid +flowchart TD + A[Optimization failed] --> B{Whisper working?} + B -->|No| C[Fix Whisper first] + B -->|Yes| D{Provider type?} + D -->|Cloud API| E{API key correct?} + E -->|No| F[Re-enter provider key
in config panel] + E -->|Yes| G{Account credits?} + G -->|No| H[Top up provider account] + D -->|Ollama| I{Server running?} + I -->|No| J[Start Ollama
ollama serve] + I -->|Yes| K{Model pulled?} + K -->|No| L[ollama pull model-name] + D -->|OpenCode| M{Proxy reachable?} + M -->|No| N[Start opencode-llm-proxy
Check base URL] + D -->|Azure| O{Endpoint + deployment?} + O -->|Wrong| P[Use deployment name
not model ID] +``` + +**Note:** If optimization fails during recording, the extension **falls back to raw transcription** and still inserts text. You may not see an optimization error if insertion succeeds. + +--- + +## Text Not Inserting + +```mermaid +flowchart TD + A[Text not appearing] --> B{Active editor or chat?} + B -->|No| C[Focus editor or chat input
before recording] + B -->|Yes| D{Check clipboard} + D -->|Text there| E[Fallback worked
Paste manually] + D -->|Empty| F{Status bar error?} + F -->|Insertion error| G[Open editor tab
Try again] + F -->|None| H[Check Output channel
Promptimize] +``` + +### Insertion priority + +1. **Cursor chat** — `composer.focusComposer` + paste +2. **VS Code chat** — `workbench.action.chat.open` with query +3. **Active editor** — Insert at cursor +4. **Clipboard fallback** — Copies text + shows notification + +--- + +## Microphone Not Working + +| Platform | Fix | +|----------|-----| +| **macOS** | System Settings → Privacy & Security → Microphone → enable Cursor/VS Code | +| **Windows** | Settings → Privacy → Microphone → enable Cursor/VS Code | +| **Linux** | Usually automatic; check `pavucontrol` if using PulseAudio | + +Restart the editor after granting permission. + +--- + +## Escape Doesn't Cancel + +`Escape` cancels recording only while actively recording (`promptimize.isRecording` context). + +If Escape doesn't work: + +1. Run **Promptimize: Cancel Recording** from the Command Palette (clicking the status bar **Recording...** item stops and processes the recording instead of cancelling it) +2. Reload the window after updating the extension + +--- + +## Deprecated Commands + +If scripts or keybindings use old commands: + +| Old | New | +|-----|-----| +| `promptimize.startRecording` | `promptimize.startTranscribeRecording` or `promptimize.startPromptimizeRecording` | +| `promptimize.stopRecording` | `promptimize.stopTranscribeRecording` or `promptimize.stopPromptimizeRecording` | + +--- + +## API Key Migration (Upgrading) + +The extension migrates legacy secret storage keys automatically: + +| Legacy key | Current key | +|------------|-------------| +| `openai-api-key` | `promptimize.apiKey.openai` | +| `promptimize.openai.apiKey` | `promptimize.apiKey.openai` | + +If keys seem missing after upgrade, re-enter via **Open Configuration** panel. 
+ +--- + +## Settings Not Taking Effect + +| Setting | Status | +|---------|--------| +| `transcriptionLanguage` | ✅ Applied | +| `transcriptionHint` | ✅ Applied | +| `transformationSystemPrompt` | ✅ Applied | +| `audioQuality` | ⏳ Planned — not applied yet | +| `maxRecordingDuration` | ⏳ Planned — not applied yet | +| `showNotifications` | ⏳ Planned — not applied yet | + +See [Advanced Settings](../configuration/advanced-settings.md#planned-settings-not-yet-applied). + +--- + +## Cursor-Specific Issues + +Promptimize works in: + +- **Classic Mode** (`cursor --classic`) +- **Editor Window** +- **VS Code / VSCodium** + +For chat insertion in Cursor, focus the Composer input before stopping recording. + +--- + +## Getting More Help + +- [Configuration Guide](../configuration/README.md) +- [Configuration Webview Guide](../configuration/webview-guide.md) +- [GitHub Issues](https://github.com/vypdev/cursor-whisper/issues) + +Enable the **Promptimize** output channel for operational logs (timestamps, durations, error types). Transcriptions and optimized prompts are **never** written to logs. diff --git a/docs/ux/states.md b/docs/ux/states.md index 70e625a..8caa2a6 100644 --- a/docs/ux/states.md +++ b/docs/ux/states.md @@ -1,25 +1,26 @@ # UX States and Transitions -**Last Updated**: 2026-05-23 +**Last Updated**: 2026-05-24 --- ## Recording States -> **MVP implementation**: The status bar currently reflects states emitted by `NativeAudioRecorder`: `IDLE`, `RECORDING`, `PROCESSING`, `ERROR`, and `CANCELLED`. Fine-grained states (`TRANSCRIBING`, `TRANSFORMING`, `INSERTING`) are shown via the progress notification during stop/processing, not on the status bar. The table below documents the full target UX. +The status bar reflects states emitted by `NativeAudioRecorder`: `IDLE`, `RECORDING`, `PROCESSING`, `ERROR`, and `CANCELLED`. 
Fine-grained states (`TRANSCRIBING`, `TRANSFORMING`, `INSERTING`, `COMPLETED`) are shown via **progress notifications** during stop/processing, not on the status bar. This is the current design, not a temporary MVP limitation. ### State Definitions -| State | Description | Visual Indicator | User Actions | -|-------|-------------|------------------|--------------| -| **IDLE** | Ready to record | 🎤 Voice (gray) | Can start recording | -| **RECORDING** | Actively recording | 🔴 Recording... (red, pulsing) | Can stop or cancel | -| **PROCESSING** | Preparing audio | ⏳ Processing... (spinner) | Wait | -| **TRANSCRIBING** | Sending to Whisper | ⏳ Transcribing... (spinner) | Wait | -| **TRANSFORMING** | Optimizing with GPT-4 | ⏳ Optimizing... (spinner) | Wait | -| **INSERTING** | Inserting text | ⏳ Inserting... | Wait | -| **COMPLETED** | Successfully done | ✓ Inserted (green, brief) | Auto-returns to IDLE | -| **ERROR** | Something failed | ❌ Error (red) | Can retry or dismiss | +| State | Status bar | Notification | Description | User Actions | +|-------|------------|--------------|-------------|--------------| +| **IDLE** | $(mic) Transcribe / $(sparkle) Promptimize | — | Ready to record | Start either mode | +| **RECORDING** | $(record) Recording... | — | Actively recording | Click status bar to stop; Escape to cancel | +| **PROCESSING** | $(sync~spin) Processing... | May show progress | Audio captured, preparing | Wait | +| **TRANSCRIBING** | — | "Transcribing..." | Sending to Whisper | Wait | +| **TRANSFORMING** | — | "Optimizing..." | Running optimization (Promptimize) | Wait | +| **INSERTING** | — | "Inserting..." 
| Inserting text | Wait | +| **COMPLETED** | Returns to IDLE | Success toast | Done | — | +| **ERROR** | Error styling | Error toast | Something failed | Retry or dismiss | +| **CANCELLED** | Returns to IDLE | "Recording cancelled" | User cancelled | — | ### State Transition Diagram @@ -27,122 +28,60 @@ stateDiagram-v2 [*] --> IDLE - IDLE --> RECORDING: User clicks
mic button + IDLE --> RECORDING: Start Transcribe
or Promptimize - RECORDING --> IDLE: User cancels
(Escape) - RECORDING --> PROCESSING: User stops
recording + RECORDING --> CANCELLED: Cancel (Escape) + RECORDING --> PROCESSING: Stop (status bar) PROCESSING --> TRANSCRIBING: Audio prepared - TRANSCRIBING --> TRANSFORMING: Text received + TRANSCRIBING --> TRANSFORMING: Text received (Promptimize) + TRANSCRIBING --> INSERTING: Text received (Transcribe only) TRANSFORMING --> INSERTING: Prompt optimized INSERTING --> COMPLETED: Text inserted PROCESSING --> ERROR: Error occurred TRANSCRIBING --> ERROR: API error - TRANSFORMING --> IDLE: Transform disabled
or failed (fallback) + TRANSFORMING --> INSERTING: Transform disabled or failed (fallback to raw) INSERTING --> ERROR: All inserters failed - COMPLETED --> IDLE: Auto (after 2s) + COMPLETED --> IDLE: Auto ERROR --> IDLE: User dismisses + CANCELLED --> IDLE: Immediate ``` ### Timing Expectations | Transition | Expected Duration | User Feedback | |------------|-------------------|---------------| -| IDLE → RECORDING | <1s | Immediate visual change | -| RECORDING → PROCESSING | <2s | Audio capture complete | -| PROCESSING → TRANSCRIBING | <1s | Preparing upload | -| TRANSCRIBING → TRANSFORMING | 3-8s | "Transcribing..." shown | -| TRANSFORMING → INSERTING | 2-4s | "Optimizing..." shown | -| INSERTING → COMPLETED | <1s | "Inserted ✓" shown | -| COMPLETED → IDLE | 2s auto | Success message visible | +| IDLE → RECORDING | <1s | Status bar changes immediately | +| RECORDING → PROCESSING | <2s | Status bar shows Processing | +| PROCESSING → TRANSCRIBING | <1s | Notification: "Transcribing..." | +| TRANSCRIBING → TRANSFORMING | 3-8s | Notification: "Optimizing..." | +| TRANSFORMING → INSERTING | 2-4s | Notification: "Inserting..." | +| INSERTING → COMPLETED | <1s | Success toast | +| COMPLETED → IDLE | Immediate | Status bar returns to idle | --- ## Visual Design -### Status Bar Item +### Status Bar Items -**Idle State**: -``` -┌────────────┐ -│ 🎤 Voice │ ← Clickable, gray -└────────────┘ -``` +Three items appear right-aligned: -**Recording State**: +**Idle:** ``` -┌──────────────────────┐ -│ 🔴 Recording... 0:15 │ ← Red background, pulsing, timer -└──────────────────────┘ -``` - -**Processing States**: -``` -┌─────────────────────────┐ -│ ⏳ Transcribing... 
45% │ ← Spinner, progress if available -└─────────────────────────┘ -``` - -**Success State** (brief): +$(mic) Transcribe $(sparkle) Promptimize $(gear) Settings ``` -┌────────────────┐ -│ ✓ Inserted │ ← Green, 2 seconds -└────────────────┘ -``` - -**Error State**: -``` -┌────────────────┐ -│ ❌ Error │ ← Red, clickable for details -└────────────────┘ -``` - -### Webview Panel (Deprecated) - -> **Not in production**: A dedicated webview panel with React UI was planned in [ADR-0010](../adr/0010-react-for-ui.md) but superseded by native capture ([ADR-0013](../adr/0013-native-audio-capture.md)). The MVP uses status bar feedback only. - -For reference, the planned webview layout was: +**Recording (Transcribe active):** ``` -┌─────────────────────────────────┐ -│ Cursor Whisper │ -├─────────────────────────────────┤ -│ │ -│ ┌───────────┐ │ -│ │ │ │ ← Large mic button -│ │ 🎤 │ │ -│ │ │ │ -│ └───────────┘ │ -│ │ -│ Click to start recording │ -│ │ -│ Status: Idle │ -│ Duration: 0:00 │ -│ │ -└─────────────────────────────────┘ +$(record) Recording... $(sparkle) Promptimize (disabled) $(gear) Settings ``` -During recording: +**Processing:** ``` -┌─────────────────────────────────┐ -│ Cursor Whisper │ -├─────────────────────────────────┤ -│ │ -│ ┌───────────┐ │ -│ │ │ │ -│ │ 🔴 │ │ ← Pulsing red -│ │ │ │ -│ └───────────┘ │ -│ │ -│ ⏹️ Stop Recording │ -│ │ -│ Status: Recording │ -│ Duration: 0:23 │ -│ ▓▓▓▓▓▓▓▓▓░░░░░░░░ (waveform) │ -│ │ -└─────────────────────────────────┘ +$(sync~spin) Processing... 
(both modes disabled) ``` --- @@ -151,16 +90,17 @@ During recording: | Shortcut | Action | Context | |----------|--------|---------| -| `Cmd/Ctrl + Alt + V` | Toggle recording | Global | -| `Escape` | Cancel recording | While recording | -| `Enter` | Stop recording | While recording | +| `Cmd/Ctrl + Alt + V` | Start Transcribe recording | Global | +| `Cmd/Ctrl + Alt + P` | Start Promptimize recording | Global | +| `Escape` | Cancel recording | While `promptimize.isRecording` | + +**Note:** Start shortcuts do **not** stop recording. Stop by clicking the status bar **Recording...** item. + +See [Keyboard Shortcuts](../user-guide/keyboard-shortcuts.md) for the full command reference. ### Push-to-Talk Mode (Future) -Hold `Cmd/Ctrl + Alt + V` to record, release to stop: -- Press: Start recording -- Hold: Keep recording -- Release: Stop and process +Hold `Cmd/Ctrl + Alt + V` to record, release to stop — planned for a future release. --- @@ -168,135 +108,69 @@ Hold `Cmd/Ctrl + Alt + V` to record, release to stop: ### Success Notifications -**Minimal** (default): +**Default:** ``` -✓ Prompt inserted -``` - -**Detailed** (optional setting): -``` -✓ Prompt inserted successfully - Original: 145 characters - Optimized: 98 characters - Improvements: 3 +✓ Transcription inserted +✓ Optimized prompt inserted ``` ### Error Notifications -**Interactive**: +**Interactive (transcription):** ``` ❌ Transcription failed - OpenAI API error (429: Rate limit) [Retry] [Cancel] ``` -**With Instructions**: +**With Instructions:** ``` ❌ Microphone permission denied Please enable microphone access in System Settings - [Open Settings] [Learn More] ``` ### Info Notifications -**Configuration Needed**: +**Configuration Needed:** ``` -ℹ️ API Key not configured - Cursor Whisper needs an OpenAI API key - [Configure Now] [Learn More] +ℹ️ OpenAI API key required for transcription + [Open Configuration] ``` +### Planned: Detailed Notifications + +When `showNotifications` setting is implemented, optional 
detailed success messages may include character counts and improvement counts. + --- ## Accessibility ### Screen Reader Support -All states announced clearly: -- "Voice recording ready" -- "Recording audio, 15 seconds" -- "Transcribing audio, please wait" -- "Prompt inserted successfully" -- "Error: Transcription failed" +States should be announced clearly via status bar text and notifications. ### Keyboard Navigation -- All commands accessible via keyboard -- No mouse-only interactions -- Clear focus indicators -- Logical tab order - -### Visual Accessibility - -- High contrast mode support -- Color not sole indicator (icons + text) -- Minimum text size 12px -- WCAG 2.1 AA compliant +- All commands accessible via Command Palette +- Escape cancels active recording +- No mouse-only interactions required for core workflow --- ## Error Messages -### User-Friendly Error Messages - -❌ **Bad**: -``` -Error: ECONNREFUSED 401 -``` - -✅ **Good**: -``` -Could not connect to OpenAI API -Your API key may be invalid or expired. -[Check API Key] [Help] -``` +See [Troubleshooting](../user-guide/troubleshooting.md) for decision trees. 
### Error Categories -**Configuration Errors**: -- "API key not configured" -- "Invalid API key format" -- "API key has expired" +**Configuration:** API key not configured, invalid format, provider incomplete -**Permission Errors**: -- "Microphone permission denied" -- "Please enable microphone in System Settings" +**Permission:** Microphone permission denied -**Network Errors**: -- "Could not connect to OpenAI" -- "Request timed out, please try again" -- "Rate limit exceeded, please wait" +**Network:** Connection failed, timeout, rate limit (429) -**Audio Errors**: -- "Recording failed to start" -- "Audio file too large (max 25MB)" -- "Audio duration too short (min 0.1s)" +**Audio:** Recording failed, file too large (>25 MB), too short (<0.1s) -**Insertion Errors**: -- "Could not insert text automatically" -- "Prompt copied to clipboard instead" - ---- - -## Performance Targets - -### Response Times - -| Action | Target | Acceptable | Poor | -|--------|--------|------------|------| -| Start recording | <500ms | <1s | >2s | -| Stop recording | <1s | <2s | >3s | -| Transcription | <8s | <15s | >30s | -| Transformation | <4s | <8s | >15s | -| Insertion | <500ms | <1s | >2s | -| **Total (30s audio)** | **<15s** | **<25s** | **>45s** | - -### Perceived Performance - -- Immediate visual feedback (<100ms) -- Progress indicators for long operations -- Optimistic UI updates -- Smooth animations (60fps) +**Insertion:** All inserters failed; clipboard fallback used --- @@ -304,97 +178,45 @@ Your API key may be invalid or expired. ### Very Short Recording (<1s) -``` -⚠️ Recording too short (0.4s) - Minimum duration is 0.5 seconds - [Try Again] -``` +Whisper rejects audio shorter than 0.1s. User sees transcription error with Retry option. -### Very Long Recording (>120s) +### Very Long Recording -``` -⚠️ Approaching maximum duration - Recording will auto-stop at 2:00 - (Current: 1:50) -``` +`maxRecordingDuration` setting is **planned but not yet applied**. 
Recording continues until manually stopped. ### No Active Editor Falls back to clipboard: ``` -ℹ️ No active editor found - Prompt copied to clipboard +ℹ️ Prompt copied to clipboard Paste it where you need it ``` -### Transformation Disabled +### Transformation Disabled or Failed -``` -ℹ️ Transcribed (transformation disabled) - Raw transcription inserted - Enable transformation in settings -``` +Raw transcription is inserted. Optimization failure does not block insertion. --- ## User Preferences -### Configurable Behavior - -Users can configure: -- Enable/disable notifications -- Enable/disable prompt transformation -- Maximum recording duration -- Audio quality -- Transcription language -- Keyboard shortcuts - -### Settings UI - -``` -Cursor Whisper Settings +### Configurable in VS Code Settings -🎤 Recording - ✓ Show recording indicator - ✓ Show duration timer - Maximum duration: [120] seconds +- `enablePromptTransformation` — Enable/disable optimization +- `transcriptionLanguage` — Whisper language +- `transcriptionHint` — Whisper vocabulary hint +- `transformationSystemPrompt` — Custom transformation instructions +- `transformationProvider` and provider-specific model settings -🔤 Transcription - Language: [Auto-detect ▾] - ✓ Technical vocabulary hints - -✨ Transformation - ✓ Optimize prompts with AI - Style: [Technical ▾] - ✓ Show before/after preview - -📝 Insertion - Priority: [Chat > Editor > Clipboard] - ✓ Show success notifications - -⌨️ Shortcuts - Toggle recording: [Cmd+Alt+V] - ✓ Enable push-to-talk mode -``` - ---- +### Configurable in Configuration Webview -## Summary +- OpenAI API key, provider selection, model, system prompt +- See [Configuration Webview Guide](../configuration/webview-guide.md) -**UX Principles**: -1. ✅ **Immediate feedback**: Visual changes <100ms -2. ✅ **Clear states**: Always know what's happening -3. ✅ **Graceful errors**: User-friendly messages with actions -4. ✅ **Minimal friction**: One-click recording -5. 
✅ **Smart defaults**: Works great out of the box -6. ✅ **Accessibility**: Works for everyone +### Planned (not yet applied) -**Design Philosophy**: -- **Fast**: Optimized for speed -- **Clear**: No ambiguity about state -- **Forgiving**: Easy to cancel/retry -- **Professional**: Fits VSCode aesthetic +- `audioQuality`, `maxRecordingDuration`, `showNotifications` --- -**Next**: See [Security Documentation](../security/privacy.md). +**Next:** [Security Documentation](../security/privacy.md) · [Recording Modes](../user-guide/recording-modes.md) diff --git a/jest.config.js b/jest.config.js index fcb84d0..66fdedd 100644 --- a/jest.config.js +++ b/jest.config.js @@ -31,6 +31,7 @@ module.exports = { }, setupFilesAfterEnv: ['/__tests__/setup.ts'], moduleNameMapper: { - '^@/(.*)$': '/src/$1' + '^@/(.*)$': '/src/$1', + '^token-costs$': '/__tests__/__mocks__/token-costs.ts', } }; diff --git a/package.json b/package.json index a98a515..c3885c6 100644 --- a/package.json +++ b/package.json @@ -1,11 +1,11 @@ { - "name": "cursor-whisper", - "displayName": "Cursor Whisper - Voice to Optimized Prompt", - "description": "Transform voice into optimized prompts using OpenAI Whisper and GPT-4", + "name": "promptimize", + "displayName": "Promptimize", + "description": "Transform voice into optimized prompts using OpenAI Whisper and AI providers", "version": "0.1.0", - "publisher": "cursor-whisper", + "publisher": "promptimize", "author": { - "name": "Cursor Whisper Team" + "name": "Promptimize Team" }, "license": "MIT", "repository": { @@ -28,7 +28,7 @@ "speech-to-text", "whisper", "ai", - "cursor", + "promptimize", "productivity", "gpt-4", "openai" @@ -40,42 +40,79 @@ "contributes": { "commands": [ { - "command": "cursor-whisper.startRecording", - "title": "Cursor Whisper: Start Recording" + "command": "promptimize.startRecording", + "title": "Promptimize: (Deprecated) Start Recording" }, { - "command": "cursor-whisper.stopRecording", - "title": "Cursor Whisper: Stop Recording" + 
"command": "promptimize.stopRecording", + "title": "Promptimize: (Deprecated) Stop Recording" }, { - "command": "cursor-whisper.cancelRecording", - "title": "Cursor Whisper: Cancel Recording" + "command": "promptimize.startTranscribeRecording", + "title": "Promptimize: Start Transcribe Recording" }, { - "command": "cursor-whisper.configureApiKey", - "title": "Cursor Whisper: Configure API Key" + "command": "promptimize.stopTranscribeRecording", + "title": "Promptimize: Stop Transcribe Recording" }, { - "command": "cursor-whisper.configureModel", - "title": "Cursor Whisper: Configure Model" + "command": "promptimize.startPromptimizeRecording", + "title": "Promptimize: Start Promptimize Recording" + }, + { + "command": "promptimize.stopPromptimizeRecording", + "title": "Promptimize: Stop Promptimize Recording" + }, + { + "command": "promptimize.cancelRecording", + "title": "Promptimize: Cancel Recording" + }, + { + "command": "promptimize.configureApiKey", + "title": "Promptimize: Configure OpenAI API Key (Whisper)" + }, + { + "command": "promptimize.configureModel", + "title": "Promptimize: Configure OpenAI Optimization Model" + }, + { + "command": "promptimize.configureTransformationProvider", + "title": "Promptimize: Configure Prompt Optimization Provider" + }, + { + "command": "promptimize.testTransformation", + "title": "Promptimize: Test Configuration" + }, + { + "command": "promptimize.firstTimeSetup", + "title": "Promptimize: Setup Wizard" + }, + { + "command": "promptimize.openConfigurationPanel", + "title": "Promptimize: Open Configuration" } ], "keybindings": [ { - "command": "cursor-whisper.startRecording", + "command": "promptimize.startTranscribeRecording", "key": "ctrl+alt+v", "mac": "cmd+alt+v" }, { - "command": "cursor-whisper.cancelRecording", + "command": "promptimize.startPromptimizeRecording", + "key": "ctrl+alt+p", + "mac": "cmd+alt+p" + }, + { + "command": "promptimize.cancelRecording", "key": "escape", - "when": "cursorWhisper.isRecording" + 
"when": "promptimize.isRecording" } ], "configuration": { - "title": "Cursor Whisper", + "title": "Promptimize", "properties": { - "cursorWhisper.transcriptionLanguage": { + "promptimize.transcriptionLanguage": { "type": "string", "default": "auto", "enum": [ @@ -90,14 +127,44 @@ "ko", "zh" ], - "description": "Language for transcription (ISO 639-1 code or 'auto')" + "markdownDescription": "**Transcription language** for OpenAI Whisper.\n\nWhisper transcription always uses your OpenAI API key. [Setup guide](https://github.com/vypdev/cursor-whisper/blob/master/docs/quickstart.md)" }, - "cursorWhisper.enablePromptTransformation": { + "promptimize.transcriptionHint": { + "type": "string", + "default": "", + "markdownDescription": "**Transcription hint** for OpenAI Whisper (optional).\n\nComma-separated vocabulary, acronyms, or technical terms to improve recognition accuracy. Passed to Whisper as the `prompt` parameter — not transcribed as content. Configure in VS Code Settings (not the configuration webview). [Advanced settings](https://github.com/vypdev/cursor-whisper/blob/master/docs/configuration/advanced-settings.md)" + }, + "promptimize.enablePromptTransformation": { "type": "boolean", "default": true, - "description": "Enable AI-powered prompt transformation (requires GPT-4 API access, costs ~$0.01 per transformation)" + "markdownDescription": "**Enable prompt optimization** after Whisper transcription.\n\nWhen disabled, raw transcription is inserted. When enabled, configure a provider below. 
[Compare providers](https://github.com/vypdev/cursor-whisper/blob/master/docs/configuration/README.md)" }, - "cursorWhisper.transformationModel": { + "promptimize.transformationProvider": { + "type": "string", + "default": "openai", + "enum": [ + "openai", + "anthropic", + "google", + "azure", + "ollama", + "opencode", + "openrouter", + "cursor" + ], + "enumDescriptions": [ + "OpenAI GPT models (requires OpenAI API key; can reuse Whisper key)", + "Anthropic Claude models (requires Anthropic API key)", + "Google Gemini models (requires Google AI API key)", + "Azure OpenAI deployments (requires Azure API key and endpoint)", + "Local Ollama models (no API key required)", + "Local OpenCode multi-provider proxy via opencode-llm-proxy (no API key required)", + "OpenRouter unified gateway (requires OpenRouter API key)", + "Native Cursor AI via Cursor SDK (works in any editor - requires Cursor API key)" + ], + "markdownDescription": "**Prompt optimization provider** (separate from Whisper transcription).\n\nWhisper always uses OpenAI. This setting chooses which AI service optimizes transcribed speech into structured prompts. [Configuration guide](https://github.com/vypdev/cursor-whisper/blob/master/docs/configuration/README.md)" + }, + "promptimize.transformationModel": { "type": "string", "default": "gpt-4o", "enum": [ @@ -114,9 +181,78 @@ "GPT-4 — original GPT-4 model", "GPT-3.5 Turbo — cheapest option, lower quality" ], - "description": "OpenAI model used for prompt transformation. Use 'Cursor Whisper: Configure Model' to pick from models available on your API key." + "markdownDescription": "**OpenAI model for prompt optimization** when `transformationProvider` is `openai`.\n\nRun **Promptimize: Configure OpenAI Optimization Model** to pick from models available on your API key." 
+ }, + "promptimize.anthropicModel": { + "type": "string", + "default": "claude-3-5-sonnet-20241022", + "enum": [ + "claude-3-5-sonnet-20241022", + "claude-3-5-haiku-20241022", + "claude-3-opus-20240229", + "claude-3-sonnet-20240229", + "claude-3-haiku-20240307" + ], + "markdownDescription": "**Anthropic model** for prompt optimization when `transformationProvider` is `anthropic`. Requires an Anthropic API key." }, - "cursorWhisper.audioQuality": { + "promptimize.googleModel": { + "type": "string", + "default": "gemini-1.5-pro", + "enum": [ + "gemini-1.5-pro", + "gemini-1.5-flash", + "gemini-2.0-flash" + ], + "markdownDescription": "**Google Gemini model** for prompt optimization when `transformationProvider` is `google`. Requires a Google AI API key." + }, + "promptimize.azureEndpoint": { + "type": "string", + "default": "", + "markdownDescription": "**Azure OpenAI endpoint** for prompt optimization when `transformationProvider` is `azure` (e.g. `https://my-resource.openai.azure.com`)." + }, + "promptimize.azureDeployment": { + "type": "string", + "default": "", + "markdownDescription": "**Azure OpenAI deployment name** for the chat model used in prompt optimization." + }, + "promptimize.ollamaBaseUrl": { + "type": "string", + "default": "http://localhost:11434", + "markdownDescription": "**Ollama server URL** for local prompt optimization when `transformationProvider` is `ollama`." + }, + "promptimize.ollamaModel": { + "type": "string", + "default": "llama3.1:8b", + "markdownDescription": "**Ollama model name/tag** for local prompt optimization." + }, + "promptimize.openCodeBaseUrl": { + "type": "string", + "default": "http://127.0.0.1:4010/v1", + "markdownDescription": "**OpenCode LLM proxy base URL** for prompt optimization when `transformationProvider` is `opencode`." 
+ }, + "promptimize.openCodeModel": { + "type": "string", + "default": "", + "markdownDescription": "**OpenCode model identifier** (provider/model format) for prompt optimization when `transformationProvider` is `opencode`." + }, + "promptimize.openRouterModel": { + "type": "string", + "default": "openai/gpt-4o", + "markdownDescription": "**OpenRouter model identifier** for prompt optimization when `transformationProvider` is `openrouter`. Requires an OpenRouter API key." + }, + "promptimize.cursorModel": { + "type": "string", + "default": "composer-2.5", + "enum": [ + "composer-2.5", + "composer-2.5-fast", + "claude-4.5-sonnet", + "gpt-5.1", + "gpt-5.2-codex" + ], + "markdownDescription": "**Cursor model** for prompt optimization when `transformationProvider` is `cursor`. Works in any editor (VSCode, Cursor, etc.). Requires a Cursor API key from [Cursor Dashboard](https://cursor.com/dashboard/integrations)." + }, + "promptimize.audioQuality": { "type": "string", "enum": [ "low", @@ -124,27 +260,33 @@ "high" ], "default": "high", - "description": "Audio recording quality" + "markdownDescription": "**Audio recording quality** sent to OpenAI Whisper for transcription.\n\n**Note:** Not yet applied at runtime — recording always uses 16 kHz mono (optimal for Whisper). Reserved for a future release. [Advanced settings](https://github.com/vypdev/cursor-whisper/blob/master/docs/configuration/advanced-settings.md)" }, - "cursorWhisper.maxRecordingDuration": { + "promptimize.maxRecordingDuration": { "type": "number", "default": 120, "minimum": 10, "maximum": 300, - "description": "Maximum recording duration in seconds" + "markdownDescription": "**Maximum recording duration** in seconds before auto-stop.\n\n**Note:** Not yet applied at runtime — recording stops when you click stop or cancel. Reserved for a future release. 
[Advanced settings](https://github.com/vypdev/cursor-whisper/blob/master/docs/configuration/advanced-settings.md)" }, - "cursorWhisper.showNotifications": { + "promptimize.showNotifications": { "type": "boolean", "default": true, - "description": "Show status notifications" + "markdownDescription": "Show progress and status notifications during recording, transcription, and optimization.\n\n**Note:** Not yet applied at runtime — notifications always appear during processing. Reserved for a future release. [Advanced settings](https://github.com/vypdev/cursor-whisper/blob/master/docs/configuration/advanced-settings.md)" + }, + "promptimize.transformationSystemPrompt": { + "type": "string", + "default": "You are an expert prompt engineer specialized in transforming raw developer voice transcriptions into highly effective prompts for AI coding assistants such as Cursor, Claude, Copilot, and ChatGPT.\n\nYour task is to convert spoken, unstructured developer input into a concise, technically precise, execution-oriented prompt.\n\nRules:\n\n1. Clean the transcription\n - Remove filler words, hesitations, repetitions, and verbal noise\n - Fix grammar and sentence structure\n - Preserve the original intent\n - Preserve all technical terminology, APIs, framework names, libraries, file names, variables, and code references exactly as spoken\n\n2. Infer developer intent\n - Detect the actual engineering goal behind the transcription\n - Resolve fragmented speech into coherent technical instructions\n - Preserve implicit requirements when clearly inferred from context\n - Do NOT invent features, requirements, or assumptions not supported by the transcription\n\n3. 
Optimize for AI coding assistants\n - Make the prompt actionable and implementation-focused\n - Convert vague requests into precise engineering tasks when possible\n - Prioritize clarity, execution order, and technical accuracy\n - Encourage maintainable, production-grade solutions unless explicitly stated otherwise\n\n4. Structure intelligently\n - Only use sections when they improve clarity\n - Possible sections include:\n - Context\n - Objective\n - Requirements\n - Constraints\n - Expected Output\n - Technical Notes\n\n5. Preserve important engineering constraints\n - Maintain architecture preferences\n - Preserve mentioned technologies and stack decisions\n - Preserve performance, security, scalability, UX, DX, or maintainability concerns\n - Preserve coding style preferences if mentioned\n\n6. Improve readability\n - Remove redundancy\n - Shorten unnecessary wording\n - Use professional technical language\n - Prefer bullet points when useful\n - Keep the final prompt dense with useful information\n\n7. Output rules\n - Output ONLY the final optimized prompt\n - Do NOT explain your changes\n - Do NOT add commentary\n - Do NOT wrap the output in markdown\n - Do NOT add quotation marks\n\nAdditional behavior:\n- If the transcription is already clear, improve it minimally\n- If the transcription is incomplete, produce the best technically coherent prompt possible without asking questions\n- If multiple tasks are mentioned, organize them logically by priority or execution order\n- Prefer explicit engineering instructions over conversational phrasing\n- Optimize prompts for implementation quality, not just readability", + "markdownDescription": "**System prompt** for prompt transformation.\n\nThis prompt instructs the AI how to transform your voice transcriptions into optimized prompts. 
Customize it to change the transformation style and structure.", + "editPresentation": "multilineText" } } } }, "scripts": { "vscode:prepublish": "pnpm run compile", - "compile": "webpack --mode production", - "compile:dev": "webpack --mode development", + "compile": "webpack --mode production && node scripts/copy-webview-assets.js", + "compile:dev": "webpack --mode development && node scripts/copy-webview-assets.js", "watch": "webpack --mode development --watch", "lint": "eslint src --ext ts", "lint:fix": "eslint src --ext ts --fix", @@ -159,8 +301,14 @@ "format:check": "prettier --check \"src/**/*.ts\"" }, "dependencies": { + "@anthropic-ai/sdk": "^0.30.1", + "@cursor/sdk": "^1.0.13", + "@google/generative-ai": "^0.21.0", "@kstonekuan/audio-capture": "^0.0.3", - "openai": "^4.20.0" + "@vscode/webview-ui-toolkit": "1.4.0", + "axios": "^1.7.9", + "openai": "^4.20.0", + "token-costs": "^3.5.0" }, "devDependencies": { "@types/jest": "^29.5.11", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 3682232..609e897 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -8,15 +8,30 @@ importers: .: dependencies: + '@anthropic-ai/sdk': + specifier: ^0.30.1 + version: 0.30.1(encoding@0.1.13) + '@cursor/sdk': + specifier: ^1.0.13 + version: 1.0.13 + '@google/generative-ai': + specifier: ^0.21.0 + version: 0.21.0 '@kstonekuan/audio-capture': specifier: ^0.0.3 version: 0.0.3 - copilot: - specifier: ^0.0.2 - version: 0.0.2 + '@vscode/webview-ui-toolkit': + specifier: 1.4.0 + version: 1.4.0(react@19.2.6) + axios: + specifier: ^1.7.9 + version: 1.16.1 openai: specifier: ^4.20.0 - version: 4.104.0 + version: 4.104.0(encoding@0.1.13)(zod@3.25.76) + token-costs: + specifier: ^3.5.0 + version: 3.5.0 devDependencies: '@types/jest': specifier: ^29.5.11 @@ -75,6 +90,9 @@ importers: packages: + '@anthropic-ai/sdk@0.30.1': + resolution: {integrity: sha512-nuKvp7wOIz6BFei8WrTdhmSsx5mwnArYyJgh4+vYu3V4J0Ltb8Xm3odPm51n1aSI0XxNCrDl7O88cxCtUdAkaw==} + '@babel/code-frame@7.29.0': resolution: 
{integrity: sha512-9NhCeYjq9+3uxgdtp20LSiJXJvN0FeCtNGpJxuMFZ1Kv3cWUNb6DOhJwUvcVCzKGR66cw4njwM6hrJLqgOwbcw==} engines: {node: '>=6.9.0'} @@ -240,6 +258,50 @@ packages: '@bcoe/v8-coverage@0.2.3': resolution: {integrity: sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw==} + '@bufbuild/protobuf@1.10.0': + resolution: {integrity: sha512-QDdVFLoN93Zjg36NoQPZfsVH9tZew7wKDKyV5qRdj8ntT4wQCOradQjRaTdwMhWUYsgKsvCINKKm87FdEk96Ag==} + + '@connectrpc/connect-node@1.7.0': + resolution: {integrity: sha512-6vaPIkG/NyhxlYgytLoR9KYbPhczEboFB2OYWkA9qvUz1K7efXfeGrlRxoLtpa+r8VxyIOw73w5ktNe743nD+A==} + engines: {node: '>=16.0.0'} + peerDependencies: + '@bufbuild/protobuf': ^1.10.0 + '@connectrpc/connect': 1.7.0 + + '@connectrpc/connect@1.7.0': + resolution: {integrity: sha512-iNKdJRi69YP3mq6AePRT8F/HrxWCewrhxnLMNm0vpqXAR8biwzRtO6Hjx80C6UvtKJ5sFmffQT7I4Baecz389w==} + peerDependencies: + '@bufbuild/protobuf': ^1.10.0 + + '@cursor/sdk-darwin-arm64@1.0.13': + resolution: {integrity: sha512-zHRTNtVRHw4KSAEFmtO0Av7jv9D60DrB+pygVNWGyKtRR44fcwtRHuLAJmO4HThxQw7MMvUJuAaNmCQxzHtPDQ==} + cpu: [arm64] + os: [darwin] + + '@cursor/sdk-darwin-x64@1.0.13': + resolution: {integrity: sha512-7XsIkMKp6h/4W9zBx02Py1euJLAJVxlkwmm9GSoUjc+3hfFvHY/R/WTbX2TFgF4g1vOAq/HM7GmXBXq+e4M4+w==} + cpu: [x64] + os: [darwin] + + '@cursor/sdk-linux-arm64@1.0.13': + resolution: {integrity: sha512-bDgfPPgc84gUn3k+Iiq5OLZozzM0UYZdKbQ821pbZy1OPWTFaSkjXsoAB6xqf9wALWyW1eQxOC4RprPBLoy+yA==} + cpu: [arm64] + os: [linux] + + '@cursor/sdk-linux-x64@1.0.13': + resolution: {integrity: sha512-BTccnB5hVqK8Y0778oql6gbk7kIIlzQrBqt5QNLJpwBidjjde/mlvAajVB9hB3a29jelOwm0gJjMsLfqTkEPdw==} + cpu: [x64] + os: [linux] + + '@cursor/sdk-win32-x64@1.0.13': + resolution: {integrity: sha512-GxWlwj4G513EfGmvPVBa4y+vNn9B5Cj+npu8fVcJ0P+U9sruhgo4pvqGbWxkn5EIKbpGoraLq9QB4nFeoT1uRQ==} + cpu: [x64] + os: [win32] + + '@cursor/sdk@1.0.13': + resolution: {integrity: 
sha512-w6MWkgOTL6yb6GV/4Odx7QcamQgqhzX/CzcMBkqiiOPTPuEWItWrgA0qdivchm5YJXTt+LZkFSEQ/Ti44hVbfg==} + engines: {node: '>=18'} + '@discoveryjs/json-ext@0.5.7': resolution: {integrity: sha512-dBVuXR082gk3jsFp7Rd/JI4kytwGHecnCoTtXFb7DB6CNHp4rg5k1bhg0nWdLGLnOV71lmDzGQaLMy8iPLY0pw==} engines: {node: '>=10.0.0'} @@ -262,57 +324,16 @@ packages: resolution: {integrity: sha512-d9zaMRSTIKDLhctzH12MtXvJKSSUhaHcjV+2Z+GK+EEY7XKpP5yR4x+N3TAcHTcu963nIr+TMcCb4DBCYX1z6Q==} engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0} - '@github/copilot-darwin-arm64@1.0.51': - resolution: {integrity: sha512-i713sW3GzbeLKowGVY6/A97lGkUMJNVdUD0oaUWTWmXX08u+hWsnVKbqL4EQlw7x8xU511X5vkgFMi31DWyCuQ==} - cpu: [arm64] - os: [darwin] - hasBin: true - - '@github/copilot-darwin-x64@1.0.51': - resolution: {integrity: sha512-c67SbMznclcHqlJINXBCwudhqRgE5HNaY9fqMQqu954+ezVa6Q/2hwhCU51PNbYLWtZTGgXsgWnrxOg77hh0ug==} - cpu: [x64] - os: [darwin] - hasBin: true - - '@github/copilot-linux-arm64@1.0.51': - resolution: {integrity: sha512-MlQeTB4CSPnG2BZTxsPSV5a7rjsqFOzhTCVCNjLeht3ODObWjrIYhtzVF7h/nue9ii96u9RBB0gIAfoBReryTw==} - cpu: [arm64] - os: [linux] - hasBin: true - - '@github/copilot-linux-x64@1.0.51': - resolution: {integrity: sha512-fniGTwR5KLFfNDjSFbWvZ3Bno+2bXsMdNM0l3dFHwVTHyBqQSXZ3xvEEDadGimCxgKfRDRt1M1FYnUpqhLYf/Q==} - cpu: [x64] - os: [linux] - hasBin: true - - '@github/copilot-linuxmusl-arm64@1.0.51': - resolution: {integrity: sha512-vg9sWZw4u/bqHa7ylF/GZeuznt+k4/Em899C++CTBU4CKhtAaxd2TZDsEV0Ap2DXzP2UFxCn77vZoHyxByMI5A==} - cpu: [arm64] - os: [linux] - hasBin: true - - '@github/copilot-linuxmusl-x64@1.0.51': - resolution: {integrity: sha512-zxXRdzjshHTQd/LDWmOIDXt0T8nvw66ue6cneAXHhLXWzuiv5mqPKnxuHQyvQDt+IBEyq9utuetlKxcAVo+gYw==} - cpu: [x64] - os: [linux] - hasBin: true - - '@github/copilot-win32-arm64@1.0.51': - resolution: {integrity: sha512-/SP8DfOukjllCXavgBNI0qwJa+8hCFRNK7Q3/Q3qzAOvaWUZZkabKSVZfXaGxerTGpGq009Zg3nyIPR0jfm60w==} - cpu: [arm64] - os: [win32] - hasBin: true + '@fastify/busboy@2.1.1': + 
resolution: {integrity: sha512-vBZP4NlzfOlerQTnba4aqZoMhE/a9HY7HRqoOPaETQcSQuWEIyZMHGfVu6w9wGtGK5fED5qRs2DteVCjOH60sA==} + engines: {node: '>=14'} - '@github/copilot-win32-x64@1.0.51': - resolution: {integrity: sha512-ZB5Jr9m4ZR8gFOwXnYGNfdU+bMFeUgj1OCU3x64Tx5GC6Uln/pf8Ue5LHlsBkBq/NuKvkp/g4GARDIHBCKXEnQ==} - cpu: [x64] - os: [win32] - hasBin: true + '@gar/promisify@1.1.3': + resolution: {integrity: sha512-k2Ty1JcVojjJFwrg/ThKi2ujJ7XNLYaFGNB/bWT9wGR+oSMJHMa5w+CUq6p/pVrKeNNgA7pCqEcjSnHVoqJQFw==} - '@github/copilot@1.0.51': - resolution: {integrity: sha512-yKXbMeApxO8P68/BeSS/lmIRsCprcMdY8MRRp+Vp/QymCv59o4lxDcAIVq2h/CD8vJHoiG4OijdWydd76yoqLw==} - hasBin: true + '@google/generative-ai@0.21.0': + resolution: {integrity: sha512-7XhUbtnlkSEZK15kN3t+tzIMxsbKm/dSkKBFalj+20NvPKe1kBY7mR2P7vuijEn+f06z5+A8bVGKO0v39cr6Wg==} + engines: {node: '>=18.0.0'} '@humanwhocodes/config-array@0.13.0': resolution: {integrity: sha512-DZLEEqFWQFiyK6h5YIeynKx7JlvCYWL0cImfSRXZ9l4Sg2efkFGTuFf6vzXjK1cq6IYkU+Eg/JizXw+TD2vRNw==} @@ -443,6 +464,20 @@ packages: '@kstonekuan/audio-capture@0.0.3': resolution: {integrity: sha512-5NieKdyOT9d7RF4sJh+BZSfXJhi0OVDsOiUGc1tQs2Hm80nhWDyz6Hzz4jVFaGVUwmHR71B5bN5VLvM74SHDZw==} + '@microsoft/fast-element@1.14.0': + resolution: {integrity: sha512-zXvuSOzvsu8zDTy9eby8ix8VqLop2rwKRgp++ZN2kTCsoB3+QJVoaGD2T/Cyso2ViZQFXNpiNCVKfnmxBvmWkQ==} + + '@microsoft/fast-foundation@2.50.0': + resolution: {integrity: sha512-8mFYG88Xea1jZf2TI9Lm/jzZ6RWR8x29r24mGuLojNYqIR2Bl8+hnswoV6laApKdCbGMPKnsAL/O68Q0sRxeVg==} + + '@microsoft/fast-react-wrapper@0.3.25': + resolution: {integrity: sha512-jKzmk2xJV93RL/jEFXEZgBvXlKIY4N4kXy3qrjmBfFpqNi3VjY+oUTWyMnHRMC5EUhIFxD+Y1VD4u9uIPX3jQw==} + peerDependencies: + react: '>=16.9.0' + + '@microsoft/fast-web-utilities@5.4.1': + resolution: {integrity: sha512-ReWYncndjV3c8D8iq9tp7NcFNc1vbVHvcBFPME2nNFKNbS1XCesYZGlIlf3ot5EmuOXPlrzUHOWzQ2vFpIkqDg==} + '@nodelib/fs.scandir@2.1.5': resolution: {integrity: 
sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==} engines: {node: '>= 8'} @@ -455,6 +490,14 @@ packages: resolution: {integrity: sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==} engines: {node: '>= 8'} + '@npmcli/fs@1.1.1': + resolution: {integrity: sha512-8KG5RD0GVP4ydEzRn/I4BNDuxDtqVbOdm8675T49OIG/NGhaK0pjPX7ZcDlvKYbA+ulvVK3ztfcF4uBdOxuJbQ==} + + '@npmcli/move-file@1.1.2': + resolution: {integrity: sha512-1SUf/Cg2GzGDyaf15aR9St9TWlb+XvbZXWpDx8YKs7MLzMH/BCeopv+y9vzrzgkfykCGuWOlSu3mZhj2+FQcrg==} + engines: {node: '>=10'} + deprecated: This functionality has been moved to @npmcli/fs + '@pkgr/core@0.2.9': resolution: {integrity: sha512-QNqXyfVS2wm9hweSYD2O7F0G06uurj9kZ96TRQE5Y9hU7+tgdZwIkbAKc5Ocy1HxEY2kuDQa6cQ1WRs/O5LFKA==} engines: {node: ^12.20.0 || ^14.18.0 || >=16.0.0} @@ -468,6 +511,16 @@ packages: '@sinonjs/fake-timers@10.3.0': resolution: {integrity: sha512-V4BG07kuYSUkTCSBHG8G8TNhM+F19jXFWnQtzj+we8DrkpSBCee9Z3Ms8yiGer/dlmhe35/Xdgyo3/0rQKg7YA==} + '@statsig/client-core@3.31.0': + resolution: {integrity: sha512-SuxQD6TmVszPG7FoMKwTk/uyBuVFk7XnxI3T/E0uyb7PL7GNjONtfsoh+NqBBVUJVse0CUeSFfgJPoZy1ZOslQ==} + + '@statsig/js-client@3.31.0': + resolution: {integrity: sha512-LFa5E0LjT6sTfZv3sNGoyRLSZ1078+agdgOA+Vm1ecjG+KbSOfBLTW7hMwimrJ29slRwbYDzbtKaPJo/R37N2g==} + + '@tootallnate/once@1.1.2': + resolution: {integrity: sha512-RbzJvlNzmRq5c3O09UipeuXno4tA1FE6ikOjxZK0tuxVv3412l64l5t1W5pj4+rJq9vpkm/kwiR07aZXnsKPxw==} + engines: {node: '>= 6'} + '@types/babel__core@7.20.5': resolution: {integrity: sha512-qoQprZvz5wQFJwMDqeseRXWv3rqMvhgpbXFfVyWhbx9X47POIA6i/+dXefEmZKoAgOaTdaIgNSMqMIU61yRyzA==} @@ -590,6 +643,12 @@ packages: resolution: {integrity: sha512-8ukpxv4wYe0iWMRQU18jhzJOHkeGKbnw7xWRX3Zw1WJA4cEKbHcmmLPdPrPtL6rhDcrlCZN+xKRpv09n4gRHYg==} engines: {node: '>=16'} + '@vscode/webview-ui-toolkit@1.4.0': + resolution: {integrity: 
sha512-modXVHQkZLsxgmd5yoP3ptRC/G8NBDD+ob+ngPiWNQdlrH6H1xR/qgOBD85bfU3BhOB5sZzFWBwwhp9/SfoHww==} + deprecated: This package has been deprecated, https://github.com/microsoft/vscode-webview-ui-toolkit/issues/561 + peerDependencies: + react: '>=16.9.0' + '@webassemblyjs/ast@1.14.1': resolution: {integrity: sha512-nuBEDgQfm1ccRp/8bCQrx1frohyufl4JlbMMZ4P1wpeOfDhF6FQkxZJ1b/e+PLwr6X1Nhw6OLme5usuBWYBvuQ==} @@ -666,6 +725,9 @@ packages: '@xtuc/long@4.2.2': resolution: {integrity: sha512-NuHqBY1PB/D8xU6s/thBgOAiAP7HOYDQ32+BFZILJ8ivkUkAHQnWfn6WhL79Owj1qmUnoN/YPhktdIoucipkAQ==} + abbrev@1.1.1: + resolution: {integrity: sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q==} + abort-controller@3.0.0: resolution: {integrity: sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==} engines: {node: '>=6.5'} @@ -686,6 +748,10 @@ packages: engines: {node: '>=0.4.0'} hasBin: true + agent-base@6.0.2: + resolution: {integrity: sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==} + engines: {node: '>= 6.0.0'} + agent-base@7.1.4: resolution: {integrity: sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==} engines: {node: '>= 14'} @@ -694,6 +760,10 @@ packages: resolution: {integrity: sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ==} engines: {node: '>= 8.0.0'} + aggregate-error@3.1.0: + resolution: {integrity: sha512-4I7Td01quW/RpocfNayFdFVk1qSuoh0E7JrbRJ16nH01HhKFQ88INq9Sd+nd72zqRySlr9BmDA8xlEJ6vJMrYA==} + engines: {node: '>=8'} + ajv-formats@2.1.1: resolution: {integrity: sha512-Wx0Kx52hxE7C18hkMEggYlEifqWZtYaRgouJor+WMdPnQyEK13vgEWyVNup7SoeeoLMsr4kf5h6dOW11I15MUA==} peerDependencies: @@ -741,6 +811,14 @@ packages: resolution: {integrity: sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw==} engines: {node: '>= 8'} + aproba@2.1.0: 
+ resolution: {integrity: sha512-tLIEcj5GuR2RSTnxNKdkK0dJ/GrC7P38sUkiDmDuHfsHmbagTFAxDVIBltoklXEVIQ/f14IL8IMJ5pn9Hez1Ew==} + + are-we-there-yet@3.0.1: + resolution: {integrity: sha512-QZW4EDmGwlYur0Yyf/b2uGucHQMa8aFUP7eu9ddR73vvhFyt4V0Vl3QHPcTNJ8l6qYOBdxgXdnBXQrHilfRQBg==} + engines: {node: ^12.13.0 || ^14.15.0 || >=16.0.0} + deprecated: This package is no longer supported. + argparse@1.0.10: resolution: {integrity: sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==} @@ -754,6 +832,9 @@ packages: asynckit@0.4.0: resolution: {integrity: sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==} + axios@1.16.1: + resolution: {integrity: sha512-caYkukvroVPO8KrzuJEb50Hm07KwfBZPEC3VeFHTsqWHvKTsy54hjJz9BS/cdaypROE2rH6xvm9mHX4fgWkr3A==} + azure-devops-node-api@11.2.0: resolution: {integrity: sha512-XdiGPhrpaT5J8wdERRKs5g8E0Zy1pvOYTli7z9E8nmOn3YGp4FhtjhrOyFmX/8veWCwdI69mCHKJw6l+4J/bHA==} @@ -793,6 +874,9 @@ packages: engines: {node: '>=6.0.0'} hasBin: true + bindings@1.5.0: + resolution: {integrity: sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==} + bl@4.1.0: resolution: {integrity: sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==} @@ -830,6 +914,10 @@ packages: buffer@5.7.1: resolution: {integrity: sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==} + cacache@15.3.0: + resolution: {integrity: sha512-VVdYzXEn+cnbXpFgWs5hTT7OScegHVmLhJIR8Ufqk3iFD6A6j5iSX1KuBTfNEv4tdJWE2PzA6IVFtcLC7fN9wQ==} + engines: {node: '>= 10'} + call-bind-apply-helpers@1.0.2: resolution: {integrity: sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==} engines: {node: '>= 0.4'} @@ -879,6 +967,10 @@ packages: chownr@1.1.4: resolution: {integrity: sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==} 
+ chownr@2.0.0: + resolution: {integrity: sha512-bIomtDF5KGpdogkLd9VspvFzk9KfpyyGlS8YFVZl7TGPBHL5snIOnxeshwVgPteQ9b4Eydl+pVbIyE1DcvCWgQ==} + engines: {node: '>=10'} + chrome-trace-event@1.0.4: resolution: {integrity: sha512-rNjApaLzuwaOTjCiT8lSDdGN1APCiqkChLMJxJPWLunPAt5fy8xgU9/jNOchV84wfIxrA0lRQB7oCT8jrn/wrQ==} engines: {node: '>=6.0'} @@ -890,6 +982,10 @@ packages: cjs-module-lexer@1.4.3: resolution: {integrity: sha512-9z8TZaGM1pfswYeXrUpzPrkx8UnWYdhJclsiYMm6x/w5+nN+8Tf/LnAgfLGQCm59qAOxU8WwHEq2vNwF6i4j+Q==} + clean-stack@2.2.0: + resolution: {integrity: sha512-4diC9HaTE+KRAMWhDhrGOECgWZxoevMc5TlkObMqNSsVU62PYzXZ/SMTjzyGAFF1YusgxGcSWTEXBhp0CPwQ1A==} + engines: {node: '>=6'} + cli-cursor@5.0.0: resolution: {integrity: sha512-aCj4O5wKyszjMmDT4tZj93kxyydN/K5zPWSCe6/0AV/AA1pqe5ZBIw0a2ZfPQV7lL5/yb5HsUreJ6UFAF1tEQw==} engines: {node: '>=18'} @@ -926,6 +1022,10 @@ packages: color-name@1.1.4: resolution: {integrity: sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==} + color-support@1.1.3: + resolution: {integrity: sha512-qiBjkpbMLO/HL68y+lh4q0/O1MZFj2RX6X/KmMa3+gJD3z+WwI1ZzDHysvqHGS3mP6mznPckpXmw1nI9cJjyRg==} + hasBin: true + colorette@2.0.20: resolution: {integrity: sha512-IfEDxwoWIjkeXL1eXcDiow4UbKjhLdq6/EuSVR9GMN7KVH3r9gQ83e73hsz1Nd1T3ijd5xv1wcWRYO+D6kCI2w==} @@ -947,13 +1047,12 @@ packages: concat-map@0.0.1: resolution: {integrity: sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==} + console-control-strings@1.1.0: + resolution: {integrity: sha512-ty/fTekppD2fIwRvnZAVdeOiGd1c7YXEixbgJTNzqcxJWKQnjJ/V1bNEEE6hygpM3WjwHFUVK6HTjWSzV4a8sQ==} + convert-source-map@2.0.0: resolution: {integrity: sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==} - copilot@0.0.2: - resolution: {integrity: sha512-nedf34AaYj9JnFhRmiJEZemAno2WDXMypq6FW5aCVR0N+QdpQ6viukP1JpvJDChpaMEVvbUkMjmjMifJbO/AgQ==} - hasBin: true - core-util-is@1.0.3: resolution: {integrity: 
sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ==} @@ -1009,6 +1108,9 @@ packages: resolution: {integrity: sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==} engines: {node: '>=0.4.0'} + delegates@1.0.0: + resolution: {integrity: sha512-bd2L678uiWATM6m5Z1VzNCErI3jiGzt6HGY8OVICs40JQq/HALfbyNJmp0UDakEY4pMMaN0Ly5om/B1VI/+xfQ==} + detect-libc@2.1.2: resolution: {integrity: sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==} engines: {node: '>=8'} @@ -1062,6 +1164,9 @@ packages: encoding-sniffer@0.2.1: resolution: {integrity: sha512-5gvq20T6vfpekVtqrYQsSCFZ1wEg5+wW0/QaZMWkFr6BqD3NfKs0rLCx4rrVlSWJeZb5NBJgVLswK/w2MWU+Gw==} + encoding@0.1.13: + resolution: {integrity: sha512-ETBauow1T35Y/WZMkio9jiM0Z5xjHHmJ4XmjZOq1l/dXz3lr2sRn87nJy20RupqSh1F2m3HHPSp8ShIPQJrJ3A==} + end-of-stream@1.4.5: resolution: {integrity: sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg==} @@ -1084,11 +1189,18 @@ packages: resolution: {integrity: sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA==} engines: {node: '>=0.12'} + env-paths@2.2.1: + resolution: {integrity: sha512-+h1lkLKhZMTYjog1VEpJNG7NZJWcuc2DDk/qsqSTRRCOXiLjeQ1d1/udrUGhqMxUgAlwKNZ0cf2uqan5GLuS2A==} + engines: {node: '>=6'} + envinfo@7.21.0: resolution: {integrity: sha512-Lw7I8Zp5YKHFCXL7+Dz95g4CcbMEpgvqZNNq3AmlT5XAV6CgAAk6gyAMqn2zjw08K9BHfcNuKrMiCPLByGafow==} engines: {node: '>=4'} hasBin: true + err-code@2.0.3: + resolution: {integrity: sha512-2bmlRpNKBxT/CRmPOlyISQpNj+qSeYvcym/uT0Jx2bMOlKLtSy1ZmLuVxSEKKyor/N5yhvp/ZiG1oE3DEYMSFA==} + error-ex@1.3.4: resolution: {integrity: sha512-sqQamAnR14VgCr1A618A3sGrygcpK+HEbenA/HiEAkkUwcZIIB/tgWqHFxWgOyDh4nB4JCRimh79dR5Ywc9MDQ==} @@ -1206,6 +1318,9 @@ packages: resolution: {integrity: 
sha512-8uSpZZocAZRBAPIEINJj3Lo9HyGitllczc27Eh5YYojjMFMn8yHMDMaUHE2Jqfq05D/wucwI4JGURyXt1vchyg==} engines: {node: '>=10'} + exenv-es6@1.1.1: + resolution: {integrity: sha512-vlVu3N8d6yEMpMsEm+7sUBAI81aqYYuEvfK0jNqmdb/OPXzzH7QWDDnVjMvDSY47JdHEqx/dfC/q8WkfoTmpGQ==} + exit@0.1.2: resolution: {integrity: sha512-Zk/eNKV2zbjpKzrsQ+n1G6poVbErQxJ0LBOJXaKZ1EViLzH+hrLu9cdXI4zw9dBQJslwBEpbQ2P1oS7nDxs6jQ==} engines: {node: '>= 0.8.0'} @@ -1254,6 +1369,9 @@ packages: resolution: {integrity: sha512-7Gps/XWymbLk2QLYK4NzpMOrYjMhdIxXuIvy2QBsLE6ljuodKvdkWs/cpyJJ3CVIVpH0Oi1Hvg1ovbMzLdFBBg==} engines: {node: ^10.12.0 || >=12.0.0} + file-uri-to-path@1.0.0: + resolution: {integrity: sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==} + fill-range@7.1.1: resolution: {integrity: sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==} engines: {node: '>=8'} @@ -1277,6 +1395,15 @@ packages: flatted@3.4.2: resolution: {integrity: sha512-PjDse7RzhcPkIJwy5t7KPWQSZ9cAbzQXcafsetQoD7sOJRQlGikNbx7yZp2OotDnJyrDcbyRq3Ttb18iYOqkxA==} + follow-redirects@1.16.0: + resolution: {integrity: sha512-y5rN/uOsadFT/JfYwhxRS5R7Qce+g3zG97+JrtFZlC9klX/W5hD7iiLzScI4nZqUS7DNUdhPgw4xI8W2LuXlUw==} + engines: {node: '>=4.0'} + peerDependencies: + debug: '*' + peerDependenciesMeta: + debug: + optional: true + form-data-encoder@1.7.2: resolution: {integrity: sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==} @@ -1291,6 +1418,10 @@ packages: fs-constants@1.0.0: resolution: {integrity: sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==} + fs-minipass@2.1.0: + resolution: {integrity: sha512-V/JgOLFCS+R6Vcq0slCuaeWEdNC3ouDlJMNIsacH2VtALiu9mV4LPrHc5cDl8k5aw6J8jwgWWpiTo5RYhmIzvg==} + engines: {node: '>= 8'} + fs.realpath@1.0.0: resolution: {integrity: sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==} @@ 
-1302,6 +1433,11 @@ packages: function-bind@1.1.2: resolution: {integrity: sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==} + gauge@4.0.4: + resolution: {integrity: sha512-f9m+BEN5jkg6a0fZjleidjN51VE1X+mPFQ2DJ0uv1V39oCLCbsGe6yjbBnp7eK7z/+GAon99a3nHuqbuuthyPg==} + engines: {node: ^12.13.0 || ^14.15.0 || >=16.0.0} + deprecated: This package is no longer supported. + gensync@1.0.0-beta.2: resolution: {integrity: sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==} engines: {node: '>=6.9.0'} @@ -1387,6 +1523,9 @@ packages: resolution: {integrity: sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==} engines: {node: '>= 0.4'} + has-unicode@2.0.1: + resolution: {integrity: sha512-8Rf9Y83NBReMnx0gFzA8JImQACstCYWUplepDa9xprwwtmgEZUF0h/i5xSA625zB/I37EtrswSST6OXxwaaIJQ==} + hasown@2.0.3: resolution: {integrity: sha512-ej4AhfhfL2Q2zpMmLo7U1Uv9+PyhIZpgQLGT1F9miIGmiCJIoCgSmczFdrc97mWT4kVY72KA+WnnhJ5pghSvSg==} engines: {node: '>= 0.4'} @@ -1401,10 +1540,21 @@ packages: htmlparser2@10.1.0: resolution: {integrity: sha512-VTZkM9GWRAtEpveh7MSF6SjjrpNVNNVJfFup7xTY3UpFtm67foy9HDVXneLtFVt4pMz5kZtgNcvCniNFb1hlEQ==} + http-cache-semantics@4.2.0: + resolution: {integrity: sha512-dTxcvPXqPvXBQpq5dUr6mEMJX4oIEFv6bwom3FDwKRDsuIjjJGANqhBuoAn9c1RQJIdAKav33ED65E2ys+87QQ==} + + http-proxy-agent@4.0.1: + resolution: {integrity: sha512-k0zdNgqWTGA6aeIRVpvfVob4fL52dTfaehylg0Y4UvSySvOq/Y+BOyPrgpUrA7HylqvU8vIZGsRuXmspskV0Tg==} + engines: {node: '>= 6'} + http-proxy-agent@7.0.2: resolution: {integrity: sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==} engines: {node: '>= 14'} + https-proxy-agent@5.0.1: + resolution: {integrity: sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==} + engines: {node: '>= 6'} + https-proxy-agent@7.0.6: resolution: {integrity: 
sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==} engines: {node: '>= 14'} @@ -1448,6 +1598,13 @@ packages: resolution: {integrity: sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA==} engines: {node: '>=0.8.19'} + indent-string@4.0.0: + resolution: {integrity: sha512-EdDDZu4A2OyIK7Lr/2zG+w5jmbuk1DVBnEwREQvBzspBJkCEbRa8GxU1lghYcaGJCnRWibjDXlq779X1/y5xwg==} + engines: {node: '>=8'} + + infer-owner@1.0.4: + resolution: {integrity: sha512-IClj+Xz94+d7irH5qRyfJonOdfTzuDaifE6ZPWfx0N0+/ATZCbuTPq2prFl526urkQd90WyUKIh1DfBQ2hMz9A==} + inflight@1.0.6: resolution: {integrity: sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==} deprecated: This module is not supported, and leaks memory. Do not use it. Check out lru-cache if you want a good and tested way to coalesce async requests by a key value, which is much more comprehensive and powerful. @@ -1462,6 +1619,10 @@ packages: resolution: {integrity: sha512-6xwYfHbajpoF0xLW+iwLkhwgvLoZDfjYfoFNu8ftMoXINzwuymNLd9u/KmwtdT2GbR+/Cz66otEGEVVUHX9QLQ==} engines: {node: '>=10.13.0'} + ip-address@10.2.0: + resolution: {integrity: sha512-/+S6j4E9AHvW9SWMSEY9Xfy66O5PWvVEJ08O0y5JGyEKQpojb0K0GKpz/v5HJ/G0vi3D2sjGK78119oXZeE0qA==} + engines: {node: '>= 12'} + is-arrayish@0.2.1: resolution: {integrity: sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==} @@ -1489,6 +1650,9 @@ packages: resolution: {integrity: sha512-qP1vozQRI+BMOPcjFzrjXuQvdak2pHNUMZoeG2eRbiSqyvbEf/wQtEOTOX1guk6E3t36RkaqiSt8A/6YElNxLQ==} engines: {node: '>=12'} + is-lambda@1.0.1: + resolution: {integrity: sha512-z7CMFGNrENq5iFB9Bqo64Xk6Y9sg+epq1myIcdHaGnbMTYOxvzsEtdYqQUylB7LxfkvgrrjP32T6Ywciio9UIQ==} + is-number@7.0.0: resolution: {integrity: sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==} engines: {node: '>=0.12.0'} @@ -1786,6 +1950,10 @@ packages: 
make-error@1.3.6: resolution: {integrity: sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==} + make-fetch-happen@9.1.0: + resolution: {integrity: sha512-+zopwDy7DNknmwPQplem5lAZX/eCOzSvSNNcSKm5eVwTkOBzoktEfXsa9L23J/GIRhxRsaxzkPEhrJEpE2F4Gg==} + engines: {node: '>= 10'} + makeerror@1.0.12: resolution: {integrity: sha512-JmqCvUhmt43madlpFzG4BQzG2Z3m6tvQDNKdClZnO3VbIudJYmxsT0FNJMeiB2+JTSlTQTSbU8QdesVmwJcmLg==} @@ -1850,9 +2018,46 @@ packages: minimist@1.2.8: resolution: {integrity: sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==} + minipass-collect@1.0.2: + resolution: {integrity: sha512-6T6lH0H8OG9kITm/Jm6tdooIbogG9e0tLgpY6mphXSm/A9u8Nq1ryBG+Qspiub9LjWlBPsPS3tWQ/Botq4FdxA==} + engines: {node: '>= 8'} + + minipass-fetch@1.4.1: + resolution: {integrity: sha512-CGH1eblLq26Y15+Azk7ey4xh0J/XfJfrCox5LDJiKqI2Q2iwOLOKrlmIaODiSQS8d18jalF6y2K2ePUm0CmShw==} + engines: {node: '>=8'} + + minipass-flush@1.0.7: + resolution: {integrity: sha512-TbqTz9cUwWyHS2Dy89P3ocAGUGxKjjLuR9z8w4WUTGAVgEj17/4nhgo2Du56i0Fm3Pm30g4iA8Lcqctc76jCzA==} + engines: {node: '>= 8'} + + minipass-pipeline@1.2.4: + resolution: {integrity: sha512-xuIq7cIOt09RPRJ19gdi4b+RiNvDFYe5JH+ggNvBqGqpQXcru3PcRmOZuHBKWK1Txf9+cQ+HMVN4d6z46LZP7A==} + engines: {node: '>=8'} + + minipass-sized@1.0.3: + resolution: {integrity: sha512-MbkQQ2CTiBMlA2Dm/5cY+9SWFEN8pzzOXi6rlM5Xxq0Yqbda5ZQy9sU75a673FE9ZK0Zsbr6Y5iP6u9nktfg2g==} + engines: {node: '>=8'} + + minipass@3.3.6: + resolution: {integrity: sha512-DxiNidxSEK+tHG6zOIklvNOwm3hvCrbUrdtzY74U6HKTJxvIDfOUL5W5P2Ghd3DTkhhKPYGqeNUIh5qcM4YBfw==} + engines: {node: '>=8'} + + minipass@5.0.0: + resolution: {integrity: sha512-3FnjYuehv9k6ovOEbyOswadCDPX1piCfhV8ncmYtHOjuPwylVWsghTLo7rabjC3Rx5xD4HDx8Wm1xnMF7S5qFQ==} + engines: {node: '>=8'} + + minizlib@2.1.2: + resolution: {integrity: sha512-bAxsR8BVfj60DWXHE3u30oHzfl4G7khkSuPW+qvpd7jFRHm7dLxOjUk1EHACJ/hxLY8phGJ0YhYHZo7jil7Qdg==} + engines: {node: 
'>= 8'} + mkdirp-classic@0.5.3: resolution: {integrity: sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==} + mkdirp@1.0.4: + resolution: {integrity: sha512-vVqVZQyf3WLx2Shd0qJ9xuvqgAyKPLAiqITEtqW0oIUjzo3PePDd6fW9iFz30ef7Ysp/oiWqbhszeGWW2T6Gzw==} + engines: {node: '>=10'} + hasBin: true + ms@2.1.3: resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==} @@ -1865,6 +2070,10 @@ packages: natural-compare@1.4.0: resolution: {integrity: sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==} + negotiator@0.6.4: + resolution: {integrity: sha512-myRT3DiWPHqho5PrJaIRyaMv2kgYf0mUVgBNOYMuCH5Ki1yEiQaf/ZJuQ62nvpc44wL5WDbTX7yGJi1Neevw8w==} + engines: {node: '>= 0.6'} + neo-async@2.6.2: resolution: {integrity: sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw==} @@ -1875,6 +2084,9 @@ packages: node-addon-api@4.3.0: resolution: {integrity: sha512-73sE9+3UaLYYFmDsFZnqCInzPyh3MqIwZO9cw58yIqAZhONrrabrYyYe3TuIqtIiOuTXVhsGau8hcrhhwSsDIQ==} + node-addon-api@7.1.1: + resolution: {integrity: sha512-5m3bsyrjFWE1xf7nz7YXdN4udnVtXK6/Yfgn5qnahL6bCkf2yKt4k3nuTKAtT4r3IG8JNR2ncsIMdZuAzJjHQQ==} + node-domexception@1.0.0: resolution: {integrity: sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==} engines: {node: '>=10.5.0'} @@ -1889,6 +2101,11 @@ packages: encoding: optional: true + node-gyp@8.4.1: + resolution: {integrity: sha512-olTJRgUtAb/hOXG0E93wZDs5YiJlgbXxTwQAFHyNlRsXQnYzUaF2aGgujZbw+hR8aF4ZG/rST57bWMWD16jr9w==} + engines: {node: '>= 10.12.0'} + hasBin: true + node-int64@0.4.0: resolution: {integrity: sha512-O5lz91xSOeoXP6DulyHfllpq+Eg00MWitZIbtPfoSEvqIHdl5gfcY6hYzDWnj0qD5tz52PI08u9qUvSVeUBeHw==} @@ -1896,6 +2113,11 @@ packages: resolution: {integrity: sha512-GYVXHE2KnrzAfsAjl4uP++evGFCrAU1jta4ubEjIG7YWt/64Gqv66a30yKwWczVjA6j3bM4nBwH7Pk1JmDHaxQ==} 
engines: {node: '>=18'} + nopt@5.0.0: + resolution: {integrity: sha512-Tbj67rffqceeLpcRXrT7vKAN8CwfPeIBgM7E6iBkmKLV7bEMwpGgYLGv0jACUsECaa/vuxP0IjEont6umdMgtQ==} + engines: {node: '>=6'} + hasBin: true + normalize-path@3.0.0: resolution: {integrity: sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==} engines: {node: '>=0.10.0'} @@ -1904,6 +2126,11 @@ packages: resolution: {integrity: sha512-S48WzZW777zhNIrn7gxOlISNAqi9ZC/uQFnRdbeIHhZhCA6UqpkOT8T1G7BvfdgP4Er8gF4sUbaS0i7QvIfCWw==} engines: {node: '>=8'} + npmlog@6.0.2: + resolution: {integrity: sha512-/vBvz5Jfr9dT/aFWd0FIRf+T/Q2WBsLENygUaFUqstqsycmZAP/t5BvFJTK0viFmSUxiUKTUplWy5vt+rvKIxg==} + engines: {node: ^12.13.0 || ^14.15.0 || >=16.0.0} + deprecated: This package is no longer supported. + nth-check@2.1.1: resolution: {integrity: sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==} @@ -1958,6 +2185,10 @@ packages: resolution: {integrity: sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw==} engines: {node: '>=10'} + p-map@4.0.0: + resolution: {integrity: sha512-/bjOqmgETBYB5BoEeGVea8dmvHb2m9GLy1E9W43yeyfP6QQCZGFNa+XRceJEuDB6zqr+gKpIAmlLebMpykw/MQ==} + engines: {node: '>=10'} + p-try@2.2.0: resolution: {integrity: sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ==} engines: {node: '>=6'} @@ -2048,10 +2279,26 @@ packages: process-nextick-args@2.0.1: resolution: {integrity: sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==} + promise-inflight@1.0.1: + resolution: {integrity: sha512-6zWPyEOFaQBJYcGMHBKTKJ3u6TBsnMFOIZSa6ce1e/ZrrsOlnHRHbabMjLiBYKp+n44X9eUI6VUPaukCXHuG4g==} + peerDependencies: + bluebird: '*' + peerDependenciesMeta: + bluebird: + optional: true + + promise-retry@2.0.1: + resolution: {integrity: 
sha512-y+WKFlBR8BGXnsNlIHFGPZmyDf3DFMoLhaflAnyZgV6rG6xu+JwesTo2Q9R6XwYmtmwAFCkAk3e35jEdoeh/3g==} + engines: {node: '>=10'} + prompts@2.4.2: resolution: {integrity: sha512-NxNv/kLguCA7p3jE8oL2aEBsrJWgAakBpgmgK6lpPWV+WuOmY6r2/zbAVnP+T8bQlA0nzHXSJSJW0Hq7ylaD2Q==} engines: {node: '>= 6'} + proxy-from-env@2.1.0: + resolution: {integrity: sha512-cJ+oHTW1VAEa8cJslgmUZrc+sjRKgAKl3Zyse6+PV38hZe/V6Z14TbCuXcan9F9ghlz4QrFr2c92TNF82UkYHA==} + engines: {node: '>=10'} + pump@3.0.4: resolution: {integrity: sha512-VS7sjc6KR7e1ukRFhQSY5LM2uBWAUPiOPa/A3mkKmiMwSmRFUITt0xuj+/lesgnCv+dPIEYlkzrcyXgquIHMcA==} @@ -2076,6 +2323,10 @@ packages: react-is@18.3.1: resolution: {integrity: sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==} + react@19.2.6: + resolution: {integrity: sha512-sfWGGfavi0xr8Pg0sVsyHMAOziVYKgPLNrS7ig+ivMNb3wbCBw3KxtflsGBAwD3gYQlE/AEZsTLgToRrSCjb0Q==} + engines: {node: '>=0.10.0'} + read@1.0.7: resolution: {integrity: sha512-rSOKNYUmaxy0om1BNjMN4ezNT6VKK+2xF4GBhc81mkH7L60i6dp8qPYrkndNLT3QPphoII3maL9PVC9XmhHwVQ==} engines: {node: '>=0.8'} @@ -2124,6 +2375,10 @@ packages: resolution: {integrity: sha512-oMA2dcrw6u0YfxJQXm342bFKX/E4sG9rbTzO9ptUcR/e8A33cHuvStiYOwH7fszkZlZ1z/ta9AAoPk2F4qIOHA==} engines: {node: '>=18'} + retry@0.12.0: + resolution: {integrity: sha512-9LkiTwjUh6rT555DtE9rTX+BKByPfrMzEAtnlEtdEwr3Nkffwiihqe2bWADg+OQRjt9gl6ICdmB/ZFDCGAtSow==} + engines: {node: '>= 4'} + reusify@1.1.0: resolution: {integrity: sha512-g6QUff04oZpHs0eG5p83rFLhHeV00ug/Yf9nZM6fLeUrPguBTkTQOdpAWWspMh55TZfVQDPaN3NQJfbVRAxdIw==} engines: {iojs: '>=1.0.0', node: '>=0.10.0'} @@ -2166,6 +2421,9 @@ packages: engines: {node: '>=10'} hasBin: true + set-blocking@2.0.0: + resolution: {integrity: sha512-KiKBS8AnWGEyLzofFfmvKwpdPzqiy16LvQfK3yv/fVH7Bj13/wl3JSR1J+rfgRE9q7xUJK4qvgS8raSOeLUehw==} + setimmediate@1.0.5: resolution: {integrity: sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA==} @@ -2217,6 +2475,18 @@ 
packages: resolution: {integrity: sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==} engines: {node: '>=8'} + smart-buffer@4.2.0: + resolution: {integrity: sha512-94hK0Hh8rPqQl2xXc3HsaBoOXKV20MToPkcXvwbISWLEs+64sBq5kFgn2kJDHb1Pry9yrP0dxrCI9RRci7RXKg==} + engines: {node: '>= 6.0.0', npm: '>= 3.0.0'} + + socks-proxy-agent@6.2.1: + resolution: {integrity: sha512-a6KW9G+6B3nWZ1yB8G7pJwL3ggLy1uTzKAgCb7ttblwqdz9fMGJUuTy3uFzEP48FAs9FLILlmzDlE2JJhVQaXQ==} + engines: {node: '>= 10'} + + socks@2.8.9: + resolution: {integrity: sha512-LJhUYUvItdQ0LkJTmPeaEObWXAqFyfmP85x0tch/ez9cahmhlBBLbIqDFnvBnUJGagb0JbIQrkBs1wJ+yRYpEw==} + engines: {node: '>= 10.0.0', npm: '>= 3.0.0'} + source-map-support@0.5.13: resolution: {integrity: sha512-SHSKFHadjVA5oR4PPqhtAVdcBWwRYVd6g6cAXnIbRiIwc2EhPrTuKUBdSLvlEKyIP3GCf89fltvcZiP9MMFA1w==} @@ -2234,6 +2504,13 @@ packages: sprintf-js@1.0.3: resolution: {integrity: sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==} + sqlite3@5.1.7: + resolution: {integrity: sha512-GGIyOiFaG+TUra3JIfkI/zGP8yZYLPQ0pl1bH+ODjiX57sPhrLU5sQJn1y9bDKZUFYkX1crlrPfSYt0BKKdkog==} + + ssri@8.0.1: + resolution: {integrity: sha512-97qShzy1AiyxvPNIkLWoGua7xoQzzPjQ0HAH4B0rWKo7SZ6USuPcrUiAFrws0UH8RrbWmgq3LMTObhPIHbbBeQ==} + engines: {node: '>= 8'} + stack-utils@2.0.6: resolution: {integrity: sha512-XlkWvfIm6RmsWtNJx+uqtKLS8eqFbxUg0ZzLXqY0caEy9l7hruX8IpiDnjsLavoBgqCCR71TqWO8MaXYheJ3RQ==} engines: {node: '>=10'} @@ -2304,6 +2581,9 @@ packages: resolution: {integrity: sha512-Bh7QjT8/SuKUIfObSXNHNSK6WHo6J1tHCqJsuaFDP7gP0fkzSfTxI8y85JrppZ0h8l0maIgc2tfuZQ6/t3GtnQ==} engines: {node: ^14.18.0 || >=16.0.0} + tabbable@5.3.3: + resolution: {integrity: sha512-QD9qKY3StfbZqWOPLp0++pOrAVb/HbUi5xCc8cUo4XjP19808oaMiDzn0leBY5mCespIBM0CIZePzZjgzR83kA==} + tapable@2.3.3: resolution: {integrity: sha512-uxc/zpqFg6x7C8vOE7lh6Lbda8eEL9zmVm/PLeTPBRhh1xCgdWaQ+J1CUieGpIfm2HdtsUpRv+HshiasBMcc6A==} engines: {node: '>=6'} 
@@ -2315,6 +2595,11 @@ packages: resolution: {integrity: sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==} engines: {node: '>=6'} + tar@6.2.1: + resolution: {integrity: sha512-DZ4yORTwrbTj/7MZYq2w+/ZFdI6OZ/f9SFHR+71gIVUZhOQPHzVCLpvRnPgyaMpfWxxk/4ONva3GQSyNIKRv6A==} + engines: {node: '>=10'} + deprecated: Old versions of tar are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me + terser-webpack-plugin@5.6.0: resolution: {integrity: sha512-Eum+5ajkaOhf5KbM26osvv21kLD7BaGqQ1UA4Ami4arYwylmGUQTgHFpHDdmJod1q4QXa66p0to/FBKID+J1vA==} engines: {node: '>= 10.13.0'} @@ -2381,6 +2666,10 @@ packages: resolution: {integrity: sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==} engines: {node: '>=8.0'} + token-costs@3.5.0: + resolution: {integrity: sha512-mBMc8sv6BlqW22psMtBjWVli4Qmyeu2ratU2u5Zm7Sf9v2lSFDPYdcAzcW8iKy7YwWX+Dq6J5TA54vy7QudaAw==} + engines: {node: '>=20.0.0'} + tr46@0.0.3: resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==} @@ -2424,6 +2713,12 @@ packages: typescript: '*' webpack: ^5.0.0 + tslib@1.14.1: + resolution: {integrity: sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==} + + tslib@2.8.1: + resolution: {integrity: sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==} + tunnel-agent@0.6.0: resolution: {integrity: sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==} @@ -2476,10 +2771,20 @@ packages: undici-types@6.21.0: resolution: {integrity: sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==} + undici@5.29.0: + resolution: {integrity: 
sha512-raqeBD6NQK4SkWhQzeYKd1KmIG6dllBOTt55Rmkt4HtI9mwdWtJljnrXjAFUBLTSN67HWrOIZ3EPF4kjUw80Bg==} + engines: {node: '>=14.0'} + undici@7.25.0: resolution: {integrity: sha512-xXnp4kTyor2Zq+J1FfPI6Eq3ew5h6Vl0F/8d9XU5zZQf1tX9s2Su1/3PiMmUANFULpmksxkClamIZcaUqryHsQ==} engines: {node: '>=20.18.1'} + unique-filename@1.1.1: + resolution: {integrity: sha512-Vmp0jIp2ln35UTXuryvjzkjGdRyf9b2lTXuSYUiPmzRcl3FDtYqAwOnTJkAngD9SWhnoJzDbTKwaOrZ+STtxNQ==} + + unique-slug@2.0.2: + resolution: {integrity: sha512-zoWr9ObaxALD3DOPfjPSqxt4fnZiWblxHIgeWqW8x7UqDzEtHEQLzji2cuJYQFCU6KmoJikOYAZlrTHHebjx2w==} + update-browserslist-db@1.2.3: resolution: {integrity: sha512-Js0m9cx+qOgDxo0eMiFGEueWztz+d4+M3rGlmKPT+T4IS/jP4ylw3Nwpu6cpTTP8R1MAC1kF4VbdLt3ARf209w==} hasBin: true @@ -2571,6 +2876,9 @@ packages: engines: {node: '>= 8'} hasBin: true + wide-align@1.1.5: + resolution: {integrity: sha512-eDMORYaPNZ4sQIuuYPDHdQvf4gyCF9rEEV/yPxGfwPkRodwEgiMUUXTx/dex+Me0wxx53S+NgUHaP7y3MGlDmg==} + wildcard@2.0.1: resolution: {integrity: sha512-CC1bOL87PIWSBhDcTrdeLo6eGT7mCFtrg0uIJtqJUFyK+eJnzl8A1niH56uu7KMa5XFrtiV+AQuHO3n7DsHnLQ==} @@ -2628,8 +2936,23 @@ packages: resolution: {integrity: sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==} engines: {node: '>=10'} + zod@3.25.76: + resolution: {integrity: sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==} + snapshots: + '@anthropic-ai/sdk@0.30.1(encoding@0.1.13)': + dependencies: + '@types/node': 18.19.130 + '@types/node-fetch': 2.6.13 + abort-controller: 3.0.0 + agentkeepalive: 4.6.0 + form-data-encoder: 1.7.2 + formdata-node: 4.4.1 + node-fetch: 2.7.0(encoding@0.1.13) + transitivePeerDependencies: + - encoding + '@babel/code-frame@7.29.0': dependencies: '@babel/helper-validator-identifier': 7.28.5 @@ -2819,6 +3142,51 @@ snapshots: '@bcoe/v8-coverage@0.2.3': {} + '@bufbuild/protobuf@1.10.0': {} + + 
'@connectrpc/connect-node@1.7.0(@bufbuild/protobuf@1.10.0)(@connectrpc/connect@1.7.0(@bufbuild/protobuf@1.10.0))': + dependencies: + '@bufbuild/protobuf': 1.10.0 + '@connectrpc/connect': 1.7.0(@bufbuild/protobuf@1.10.0) + undici: 5.29.0 + + '@connectrpc/connect@1.7.0(@bufbuild/protobuf@1.10.0)': + dependencies: + '@bufbuild/protobuf': 1.10.0 + + '@cursor/sdk-darwin-arm64@1.0.13': + optional: true + + '@cursor/sdk-darwin-x64@1.0.13': + optional: true + + '@cursor/sdk-linux-arm64@1.0.13': + optional: true + + '@cursor/sdk-linux-x64@1.0.13': + optional: true + + '@cursor/sdk-win32-x64@1.0.13': + optional: true + + '@cursor/sdk@1.0.13': + dependencies: + '@bufbuild/protobuf': 1.10.0 + '@connectrpc/connect': 1.7.0(@bufbuild/protobuf@1.10.0) + '@connectrpc/connect-node': 1.7.0(@bufbuild/protobuf@1.10.0)(@connectrpc/connect@1.7.0(@bufbuild/protobuf@1.10.0)) + '@statsig/js-client': 3.31.0 + sqlite3: 5.1.7 + zod: 3.25.76 + optionalDependencies: + '@cursor/sdk-darwin-arm64': 1.0.13 + '@cursor/sdk-darwin-x64': 1.0.13 + '@cursor/sdk-linux-arm64': 1.0.13 + '@cursor/sdk-linux-x64': 1.0.13 + '@cursor/sdk-win32-x64': 1.0.13 + transitivePeerDependencies: + - bluebird + - supports-color + '@discoveryjs/json-ext@0.5.7': {} '@eslint-community/eslint-utils@4.9.1(eslint@8.57.1)': @@ -2844,42 +3212,12 @@ snapshots: '@eslint/js@8.57.1': {} - '@github/copilot-darwin-arm64@1.0.51': - optional: true - - '@github/copilot-darwin-x64@1.0.51': - optional: true - - '@github/copilot-linux-arm64@1.0.51': - optional: true - - '@github/copilot-linux-x64@1.0.51': - optional: true - - '@github/copilot-linuxmusl-arm64@1.0.51': - optional: true - - '@github/copilot-linuxmusl-x64@1.0.51': - optional: true - - '@github/copilot-win32-arm64@1.0.51': - optional: true + '@fastify/busboy@2.1.1': {} - '@github/copilot-win32-x64@1.0.51': + '@gar/promisify@1.1.3': optional: true - '@github/copilot@1.0.51': - dependencies: - detect-libc: 2.1.2 - optionalDependencies: - '@github/copilot-darwin-arm64': 1.0.51 - 
'@github/copilot-darwin-x64': 1.0.51 - '@github/copilot-linux-arm64': 1.0.51 - '@github/copilot-linux-x64': 1.0.51 - '@github/copilot-linuxmusl-arm64': 1.0.51 - '@github/copilot-linuxmusl-x64': 1.0.51 - '@github/copilot-win32-arm64': 1.0.51 - '@github/copilot-win32-x64': 1.0.51 + '@google/generative-ai@0.21.0': {} '@humanwhocodes/config-array@0.13.0': dependencies: @@ -3108,6 +3446,25 @@ snapshots: '@kstonekuan/audio-capture-linux-x64-gnu': 0.0.3 '@kstonekuan/audio-capture-win32-x64-msvc': 0.0.3 + '@microsoft/fast-element@1.14.0': {} + + '@microsoft/fast-foundation@2.50.0': + dependencies: + '@microsoft/fast-element': 1.14.0 + '@microsoft/fast-web-utilities': 5.4.1 + tabbable: 5.3.3 + tslib: 1.14.1 + + '@microsoft/fast-react-wrapper@0.3.25(react@19.2.6)': + dependencies: + '@microsoft/fast-element': 1.14.0 + '@microsoft/fast-foundation': 2.50.0 + react: 19.2.6 + + '@microsoft/fast-web-utilities@5.4.1': + dependencies: + exenv-es6: 1.1.1 + '@nodelib/fs.scandir@2.1.5': dependencies: '@nodelib/fs.stat': 2.0.5 @@ -3120,6 +3477,18 @@ snapshots: '@nodelib/fs.scandir': 2.1.5 fastq: 1.20.1 + '@npmcli/fs@1.1.1': + dependencies: + '@gar/promisify': 1.1.3 + semver: 7.8.1 + optional: true + + '@npmcli/move-file@1.1.2': + dependencies: + mkdirp: 1.0.4 + rimraf: 3.0.2 + optional: true + '@pkgr/core@0.2.9': {} '@sinclair/typebox@0.27.10': {} @@ -3132,6 +3501,15 @@ snapshots: dependencies: '@sinonjs/commons': 3.0.1 + '@statsig/client-core@3.31.0': {} + + '@statsig/js-client@3.31.0': + dependencies: + '@statsig/client-core': 3.31.0 + + '@tootallnate/once@1.1.2': + optional: true + '@types/babel__core@7.20.5': dependencies: '@babel/parser': 7.29.3 @@ -3299,6 +3677,14 @@ snapshots: transitivePeerDependencies: - supports-color + '@vscode/webview-ui-toolkit@1.4.0(react@19.2.6)': + dependencies: + '@microsoft/fast-element': 1.14.0 + '@microsoft/fast-foundation': 2.50.0 + '@microsoft/fast-react-wrapper': 0.3.25(react@19.2.6) + react: 19.2.6 + tslib: 2.8.1 + '@webassemblyjs/ast@1.14.1': 
dependencies: '@webassemblyjs/helper-numbers': 1.13.2 @@ -3394,6 +3780,9 @@ snapshots: '@xtuc/long@4.2.2': {} + abbrev@1.1.1: + optional: true + abort-controller@3.0.0: dependencies: event-target-shim: 5.0.1 @@ -3408,12 +3797,24 @@ snapshots: acorn@8.16.0: {} + agent-base@6.0.2: + dependencies: + debug: 4.4.3 + transitivePeerDependencies: + - supports-color + agent-base@7.1.4: {} agentkeepalive@4.6.0: dependencies: humanize-ms: 1.2.1 + aggregate-error@3.1.0: + dependencies: + clean-stack: 2.2.0 + indent-string: 4.0.0 + optional: true + ajv-formats@2.1.1(ajv@8.20.0): optionalDependencies: ajv: 8.20.0 @@ -3460,6 +3861,15 @@ snapshots: normalize-path: 3.0.0 picomatch: 2.3.2 + aproba@2.1.0: + optional: true + + are-we-there-yet@3.0.1: + dependencies: + delegates: 1.0.0 + readable-stream: 3.6.2 + optional: true + argparse@1.0.10: dependencies: sprintf-js: 1.0.3 @@ -3470,6 +3880,16 @@ snapshots: asynckit@0.4.0: {} + axios@1.16.1: + dependencies: + follow-redirects: 1.16.0 + form-data: 4.0.5 + https-proxy-agent: 5.0.1 + proxy-from-env: 2.1.0 + transitivePeerDependencies: + - debug + - supports-color + azure-devops-node-api@11.2.0: dependencies: tunnel: 0.0.6 @@ -3536,6 +3956,10 @@ snapshots: baseline-browser-mapping@2.10.32: {} + bindings@1.5.0: + dependencies: + file-uri-to-path: 1.0.0 + bl@4.1.0: dependencies: buffer: 5.7.1 @@ -3582,6 +4006,30 @@ snapshots: base64-js: 1.5.1 ieee754: 1.2.1 + cacache@15.3.0: + dependencies: + '@npmcli/fs': 1.1.1 + '@npmcli/move-file': 1.1.2 + chownr: 2.0.0 + fs-minipass: 2.1.0 + glob: 7.2.3 + infer-owner: 1.0.4 + lru-cache: 6.0.0 + minipass: 3.3.6 + minipass-collect: 1.0.2 + minipass-flush: 1.0.7 + minipass-pipeline: 1.2.4 + mkdirp: 1.0.4 + p-map: 4.0.0 + promise-inflight: 1.0.1 + rimraf: 3.0.2 + ssri: 8.0.1 + tar: 6.2.1 + unique-filename: 1.1.1 + transitivePeerDependencies: + - bluebird + optional: true + call-bind-apply-helpers@1.0.2: dependencies: es-errors: 1.3.0 @@ -3640,12 +4088,17 @@ snapshots: chownr@1.1.4: {} + chownr@2.0.0: {} + 
chrome-trace-event@1.0.4: {} ci-info@3.9.0: {} cjs-module-lexer@1.4.3: {} + clean-stack@2.2.0: + optional: true + cli-cursor@5.0.0: dependencies: restore-cursor: 5.1.0 @@ -3680,6 +4133,9 @@ snapshots: color-name@1.1.4: {} + color-support@1.1.3: + optional: true + colorette@2.0.20: {} combined-stream@1.0.8: @@ -3694,11 +4150,10 @@ snapshots: concat-map@0.0.1: {} - convert-source-map@2.0.0: {} + console-control-strings@1.1.0: + optional: true - copilot@0.0.2: - dependencies: - '@github/copilot': 1.0.51 + convert-source-map@2.0.0: {} core-util-is@1.0.3: {} @@ -3751,6 +4206,9 @@ snapshots: delayed-stream@1.0.0: {} + delegates@1.0.0: + optional: true + detect-libc@2.1.2: {} detect-newline@3.1.0: {} @@ -3802,6 +4260,11 @@ snapshots: iconv-lite: 0.6.3 whatwg-encoding: 3.1.1 + encoding@0.1.13: + dependencies: + iconv-lite: 0.6.3 + optional: true + end-of-stream@1.4.5: dependencies: once: 1.4.0 @@ -3819,8 +4282,14 @@ snapshots: entities@7.0.1: {} + env-paths@2.2.1: + optional: true + envinfo@7.21.0: {} + err-code@2.0.3: + optional: true + error-ex@1.3.4: dependencies: is-arrayish: 0.2.1 @@ -3956,6 +4425,8 @@ snapshots: signal-exit: 3.0.7 strip-final-newline: 2.0.0 + exenv-es6@1.1.1: {} + exit@0.1.2: {} expand-template@2.0.3: {} @@ -4004,6 +4475,8 @@ snapshots: dependencies: flat-cache: 3.2.0 + file-uri-to-path@1.0.0: {} + fill-range@7.1.1: dependencies: to-regex-range: 5.0.1 @@ -4028,6 +4501,8 @@ snapshots: flatted@3.4.2: {} + follow-redirects@1.16.0: {} + form-data-encoder@1.7.2: {} form-data@4.0.5: @@ -4045,6 +4520,10 @@ snapshots: fs-constants@1.0.0: {} + fs-minipass@2.1.0: + dependencies: + minipass: 3.3.6 + fs.realpath@1.0.0: {} fsevents@2.3.3: @@ -4052,6 +4531,18 @@ snapshots: function-bind@1.1.2: {} + gauge@4.0.4: + dependencies: + aproba: 2.1.0 + color-support: 1.1.3 + console-control-strings: 1.1.0 + has-unicode: 2.0.1 + signal-exit: 3.0.7 + string-width: 4.2.3 + strip-ansi: 6.0.1 + wide-align: 1.1.5 + optional: true + gensync@1.0.0-beta.2: {} 
get-caller-file@2.0.5: {} @@ -4139,6 +4630,9 @@ snapshots: dependencies: has-symbols: 1.1.0 + has-unicode@2.0.1: + optional: true + hasown@2.0.3: dependencies: function-bind: 1.1.2 @@ -4156,6 +4650,18 @@ snapshots: domutils: 3.2.2 entities: 7.0.1 + http-cache-semantics@4.2.0: + optional: true + + http-proxy-agent@4.0.1: + dependencies: + '@tootallnate/once': 1.1.2 + agent-base: 6.0.2 + debug: 4.4.3 + transitivePeerDependencies: + - supports-color + optional: true + http-proxy-agent@7.0.2: dependencies: agent-base: 7.1.4 @@ -4163,6 +4669,13 @@ snapshots: transitivePeerDependencies: - supports-color + https-proxy-agent@5.0.1: + dependencies: + agent-base: 6.0.2 + debug: 4.4.3 + transitivePeerDependencies: + - supports-color + https-proxy-agent@7.0.6: dependencies: agent-base: 7.1.4 @@ -4200,6 +4713,12 @@ snapshots: imurmurhash@0.1.4: {} + indent-string@4.0.0: + optional: true + + infer-owner@1.0.4: + optional: true + inflight@1.0.6: dependencies: once: 1.4.0 @@ -4211,6 +4730,9 @@ snapshots: interpret@3.1.1: {} + ip-address@10.2.0: + optional: true + is-arrayish@0.2.1: {} is-core-module@2.16.2: @@ -4229,6 +4751,9 @@ snapshots: is-interactive@2.0.0: {} + is-lambda@1.0.1: + optional: true + is-number@7.0.0: {} is-path-inside@3.0.3: {} @@ -4699,6 +5224,29 @@ snapshots: make-error@1.3.6: {} + make-fetch-happen@9.1.0: + dependencies: + agentkeepalive: 4.6.0 + cacache: 15.3.0 + http-cache-semantics: 4.2.0 + http-proxy-agent: 4.0.1 + https-proxy-agent: 5.0.1 + is-lambda: 1.0.1 + lru-cache: 6.0.0 + minipass: 3.3.6 + minipass-collect: 1.0.2 + minipass-fetch: 1.4.1 + minipass-flush: 1.0.7 + minipass-pipeline: 1.2.4 + negotiator: 0.6.4 + promise-retry: 2.0.1 + socks-proxy-agent: 6.2.1 + ssri: 8.0.1 + transitivePeerDependencies: + - bluebird + - supports-color + optional: true + makeerror@1.0.12: dependencies: tmpl: 1.0.5 @@ -4750,8 +5298,50 @@ snapshots: minimist@1.2.8: {} + minipass-collect@1.0.2: + dependencies: + minipass: 3.3.6 + optional: true + + minipass-fetch@1.4.1: + 
dependencies: + minipass: 3.3.6 + minipass-sized: 1.0.3 + minizlib: 2.1.2 + optionalDependencies: + encoding: 0.1.13 + optional: true + + minipass-flush@1.0.7: + dependencies: + minipass: 3.3.6 + optional: true + + minipass-pipeline@1.2.4: + dependencies: + minipass: 3.3.6 + optional: true + + minipass-sized@1.0.3: + dependencies: + minipass: 3.3.6 + optional: true + + minipass@3.3.6: + dependencies: + yallist: 4.0.0 + + minipass@5.0.0: {} + + minizlib@2.1.2: + dependencies: + minipass: 3.3.6 + yallist: 4.0.0 + mkdirp-classic@0.5.3: {} + mkdirp@1.0.4: {} + ms@2.1.3: {} mute-stream@0.0.8: {} @@ -4760,6 +5350,9 @@ snapshots: natural-compare@1.4.0: {} + negotiator@0.6.4: + optional: true + neo-async@2.6.2: {} node-abi@3.92.0: @@ -4768,22 +5361,56 @@ snapshots: node-addon-api@4.3.0: {} + node-addon-api@7.1.1: {} + node-domexception@1.0.0: {} - node-fetch@2.7.0: + node-fetch@2.7.0(encoding@0.1.13): dependencies: whatwg-url: 5.0.0 + optionalDependencies: + encoding: 0.1.13 + + node-gyp@8.4.1: + dependencies: + env-paths: 2.2.1 + glob: 7.2.3 + graceful-fs: 4.2.11 + make-fetch-happen: 9.1.0 + nopt: 5.0.0 + npmlog: 6.0.2 + rimraf: 3.0.2 + semver: 7.8.1 + tar: 6.2.1 + which: 2.0.2 + transitivePeerDependencies: + - bluebird + - supports-color + optional: true node-int64@0.4.0: {} node-releases@2.0.46: {} + nopt@5.0.0: + dependencies: + abbrev: 1.1.1 + optional: true + normalize-path@3.0.0: {} npm-run-path@4.0.1: dependencies: path-key: 3.1.1 + npmlog@6.0.2: + dependencies: + are-we-there-yet: 3.0.1 + console-control-strings: 1.1.0 + gauge: 4.0.4 + set-blocking: 2.0.0 + optional: true + nth-check@2.1.1: dependencies: boolbase: 1.0.0 @@ -4802,7 +5429,7 @@ snapshots: dependencies: mimic-function: 5.0.1 - openai@4.104.0: + openai@4.104.0(encoding@0.1.13)(zod@3.25.76): dependencies: '@types/node': 18.19.130 '@types/node-fetch': 2.6.13 @@ -4810,7 +5437,9 @@ snapshots: agentkeepalive: 4.6.0 form-data-encoder: 1.7.2 formdata-node: 4.4.1 - node-fetch: 2.7.0 + node-fetch: 
2.7.0(encoding@0.1.13) + optionalDependencies: + zod: 3.25.76 transitivePeerDependencies: - encoding @@ -4851,6 +5480,11 @@ snapshots: dependencies: p-limit: 3.1.0 + p-map@4.0.0: + dependencies: + aggregate-error: 3.1.0 + optional: true + p-try@2.2.0: {} pako@1.0.11: {} @@ -4936,11 +5570,22 @@ snapshots: process-nextick-args@2.0.1: {} + promise-inflight@1.0.1: + optional: true + + promise-retry@2.0.1: + dependencies: + err-code: 2.0.3 + retry: 0.12.0 + optional: true + prompts@2.4.2: dependencies: kleur: 3.0.3 sisteransi: 1.0.5 + proxy-from-env@2.1.0: {} + pump@3.0.4: dependencies: end-of-stream: 1.4.5 @@ -4965,6 +5610,8 @@ snapshots: react-is@18.3.1: {} + react@19.2.6: {} + read@1.0.7: dependencies: mute-stream: 0.0.8 @@ -5015,6 +5662,9 @@ snapshots: onetime: 7.0.0 signal-exit: 4.1.0 + retry@0.12.0: + optional: true + reusify@1.1.0: {} rimraf@3.0.2: @@ -5046,6 +5696,9 @@ snapshots: semver@7.8.1: {} + set-blocking@2.0.0: + optional: true + setimmediate@1.0.5: {} shallow-clone@3.0.1: @@ -5102,6 +5755,24 @@ snapshots: slash@3.0.0: {} + smart-buffer@4.2.0: + optional: true + + socks-proxy-agent@6.2.1: + dependencies: + agent-base: 6.0.2 + debug: 4.4.3 + socks: 2.8.9 + transitivePeerDependencies: + - supports-color + optional: true + + socks@2.8.9: + dependencies: + ip-address: 10.2.0 + smart-buffer: 4.2.0 + optional: true + source-map-support@0.5.13: dependencies: buffer-from: 1.1.2 @@ -5118,6 +5789,23 @@ snapshots: sprintf-js@1.0.3: {} + sqlite3@5.1.7: + dependencies: + bindings: 1.5.0 + node-addon-api: 7.1.1 + prebuild-install: 7.1.3 + tar: 6.2.1 + optionalDependencies: + node-gyp: 8.4.1 + transitivePeerDependencies: + - bluebird + - supports-color + + ssri@8.0.1: + dependencies: + minipass: 3.3.6 + optional: true + stack-utils@2.0.6: dependencies: escape-string-regexp: 2.0.0 @@ -5183,6 +5871,8 @@ snapshots: dependencies: '@pkgr/core': 0.2.9 + tabbable@5.3.3: {} + tapable@2.3.3: {} tar-fs@2.1.4: @@ -5200,6 +5890,15 @@ snapshots: inherits: 2.0.4 readable-stream: 
3.6.2 + tar@6.2.1: + dependencies: + chownr: 2.0.0 + fs-minipass: 2.1.0 + minipass: 5.0.0 + minizlib: 2.1.2 + mkdirp: 1.0.4 + yallist: 4.0.0 + terser-webpack-plugin@5.6.0(webpack@5.107.1): dependencies: '@jridgewell/trace-mapping': 0.3.31 @@ -5231,6 +5930,8 @@ snapshots: dependencies: is-number: 7.0.0 + token-costs@3.5.0: {} + tr46@0.0.3: {} ts-api-utils@1.4.3(typescript@5.9.3): @@ -5267,6 +5968,10 @@ snapshots: typescript: 5.9.3 webpack: 5.107.1(webpack-cli@5.1.4) + tslib@1.14.1: {} + + tslib@2.8.1: {} + tunnel-agent@0.6.0: dependencies: safe-buffer: 5.2.1 @@ -5304,8 +6009,22 @@ snapshots: undici-types@6.21.0: {} + undici@5.29.0: + dependencies: + '@fastify/busboy': 2.1.1 + undici@7.25.0: {} + unique-filename@1.1.1: + dependencies: + unique-slug: 2.0.2 + optional: true + + unique-slug@2.0.2: + dependencies: + imurmurhash: 0.1.4 + optional: true + update-browserslist-db@1.2.3(browserslist@4.28.2): dependencies: browserslist: 4.28.2 @@ -5443,6 +6162,11 @@ snapshots: dependencies: isexe: 2.0.0 + wide-align@1.1.5: + dependencies: + string-width: 4.2.3 + optional: true + wildcard@2.0.1: {} word-wrap@1.2.5: {} @@ -5497,3 +6221,5 @@ snapshots: buffer-crc32: 0.2.13 yocto-queue@0.1.0: {} + + zod@3.25.76: {} diff --git a/promptimize-0.1.0.vsix b/promptimize-0.1.0.vsix new file mode 100644 index 0000000..aee936a Binary files /dev/null and b/promptimize-0.1.0.vsix differ diff --git a/scripts/copy-webview-assets.js b/scripts/copy-webview-assets.js new file mode 100644 index 0000000..0b634bf --- /dev/null +++ b/scripts/copy-webview-assets.js @@ -0,0 +1,38 @@ +#!/usr/bin/env node +const fs = require('fs'); +const path = require('path'); + +const sourceDir = path.join(__dirname, '..', 'src', 'presentation', 'webview'); +const targetDir = path.join(__dirname, '..', 'out', 'presentation', 'webview'); +const toolkitSource = path.join( + __dirname, + '..', + 'node_modules', + '@vscode', + 'webview-ui-toolkit', + 'dist', + 'toolkit.min.js' +); + +const extensions = ['.html', '.css']; 
+ +if (!fs.existsSync(sourceDir)) { + process.exit(0); +} + +fs.mkdirSync(targetDir, { recursive: true }); + +for (const file of fs.readdirSync(sourceDir)) { + if (extensions.some(ext => file.endsWith(ext))) { + fs.copyFileSync(path.join(sourceDir, file), path.join(targetDir, file)); + } +} + +if (fs.existsSync(toolkitSource)) { + fs.copyFileSync(toolkitSource, path.join(targetDir, 'toolkit.min.js')); + console.log('Copied webview UI toolkit to out/presentation/webview'); +} else { + console.warn('Warning: @vscode/webview-ui-toolkit not found; webview controls may not render'); +} + +console.log('Copied webview assets to out/presentation/webview'); diff --git a/setup.sh b/setup.sh index dcb5371..ee541d2 100755 --- a/setup.sh +++ b/setup.sh @@ -1,8 +1,8 @@ #!/bin/bash -# Cursor Whisper - Development Setup Script +# Promptimize - Development Setup Script -echo "🎤 Setting up Cursor Whisper development environment..." +echo "🎤 Setting up Promptimize development environment..." # Use Node 22 via nvm source "$(dirname "$0")/scripts/ensure-node.sh" diff --git a/skills-lock.json b/skills-lock.json new file mode 100644 index 0000000..cff06f0 --- /dev/null +++ b/skills-lock.json @@ -0,0 +1,11 @@ +{ + "version": 1, + "skills": { + "find-skills": { + "source": "vercel-labs/skills", + "sourceType": "github", + "skillPath": "skills/find-skills/SKILL.md", + "computedHash": "9e1c8b3103f92fa8092568a44fe64858de7c5c9dc65ce4bea8f168080e889cfd" + } + } +} diff --git a/src/__tests__/application/services/ConfigurationValidationService.test.ts b/src/__tests__/application/services/ConfigurationValidationService.test.ts new file mode 100644 index 0000000..628bfea --- /dev/null +++ b/src/__tests__/application/services/ConfigurationValidationService.test.ts @@ -0,0 +1,229 @@ +import { + validateConfigurationForRecording, + validateConfigurationForTranscription, + validateConfigurationForPromptimize, + validateConfigurationOnStartup, +} from 
'../../../application/services/ConfigurationValidationService'; +import { IConfigRepository, Config } from '../../../application/ports/IConfigRepository'; +import { ITransformationProviderValidator } from '../../../application/ports/ITransformationProviderValidator'; +import { TransformationProvider } from '../../../domain/value-objects/TransformationProvider'; +import { OPENAI_API_KEY_REQUIRED_RECORDING } from '../../../shared/constants/uxMessages'; +import { TRANSFORMATION_SYSTEM_PROMPT } from '../../../infrastructure/transformation/transformationUtils'; + +const baseConfig: Config = { + transformationProvider: TransformationProvider.Anthropic, + transcriptionLanguage: 'auto', + enablePromptTransformation: true, + transformationModel: 'gpt-4o', + anthropicModel: 'claude-3-5-sonnet-20241022', + googleModel: 'gemini-1.5-pro', + azureEndpoint: 'https://example.openai.azure.com', + azureDeployment: 'gpt-4o', + ollamaBaseUrl: 'http://localhost:11434', + ollamaModel: 'llama3.1:8b', + openCodeBaseUrl: 'http://127.0.0.1:4010/v1', + openCodeModel: 'anthropic/claude-sonnet-4-5', + openRouterModel: 'openai/gpt-4o', + cursorModel: 'composer-2.5', + audioQuality: 'high', + maxRecordingDuration: 120, + showNotifications: true, + transformationSystemPrompt: TRANSFORMATION_SYSTEM_PROMPT, +}; + +function createConfigRepo( + overrides: Partial = {}, + providerKeys: Partial> = {} +): IConfigRepository { + const config = { ...baseConfig, ...overrides }; + + return { + getConfig: jest.fn(async () => config), + updateConfig: jest.fn(async () => undefined), + getProviderApiKey: jest.fn(async provider => { + if (provider in providerKeys) { + return providerKeys[provider]; + } + return provider === TransformationProvider.OpenAI ? 
'openai-key' : 'provider-key'; + }), + setProviderApiKey: jest.fn(async () => undefined), + onConfigChange: jest.fn(), + }; +} + +function createProviderValidator( + validateProvider: ITransformationProviderValidator['validateProvider'] +): ITransformationProviderValidator { + return { validateProvider }; +} + +describe('ConfigurationValidationService', () => { + describe('validateConfigurationForTranscription', () => { + it('requires OpenAI key for Whisper transcription', async () => { + const configRepo = createConfigRepo({}, { [TransformationProvider.OpenAI]: undefined }); + + const issue = await validateConfigurationForTranscription(configRepo); + + expect(issue).toEqual({ + message: OPENAI_API_KEY_REQUIRED_RECORDING, + configureCommand: 'promptimize.openConfigurationPanel', + }); + }); + + it('passes when OpenAI key is configured', async () => { + const configRepo = createConfigRepo(); + + const issue = await validateConfigurationForTranscription(configRepo); + + expect(issue).toBeUndefined(); + }); + }); + + describe('validateConfigurationForPromptimize', () => { + it('opens configuration when prompt optimization is disabled', async () => { + const configRepo = createConfigRepo({ enablePromptTransformation: false }); + const validator = createProviderValidator(jest.fn(async () => undefined)); + + const issue = await validateConfigurationForPromptimize(configRepo, validator); + + expect(issue).toEqual({ + message: 'Prompt optimization is disabled. 
Enable it in configuration to use Promptimize.', + configureCommand: 'promptimize.openConfigurationPanel', + }); + }); + + it('requires OpenAI key and provider configuration when optimization is enabled', async () => { + const configRepo = createConfigRepo({}, { [TransformationProvider.OpenAI]: undefined }); + const validator = createProviderValidator(jest.fn(async () => undefined)); + + const issue = await validateConfigurationForPromptimize(configRepo, validator); + + expect(issue?.configureCommand).toBe('promptimize.configureApiKey'); + }); + }); + + describe('validateConfigurationForRecording', () => { + it('requires OpenAI key for Whisper transcription', async () => { + const configRepo = createConfigRepo({}, { [TransformationProvider.OpenAI]: undefined }); + const validateProvider = jest.fn(async (_provider: TransformationProvider) => undefined); + const validator = createProviderValidator(validateProvider); + + const issue = await validateConfigurationForRecording(configRepo, validator); + + expect(issue).toEqual({ + message: + 'OpenAI API key is required for voice-to-text transcription (Whisper). Prompt optimization uses a separate provider you can configure later.', + configureCommand: 'promptimize.configureApiKey', + }); + }); + + it('skips provider validation when prompt transformation is disabled', async () => { + const configRepo = createConfigRepo({ enablePromptTransformation: false }); + const validateProvider = jest.fn(async (_provider: TransformationProvider) => + 'Anthropic API key is not configured.' 
+ ); + const validator = createProviderValidator(validateProvider); + + const issue = await validateConfigurationForRecording(configRepo, validator); + + expect(issue).toBeUndefined(); + expect(validateProvider).not.toHaveBeenCalled(); + }); + + it('validates active provider when prompt transformation is enabled', async () => { + const configRepo = createConfigRepo(); + const validateProvider = jest.fn(async (_provider: TransformationProvider) => + 'Anthropic API key is not configured.' + ); + const validator = createProviderValidator(validateProvider); + + const issue = await validateConfigurationForRecording(configRepo, validator); + + expect(validateProvider).toHaveBeenCalledWith(TransformationProvider.Anthropic); + expect(issue).toEqual({ + message: 'Anthropic API key is not configured.', + configureCommand: 'promptimize.openConfigurationPanel', + }); + }); + }); + + describe('validateConfigurationOnStartup', () => { + it('warns about missing OpenAI key for transcription', async () => { + const configRepo = createConfigRepo({}, { [TransformationProvider.OpenAI]: undefined }); + const validateProvider = jest.fn(async (_provider: TransformationProvider) => undefined); + const validator = createProviderValidator(validateProvider); + + const issue = await validateConfigurationOnStartup(configRepo, validator); + + expect(issue?.message).toContain('OpenAI API key is required for voice-to-text transcription'); + expect(issue?.configureCommand).toBe('promptimize.openConfigurationPanel'); + }); + + it('does not warn about transformation provider keys when transformation is disabled', async () => { + const configRepo = createConfigRepo({ enablePromptTransformation: false }); + const validateProvider = jest.fn(async (_provider: TransformationProvider) => + 'Anthropic API key is not configured.' 
+ ); + const validator = createProviderValidator(validateProvider); + + const issue = await validateConfigurationOnStartup(configRepo, validator); + + expect(issue).toBeUndefined(); + expect(validateProvider).not.toHaveBeenCalled(); + }); + + it('warns about missing key for the active transformation provider', async () => { + const configRepo = createConfigRepo( + { transformationProvider: TransformationProvider.Anthropic }, + { + [TransformationProvider.OpenAI]: 'openai-key', + [TransformationProvider.Anthropic]: undefined, + } + ); + const validateProvider = jest.fn(async (_provider: TransformationProvider) => undefined); + const validator = createProviderValidator(validateProvider); + + const issue = await validateConfigurationOnStartup(configRepo, validator); + + expect(issue).toEqual({ + message: 'Promptimize: Anthropic credentials are not configured for prompt optimization.', + configureCommand: 'promptimize.openConfigurationPanel', + }); + }); + + it('validates Ollama reachability without requiring an API key', async () => { + const configRepo = createConfigRepo( + { transformationProvider: TransformationProvider.Ollama }, + { [TransformationProvider.OpenAI]: 'openai-key' } + ); + const validateProvider = jest.fn(async (_provider: TransformationProvider) => + 'Ollama server is not reachable. Ensure Ollama is running locally.' 
+ ); + const validator = createProviderValidator(validateProvider); + + const issue = await validateConfigurationOnStartup(configRepo, validator); + + expect(issue?.message).toContain('Ollama server is not reachable'); + expect(issue?.configureCommand).toBe('promptimize.openConfigurationPanel'); + }); + + it('warns about missing key for Cursor transformation provider', async () => { + const configRepo = createConfigRepo( + { transformationProvider: TransformationProvider.Cursor }, + { + [TransformationProvider.OpenAI]: 'openai-key', + [TransformationProvider.Cursor]: undefined, + } + ); + const validateProvider = jest.fn(async (_provider: TransformationProvider) => undefined); + const validator = createProviderValidator(validateProvider); + + const issue = await validateConfigurationOnStartup(configRepo, validator); + + expect(issue).toEqual({ + message: 'Promptimize: Cursor credentials are not configured for prompt optimization.', + configureCommand: 'promptimize.openConfigurationPanel', + }); + }); + }); +}); diff --git a/src/__tests__/application/services/ProviderPricingService.test.ts b/src/__tests__/application/services/ProviderPricingService.test.ts new file mode 100644 index 0000000..5eb3284 --- /dev/null +++ b/src/__tests__/application/services/ProviderPricingService.test.ts @@ -0,0 +1,90 @@ +import { CostClient } from 'token-costs'; +import { TransformationProvider } from '../../../domain/value-objects/TransformationProvider'; +import { ProviderPricingService } from '../../../application/services/ProviderPricingService'; + +jest.mock('token-costs', () => ({ + CostClient: jest.fn(), +})); + +const MockCostClient = CostClient as jest.MockedClass; + +describe('ProviderPricingService', () => { + beforeEach(() => { + jest.clearAllMocks(); + }); + + it('returns real-time pricing for supported cloud providers', async () => { + MockCostClient.mockImplementation( + () => + ({ + calculateCost: jest.fn().mockResolvedValue({ + inputCost: 0.001, + outputCost: 0.003, + 
totalCost: 0.004, + usedCachedPricing: false, + date: '2026-05-24', + stale: false, + }), + }) as unknown as CostClient + ); + + const service = new ProviderPricingService(); + const comparison = await service.getProviderComparison(); + + expect(comparison).toHaveLength(8); + + const openAi = comparison.find(entry => entry.provider === TransformationProvider.OpenAI); + expect(openAi).toMatchObject({ + costPerTransform: '~$0.004/transform', + isRealTime: true, + lastUpdated: '2026-05-24', + }); + + const ollama = comparison.find(entry => entry.provider === TransformationProvider.Ollama); + expect(ollama).toMatchObject({ + costPerTransform: 'Free (local compute)', + isRealTime: false, + }); + }); + + it('falls back to static pricing when token-costs fetch fails', async () => { + MockCostClient.mockImplementation( + () => + ({ + calculateCost: jest.fn().mockRejectedValue(new Error('network error')), + }) as unknown as CostClient + ); + + const service = new ProviderPricingService(); + const comparison = await service.getProviderComparison(); + + expect(comparison.every(entry => !entry.isRealTime)).toBe(true); + expect(comparison.find(entry => entry.provider === TransformationProvider.OpenAI)?.costPerTransform).toBe( + '~$0.01' + ); + }); + + it('reuses cached pricing within the cache window', async () => { + const calculateCost = jest.fn().mockResolvedValue({ + inputCost: 0.001, + outputCost: 0.003, + totalCost: 0.004, + usedCachedPricing: false, + date: '2026-05-24', + stale: false, + }); + + MockCostClient.mockImplementation( + () => + ({ + calculateCost, + }) as unknown as CostClient + ); + + const service = new ProviderPricingService(); + await service.getProviderComparison(); + await service.getProviderComparison(); + + expect(calculateCost).toHaveBeenCalledTimes(4); + }); +}); diff --git a/src/__tests__/application/use-cases/StartRecordingUseCase.test.ts b/src/__tests__/application/use-cases/StartRecordingUseCase.test.ts new file mode 100644 index 
0000000..b7e6286 --- /dev/null +++ b/src/__tests__/application/use-cases/StartRecordingUseCase.test.ts @@ -0,0 +1,67 @@ +import { StartRecordingUseCase } from '../../../application/use-cases/StartRecordingUseCase'; +import { IAudioRecorder } from '../../../application/ports/IAudioRecorder'; +import { RecordingError } from '../../../domain/errors/RecordingError'; +import { createMockLogger } from '../../helpers/mockLogger'; +import { + getRecordingSessionMode, + setRecordingSessionMode, +} from '../../../shared/services/RecordingSessionMode'; + +function createMocks(options?: { isRecording?: boolean; startFails?: boolean }) { + const audioRecorder: IAudioRecorder = { + startRecording: jest.fn(async () => { + if (options?.startFails) { + throw new Error('Microphone unavailable'); + } + }), + stopRecording: jest.fn(), + cancelRecording: jest.fn(), + isRecording: jest.fn(() => options?.isRecording ?? false), + getState: jest.fn(), + onStateChange: jest.fn(), + }; + + return { audioRecorder }; +} + +describe('StartRecordingUseCase', () => { + beforeEach(() => { + setRecordingSessionMode(null); + }); + + it('starts recording in transcribe mode', async () => { + const { audioRecorder } = createMocks(); + const useCase = new StartRecordingUseCase(audioRecorder, createMockLogger()); + + await useCase.execute('transcribe'); + + expect(audioRecorder.startRecording).toHaveBeenCalled(); + expect(getRecordingSessionMode()).toBe('transcribe'); + }); + + it('starts recording in promptimize mode', async () => { + const { audioRecorder } = createMocks(); + const useCase = new StartRecordingUseCase(audioRecorder, createMockLogger()); + + await useCase.execute('promptimize'); + + expect(audioRecorder.startRecording).toHaveBeenCalled(); + expect(getRecordingSessionMode()).toBe('promptimize'); + }); + + it('throws RecordingError when already recording', async () => { + const { audioRecorder } = createMocks({ isRecording: true }); + const useCase = new 
StartRecordingUseCase(audioRecorder, createMockLogger()); + + await expect(useCase.execute('transcribe')).rejects.toThrow(RecordingError); + expect(getRecordingSessionMode()).toBeNull(); + }); + + it('clears session mode when recording fails to start', async () => { + const { audioRecorder } = createMocks({ startFails: true }); + const useCase = new StartRecordingUseCase(audioRecorder, createMockLogger()); + + await expect(useCase.execute('transcribe')).rejects.toThrow(RecordingError); + expect(getRecordingSessionMode()).toBeNull(); + }); +}); diff --git a/src/__tests__/domain/value-objects/TransformationProvider.test.ts b/src/__tests__/domain/value-objects/TransformationProvider.test.ts new file mode 100644 index 0000000..e53eb74 --- /dev/null +++ b/src/__tests__/domain/value-objects/TransformationProvider.test.ts @@ -0,0 +1,40 @@ +import { + TransformationProvider, + parseTransformationProvider, + isTransformationProvider, + getProviderSecretKey, + PROVIDER_METADATA, +} from '../../../domain/value-objects/TransformationProvider'; + +describe('TransformationProvider', () => { + it('parses valid provider values', () => { + expect(parseTransformationProvider('anthropic')).toBe(TransformationProvider.Anthropic); + expect(parseTransformationProvider('ollama')).toBe(TransformationProvider.Ollama); + expect(parseTransformationProvider('opencode')).toBe(TransformationProvider.OpenCode); + expect(parseTransformationProvider('openrouter')).toBe(TransformationProvider.OpenRouter); + expect(parseTransformationProvider('cursor')).toBe(TransformationProvider.Cursor); + }); + + it('falls back to OpenAI for invalid values', () => { + expect(parseTransformationProvider('invalid')).toBe(TransformationProvider.OpenAI); + expect(parseTransformationProvider(undefined)).toBe(TransformationProvider.OpenAI); + }); + + it('validates provider strings', () => { + expect(isTransformationProvider('google')).toBe(true); + expect(isTransformationProvider('unknown')).toBe(false); + }); + + 
it('generates provider-specific secret keys', () => { + expect(getProviderSecretKey(TransformationProvider.Anthropic)).toBe( + 'promptimize.apiKey.anthropic' + ); + }); + + it('defines metadata for all providers', () => { + for (const provider of Object.values(TransformationProvider)) { + expect(PROVIDER_METADATA[provider].id).toBe(provider); + expect(PROVIDER_METADATA[provider].displayName).toBeTruthy(); + } + }); +}); diff --git a/src/__tests__/helpers/mockLogger.ts b/src/__tests__/helpers/mockLogger.ts new file mode 100644 index 0000000..45063e3 --- /dev/null +++ b/src/__tests__/helpers/mockLogger.ts @@ -0,0 +1,11 @@ +import { ILogger } from '../../application/ports/ILogger'; + +export function createMockLogger(): ILogger { + return { + debug: jest.fn(), + info: jest.fn(), + warn: jest.fn(), + error: jest.fn(), + setLevel: jest.fn(), + }; +} diff --git a/src/__tests__/infrastructure/audio/NativeAudioRecorder.test.ts b/src/__tests__/infrastructure/audio/NativeAudioRecorder.test.ts new file mode 100644 index 0000000..3a827f3 --- /dev/null +++ b/src/__tests__/infrastructure/audio/NativeAudioRecorder.test.ts @@ -0,0 +1,19 @@ +import { NativeAudioRecorder } from '../../../infrastructure/audio/NativeAudioRecorder'; +import { RecordingError } from '../../../domain/errors/RecordingError'; +import { createMockLogger } from '../../helpers/mockLogger'; + +describe('NativeAudioRecorder', () => { + it('returns a rejected promise when stopRecording is called without an active recording', async () => { + const recorder = new NativeAudioRecorder(createMockLogger()); + + await expect(recorder.stopRecording()).rejects.toBeInstanceOf(RecordingError); + }); + + it('does not throw synchronously when stopRecording is called without an active recording', () => { + const recorder = new NativeAudioRecorder(createMockLogger()); + + expect(() => { + void recorder.stopRecording().catch(() => undefined); + }).not.toThrow(); + }); +}); diff --git 
a/src/__tests__/infrastructure/transformation/AnthropicPromptTransformer.test.ts b/src/__tests__/infrastructure/transformation/AnthropicPromptTransformer.test.ts new file mode 100644 index 0000000..b3cefc2 --- /dev/null +++ b/src/__tests__/infrastructure/transformation/AnthropicPromptTransformer.test.ts @@ -0,0 +1,34 @@ +import Anthropic from '@anthropic-ai/sdk'; +import { AnthropicPromptTransformer } from '../../../infrastructure/transformation/AnthropicPromptTransformer'; +import { TRANSFORMATION_SYSTEM_PROMPT } from '../../../infrastructure/transformation/transformationUtils'; +import { createMockLogger } from '../../helpers/mockLogger'; + +jest.mock('@anthropic-ai/sdk'); + +const logger = createMockLogger(); + +describe('AnthropicPromptTransformer', () => { + it('transforms transcription using Anthropic API', async () => { + const create = jest.fn().mockResolvedValue({ + content: [{ type: 'text', text: 'Objective: Refactor authentication to JWT.' }], + }); + + (Anthropic as unknown as jest.Mock).mockImplementation(() => ({ + messages: { create }, + })); + + const transformer = new AnthropicPromptTransformer( + async () => 'anthropic-key', + async () => 'claude-3-5-sonnet-20241022', + async () => TRANSFORMATION_SYSTEM_PROMPT, + logger + ); + + const result = await transformer.transform('refactor auth to jwt'); + + expect(result.transformedText).toContain('JWT'); + expect(create).toHaveBeenCalledWith( + expect.objectContaining({ model: 'claude-3-5-sonnet-20241022' }) + ); + }); +}); diff --git a/src/__tests__/infrastructure/transformation/AzureOpenAIPromptTransformer.test.ts b/src/__tests__/infrastructure/transformation/AzureOpenAIPromptTransformer.test.ts new file mode 100644 index 0000000..1a6a1a6 --- /dev/null +++ b/src/__tests__/infrastructure/transformation/AzureOpenAIPromptTransformer.test.ts @@ -0,0 +1,42 @@ +import OpenAI from 'openai'; +import { AzureOpenAIPromptTransformer } from '../../../infrastructure/transformation/AzureOpenAIPromptTransformer'; 
+import { TRANSFORMATION_SYSTEM_PROMPT } from '../../../infrastructure/transformation/transformationUtils'; +import { createMockLogger } from '../../helpers/mockLogger'; + +jest.mock('openai'); + +const logger = createMockLogger(); + +describe('AzureOpenAIPromptTransformer', () => { + it('transforms transcription using Azure OpenAI deployment', async () => { + const create = jest.fn().mockResolvedValue({ + choices: [{ message: { content: 'Objective: Refactor authentication to JWT.' } }], + }); + + (OpenAI as unknown as jest.Mock).mockImplementation(() => ({ + chat: { completions: { create } }, + })); + + const transformer = new AzureOpenAIPromptTransformer( + async () => 'azure-key', + async () => ({ + endpoint: 'https://example.openai.azure.com', + deployment: 'gpt-4o', + }), + async () => TRANSFORMATION_SYSTEM_PROMPT, + logger + ); + + const result = await transformer.transform('refactor auth to jwt'); + + expect(OpenAI).toHaveBeenCalledWith( + expect.objectContaining({ + baseURL: 'https://example.openai.azure.com/openai/deployments/gpt-4o', + }) + ); + expect(result.transformedText).toContain('JWT'); + expect(create).toHaveBeenCalledWith( + expect.objectContaining({ model: 'gpt-4o' }) + ); + }); +}); diff --git a/src/__tests__/infrastructure/transformation/CursorPromptTransformer.test.ts b/src/__tests__/infrastructure/transformation/CursorPromptTransformer.test.ts new file mode 100644 index 0000000..004d44e --- /dev/null +++ b/src/__tests__/infrastructure/transformation/CursorPromptTransformer.test.ts @@ -0,0 +1,70 @@ +import { Agent } from '@cursor/sdk'; +import { CursorPromptTransformer } from '../../../infrastructure/transformation/CursorPromptTransformer'; +import { TRANSFORMATION_SYSTEM_PROMPT } from '../../../infrastructure/transformation/transformationUtils'; +import { createMockLogger } from '../../helpers/mockLogger'; + +jest.mock('@cursor/sdk', () => ({ + Agent: { + prompt: jest.fn(), + }, +})); + +const logger = createMockLogger(); + 
+describe('CursorPromptTransformer', () => { + beforeEach(() => { + jest.clearAllMocks(); + }); + + it('transforms transcription using Cursor SDK', async () => { + (Agent.prompt as jest.Mock).mockResolvedValue({ + status: 'finished', + result: 'Objective: Refactor authentication to JWT.', + }); + + const transformer = new CursorPromptTransformer( + async () => 'cursor-api-key', + async () => 'composer-2.5', + async () => TRANSFORMATION_SYSTEM_PROMPT, + logger + ); + + const result = await transformer.transform('refactor auth to jwt'); + + expect(result.transformedText).toContain('JWT'); + expect(Agent.prompt).toHaveBeenCalledWith( + expect.stringContaining('refactor auth to jwt'), + expect.objectContaining({ + apiKey: 'cursor-api-key', + model: { id: 'composer-2.5' }, + }) + ); + }); + + it('throws error when API key is not configured', async () => { + const transformer = new CursorPromptTransformer( + async () => undefined, + async () => 'composer-2.5', + async () => TRANSFORMATION_SYSTEM_PROMPT, + logger + ); + + await expect(transformer.transform('test')).rejects.toThrow('Cursor API key not configured'); + }); + + it('handles agent run errors', async () => { + (Agent.prompt as jest.Mock).mockResolvedValue({ + status: 'error', + result: null, + }); + + const transformer = new CursorPromptTransformer( + async () => 'cursor-api-key', + async () => 'composer-2.5', + async () => TRANSFORMATION_SYSTEM_PROMPT, + logger + ); + + await expect(transformer.transform('test')).rejects.toThrow('Cursor agent run failed'); + }); +}); diff --git a/src/__tests__/infrastructure/transformation/GooglePromptTransformer.test.ts b/src/__tests__/infrastructure/transformation/GooglePromptTransformer.test.ts new file mode 100644 index 0000000..bfb5b5d --- /dev/null +++ b/src/__tests__/infrastructure/transformation/GooglePromptTransformer.test.ts @@ -0,0 +1,34 @@ +import { GoogleGenerativeAI } from '@google/generative-ai'; +import { GooglePromptTransformer } from 
'../../../infrastructure/transformation/GooglePromptTransformer'; +import { TRANSFORMATION_SYSTEM_PROMPT } from '../../../infrastructure/transformation/transformationUtils'; +import { createMockLogger } from '../../helpers/mockLogger'; + +jest.mock('@google/generative-ai'); + +const logger = createMockLogger(); + +describe('GooglePromptTransformer', () => { + it('transforms transcription using Gemini API', async () => { + const generateContent = jest.fn().mockResolvedValue({ + response: { + text: () => 'Objective: Refactor authentication to JWT.', + }, + }); + + (GoogleGenerativeAI as unknown as jest.Mock).mockImplementation(() => ({ + getGenerativeModel: jest.fn(() => ({ generateContent })), + })); + + const transformer = new GooglePromptTransformer( + async () => 'google-key', + async () => 'gemini-1.5-pro', + async () => TRANSFORMATION_SYSTEM_PROMPT, + logger + ); + + const result = await transformer.transform('refactor auth to jwt'); + + expect(result.transformedText).toContain('JWT'); + expect(generateContent).toHaveBeenCalled(); + }); +}); diff --git a/src/__tests__/infrastructure/transformation/OllamaPromptTransformer.test.ts b/src/__tests__/infrastructure/transformation/OllamaPromptTransformer.test.ts new file mode 100644 index 0000000..018d814 --- /dev/null +++ b/src/__tests__/infrastructure/transformation/OllamaPromptTransformer.test.ts @@ -0,0 +1,81 @@ +import axios from 'axios'; +import { OllamaPromptTransformer } from '../../../infrastructure/transformation/OllamaPromptTransformer'; +import { TRANSFORMATION_SYSTEM_PROMPT } from '../../../infrastructure/transformation/transformationUtils'; +import { createMockLogger } from '../../helpers/mockLogger'; + +jest.mock('axios'); +const mockedAxios = axios as jest.Mocked; + +const logger = createMockLogger(); + +describe('OllamaPromptTransformer', () => { + beforeEach(() => { + jest.clearAllMocks(); + }); + + it('checks server availability', async () => { + mockedAxios.get.mockResolvedValueOnce({ status: 200, 
data: { models: [] } }); + + await expect(OllamaPromptTransformer.isAvailable('http://localhost:11434')).resolves.toBe( + true + ); + }); + + it('lists available models', async () => { + mockedAxios.get.mockResolvedValueOnce({ + status: 200, + data: { models: [{ name: 'llama3.1:8b' }, { name: 'mistral:latest' }] }, + }); + + await expect(OllamaPromptTransformer.listModels('http://localhost:11434')).resolves.toEqual([ + 'llama3.1:8b', + 'mistral:latest', + ]); + }); + + it('transforms transcription using Ollama API', async () => { + mockedAxios.post.mockResolvedValueOnce({ + status: 200, + data: { response: 'Refactor auth service to JWT with backward compatibility.' }, + }); + + const transformer = new OllamaPromptTransformer( + async () => ({ baseUrl: 'http://localhost:11434', model: 'llama3.1:8b' }), + async () => TRANSFORMATION_SYSTEM_PROMPT, + logger + ); + + const result = await transformer.transform('um refactor auth to jwt'); + + expect(result.transformedText).toContain('JWT'); + expect(mockedAxios.post).toHaveBeenCalledWith( + 'http://localhost:11434/api/generate', + expect.objectContaining({ model: 'llama3.1:8b', stream: false }), + expect.any(Object) + ); + }); + + it('uses configured system prompt in Ollama request', async () => { + mockedAxios.post.mockResolvedValueOnce({ + status: 200, + data: { response: 'Transformed output.' 
}, + }); + + const customPrompt = 'Custom system prompt for transformation.'; + const transformer = new OllamaPromptTransformer( + async () => ({ baseUrl: 'http://localhost:11434', model: 'llama3.1:8b' }), + async () => customPrompt, + logger + ); + + await transformer.transform('hello world'); + + expect(mockedAxios.post).toHaveBeenCalledWith( + 'http://localhost:11434/api/generate', + expect.objectContaining({ + prompt: expect.stringContaining(customPrompt), + }), + expect.any(Object) + ); + }); +}); diff --git a/src/__tests__/infrastructure/transformation/OpenCodePromptTransformer.test.ts b/src/__tests__/infrastructure/transformation/OpenCodePromptTransformer.test.ts new file mode 100644 index 0000000..22c8baf --- /dev/null +++ b/src/__tests__/infrastructure/transformation/OpenCodePromptTransformer.test.ts @@ -0,0 +1,123 @@ +import axios from 'axios'; +import OpenAI from 'openai'; +import { OpenCodePromptTransformer } from '../../../infrastructure/transformation/OpenCodePromptTransformer'; +import { TRANSFORMATION_SYSTEM_PROMPT } from '../../../infrastructure/transformation/transformationUtils'; +import { createMockLogger } from '../../helpers/mockLogger'; + +jest.mock('axios'); +jest.mock('openai'); + +const mockedAxios = axios as jest.Mocked; +const MockedOpenAI = OpenAI as jest.MockedClass; +const logger = createMockLogger(); + +describe('OpenCodePromptTransformer', () => { + beforeEach(() => { + jest.clearAllMocks(); + }); + + it('checks proxy availability', async () => { + mockedAxios.get.mockResolvedValueOnce({ status: 200, data: { data: [] } }); + + await expect( + OpenCodePromptTransformer.isAvailable('http://127.0.0.1:4010/v1') + ).resolves.toBe(true); + }); + + it('lists available models', async () => { + mockedAxios.get.mockResolvedValueOnce({ + status: 200, + data: { + data: [{ id: 'anthropic/claude-sonnet-4-5' }, { id: 'ollama/llama3.2' }], + }, + }); + + await expect( + OpenCodePromptTransformer.listModels('http://127.0.0.1:4010/v1') + 
).resolves.toEqual(['anthropic/claude-sonnet-4-5', 'ollama/llama3.2']); + }); + + it('transforms transcription using OpenCode proxy', async () => { + const createMock = jest.fn().mockResolvedValue({ + choices: [{ message: { content: 'Refactor auth service to JWT with backward compatibility.' } }], + }); + MockedOpenAI.mockImplementation( + () => + ({ + chat: { + completions: { + create: createMock, + }, + }, + }) as unknown as OpenAI + ); + + const transformer = new OpenCodePromptTransformer( + async () => ({ + baseUrl: 'http://127.0.0.1:4010/v1', + model: 'anthropic/claude-sonnet-4-5', + }), + async () => undefined, + async () => TRANSFORMATION_SYSTEM_PROMPT, + logger + ); + + const result = await transformer.transform('um refactor auth to jwt'); + + expect(result.transformedText).toContain('JWT'); + expect(MockedOpenAI).toHaveBeenCalledWith({ + apiKey: 'unused', + baseURL: 'http://127.0.0.1:4010/v1', + }); + expect(createMock).toHaveBeenCalledWith( + expect.objectContaining({ + model: 'anthropic/claude-sonnet-4-5', + temperature: 0.3, + }) + ); + }); + + it('uses authentication token when configured', async () => { + const createMock = jest.fn().mockResolvedValue({ + choices: [{ message: { content: 'Transformed output.' 
} }], + }); + MockedOpenAI.mockImplementation( + () => + ({ + chat: { + completions: { + create: createMock, + }, + }, + }) as unknown as OpenAI + ); + + const transformer = new OpenCodePromptTransformer( + async () => ({ + baseUrl: 'http://127.0.0.1:4010/v1', + model: 'anthropic/claude-sonnet-4-5', + }), + async () => 'proxy-token', + async () => TRANSFORMATION_SYSTEM_PROMPT, + logger + ); + + await transformer.transform('hello world'); + + expect(MockedOpenAI).toHaveBeenCalledWith({ + apiKey: 'proxy-token', + baseURL: 'http://127.0.0.1:4010/v1', + }); + }); + + it('throws when model is not configured', async () => { + const transformer = new OpenCodePromptTransformer( + async () => ({ baseUrl: 'http://127.0.0.1:4010/v1', model: '' }), + async () => undefined, + async () => TRANSFORMATION_SYSTEM_PROMPT, + logger + ); + + await expect(transformer.transform('hello')).rejects.toThrow('OpenCode model is not configured'); + }); +}); diff --git a/src/__tests__/infrastructure/transformation/OpenRouterPromptTransformer.test.ts b/src/__tests__/infrastructure/transformation/OpenRouterPromptTransformer.test.ts new file mode 100644 index 0000000..41cc6f0 --- /dev/null +++ b/src/__tests__/infrastructure/transformation/OpenRouterPromptTransformer.test.ts @@ -0,0 +1,113 @@ +import axios from 'axios'; +import OpenAI from 'openai'; +import { OpenRouterPromptTransformer } from '../../../infrastructure/transformation/OpenRouterPromptTransformer'; +import { TRANSFORMATION_SYSTEM_PROMPT } from '../../../infrastructure/transformation/transformationUtils'; +import { createMockLogger } from '../../helpers/mockLogger'; + +jest.mock('axios'); +jest.mock('openai'); + +const mockedAxios = axios as jest.Mocked; +const MockedOpenAI = OpenAI as jest.MockedClass; +const logger = createMockLogger(); +const TEST_API_KEY = 'sk-or-v1-test-openrouter-api-key-1234567890'; + +describe('OpenRouterPromptTransformer', () => { + beforeEach(() => { + jest.clearAllMocks(); + }); + + it('lists available 
models', async () => { + mockedAxios.get.mockResolvedValueOnce({ + status: 200, + data: { + data: [{ id: 'openai/gpt-4o' }, { id: 'anthropic/claude-3.5-sonnet' }], + }, + }); + + await expect(OpenRouterPromptTransformer.listModels(TEST_API_KEY)).resolves.toEqual([ + 'anthropic/claude-3.5-sonnet', + 'openai/gpt-4o', + ]); + }); + + it('transforms transcription using OpenRouter', async () => { + const createMock = jest.fn().mockResolvedValue({ + choices: [{ message: { content: 'Refactor auth service to JWT with backward compatibility.' } }], + }); + MockedOpenAI.mockImplementation( + () => + ({ + chat: { + completions: { + create: createMock, + }, + }, + }) as unknown as OpenAI + ); + + const transformer = new OpenRouterPromptTransformer( + async () => TEST_API_KEY, + async () => 'openai/gpt-4o', + async () => TRANSFORMATION_SYSTEM_PROMPT, + logger + ); + + const result = await transformer.transform('um refactor auth to jwt'); + + expect(result.transformedText).toContain('JWT'); + expect(MockedOpenAI).toHaveBeenCalledWith( + expect.objectContaining({ + apiKey: TEST_API_KEY, + baseURL: 'https://openrouter.ai/api/v1', + defaultHeaders: expect.objectContaining({ + 'X-OpenRouter-Title': 'Promptimize', + }), + }) + ); + expect(createMock).toHaveBeenCalledWith( + expect.objectContaining({ + model: 'openai/gpt-4o', + temperature: 0.3, + }) + ); + }); + + it('throws when API key is missing', async () => { + const transformer = new OpenRouterPromptTransformer( + async () => undefined, + async () => 'openai/gpt-4o', + async () => TRANSFORMATION_SYSTEM_PROMPT, + logger + ); + + await expect(transformer.transform('hello')).rejects.toThrow('OpenRouter API key not configured'); + }); + + it('maps invalid API key errors', async () => { + const apiError = Object.assign(new Error('Unauthorized'), { + status: 401, + }); + Object.setPrototypeOf(apiError, OpenAI.APIError.prototype); + + MockedOpenAI.mockImplementation( + () => + ({ + chat: { + completions: { + create: 
jest.fn().mockRejectedValue(apiError), + }, + }, + }) as unknown as OpenAI + ); + + const transformer = new OpenRouterPromptTransformer( + async () => TEST_API_KEY, + async () => 'openai/gpt-4o', + async () => TRANSFORMATION_SYSTEM_PROMPT, + logger + ); + + await expect(transformer.transform('hello')).rejects.toThrow('Invalid OpenRouter API key'); + }); +}); diff --git a/src/__tests__/infrastructure/transformation/PromptTransformerFactory.test.ts b/src/__tests__/infrastructure/transformation/PromptTransformerFactory.test.ts new file mode 100644 index 0000000..4cb6264 --- /dev/null +++ b/src/__tests__/infrastructure/transformation/PromptTransformerFactory.test.ts @@ -0,0 +1,129 @@ +import { PromptTransformerFactory } from '../../../infrastructure/transformation/PromptTransformerFactory'; +import { OpenAIPromptTransformer } from '../../../infrastructure/transformation/OpenAIPromptTransformer'; +import { AnthropicPromptTransformer } from '../../../infrastructure/transformation/AnthropicPromptTransformer'; +import { GooglePromptTransformer } from '../../../infrastructure/transformation/GooglePromptTransformer'; +import { AzureOpenAIPromptTransformer } from '../../../infrastructure/transformation/AzureOpenAIPromptTransformer'; +import { OllamaPromptTransformer } from '../../../infrastructure/transformation/OllamaPromptTransformer'; +import { OpenCodePromptTransformer } from '../../../infrastructure/transformation/OpenCodePromptTransformer'; +import { OpenRouterPromptTransformer } from '../../../infrastructure/transformation/OpenRouterPromptTransformer'; +import { CursorPromptTransformer } from '../../../infrastructure/transformation/CursorPromptTransformer'; +import { TransformationProvider } from '../../../domain/value-objects/TransformationProvider'; +import { IConfigRepository, Config } from '../../../application/ports/IConfigRepository'; +import { createMockLogger } from '../../helpers/mockLogger'; +import { TRANSFORMATION_SYSTEM_PROMPT } from 
'../../../infrastructure/transformation/transformationUtils'; + +const baseConfig: Config = { + transformationProvider: TransformationProvider.OpenAI, + transcriptionLanguage: 'auto', + enablePromptTransformation: true, + transformationModel: 'gpt-4o', + anthropicModel: 'claude-3-5-sonnet-20241022', + googleModel: 'gemini-1.5-pro', + azureEndpoint: 'https://example.openai.azure.com', + azureDeployment: 'gpt-4o', + ollamaBaseUrl: 'http://localhost:11434', + ollamaModel: 'llama3.1:8b', + openCodeBaseUrl: 'http://127.0.0.1:4010/v1', + openCodeModel: 'anthropic/claude-sonnet-4-5', + openRouterModel: 'openai/gpt-4o', + cursorModel: 'composer-2.5', + audioQuality: 'high', + maxRecordingDuration: 120, + showNotifications: true, + transformationSystemPrompt: TRANSFORMATION_SYSTEM_PROMPT, +}; + +function createConfigRepo(overrides: Partial = {}): IConfigRepository { + const config = { ...baseConfig, ...overrides }; + + return { + getConfig: jest.fn(async () => config), + updateConfig: jest.fn(async () => undefined), + getProviderApiKey: jest.fn(async provider => { + if ( + provider === TransformationProvider.Ollama || + provider === TransformationProvider.OpenCode + ) { + return undefined; + } + return 'test-api-key'; + }), + setProviderApiKey: jest.fn(async () => undefined), + onConfigChange: jest.fn(), + }; +} + +const logger = createMockLogger(); + +describe('PromptTransformerFactory', () => { + it('creates OpenAI transformer by default', async () => { + const factory = new PromptTransformerFactory(createConfigRepo(), logger); + const transformer = await factory.createForProvider(TransformationProvider.OpenAI); + expect(transformer).toBeInstanceOf(OpenAIPromptTransformer); + }); + + it('creates provider-specific transformers', async () => { + const factory = new PromptTransformerFactory(createConfigRepo(), logger); + + expect(await factory.createForProvider(TransformationProvider.Anthropic)).toBeInstanceOf( + AnthropicPromptTransformer + ); + expect(await 
factory.createForProvider(TransformationProvider.Google)).toBeInstanceOf( + GooglePromptTransformer + ); + expect(await factory.createForProvider(TransformationProvider.Azure)).toBeInstanceOf( + AzureOpenAIPromptTransformer + ); + expect(await factory.createForProvider(TransformationProvider.Ollama)).toBeInstanceOf( + OllamaPromptTransformer + ); + expect(await factory.createForProvider(TransformationProvider.OpenCode)).toBeInstanceOf( + OpenCodePromptTransformer + ); + expect(await factory.createForProvider(TransformationProvider.OpenRouter)).toBeInstanceOf( + OpenRouterPromptTransformer + ); + expect(await factory.createForProvider(TransformationProvider.Cursor)).toBeInstanceOf( + CursorPromptTransformer + ); + }); + + it('validates missing API keys', async () => { + const configRepo = createConfigRepo(); + (configRepo.getProviderApiKey as jest.Mock).mockResolvedValue(undefined); + + const factory = new PromptTransformerFactory(configRepo, logger); + const error = await factory.validateProvider(TransformationProvider.Anthropic); + + expect(error).toContain('Anthropic API key'); + }); + + it('validates Azure configuration', async () => { + const factory = new PromptTransformerFactory( + createConfigRepo({ azureEndpoint: '', azureDeployment: '' }), + logger + ); + + const error = await factory.validateProvider(TransformationProvider.Azure); + expect(error).toContain('endpoint'); + }); + + it('validates OpenCode configuration', async () => { + jest.spyOn(OpenCodePromptTransformer, 'isAvailable').mockResolvedValueOnce(false); + + const factory = new PromptTransformerFactory(createConfigRepo(), logger); + const error = await factory.validateProvider(TransformationProvider.OpenCode); + + expect(error).toContain('OpenCode proxy is not reachable'); + }); + + it('validates missing OpenCode model', async () => { + const factory = new PromptTransformerFactory( + createConfigRepo({ openCodeModel: '' }), + logger + ); + const error = await 
factory.validateProvider(TransformationProvider.OpenCode); + + expect(error).toContain('OpenCode model is not configured'); + }); +}); diff --git a/src/__tests__/infrastructure/transformation/transformationUtils.test.ts b/src/__tests__/infrastructure/transformation/transformationUtils.test.ts new file mode 100644 index 0000000..9a2febf --- /dev/null +++ b/src/__tests__/infrastructure/transformation/transformationUtils.test.ts @@ -0,0 +1,45 @@ +import { + buildUserPrompt, + calculateImprovements, + getSystemPrompt, + TRANSFORMATION_SYSTEM_PROMPT, +} from '../../../infrastructure/transformation/transformationUtils'; + +describe('transformationUtils', () => { + it('builds user prompt with optional context', () => { + const prompt = buildUserPrompt('hello world', { + editorLanguage: 'typescript', + projectType: 'Node.js', + }); + + expect(prompt).toContain('hello world'); + expect(prompt).toContain('typescript'); + expect(prompt).toContain('Node.js'); + }); + + it('detects improvements between original and transformed text', () => { + const original = 'um I need to like refactor this service'; + const transformed = `Context: Auth service\nObjective: Refactor to JWT`; + + const improvements = calculateImprovements(original, transformed); + + expect(improvements).toContain('Removed filler words'); + expect(improvements).toContain('Added clear structure'); + }); + + it('includes a system prompt', () => { + expect(TRANSFORMATION_SYSTEM_PROMPT).toContain('prompt engineer'); + }); + + it('returns configured system prompt when set', () => { + expect( + getSystemPrompt({ transformationSystemPrompt: 'Custom prompt for testing.' 
}) + ).toBe('Custom prompt for testing.'); + }); + + it('falls back to default system prompt when empty', () => { + expect(getSystemPrompt({ transformationSystemPrompt: ' ' })).toBe( + TRANSFORMATION_SYSTEM_PROMPT + ); + }); +}); diff --git a/src/__tests__/integration/providerSwitching.test.ts b/src/__tests__/integration/providerSwitching.test.ts new file mode 100644 index 0000000..7c85177 --- /dev/null +++ b/src/__tests__/integration/providerSwitching.test.ts @@ -0,0 +1,114 @@ +import { + ConfigurablePromptTransformer, + PromptTransformerFactory, +} from '../../infrastructure/transformation/PromptTransformerFactory'; +import { OpenAIPromptTransformer } from '../../infrastructure/transformation/OpenAIPromptTransformer'; +import { AnthropicPromptTransformer } from '../../infrastructure/transformation/AnthropicPromptTransformer'; +import { OllamaPromptTransformer } from '../../infrastructure/transformation/OllamaPromptTransformer'; +import { TransformationProvider } from '../../domain/value-objects/TransformationProvider'; +import { IConfigRepository, Config } from '../../application/ports/IConfigRepository'; +import { createMockLogger } from '../helpers/mockLogger'; +import { TRANSFORMATION_SYSTEM_PROMPT } from '../../infrastructure/transformation/transformationUtils'; + +const baseConfig: Config = { + transformationProvider: TransformationProvider.OpenAI, + transcriptionLanguage: 'auto', + enablePromptTransformation: true, + transformationModel: 'gpt-4o', + anthropicModel: 'claude-3-5-sonnet-20241022', + googleModel: 'gemini-1.5-pro', + azureEndpoint: 'https://example.openai.azure.com', + azureDeployment: 'gpt-4o', + ollamaBaseUrl: 'http://localhost:11434', + ollamaModel: 'llama3.1:8b', + openCodeBaseUrl: 'http://127.0.0.1:4010/v1', + openCodeModel: 'anthropic/claude-sonnet-4-5', + openRouterModel: 'openai/gpt-4o', + cursorModel: 'composer-2.5', + audioQuality: 'high', + maxRecordingDuration: 120, + showNotifications: true, + transformationSystemPrompt: 
TRANSFORMATION_SYSTEM_PROMPT, +}; + +function createSwitchableConfigRepo(): IConfigRepository & { + setProvider: (provider: TransformationProvider) => void; +} { + let activeProvider = TransformationProvider.OpenAI; + const config = { ...baseConfig }; + + return { + setProvider(provider: TransformationProvider) { + activeProvider = provider; + config.transformationProvider = provider; + }, + getConfig: jest.fn(async () => ({ + ...config, + transformationProvider: activeProvider, + })), + updateConfig: jest.fn(async partial => { + if (partial.transformationProvider) { + activeProvider = partial.transformationProvider; + config.transformationProvider = partial.transformationProvider; + } + }), + getProviderApiKey: jest.fn(async provider => `${provider}-key`), + setProviderApiKey: jest.fn(async () => undefined), + onConfigChange: jest.fn(), + }; +} + +describe('Provider switching workflow', () => { + it('ConfigurablePromptTransformer resolves the active provider on each transform call', async () => { + const configRepo = createSwitchableConfigRepo(); + const factory = new PromptTransformerFactory(configRepo, createMockLogger()); + const transformer = new ConfigurablePromptTransformer(factory); + + const openAiTransformer = factory.createForProvider(TransformationProvider.OpenAI); + const anthropicTransformer = factory.createForProvider(TransformationProvider.Anthropic); + const transformSpy = jest + .spyOn(OpenAIPromptTransformer.prototype, 'transform') + .mockResolvedValue({ + originalText: 'first prompt', + transformedText: 'first prompt', + improvements: [], + }); + jest.spyOn(AnthropicPromptTransformer.prototype, 'transform').mockResolvedValue({ + originalText: 'second prompt', + transformedText: 'second prompt', + improvements: [], + }); + + configRepo.setProvider(TransformationProvider.OpenAI); + await transformer.transform('first prompt'); + + configRepo.setProvider(TransformationProvider.Anthropic); + await transformer.transform('second prompt'); + + 
expect(transformSpy).toHaveBeenCalledTimes(1); + expect(AnthropicPromptTransformer.prototype.transform).toHaveBeenCalledTimes(1); + expect(openAiTransformer).toBeInstanceOf(OpenAIPromptTransformer); + expect(anthropicTransformer).toBeInstanceOf(AnthropicPromptTransformer); + }); + + it('factory validation reflects provider-specific requirements after switching', async () => { + const configRepo = createSwitchableConfigRepo(); + const factory = new PromptTransformerFactory(configRepo, createMockLogger()); + + configRepo.setProvider(TransformationProvider.Anthropic); + (configRepo.getProviderApiKey as jest.Mock).mockImplementation(async provider => + provider === TransformationProvider.Anthropic ? undefined : `${provider}-key` + ); + + await expect(factory.validateProvider(TransformationProvider.Anthropic)).resolves.toContain( + 'Anthropic API key' + ); + + configRepo.setProvider(TransformationProvider.Ollama); + jest.spyOn(OllamaPromptTransformer, 'isAvailable').mockResolvedValueOnce(false); + + await expect(factory.validateProvider(TransformationProvider.Ollama)).resolves.toContain( + 'Ollama server is not reachable' + ); + }); +}); diff --git a/src/__tests__/presentation/commands/FirstTimeSetupCommand.test.ts b/src/__tests__/presentation/commands/FirstTimeSetupCommand.test.ts new file mode 100644 index 0000000..4fc4da3 --- /dev/null +++ b/src/__tests__/presentation/commands/FirstTimeSetupCommand.test.ts @@ -0,0 +1,95 @@ +import { getSetupChecklist } from '../../../presentation/commands/FirstTimeSetupCommand'; +import { IConfigRepository, Config } from '../../../application/ports/IConfigRepository'; +import { TransformationProvider } from '../../../domain/value-objects/TransformationProvider'; +import { TRANSFORMATION_SYSTEM_PROMPT } from '../../../infrastructure/transformation/transformationUtils'; + +const baseConfig: Config = { + transformationProvider: TransformationProvider.OpenAI, + transcriptionLanguage: 'auto', + enablePromptTransformation: true, + 
transformationModel: 'gpt-4o', + anthropicModel: 'claude-3-5-sonnet-20241022', + googleModel: 'gemini-1.5-pro', + azureEndpoint: 'https://example.openai.azure.com', + azureDeployment: 'gpt-4o', + ollamaBaseUrl: 'http://localhost:11434', + ollamaModel: 'llama3.1:8b', + openCodeBaseUrl: 'http://127.0.0.1:4010/v1', + openCodeModel: 'anthropic/claude-sonnet-4-5', + openRouterModel: 'openai/gpt-4o', + cursorModel: 'composer-2.5', + audioQuality: 'high', + maxRecordingDuration: 120, + showNotifications: true, + transformationSystemPrompt: TRANSFORMATION_SYSTEM_PROMPT, +}; + +function createConfigRepo( + overrides: Partial = {}, + providerKeys: Partial> = {} +): IConfigRepository { + const config = { ...baseConfig, ...overrides }; + + return { + getConfig: jest.fn(async () => config), + updateConfig: jest.fn(async () => undefined), + getProviderApiKey: jest.fn(async provider => { + if (provider in providerKeys) { + return providerKeys[provider]; + } + return provider === TransformationProvider.OpenAI ? 
'openai-key' : 'provider-key'; + }), + setProviderApiKey: jest.fn(async () => undefined), + onConfigChange: jest.fn(), + }; +} + +describe('getSetupChecklist', () => { + it('marks checklist complete when Whisper and optimization provider are configured', async () => { + const configRepo = createConfigRepo(); + + const checklist = await getSetupChecklist(configRepo); + + expect(checklist.every(item => item.complete)).toBe(true); + expect(checklist).toEqual([ + { label: 'Extension installed', complete: true }, + { label: 'OpenAI API key configured (Whisper)', complete: true }, + { label: 'Optimization provider configured (OpenAI)', complete: true }, + ]); + }); + + it('marks Whisper incomplete when OpenAI key is missing', async () => { + const configRepo = createConfigRepo({}, { [TransformationProvider.OpenAI]: undefined }); + + const checklist = await getSetupChecklist(configRepo); + + expect(checklist.some(item => !item.complete)).toBe(true); + expect(checklist.find(item => item.label.includes('Whisper'))?.complete).toBe(false); + }); + + it('marks optimization disabled as complete when transformation is disabled', async () => { + const configRepo = createConfigRepo({ enablePromptTransformation: false }); + + const checklist = await getSetupChecklist(configRepo); + + expect(checklist).toEqual([ + { label: 'Extension installed', complete: true }, + { label: 'OpenAI API key configured (Whisper)', complete: true }, + { label: 'Prompt optimization configured (disabled)', complete: true }, + ]); + }); + + it('marks optimization provider incomplete when enabled but key is missing', async () => { + const configRepo = createConfigRepo( + { transformationProvider: TransformationProvider.Anthropic }, + { + [TransformationProvider.OpenAI]: 'openai-key', + [TransformationProvider.Anthropic]: undefined, + } + ); + + const checklist = await getSetupChecklist(configRepo); + + expect(checklist.find(item => item.label.includes('Anthropic'))?.complete).toBe(false); + }); +}); diff 
--git a/src/__tests__/presentation/ui/RecordingStatusBarItem.test.ts b/src/__tests__/presentation/ui/RecordingStatusBarItem.test.ts new file mode 100644 index 0000000..a49423d --- /dev/null +++ b/src/__tests__/presentation/ui/RecordingStatusBarItem.test.ts @@ -0,0 +1,140 @@ +import { RecordingStatusBarItem } from '../../../presentation/ui/RecordingStatusBarItem'; +import { RecordingState } from '../../../domain/value-objects/RecordingState'; +import * as vscode from 'vscode'; +import { + getRecordingSessionMode, + setRecordingSessionMode, +} from '../../../shared/services/RecordingSessionMode'; + +function getStatusBarItems(): Array<{ + text: string; + command?: string; + tooltip?: string; + backgroundColor?: unknown; +}> { + return (vscode.window.createStatusBarItem as jest.Mock).mock.results.map(result => result.value); +} + +describe('RecordingStatusBarItem', () => { + beforeEach(() => { + setRecordingSessionMode(null); + }); + + it('creates transcribe and promptimize buttons with idle labels', () => { + const statusBar = new RecordingStatusBarItem(); + const [transcribeItem, promptimizeItem, settingsItem] = getStatusBarItems(); + + expect(vscode.window.createStatusBarItem).toHaveBeenNthCalledWith( + 1, + 'transcribe', + vscode.StatusBarAlignment.Right, + 1001 + ); + expect(vscode.window.createStatusBarItem).toHaveBeenNthCalledWith( + 2, + 'promptimize', + vscode.StatusBarAlignment.Right, + 1001 + ); + expect(vscode.window.createStatusBarItem).toHaveBeenNthCalledWith( + 3, + 'settings', + vscode.StatusBarAlignment.Right, + 1001 + ); + expect(transcribeItem.text).toBe('$(mic) Transcribe'); + expect(transcribeItem.command).toBe('promptimize.startTranscribeRecording'); + expect(promptimizeItem.text).toBe('$(sparkle) Promptimize'); + expect(promptimizeItem.command).toBe('promptimize.startPromptimizeRecording'); + expect(settingsItem.text).toBe('$(gear) Settings'); + expect(settingsItem.command).toBe('promptimize.openConfigurationPanel'); + statusBar.dispose(); + }); 
+ + it('uses the active provider label in the transforming tooltip', () => { + const statusBar = new RecordingStatusBarItem(); + const promptimizeItem = getStatusBarItems()[1]; + + statusBar.setSetupState({ + optimizationEnabled: true, + hasOpenAIKey: true, + }); + statusBar.setTransformationProviderLabel('Anthropic'); + setRecordingSessionMode('promptimize'); + statusBar.setState(RecordingState.TRANSFORMING); + + expect(promptimizeItem.tooltip).toBe( + 'Optimizing prompt with Anthropic (Whisper transcription already complete)' + ); + statusBar.dispose(); + }); + + it('shows configuration tooltip when OpenAI key is missing', () => { + const statusBar = new RecordingStatusBarItem(); + const transcribeItem = getStatusBarItems()[0]; + + statusBar.setSetupState({ + optimizationEnabled: true, + hasOpenAIKey: false, + }); + statusBar.setState(RecordingState.IDLE); + + expect(transcribeItem.text).toBe('$(mic) Transcribe'); + expect(transcribeItem.command).toBe('promptimize.startTranscribeRecording'); + expect(transcribeItem.tooltip).toContain('OpenAI API key required'); + expect(transcribeItem.backgroundColor).toEqual({ + id: 'statusBarItem.warningBackground', + }); + statusBar.dispose(); + }); + + it('shows configuration tooltip when optimization is disabled', () => { + const statusBar = new RecordingStatusBarItem(); + const promptimizeItem = getStatusBarItems()[1]; + + statusBar.setSetupState({ + optimizationEnabled: false, + hasOpenAIKey: true, + }); + statusBar.setState(RecordingState.IDLE); + + expect(promptimizeItem.text).toBe('$(sparkle) Promptimize'); + expect(promptimizeItem.command).toBe('promptimize.startPromptimizeRecording'); + expect(promptimizeItem.tooltip).toContain('Prompt optimization is disabled'); + statusBar.dispose(); + }); + + it('shows setup label when configuration checklist is incomplete', () => { + const statusBar = new RecordingStatusBarItem(); + const settingsItem = getStatusBarItems()[2]; + + statusBar.setSetupState({ + optimizationEnabled: 
true, + hasOpenAIKey: false, + setupChecklist: [ + { label: 'OpenAI API key', complete: false }, + { label: 'Optimization provider', complete: true }, + ], + }); + + expect(settingsItem.text).toBe('$(warning) Setup'); + statusBar.dispose(); + }); + + it('disables sibling button while a transcribe session is active', () => { + const statusBar = new RecordingStatusBarItem(); + const [transcribeItem, promptimizeItem] = getStatusBarItems(); + + statusBar.setSetupState({ + optimizationEnabled: true, + hasOpenAIKey: true, + }); + setRecordingSessionMode('transcribe'); + statusBar.setState(RecordingState.RECORDING); + + expect(transcribeItem.command).toBe('promptimize.stopTranscribeRecording'); + expect(promptimizeItem.command).toBeUndefined(); + expect(getRecordingSessionMode()).toBe('transcribe'); + statusBar.dispose(); + }); +}); diff --git a/src/application/ports/IConfigRepository.ts b/src/application/ports/IConfigRepository.ts index f81185a..0d372e3 100644 --- a/src/application/ports/IConfigRepository.ts +++ b/src/application/ports/IConfigRepository.ts @@ -1,9 +1,17 @@ +import { TransformationProvider } from '../../domain/value-objects/TransformationProvider'; + export interface Config { /** * OpenAI API key (stored separately in SecretStorage). + * Used for Whisper transcription and OpenAI prompt transformation. */ apiKey?: string; + /** + * Active provider for prompt transformation. + */ + transformationProvider: TransformationProvider; + /** * Language for transcription (ISO 639-1 code or 'auto'). */ @@ -19,6 +27,56 @@ export interface Config { */ transformationModel: string; + /** + * Anthropic model ID for Claude-based transformation. + */ + anthropicModel: string; + + /** + * Google Gemini model ID for transformation. + */ + googleModel: string; + + /** + * Azure OpenAI endpoint URL. + */ + azureEndpoint: string; + + /** + * Azure OpenAI deployment name for the chat model. + */ + azureDeployment: string; + + /** + * Ollama server base URL. 
+ */ + ollamaBaseUrl: string; + + /** + * Ollama model name/tag. + */ + ollamaModel: string; + + /** + * OpenCode LLM proxy base URL. + */ + openCodeBaseUrl: string; + + /** + * OpenCode model identifier (provider/model format). + */ + openCodeModel: string; + + /** + * OpenRouter model identifier. + */ + openRouterModel: string; + + /** + * Cursor model identifier for SDK-based transformation. + */ + cursorModel: string; + /** * Audio recording quality ('low' | 'medium' | 'high'). */ @@ -38,6 +96,11 @@ export interface Config { * Transcription hint for technical terms (future). */ transcriptionHint?: string; + + /** + * System prompt used to instruct the AI during prompt transformation. + */ + transformationSystemPrompt: string; } /** @@ -61,6 +124,16 @@ export interface IConfigRepository { */ updateConfig(config: Partial): Promise; + /** + * Get API key for a specific transformation provider. + */ + getProviderApiKey(provider: TransformationProvider): Promise; + + /** + * Store API key for a specific transformation provider. + */ + setProviderApiKey(provider: TransformationProvider, apiKey: string | undefined): Promise; + /** * Watch for configuration changes. * diff --git a/src/application/ports/IPromptTransformer.ts b/src/application/ports/IPromptTransformer.ts index e248c31..c95f9da 100644 --- a/src/application/ports/IPromptTransformer.ts +++ b/src/application/ports/IPromptTransformer.ts @@ -21,8 +21,12 @@ export interface PromptContext { * Port for prompt transformation functionality. 
* * Implementations: - * - OpenAIPromptTransformer (primary): Uses GPT-4 - * - RuleBasedTransformer (future): Uses regex/NLP rules + * - OpenAIPromptTransformer: Uses GPT-4 + * - AnthropicPromptTransformer: Uses Claude + * - GooglePromptTransformer: Uses Gemini + * - AzureOpenAIPromptTransformer: Uses Azure OpenAI + * - OllamaPromptTransformer: Uses local Ollama models + * - CursorPromptTransformer: Uses Cursor SDK */ export interface IPromptTransformer { /** diff --git a/src/application/ports/ITransformationProviderValidator.ts b/src/application/ports/ITransformationProviderValidator.ts new file mode 100644 index 0000000..d38bfa8 --- /dev/null +++ b/src/application/ports/ITransformationProviderValidator.ts @@ -0,0 +1,11 @@ +import { TransformationProvider } from '../../domain/value-objects/TransformationProvider'; + +/** + * Validates that a transformation provider is configured and reachable. + */ +export interface ITransformationProviderValidator { + /** + * Returns an error message when the provider is not ready, otherwise undefined. 
+ */ + validateProvider(provider: TransformationProvider): Promise; +} diff --git a/src/application/services/ConfigurationValidationService.ts b/src/application/services/ConfigurationValidationService.ts new file mode 100644 index 0000000..a557ea5 --- /dev/null +++ b/src/application/services/ConfigurationValidationService.ts @@ -0,0 +1,146 @@ +import { IConfigRepository } from '../ports/IConfigRepository'; +import { ITransformationProviderValidator } from '../ports/ITransformationProviderValidator'; +import { + PROVIDER_METADATA, + TransformationProvider, +} from '../../domain/value-objects/TransformationProvider'; +import { + OPENAI_API_KEY_REQUIRED_RECORDING, + OPENAI_API_KEY_REQUIRED_STARTUP, + OPTIMIZATION_PROVIDER_MISSING_KEY, +} from '../../shared/constants/uxMessages'; + +export interface ConfigurationValidationIssue { + message: string; + configureCommand: + | 'promptimize.configureApiKey' + | 'promptimize.configureTransformationProvider' + | 'promptimize.openConfigurationPanel' + | 'promptimize.firstTimeSetup'; +} + +/** + * Validates configuration required before transcription-only recording. + */ +export async function validateConfigurationForTranscription( + configRepo: IConfigRepository +): Promise { + const openAiKey = await configRepo.getProviderApiKey(TransformationProvider.OpenAI); + + if (!openAiKey) { + return { + message: OPENAI_API_KEY_REQUIRED_RECORDING, + configureCommand: 'promptimize.openConfigurationPanel', + }; + } + + return undefined; +} + +/** + * Validates configuration required before promptimize recording (transcribe + optimize). + */ +export async function validateConfigurationForPromptimize( + configRepo: IConfigRepository, + providerValidator: ITransformationProviderValidator +): Promise { + const config = await configRepo.getConfig(); + + if (!config.enablePromptTransformation) { + return { + message: 'Prompt optimization is disabled. 
Enable it in configuration to use Promptimize.', + configureCommand: 'promptimize.openConfigurationPanel', + }; + } + + return validateConfigurationForRecording(configRepo, providerValidator); +} + +/** + * Validates configuration required before recording starts. + * Whisper transcription always requires an OpenAI API key. + * When prompt transformation is enabled, the active provider must also be configured. + */ +export async function validateConfigurationForRecording( + configRepo: IConfigRepository, + providerValidator: ITransformationProviderValidator +): Promise { + const config = await configRepo.getConfig(); + const openAiKey = await configRepo.getProviderApiKey(TransformationProvider.OpenAI); + + if (!openAiKey) { + return { + message: OPENAI_API_KEY_REQUIRED_RECORDING, + configureCommand: 'promptimize.configureApiKey', + }; + } + + if (!config.enablePromptTransformation) { + return undefined; + } + + const providerError = await providerValidator.validateProvider(config.transformationProvider); + if (providerError) { + return { + message: providerError, + configureCommand: 'promptimize.openConfigurationPanel', + }; + } + + return undefined; +} + +/** + * Validates configuration on extension startup. + * Warns only about missing keys relevant to the current configuration. 
+ */ +export async function validateConfigurationOnStartup( + configRepo: IConfigRepository, + providerValidator: ITransformationProviderValidator +): Promise { + const config = await configRepo.getConfig(); + const openAiKey = await configRepo.getProviderApiKey(TransformationProvider.OpenAI); + + if (!openAiKey) { + return { + message: OPENAI_API_KEY_REQUIRED_STARTUP, + configureCommand: 'promptimize.openConfigurationPanel', + }; + } + + if (!config.enablePromptTransformation) { + return undefined; + } + + const provider = config.transformationProvider; + const metadata = PROVIDER_METADATA[provider]; + + if (!metadata.requiresApiKey) { + const providerError = await providerValidator.validateProvider(provider); + if (providerError) { + return { + message: `Promptimize: ${providerError}`, + configureCommand: 'promptimize.openConfigurationPanel', + }; + } + return undefined; + } + + const providerApiKey = await configRepo.getProviderApiKey(provider); + if (!providerApiKey) { + return { + message: OPTIMIZATION_PROVIDER_MISSING_KEY(metadata.displayName), + configureCommand: 'promptimize.openConfigurationPanel', + }; + } + + const providerError = await providerValidator.validateProvider(provider); + if (providerError) { + return { + message: `Promptimize: ${providerError}`, + configureCommand: 'promptimize.openConfigurationPanel', + }; + } + + return undefined; +} diff --git a/src/application/services/ProviderPricingService.ts b/src/application/services/ProviderPricingService.ts new file mode 100644 index 0000000..dce6a4e --- /dev/null +++ b/src/application/services/ProviderPricingService.ts @@ -0,0 +1,145 @@ +import { CostClient, type Provider } from 'token-costs'; +import { TransformationProvider } from '../../domain/value-objects/TransformationProvider'; +import { PROVIDER_COMPARISON as STATIC_FALLBACK } from '../../shared/constants/providerComparison'; + +/** Typical prompt optimization workload: system prompt + transcription in, structured prompt out. 
*/ +const ESTIMATED_INPUT_TOKENS = 500; +const ESTIMATED_OUTPUT_TOKENS = 200; + +const CACHE_TTL_MS = 60 * 60 * 1000; +const FETCH_TIMEOUT_MS = 3000; + +interface TokenCostsMapping { + tokenCostsProvider: Provider; + modelId: string; +} + +const TOKEN_COSTS_MAPPINGS: Partial> = { + [TransformationProvider.OpenAI]: { + tokenCostsProvider: 'openai', + modelId: 'gpt-4o', + }, + [TransformationProvider.Anthropic]: { + tokenCostsProvider: 'anthropic', + modelId: 'claude-3-5-sonnet-20241022', + }, + [TransformationProvider.Google]: { + tokenCostsProvider: 'google', + modelId: 'gemini-1.5-pro', + }, + [TransformationProvider.OpenRouter]: { + tokenCostsProvider: 'openrouter', + modelId: 'openai/gpt-4o', + }, +}; + +export interface ProviderPricingData { + provider: TransformationProvider; + costPerTransform: string; + speed: string; + privacy: string; + bestFor: string; + isRealTime: boolean; + lastUpdated?: string; +} + +export class ProviderPricingService { + private costClient: CostClient | null = null; + private cache: ProviderPricingData[] | null = null; + private cacheTimestamp = 0; + + async getProviderComparison(): Promise { + if (this.isCacheValid() && this.cache) { + return this.cache; + } + + if (!this.costClient) { + this.costClient = new CostClient(); + } + + const fetchTargets = STATIC_FALLBACK.filter(entry => TOKEN_COSTS_MAPPINGS[entry.provider]); + const realTimeResults = await Promise.all( + fetchTargets.map(entry => this.fetchProviderPricing(entry.provider)) + ); + + const realTimeMap = new Map( + realTimeResults + .filter((result): result is ProviderPricingData => result !== null) + .map(result => [result.provider, result]) + ); + + const comparison = STATIC_FALLBACK.map(entry => { + const realTime = realTimeMap.get(entry.provider); + if (realTime) { + return realTime; + } + + return { + provider: entry.provider, + costPerTransform: entry.costPerTransform, + speed: entry.speed, + privacy: entry.privacy, + bestFor: entry.bestFor, + isRealTime: false, + }; 
+ }); + + this.cache = comparison; + this.cacheTimestamp = Date.now(); + return comparison; + } + + private async fetchProviderPricing( + provider: TransformationProvider + ): Promise { + const mapping = TOKEN_COSTS_MAPPINGS[provider]; + if (!mapping || !this.costClient) { + return null; + } + + const staticEntry = STATIC_FALLBACK.find(entry => entry.provider === provider); + if (!staticEntry) { + return null; + } + + try { + const costResult = await Promise.race([ + this.costClient.calculateCost(mapping.tokenCostsProvider, mapping.modelId, { + inputTokens: ESTIMATED_INPUT_TOKENS, + outputTokens: ESTIMATED_OUTPUT_TOKENS, + }), + new Promise((_, reject) => + setTimeout(() => reject(new Error('timeout')), FETCH_TIMEOUT_MS) + ), + ]); + + return { + provider, + costPerTransform: formatTransformCost(costResult.totalCost), + speed: staticEntry.speed, + privacy: staticEntry.privacy, + bestFor: staticEntry.bestFor, + isRealTime: true, + lastUpdated: costResult.date, + }; + } catch { + return null; + } + } + + private isCacheValid(): boolean { + if (!this.cache || this.cache.length === 0) { + return false; + } + + return Date.now() - this.cacheTimestamp < CACHE_TTL_MS; + } +} + +function formatTransformCost(totalCostUsd: number): string { + if (totalCostUsd >= 0.01) { + return `~$${totalCostUsd.toFixed(2)}/transform`; + } + + return `~$${totalCostUsd.toFixed(3)}/transform`; +} diff --git a/src/application/use-cases/StartRecordingUseCase.ts b/src/application/use-cases/StartRecordingUseCase.ts index 2215305..f425b9a 100644 --- a/src/application/use-cases/StartRecordingUseCase.ts +++ b/src/application/use-cases/StartRecordingUseCase.ts @@ -1,35 +1,31 @@ import { IAudioRecorder } from '../ports/IAudioRecorder'; -import { IConfigRepository } from '../ports/IConfigRepository'; import { ILogger } from '../ports/ILogger'; -import { MissingApiKeyError } from '../../domain/errors/ConfigError'; import { RecordingError } from '../../domain/errors/RecordingError'; +import { + 
RecordingSessionMode, + setRecordingSessionMode, +} from '../../shared/services/RecordingSessionMode'; export class StartRecordingUseCase { constructor( private readonly audioRecorder: IAudioRecorder, - private readonly configRepo: IConfigRepository, private readonly logger: ILogger ) {} - async execute(): Promise { - this.logger.info('Starting recording use case'); + async execute(mode: RecordingSessionMode): Promise { + this.logger.info('Starting recording use case', { mode }); - // 1. Validate configuration - const config = await this.configRepo.getConfig(); - if (!config.apiKey) { - throw new MissingApiKeyError(); - } - - // 2. Check if already recording if (this.audioRecorder.isRecording()) { throw new RecordingError('Already recording'); } - // 3. Start recording + setRecordingSessionMode(mode); + try { await this.audioRecorder.startRecording(); - this.logger.info('Recording started successfully'); + this.logger.info('Recording started successfully', { mode }); } catch (error) { + setRecordingSessionMode(null); this.logger.error('Failed to start recording', error as Error); throw new RecordingError( 'Failed to start recording', diff --git a/src/application/use-cases/TranscribeAudioUseCase.ts b/src/application/use-cases/TranscribeAudioUseCase.ts index f02bfb1..e67e681 100644 --- a/src/application/use-cases/TranscribeAudioUseCase.ts +++ b/src/application/use-cases/TranscribeAudioUseCase.ts @@ -43,7 +43,10 @@ export class TranscribeAudioUseCase { try { // Transcribe - const result: TranscriptionResult = await this.transcriptionService.transcribe(audio, options); + const result: TranscriptionResult = await this.transcriptionService.transcribe( + audio, + options + ); this.logger.info('Transcription completed', { language: result.language, diff --git a/src/application/use-cases/TransformPromptUseCase.ts b/src/application/use-cases/TransformPromptUseCase.ts index c8f5ff9..e855710 100644 --- a/src/application/use-cases/TransformPromptUseCase.ts +++ 
b/src/application/use-cases/TransformPromptUseCase.ts @@ -1,4 +1,5 @@ import { IPromptTransformer, PromptContext } from '../ports/IPromptTransformer'; +import { ITransformationProviderValidator } from '../ports/ITransformationProviderValidator'; import { IConfigRepository } from '../ports/IConfigRepository'; import { ILogger } from '../ports/ILogger'; import { Prompt } from '../../domain/entities/Prompt'; @@ -8,6 +9,7 @@ import { generateId } from '../../shared/utils/generateId'; export class TransformPromptUseCase { constructor( private readonly promptTransformer: IPromptTransformer, + private readonly providerValidator: ITransformationProviderValidator, private readonly configRepo: IConfigRepository, private readonly logger: ILogger ) {} @@ -15,7 +17,6 @@ export class TransformPromptUseCase { async execute(transcription: Transcription, context?: PromptContext): Promise { this.logger.info('Starting prompt transformation'); - // Check if transformation is enabled const config = await this.configRepo.getConfig(); if (!config.enablePromptTransformation) { this.logger.info('Prompt transformation disabled, returning original text'); @@ -30,6 +31,13 @@ export class TransformPromptUseCase { } try { + const validationError = await this.providerValidator.validateProvider( + config.transformationProvider + ); + if (validationError) { + throw new Error(validationError); + } + const transformed = await this.promptTransformer.transform(transcription.text, context); this.logger.info('Prompt transformation completed', { @@ -52,7 +60,10 @@ export class TransformPromptUseCase { return prompt; } catch (error) { - this.logger.error('Prompt transformation failed, falling back to original text', error as Error); + this.logger.error( + 'Prompt transformation failed, falling back to original text', + error as Error + ); // Fallback: return original text if transformation fails return new Prompt( generateId(), diff --git a/src/domain/errors/RecordingError.ts 
b/src/domain/errors/RecordingError.ts index 03beefa..96d42e1 100644 --- a/src/domain/errors/RecordingError.ts +++ b/src/domain/errors/RecordingError.ts @@ -1,5 +1,8 @@ export class RecordingError extends Error { - constructor(message: string, public readonly cause?: Error) { + constructor( + message: string, + public readonly cause?: Error + ) { super(message); this.name = 'RecordingError'; diff --git a/src/domain/errors/TranscriptionError.ts b/src/domain/errors/TranscriptionError.ts index fe88814..6b2911b 100644 --- a/src/domain/errors/TranscriptionError.ts +++ b/src/domain/errors/TranscriptionError.ts @@ -1,5 +1,9 @@ export class TranscriptionError extends Error { - constructor(message: string, public readonly statusCode?: number, public readonly cause?: Error) { + constructor( + message: string, + public readonly statusCode?: number, + public readonly cause?: Error + ) { super(message); this.name = 'TranscriptionError'; } diff --git a/src/domain/errors/ValidationError.ts b/src/domain/errors/ValidationError.ts index 3d11429..32d8e4e 100644 --- a/src/domain/errors/ValidationError.ts +++ b/src/domain/errors/ValidationError.ts @@ -1,5 +1,8 @@ export class ValidationError extends Error { - constructor(message: string, public readonly field?: string) { + constructor( + message: string, + public readonly field?: string + ) { super(message); this.name = 'ValidationError'; } diff --git a/src/domain/value-objects/TransformationProvider.ts b/src/domain/value-objects/TransformationProvider.ts new file mode 100644 index 0000000..2df2bbc --- /dev/null +++ b/src/domain/value-objects/TransformationProvider.ts @@ -0,0 +1,95 @@ +export enum TransformationProvider { + OpenAI = 'openai', + Anthropic = 'anthropic', + Google = 'google', + Azure = 'azure', + Ollama = 'ollama', + OpenCode = 'opencode', + OpenRouter = 'openrouter', + Cursor = 'cursor', +} + +export interface ProviderMetadata { + id: TransformationProvider; + displayName: string; + description: string; + requiresApiKey: 
boolean; + defaultModel: string; +} + +export const PROVIDER_METADATA: Record = { + [TransformationProvider.OpenAI]: { + id: TransformationProvider.OpenAI, + displayName: 'OpenAI', + description: 'GPT-4o and other OpenAI chat models', + requiresApiKey: true, + defaultModel: 'gpt-4o', + }, + [TransformationProvider.Anthropic]: { + id: TransformationProvider.Anthropic, + displayName: 'Anthropic', + description: 'Claude 3.5 Sonnet and other Claude models', + requiresApiKey: true, + defaultModel: 'claude-3-5-sonnet-20241022', + }, + [TransformationProvider.Google]: { + id: TransformationProvider.Google, + displayName: 'Google Gemini', + description: 'Gemini 1.5 Pro and Flash models', + requiresApiKey: true, + defaultModel: 'gemini-1.5-pro', + }, + [TransformationProvider.Azure]: { + id: TransformationProvider.Azure, + displayName: 'Azure OpenAI', + description: 'GPT models deployed on Azure OpenAI Service', + requiresApiKey: true, + defaultModel: 'gpt-4o', + }, + [TransformationProvider.Ollama]: { + id: TransformationProvider.Ollama, + displayName: 'Ollama (Local)', + description: 'Local LLMs via Ollama (Llama, Mistral, etc.)', + requiresApiKey: false, + defaultModel: 'llama3.1:8b', + }, + [TransformationProvider.OpenCode]: { + id: TransformationProvider.OpenCode, + displayName: 'OpenCode (Local Multi-Provider)', + description: 'Local OpenCode instance via opencode-llm-proxy (Anthropic, OpenAI, Ollama, etc.)', + requiresApiKey: false, + defaultModel: '', + }, + [TransformationProvider.OpenRouter]: { + id: TransformationProvider.OpenRouter, + displayName: 'OpenRouter', + description: 'Unified gateway to 200+ models from multiple providers', + requiresApiKey: true, + defaultModel: 'openai/gpt-4o', + }, + [TransformationProvider.Cursor]: { + id: TransformationProvider.Cursor, + displayName: 'Cursor', + description: 'Native Cursor AI models via Cursor SDK (composer-2.5, etc.)', + requiresApiKey: true, + defaultModel: 'composer-2.5', + }, +}; + +export function 
isTransformationProvider(value: string): value is TransformationProvider { + return Object.values(TransformationProvider).includes(value as TransformationProvider); +} + +export function parseTransformationProvider( + value: string | undefined, + fallback: TransformationProvider = TransformationProvider.OpenAI +): TransformationProvider { + if (value && isTransformationProvider(value)) { + return value; + } + return fallback; +} + +export function getProviderSecretKey(provider: TransformationProvider): string { + return `promptimize.apiKey.${provider}`; +} diff --git a/src/extension.ts b/src/extension.ts index df6d90c..9737384 100644 --- a/src/extension.ts +++ b/src/extension.ts @@ -4,12 +4,16 @@ import * as vscode from 'vscode'; import { VSCodeOutputChannelLogger } from './infrastructure/logging/VSCodeOutputChannelLogger'; import { VSCodeConfigRepository } from './infrastructure/configuration/VSCodeConfigRepository'; import { OpenAIWhisperService } from './infrastructure/transcription/OpenAIWhisperService'; -import { OpenAIPromptTransformer } from './infrastructure/transformation/OpenAIPromptTransformer'; +import { + PromptTransformerFactory, + ConfigurablePromptTransformer, +} from './infrastructure/transformation/PromptTransformerFactory'; import { OpenAIModelService } from './infrastructure/openai/OpenAIModelService'; import { ChatParticipantInserter } from './infrastructure/insertion/ChatParticipantInserter'; import { EditorTextInserter } from './infrastructure/insertion/EditorTextInserter'; import { FallbackTextInserter } from './infrastructure/insertion/FallbackTextInserter'; import { NativeAudioRecorder } from './infrastructure/audio/NativeAudioRecorder'; +import { RecordingState } from './domain/value-objects/RecordingState'; // Use Cases import { StartRecordingUseCase } from './application/use-cases/StartRecordingUseCase'; @@ -22,10 +26,26 @@ import { InsertTextUseCase } from './application/use-cases/InsertTextUseCase'; // Presentation import { 
registerStartRecordingCommand } from './presentation/commands/StartRecordingCommand'; import { registerStopRecordingCommand } from './presentation/commands/StopRecordingCommand'; +import { registerStartTranscribeRecordingCommand } from './presentation/commands/StartTranscribeRecordingCommand'; +import { registerStopTranscribeRecordingCommand } from './presentation/commands/StopTranscribeRecordingCommand'; +import { registerStartPromptimizeRecordingCommand } from './presentation/commands/StartPromptimizeRecordingCommand'; +import { registerStopPromptimizeRecordingCommand } from './presentation/commands/StopPromptimizeRecordingCommand'; import { registerCancelRecordingCommand } from './presentation/commands/CancelRecordingCommand'; import { registerConfigureApiKeyCommand } from './presentation/commands/ConfigureApiKeyCommand'; import { registerConfigureModelCommand } from './presentation/commands/ConfigureModelCommand'; +import { registerConfigureTransformationProviderCommand } from './presentation/commands/ConfigureTransformationProviderCommand'; +import { registerTestTransformationCommand } from './presentation/commands/TestTransformationCommand'; +import { + getSetupChecklist, + registerFirstTimeSetupCommand, +} from './presentation/commands/FirstTimeSetupCommand'; +import { registerOpenConfigurationPanelCommand } from './presentation/commands/OpenConfigurationPanelCommand'; import { RecordingStatusBarItem } from './presentation/ui/RecordingStatusBarItem'; +import { validateConfigurationOnStartup } from './application/services/ConfigurationValidationService'; +import { + PROVIDER_METADATA, + TransformationProvider, +} from './domain/value-objects/TransformationProvider'; let activeAudioRecorder: NativeAudioRecorder | null = null; @@ -34,15 +54,13 @@ let activeAudioRecorder: NativeAudioRecorder | null = null; * Composition root - all dependencies are wired here. 
*/ export function activate(context: vscode.ExtensionContext): void { - console.log('Cursor Whisper extension is activating...'); - // ======================================== // INFRASTRUCTURE LAYER // ======================================== // Logging - const logger = new VSCodeOutputChannelLogger('Cursor Whisper'); - logger.info('Extension activated'); + const logger = new VSCodeOutputChannelLogger('Promptimize'); + logger.info('Promptimize extension is activating...'); // Configuration const configRepository = new VSCodeConfigRepository(context, context.secrets); @@ -58,13 +76,9 @@ export function activate(context: vscode.ExtensionContext): void { return config.apiKey; }; - const getModel = async (): Promise => { - const config = await configRepository.getConfig(); - return config.transformationModel; - }; - const whisperService = new OpenAIWhisperService(getApiKey, logger); - const promptTransformer = new OpenAIPromptTransformer(getApiKey, getModel, logger); + const transformerFactory = new PromptTransformerFactory(configRepository, logger); + const promptTransformer = new ConfigurablePromptTransformer(transformerFactory); const modelService = new OpenAIModelService(getApiKey, logger); // Text Insertion (Chain of Responsibility) @@ -78,24 +92,17 @@ export function activate(context: vscode.ExtensionContext): void { // APPLICATION LAYER (Use Cases) // ======================================== - const startRecordingUseCase = new StartRecordingUseCase( - audioRecorder, - configRepository, - logger - ); + const startRecordingUseCase = new StartRecordingUseCase(audioRecorder, logger); const stopRecordingUseCase = new StopRecordingUseCase(audioRecorder, logger); const cancelRecordingUseCase = new CancelRecordingUseCase(audioRecorder, logger); - const transcribeUseCase = new TranscribeAudioUseCase( - whisperService, - configRepository, - logger - ); + const transcribeUseCase = new TranscribeAudioUseCase(whisperService, configRepository, logger); const transformUseCase 
= new TransformPromptUseCase( promptTransformer, + transformerFactory, configRepository, logger ); @@ -110,14 +117,61 @@ export function activate(context: vscode.ExtensionContext): void { const statusBar = new RecordingStatusBarItem(); context.subscriptions.push(statusBar); - // Sync status bar with recorder state + const syncTransformationProviderLabel = async (): Promise => { + const config = await configRepository.getConfig(); + const metadata = PROVIDER_METADATA[config.transformationProvider]; + statusBar.setTransformationProviderLabel(metadata.displayName); + + const checklist = await getSetupChecklist(configRepository); + const openAiKey = await configRepository.getProviderApiKey(TransformationProvider.OpenAI); + + statusBar.setSetupState({ + optimizationEnabled: config.enablePromptTransformation, + hasOpenAIKey: Boolean(openAiKey), + setupChecklist: checklist, + }); + }; + + void syncTransformationProviderLabel(); + configRepository.onConfigChange(() => { + void syncTransformationProviderLabel(); + }); + + // Sync status bar with recorder state and keybinding context audioRecorder.onStateChange(state => { statusBar.setState(state); + void vscode.commands.executeCommand( + 'setContext', + 'promptimize.isRecording', + state === RecordingState.RECORDING + ); }); // Commands - const startCommand = registerStartRecordingCommand(context, startRecordingUseCase); - const stopCommand = registerStopRecordingCommand(context, { + const startCommand = registerStartRecordingCommand( + context, + configRepository, + transformerFactory, + startRecordingUseCase + ); + const startTranscribeCommand = registerStartTranscribeRecordingCommand( + context, + configRepository, + startRecordingUseCase + ); + const startPromptimizeCommand = registerStartPromptimizeRecordingCommand( + context, + configRepository, + transformerFactory, + startRecordingUseCase + ); + const stopCommand = registerStopRecordingCommand(context); + const stopTranscribeCommand = 
registerStopTranscribeRecordingCommand(context, { + stopRecordingUseCase, + transcribeUseCase, + insertUseCase, + }); + const stopPromptimizeCommand = registerStopPromptimizeRecordingCommand(context, { stopRecordingUseCase, transcribeUseCase, transformUseCase, @@ -129,43 +183,82 @@ export function activate(context: vscode.ExtensionContext): void { context, configRepository, modelService, + transformerFactory, + logger + ); + const configureProviderCommand = registerConfigureTransformationProviderCommand( + context, + configRepository, + transformerFactory, + modelService, + logger + ); + const testTransformationCommand = registerTestTransformationCommand( + context, + promptTransformer, + configRepository, + modelService, + logger + ); + const firstTimeSetupCommand = registerFirstTimeSetupCommand( + context, + configRepository, + transformerFactory, + modelService, + promptTransformer, + logger + ); + const openConfigurationPanelCommand = registerOpenConfigurationPanelCommand( + context, + configRepository, + transformerFactory, + modelService, + promptTransformer, logger ); context.subscriptions.push( startCommand, + startTranscribeCommand, + startPromptimizeCommand, stopCommand, + stopTranscribeCommand, + stopPromptimizeCommand, cancelCommand, configureCommand, - configureModelCommand + configureModelCommand, + configureProviderCommand, + testTransformationCommand, + firstTimeSetupCommand, + openConfigurationPanelCommand ); // ======================================== // STARTUP CHECKS // ======================================== - // Check if API key is configured - void configRepository.getConfig().then(config => { - if (!config.apiKey) { - logger.warn('OpenAI API Key not configured'); - void vscode.window - .showWarningMessage( - 'Cursor Whisper: OpenAI API Key not configured', - 'Configure Now', - 'Later' - ) - .then(selection => { - if (selection === 'Configure Now') { - void vscode.commands.executeCommand('cursor-whisper.configureApiKey'); - } - }); - } else 
{ - logger.info('Configuration loaded successfully'); + void validateConfigurationOnStartup(configRepository, transformerFactory).then(async issue => { + if (issue) { + logger.warn(issue.message); + const selection = await vscode.window.showWarningMessage( + issue.message, + 'Configure Now', + 'Open Configuration', + 'Later' + ); + if (selection === 'Configure Now') { + await vscode.commands.executeCommand(issue.configureCommand); + } else if (selection === 'Open Configuration') { + await vscode.commands.executeCommand('promptimize.openConfigurationPanel'); + } + return; } + + logger.info('Configuration loaded successfully'); }); - logger.info('Cursor Whisper extension fully activated'); - console.log('✨ Cursor Whisper is ready!'); + logger.info('Promptimize extension fully activated'); + logger.info('Promptimize is ready'); } /** @@ -175,5 +268,4 @@ export function activate(context: vscode.ExtensionContext): void { export function deactivate(): void { activeAudioRecorder?.dispose(); activeAudioRecorder = null; - console.log('Cursor Whisper extension is now deactivated'); } diff --git a/src/infrastructure/audio/NativeAudioRecorder.ts b/src/infrastructure/audio/NativeAudioRecorder.ts index d4a5ce5..55c3492 100644 --- a/src/infrastructure/audio/NativeAudioRecorder.ts +++ b/src/infrastructure/audio/NativeAudioRecorder.ts @@ -55,16 +55,16 @@ export class NativeAudioRecorder implements IAudioRecorder { constructor(private readonly logger: ILogger) {} - async startRecording(): Promise { - if (this.state !== RecordingState.IDLE) { - throw new RecordingError('Already recording or processing'); - } + startRecording(): Promise { + try { + if (this.state !== RecordingState.IDLE) { + throw new RecordingError('Already recording or processing'); + } - this.logger.info('Starting native audio recorder'); - this.sampleChunks = []; - this.captureError = null; + this.logger.info('Starting native audio recorder'); + this.sampleChunks = []; + this.captureError = null; - try { 
this.recorder = new Recorder(); this.recorder.start((error, samples) => { if (error) { @@ -82,30 +82,30 @@ export class NativeAudioRecorder implements IAudioRecorder { this.setState(RecordingState.RECORDING); this.logger.info('Native recording started successfully'); + return Promise.resolve(); } catch (error) { this.cleanupRecorder(); this.logger.error('Failed to start native recording', error as Error); if (error instanceof Error && isPermissionError(error)) { - throw new PermissionError('Microphone permission denied'); + return Promise.reject(new PermissionError('Microphone permission denied')); } - throw new RecordingError( - 'Failed to start recording', - error instanceof Error ? error : undefined + return Promise.reject( + new RecordingError('Failed to start recording', error instanceof Error ? error : undefined) ); } } - async stopRecording(): Promise { - if (this.state !== RecordingState.RECORDING) { - throw new RecordingError('No active recording to stop'); - } + stopRecording(): Promise { + try { + if (this.state !== RecordingState.RECORDING) { + throw new RecordingError('No active recording to stop'); + } - this.logger.info('Stopping native recording'); - this.setState(RecordingState.PROCESSING); + this.logger.info('Stopping native recording'); + this.setState(RecordingState.PROCESSING); - try { this.cleanupRecorder(); if (this.captureError) { @@ -113,10 +113,7 @@ export class NativeAudioRecorder implements IAudioRecorder { throw new PermissionError('Microphone permission denied'); } - throw new RecordingError( - 'Failed to capture audio', - this.captureError - ); + throw new RecordingError('Failed to capture audio', this.captureError); } const pcmBuffer = this.combineSampleChunks(); @@ -135,11 +132,11 @@ export class NativeAudioRecorder implements IAudioRecorder { this.sampleChunks = []; this.setState(RecordingState.IDLE); - return audioData; + return Promise.resolve(audioData); } catch (error) { this.sampleChunks = []; 
this.setState(RecordingState.ERROR); - throw error; + return Promise.reject(error); } } diff --git a/src/infrastructure/audio/WebviewAudioRecorder.ts b/src/infrastructure/audio/WebviewAudioRecorder.ts index 385847d..6669be9 100644 --- a/src/infrastructure/audio/WebviewAudioRecorder.ts +++ b/src/infrastructure/audio/WebviewAudioRecorder.ts @@ -1,5 +1,6 @@ import * as vscode from 'vscode'; import * as path from 'path'; +import * as fs from 'fs'; import { IAudioRecorder } from '../../application/ports/IAudioRecorder'; import { AudioData } from '../../domain/value-objects/AudioData'; import { getAudioFormatFromMimeType } from '../../domain/value-objects/AudioFormat'; @@ -20,6 +21,15 @@ type WebviewMessage = | { type: 'cancelled' } | { type: 'error'; error: string }; +function isWebviewMessage(value: unknown): value is WebviewMessage { + if (typeof value !== 'object' || value === null || !('type' in value)) { + return false; + } + + const messageType = (value as { type: unknown }).type; + return typeof messageType === 'string'; +} + /** * @deprecated Superseded by {@link NativeAudioRecorder} (ADR-0013). Retained as an * alternative implementation; not wired in `extension.ts`. 
@@ -47,15 +57,17 @@ export class WebviewAudioRecorder implements IAudioRecorder { // Create or show webview panel if (!this.panel) { this.panel = vscode.window.createWebviewPanel( - 'cursorWhisperRecorder', - 'Cursor Whisper Recorder', + 'promptimizeRecorder', + 'Promptimize Recorder', vscode.ViewColumn.One, { enableScripts: true, retainContextWhenHidden: true, localResourceRoots: [ - vscode.Uri.file(path.join(this.context.extensionPath, 'out', 'infrastructure', 'audio', 'webview')) - ] + vscode.Uri.file( + path.join(this.context.extensionPath, 'out', 'infrastructure', 'audio', 'webview') + ), + ], } ); @@ -64,7 +76,11 @@ export class WebviewAudioRecorder implements IAudioRecorder { // Handle messages from webview this.panel.webview.onDidReceiveMessage( - message => this.handleWebviewMessage(message), + message => { + if (isWebviewMessage(message)) { + this.handleWebviewMessage(message); + } + }, undefined, this.context.subscriptions ); @@ -97,7 +113,6 @@ export class WebviewAudioRecorder implements IAudioRecorder { // Send start command to webview await this.panel.webview.postMessage({ type: 'start' }); - } catch (error) { this.logger.error('Failed to start recording', error as Error); throw new RecordingError( @@ -150,7 +165,7 @@ export class WebviewAudioRecorder implements IAudioRecorder { } this.setState(RecordingState.CANCELLED); - + // Auto-reset after brief delay setTimeout(() => { if (this.state === RecordingState.CANCELLED) { @@ -176,7 +191,7 @@ export class WebviewAudioRecorder implements IAudioRecorder { this.stateListeners.forEach(listener => listener(newState)); } - private async handleWebviewMessage(message: WebviewMessage): Promise { + private handleWebviewMessage(message: WebviewMessage): void { switch (message.type) { case 'ready': this.logger.debug('Webview ready'); @@ -188,7 +203,7 @@ export class WebviewAudioRecorder implements IAudioRecorder { break; case 'audioData': - await this.handleAudioData(message); + this.handleAudioData(message); 
break; case 'cancelled': @@ -221,7 +236,7 @@ export class WebviewAudioRecorder implements IAudioRecorder { } } - private async handleAudioData(message: Extract): Promise { + private handleAudioData(message: Extract): void { try { const maxBytes = 30 * 1024 * 1024; if (message.data.length > maxBytes) { @@ -231,7 +246,7 @@ export class WebviewAudioRecorder implements IAudioRecorder { this.logger.info('Received audio data from webview', { size: message.data.length, mimeType: message.mimeType, - duration: message.duration + duration: message.duration, }); this.setState(RecordingState.PROCESSING); @@ -248,12 +263,12 @@ export class WebviewAudioRecorder implements IAudioRecorder { buffer, format, 16000, // Sample rate - 1 // Mono + 1 // Mono ); this.logger.info('Audio data processed successfully', { size: audioData.getSizeInMB().toFixed(2) + 'MB', - duration: audioData.getDurationInSeconds().toFixed(2) + 's' + duration: audioData.getDurationInSeconds().toFixed(2) + 's', }); if (this.resolveAudioData) { @@ -263,7 +278,6 @@ export class WebviewAudioRecorder implements IAudioRecorder { } this.setState(RecordingState.IDLE); - } catch (error) { this.logger.error('Failed to process audio data', error as Error); if (this.rejectAudioData) { @@ -286,8 +300,8 @@ export class WebviewAudioRecorder implements IAudioRecorder { reject(new Error('Webview initialization timeout')); }, 5000); - const disposable = this.panel!.webview.onDidReceiveMessage(message => { - if (message.type === 'ready') { + const disposable = this.panel!.webview.onDidReceiveMessage((message: unknown) => { + if (isWebviewMessage(message) && message.type === 'ready') { clearTimeout(timeout); disposable.dispose(); resolve(); @@ -307,7 +321,6 @@ export class WebviewAudioRecorder implements IAudioRecorder { 'recorder.html' ); - const fs = require('fs'); return fs.readFileSync(htmlPath, 'utf8'); } diff --git a/src/infrastructure/configuration/VSCodeConfigRepository.ts 
b/src/infrastructure/configuration/VSCodeConfigRepository.ts index 164a889..804c2ae 100644 --- a/src/infrastructure/configuration/VSCodeConfigRepository.ts +++ b/src/infrastructure/configuration/VSCodeConfigRepository.ts @@ -1,10 +1,18 @@ import * as vscode from 'vscode'; import { IConfigRepository, Config } from '../../application/ports/IConfigRepository'; import { ConfigError } from '../../domain/errors/ConfigError'; +import { + TransformationProvider, + getProviderSecretKey, + parseTransformationProvider, + PROVIDER_METADATA, +} from '../../domain/value-objects/TransformationProvider'; +import { TRANSFORMATION_SYSTEM_PROMPT } from '../transformation/transformationUtils'; +import { OpenCodePromptTransformer } from '../transformation/OpenCodePromptTransformer'; export class VSCodeConfigRepository implements IConfigRepository { - private static readonly SECTION = 'cursorWhisper'; - private static readonly SECRET_KEY = 'cursor-whisper.openai.apiKey'; + private static readonly SECTION = 'promptimize'; + private static readonly SECRET_KEY = 'promptimize.openai.apiKey'; private static readonly LEGACY_SECRET_KEY = 'openai-api-key'; static readonly DEFAULT_TRANSFORMATION_MODEL = 'gpt-4o'; private callbacks: Array<(config: Config) => void> = []; @@ -13,7 +21,6 @@ export class VSCodeConfigRepository implements IConfigRepository { _context: vscode.ExtensionContext, private readonly secretStorage: vscode.SecretStorage ) { - // Watch for configuration changes vscode.workspace.onDidChangeConfiguration(event => { if (event.affectsConfiguration(VSCodeConfigRepository.SECTION)) { void this.getConfig().then(config => { @@ -26,66 +33,141 @@ export class VSCodeConfigRepository implements IConfigRepository { async getConfig(): Promise { const config = vscode.workspace.getConfiguration(VSCodeConfigRepository.SECTION); - // Get API key from secure storage (migrate legacy key if present) - let apiKey = await this.secretStorage.get(VSCodeConfigRepository.SECRET_KEY); - if (!apiKey) { - 
const legacyKey = await this.secretStorage.get(VSCodeConfigRepository.LEGACY_SECRET_KEY); - if (legacyKey) { - apiKey = legacyKey; - await this.secretStorage.store(VSCodeConfigRepository.SECRET_KEY, legacyKey); - await this.secretStorage.delete(VSCodeConfigRepository.LEGACY_SECRET_KEY); - } - } + const apiKey = await this.getProviderApiKey(TransformationProvider.OpenAI); return { apiKey, + transformationProvider: parseTransformationProvider( + config.get('transformationProvider'), + TransformationProvider.OpenAI + ), transcriptionLanguage: config.get('transcriptionLanguage', 'auto'), - enablePromptTransformation: config.get('enablePromptTransformation', false), + enablePromptTransformation: config.get('enablePromptTransformation', true), transformationModel: config.get( 'transformationModel', VSCodeConfigRepository.DEFAULT_TRANSFORMATION_MODEL ), + anthropicModel: config.get( + 'anthropicModel', + PROVIDER_METADATA[TransformationProvider.Anthropic].defaultModel + ), + googleModel: config.get( + 'googleModel', + PROVIDER_METADATA[TransformationProvider.Google].defaultModel + ), + azureEndpoint: config.get('azureEndpoint', ''), + azureDeployment: config.get('azureDeployment', ''), + ollamaBaseUrl: config.get('ollamaBaseUrl', 'http://localhost:11434'), + ollamaModel: config.get( + 'ollamaModel', + PROVIDER_METADATA[TransformationProvider.Ollama].defaultModel + ), + openCodeBaseUrl: config.get( + 'openCodeBaseUrl', + OpenCodePromptTransformer.DEFAULT_BASE_URL + ), + openCodeModel: config.get( + 'openCodeModel', + PROVIDER_METADATA[TransformationProvider.OpenCode].defaultModel + ), + openRouterModel: config.get( + 'openRouterModel', + PROVIDER_METADATA[TransformationProvider.OpenRouter].defaultModel + ), + cursorModel: config.get( + 'cursorModel', + PROVIDER_METADATA[TransformationProvider.Cursor].defaultModel + ), audioQuality: config.get<'low' | 'medium' | 'high'>('audioQuality', 'high'), maxRecordingDuration: config.get('maxRecordingDuration', 120), 
showNotifications: config.get('showNotifications', true), transcriptionHint: config.get('transcriptionHint'), + transformationSystemPrompt: config.get( + 'transformationSystemPrompt', + TRANSFORMATION_SYSTEM_PROMPT + ), }; } - async updateConfig(partialConfig: Partial): Promise { - const config = vscode.workspace.getConfiguration(VSCodeConfigRepository.SECTION); + async getProviderApiKey(provider: TransformationProvider): Promise { + if (provider === TransformationProvider.OpenAI) { + let apiKey = await this.secretStorage.get(getProviderSecretKey(provider)); + if (!apiKey) { + apiKey = await this.secretStorage.get(VSCodeConfigRepository.SECRET_KEY); + } + if (!apiKey) { + const legacyKey = await this.secretStorage.get(VSCodeConfigRepository.LEGACY_SECRET_KEY); + if (legacyKey) { + apiKey = legacyKey; + await this.secretStorage.store(getProviderSecretKey(provider), legacyKey); + await this.secretStorage.store(VSCodeConfigRepository.SECRET_KEY, legacyKey); + await this.secretStorage.delete(VSCodeConfigRepository.LEGACY_SECRET_KEY); + } + } + return apiKey; + } - // Handle API key separately (secure storage) - if (partialConfig.apiKey !== undefined) { - try { - if (partialConfig.apiKey) { - await this.secretStorage.store( - VSCodeConfigRepository.SECRET_KEY, - partialConfig.apiKey - ); + return this.secretStorage.get(getProviderSecretKey(provider)); + } + + async setProviderApiKey( + provider: TransformationProvider, + apiKey: string | undefined + ): Promise { + try { + const secretKey = getProviderSecretKey(provider); + + if (apiKey) { + await this.secretStorage.store(secretKey, apiKey); + if (provider === TransformationProvider.OpenAI) { + await this.secretStorage.store(VSCodeConfigRepository.SECRET_KEY, apiKey); await this.secretStorage.delete(VSCodeConfigRepository.LEGACY_SECRET_KEY); - } else { + } + } else { + await this.secretStorage.delete(secretKey); + if (provider === TransformationProvider.OpenAI) { await 
this.secretStorage.delete(VSCodeConfigRepository.SECRET_KEY); await this.secretStorage.delete(VSCodeConfigRepository.LEGACY_SECRET_KEY); } - } catch (error) { - throw new ConfigError( - 'Failed to save API key securely. Check your system keychain settings.' - ); } + } catch { + throw new ConfigError( + 'Failed to save API key securely. Check your system keychain settings.' + ); + } + } + + async updateConfig(partialConfig: Partial): Promise { + const config = vscode.workspace.getConfiguration(VSCodeConfigRepository.SECTION); + + if (partialConfig.apiKey !== undefined) { + await this.setProviderApiKey(TransformationProvider.OpenAI, partialConfig.apiKey); } - // Update other settings const updates: Array> = []; - if (partialConfig.transcriptionLanguage !== undefined) { - updates.push( - config.update( - 'transcriptionLanguage', - partialConfig.transcriptionLanguage, - vscode.ConfigurationTarget.Global - ) - ); + const stringFields: Array = [ + 'transformationProvider', + 'transcriptionLanguage', + 'transformationModel', + 'anthropicModel', + 'googleModel', + 'azureEndpoint', + 'azureDeployment', + 'ollamaBaseUrl', + 'ollamaModel', + 'openCodeBaseUrl', + 'openCodeModel', + 'openRouterModel', + 'cursorModel', + 'transformationSystemPrompt', + ]; + + for (const field of stringFields) { + const value = partialConfig[field]; + if (value !== undefined && typeof value === 'string') { + updates.push(config.update(field, value, vscode.ConfigurationTarget.Global)); + } } if (partialConfig.enablePromptTransformation !== undefined) { @@ -98,16 +180,6 @@ export class VSCodeConfigRepository implements IConfigRepository { ); } - if (partialConfig.transformationModel !== undefined) { - updates.push( - config.update( - 'transformationModel', - partialConfig.transformationModel, - vscode.ConfigurationTarget.Global - ) - ); - } - if (partialConfig.audioQuality !== undefined) { updates.push( config.update('audioQuality', partialConfig.audioQuality, vscode.ConfigurationTarget.Global) 
diff --git a/src/infrastructure/insertion/ChatParticipantInserter.ts b/src/infrastructure/insertion/ChatParticipantInserter.ts index a44e131..957306b 100644 --- a/src/infrastructure/insertion/ChatParticipantInserter.ts +++ b/src/infrastructure/insertion/ChatParticipantInserter.ts @@ -28,10 +28,7 @@ export class ChatParticipantInserter implements ITextInserter { hasCursorComposer: this.hasCursorComposer, }); } catch (error) { - this.logger?.warn( - 'ChatParticipantInserter: Failed to detect chat commands', - error as Error - ); + this.logger?.warn('ChatParticipantInserter: Failed to detect chat commands', error as Error); this.commandsInitialized = true; } } diff --git a/src/infrastructure/insertion/EditorTextInserter.ts b/src/infrastructure/insertion/EditorTextInserter.ts index da20579..1ba46a2 100644 --- a/src/infrastructure/insertion/EditorTextInserter.ts +++ b/src/infrastructure/insertion/EditorTextInserter.ts @@ -21,8 +21,7 @@ export class EditorTextInserter implements ITextInserter { this.logger?.debug('EditorTextInserter: Inserting text', { documentLanguage: editor.document.languageId, - cursorPosition: - editor.selection.active.line + ':' + editor.selection.active.character, + cursorPosition: editor.selection.active.line + ':' + editor.selection.active.character, textLength: text.length, }); diff --git a/src/infrastructure/logging/ConsoleLogger.ts b/src/infrastructure/logging/ConsoleLogger.ts index 5597594..c4e8480 100644 --- a/src/infrastructure/logging/ConsoleLogger.ts +++ b/src/infrastructure/logging/ConsoleLogger.ts @@ -1,3 +1,4 @@ +/* eslint-disable no-console -- ConsoleLogger intentionally writes to the console. 
*/ import { ILogger, LogLevel } from '../../application/ports/ILogger'; /** diff --git a/src/infrastructure/logging/VSCodeOutputChannelLogger.ts b/src/infrastructure/logging/VSCodeOutputChannelLogger.ts index c6ac599..d47e9c8 100644 --- a/src/infrastructure/logging/VSCodeOutputChannelLogger.ts +++ b/src/infrastructure/logging/VSCodeOutputChannelLogger.ts @@ -5,7 +5,7 @@ export class VSCodeOutputChannelLogger implements ILogger { private level: LogLevel = LogLevel.INFO; private outputChannel: vscode.OutputChannel; - constructor(channelName: string = 'Cursor Whisper') { + constructor(channelName: string = 'Promptimize') { this.outputChannel = vscode.window.createOutputChannel(channelName); } diff --git a/src/infrastructure/openai/OpenAIModelService.ts b/src/infrastructure/openai/OpenAIModelService.ts index 8feec91..451bba1 100644 --- a/src/infrastructure/openai/OpenAIModelService.ts +++ b/src/infrastructure/openai/OpenAIModelService.ts @@ -3,7 +3,10 @@ import { ApiKey } from '../../domain/value-objects/ApiKey'; import { ILogger } from '../../application/ports/ILogger'; export class OpenAIModelServiceError extends Error { - constructor(message: string, public readonly cause?: Error) { + constructor( + message: string, + public readonly cause?: Error + ) { super(message); this.name = 'OpenAIModelServiceError'; } diff --git a/src/infrastructure/transformation/AnthropicPromptTransformer.ts b/src/infrastructure/transformation/AnthropicPromptTransformer.ts new file mode 100644 index 0000000..36da365 --- /dev/null +++ b/src/infrastructure/transformation/AnthropicPromptTransformer.ts @@ -0,0 +1,113 @@ +import Anthropic from '@anthropic-ai/sdk'; +import { IPromptTransformer, PromptContext } from '../../application/ports/IPromptTransformer'; +import { TransformedPrompt } from '../../application/dto/TransformedPrompt'; +import { ILogger } from '../../application/ports/ILogger'; +import { + TransformationError, + buildUserPrompt, + calculateImprovements, +} from 
'./transformationUtils'; + +export class AnthropicPromptTransformer implements IPromptTransformer { + private client: Anthropic | null = null; + private cachedApiKey: string | null = null; + static readonly DEFAULT_MODEL = 'claude-3-5-sonnet-20241022'; + + constructor( + private readonly getApiKey: () => Promise, + private readonly getModel: () => Promise, + private readonly getSystemPrompt: () => Promise, + private readonly logger: ILogger + ) {} + + private async ensureClient(): Promise { + const apiKeyStr = await this.getApiKey(); + if (!apiKeyStr) { + throw new TransformationError('Anthropic API key not configured'); + } + + if (this.client && this.cachedApiKey === apiKeyStr) { + return this.client; + } + + this.client = new Anthropic({ apiKey: apiKeyStr }); + this.cachedApiKey = apiKeyStr; + + return this.client; + } + + private async resolveModel(): Promise { + const model = await this.getModel(); + return model || AnthropicPromptTransformer.DEFAULT_MODEL; + } + + async transform(transcription: string, context?: PromptContext): Promise { + this.logger.info('Starting Anthropic prompt transformation', { + textLength: transcription.length, + hasContext: !!context, + }); + + const client = await this.ensureClient(); + const model = await this.resolveModel(); + const systemPrompt = await this.getSystemPrompt(); + const userPrompt = buildUserPrompt(transcription, context); + + try { + const startTime = Date.now(); + + this.logger.debug('Anthropic transformation request', { + model, + promptLength: userPrompt.length, + }); + + const response = await client.messages.create({ + model, + max_tokens: 2000, + system: systemPrompt, + messages: [{ role: 'user', content: userPrompt }], + temperature: 0.3, + }); + + const duration = (Date.now() - startTime) / 1000; + const textBlock = response.content.find(block => block.type === 'text'); + const transformedText = textBlock ? 
textBlock.text : transcription; + const improvements = calculateImprovements(transcription, transformedText); + + this.logger.info('Anthropic prompt transformation completed', { + model, + duration: duration.toFixed(2) + 's', + originalLength: transcription.length, + transformedLength: transformedText.length, + improvements: improvements.length, + }); + + return { + originalText: transcription, + transformedText, + improvements, + }; + } catch (error) { + this.logger.error('Anthropic prompt transformation failed', error as Error); + + if (error instanceof Anthropic.APIError) { + if (error.status === 401) { + throw new TransformationError('Invalid Anthropic API key', error); + } + if (error.status === 404) { + throw new TransformationError( + `Model '${model}' is not available. Choose another Anthropic model in settings.`, + error + ); + } + if (error.status === 429) { + throw new TransformationError('Rate limit exceeded. Please try again later.', error); + } + } + + throw new TransformationError( + 'Transformation failed', + error instanceof Error ? 
error : undefined + ); + } + } +} diff --git a/src/infrastructure/transformation/AzureOpenAIPromptTransformer.ts b/src/infrastructure/transformation/AzureOpenAIPromptTransformer.ts new file mode 100644 index 0000000..1850cb7 --- /dev/null +++ b/src/infrastructure/transformation/AzureOpenAIPromptTransformer.ts @@ -0,0 +1,133 @@ +import OpenAI from 'openai'; +import { IPromptTransformer, PromptContext } from '../../application/ports/IPromptTransformer'; +import { TransformedPrompt } from '../../application/dto/TransformedPrompt'; +import { ILogger } from '../../application/ports/ILogger'; +import { + TransformationError, + buildUserPrompt, + calculateImprovements, +} from './transformationUtils'; + +export interface AzureOpenAIConfig { + endpoint: string; + deployment: string; +} + +export class AzureOpenAIPromptTransformer implements IPromptTransformer { + private client: OpenAI | null = null; + private cachedKey: string | null = null; + + constructor( + private readonly getApiKey: () => Promise, + private readonly getAzureConfig: () => Promise, + private readonly getSystemPrompt: () => Promise, + private readonly logger: ILogger + ) {} + + private normalizeEndpoint(endpoint: string): string { + return endpoint.replace(/\/+$/, ''); + } + + private async ensureClient(): Promise<{ client: OpenAI; deployment: string }> { + const apiKeyStr = await this.getApiKey(); + if (!apiKeyStr) { + throw new TransformationError('Azure OpenAI API key not configured'); + } + + const azureConfig = await this.getAzureConfig(); + if (!azureConfig.endpoint.trim()) { + throw new TransformationError('Azure OpenAI endpoint is not configured'); + } + if (!azureConfig.deployment.trim()) { + throw new TransformationError('Azure OpenAI deployment name is not configured'); + } + + const endpoint = this.normalizeEndpoint(azureConfig.endpoint); + const deployment = azureConfig.deployment.trim(); + const cacheKey = `${apiKeyStr}:${endpoint}:${deployment}`; + + if (this.client && this.cachedKey === 
cacheKey) { + return { client: this.client, deployment }; + } + + this.client = new OpenAI({ + apiKey: apiKeyStr, + baseURL: `${endpoint}/openai/deployments/${deployment}`, + defaultQuery: { 'api-version': '2024-02-15-preview' }, + defaultHeaders: { 'api-key': apiKeyStr }, + }); + this.cachedKey = cacheKey; + + return { client: this.client, deployment }; + } + + async transform(transcription: string, context?: PromptContext): Promise { + this.logger.info('Starting Azure OpenAI prompt transformation', { + textLength: transcription.length, + hasContext: !!context, + }); + + const { client, deployment } = await this.ensureClient(); + const systemPrompt = await this.getSystemPrompt(); + const userPrompt = buildUserPrompt(transcription, context); + + try { + const startTime = Date.now(); + + this.logger.debug('Azure OpenAI transformation request', { + deployment, + promptLength: userPrompt.length, + }); + + const response = await client.chat.completions.create({ + model: deployment, + messages: [ + { role: 'system', content: systemPrompt }, + { role: 'user', content: userPrompt }, + ], + temperature: 0.3, + max_tokens: 2000, + }); + + const duration = (Date.now() - startTime) / 1000; + const transformedText = response.choices[0]?.message?.content || transcription; + const improvements = calculateImprovements(transcription, transformedText); + + this.logger.info('Azure OpenAI prompt transformation completed', { + deployment, + duration: duration.toFixed(2) + 's', + originalLength: transcription.length, + transformedLength: transformedText.length, + improvements: improvements.length, + }); + + return { + originalText: transcription, + transformedText, + improvements, + }; + } catch (error) { + this.logger.error('Azure OpenAI prompt transformation failed', error as Error); + + if (error instanceof OpenAI.APIError) { + if (error.status === 401) { + throw new TransformationError('Invalid Azure OpenAI API key', error); + } + if (error.status === 404) { + throw new 
TransformationError( + `Deployment '${deployment}' was not found. Check your Azure OpenAI configuration.`, + error + ); + } + if (error.status === 429) { + throw new TransformationError('Rate limit exceeded. Please try again later.', error); + } + } + + throw new TransformationError( + 'Transformation failed', + error instanceof Error ? error : undefined + ); + } + } +} diff --git a/src/infrastructure/transformation/CursorPromptTransformer.ts b/src/infrastructure/transformation/CursorPromptTransformer.ts new file mode 100644 index 0000000..5f46e0f --- /dev/null +++ b/src/infrastructure/transformation/CursorPromptTransformer.ts @@ -0,0 +1,129 @@ +import { Agent } from '@cursor/sdk'; +import { IPromptTransformer, PromptContext } from '../../application/ports/IPromptTransformer'; +import { TransformedPrompt } from '../../application/dto/TransformedPrompt'; +import { ILogger } from '../../application/ports/ILogger'; +import { + TransformationError, + buildUserPrompt, + calculateImprovements, +} from './transformationUtils'; + +export const CURSOR_MODELS = [ + 'composer-2.5', + 'composer-2.5-fast', + 'claude-4.5-sonnet', + 'gpt-5.1', + 'gpt-5.2-codex', +] as const; + +export class CursorPromptTransformer implements IPromptTransformer { + static readonly DEFAULT_MODEL = 'composer-2.5'; + + constructor( + private readonly getApiKey: () => Promise, + private readonly getModel: () => Promise, + private readonly getSystemPrompt: () => Promise, + private readonly logger: ILogger + ) {} + + async transform(transcription: string, context?: PromptContext): Promise { + this.logger.info('Starting Cursor SDK prompt transformation', { + textLength: transcription.length, + hasContext: !!context, + }); + + const apiKey = await this.getApiKey(); + if (!apiKey) { + throw new TransformationError( + 'Cursor API key not configured. 
Get your key at https://cursor.com/dashboard/integrations' + ); + } + + const model = (await this.getModel()) || CursorPromptTransformer.DEFAULT_MODEL; + const systemPrompt = await this.getSystemPrompt(); + const userPrompt = buildUserPrompt(transcription, context); + + try { + const startTime = Date.now(); + + this.logger.debug('Cursor SDK transformation request', { + model, + promptLength: userPrompt.length, + }); + + const fullPrompt = `${systemPrompt}\n\n${userPrompt}`; + + const result = await Agent.prompt(fullPrompt, { + apiKey, + model: { id: model }, + local: { cwd: process.cwd() }, + }); + + const duration = (Date.now() - startTime) / 1000; + + if (result.status === 'error') { + throw new TransformationError( + 'Cursor agent run failed. Check your API key and network connection.' + ); + } + + const transformedText = result.result?.trim() || transcription; + const improvements = calculateImprovements(transcription, transformedText); + + this.logger.info('Cursor SDK prompt transformation completed', { + model, + duration: duration.toFixed(2) + 's', + status: result.status, + originalLength: transcription.length, + transformedLength: transformedText.length, + improvements: improvements.length, + }); + + return { + originalText: transcription, + transformedText, + improvements, + }; + } catch (error) { + this.logger.error('Cursor SDK prompt transformation failed', error as Error); + + if (error instanceof TransformationError) { + throw error; + } + + if (error instanceof Error) { + const message = error.message.toLowerCase(); + + if (message.includes('api key') || message.includes('auth') || message.includes('401')) { + throw new TransformationError( + 'Invalid Cursor API key. Get your key at https://cursor.com/dashboard/integrations', + error + ); + } + + if (message.includes('model') || message.includes('404')) { + throw new TransformationError( + `Model '${model}' is not available. 
Try 'composer-2.5', 'claude-4.5-sonnet', or 'gpt-5.1'.`, + error + ); + } + + if ( + message.includes('network') || + message.includes('econnrefused') || + message.includes('timeout') + ) { + throw new TransformationError( + 'Network error connecting to Cursor API. Check your internet connection.', + error + ); + } + } + + throw new TransformationError( + 'Transformation failed', + error instanceof Error ? error : undefined + ); + } + } +} diff --git a/src/infrastructure/transformation/GooglePromptTransformer.ts b/src/infrastructure/transformation/GooglePromptTransformer.ts new file mode 100644 index 0000000..da0a309 --- /dev/null +++ b/src/infrastructure/transformation/GooglePromptTransformer.ts @@ -0,0 +1,110 @@ +import { GoogleGenerativeAI } from '@google/generative-ai'; +import { IPromptTransformer, PromptContext } from '../../application/ports/IPromptTransformer'; +import { TransformedPrompt } from '../../application/dto/TransformedPrompt'; +import { ILogger } from '../../application/ports/ILogger'; +import { + TransformationError, + buildUserPrompt, + calculateImprovements, +} from './transformationUtils'; + +export class GooglePromptTransformer implements IPromptTransformer { + private client: GoogleGenerativeAI | null = null; + private cachedApiKey: string | null = null; + static readonly DEFAULT_MODEL = 'gemini-1.5-pro'; + + constructor( + private readonly getApiKey: () => Promise, + private readonly getModel: () => Promise, + private readonly getSystemPrompt: () => Promise, + private readonly logger: ILogger + ) {} + + private async ensureClient(): Promise { + const apiKeyStr = await this.getApiKey(); + if (!apiKeyStr) { + throw new TransformationError('Google API key not configured'); + } + + if (this.client && this.cachedApiKey === apiKeyStr) { + return this.client; + } + + this.client = new GoogleGenerativeAI(apiKeyStr); + this.cachedApiKey = apiKeyStr; + + return this.client; + } + + private async resolveModel(): Promise { + const model = await 
this.getModel(); + return model || GooglePromptTransformer.DEFAULT_MODEL; + } + + async transform(transcription: string, context?: PromptContext): Promise { + this.logger.info('Starting Google Gemini prompt transformation', { + textLength: transcription.length, + hasContext: !!context, + }); + + const client = await this.ensureClient(); + const modelName = await this.resolveModel(); + const systemPrompt = await this.getSystemPrompt(); + const userPrompt = buildUserPrompt(transcription, context); + + try { + const startTime = Date.now(); + const model = client.getGenerativeModel({ + model: modelName, + systemInstruction: systemPrompt, + }); + + this.logger.debug('Google Gemini transformation request', { + model: modelName, + promptLength: userPrompt.length, + }); + + const response = await model.generateContent({ + contents: [{ role: 'user', parts: [{ text: userPrompt }] }], + generationConfig: { + temperature: 0.3, + maxOutputTokens: 2000, + }, + }); + + const duration = (Date.now() - startTime) / 1000; + const transformedText = response.response.text() || transcription; + const improvements = calculateImprovements(transcription, transformedText); + + this.logger.info('Google Gemini prompt transformation completed', { + model: modelName, + duration: duration.toFixed(2) + 's', + originalLength: transcription.length, + transformedLength: transformedText.length, + improvements: improvements.length, + }); + + return { + originalText: transcription, + transformedText, + improvements, + }; + } catch (error) { + this.logger.error('Google Gemini prompt transformation failed', error as Error); + + if (error instanceof Error) { + if (error.message.includes('API key not valid')) { + throw new TransformationError('Invalid Google API key', error); + } + if (error.message.includes('429')) { + throw new TransformationError('Rate limit exceeded. Please try again later.', error); + } + } + + throw new TransformationError( + 'Transformation failed', + error instanceof Error ? 
error : undefined + ); + } + } +} diff --git a/src/infrastructure/transformation/OllamaPromptTransformer.ts b/src/infrastructure/transformation/OllamaPromptTransformer.ts new file mode 100644 index 0000000..2bb453d --- /dev/null +++ b/src/infrastructure/transformation/OllamaPromptTransformer.ts @@ -0,0 +1,138 @@ +import axios from 'axios'; +import { IPromptTransformer, PromptContext } from '../../application/ports/IPromptTransformer'; +import { TransformedPrompt } from '../../application/dto/TransformedPrompt'; +import { ILogger } from '../../application/ports/ILogger'; +import { + TransformationError, + buildUserPrompt, + calculateImprovements, +} from './transformationUtils'; + +export interface OllamaConfig { + baseUrl: string; + model: string; +} + +interface OllamaGenerateResponse { + response?: string; + error?: string; +} + +interface OllamaTagsResponse { + models?: Array<{ name: string }>; +} + +export class OllamaPromptTransformer implements IPromptTransformer { + static readonly DEFAULT_BASE_URL = 'http://localhost:11434'; + static readonly DEFAULT_MODEL = 'llama3.1:8b'; + + constructor( + private readonly getOllamaConfig: () => Promise, + private readonly getSystemPrompt: () => Promise, + private readonly logger: ILogger + ) {} + + private normalizeBaseUrl(baseUrl: string): string { + return baseUrl.replace(/\/+$/, ''); + } + + static async isAvailable(baseUrl: string): Promise { + try { + const normalized = baseUrl.replace(/\/+$/, ''); + const response = await axios.get(`${normalized}/api/tags`, { timeout: 3000 }); + return response.status === 200; + } catch { + return false; + } + } + + static async listModels(baseUrl: string): Promise { + const normalized = baseUrl.replace(/\/+$/, ''); + const response = await axios.get(`${normalized}/api/tags`, { + timeout: 5000, + }); + return (response.data.models ?? 
[]).map(model => model.name).sort(); + } + + async transform(transcription: string, context?: PromptContext): Promise { + this.logger.info('Starting Ollama prompt transformation', { + textLength: transcription.length, + hasContext: !!context, + }); + + const config = await this.getOllamaConfig(); + const baseUrl = this.normalizeBaseUrl( + config.baseUrl || OllamaPromptTransformer.DEFAULT_BASE_URL + ); + const model = config.model || OllamaPromptTransformer.DEFAULT_MODEL; + const systemPrompt = await this.getSystemPrompt(); + const userPrompt = buildUserPrompt(transcription, context); + + try { + const startTime = Date.now(); + + this.logger.debug('Ollama transformation request', { + baseUrl, + model, + promptLength: userPrompt.length, + }); + + const response = await axios.post( + `${baseUrl}/api/generate`, + { + model, + prompt: `${systemPrompt}\n\n${userPrompt}`, + stream: false, + options: { + temperature: 0.3, + }, + }, + { timeout: 120000 } + ); + + if (response.data.error) { + throw new TransformationError(response.data.error); + } + + const duration = (Date.now() - startTime) / 1000; + const transformedText = response.data.response?.trim() || transcription; + const improvements = calculateImprovements(transcription, transformedText); + + this.logger.info('Ollama prompt transformation completed', { + model, + duration: duration.toFixed(2) + 's', + originalLength: transcription.length, + transformedLength: transformedText.length, + improvements: improvements.length, + }); + + return { + originalText: transcription, + transformedText, + improvements, + }; + } catch (error) { + this.logger.error('Ollama prompt transformation failed', error as Error); + + if (axios.isAxiosError(error)) { + if (error.code === 'ECONNREFUSED') { + throw new TransformationError( + `Cannot connect to Ollama at ${baseUrl}. Ensure Ollama is running.`, + error + ); + } + if (error.response?.status === 404) { + throw new TransformationError( + `Model '${model}' was not found in Ollama. 
Pull it with: ollama pull ${model}`, + error + ); + } + } + + throw new TransformationError( + 'Transformation failed', + error instanceof Error ? error : undefined + ); + } + } +} diff --git a/src/infrastructure/transformation/OpenAIPromptTransformer.ts b/src/infrastructure/transformation/OpenAIPromptTransformer.ts index 06c2621..027d93a 100644 --- a/src/infrastructure/transformation/OpenAIPromptTransformer.ts +++ b/src/infrastructure/transformation/OpenAIPromptTransformer.ts @@ -3,41 +3,21 @@ import { IPromptTransformer, PromptContext } from '../../application/ports/IProm import { TransformedPrompt } from '../../application/dto/TransformedPrompt'; import { ILogger } from '../../application/ports/ILogger'; import { ApiKey } from '../../domain/value-objects/ApiKey'; - -export class TransformationError extends Error { - constructor(message: string, public readonly cause?: Error) { - super(message); - this.name = 'TransformationError'; - } -} +import { + TransformationError, + buildUserPrompt, + calculateImprovements, +} from './transformationUtils'; export class OpenAIPromptTransformer implements IPromptTransformer { private client: OpenAI | null = null; private cachedApiKey: string | null = null; static readonly DEFAULT_MODEL = 'gpt-4o'; - private static readonly SYSTEM_PROMPT = `You are an expert at transforming natural speech into structured, optimized prompts for AI coding assistants. - -Given a voice transcription, transform it into a clear, structured prompt following these rules: - -1. Remove filler words ("um", "uh", "like", etc.) -2. Fix grammar and sentence structure -3. Preserve technical terms exactly -4. Structure into sections when appropriate: - - Context (what's the situation) - - Objective (what needs to be done) - - Requirements (specific needs) - - Constraints (limitations or preferences) - -5. Make it concise but complete -6. Use technical language appropriate for developers -7. 
Remove redundancy - -Output ONLY the transformed prompt, no explanations.`; - constructor( private readonly getApiKey: () => Promise, private readonly getModel: () => Promise, + private readonly getSystemPrompt: () => Promise, private readonly logger: ILogger ) {} @@ -66,29 +46,20 @@ Output ONLY the transformed prompt, no explanations.`; } async transform(transcription: string, context?: PromptContext): Promise { - this.logger.info('Starting prompt transformation', { + this.logger.info('Starting OpenAI prompt transformation', { textLength: transcription.length, hasContext: !!context, }); const client = await this.ensureClient(); const model = await this.resolveModel(); + const systemPrompt = await this.getSystemPrompt(); + const userPrompt = buildUserPrompt(transcription, context); try { - // Build user prompt with context - let userPrompt = `Transform this voice transcription into a clear, structured prompt:\n\n${transcription}`; - - if (context?.editorLanguage) { - userPrompt += `\n\nContext: User is working in ${context.editorLanguage}`; - } - - if (context?.projectType) { - userPrompt += `\nProject type: ${context.projectType}`; - } - const startTime = Date.now(); - this.logger.debug('GPT transformation request', { + this.logger.debug('OpenAI transformation request', { model, temperature: 0.3, promptLength: userPrompt.length, @@ -97,27 +68,18 @@ Output ONLY the transformed prompt, no explanations.`; const response = await client.chat.completions.create({ model, messages: [ - { - role: 'system', - content: OpenAIPromptTransformer.SYSTEM_PROMPT, - }, - { - role: 'user', - content: userPrompt, - }, + { role: 'system', content: systemPrompt }, + { role: 'user', content: userPrompt }, ], - temperature: 0.3, // Low temperature for consistency + temperature: 0.3, max_tokens: 2000, }); const duration = (Date.now() - startTime) / 1000; - const transformedText = response.choices[0]?.message?.content || transcription; + const improvements = 
calculateImprovements(transcription, transformedText); - // Calculate improvements (simple heuristics) - const improvements = this.calculateImprovements(transcription, transformedText); - - this.logger.info('Prompt transformation completed', { + this.logger.info('OpenAI prompt transformation completed', { model, duration: duration.toFixed(2) + 's', originalLength: transcription.length, @@ -131,12 +93,12 @@ Output ONLY the transformed prompt, no explanations.`; improvements, }; } catch (error) { - this.logger.error('Prompt transformation failed', error as Error); + this.logger.error('OpenAI prompt transformation failed', error as Error); if (error instanceof OpenAI.APIError) { if (error.status === 404 || error.code === 'model_not_found') { throw new TransformationError( - `Model '${model}' is not available for your API key. Use "Cursor Whisper: Configure Model" to choose another model.`, + `Model '${model}' is not available for your API key. Use "Promptimize: Configure Model" to choose another model.`, error ); } @@ -158,42 +120,4 @@ Output ONLY the transformed prompt, no explanations.`; ); } } - - private calculateImprovements(original: string, transformed: string): string[] { - const improvements: string[] = []; - - // Check if filler words were removed - const fillers = ['um', 'uh', 'like', 'you know', 'basically', 'actually']; - const hadFillers = fillers.some(filler => original.toLowerCase().includes(filler)); - const hasFewerFillers = fillers.every( - filler => - (original.toLowerCase().match(new RegExp(filler, 'g')) || []).length >= - (transformed.toLowerCase().match(new RegExp(filler, 'g')) || []).length - ); - - if (hadFillers && hasFewerFillers) { - improvements.push('Removed filler words'); - } - - // Check if text was shortened - if (transformed.length < original.length * 0.9) { - improvements.push('Made more concise'); - } - - // Check if structure was added - if ( - transformed.includes('Context:') || - transformed.includes('Objective:') || - 
transformed.includes('Requirements:') - ) { - improvements.push('Added clear structure'); - } - - // Check if grammar was improved (simple heuristic) - if (transformed.split('.').length > original.split('.').length) { - improvements.push('Improved sentence structure'); - } - - return improvements; - } } diff --git a/src/infrastructure/transformation/OpenCodePromptTransformer.ts b/src/infrastructure/transformation/OpenCodePromptTransformer.ts new file mode 100644 index 0000000..6cfe087 --- /dev/null +++ b/src/infrastructure/transformation/OpenCodePromptTransformer.ts @@ -0,0 +1,171 @@ +import axios from 'axios'; +import OpenAI from 'openai'; +import { IPromptTransformer, PromptContext } from '../../application/ports/IPromptTransformer'; +import { TransformedPrompt } from '../../application/dto/TransformedPrompt'; +import { ILogger } from '../../application/ports/ILogger'; +import { + TransformationError, + buildUserPrompt, + calculateImprovements, +} from './transformationUtils'; + +export interface OpenCodeConfig { + baseUrl: string; + model: string; +} + +interface OpenAIModelsResponse { + data?: Array<{ id: string }>; +} + +export class OpenCodePromptTransformer implements IPromptTransformer { + static readonly DEFAULT_BASE_URL = 'http://127.0.0.1:4010/v1'; + + private client: OpenAI | null = null; + private cachedKey: string | null = null; + + constructor( + private readonly getOpenCodeConfig: () => Promise, + private readonly getApiKey: () => Promise, + private readonly getSystemPrompt: () => Promise, + private readonly logger: ILogger + ) {} + + private normalizeBaseUrl(baseUrl: string): string { + return baseUrl.replace(/\/+$/, ''); + } + + static async isAvailable(baseUrl: string, apiKey?: string): Promise { + try { + const normalized = baseUrl.replace(/\/+$/, ''); + const headers = apiKey?.trim() ? 
{ Authorization: `Bearer ${apiKey.trim()}` } : undefined; + const response = await axios.get(`${normalized}/models`, { + timeout: 3000, + headers, + }); + return response.status === 200; + } catch { + return false; + } + } + + static async listModels(baseUrl: string, apiKey?: string): Promise { + const normalized = baseUrl.replace(/\/+$/, ''); + const headers = apiKey?.trim() ? { Authorization: `Bearer ${apiKey.trim()}` } : undefined; + const response = await axios.get(`${normalized}/models`, { + timeout: 5000, + headers, + }); + return (response.data.data ?? []).map(model => model.id).sort(); + } + + private async ensureClient(): Promise<{ client: OpenAI; model: string }> { + const config = await this.getOpenCodeConfig(); + const baseUrl = this.normalizeBaseUrl( + config.baseUrl || OpenCodePromptTransformer.DEFAULT_BASE_URL + ); + const model = config.model.trim(); + if (!model) { + throw new TransformationError('OpenCode model is not configured'); + } + + const apiKey = (await this.getApiKey())?.trim() || 'unused'; + const cacheKey = `${baseUrl}:${model}:${apiKey}`; + + if (this.client && this.cachedKey === cacheKey) { + return { client: this.client, model }; + } + + this.client = new OpenAI({ + apiKey, + baseURL: baseUrl, + }); + this.cachedKey = cacheKey; + + return { client: this.client, model }; + } + + async transform(transcription: string, context?: PromptContext): Promise { + this.logger.info('Starting OpenCode prompt transformation', { + textLength: transcription.length, + hasContext: !!context, + }); + + const { client, model } = await this.ensureClient(); + const systemPrompt = await this.getSystemPrompt(); + const userPrompt = buildUserPrompt(transcription, context); + + try { + const startTime = Date.now(); + + this.logger.debug('OpenCode transformation request', { + model, + promptLength: userPrompt.length, + }); + + const response = await client.chat.completions.create({ + model, + messages: [ + { role: 'system', content: systemPrompt }, + { role: 
'user', content: userPrompt }, + ], + temperature: 0.3, + max_tokens: 2000, + }); + + const duration = (Date.now() - startTime) / 1000; + const transformedText = response.choices[0]?.message?.content?.trim() || transcription; + const improvements = calculateImprovements(transcription, transformedText); + + this.logger.info('OpenCode prompt transformation completed', { + model, + duration: duration.toFixed(2) + 's', + originalLength: transcription.length, + transformedLength: transformedText.length, + improvements: improvements.length, + }); + + return { + originalText: transcription, + transformedText, + improvements, + }; + } catch (error) { + this.logger.error('OpenCode prompt transformation failed', error as Error); + + if (error instanceof OpenAI.APIError) { + if (error.status === 401) { + throw new TransformationError('Invalid OpenCode proxy authentication token', error); + } + if (error.status === 404) { + throw new TransformationError( + `Model '${model}' was not found. Check available models via GET /v1/models on your OpenCode proxy.`, + error + ); + } + if (error.status === 429) { + throw new TransformationError('Rate limit exceeded. Please try again later.', error); + } + } + + if (error instanceof Error) { + const message = error.message.toLowerCase(); + if (message.includes('econnrefused') || message.includes('connect')) { + const config = await this.getOpenCodeConfig(); + const baseUrl = this.normalizeBaseUrl( + config.baseUrl || OpenCodePromptTransformer.DEFAULT_BASE_URL + ); + throw new TransformationError( + `OpenCode server not running or not reachable at ${baseUrl}. Ensure opencode-llm-proxy is installed and running.`, + error + ); + } + } + + throw new TransformationError( + 'Transformation failed', + error instanceof Error ? 
error : undefined + ); + } + } +} diff --git a/src/infrastructure/transformation/OpenRouterPromptTransformer.ts b/src/infrastructure/transformation/OpenRouterPromptTransformer.ts new file mode 100644 index 0000000..54a6662 --- /dev/null +++ b/src/infrastructure/transformation/OpenRouterPromptTransformer.ts @@ -0,0 +1,147 @@ +import axios from 'axios'; +import OpenAI from 'openai'; +import { IPromptTransformer, PromptContext } from '../../application/ports/IPromptTransformer'; +import { TransformedPrompt } from '../../application/dto/TransformedPrompt'; +import { ILogger } from '../../application/ports/ILogger'; +import { ApiKey } from '../../domain/value-objects/ApiKey'; +import { + TransformationError, + buildUserPrompt, + calculateImprovements, +} from './transformationUtils'; + +interface OpenRouterModelsResponse { + data?: Array<{ id: string }>; +} + +export class OpenRouterPromptTransformer implements IPromptTransformer { + static readonly BASE_URL = 'https://openrouter.ai/api/v1'; + static readonly DEFAULT_MODEL = 'openai/gpt-4o'; + static readonly APP_TITLE = 'Promptimize'; + + private client: OpenAI | null = null; + private cachedKey: string | null = null; + + constructor( + private readonly getApiKey: () => Promise, + private readonly getModel: () => Promise, + private readonly getSystemPrompt: () => Promise, + private readonly logger: ILogger + ) {} + + private async ensureClient(): Promise<{ client: OpenAI; model: string }> { + const apiKeyStr = await this.getApiKey(); + if (!apiKeyStr) { + throw new TransformationError('OpenRouter API key not configured'); + } + + const model = (await this.getModel()) || OpenRouterPromptTransformer.DEFAULT_MODEL; + const cacheKey = `${apiKeyStr}:${model}`; + + if (this.client && this.cachedKey === cacheKey) { + return { client: this.client, model }; + } + + const apiKey = new ApiKey(apiKeyStr); + this.client = new OpenAI({ + apiKey: apiKey.toString(), + baseURL: OpenRouterPromptTransformer.BASE_URL, + defaultHeaders: { 
+ 'HTTP-Referer': 'https://github.com/vypdev/cursor-whisper', + 'X-OpenRouter-Title': OpenRouterPromptTransformer.APP_TITLE, + }, + }); + this.cachedKey = cacheKey; + + return { client: this.client, model }; + } + + static async listModels(apiKey: string): Promise { + const response = await axios.get( + `${OpenRouterPromptTransformer.BASE_URL}/models`, + { + timeout: 10000, + headers: { + Authorization: `Bearer ${apiKey}`, + }, + } + ); + return (response.data.data ?? []).map(model => model.id).sort(); + } + + async transform(transcription: string, context?: PromptContext): Promise { + this.logger.info('Starting OpenRouter prompt transformation', { + textLength: transcription.length, + hasContext: !!context, + }); + + const { client, model } = await this.ensureClient(); + const systemPrompt = await this.getSystemPrompt(); + const userPrompt = buildUserPrompt(transcription, context); + + try { + const startTime = Date.now(); + + this.logger.debug('OpenRouter transformation request', { + model, + promptLength: userPrompt.length, + }); + + const response = await client.chat.completions.create({ + model, + messages: [ + { role: 'system', content: systemPrompt }, + { role: 'user', content: userPrompt }, + ], + temperature: 0.3, + max_tokens: 2000, + }); + + const duration = (Date.now() - startTime) / 1000; + const transformedText = response.choices[0]?.message?.content?.trim() || transcription; + const improvements = calculateImprovements(transcription, transformedText); + + this.logger.info('OpenRouter prompt transformation completed', { + model, + duration: duration.toFixed(2) + 's', + originalLength: transcription.length, + transformedLength: transformedText.length, + improvements: improvements.length, + }); + + return { + originalText: transcription, + transformedText, + improvements, + }; + } catch (error) { + this.logger.error('OpenRouter prompt transformation failed', error as Error); + + if (error instanceof OpenAI.APIError) { + if (error.status === 401) { + 
throw new TransformationError('Invalid OpenRouter API key', error); + } + if (error.status === 402) { + throw new TransformationError('Insufficient OpenRouter credits', error); + } + if (error.status === 404) { + throw new TransformationError( + `Model '${model}' is not available on OpenRouter.`, + error + ); + } + if (error.status === 429) { + throw new TransformationError('Rate limit exceeded. Please try again later.', error); + } + if (error.status === 502) { + throw new TransformationError('Upstream provider unavailable', error); + } + } + + throw new TransformationError( + 'Transformation failed', + error instanceof Error ? error : undefined + ); + } + } +} diff --git a/src/infrastructure/transformation/PromptTransformerFactory.ts b/src/infrastructure/transformation/PromptTransformerFactory.ts new file mode 100644 index 0000000..4e82d4b --- /dev/null +++ b/src/infrastructure/transformation/PromptTransformerFactory.ts @@ -0,0 +1,190 @@ +import { TransformedPrompt } from '../../application/dto/TransformedPrompt'; +import { IPromptTransformer } from '../../application/ports/IPromptTransformer'; +import { ITransformationProviderValidator } from '../../application/ports/ITransformationProviderValidator'; +import { IConfigRepository } from '../../application/ports/IConfigRepository'; +import { ILogger } from '../../application/ports/ILogger'; +import { + TransformationProvider, + PROVIDER_METADATA, +} from '../../domain/value-objects/TransformationProvider'; +import { OpenAIPromptTransformer } from './OpenAIPromptTransformer'; +import { AnthropicPromptTransformer } from './AnthropicPromptTransformer'; +import { GooglePromptTransformer } from './GooglePromptTransformer'; +import { AzureOpenAIPromptTransformer } from './AzureOpenAIPromptTransformer'; +import { OllamaPromptTransformer } from './OllamaPromptTransformer'; +import { OpenCodePromptTransformer } from './OpenCodePromptTransformer'; +import { OpenRouterPromptTransformer } from './OpenRouterPromptTransformer'; 
+import { CursorPromptTransformer } from './CursorPromptTransformer'; +import { TransformationError, getSystemPrompt } from './transformationUtils'; + +export class PromptTransformerFactory implements ITransformationProviderValidator { + constructor( + private readonly configRepo: IConfigRepository, + private readonly logger: ILogger + ) {} + + async create(): Promise { + const config = await this.configRepo.getConfig(); + return this.createForProvider(config.transformationProvider); + } + + createForProvider(provider: TransformationProvider): IPromptTransformer { + const resolveSystemPrompt = () => + this.configRepo.getConfig().then(config => getSystemPrompt(config)); + + switch (provider) { + case TransformationProvider.OpenAI: + return new OpenAIPromptTransformer( + () => this.configRepo.getProviderApiKey(TransformationProvider.OpenAI), + () => this.configRepo.getConfig().then(c => c.transformationModel), + resolveSystemPrompt, + this.logger + ); + + case TransformationProvider.Anthropic: + return new AnthropicPromptTransformer( + () => this.configRepo.getProviderApiKey(TransformationProvider.Anthropic), + () => this.configRepo.getConfig().then(c => c.anthropicModel), + resolveSystemPrompt, + this.logger + ); + + case TransformationProvider.Google: + return new GooglePromptTransformer( + () => this.configRepo.getProviderApiKey(TransformationProvider.Google), + () => this.configRepo.getConfig().then(c => c.googleModel), + resolveSystemPrompt, + this.logger + ); + + case TransformationProvider.Azure: + return new AzureOpenAIPromptTransformer( + () => this.configRepo.getProviderApiKey(TransformationProvider.Azure), + async () => { + const config = await this.configRepo.getConfig(); + return { + endpoint: config.azureEndpoint, + deployment: config.azureDeployment, + }; + }, + resolveSystemPrompt, + this.logger + ); + + case TransformationProvider.Ollama: + return new OllamaPromptTransformer( + async () => { + const config = await this.configRepo.getConfig(); + 
return { + baseUrl: config.ollamaBaseUrl, + model: config.ollamaModel, + }; + }, + resolveSystemPrompt, + this.logger + ); + + case TransformationProvider.OpenCode: + return new OpenCodePromptTransformer( + async () => { + const config = await this.configRepo.getConfig(); + return { + baseUrl: config.openCodeBaseUrl, + model: config.openCodeModel, + }; + }, + () => this.configRepo.getProviderApiKey(TransformationProvider.OpenCode), + resolveSystemPrompt, + this.logger + ); + + case TransformationProvider.OpenRouter: + return new OpenRouterPromptTransformer( + () => this.configRepo.getProviderApiKey(TransformationProvider.OpenRouter), + () => this.configRepo.getConfig().then(c => c.openRouterModel), + resolveSystemPrompt, + this.logger + ); + + case TransformationProvider.Cursor: + return new CursorPromptTransformer( + () => this.configRepo.getProviderApiKey(TransformationProvider.Cursor), + () => this.configRepo.getConfig().then(c => c.cursorModel), + resolveSystemPrompt, + this.logger + ); + + default: { + const exhaustiveCheck: never = provider; + throw new TransformationError( + `Unsupported transformation provider: ${String(exhaustiveCheck)}` + ); + } + } + } + + async validateProvider(provider: TransformationProvider): Promise { + const metadata = PROVIDER_METADATA[provider]; + + if (metadata.requiresApiKey) { + const apiKey = await this.configRepo.getProviderApiKey(provider); + if (!apiKey) { + return `${metadata.displayName} API key is not configured.`; + } + } + + if (provider === TransformationProvider.Azure) { + const config = await this.configRepo.getConfig(); + if (!config.azureEndpoint.trim()) { + return 'Azure OpenAI endpoint is not configured.'; + } + if (!config.azureDeployment.trim()) { + return 'Azure OpenAI deployment name is not configured.'; + } + } + + if (provider === TransformationProvider.Ollama) { + const available = await OllamaPromptTransformer.isAvailable( + (await this.configRepo.getConfig()).ollamaBaseUrl + ); + if (!available) { + 
return 'Ollama server is not reachable. Ensure Ollama is running locally.'; + } + } + + if (provider === TransformationProvider.OpenCode) { + const config = await this.configRepo.getConfig(); + if (!config.openCodeBaseUrl.trim()) { + return 'OpenCode proxy base URL is not configured.'; + } + if (!config.openCodeModel.trim()) { + return 'OpenCode model is not configured.'; + } + const apiKey = await this.configRepo.getProviderApiKey(TransformationProvider.OpenCode); + const available = await OpenCodePromptTransformer.isAvailable( + config.openCodeBaseUrl, + apiKey + ); + if (!available) { + return 'OpenCode proxy is not reachable. Ensure opencode-llm-proxy is installed and running.'; + } + } + + return undefined; + } +} + +/** + * Resolves the active prompt transformer from configuration on each call. + */ +export class ConfigurablePromptTransformer implements IPromptTransformer { + constructor(private readonly factory: PromptTransformerFactory) {} + + async transform( + transcription: string, + context?: import('../../application/ports/IPromptTransformer').PromptContext + ): Promise { + const transformer = await this.factory.create(); + return transformer.transform(transcription, context); + } +} diff --git a/src/infrastructure/transformation/transformationUtils.ts b/src/infrastructure/transformation/transformationUtils.ts new file mode 100644 index 0000000..9053f37 --- /dev/null +++ b/src/infrastructure/transformation/transformationUtils.ts @@ -0,0 +1,126 @@ +import { Config } from '../../application/ports/IConfigRepository'; +import { PromptContext } from '../../application/ports/IPromptTransformer'; + +export class TransformationError extends Error { + constructor( + message: string, + public readonly cause?: Error + ) { + super(message); + this.name = 'TransformationError'; + } +} + +export const TRANSFORMATION_SYSTEM_PROMPT = `You are an expert prompt engineer specialized in transforming raw developer voice transcriptions into highly effective prompts for AI 
coding assistants such as Cursor, Claude, Copilot, and ChatGPT. + +Your task is to convert spoken, unstructured developer input into a concise, technically precise, execution-oriented prompt. + +Rules: + +1. Clean the transcription + - Remove filler words, hesitations, repetitions, and verbal noise + - Fix grammar and sentence structure + - Preserve the original intent + - Preserve all technical terminology, APIs, framework names, libraries, file names, variables, and code references exactly as spoken + +2. Infer developer intent + - Detect the actual engineering goal behind the transcription + - Resolve fragmented speech into coherent technical instructions + - Preserve implicit requirements when clearly inferred from context + - Do NOT invent features, requirements, or assumptions not supported by the transcription + +3. Optimize for AI coding assistants + - Make the prompt actionable and implementation-focused + - Convert vague requests into precise engineering tasks when possible + - Prioritize clarity, execution order, and technical accuracy + - Encourage maintainable, production-grade solutions unless explicitly stated otherwise + +4. Structure intelligently + - Only use sections when they improve clarity + - Possible sections include: + - Context + - Objective + - Requirements + - Constraints + - Expected Output + - Technical Notes + +5. Preserve important engineering constraints + - Maintain architecture preferences + - Preserve mentioned technologies and stack decisions + - Preserve performance, security, scalability, UX, DX, or maintainability concerns + - Preserve coding style preferences if mentioned + +6. Improve readability + - Remove redundancy + - Shorten unnecessary wording + - Use professional technical language + - Prefer bullet points when useful + - Keep the final prompt dense with useful information + +7. 
Output rules + - Output ONLY the final optimized prompt + - Do NOT explain your changes + - Do NOT add commentary + - Do NOT wrap the output in markdown + - Do NOT add quotation marks + +Additional behavior: +- If the transcription is already clear, improve it minimally +- If the transcription is incomplete, produce the best technically coherent prompt possible without asking questions +- If multiple tasks are mentioned, organize them logically by priority or execution order +- Prefer explicit engineering instructions over conversational phrasing +- Optimize prompts for implementation quality, not just readability`; + +export function getSystemPrompt(config: Pick): string { + const prompt = config.transformationSystemPrompt?.trim(); + return prompt || TRANSFORMATION_SYSTEM_PROMPT; +} + +export function buildUserPrompt(transcription: string, context?: PromptContext): string { + let userPrompt = `Transform this voice transcription into a clear, structured prompt:\n\n${transcription}`; + + if (context?.editorLanguage) { + userPrompt += `\n\nContext: User is working in ${context.editorLanguage}`; + } + + if (context?.projectType) { + userPrompt += `\nProject type: ${context.projectType}`; + } + + return userPrompt; +} + +export function calculateImprovements(original: string, transformed: string): string[] { + const improvements: string[] = []; + + const fillers = ['um', 'uh', 'like', 'you know', 'basically', 'actually']; + const hadFillers = fillers.some(filler => original.toLowerCase().includes(filler)); + const hasFewerFillers = fillers.every( + filler => + (original.toLowerCase().match(new RegExp(filler, 'g')) || []).length >= + (transformed.toLowerCase().match(new RegExp(filler, 'g')) || []).length + ); + + if (hadFillers && hasFewerFillers) { + improvements.push('Removed filler words'); + } + + if (transformed.length < original.length * 0.9) { + improvements.push('Made more concise'); + } + + if ( + transformed.includes('Context:') || + 
transformed.includes('Objective:') || + transformed.includes('Requirements:') + ) { + improvements.push('Added clear structure'); + } + + if (transformed.split('.').length > original.split('.').length) { + improvements.push('Improved sentence structure'); + } + + return improvements; +} diff --git a/src/presentation/commands/CancelRecordingCommand.ts b/src/presentation/commands/CancelRecordingCommand.ts index 3be6c49..b120f31 100644 --- a/src/presentation/commands/CancelRecordingCommand.ts +++ b/src/presentation/commands/CancelRecordingCommand.ts @@ -1,14 +1,16 @@ import * as vscode from 'vscode'; import { CancelRecordingUseCase } from '../../application/use-cases/CancelRecordingUseCase'; import { RecordingError } from '../../domain/errors/RecordingError'; +import { setRecordingSessionMode } from '../../shared/services/RecordingSessionMode'; export function registerCancelRecordingCommand( _context: vscode.ExtensionContext, useCase: CancelRecordingUseCase ): vscode.Disposable { - return vscode.commands.registerCommand('cursor-whisper.cancelRecording', async () => { + return vscode.commands.registerCommand('promptimize.cancelRecording', async () => { try { - await useCase.execute(); + useCase.execute(); + setRecordingSessionMode(null); await vscode.window.showInformationMessage('Recording cancelled'); } catch (error) { const message = diff --git a/src/presentation/commands/ConfigureApiKeyCommand.ts b/src/presentation/commands/ConfigureApiKeyCommand.ts index b97e791..82fc083 100644 --- a/src/presentation/commands/ConfigureApiKeyCommand.ts +++ b/src/presentation/commands/ConfigureApiKeyCommand.ts @@ -1,17 +1,23 @@ import * as vscode from 'vscode'; import { IConfigRepository } from '../../application/ports/IConfigRepository'; import { ApiKey } from '../../domain/value-objects/ApiKey'; +import { + OPENAI_API_KEY_DETAIL, + OPENAI_API_KEY_PROMPT, + OPENAI_API_KEY_SUCCESS, +} from '../../shared/constants/uxMessages'; export function registerConfigureApiKeyCommand( _context: 
vscode.ExtensionContext, configRepo: IConfigRepository ): vscode.Disposable { - return vscode.commands.registerCommand('cursor-whisper.configureApiKey', async () => { + return vscode.commands.registerCommand('promptimize.configureApiKey', async () => { const apiKey = await vscode.window.showInputBox({ - prompt: 'Enter your OpenAI API Key', + title: 'Configure OpenAI API Key (Required for Whisper)', + prompt: OPENAI_API_KEY_PROMPT, password: true, placeHolder: 'sk-...', - validateInput: (value) => { + validateInput: value => { try { new ApiKey(value); return null; @@ -24,10 +30,12 @@ export function registerConfigureApiKeyCommand( if (apiKey) { try { await configRepo.updateConfig({ apiKey }); - await vscode.window.showInformationMessage('API Key configured successfully'); + await vscode.window.showInformationMessage(OPENAI_API_KEY_SUCCESS, { + detail: OPENAI_API_KEY_DETAIL, + }); } catch (error) { await vscode.window.showErrorMessage( - `Failed to save API Key: ${error instanceof Error ? error.message : 'Unknown error'}` + `Failed to save OpenAI API key: ${error instanceof Error ? 
error.message : 'Unknown error'}` ); } } diff --git a/src/presentation/commands/ConfigureModelCommand.ts b/src/presentation/commands/ConfigureModelCommand.ts index bb579fb..407b842 100644 --- a/src/presentation/commands/ConfigureModelCommand.ts +++ b/src/presentation/commands/ConfigureModelCommand.ts @@ -5,25 +5,51 @@ import { OpenAIModelService, OpenAIModelServiceError, } from '../../infrastructure/openai/OpenAIModelService'; +import { PromptTransformerFactory } from '../../infrastructure/transformation/PromptTransformerFactory'; import { ILogger } from '../../application/ports/ILogger'; +import { + TransformationProvider, + PROVIDER_METADATA, +} from '../../domain/value-objects/TransformationProvider'; export function registerConfigureModelCommand( _context: vscode.ExtensionContext, configRepo: IConfigRepository, modelService: OpenAIModelService, + transformerFactory: PromptTransformerFactory, logger: ILogger ): vscode.Disposable { - return vscode.commands.registerCommand('cursor-whisper.configureModel', async () => { + return vscode.commands.registerCommand('promptimize.configureModel', async () => { const config = await configRepo.getConfig(); + const provider = config.transformationProvider; + const providerMeta = PROVIDER_METADATA[provider]; + + if (provider !== TransformationProvider.OpenAI) { + const switchProvider = await vscode.window.showInformationMessage( + `Current optimization provider is ${providerMeta.displayName}. This command configures OpenAI models only. 
Whisper transcription always uses OpenAI separately.`, + 'Configure Provider', + 'Open Settings' + ); + + if (switchProvider === 'Configure Provider') { + await vscode.commands.executeCommand('promptimize.configureTransformationProvider'); + } else if (switchProvider === 'Open Settings') { + await vscode.commands.executeCommand( + 'workbench.action.openSettings', + 'promptimize.transformationProvider' + ); + } + return; + } if (!config.apiKey) { const configureKey = await vscode.window.showWarningMessage( - 'Cursor Whisper: Configure your OpenAI API key before selecting a model.', + 'Configure your OpenAI API key first. It is required for Whisper transcription and OpenAI optimization.', 'Configure API Key' ); if (configureKey === 'Configure API Key') { - await vscode.commands.executeCommand('cursor-whisper.configureApiKey'); + await vscode.commands.executeCommand('promptimize.configureApiKey'); } return; } @@ -61,7 +87,7 @@ export function registerConfigureModelCommand( })), { placeHolder: 'Select a GPT model for prompt transformation', - title: 'Cursor Whisper: Configure Model', + title: 'Promptimize: Configure Model', } ); @@ -72,8 +98,16 @@ export function registerConfigureModelCommand( await configRepo.updateConfig({ transformationModel: selection.label }); logger.info('Transformation model updated', { model: selection.label }); + const validationError = await transformerFactory.validateProvider( + TransformationProvider.OpenAI + ); + if (validationError) { + await vscode.window.showWarningMessage(validationError); + return; + } + await vscode.window.showInformationMessage( - `Prompt transformation model set to ${selection.label}` + `Prompt transformation model set to ${selection.label} (OpenAI optimization; Whisper transcription unchanged).` ); } catch (error) { const message = diff --git a/src/presentation/commands/ConfigureTransformationProviderCommand.ts b/src/presentation/commands/ConfigureTransformationProviderCommand.ts new file mode 100644 index 
0000000..1203eae --- /dev/null +++ b/src/presentation/commands/ConfigureTransformationProviderCommand.ts @@ -0,0 +1,114 @@ +import * as vscode from 'vscode'; +import { IConfigRepository } from '../../application/ports/IConfigRepository'; +import { ILogger } from '../../application/ports/ILogger'; +import { PromptTransformerFactory } from '../../infrastructure/transformation/PromptTransformerFactory'; +import { + OpenAIModelService, + OpenAIModelServiceError, +} from '../../infrastructure/openai/OpenAIModelService'; +import { PROVIDER_METADATA } from '../../domain/value-objects/TransformationProvider'; +import { TransformationProvider } from '../../domain/value-objects/TransformationProvider'; +import { + applyProviderConfiguration, + configureProviderCredentials, + confirmOptimizationIntro, + selectModelForProvider, + selectTransformationProvider, +} from '../setup/providerConfigurationFlow'; + +export function registerConfigureTransformationProviderCommand( + _context: vscode.ExtensionContext, + configRepo: IConfigRepository, + transformerFactory: PromptTransformerFactory, + modelService: OpenAIModelService, + logger: ILogger +): vscode.Disposable { + return vscode.commands.registerCommand( + 'promptimize.configureTransformationProvider', + async () => { + const config = await configRepo.getConfig(); + + const proceed = await confirmOptimizationIntro(); + if (!proceed) { + return; + } + + const provider = await selectTransformationProvider(config.transformationProvider); + if (!provider) { + return; + } + + const configured = await configureProviderCredentials(provider, configRepo); + if (!configured) { + return; + } + + if ( + provider === TransformationProvider.OpenAI && + !(await configRepo.getProviderApiKey(provider)) + ) { + await vscode.window.showWarningMessage( + 'Configure your OpenAI API key first. It is required for Whisper transcription and OpenAI optimization.' 
+ ); + return; + } + + let selectedModel: string | undefined; + try { + selectedModel = await vscode.window.withProgress( + { + location: vscode.ProgressLocation.Notification, + title: `Loading models for ${PROVIDER_METADATA[provider].displayName}...`, + cancellable: false, + }, + async () => selectModelForProvider(provider, configRepo, modelService, logger) + ); + } catch (error) { + const message = + error instanceof OpenAIModelServiceError + ? error.message + : error instanceof Error + ? error.message + : 'Unknown error'; + await vscode.window.showErrorMessage(`Failed to load models: ${message}`); + return; + } + + const applied = await applyProviderConfiguration( + provider, + selectedModel, + configRepo, + transformerFactory + ); + + const metadata = PROVIDER_METADATA[provider]; + if (!applied.success) { + await vscode.window.showWarningMessage( + `Provider set to ${metadata.displayName}, but configuration is incomplete: ${applied.message}` + ); + return; + } + + logger.info('Transformation provider updated', { provider, model: selectedModel }); + await vscode.window + .showInformationMessage( + `Prompt optimization provider set to ${metadata.displayName}${ + selectedModel ? ` (${selectedModel})` : '' + }. 
Whisper transcription still uses OpenAI.`, + 'Test Optimization', + 'Learn About Providers' + ) + .then(async selection => { + if (selection === 'Test Optimization') { + await vscode.commands.executeCommand('promptimize.testTransformation'); + } else if (selection === 'Learn About Providers') { + await vscode.env.openExternal( + vscode.Uri.parse( + 'https://github.com/vypdev/cursor-whisper/blob/master/docs/configuration/README.md' + ) + ); + } + }); + } + ); +} diff --git a/src/presentation/commands/FirstTimeSetupCommand.ts b/src/presentation/commands/FirstTimeSetupCommand.ts new file mode 100644 index 0000000..06c2014 --- /dev/null +++ b/src/presentation/commands/FirstTimeSetupCommand.ts @@ -0,0 +1,51 @@ +import * as vscode from 'vscode'; +import { IConfigRepository } from '../../application/ports/IConfigRepository'; +import { ILogger } from '../../application/ports/ILogger'; +import { IPromptTransformer } from '../../application/ports/IPromptTransformer'; +import { PromptTransformerFactory } from '../../infrastructure/transformation/PromptTransformerFactory'; +import { OpenAIModelService } from '../../infrastructure/openai/OpenAIModelService'; +import { + TransformationProvider, + PROVIDER_METADATA, +} from '../../domain/value-objects/TransformationProvider'; + +export function registerFirstTimeSetupCommand( + _context: vscode.ExtensionContext, + _configRepo: IConfigRepository, + _transformerFactory: PromptTransformerFactory, + _modelService: OpenAIModelService, + _promptTransformer: IPromptTransformer, + _logger: ILogger +): vscode.Disposable { + + return vscode.commands.registerCommand('promptimize.firstTimeSetup', async () => { + await vscode.commands.executeCommand('promptimize.openConfigurationPanel'); + }); +} + +export async function getSetupChecklist( + configRepo: IConfigRepository +): Promise> { + const config = await configRepo.getConfig(); + const openAiKey = await configRepo.getProviderApiKey(TransformationProvider.OpenAI); + + const items: Array<{ 
label: string; complete: boolean }> = [ + { label: 'Extension installed', complete: true }, + { label: 'OpenAI API key configured (Whisper)', complete: Boolean(openAiKey) }, + ]; + + if (config.enablePromptTransformation) { + const providerMeta = PROVIDER_METADATA[config.transformationProvider]; + const providerKey = providerMeta.requiresApiKey + ? await configRepo.getProviderApiKey(config.transformationProvider) + : true; + items.push({ + label: `Optimization provider configured (${providerMeta.displayName})`, + complete: Boolean(providerKey), + }); + } else { + items.push({ label: 'Prompt optimization configured (disabled)', complete: true }); + } + + return items; +} diff --git a/src/presentation/commands/OpenConfigurationPanelCommand.ts b/src/presentation/commands/OpenConfigurationPanelCommand.ts new file mode 100644 index 0000000..76e623d --- /dev/null +++ b/src/presentation/commands/OpenConfigurationPanelCommand.ts @@ -0,0 +1,27 @@ +import * as vscode from 'vscode'; +import { IConfigRepository } from '../../application/ports/IConfigRepository'; +import { ILogger } from '../../application/ports/ILogger'; +import { IPromptTransformer } from '../../application/ports/IPromptTransformer'; +import { PromptTransformerFactory } from '../../infrastructure/transformation/PromptTransformerFactory'; +import { OpenAIModelService } from '../../infrastructure/openai/OpenAIModelService'; +import { ConfigurationPanel } from '../webview/ConfigurationPanel'; + +export function registerOpenConfigurationPanelCommand( + context: vscode.ExtensionContext, + configRepo: IConfigRepository, + transformerFactory: PromptTransformerFactory, + modelService: OpenAIModelService, + promptTransformer: IPromptTransformer, + logger: ILogger +): vscode.Disposable { + return vscode.commands.registerCommand('promptimize.openConfigurationPanel', () => { + ConfigurationPanel.render( + context, + configRepo, + transformerFactory, + modelService, + promptTransformer, + logger + ); + }); +} diff --git 
a/src/presentation/commands/StartPromptimizeRecordingCommand.ts b/src/presentation/commands/StartPromptimizeRecordingCommand.ts new file mode 100644 index 0000000..a2cc82e --- /dev/null +++ b/src/presentation/commands/StartPromptimizeRecordingCommand.ts @@ -0,0 +1,44 @@ +import * as vscode from 'vscode'; +import { StartRecordingUseCase } from '../../application/use-cases/StartRecordingUseCase'; +import { IConfigRepository } from '../../application/ports/IConfigRepository'; +import { ITransformationProviderValidator } from '../../application/ports/ITransformationProviderValidator'; +import { validateConfigurationForPromptimize } from '../../application/services/ConfigurationValidationService'; +import { PermissionError } from '../../domain/errors/PermissionError'; +import { RecordingError } from '../../domain/errors/RecordingError'; + +export function registerStartPromptimizeRecordingCommand( + _context: vscode.ExtensionContext, + configRepo: IConfigRepository, + providerValidator: ITransformationProviderValidator, + useCase: StartRecordingUseCase +): vscode.Disposable { + return vscode.commands.registerCommand('promptimize.startPromptimizeRecording', async () => { + try { + const validationIssue = await validateConfigurationForPromptimize( + configRepo, + providerValidator + ); + + if (validationIssue) { + await vscode.commands.executeCommand('promptimize.openConfigurationPanel'); + return; + } + + await useCase.execute('promptimize'); + await vscode.window.showInformationMessage('Recording started'); + } catch (error) { + if (error instanceof PermissionError) { + await vscode.window.showErrorMessage( + 'Microphone permission denied. Please check system settings.', + 'OK' + ); + } else if (error instanceof RecordingError) { + await vscode.window.showErrorMessage(`Recording failed: ${error.message}`); + } else { + await vscode.window.showErrorMessage( + `Unexpected error: ${error instanceof Error ? 
error.message : 'Unknown error'}` + ); + } + } + }); +} diff --git a/src/presentation/commands/StartRecordingCommand.ts b/src/presentation/commands/StartRecordingCommand.ts index 7a61831..d2c23d3 100644 --- a/src/presentation/commands/StartRecordingCommand.ts +++ b/src/presentation/commands/StartRecordingCommand.ts @@ -1,27 +1,48 @@ import * as vscode from 'vscode'; import { StartRecordingUseCase } from '../../application/use-cases/StartRecordingUseCase'; +import { IConfigRepository } from '../../application/ports/IConfigRepository'; +import { ITransformationProviderValidator } from '../../application/ports/ITransformationProviderValidator'; +import { validateConfigurationForPromptimize } from '../../application/services/ConfigurationValidationService'; import { PermissionError } from '../../domain/errors/PermissionError'; import { ConfigError, MissingApiKeyError } from '../../domain/errors/ConfigError'; import { RecordingError } from '../../domain/errors/RecordingError'; export function registerStartRecordingCommand( _context: vscode.ExtensionContext, + configRepo: IConfigRepository, + providerValidator: ITransformationProviderValidator, useCase: StartRecordingUseCase ): vscode.Disposable { - return vscode.commands.registerCommand('cursor-whisper.startRecording', async () => { + return vscode.commands.registerCommand('promptimize.startRecording', async () => { try { - await useCase.execute(); + const validationIssue = await validateConfigurationForPromptimize(configRepo, providerValidator); + + if (validationIssue) { + if (validationIssue.configureCommand === 'promptimize.configureApiKey') { + const selection = await vscode.window.showErrorMessage( + 'OpenAI API key is required for Whisper voice-to-text transcription.', + { detail: 'Prompt optimization uses a separate provider you can configure later.' 
}, + 'Configure Now', + 'Open Configuration' + ); + + if (selection === 'Configure Now') { + await vscode.commands.executeCommand('promptimize.configureApiKey'); + } else if (selection === 'Open Configuration') { + await vscode.commands.executeCommand('promptimize.openConfigurationPanel'); + } + return; + } + + await vscode.commands.executeCommand('promptimize.openConfigurationPanel'); + return; + } + + await useCase.execute('promptimize'); await vscode.window.showInformationMessage('Recording started'); } catch (error) { if (error instanceof MissingApiKeyError) { - const selection = await vscode.window.showErrorMessage( - 'OpenAI API Key not configured', - 'Configure Now' - ); - - if (selection === 'Configure Now') { - await vscode.commands.executeCommand('cursor-whisper.configureApiKey'); - } + await vscode.commands.executeCommand('promptimize.openConfigurationPanel'); } else if (error instanceof ConfigError) { await vscode.window.showErrorMessage(`Configuration error: ${error.message}`); } else if (error instanceof PermissionError) { diff --git a/src/presentation/commands/StartTranscribeRecordingCommand.ts b/src/presentation/commands/StartTranscribeRecordingCommand.ts new file mode 100644 index 0000000..bf0f1cf --- /dev/null +++ b/src/presentation/commands/StartTranscribeRecordingCommand.ts @@ -0,0 +1,39 @@ +import * as vscode from 'vscode'; +import { StartRecordingUseCase } from '../../application/use-cases/StartRecordingUseCase'; +import { IConfigRepository } from '../../application/ports/IConfigRepository'; +import { validateConfigurationForTranscription } from '../../application/services/ConfigurationValidationService'; +import { PermissionError } from '../../domain/errors/PermissionError'; +import { RecordingError } from '../../domain/errors/RecordingError'; + +export function registerStartTranscribeRecordingCommand( + _context: vscode.ExtensionContext, + configRepo: IConfigRepository, + useCase: StartRecordingUseCase +): vscode.Disposable { + return 
vscode.commands.registerCommand('promptimize.startTranscribeRecording', async () => { + try { + const validationIssue = await validateConfigurationForTranscription(configRepo); + + if (validationIssue) { + await vscode.commands.executeCommand('promptimize.openConfigurationPanel'); + return; + } + + await useCase.execute('transcribe'); + await vscode.window.showInformationMessage('Recording started'); + } catch (error) { + if (error instanceof PermissionError) { + await vscode.window.showErrorMessage( + 'Microphone permission denied. Please check system settings.', + 'OK' + ); + } else if (error instanceof RecordingError) { + await vscode.window.showErrorMessage(`Recording failed: ${error.message}`); + } else { + await vscode.window.showErrorMessage( + `Unexpected error: ${error instanceof Error ? error.message : 'Unknown error'}` + ); + } + } + }); +} diff --git a/src/presentation/commands/StopPromptimizeRecordingCommand.ts b/src/presentation/commands/StopPromptimizeRecordingCommand.ts new file mode 100644 index 0000000..48aeb62 --- /dev/null +++ b/src/presentation/commands/StopPromptimizeRecordingCommand.ts @@ -0,0 +1,84 @@ +import * as vscode from 'vscode'; +import { StopRecordingUseCase } from '../../application/use-cases/StopRecordingUseCase'; +import { TranscribeAudioUseCase } from '../../application/use-cases/TranscribeAudioUseCase'; +import { TransformPromptUseCase } from '../../application/use-cases/TransformPromptUseCase'; +import { InsertTextUseCase } from '../../application/use-cases/InsertTextUseCase'; +import { RecordingError } from '../../domain/errors/RecordingError'; +import { TranscriptionError } from '../../domain/errors/TranscriptionError'; +import { PermissionError } from '../../domain/errors/PermissionError'; +import { ConfigError } from '../../domain/errors/ConfigError'; +import { InsertionError } from '../../application/use-cases/InsertTextUseCase'; +import { generateId } from '../../shared/utils/generateId'; + +interface 
/** Use cases chained together by the `promptimize.stopPromptimizeRecording` command. */
interface StopPromptimizeRecordingDependencies {
  stopRecordingUseCase: StopRecordingUseCase;
  transcribeUseCase: TranscribeAudioUseCase;
  transformUseCase: TransformPromptUseCase;
  insertUseCase: InsertTextUseCase;
}

/**
 * Registers the `promptimize.stopPromptimizeRecording` command.
 *
 * When invoked, the command stops the recording, transcribes the captured
 * audio, transforms the transcription (passing the active editor's language
 * id as context) and inserts the transformed text, reporting each stage in a
 * non-cancellable progress notification. Failures are mapped to an error
 * message per error class.
 *
 * @param _context Extension context (unused here).
 * @param deps Use cases executed by the command.
 * @returns The disposable that unregisters the command.
 */
export function registerStopPromptimizeRecordingCommand(
  _context: vscode.ExtensionContext,
  deps: StopPromptimizeRecordingDependencies
): vscode.Disposable {
  return vscode.commands.registerCommand('promptimize.stopPromptimizeRecording', async () => {
    try {
      await vscode.window.withProgress(
        {
          location: vscode.ProgressLocation.Notification,
          title: 'Processing audio...',
          cancellable: false,
        },
        async progress => {
          progress.report({ message: 'Stopping recording...' });
          const audioData = await deps.stopRecordingUseCase.execute();

          // `increment` values are summed by VS Code, so each report carries the
          // delta for the stage just finished (4 x 25 = 100), not a running total.
          progress.report({ message: 'Transcribing...', increment: 25 });
          const recordingId = generateId();
          const transcription = await deps.transcribeUseCase.execute(audioData, recordingId);

          progress.report({ message: 'Optimizing prompt...', increment: 25 });
          const activeEditor = vscode.window.activeTextEditor;
          const context = {
            editorLanguage: activeEditor?.document.languageId,
          };
          const prompt = await deps.transformUseCase.execute(transcription, context);

          progress.report({ message: 'Inserting text...', increment: 25 });
          await deps.insertUseCase.execute(prompt.transformedText);

          progress.report({ increment: 25 });
        }
      );

      await vscode.window.showInformationMessage('Prompt inserted successfully');
    } catch (error) {
      if (error instanceof RecordingError) {
        await vscode.window.showErrorMessage(`Recording error: ${error.message}`);
      } else if (error instanceof TranscriptionError) {
        const action = await vscode.window.showErrorMessage(
          `Transcription failed: ${error.message}`,
          'Retry'
        );
        if (action === 'Retry') {
          // NOTE(review): this re-runs the whole command, including
          // stopRecordingUseCase.execute(); confirm that a second stop can still
          // return the audio captured before the failed transcription.
          await vscode.commands.executeCommand('promptimize.stopPromptimizeRecording');
        }
      } else if (error instanceof InsertionError) {
        await vscode.window.showErrorMessage(
          `Could not insert text: ${error.message}. Check clipboard fallback.`
        );
      } else if (error instanceof PermissionError) {
        // NOTE(review): the 'Open Settings' choice is offered but its selection
        // is not acted on here.
        await vscode.window.showErrorMessage(
          `Microphone permission denied: ${error.message}`,
          'Open Settings'
        );
      } else if (error instanceof ConfigError) {
        await vscode.window.showErrorMessage(`Configuration error: ${error.message}`);
      } else {
        await vscode.window.showErrorMessage(
          `Failed to process recording: ${error instanceof Error ? error.message : 'Unknown error'}`
        );
      }
    }
  });
}
/**
 * Registers `promptimize.stopRecording`, a dispatcher that forwards to the
 * stop command matching the current recording session mode.
 *
 * A session mode of `'transcribe'` forwards to
 * `promptimize.stopTranscribeRecording`; any other value, including no
 * recorded mode, forwards to `promptimize.stopPromptimizeRecording`.
 *
 * @param _context Extension context (unused here).
 * @returns The disposable that unregisters the command.
 */
export function registerStopRecordingCommand(_context: vscode.ExtensionContext): vscode.Disposable {
  const dispatchStop = async (): Promise<void> => {
    if (getRecordingSessionMode() === 'transcribe') {
      await vscode.commands.executeCommand('promptimize.stopTranscribeRecording');
      return;
    }

    // Promptimize is the fallback when the mode is unset or anything else.
    await vscode.commands.executeCommand('promptimize.stopPromptimizeRecording');
  };

  return vscode.commands.registerCommand('promptimize.stopRecording', dispatchStop);
}
/** Use cases chained together by the `promptimize.stopTranscribeRecording` command. */
interface StopTranscribeRecordingDependencies {
  stopRecordingUseCase: StopRecordingUseCase;
  transcribeUseCase: TranscribeAudioUseCase;
  insertUseCase: InsertTextUseCase;
}

/**
 * Registers the `promptimize.stopTranscribeRecording` command.
 *
 * When invoked, the command stops the recording, transcribes the captured
 * audio and inserts the raw transcription text (no prompt transformation),
 * reporting each stage in a non-cancellable progress notification. Failures
 * are mapped to an error message per error class.
 *
 * @param _context Extension context (unused here).
 * @param deps Use cases executed by the command.
 * @returns The disposable that unregisters the command.
 */
export function registerStopTranscribeRecordingCommand(
  _context: vscode.ExtensionContext,
  deps: StopTranscribeRecordingDependencies
): vscode.Disposable {
  return vscode.commands.registerCommand('promptimize.stopTranscribeRecording', async () => {
    try {
      await vscode.window.withProgress(
        {
          location: vscode.ProgressLocation.Notification,
          title: 'Processing audio...',
          cancellable: false,
        },
        async progress => {
          progress.report({ message: 'Stopping recording...' });
          const audioData = await deps.stopRecordingUseCase.execute();

          // `increment` values are summed by VS Code, so each report carries a
          // delta (40 + 40 + 20 = 100), not a running total.
          progress.report({ message: 'Transcribing...', increment: 40 });
          const recordingId = generateId();
          const transcription = await deps.transcribeUseCase.execute(audioData, recordingId);

          progress.report({ message: 'Inserting text...', increment: 40 });
          await deps.insertUseCase.execute(transcription.text);

          progress.report({ increment: 20 });
        }
      );

      await vscode.window.showInformationMessage('Transcription inserted successfully');
    } catch (error) {
      if (error instanceof RecordingError) {
        await vscode.window.showErrorMessage(`Recording error: ${error.message}`);
      } else if (error instanceof TranscriptionError) {
        const action = await vscode.window.showErrorMessage(
          `Transcription failed: ${error.message}`,
          'Retry'
        );
        if (action === 'Retry') {
          // NOTE(review): this re-runs the whole command, including
          // stopRecordingUseCase.execute(); confirm that a second stop can still
          // return the audio captured before the failed transcription.
          await vscode.commands.executeCommand('promptimize.stopTranscribeRecording');
        }
      } else if (error instanceof InsertionError) {
        await vscode.window.showErrorMessage(
          `Could not insert text: ${error.message}. Check clipboard fallback.`
        );
      } else if (error instanceof PermissionError) {
        // NOTE(review): the 'Open Settings' choice is offered but its selection
        // is not acted on here.
        await vscode.window.showErrorMessage(
          `Microphone permission denied: ${error.message}`,
          'Open Settings'
        );
      } else if (error instanceof ConfigError) {
        await vscode.window.showErrorMessage(`Configuration error: ${error.message}`);
      } else {
        await vscode.window.showErrorMessage(
          `Failed to process recording: ${error instanceof Error ? error.message : 'Unknown error'}`
        );
      }
    }
  });
}
'Whisper (OpenAI): Working' + : `Whisper (OpenAI): Failed — ${whisperTest.message}`; + + if (!config.enablePromptTransformation) { + const detail = whisperTest.ok + ? 'Prompt optimization is disabled. Only Whisper transcription was tested.' + : 'Configure your OpenAI API key for Whisper transcription.'; + await vscode.window.showInformationMessage(whisperStatus, { detail }); + return; + } + + if (!whisperTest.ok) { + await vscode.window.showErrorMessage(whisperStatus, { + detail: 'Fix your OpenAI API key before testing prompt optimization.', + }); + return; + } + + try { + const result = await promptTransformer.transform(SAMPLE_TRANSCRIPTION, { + editorLanguage: 'typescript', + projectType: 'Node.js/JavaScript', + }); + + const panel = vscode.window.createWebviewPanel( + 'promptimizeTestTransformation', + 'Promptimize: Configuration Test', + vscode.ViewColumn.One, + { enableScripts: false } + ); + + panel.webview.html = ` + + + + + Configuration Test + + + +

Configuration Test Result

+

✓ ${escapeHtml(whisperStatus)}

+

✓ Optimization (${escapeHtml(providerMeta.displayName)}): Working

+

Estimated Whisper cost: ${escapeHtml(WHISPER_COST_NOTE)}. Optimization test used sample text only.

+

Original

+
${escapeHtml(result.originalText)}
+

Transformed

+
${escapeHtml(result.transformedText)}
+ ${ + result.improvements.length > 0 + ? `

Improvements

    ${result.improvements.map(item => `
  • ${escapeHtml(item)}
  • `).join('')}
/**
 * Escapes `&`, `<`, `>` and `"` so that `value` can be interpolated into HTML
 * markup as literal text.
 *
 * `&` is replaced first so the entities produced by the later replacements
 * are not escaped a second time.
 *
 * @param value Raw text to embed in HTML.
 * @returns The text with the four characters replaced by their HTML entities.
 */
function escapeHtml(value: string): string {
  return value
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;');
}
from '../../infrastructure/transformation/OpenRouterPromptTransformer'; +import { CURSOR_MODELS } from '../../infrastructure/transformation/CursorPromptTransformer'; +import { + TransformationProvider, + PROVIDER_METADATA, +} from '../../domain/value-objects/TransformationProvider'; +import { ApiKey } from '../../domain/value-objects/ApiKey'; +import { PROVIDER_COMPARISON, WHISPER_COST_NOTE } from '../../shared/constants/providerComparison'; +import { + OPENAI_API_KEY_DETAIL, + OPENAI_API_KEY_PROMPT, + OPTIMIZATION_PROVIDER_INTRO, + OPTIMIZATION_PROVIDER_INTRO_DETAIL, +} from '../../shared/constants/uxMessages'; + +const ANTHROPIC_MODELS = [ + 'claude-3-5-sonnet-20241022', + 'claude-3-5-haiku-20241022', + 'claude-3-opus-20240229', + 'claude-3-sonnet-20240229', + 'claude-3-haiku-20240307', +]; + +const GOOGLE_MODELS = ['gemini-1.5-pro', 'gemini-1.5-flash', 'gemini-2.0-flash']; + +export async function promptForOpenAiApiKey(): Promise { + return vscode.window.showInputBox({ + title: 'Configure OpenAI API Key (Required for Whisper)', + prompt: OPENAI_API_KEY_PROMPT, + password: true, + placeHolder: 'sk-...', + validateInput: value => { + try { + new ApiKey(value); + return null; + } catch (error) { + return error instanceof Error ? error.message : 'Invalid API key'; + } + }, + }); +} + +export async function testOpenAiApiKey( + modelService: OpenAIModelService +): Promise<{ ok: true } | { ok: false; message: string }> { + try { + const models = await modelService.listGptModels(); + if (models.length === 0) { + return { ok: false, message: 'API key is valid but no GPT models were returned.' }; + } + return { ok: true }; + } catch (error) { + const message = + error instanceof OpenAIModelServiceError + ? error.message + : error instanceof Error + ? 
error.message + : 'Unknown error'; + return { ok: false, message }; + } +} + +export async function promptForProviderApiKey( + provider: TransformationProvider +): Promise { + const metadata = PROVIDER_METADATA[provider]; + const comparison = PROVIDER_COMPARISON.find(entry => entry.provider === provider); + const keyHint = comparison?.apiKeyUrl ? ` Get a key at ${comparison.apiKeyUrl}` : ''; + + return vscode.window.showInputBox({ + title: `Configure ${metadata.displayName} for Prompt Optimization`, + prompt: `Enter your ${metadata.displayName} API key or credentials.${keyHint}`, + password: true, + placeHolder: provider === TransformationProvider.OpenAI ? 'sk-...' : 'API key', + validateInput: value => { + if (!value.trim()) { + return 'API key or credentials are required'; + } + + if (provider === TransformationProvider.OpenAI) { + try { + new ApiKey(value); + return null; + } catch (error) { + return error instanceof Error ? error.message : 'Invalid API key'; + } + } + + return null; + }, + }); +} + +export async function selectModelForProvider( + provider: TransformationProvider, + configRepo: IConfigRepository, + modelService: OpenAIModelService, + logger: ILogger +): Promise { + const config = await configRepo.getConfig(); + + switch (provider) { + case TransformationProvider.OpenAI: { + const models = await modelService.listGptModels(); + if (models.length === 0) { + await vscode.window.showWarningMessage('No GPT models were returned for your API key.'); + return config.transformationModel; + } + + const selection = await vscode.window.showQuickPick( + models.map(modelId => ({ + label: modelId, + picked: modelId === config.transformationModel, + })), + { placeHolder: 'Select an OpenAI model for prompt optimization' } + ); + return selection?.label; + } + + case TransformationProvider.Anthropic: { + const selection = await vscode.window.showQuickPick( + ANTHROPIC_MODELS.map(modelId => ({ + label: modelId, + picked: modelId === config.anthropicModel, + })), + 
{ placeHolder: 'Select an Anthropic model for prompt optimization' } + ); + return selection?.label; + } + + case TransformationProvider.Google: { + const selection = await vscode.window.showQuickPick( + GOOGLE_MODELS.map(modelId => ({ + label: modelId, + picked: modelId === config.googleModel, + })), + { placeHolder: 'Select a Google Gemini model for prompt optimization' } + ); + return selection?.label; + } + + case TransformationProvider.Azure: { + const deployment = await vscode.window.showInputBox({ + prompt: 'Enter your Azure OpenAI deployment name', + value: config.azureDeployment, + placeHolder: 'gpt-4o-deployment', + validateInput: value => (value.trim() ? null : 'Deployment name is required'), + }); + return deployment?.trim(); + } + + case TransformationProvider.Ollama: { + const baseUrl = config.ollamaBaseUrl || OllamaPromptTransformer.DEFAULT_BASE_URL; + const available = await OllamaPromptTransformer.isAvailable(baseUrl); + + if (available) { + try { + const models = await OllamaPromptTransformer.listModels(baseUrl); + if (models.length > 0) { + const selection = await vscode.window.showQuickPick( + models.map(modelId => ({ + label: modelId, + picked: modelId === config.ollamaModel, + })), + { placeHolder: 'Select an Ollama model for prompt optimization' } + ); + if (selection) { + return selection.label; + } + } + } catch (error) { + logger.warn('Failed to list Ollama models', { + error: error instanceof Error ? error.message : 'Unknown error', + }); + } + } + + return vscode.window.showInputBox({ + prompt: 'Enter the Ollama model name/tag', + value: config.ollamaModel, + placeHolder: OllamaPromptTransformer.DEFAULT_MODEL, + validateInput: value => (value.trim() ? 
null : 'Model name is required'), + }); + } + + case TransformationProvider.OpenCode: { + const baseUrl = config.openCodeBaseUrl || OpenCodePromptTransformer.DEFAULT_BASE_URL; + const apiKey = await configRepo.getProviderApiKey(TransformationProvider.OpenCode); + const available = await OpenCodePromptTransformer.isAvailable(baseUrl, apiKey); + + if (!available) { + await vscode.window.showWarningMessage( + 'Cannot connect to OpenCode proxy. Ensure opencode-llm-proxy is installed and running.' + ); + } else { + try { + const models = await OpenCodePromptTransformer.listModels(baseUrl, apiKey); + if (models.length > 0) { + const selection = await vscode.window.showQuickPick( + models.map(modelId => ({ + label: modelId, + picked: modelId === config.openCodeModel, + })), + { placeHolder: 'Select an OpenCode model for prompt optimization' } + ); + if (selection) { + return selection.label; + } + } + } catch (error) { + logger.warn('Failed to list OpenCode models', { + error: error instanceof Error ? error.message : 'Unknown error', + }); + } + } + + return vscode.window.showInputBox({ + prompt: 'Enter the OpenCode model identifier (provider/model format)', + value: config.openCodeModel, + placeHolder: 'anthropic/claude-sonnet-4-5', + validateInput: value => (value.trim() ? null : 'Model identifier is required'), + }); + } + + case TransformationProvider.OpenRouter: { + const apiKey = await configRepo.getProviderApiKey(TransformationProvider.OpenRouter); + if (!apiKey) { + await vscode.window.showWarningMessage( + 'OpenRouter API key is required before selecting a model.' 
+ ); + return config.openRouterModel || OpenRouterPromptTransformer.DEFAULT_MODEL; + } + + try { + const models = await OpenRouterPromptTransformer.listModels(apiKey); + if (models.length > 0) { + const currentModel = config.openRouterModel || OpenRouterPromptTransformer.DEFAULT_MODEL; + const selection = await vscode.window.showQuickPick( + models.map(modelId => ({ + label: modelId, + picked: modelId === currentModel, + })), + { placeHolder: 'Select an OpenRouter model for prompt optimization' } + ); + if (selection) { + return selection.label; + } + } + } catch (error) { + logger.warn('Failed to list OpenRouter models', { + error: error instanceof Error ? error.message : 'Unknown error', + }); + await vscode.window.showWarningMessage( + 'Could not load OpenRouter models. Check your API key and try again.' + ); + } + + return config.openRouterModel || OpenRouterPromptTransformer.DEFAULT_MODEL; + } + + case TransformationProvider.Cursor: { + const selection = await vscode.window.showQuickPick( + [...CURSOR_MODELS].map(modelId => ({ + label: modelId, + picked: modelId === config.cursorModel, + })), + { placeHolder: 'Select a Cursor model for prompt optimization' } + ); + return selection?.label; + } + + default: + return undefined; + } +} + +export async function configureProviderSpecificSettings( + provider: TransformationProvider, + configRepo: IConfigRepository +): Promise { + if (provider === TransformationProvider.Azure) { + const config = await configRepo.getConfig(); + const endpoint = await vscode.window.showInputBox({ + prompt: 'Enter your Azure OpenAI endpoint', + value: config.azureEndpoint, + placeHolder: 'https://my-resource.openai.azure.com', + validateInput: value => (value.trim() ? 
null : 'Endpoint is required'), + }); + + if (endpoint) { + await configRepo.updateConfig({ azureEndpoint: endpoint.trim() }); + } + } + + if (provider === TransformationProvider.Ollama) { + const config = await configRepo.getConfig(); + const baseUrl = await vscode.window.showInputBox({ + prompt: 'Enter your Ollama base URL', + value: config.ollamaBaseUrl, + placeHolder: OllamaPromptTransformer.DEFAULT_BASE_URL, + validateInput: value => (value.trim() ? null : 'Base URL is required'), + }); + + if (baseUrl) { + await configRepo.updateConfig({ ollamaBaseUrl: baseUrl.trim() }); + } + } + + if (provider === TransformationProvider.OpenCode) { + const config = await configRepo.getConfig(); + const baseUrl = await vscode.window.showInputBox({ + prompt: 'Enter your OpenCode LLM proxy base URL', + value: config.openCodeBaseUrl, + placeHolder: OpenCodePromptTransformer.DEFAULT_BASE_URL, + validateInput: value => (value.trim() ? null : 'Base URL is required'), + }); + + if (baseUrl) { + await configRepo.updateConfig({ openCodeBaseUrl: baseUrl.trim() }); + } + + const authChoice = await vscode.window.showQuickPick( + [ + { label: 'Skip authentication', value: 'skip' as const }, + { label: 'Set proxy authentication token', value: 'token' as const }, + ], + { placeHolder: 'OpenCode proxy authentication (optional)' } + ); + + if (authChoice?.value === 'token') { + const existingKey = await configRepo.getProviderApiKey(TransformationProvider.OpenCode); + const token = await vscode.window.showInputBox({ + prompt: 'Enter your OpenCode proxy authentication token (OPENCODE_LLM_PROXY_TOKEN)', + value: existingKey, + password: true, + validateInput: value => (value.trim() ? 
null : 'Token cannot be empty'), + }); + + if (token) { + await configRepo.setProviderApiKey(TransformationProvider.OpenCode, token.trim()); + } + } + } +} + +export function buildProviderQuickPickItems(currentProvider: TransformationProvider): Array<{ + label: string; + description: string; + detail: string; + provider: TransformationProvider; +}> { + return PROVIDER_COMPARISON.map(entry => { + const metadata = PROVIDER_METADATA[entry.provider]; + return { + label: metadata.displayName, + description: `${entry.costPerTransform}/transform · ${entry.speed} · ${entry.quality}`, + detail: `${entry.bestFor}${metadata.id === currentProvider ? ' (current provider)' : ''}`, + provider: entry.provider, + }; + }); +} + +export async function confirmOptimizationIntro(): Promise { + const proceed = await vscode.window.showInformationMessage( + OPTIMIZATION_PROVIDER_INTRO, + { modal: true, detail: OPTIMIZATION_PROVIDER_INTRO_DETAIL }, + 'Continue' + ); + return proceed === 'Continue'; +} + +export async function selectTransformationProvider( + currentProvider: TransformationProvider +): Promise { + const selection = await vscode.window.showQuickPick( + buildProviderQuickPickItems(currentProvider), + { + placeHolder: 'Select a provider for prompt optimization', + title: 'Promptimize: Prompt Optimization Provider', + } + ); + + return selection?.provider; +} + +export async function applyProviderConfiguration( + provider: TransformationProvider, + selectedModel: string | undefined, + configRepo: IConfigRepository, + transformerFactory: PromptTransformerFactory +): Promise<{ success: boolean; message?: string }> { + const updates: Parameters[0] = { + transformationProvider: provider, + }; + + if (selectedModel) { + switch (provider) { + case TransformationProvider.OpenAI: + updates.transformationModel = selectedModel; + break; + case TransformationProvider.Anthropic: + updates.anthropicModel = selectedModel; + break; + case TransformationProvider.Google: + updates.googleModel = 
selectedModel; + break; + case TransformationProvider.Azure: + updates.azureDeployment = selectedModel; + break; + case TransformationProvider.Ollama: + updates.ollamaModel = selectedModel; + break; + case TransformationProvider.OpenCode: + updates.openCodeModel = selectedModel; + break; + case TransformationProvider.OpenRouter: + updates.openRouterModel = selectedModel; + break; + case TransformationProvider.Cursor: + updates.cursorModel = selectedModel; + break; + } + } + + await configRepo.updateConfig(updates); + + const validationError = await transformerFactory.validateProvider(provider); + if (validationError) { + return { success: false, message: validationError }; + } + + return { success: true }; +} + +export function formatProviderComparisonForWizard(): string { + const rows = PROVIDER_COMPARISON.map(entry => { + const metadata = PROVIDER_METADATA[entry.provider]; + const keyNote = metadata.requiresApiKey ? 'API key required' : 'No API key'; + return `${metadata.displayName}: ${entry.costPerTransform}, ${entry.bestFor} (${keyNote})`; + }); + return `Whisper transcription cost: ${WHISPER_COST_NOTE}\n\n${rows.join('\n')}`; +} + +export async function configureProviderCredentials( + provider: TransformationProvider, + configRepo: IConfigRepository +): Promise { + const metadata = PROVIDER_METADATA[provider]; + + await configureProviderSpecificSettings(provider, configRepo); + + if (!metadata.requiresApiKey) { + return true; + } + + const existingKey = await configRepo.getProviderApiKey(provider); + if (existingKey) { + return true; + } + + const apiKey = await promptForProviderApiKey(provider); + if (!apiKey) { + await vscode.window.showWarningMessage( + `${metadata.displayName} credentials are required for prompt optimization.` + ); + return false; + } + + await configRepo.setProviderApiKey(provider, apiKey); + return true; +} + +export { OPENAI_API_KEY_DETAIL }; diff --git a/src/presentation/ui/RecordingStatusBarItem.ts 
b/src/presentation/ui/RecordingStatusBarItem.ts index a65eb82..925eb85 100644 --- a/src/presentation/ui/RecordingStatusBarItem.ts +++ b/src/presentation/ui/RecordingStatusBarItem.ts @@ -1,76 +1,212 @@ import * as vscode from 'vscode'; import { RecordingState } from '../../domain/value-objects/RecordingState'; +import { + SETUP_CHECKLIST_TOOLTIP, + STATUS_BAR_SERVICES_TOOLTIP, +} from '../../shared/constants/uxMessages'; +import { getRecordingSessionMode, setRecordingSessionMode } from '../../shared/services/RecordingSessionMode'; + +export interface StatusBarSetupState { + optimizationEnabled: boolean; + hasOpenAIKey: boolean; + setupChecklist?: Array<{ label: string; complete: boolean }>; +} export class RecordingStatusBarItem { - private statusBarItem: vscode.StatusBarItem; + private transcribeStatusBarItem: vscode.StatusBarItem; + private promptimizeStatusBarItem: vscode.StatusBarItem; + private settingsStatusBarItem: vscode.StatusBarItem; private currentState: RecordingState = RecordingState.IDLE; + private transformationProviderLabel = 'OpenAI'; + private optimizationEnabled = true; + private hasOpenAIKey = false; + private setupChecklist: Array<{ label: string; complete: boolean }> = []; constructor() { - this.statusBarItem = vscode.window.createStatusBarItem( + this.transcribeStatusBarItem = vscode.window.createStatusBarItem( + 'transcribe', + vscode.StatusBarAlignment.Right, + 1001 + ); + this.promptimizeStatusBarItem = vscode.window.createStatusBarItem( + 'promptimize', vscode.StatusBarAlignment.Right, - 100 + 1001 ); - this.statusBarItem.command = 'cursor-whisper.startRecording'; + this.settingsStatusBarItem = vscode.window.createStatusBarItem( + 'settings', + vscode.StatusBarAlignment.Right, + 1001 + ); + this.settingsStatusBarItem.command = 'promptimize.openConfigurationPanel'; this.updateUI(); - this.statusBarItem.show(); + this.transcribeStatusBarItem.show(); + this.promptimizeStatusBarItem.show(); + this.settingsStatusBarItem.show(); } 
setState(state: RecordingState): void { this.currentState = state; + if (state === RecordingState.IDLE) { + setRecordingSessionMode(null); + } + this.updateUI(); + } + + setTransformationProviderLabel(label: string): void { + this.transformationProviderLabel = label; + this.updateUI(); + } + + setSetupState(state: StatusBarSetupState): void { + this.optimizationEnabled = state.optimizationEnabled; + this.hasOpenAIKey = state.hasOpenAIKey; + this.setupChecklist = state.setupChecklist ?? []; this.updateUI(); } + private getTranscribeIdleTooltip(): string { + if (!this.hasOpenAIKey) { + return 'OpenAI API key required for transcription.\n\nClick to open configuration'; + } + + return `${STATUS_BAR_SERVICES_TOOLTIP(this.transformationProviderLabel, this.optimizationEnabled)}\n\nTranscribe only (no optimization)`; + } + + private getPromptimizeIdleTooltip(): string { + if (!this.optimizationEnabled) { + return 'Prompt optimization is disabled.\n\nClick to open configuration and enable it'; + } + + if (!this.hasOpenAIKey) { + return 'OpenAI API key required for transcription before optimization.\n\nClick to open configuration'; + } + + return `${STATUS_BAR_SERVICES_TOOLTIP(this.transformationProviderLabel, this.optimizationEnabled)}\n\nTranscribe and optimize prompt`; + } + + private getSettingsTooltip(): string { + if (this.setupChecklist.some(item => !item.complete)) { + return SETUP_CHECKLIST_TOOLTIP(this.setupChecklist); + } + + return 'Open Promptimize configuration'; + } + + private getSettingsText(): string { + if (this.setupChecklist.some(item => !item.complete)) { + return '$(warning) Setup'; + } + + return '$(gear) Settings'; + } + private updateUI(): void { - switch (this.currentState) { - case RecordingState.IDLE: - this.statusBarItem.text = '$(mic) Voice'; - this.statusBarItem.tooltip = 'Start recording (Cmd/Ctrl+Alt+V)'; - this.statusBarItem.command = 'cursor-whisper.startRecording'; - this.statusBarItem.backgroundColor = undefined; - break; + 
this.settingsStatusBarItem.text = this.getSettingsText(); + this.settingsStatusBarItem.tooltip = this.getSettingsTooltip(); + + const sessionMode = getRecordingSessionMode(); + const isActiveSession = sessionMode !== null && this.currentState !== RecordingState.IDLE; + + if (!isActiveSession) { + this.applyIdleState(this.transcribeStatusBarItem, 'transcribe'); + this.applyIdleState(this.promptimizeStatusBarItem, 'promptimize'); + return; + } + + if (sessionMode === 'transcribe') { + this.applySessionState(this.transcribeStatusBarItem, 'transcribe'); + this.applyInactiveSiblingState(this.promptimizeStatusBarItem, 'promptimize'); + } else { + this.applySessionState(this.promptimizeStatusBarItem, 'promptimize'); + this.applyInactiveSiblingState(this.transcribeStatusBarItem, 'transcribe'); + } + } + + private applyIdleState(item: vscode.StatusBarItem, mode: 'transcribe' | 'promptimize'): void { + if (mode === 'transcribe') { + item.text = '$(mic) Transcribe'; + item.tooltip = this.getTranscribeIdleTooltip(); + item.command = 'promptimize.startTranscribeRecording'; + item.backgroundColor = !this.hasOpenAIKey + ? new vscode.ThemeColor('statusBarItem.warningBackground') + : undefined; + return; + } + + item.text = '$(sparkle) Promptimize'; + item.tooltip = this.getPromptimizeIdleTooltip(); + item.command = 'promptimize.startPromptimizeRecording'; + item.backgroundColor = + !this.optimizationEnabled || !this.hasOpenAIKey + ? new vscode.ThemeColor('statusBarItem.warningBackground') + : undefined; + } + private applyInactiveSiblingState( + item: vscode.StatusBarItem, + mode: 'transcribe' | 'promptimize' + ): void { + item.text = mode === 'transcribe' ? '$(mic) Transcribe' : '$(sparkle) Promptimize'; + item.tooltip = + mode === 'transcribe' + ? 
'Transcribe is unavailable while another recording is in progress' + : 'Promptimize is unavailable while another recording is in progress'; + item.command = undefined; + item.backgroundColor = undefined; + } + + private applySessionState(item: vscode.StatusBarItem, mode: 'transcribe' | 'promptimize'): void { + const stopCommand = + mode === 'transcribe' + ? 'promptimize.stopTranscribeRecording' + : 'promptimize.stopPromptimizeRecording'; + const retryCommand = + mode === 'transcribe' + ? 'promptimize.startTranscribeRecording' + : 'promptimize.startPromptimizeRecording'; + + switch (this.currentState) { case RecordingState.RECORDING: - this.statusBarItem.text = '$(record) Recording...'; - this.statusBarItem.tooltip = 'Click to stop recording'; - this.statusBarItem.command = 'cursor-whisper.stopRecording'; - this.statusBarItem.backgroundColor = new vscode.ThemeColor( - 'statusBarItem.errorBackground' - ); + item.text = '$(record) Recording...'; + item.tooltip = 'Click to stop recording'; + item.command = stopCommand; + item.backgroundColor = new vscode.ThemeColor('statusBarItem.errorBackground'); break; case RecordingState.PROCESSING: - this.statusBarItem.text = '$(sync~spin) Processing...'; - this.statusBarItem.tooltip = 'Processing audio'; - this.statusBarItem.command = undefined; - this.statusBarItem.backgroundColor = undefined; + item.text = '$(sync~spin) Processing...'; + item.tooltip = 'Processing audio with OpenAI Whisper'; + item.command = undefined; + item.backgroundColor = undefined; break; case RecordingState.TRANSCRIBING: - this.statusBarItem.text = '$(sync~spin) Transcribing...'; - this.statusBarItem.tooltip = 'Transcribing with Whisper'; - this.statusBarItem.command = undefined; + item.text = '$(sync~spin) Transcribing...'; + item.tooltip = 'Transcribing with OpenAI Whisper'; + item.command = undefined; + item.backgroundColor = undefined; break; case RecordingState.TRANSFORMING: - this.statusBarItem.text = '$(sync~spin) Optimizing...'; - 
this.statusBarItem.tooltip = 'Optimizing prompt with GPT-4'; - this.statusBarItem.command = undefined; + item.text = '$(sync~spin) Optimizing...'; + item.tooltip = `Optimizing prompt with ${this.transformationProviderLabel} (Whisper transcription already complete)`; + item.command = undefined; + item.backgroundColor = undefined; break; case RecordingState.INSERTING: - this.statusBarItem.text = '$(sync~spin) Inserting...'; - this.statusBarItem.tooltip = 'Inserting text'; - this.statusBarItem.command = undefined; + item.text = '$(sync~spin) Inserting...'; + item.tooltip = 'Inserting text'; + item.command = undefined; + item.backgroundColor = undefined; break; case RecordingState.COMPLETED: - this.statusBarItem.text = '$(check) Inserted'; - this.statusBarItem.tooltip = 'Prompt inserted successfully'; - this.statusBarItem.command = 'cursor-whisper.startRecording'; - this.statusBarItem.backgroundColor = new vscode.ThemeColor( - 'statusBarItem.warningBackground' - ); - // Auto-reset after 2 seconds + item.text = '$(check) Inserted'; + item.tooltip = 'Prompt inserted successfully'; + item.command = retryCommand; + item.backgroundColor = new vscode.ThemeColor('statusBarItem.warningBackground'); setTimeout(() => { if (this.currentState === RecordingState.COMPLETED) { this.setState(RecordingState.IDLE); @@ -79,13 +215,10 @@ export class RecordingStatusBarItem { break; case RecordingState.ERROR: - this.statusBarItem.text = '$(x) Error'; - this.statusBarItem.tooltip = 'Click to retry'; - this.statusBarItem.command = 'cursor-whisper.startRecording'; - this.statusBarItem.backgroundColor = new vscode.ThemeColor( - 'statusBarItem.errorBackground' - ); - // Auto-reset after 3 seconds + item.text = '$(x) Error'; + item.tooltip = 'Click to retry'; + item.command = retryCommand; + item.backgroundColor = new vscode.ThemeColor('statusBarItem.errorBackground'); setTimeout(() => { if (this.currentState === RecordingState.ERROR) { this.setState(RecordingState.IDLE); @@ -94,20 +227,26 @@ 
export class RecordingStatusBarItem { break; case RecordingState.CANCELLED: - this.statusBarItem.text = '$(circle-slash) Cancelled'; - this.statusBarItem.tooltip = 'Recording cancelled'; - this.statusBarItem.command = 'cursor-whisper.startRecording'; - // Auto-reset after 2 seconds + item.text = '$(circle-slash) Cancelled'; + item.tooltip = 'Recording cancelled'; + item.command = retryCommand; + item.backgroundColor = undefined; setTimeout(() => { if (this.currentState === RecordingState.CANCELLED) { this.setState(RecordingState.IDLE); } }, 2000); break; + + default: + this.applyIdleState(item, mode); + break; } } dispose(): void { - this.statusBarItem.dispose(); + this.transcribeStatusBarItem.dispose(); + this.promptimizeStatusBarItem.dispose(); + this.settingsStatusBarItem.dispose(); } } diff --git a/src/presentation/webview/ConfigurationPanel.ts b/src/presentation/webview/ConfigurationPanel.ts new file mode 100644 index 0000000..c3c7046 --- /dev/null +++ b/src/presentation/webview/ConfigurationPanel.ts @@ -0,0 +1,729 @@ +import * as fs from 'fs'; +import * as path from 'path'; +import * as vscode from 'vscode'; +import { IConfigRepository } from '../../application/ports/IConfigRepository'; +import { ILogger } from '../../application/ports/ILogger'; +import { IPromptTransformer } from '../../application/ports/IPromptTransformer'; +import { PromptTransformerFactory } from '../../infrastructure/transformation/PromptTransformerFactory'; +import { OpenAIModelService } from '../../infrastructure/openai/OpenAIModelService'; +import { OllamaPromptTransformer } from '../../infrastructure/transformation/OllamaPromptTransformer'; +import { OpenCodePromptTransformer } from '../../infrastructure/transformation/OpenCodePromptTransformer'; +import { OpenRouterPromptTransformer } from '../../infrastructure/transformation/OpenRouterPromptTransformer'; +import { CURSOR_MODELS } from '../../infrastructure/transformation/CursorPromptTransformer'; +import { + TransformationProvider, 
+ PROVIDER_METADATA, + isTransformationProvider, +} from '../../domain/value-objects/TransformationProvider'; +import { ApiKey } from '../../domain/value-objects/ApiKey'; +import { PROVIDER_COMPARISON } from '../../shared/constants/providerComparison'; +import { ProviderPricingService } from '../../application/services/ProviderPricingService'; +import { getNonce } from '../../shared/utils/getNonce'; +import { + applyProviderConfiguration, + testOpenAiApiKey, +} from '../setup/providerConfigurationFlow'; +import { TRANSFORMATION_SYSTEM_PROMPT } from '../../infrastructure/transformation/transformationUtils'; + +const ANTHROPIC_MODELS = [ + 'claude-3-5-sonnet-20241022', + 'claude-3-5-haiku-20241022', + 'claude-3-opus-20240229', + 'claude-3-sonnet-20240229', + 'claude-3-haiku-20240307', +]; + +const GOOGLE_MODELS = ['gemini-1.5-pro', 'gemini-1.5-flash', 'gemini-2.0-flash']; + +const SAMPLE_TRANSCRIPTION = + 'I need to refactor the authentication service to use JWT tokens instead of sessions.'; + +export interface ConfigurationWebviewState { + whisperConfigured: boolean; + whisperApiKeyMasked: string; + enablePromptTransformation: boolean; + transformationProvider: TransformationProvider; + providers: Array<{ + id: TransformationProvider; + displayName: string; + description: string; + requiresApiKey: boolean; + defaultModel: string; + }>; + providerComparison: Array<{ + displayName: string; + costPerTransform: string; + speed: string; + privacy: string; + bestFor: string; + isRealTime: boolean; + }>; + model: string; + azureEndpoint: string; + azureDeployment: string; + ollamaBaseUrl: string; + openCodeBaseUrl: string; + providerApiKeyMasked: string; + providerConfigured: boolean; + transformationSystemPrompt: string; +} + +type WebviewToExtensionMessage = + | { type: 'ready' } + | { type: 'requestConfig' } + | { type: 'saveOpenAiApiKey'; apiKey: string } + | { type: 'saveEnableOptimization'; enabled: boolean } + | { type: 'saveProvider'; provider: string } + | { type: 
'saveProviderApiKey'; provider: string; apiKey: string } + | { + type: 'saveProviderSettings'; + azureEndpoint?: string; + azureDeployment?: string; + ollamaBaseUrl?: string; + openCodeBaseUrl?: string; + model?: string; + } + | { type: 'getModels'; provider: string } + | { type: 'testWhisper' } + | { type: 'testOptimization' } + | { type: 'saveSystemPrompt'; systemPrompt: string } + | { type: 'resetSystemPrompt' } + | { type: 'openDocs' }; + +function isWebviewMessage(value: unknown): value is WebviewToExtensionMessage { + return typeof value === 'object' && value !== null && 'type' in value; +} + +function maskApiKey(apiKey: string | undefined): string { + if (!apiKey) { + return ''; + } + if (apiKey.length <= 8) { + return '••••••••'; + } + return `${apiKey.slice(0, 3)}...${apiKey.slice(-4)}`; +} + +function getModelForProvider( + config: Awaited>, + provider: TransformationProvider +): string { + switch (provider) { + case TransformationProvider.OpenAI: + return config.transformationModel; + case TransformationProvider.Anthropic: + return config.anthropicModel; + case TransformationProvider.Google: + return config.googleModel; + case TransformationProvider.Azure: + return config.azureDeployment; + case TransformationProvider.Ollama: + return config.ollamaModel; + case TransformationProvider.OpenCode: + return config.openCodeModel; + case TransformationProvider.OpenRouter: + return config.openRouterModel; + case TransformationProvider.Cursor: + return config.cursorModel; + } +} + +export class ConfigurationPanel { + public static currentPanel: ConfigurationPanel | undefined; + private static readonly pricingService = new ProviderPricingService(); + + private readonly _panel: vscode.WebviewPanel; + private readonly _extensionUri: vscode.Uri; + private _disposables: vscode.Disposable[] = []; + + private constructor( + panel: vscode.WebviewPanel, + extensionUri: vscode.Uri, + private readonly context: vscode.ExtensionContext, + private readonly configRepo: 
IConfigRepository, + private readonly transformerFactory: PromptTransformerFactory, + private readonly modelService: OpenAIModelService, + private readonly promptTransformer: IPromptTransformer, + private readonly logger: ILogger + ) { + this._panel = panel; + this._extensionUri = extensionUri; + + this._panel.onDidDispose(() => this.dispose(), null, this._disposables); + this._panel.webview.onDidReceiveMessage( + message => { + void this._handleMessage(message); + }, + undefined, + this._disposables + ); + + const configChangeDisposable = vscode.workspace.onDidChangeConfiguration(() => { + void this._sendConfigState(); + }); + this._disposables.push(configChangeDisposable); + + this._panel.webview.html = this._getWebviewContent(this._panel.webview); + + // Send initial state after HTML is set to ensure webview receives it + // even if there's a timing issue with the 'ready' message + setTimeout(() => { + this.logger.info('Sending initial config state after HTML setup'); + void this._sendConfigState(); + }, 100); + } + + public static render( + context: vscode.ExtensionContext, + configRepo: IConfigRepository, + transformerFactory: PromptTransformerFactory, + modelService: OpenAIModelService, + promptTransformer: IPromptTransformer, + logger: ILogger + ): void { + const column = vscode.window.activeTextEditor?.viewColumn ?? 
vscode.ViewColumn.One; + + if (ConfigurationPanel.currentPanel) { + ConfigurationPanel.currentPanel._panel.reveal(column); + void ConfigurationPanel.currentPanel._sendConfigState(); + return; + } + + const panel = vscode.window.createWebviewPanel( + 'promptimizeConfiguration', + 'Promptimize Configuration', + column, + { + enableScripts: true, + retainContextWhenHidden: true, + localResourceRoots: [ + vscode.Uri.joinPath(context.extensionUri, 'out', 'presentation', 'webview'), + ], + } + ); + + ConfigurationPanel.currentPanel = new ConfigurationPanel( + panel, + context.extensionUri, + context, + configRepo, + transformerFactory, + modelService, + promptTransformer, + logger + ); + } + + public dispose(): void { + ConfigurationPanel.currentPanel = undefined; + this._panel.dispose(); + while (this._disposables.length) { + const disposable = this._disposables.pop(); + disposable?.dispose(); + } + } + + private async _handleMessage(message: unknown): Promise { + if (!isWebviewMessage(message)) { + this.logger.warn('Invalid webview message received', { + message: typeof message === 'object' ? 
JSON.stringify(message) : String(message), + }); + return; + } + + this.logger.debug('Webview message received', { type: message.type }); + + switch (message.type) { + case 'ready': + this.logger.info('Webview ready, sending config state'); + await new Promise(resolve => setTimeout(resolve, 150)); + await this._sendConfigState(); + break; + case 'requestConfig': + this.logger.info('Webview requesting config state'); + await this._sendConfigState(); + break; + case 'saveOpenAiApiKey': + await this._saveOpenAiApiKey(message.apiKey); + break; + case 'saveEnableOptimization': + await this._saveEnableOptimization(message.enabled); + break; + case 'saveProvider': + await this._saveProvider(message.provider); + break; + case 'saveProviderApiKey': + await this._saveProviderApiKey(message.provider, message.apiKey); + break; + case 'saveProviderSettings': + await this._saveProviderSettings(message); + break; + case 'getModels': + await this._loadModels(message.provider); + break; + case 'testWhisper': + await this._testWhisper(); + break; + case 'testOptimization': + await this._testOptimization(); + break; + case 'saveSystemPrompt': + await this._saveSystemPrompt(message.systemPrompt); + break; + case 'resetSystemPrompt': + await this._resetSystemPrompt(); + break; + case 'openDocs': + await vscode.env.openExternal( + vscode.Uri.parse('https://github.com/vypdev/cursor-whisper/tree/master/docs') + ); + break; + } + } + + private async _buildConfigState(): Promise { + const config = await this.configRepo.getConfig(); + const openAiKey = await this.configRepo.getProviderApiKey(TransformationProvider.OpenAI); + const provider = config.transformationProvider; + const metadata = PROVIDER_METADATA[provider]; + const providerKey = metadata.requiresApiKey + ? 
await this.configRepo.getProviderApiKey(provider) + : 'local'; + + let providerConfigured = true; + if (metadata.requiresApiKey && !providerKey) { + providerConfigured = false; + } + if (provider === TransformationProvider.Azure) { + providerConfigured = + providerConfigured && Boolean(config.azureEndpoint.trim() && config.azureDeployment.trim()); + } + if (provider === TransformationProvider.Ollama) { + providerConfigured = Boolean(config.ollamaBaseUrl.trim() && config.ollamaModel.trim()); + } + if (provider === TransformationProvider.OpenCode) { + providerConfigured = Boolean(config.openCodeBaseUrl.trim() && config.openCodeModel.trim()); + } + + return { + whisperConfigured: Boolean(openAiKey), + whisperApiKeyMasked: maskApiKey(openAiKey), + enablePromptTransformation: config.enablePromptTransformation, + transformationProvider: provider, + providers: Object.values(PROVIDER_METADATA).map(meta => ({ + id: meta.id, + displayName: meta.displayName, + description: meta.description, + requiresApiKey: meta.requiresApiKey, + defaultModel: meta.defaultModel, + })), + providerComparison: await this._getProviderComparisonData(), + model: getModelForProvider(config, provider), + azureEndpoint: config.azureEndpoint, + azureDeployment: config.azureDeployment, + ollamaBaseUrl: config.ollamaBaseUrl, + openCodeBaseUrl: config.openCodeBaseUrl, + providerApiKeyMasked: maskApiKey(typeof providerKey === 'string' ? 
providerKey : undefined), + providerConfigured, + transformationSystemPrompt: config.transformationSystemPrompt, + }; + } + + private async _getProviderComparisonData(): Promise { + try { + const pricingData = await ConfigurationPanel.pricingService.getProviderComparison(); + return pricingData.map(entry => ({ + displayName: PROVIDER_METADATA[entry.provider].displayName, + costPerTransform: entry.costPerTransform, + speed: entry.speed, + privacy: entry.privacy, + bestFor: entry.bestFor, + isRealTime: entry.isRealTime, + })); + } catch (error) { + this.logger.warn('Failed to fetch provider pricing, using static data', { + error: error instanceof Error ? error.message : String(error), + }); + + return PROVIDER_COMPARISON.map(entry => ({ + displayName: PROVIDER_METADATA[entry.provider].displayName, + costPerTransform: entry.costPerTransform, + speed: entry.speed, + privacy: entry.privacy, + bestFor: entry.bestFor, + isRealTime: false, + })); + } + } + + private async _sendConfigState(): Promise { + const state = await this._buildConfigState(); + this.logger.info('Sending config state to webview', { + whisperConfigured: state.whisperConfigured, + whisperApiKeyMasked: state.whisperApiKeyMasked ? 
'(set)' : '(empty)', + provider: state.transformationProvider, + optimizationEnabled: state.enablePromptTransformation, + providerConfigured: state.providerConfigured, + model: state.model, + }); + await this._panel.webview.postMessage({ type: 'loadConfig', state }); + this.logger.info('Config state message posted to webview successfully'); + } + + private async _postConfigUpdated(): Promise { + const state = await this._buildConfigState(); + this.logger.info('Sending updated config state to webview', { + whisperConfigured: state.whisperConfigured, + provider: state.transformationProvider, + }); + await this._panel.webview.postMessage({ type: 'configUpdated', state }); + this.logger.info('Updated config state message posted to webview successfully'); + } + + private async _saveOpenAiApiKey(apiKey: string): Promise { + try { + new ApiKey(apiKey); + await this.configRepo.updateConfig({ apiKey }); + await this._postSaveResult(true, 'OpenAI API key saved for Whisper transcription.'); + await this._postConfigUpdated(); + } catch (error) { + const message = error instanceof Error ? error.message : 'Invalid API key'; + await this._postSaveResult(false, message); + } + } + + private async _saveEnableOptimization(enabled: boolean): Promise { + await this.configRepo.updateConfig({ enablePromptTransformation: enabled }); + await this._postSaveResult( + true, + enabled ? 'Prompt optimization enabled.' : 'Prompt optimization disabled.' 
+ ); + await this._postConfigUpdated(); + } + + private async _saveProvider(providerValue: string): Promise { + if (!isTransformationProvider(providerValue)) { + await this._postSaveResult(false, 'Invalid provider selected.'); + return; + } + + const config = await this.configRepo.getConfig(); + const model = getModelForProvider(config, providerValue); + const applied = await applyProviderConfiguration( + providerValue, + model, + this.configRepo, + this.transformerFactory + ); + + if (!applied.success) { + await this._postSaveResult(false, applied.message ?? 'Provider configuration incomplete.'); + } else { + await this._postSaveResult( + true, + `Provider set to ${PROVIDER_METADATA[providerValue].displayName}.` + ); + } + + await this._postConfigUpdated(); + await this._loadModels(providerValue); + } + + private async _saveProviderApiKey(providerValue: string, apiKey: string): Promise { + if (!isTransformationProvider(providerValue)) { + await this._postSaveResult(false, 'Invalid provider.'); + return; + } + + if (providerValue === TransformationProvider.OpenAI) { + try { + new ApiKey(apiKey); + } catch (error) { + await this._postSaveResult(false, error instanceof Error ? 
error.message : 'Invalid API key'); + return; + } + } + + if (!apiKey.trim()) { + await this._postSaveResult(false, 'API key is required.'); + return; + } + + await this.configRepo.setProviderApiKey(providerValue, apiKey.trim()); + await this._postSaveResult(true, `${PROVIDER_METADATA[providerValue].displayName} API key saved.`); + await this._postConfigUpdated(); + } + + private async _saveProviderSettings( + settings: Extract + ): Promise { + const config = await this.configRepo.getConfig(); + const provider = config.transformationProvider; + const updates: Parameters[0] = {}; + + if (settings.azureEndpoint !== undefined) { + updates.azureEndpoint = settings.azureEndpoint; + } + if (settings.azureDeployment !== undefined) { + updates.azureDeployment = settings.azureDeployment; + } + if (settings.ollamaBaseUrl !== undefined) { + updates.ollamaBaseUrl = settings.ollamaBaseUrl; + } + if (settings.openCodeBaseUrl !== undefined) { + updates.openCodeBaseUrl = settings.openCodeBaseUrl; + } + + if (settings.model) { + switch (provider) { + case TransformationProvider.OpenAI: + updates.transformationModel = settings.model; + break; + case TransformationProvider.Anthropic: + updates.anthropicModel = settings.model; + break; + case TransformationProvider.Google: + updates.googleModel = settings.model; + break; + case TransformationProvider.Azure: + updates.azureDeployment = settings.model; + break; + case TransformationProvider.Ollama: + updates.ollamaModel = settings.model; + break; + case TransformationProvider.OpenCode: + updates.openCodeModel = settings.model; + break; + case TransformationProvider.OpenRouter: + updates.openRouterModel = settings.model; + break; + case TransformationProvider.Cursor: + updates.cursorModel = settings.model; + break; + } + } + + if (Object.keys(updates).length > 0) { + await this.configRepo.updateConfig(updates); + const validationError = await this.transformerFactory.validateProvider(provider); + if (validationError) { + await 
this._postSaveResult(false, validationError); + } else { + await this._postSaveResult(true, 'Settings saved.'); + } + await this._postConfigUpdated(); + } + } + + private async _loadModels(providerValue: string): Promise { + if (!isTransformationProvider(providerValue)) { + return; + } + + const config = await this.configRepo.getConfig(); + const selectedModel = getModelForProvider(config, providerValue); + + try { + const models = await this._fetchModelsForProvider(providerValue, config); + await this._panel.webview.postMessage({ + type: 'modelsLoaded', + models, + selectedModel: models.includes(selectedModel) ? selectedModel : models[0], + }); + } catch (error) { + const message = error instanceof Error ? error.message : 'Failed to load models'; + await this._panel.webview.postMessage({ + type: 'modelsLoaded', + models: selectedModel ? [selectedModel] : [], + selectedModel, + error: message, + }); + } + } + + private async _fetchModelsForProvider( + provider: TransformationProvider, + config: Awaited> + ): Promise { + switch (provider) { + case TransformationProvider.OpenAI: + return this.modelService.listGptModels(); + case TransformationProvider.Anthropic: + return ANTHROPIC_MODELS; + case TransformationProvider.Google: + return GOOGLE_MODELS; + case TransformationProvider.Azure: + return config.azureDeployment ? [config.azureDeployment] : []; + case TransformationProvider.Ollama: { + const baseUrl = config.ollamaBaseUrl || OllamaPromptTransformer.DEFAULT_BASE_URL; + const available = await OllamaPromptTransformer.isAvailable(baseUrl); + if (!available) { + throw new Error('Ollama server is not reachable. Check the base URL.'); + } + const models = await OllamaPromptTransformer.listModels(baseUrl); + return models.length > 0 ? 
models : [config.ollamaModel || OllamaPromptTransformer.DEFAULT_MODEL]; + } + case TransformationProvider.OpenCode: { + const baseUrl = config.openCodeBaseUrl || OpenCodePromptTransformer.DEFAULT_BASE_URL; + const apiKey = await this.configRepo.getProviderApiKey(TransformationProvider.OpenCode); + const available = await OpenCodePromptTransformer.isAvailable(baseUrl, apiKey); + if (!available) { + throw new Error('OpenCode proxy is not reachable. Check the base URL.'); + } + const models = await OpenCodePromptTransformer.listModels(baseUrl, apiKey); + return models.length > 0 + ? models + : config.openCodeModel + ? [config.openCodeModel] + : []; + } + case TransformationProvider.OpenRouter: { + const apiKey = await this.configRepo.getProviderApiKey(TransformationProvider.OpenRouter); + if (!apiKey) { + throw new Error('OpenRouter API key is not configured.'); + } + const models = await OpenRouterPromptTransformer.listModels(apiKey); + return models.length > 0 + ? models + : [config.openRouterModel || OpenRouterPromptTransformer.DEFAULT_MODEL]; + } + case TransformationProvider.Cursor: + return [...CURSOR_MODELS]; + default: + return []; + } + } + + private async _testWhisper(): Promise { + const result = await testOpenAiApiKey(this.modelService); + await this._panel.webview.postMessage({ + type: 'testResult', + service: 'whisper', + ok: result.ok, + message: result.ok ? 
undefined : result.message, + }); + } + + private async _testOptimization(): Promise { + const config = await this.configRepo.getConfig(); + if (!config.enablePromptTransformation) { + await this._panel.webview.postMessage({ + type: 'testResult', + service: 'optimization', + ok: false, + message: 'Prompt optimization is disabled.', + }); + return; + } + + const whisperTest = await testOpenAiApiKey(this.modelService); + if (!whisperTest.ok) { + await this._panel.webview.postMessage({ + type: 'testResult', + service: 'optimization', + ok: false, + message: 'Configure Whisper first: ' + whisperTest.message, + }); + return; + } + + try { + await this.promptTransformer.transform(SAMPLE_TRANSCRIPTION, { + editorLanguage: 'typescript', + projectType: 'Node.js/JavaScript', + }); + await this._panel.webview.postMessage({ + type: 'testResult', + service: 'optimization', + ok: true, + }); + this.logger.info('Optimization test passed from configuration panel'); + } catch (error) { + await this._panel.webview.postMessage({ + type: 'testResult', + service: 'optimization', + ok: false, + message: error instanceof Error ? error.message : 'Unknown error', + }); + } + } + + private async _saveSystemPrompt(systemPrompt: string): Promise { + const trimmed = systemPrompt.trim(); + if (!trimmed) { + await this._postSaveResult(false, 'System prompt cannot be empty.'); + return; + } + + try { + await this.configRepo.updateConfig({ transformationSystemPrompt: trimmed }); + await this._postSaveResult(true, 'System prompt saved.'); + await this._postConfigUpdated(); + } catch (error) { + const message = error instanceof Error ? 
error.message : 'Failed to save system prompt'; + await this._postSaveResult(false, message); + } + } + + private async _resetSystemPrompt(): Promise { + try { + await this.configRepo.updateConfig({ + transformationSystemPrompt: TRANSFORMATION_SYSTEM_PROMPT, + }); + await this._postSaveResult(true, 'System prompt reset to default.'); + await this._postConfigUpdated(); + } catch (error) { + const message = error instanceof Error ? error.message : 'Failed to reset system prompt'; + await this._postSaveResult(false, message); + } + } + + private async _postSaveResult(ok: boolean, message: string): Promise { + await this._panel.webview.postMessage({ type: 'saveResult', ok, message }); + } + + private _getWebviewContent(webview: vscode.Webview): string { + const nonce = getNonce(); + const htmlPath = path.join( + this.context.extensionPath, + 'out', + 'presentation', + 'webview', + 'configurationWebview.html' + ); + + if (!fs.existsSync(htmlPath)) { + this.logger.error('Configuration webview HTML not found', new Error(htmlPath)); + throw new Error( + 'Configuration webview assets are missing. Run "pnpm run compile" and reload the extension.' + ); + } + + const styleUri = webview.asWebviewUri( + vscode.Uri.joinPath(this._extensionUri, 'out', 'presentation', 'webview', 'configurationWebview.css') + ); + const toolkitUri = webview.asWebviewUri( + vscode.Uri.joinPath(this._extensionUri, 'out', 'presentation', 'webview', 'toolkit.min.js') + ); + + if (!fs.existsSync(path.join(this.context.extensionPath, 'out', 'presentation', 'webview', 'toolkit.min.js'))) { + this.logger.warn( + 'Webview UI toolkit bundle is missing. Run "pnpm run compile" to copy toolkit.min.js.' 
+ ); + } + + let html = fs.readFileSync(htmlPath, 'utf8'); + html = html.replace(/\{\{nonce\}\}/g, nonce); + html = html.replace(/\{\{cspSource\}\}/g, webview.cspSource); + html = html.replace(/\{\{styleUri\}\}/g, styleUri.toString()); + html = html.replace(/\{\{toolkitUri\}\}/g, toolkitUri.toString()); + + this.logger.debug('Loaded configuration webview HTML', { + htmlPath, + styleUri: styleUri.toString(), + toolkitUri: toolkitUri.toString(), + }); + + return html; + } +} diff --git a/src/presentation/webview/configurationWebview.css b/src/presentation/webview/configurationWebview.css new file mode 100644 index 0000000..0d04b36 --- /dev/null +++ b/src/presentation/webview/configurationWebview.css @@ -0,0 +1,161 @@ +body { + margin: 0; + padding: 0; + font-family: var(--vscode-font-family); + font-size: var(--vscode-font-size); + color: var(--vscode-foreground); + background-color: var(--vscode-editor-background); +} + +.page { + max-width: 720px; + margin: 0 auto; + padding: 24px 20px 40px; +} + +.page-header { + margin-bottom: 24px; +} + +.page-header h1 { + margin: 0 0 8px; + font-size: 1.5em; + font-weight: 600; +} + +.page-header p { + margin: 0; + color: var(--vscode-descriptionForeground); + line-height: 1.5; +} + +.section { + margin-bottom: 28px; +} + +.section-header { + display: flex; + align-items: center; + justify-content: space-between; + gap: 12px; + margin-bottom: 12px; +} + +.section-header h2 { + margin: 0; + font-size: 1.1em; + font-weight: 600; +} + +.section-description { + margin: 0 0 16px; + color: var(--vscode-descriptionForeground); + line-height: 1.5; +} + +.field-row { + display: flex; + gap: 12px; + align-items: flex-end; + margin-bottom: 12px; +} + +.field-row vscode-text-field, +.field-row vscode-dropdown { + flex: 1; +} + +.field-stack { + display: flex; + flex-direction: column; + gap: 12px; + margin-bottom: 12px; +} + +.field-stack vscode-text-area { + width: 100%; +} + +.button-row { + display: flex; + gap: 12px; + flex-wrap: 
wrap; +} + +.status-row { + display: flex; + align-items: center; + gap: 8px; + margin-top: 8px; + min-height: 24px; +} + +.status-text { + color: var(--vscode-descriptionForeground); + font-size: 0.95em; +} + +.status-text.ok { + color: var(--vscode-testing-iconPassed); +} + +.status-text.error { + color: var(--vscode-errorForeground); +} + +.provider-fields { + margin-top: 12px; +} + +.hidden { + display: none !important; +} + +.comparison-table { + width: 100%; + border-collapse: collapse; + font-size: 0.9em; +} + +.comparison-table th, +.comparison-table td { + border: 1px solid var(--vscode-panel-border); + padding: 8px 10px; + text-align: left; + vertical-align: top; +} + +.comparison-table th { + background: var(--vscode-editor-inactiveSelectionBackground); +} + +.realtime-badge { + color: var(--vscode-testing-iconPassed); + font-size: 0.8em; + margin-left: 4px; +} + +.footer-actions { + display: flex; + gap: 12px; + flex-wrap: wrap; + margin-top: 24px; + padding-top: 16px; + border-top: 1px solid var(--vscode-panel-border); +} + +.notification { + margin-bottom: 16px; + padding: 10px 12px; + border-radius: 4px; + border: 1px solid var(--vscode-panel-border); + background: var(--vscode-textBlockQuote-background); +} + +.notification.success { + border-color: var(--vscode-testing-iconPassed); +} + +.notification.error { + border-color: var(--vscode-errorForeground); +} diff --git a/src/presentation/webview/configurationWebview.html b/src/presentation/webview/configurationWebview.html new file mode 100644 index 0000000..c824f80 --- /dev/null +++ b/src/presentation/webview/configurationWebview.html @@ -0,0 +1,820 @@ + + + + + + + + + Promptimize Configuration + + +
+ + + + +
+
+

Transcription (Required)

+ Not configured +
+

+ Voice-to-text always uses OpenAI Whisper. An OpenAI API key is required to record. +

+
+ + OpenAI API Key + + Test +
+
+ Enter your OpenAI API key for Whisper transcription. +
+
+ + + +
+
+

Prompt Optimization (Optional)

+ Disabled +
+

+ Turn transcribed speech into structured, LLM-ready prompts. Uses a separate provider from Whisper. +

+ + Enable prompt optimization + + +
+ + + +
+
+

Transformation System Prompt

+
+

+ Customize how the AI transforms your voice transcriptions. This prompt instructs the AI on formatting, structure, and style. +

+ +
+ + System Prompt + +
+ Reset to default + Save prompt +
+
+ +
+ Modify the prompt to customize transformation behavior. +
+
+ + + +
+
+

Provider Comparison

+
+

+ Estimated cost per prompt optimization (~500 input + 200 output tokens). Cloud provider prices update daily when online. +

+
+ + + + + + + + + + + +
ProviderCostSpeedPrivacyBest for
+
+
+ + +
/** Shape of one row in the provider comparison data (one entry per prompt-optimization provider). */
export interface ProviderComparisonEntry {
  /** Provider this row describes. */
  provider: TransformationProvider;
  /** Display-ready cost text, not a number — e.g. '~$0.01', 'Varies', 'Free (local compute)'. */
  costPerTransform: string;
  /** Free-form speed label — values used in PROVIDER_COMPARISON include 'Fast', 'Very Fast', 'Medium'. */
  speed: string;
  /** Free-form privacy label — values used in PROVIDER_COMPARISON include 'Cloud', 'Private Cloud', 'Local'. */
  privacy: string;
  /** Free-form quality label — values used in PROVIDER_COMPARISON include 'Good', 'High', 'Very High'. */
  quality: string;
  /** Short sentence describing the scenario the provider suits best. */
  bestFor: string;
  /** Page where an API key can be obtained; omitted by some entries (e.g. Ollama, OpenCode). */
  apiKeyUrl?: string;
}
/**
 * Renders the provider comparison as plain text.
 *
 * The result starts with the Whisper transcription cost note, followed by a
 * blank line, a "Prompt optimization providers:" heading, and one bullet line
 * per entry of `PROVIDER_COMPARISON` (in array order) showing the provider,
 * its per-transform cost text and its "best for" description.
 *
 * @returns The multi-line summary, lines separated by `\n`, no trailing newline.
 */
export function formatProviderComparisonSummary(): string {
  const bulletLines: string[] = [];
  for (const { provider, costPerTransform, bestFor } of PROVIDER_COMPARISON) {
    bulletLines.push(`• ${provider}: ${costPerTransform}/transform — ${bestFor}`);
  }

  const heading = `Whisper transcription: ${WHISPER_COST_NOTE}\n\nPrompt optimization providers:`;
  return `${heading}\n${bulletLines.join('\n')}`;
}
/**
 * Central store for user-facing wording. The copy deliberately keeps two
 * services apart: Whisper transcription (always OpenAI) and the optional
 * prompt-optimization step (provider chosen by the user).
 */

/** Display name of the transcription service. */
export const WHISPER_SERVICE_NAME = 'OpenAI Whisper';

/** One-line explanation of the transcription service and its key requirement. */
export const WHISPER_SERVICE_DESCRIPTION =
  'Voice-to-text transcription always uses OpenAI Whisper and requires an OpenAI API key.';

/** One-line explanation of the optional optimization service. */
export const OPTIMIZATION_SERVICE_DESCRIPTION =
  'Prompt optimization is optional. Choose a provider and supply its API key or credentials when required.';

/** Prompt text asking for the OpenAI API key. */
export const OPENAI_API_KEY_PROMPT =
  'Enter your OpenAI API key for Whisper voice-to-text transcription.';

/** Supplementary detail for the key prompt: key reuse and where to get one. */
export const OPENAI_API_KEY_DETAIL =
  'If you choose OpenAI for prompt optimization, the same key can be reused. Get a key at https://platform.openai.com/api-keys';

/** Confirmation text for a saved OpenAI API key. */
export const OPENAI_API_KEY_SUCCESS =
  'OpenAI API key saved. Whisper transcription is ready. You can reuse this key for OpenAI prompt optimization.';

/** Message stating that recording needs an OpenAI API key. */
export const OPENAI_API_KEY_REQUIRED_RECORDING =
  'OpenAI API key is required for voice-to-text transcription (Whisper). Prompt optimization uses a separate provider you can configure later.';

/** Product-prefixed variant of the missing-key message that points to setup. */
export const OPENAI_API_KEY_REQUIRED_STARTUP =
  'Promptimize: OpenAI API key is required for voice-to-text transcription (Whisper). Run setup to configure your key.';

/** Introduction to what prompt optimization does. */
export const OPTIMIZATION_PROVIDER_INTRO =
  'Prompt optimization turns transcribed speech into structured, LLM-ready prompts. This step is separate from Whisper transcription.';

/** Follow-up detail for the optimization provider introduction. */
export const OPTIMIZATION_PROVIDER_INTRO_DETAIL =
  'Transcription always uses OpenAI Whisper. Select a provider below and provide its API key or credentials when prompted.';

/**
 * Builds the message reporting that a provider's credentials are missing.
 *
 * @param providerName - Display name of the optimization provider.
 * @returns The complete, product-prefixed message.
 */
export const OPTIMIZATION_PROVIDER_MISSING_KEY = (providerName: string): string => {
  return `Promptimize: ${providerName} credentials are not configured for prompt optimization.`;
};

/**
 * Builds the multi-line tooltip describing which services are active.
 *
 * @param optimizationProvider - Display name of the optimization provider; only shown when enabled.
 * @param optimizationEnabled - Whether prompt optimization is turned on.
 * @returns Tooltip text naming the transcription service, the optimization state and usage hints.
 */
export const STATUS_BAR_SERVICES_TOOLTIP = (
  optimizationProvider: string,
  optimizationEnabled: boolean
): string => {
  let optimizationStatus: string;
  if (optimizationEnabled) {
    optimizationStatus = `Optimization: ${optimizationProvider}`;
  } else {
    optimizationStatus = 'Optimization: disabled (raw transcription only)';
  }
  return `Transcription: ${WHISPER_SERVICE_NAME}\n${optimizationStatus}\n\nClick to start recording (Cmd/Ctrl+Alt+V)\nRun "Promptimize: Open Configuration" to change settings`;
};

/**
 * Builds the setup-checklist tooltip: one line per item, marked with a check
 * when complete and a cross when not.
 *
 * @param items - Checklist entries in display order.
 * @returns Tooltip text with a heading, the item lines and a call to action.
 */
export const SETUP_CHECKLIST_TOOLTIP = (
  items: Array<{ label: string; complete: boolean }>
): string => {
  const checklist = items
    .map(({ label, complete }) => {
      const mark = complete ? '✓' : '✗';
      return `${mark} ${label}`;
    })
    .join('\n');
  return `Setup checklist:\n${checklist}\n\nClick to open configuration`;
};
// This module has no other imports, so the dependency is declared with the block.
import { randomInt } from 'crypto';

/** Characters a nonce may contain (ASCII letters and digits). */
const NONCE_ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789';

/** Number of characters in a generated nonce. */
const NONCE_LENGTH = 32;

/**
 * Generates a random nonce for a webview Content Security Policy.
 *
 * Characters are drawn with `crypto.randomInt`, which is backed by a
 * cryptographically secure generator and returns uniformly distributed
 * indices. `Math.random()` is not suitable here: its output is predictable,
 * and a guessable nonce defeats the purpose of a CSP nonce.
 *
 * @returns A 32-character string of ASCII letters and digits.
 */
export function getNonce(): string {
  let nonce = '';
  for (let i = 0; i < NONCE_LENGTH; i++) {
    // randomInt(max) yields an integer in [0, max) without modulo bias.
    nonce += NONCE_ALPHABET.charAt(randomInt(NONCE_ALPHABET.length));
  }
  return nonce;
}