diff --git a/.github/workflows/ci-cd.yml b/.github/workflows/ci-cd.yml index e602318..cf85322 100644 --- a/.github/workflows/ci-cd.yml +++ b/.github/workflows/ci-cd.yml @@ -34,7 +34,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, windows-latest, macos-latest] - python-version: ["3.12", "3.13", "3.14"] + python-version: ["3.12", "3.13"] steps: - uses: actions/checkout@ff7abcd0c3c05ccf6adc123a8cd1fd4fb30fb493 - name: Set up Python ${{ matrix.python-version }} diff --git a/.gitignore b/.gitignore index fe71e5e..d059968 100644 --- a/.gitignore +++ b/.gitignore @@ -146,3 +146,4 @@ cython_debug/ # Runtime Logs logs/ +optimized_manifest.json diff --git a/README.md b/README.md index d1e95ba..d601cf2 100644 --- a/README.md +++ b/README.md @@ -1,42 +1,70 @@ # coreason-optimizer -coreason-optimizer - -[![CI/CD](https://github.com/CoReason-AI/coreason_optimizer/actions/workflows/ci-cd.yml/badge.svg)](https://github.com/CoReason-AI/coreason_optimizer/actions/workflows/ci-cd.yml) -[![PyPI](https://img.shields.io/pypi/v/coreason_optimizer.svg)](https://pypi.org/project/coreason_optimizer/) -[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/coreason_optimizer.svg)](https://pypi.org/project/coreason_optimizer/) -[![License](https://img.shields.io/github/license/CoReason-AI/coreason_optimizer)](https://github.com/CoReason-AI/coreason_optimizer/blob/main/LICENSE) -[![Codecov](https://codecov.io/gh/CoReason-AI/coreason_optimizer/branch/main/graph/badge.svg)](https://codecov.io/gh/CoReason-AI/coreason_optimizer) -[![Downloads](https://static.pepy.tech/badge/coreason_optimizer)](https://pepy.tech/project/coreason_optimizer) -[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) -[![Pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit) - -## Getting Started - -### Prerequisites - -- Python 
3.12+ -- Poetry - -### Installation - -1. Clone the repository: - ```sh - git clone https://github.com/CoReason-AI/coreason_optimizer.git - cd coreason_optimizer - ``` -2. Install dependencies: - ```sh - poetry install - ``` - -### Usage - -- Run the linter: - ```sh - poetry run pre-commit run --all-files - ``` -- Run the tests: - ```sh - poetry run pytest - ``` +**Automated Prompt Engineering / LLM Compilation / DSPy Integration for CoReason-AI** + +[![License: Prosperity 3.0](https://img.shields.io/badge/license-Prosperity%203.0-blue)](https://prosperitylicense.com/versions/3.0.0) +[![CI Status](https://github.com/CoReason-AI/coreason-optimizer/actions/workflows/ci-cd.yml/badge.svg)](https://github.com/CoReason-AI/coreason-optimizer/actions) +[![Code Style: Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff) +[![Documentation](https://img.shields.io/badge/docs-product_requirements-blue)](docs/product_requirements.md) + +**coreason-optimizer** is the "Compiler" for the CoReason Agentic Platform. It automates prompt engineering by treating prompts as trainable weights, optimizing them against ground-truth datasets to maximize performance metrics. + +--- + +## Installation + +```bash +pip install coreason-optimizer +``` + +## Features + +- **Automated Optimization:** Rewrites instructions and selects examples to maximize a score, not human intuition. +- **Model-Specific Compilation:** Generates optimized prompts specifically tuned for target models (e.g., GPT-4, Claude 3.5). +- **Continuous Learning:** Re-runs optimization on recent logs to patch prompts against data drift. +- **Mutate-Evaluate Loop:** Systematic cycle of drafting, evaluating, diagnosing, mutating, and selecting prompts. +- **Strategies:** Includes BootstrapFewShot (mining successful traces) and MIPRO (Multi-prompt Instruction PRoposal Optimizer). 
+- **Integration:** Works seamlessly with `coreason-construct`, `coreason-archive`, and `coreason-assay`. + +For full product requirements, see [docs/product_requirements.md](docs/product_requirements.md). + +## Usage + +Here is how to initialize and use the library to compile an agent: + +```python +from coreason_optimizer import OptimizerConfig, PromptOptimizer +from coreason_optimizer.core.interfaces import Construct +from coreason_optimizer.data import Dataset + +# 1. Configuration +config = OptimizerConfig( + target_model="gpt-4o", + metric="exact_match", + max_rounds=10 +) + +# 2. Load Data +dataset = Dataset.from_csv("data/gold_set.csv") +train_set, val_set = dataset.split(test_size=0.2) + +# 3. Load Agent (Construct) +# In a real scenario, this would be imported from your agent code +# from src.agents.analyst import analyst_agent +class MockAgent(Construct): + inputs = ["question"] + outputs = ["answer"] + system_prompt = "You are a helpful assistant." +agent = MockAgent() + +# 4. Compile +optimizer = PromptOptimizer(config=config) +optimized_manifest = optimizer.compile( + agent=agent, + trainset=train_set, + valset=val_set +) + +print(f"Optimization complete. New Score: {optimized_manifest.performance_metric}") +print(f"Optimized Instruction: {optimized_manifest.optimized_instruction}") +``` diff --git a/VIGNETTE.md b/VIGNETTE.md new file mode 100644 index 0000000..e55bd8b --- /dev/null +++ b/VIGNETTE.md @@ -0,0 +1,69 @@ +# The Architecture and Utility of coreason-optimizer + +## 1. The Philosophy (The Why) + +The prevailing method of interacting with Large Language Models (LLMs)—manual "prompt engineering"—is an exercise in frustration. It is artisan work: fragile, unscalable, and often relying on "magic words" that break when models update. The author of `coreason-optimizer` recognizes that prompts are not merely text; they are **trainable parameters** of a software system. + +This package exists to replace intuition with optimization. 
Instead of a developer guessing which few-shot examples might help, `coreason-optimizer` empirically selects them. Instead of rewriting instructions hoping for better JSON compliance, it uses a meta-learner to rewrite them for you. It shifts the paradigm from "Prompt Whisperer" to "Prompt Compiler," treating the agent definition as source code and the deployed prompt as a compiled, frozen binary. + +## 2. Under the Hood (The Dependencies & Logic) + +The engine runs on a focused stack designed for iterative evaluation: + +* **Pydantic** enforces the rigorous schema definitions (`OptimizerConfig`, `OptimizedManifest`) required for a compiler that must output deterministic artifacts. +* **OpenAI** & **NumPy/scikit-learn** power the semantic search and generation capabilities. The package doesn't just call LLMs; it uses embeddings to find "nearest neighbor" successful examples to inject into prompts (`SemanticSelector`). +* **Loguru** provides the observability backbone. When an optimization run takes 4 hours and spends $10, you need structured, searchable logs to understand *why* a specific mutation was rejected. +* **Click** exposes the compiler interface to CI/CD pipelines, allowing optimization to be a step in the build process, not a manual task. + +The core logic revolves around the **Mutate-Evaluate Loop**. Inspired by DSPy, the `MiproOptimizer` (Multi-prompt Instruction PRoposal Optimizer) generates candidate instructions using a "Teacher" model. Simultaneously, it selects sets of few-shot examples. It then performs a grid search across these combinations, scoring them against a ground-truth dataset using a defined `Metric` (like `exact_match`). The result is not just a better prompt, but the best-scoring candidate found for that specific dataset and model. + +## 3. In Practice (The How) + +Here is how `coreason-optimizer` transforms a raw agent definition into a deployed artifact. + +### Compiling an Agent + +The `compile` method is the heart of the system. 
It takes your agent logic and training data, runs the optimization strategies (like BootstrapFewShot or MIPRO), and returns a frozen manifest. + +```python +from coreason_optimizer.core.config import OptimizerConfig +from coreason_optimizer.strategies.mipro import MiproOptimizer +from coreason_optimizer.core.metrics import MetricFactory + +# 1. Configuration: Define the target environment +config = OptimizerConfig( + target_model="gpt-4o", + budget_limit_usd=5.00, # Safety first + max_rounds=10, +) + +# 2. Instantiate the Optimizer with a specific Metric +# "exact_match" ensures the output strictly adheres to the reference +optimizer = MiproOptimizer( + llm_client=client, metric=MetricFactory.get("exact_match"), config=config +) + +# 3. The Compilation Step +# This runs the "Mutate-Evaluate" loop, finding the best instruction/example pair +manifest = optimizer.compile( + agent=my_agent_construct, + trainset=training_examples, + valset=validation_examples, +) + +print(f"Optimization improved score to: {manifest.performance_metric}") +``` + +### The Optimized Artifact + +The output is a portable JSON manifest. This file allows the runtime to execute the optimized agent without needing the optimizer or the training data again. + +```python +# The manifest contains the "compiled" prompt logic +print(manifest.optimized_instruction) +# > "Extract adverse events from the text. Format as JSON. [Optimized Instructions...]" + +# It also holds the mathematically selected few-shot examples +for example in manifest.few_shot_examples: + print(f"Input: {example.inputs} -> Output: {example.reference}") +``` diff --git a/docs/product_requirements.md b/docs/product_requirements.md new file mode 100644 index 0000000..e50d824 --- /dev/null +++ b/docs/product_requirements.md @@ -0,0 +1,129 @@ +# Product Requirements Document: coreason-optimizer + +**Domain:** Automated Prompt Engineering / LLM Compilation / DSPy Integration +**Package Name:** coreason-optimizer + +--- + +## 1. 
Executive Summary + +**coreason-optimizer** is the "Compiler" for the CoReason Agentic Platform. + +In the current SOTA (State-of-the-Art), writing static prompts by hand is considered technical debt. **coreason-optimizer** automates this by treating prompts (instructions and few-shot examples) as **trainable weights**. It ingests a "Draft Agent" defined in `coreason-construct` and iterates on it against a ground-truth dataset (validated by `coreason-assay`), mathematically maximizing performance metrics. It outputs a "Frozen Manifest" that is deployed to production, ensuring GxP stability. + +## 2. Problem Statement & Rationale + +| Problem | Impact | The coreason-optimizer Solution | +| :---- | :---- | :---- | +| **The "Prompt Whisperer" Bottleneck** | Engineers spend hours tweaking words ("Please be careful") with unpredictable results. | **Automated Optimization:** A meta-algorithm rewrites instructions and selects examples to maximize a score, not human intuition. | +| **Brittleness** | A prompt that works for GPT-4 often fails for Claude 3.5 or Llama 3. | **Model-Specific Compilation:** The optimizer can run separate jobs to generate optimized prompts specifically tuned for the target model. | +| **Drift** | Agents degrade over time as data distributions change (e.g., new medical slang). | **Continuous Learning:** Re-running the optimizer on recent "Gold" logs from `coreason-archive` automatically patches the prompt. | + +## 3. Architectural Design + +### 3.1 The "Mutate-Evaluate" Loop + +The package implements a systematic optimization cycle (inspired by DSPy): + +1. **Draft:** Start with the developer's base intention. +2. **Evaluate:** Run the agent on a training set. +3. **Diagnose:** Identify failing examples using `coreason-assay` metrics. +4. **Mutate:** + * **Bootstrap Few-Shot:** Find historical examples where the agent *succeeded* on similar hard cases and inject them into the prompt. 
+ * **Instruction Induction:** Use a Meta-LLM to rewrite the System Prompt to explicitly address the observed failures. +5. **Select:** Keep the mutation that yields the highest metric score. + +### 3.2 Integration Map + +* **Input (Schema):** `coreason-construct` defines the Agent structure (Inputs/Outputs). +* **Input (Data):** `coreason-archive` provides historical logs to mine for training examples. +* **Feedback (Loss Function):** `coreason-assay` provides the scoring function (e.g., accuracy, json_validity, f1_score). +* **Output (Artifact):** Produces a versioned `OptimizedManifest.json` used by the runtime. + +## 4. Functional Specifications + +### 4.1 The Optimization Engine + +* **Strategy: BootstrapFewShot:** + * Automatically mines the "Teacher" model's successful traces to create few-shot examples for the "Student" prompt. +* **Strategy: MIPRO (Multi-prompt Instruction PRoposal Optimizer):** + * Generates 10 candidates for the System Instruction and 5 combinations of Few-Shot examples, finding the optimal pair via Bayesian optimization or simple grid search. +* **Cost Awareness:** + * Must implement a `BudgetManager` to halt optimization if the token spend exceeds a defined limit (e.g., $10.00). + +### 4.2 Data Management + +* **Dataset Loader:** Standardizes inputs from CSV, JSONL, or `coreason-archive` SQL queries into a `TrainingExample` object. +* **Splitter:** Automatically creates Train/Dev/Test splits to prevent overfitting the prompt to the training data. + +### 4.3 The Manifest Serializer + +* The output must be deterministic and immutable. +* **Schema:** + ```json + { + "agent_id": "adverse_event_extractor", + "base_model": "gpt-4o", + "optimized_instruction": "Extract adverse events... [Modified by Optimizer]", + "few_shot_examples": [ ... ], + "performance_metric": "0.94", + "optimization_run_id": "opt_20250119_xyz" + } + ``` + +## 5. 
Technical Specifications (API) + +### 5.1 The Interface + +```python +class OptimizerConfig(BaseModel): + target_model: str = "gpt-4o" + metric: str = "exact_match" + max_bootstrapped_demos: int = 4 + max_rounds: int = 10 + +class PromptOptimizer(ABC): + @abstractmethod + def compile(self, + agent: Construct, + trainset: List[Example], + valset: List[Example]) -> OptimizedManifest: + """Run the optimization loop.""" + pass +``` + +### 5.2 The CLI (coreason-opt) + +The package should expose a command-line interface for CI/CD integration: + +* `coreason-opt tune --agent src/agents/analyst.py --dataset data/gold_set.csv` +* `coreason-opt evaluate --manifest dist/analyst_v2.json --dataset data/test_set.csv` + +## 6. Implementation Plan: Atomic Units of Change (AUC) + +### Phase 1: Foundation + +* **AUC-1: Scaffold & Configuration:** Project structure, `pyproject.toml`, and `OptimizerConfig` Pydantic models. +* **AUC-2: Abstract Base Classes:** Define `BaseOptimizer`, `BaseSelector` (for examples), and `BaseMutator` (for instructions). + +### Phase 2: Data & Metrics + +* **AUC-3: Dataset Loader:** Implement `Dataset` class that handles loading/splitting from CSV and `coreason-archive`. +* **AUC-4: Metric Adapter:** Create a wrapper that adapts `coreason-assay` functions into the format required by the optimization loop. + +### Phase 3: The Strategies + +* **AUC-5: Few-Shot Selector:** Implement logic to select examples using Semantic Similarity (via `coreason-foundry` embeddings) or Random Sampling. +* **AUC-6: Bootstrap Logic:** Implement the "Teacher-Student" loop where the model generates its own training data from input questions. +* **AUC-7: Instruction Mutator:** Implement the Meta-Prompt that analyzes failures and rewrites the system prompt. + +### Phase 4: The Loop & Artifacts + +* **AUC-8: The Compile Loop:** Connect the Mutators and Selectors into the main `compile()` orchestration method. 
+* **AUC-9: Manifest Serializer:** Logic to dump the final state to JSON. +* **AUC-10: CLI Entrypoint:** Build the `coreason-opt` command line tool. + +## 7. Compliance & Safety + +* **Audit Trail:** Every optimization run must log the `trace_id` of the experiments to `coreason-veritas`. We must be able to explain *why* the prompt changed. +* **Human-in-the-Loop Gate:** The `OptimizedManifest` is not automatically deployed. It is saved as a "Candidate" that requires a human to review the score improvement before promotion to production. diff --git a/poetry.lock b/poetry.lock index 9a5008e..128fbe0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,5 +1,48 @@ # This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. +[[package]] +name = "aiofiles" +version = "25.1.0" +description = "File support for asyncio." +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "aiofiles-25.1.0-py3-none-any.whl", hash = "sha256:abe311e527c862958650f9438e859c1fa7568a141b22abcd015e120e86a85695"}, + {file = "aiofiles-25.1.0.tar.gz", hash = "sha256:a8d728f0a29de45dc521f18f07297428d56992a742f0cd2701ba86e44d23d5b2"}, +] + +[[package]] +name = "annotated-types" +version = "0.7.0" +description = "Reusable constraint types to use with typing.Annotated" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, + {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, +] + +[[package]] +name = "anyio" +version = "4.12.1" +description = "High-level concurrency and networking framework on top of asyncio or Trio" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c"}, + {file = 
"anyio-4.12.1.tar.gz", hash = "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703"}, +] + +[package.dependencies] +idna = ">=2.8" +typing_extensions = {version = ">=4.5", markers = "python_version < \"3.13\""} + +[package.extras] +trio = ["trio (>=0.31.0) ; python_version < \"3.10\"", "trio (>=0.32.0) ; python_version >= \"3.10\""] + [[package]] name = "babel" version = "2.17.0" @@ -41,7 +84,7 @@ version = "2026.1.4" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.7" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c"}, {file = "certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120"}, @@ -188,7 +231,7 @@ version = "8.3.1" description = "Composable command line interface toolkit" optional = false python-versions = ">=3.10" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6"}, {file = "click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a"}, @@ -208,7 +251,7 @@ files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] -markers = {main = "sys_platform == \"win32\""} +markers = {main = "sys_platform == \"win32\" or platform_system == \"Windows\""} [[package]] name = "coverage" @@ -327,6 +370,18 @@ files = [ {file = "distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d"}, ] +[[package]] +name = "distro" +version = "1.9.0" +description = "Distro - an OS platform information API" +optional = false 
+python-versions = ">=3.6" +groups = ["main"] +files = [ + {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"}, + {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"}, +] + [[package]] name = "filelock" version = "3.20.3" @@ -357,6 +412,65 @@ python-dateutil = ">=2.8.1" [package.extras] dev = ["flake8", "markdown", "twine", "wheel"] +[[package]] +name = "h11" +version = "0.16.0" +description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86"}, + {file = "h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1"}, +] + +[[package]] +name = "httpcore" +version = "1.0.9" +description = "A minimal low-level HTTP client." +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55"}, + {file = "httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8"}, +] + +[package.dependencies] +certifi = "*" +h11 = ">=0.16" + +[package.extras] +asyncio = ["anyio (>=4.0,<5.0)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] +trio = ["trio (>=0.22.0,<1.0)"] + +[[package]] +name = "httpx" +version = "0.28.1" +description = "The next generation HTTP client." 
+optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad"}, + {file = "httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc"}, +] + +[package.dependencies] +anyio = "*" +certifi = "*" +httpcore = "==1.*" +idna = "*" + +[package.extras] +brotli = ["brotli ; platform_python_implementation == \"CPython\"", "brotlicffi ; platform_python_implementation != \"CPython\""] +cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] +zstd = ["zstandard (>=0.18.0)"] + [[package]] name = "identify" version = "2.6.16" @@ -378,7 +492,7 @@ version = "3.11" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.8" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea"}, {file = "idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902"}, @@ -405,7 +519,7 @@ version = "3.1.6" description = "A very fast and expressive template engine." optional = false python-versions = ">=3.7" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67"}, {file = "jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d"}, @@ -417,6 +531,130 @@ MarkupSafe = ">=2.0" [package.extras] i18n = ["Babel (>=2.7)"] +[[package]] +name = "jiter" +version = "0.12.0" +description = "Fast iterable JSON parser." 
+optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "jiter-0.12.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:e7acbaba9703d5de82a2c98ae6a0f59ab9770ab5af5fa35e43a303aee962cf65"}, + {file = "jiter-0.12.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:364f1a7294c91281260364222f535bc427f56d4de1d8ffd718162d21fbbd602e"}, + {file = "jiter-0.12.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:85ee4d25805d4fb23f0a5167a962ef8e002dbfb29c0989378488e32cf2744b62"}, + {file = "jiter-0.12.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:796f466b7942107eb889c08433b6e31b9a7ed31daceaecf8af1be26fb26c0ca8"}, + {file = "jiter-0.12.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:35506cb71f47dba416694e67af996bbdefb8e3608f1f78799c2e1f9058b01ceb"}, + {file = "jiter-0.12.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:726c764a90c9218ec9e4f99a33d6bf5ec169163f2ca0fc21b654e88c2abc0abc"}, + {file = "jiter-0.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa47810c5565274810b726b0dc86d18dce5fd17b190ebdc3890851d7b2a0e74"}, + {file = "jiter-0.12.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f8ec0259d3f26c62aed4d73b198c53e316ae11f0f69c8fbe6682c6dcfa0fcce2"}, + {file = "jiter-0.12.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:79307d74ea83465b0152fa23e5e297149506435535282f979f18b9033c0bb025"}, + {file = "jiter-0.12.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cf6e6dd18927121fec86739f1a8906944703941d000f0639f3eb6281cc601dca"}, + {file = "jiter-0.12.0-cp310-cp310-win32.whl", hash = "sha256:b6ae2aec8217327d872cbfb2c1694489057b9433afce447955763e6ab015b4c4"}, + {file = "jiter-0.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:c7f49ce90a71e44f7e1aa9e7ec415b9686bbc6a5961e57eab511015e6759bc11"}, + {file = "jiter-0.12.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = 
"sha256:d8f8a7e317190b2c2d60eb2e8aa835270b008139562d70fe732e1c0020ec53c9"}, + {file = "jiter-0.12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2218228a077e784c6c8f1a8e5d6b8cb1dea62ce25811c356364848554b2056cd"}, + {file = "jiter-0.12.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9354ccaa2982bf2188fd5f57f79f800ef622ec67beb8329903abf6b10da7d423"}, + {file = "jiter-0.12.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8f2607185ea89b4af9a604d4c7ec40e45d3ad03ee66998b031134bc510232bb7"}, + {file = "jiter-0.12.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3a585a5e42d25f2e71db5f10b171f5e5ea641d3aa44f7df745aa965606111cc2"}, + {file = "jiter-0.12.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd9e21d34edff5a663c631f850edcb786719c960ce887a5661e9c828a53a95d9"}, + {file = "jiter-0.12.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a612534770470686cd5431478dc5a1b660eceb410abade6b1b74e320ca98de6"}, + {file = "jiter-0.12.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3985aea37d40a908f887b34d05111e0aae822943796ebf8338877fee2ab67725"}, + {file = "jiter-0.12.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b1207af186495f48f72529f8d86671903c8c10127cac6381b11dddc4aaa52df6"}, + {file = "jiter-0.12.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:ef2fb241de583934c9915a33120ecc06d94aa3381a134570f59eed784e87001e"}, + {file = "jiter-0.12.0-cp311-cp311-win32.whl", hash = "sha256:453b6035672fecce8007465896a25b28a6b59cfe8fbc974b2563a92f5a92a67c"}, + {file = "jiter-0.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:ca264b9603973c2ad9435c71a8ec8b49f8f715ab5ba421c85a51cde9887e421f"}, + {file = "jiter-0.12.0-cp311-cp311-win_arm64.whl", hash = "sha256:cb00ef392e7d684f2754598c02c409f376ddcef857aae796d559e6cacc2d78a5"}, + {file = "jiter-0.12.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = 
"sha256:305e061fa82f4680607a775b2e8e0bcb071cd2205ac38e6ef48c8dd5ebe1cf37"}, + {file = "jiter-0.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5c1860627048e302a528333c9307c818c547f214d8659b0705d2195e1a94b274"}, + {file = "jiter-0.12.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df37577a4f8408f7e0ec3205d2a8f87672af8f17008358063a4d6425b6081ce3"}, + {file = "jiter-0.12.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:75fdd787356c1c13a4f40b43c2156276ef7a71eb487d98472476476d803fb2cf"}, + {file = "jiter-0.12.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1eb5db8d9c65b112aacf14fcd0faae9913d07a8afea5ed06ccdd12b724e966a1"}, + {file = "jiter-0.12.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:73c568cc27c473f82480abc15d1301adf333a7ea4f2e813d6a2c7d8b6ba8d0df"}, + {file = "jiter-0.12.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4321e8a3d868919bcb1abb1db550d41f2b5b326f72df29e53b2df8b006eb9403"}, + {file = "jiter-0.12.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0a51bad79f8cc9cac2b4b705039f814049142e0050f30d91695a2d9a6611f126"}, + {file = "jiter-0.12.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:2a67b678f6a5f1dd6c36d642d7db83e456bc8b104788262aaefc11a22339f5a9"}, + {file = "jiter-0.12.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efe1a211fe1fd14762adea941e3cfd6c611a136e28da6c39272dbb7a1bbe6a86"}, + {file = "jiter-0.12.0-cp312-cp312-win32.whl", hash = "sha256:d779d97c834b4278276ec703dc3fc1735fca50af63eb7262f05bdb4e62203d44"}, + {file = "jiter-0.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:e8269062060212b373316fe69236096aaf4c49022d267c6736eebd66bbbc60bb"}, + {file = "jiter-0.12.0-cp312-cp312-win_arm64.whl", hash = "sha256:06cb970936c65de926d648af0ed3d21857f026b1cf5525cb2947aa5e01e05789"}, + {file = "jiter-0.12.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = 
"sha256:6cc49d5130a14b732e0612bc76ae8db3b49898732223ef8b7599aa8d9810683e"}, + {file = "jiter-0.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:37f27a32ce36364d2fa4f7fdc507279db604d27d239ea2e044c8f148410defe1"}, + {file = "jiter-0.12.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bbc0944aa3d4b4773e348cda635252824a78f4ba44328e042ef1ff3f6080d1cf"}, + {file = "jiter-0.12.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:da25c62d4ee1ffbacb97fac6dfe4dcd6759ebdc9015991e92a6eae5816287f44"}, + {file = "jiter-0.12.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:048485c654b838140b007390b8182ba9774621103bd4d77c9c3f6f117474ba45"}, + {file = "jiter-0.12.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:635e737fbb7315bef0037c19b88b799143d2d7d3507e61a76751025226b3ac87"}, + {file = "jiter-0.12.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e017c417b1ebda911bd13b1e40612704b1f5420e30695112efdbed8a4b389ed"}, + {file = "jiter-0.12.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:89b0bfb8b2bf2351fba36bb211ef8bfceba73ef58e7f0c68fb67b5a2795ca2f9"}, + {file = "jiter-0.12.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:f5aa5427a629a824a543672778c9ce0c5e556550d1569bb6ea28a85015287626"}, + {file = "jiter-0.12.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ed53b3d6acbcb0fd0b90f20c7cb3b24c357fe82a3518934d4edfa8c6898e498c"}, + {file = "jiter-0.12.0-cp313-cp313-win32.whl", hash = "sha256:4747de73d6b8c78f2e253a2787930f4fffc68da7fa319739f57437f95963c4de"}, + {file = "jiter-0.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:e25012eb0c456fcc13354255d0338cd5397cce26c77b2832b3c4e2e255ea5d9a"}, + {file = "jiter-0.12.0-cp313-cp313-win_arm64.whl", hash = "sha256:c97b92c54fe6110138c872add030a1f99aea2401ddcdaa21edf74705a646dd60"}, + {file = "jiter-0.12.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = 
"sha256:53839b35a38f56b8be26a7851a48b89bc47e5d88e900929df10ed93b95fea3d6"}, + {file = "jiter-0.12.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94f669548e55c91ab47fef8bddd9c954dab1938644e715ea49d7e117015110a4"}, + {file = "jiter-0.12.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:351d54f2b09a41600ffea43d081522d792e81dcfb915f6d2d242744c1cc48beb"}, + {file = "jiter-0.12.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2a5e90604620f94bf62264e7c2c038704d38217b7465b863896c6d7c902b06c7"}, + {file = "jiter-0.12.0-cp313-cp313t-win_arm64.whl", hash = "sha256:88ef757017e78d2860f96250f9393b7b577b06a956ad102c29c8237554380db3"}, + {file = "jiter-0.12.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:c46d927acd09c67a9fb1416df45c5a04c27e83aae969267e98fba35b74e99525"}, + {file = "jiter-0.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:774ff60b27a84a85b27b88cd5583899c59940bcc126caca97eb2a9df6aa00c49"}, + {file = "jiter-0.12.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5433fab222fb072237df3f637d01b81f040a07dcac1cb4a5c75c7aa9ed0bef1"}, + {file = "jiter-0.12.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f8c593c6e71c07866ec6bfb790e202a833eeec885022296aff6b9e0b92d6a70e"}, + {file = "jiter-0.12.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:90d32894d4c6877a87ae00c6b915b609406819dce8bc0d4e962e4de2784e567e"}, + {file = "jiter-0.12.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:798e46eed9eb10c3adbbacbd3bdb5ecd4cf7064e453d00dbef08802dae6937ff"}, + {file = "jiter-0.12.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3f1368f0a6719ea80013a4eb90ba72e75d7ea67cfc7846db2ca504f3df0169a"}, + {file = "jiter-0.12.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:65f04a9d0b4406f7e51279710b27484af411896246200e461d80d3ba0caa901a"}, + {file = 
"jiter-0.12.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:fd990541982a24281d12b67a335e44f117e4c6cbad3c3b75c7dea68bf4ce3a67"}, + {file = "jiter-0.12.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:b111b0e9152fa7df870ecaebb0bd30240d9f7fff1f2003bcb4ed0f519941820b"}, + {file = "jiter-0.12.0-cp314-cp314-win32.whl", hash = "sha256:a78befb9cc0a45b5a5a0d537b06f8544c2ebb60d19d02c41ff15da28a9e22d42"}, + {file = "jiter-0.12.0-cp314-cp314-win_amd64.whl", hash = "sha256:e1fe01c082f6aafbe5c8faf0ff074f38dfb911d53f07ec333ca03f8f6226debf"}, + {file = "jiter-0.12.0-cp314-cp314-win_arm64.whl", hash = "sha256:d72f3b5a432a4c546ea4bedc84cce0c3404874f1d1676260b9c7f048a9855451"}, + {file = "jiter-0.12.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:e6ded41aeba3603f9728ed2b6196e4df875348ab97b28fc8afff115ed42ba7a7"}, + {file = "jiter-0.12.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a947920902420a6ada6ad51892082521978e9dd44a802663b001436e4b771684"}, + {file = "jiter-0.12.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:add5e227e0554d3a52cf390a7635edaffdf4f8fce4fdbcef3cc2055bb396a30c"}, + {file = "jiter-0.12.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3f9b1cda8fcb736250d7e8711d4580ebf004a46771432be0ae4796944b5dfa5d"}, + {file = "jiter-0.12.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:deeb12a2223fe0135c7ff1356a143d57f95bbf1f4a66584f1fc74df21d86b993"}, + {file = "jiter-0.12.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c596cc0f4cb574877550ce4ecd51f8037469146addd676d7c1a30ebe6391923f"}, + {file = "jiter-0.12.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5ab4c823b216a4aeab3fdbf579c5843165756bd9ad87cc6b1c65919c4715f783"}, + {file = "jiter-0.12.0-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:e427eee51149edf962203ff8db75a7514ab89be5cb623fb9cea1f20b54f1107b"}, + {file = 
"jiter-0.12.0-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:edb868841f84c111255ba5e80339d386d937ec1fdce419518ce1bd9370fac5b6"}, + {file = "jiter-0.12.0-cp314-cp314t-win32.whl", hash = "sha256:8bbcfe2791dfdb7c5e48baf646d37a6a3dcb5a97a032017741dea9f817dca183"}, + {file = "jiter-0.12.0-cp314-cp314t-win_amd64.whl", hash = "sha256:2fa940963bf02e1d8226027ef461e36af472dea85d36054ff835aeed944dd873"}, + {file = "jiter-0.12.0-cp314-cp314t-win_arm64.whl", hash = "sha256:506c9708dd29b27288f9f8f1140c3cb0e3d8ddb045956d7757b1fa0e0f39a473"}, + {file = "jiter-0.12.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:c9d28b218d5f9e5f69a0787a196322a5056540cb378cac8ff542b4fa7219966c"}, + {file = "jiter-0.12.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d0ee12028daf8cfcf880dd492349a122a64f42c059b6c62a2b0c96a83a8da820"}, + {file = "jiter-0.12.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b135ebe757a82d67ed2821526e72d0acf87dd61f6013e20d3c45b8048af927b"}, + {file = "jiter-0.12.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:15d7fafb81af8a9e3039fc305529a61cd933eecee33b4251878a1c89859552a3"}, + {file = "jiter-0.12.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:92d1f41211d8a8fe412faad962d424d334764c01dac6691c44691c2e4d3eedaf"}, + {file = "jiter-0.12.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3a64a48d7c917b8f32f25c176df8749ecf08cec17c466114727efe7441e17f6d"}, + {file = "jiter-0.12.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:122046f3b3710b85de99d9aa2f3f0492a8233a2f54a64902b096efc27ea747b5"}, + {file = "jiter-0.12.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:27ec39225e03c32c6b863ba879deb427882f243ae46f0d82d68b695fa5b48b40"}, + {file = "jiter-0.12.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:26b9e155ddc132225a39b1995b3b9f0fe0f79a6d5cbbeacf103271e7d309b404"}, + {file = 
"jiter-0.12.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9ab05b7c58e29bb9e60b70c2e0094c98df79a1e42e397b9bb6eaa989b7a66dd0"}, + {file = "jiter-0.12.0-cp39-cp39-win32.whl", hash = "sha256:59f9f9df87ed499136db1c2b6c9efb902f964bed42a582ab7af413b6a293e7b0"}, + {file = "jiter-0.12.0-cp39-cp39-win_amd64.whl", hash = "sha256:d3719596a1ebe7a48a498e8d5d0c4bf7553321d4c3eee1d620628d51351a3928"}, + {file = "jiter-0.12.0-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:4739a4657179ebf08f85914ce50332495811004cc1747852e8b2041ed2aab9b8"}, + {file = "jiter-0.12.0-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:41da8def934bf7bec16cb24bd33c0ca62126d2d45d81d17b864bd5ad721393c3"}, + {file = "jiter-0.12.0-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c44ee814f499c082e69872d426b624987dbc5943ab06e9bbaa4f81989fdb79e"}, + {file = "jiter-0.12.0-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd2097de91cf03eaa27b3cbdb969addf83f0179c6afc41bbc4513705e013c65d"}, + {file = "jiter-0.12.0-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:e8547883d7b96ef2e5fe22b88f8a4c8725a56e7f4abafff20fd5272d634c7ecb"}, + {file = "jiter-0.12.0-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:89163163c0934854a668ed783a2546a0617f71706a2551a4a0666d91ab365d6b"}, + {file = "jiter-0.12.0-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d96b264ab7d34bbb2312dedc47ce07cd53f06835eacbc16dde3761f47c3a9e7f"}, + {file = "jiter-0.12.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c24e864cb30ab82311c6425655b0cdab0a98c5d973b065c66a3f020740c2324c"}, + {file = "jiter-0.12.0.tar.gz", hash = "sha256:64dfcd7d5c168b38d3f9f8bba7fc639edb3418abcc74f22fdbe6b8938293f30b"}, +] + +[[package]] +name = "joblib" +version = "1.5.3" +description 
= "Lightweight pipelining with Python functions" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "joblib-1.5.3-py3-none-any.whl", hash = "sha256:5fc3c5039fc5ca8c0276333a188bbd59d6b7ab37fe6632daa76bc7f9ec18e713"}, + {file = "joblib-1.5.3.tar.gz", hash = "sha256:8561a3269e6801106863fd0d6d84bb737be9e7631e33aaed3fb9ce5953688da3"}, +] + [[package]] name = "loguru" version = "0.7.3" @@ -458,7 +696,7 @@ version = "3.0.3" description = "Safely add untrusted strings to HTML/XML markup." optional = false python-versions = ">=3.9" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "markupsafe-3.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2f981d352f04553a7171b8e44369f2af4055f888dfb147d55e42d29e29e74559"}, {file = "markupsafe-3.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e1c1493fb6e50ab01d20a22826e57520f1284df32f2d8601fdd90b6304601419"}, @@ -665,6 +903,99 @@ files = [ {file = "nodeenv-1.10.0.tar.gz", hash = "sha256:996c191ad80897d076bdfba80a41994c2b47c68e224c542b48feba42ba00f8bb"}, ] +[[package]] +name = "numpy" +version = "2.1.3" +description = "Fundamental package for array computing in Python" +optional = false +python-versions = ">=3.10" +groups = ["main"] +files = [ + {file = "numpy-2.1.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c894b4305373b9c5576d7a12b473702afdf48ce5369c074ba304cc5ad8730dff"}, + {file = "numpy-2.1.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b47fbb433d3260adcd51eb54f92a2ffbc90a4595f8970ee00e064c644ac788f5"}, + {file = "numpy-2.1.3-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:825656d0743699c529c5943554d223c021ff0494ff1442152ce887ef4f7561a1"}, + {file = "numpy-2.1.3-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:6a4825252fcc430a182ac4dee5a505053d262c807f8a924603d411f6718b88fd"}, + {file = "numpy-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e711e02f49e176a01d0349d82cb5f05ba4db7d5e7e0defd026328e5cfb3226d3"}, + {file = 
"numpy-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78574ac2d1a4a02421f25da9559850d59457bac82f2b8d7a44fe83a64f770098"}, + {file = "numpy-2.1.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c7662f0e3673fe4e832fe07b65c50342ea27d989f92c80355658c7f888fcc83c"}, + {file = "numpy-2.1.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:fa2d1337dc61c8dc417fbccf20f6d1e139896a30721b7f1e832b2bb6ef4eb6c4"}, + {file = "numpy-2.1.3-cp310-cp310-win32.whl", hash = "sha256:72dcc4a35a8515d83e76b58fdf8113a5c969ccd505c8a946759b24e3182d1f23"}, + {file = "numpy-2.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:ecc76a9ba2911d8d37ac01de72834d8849e55473457558e12995f4cd53e778e0"}, + {file = "numpy-2.1.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4d1167c53b93f1f5d8a139a742b3c6f4d429b54e74e6b57d0eff40045187b15d"}, + {file = "numpy-2.1.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c80e4a09b3d95b4e1cac08643f1152fa71a0a821a2d4277334c88d54b2219a41"}, + {file = "numpy-2.1.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:576a1c1d25e9e02ed7fa5477f30a127fe56debd53b8d2c89d5578f9857d03ca9"}, + {file = "numpy-2.1.3-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:973faafebaae4c0aaa1a1ca1ce02434554d67e628b8d805e61f874b84e136b09"}, + {file = "numpy-2.1.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:762479be47a4863e261a840e8e01608d124ee1361e48b96916f38b119cfda04a"}, + {file = "numpy-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc6f24b3d1ecc1eebfbf5d6051faa49af40b03be1aaa781ebdadcbc090b4539b"}, + {file = "numpy-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:17ee83a1f4fef3c94d16dc1802b998668b5419362c8a4f4e8a491de1b41cc3ee"}, + {file = "numpy-2.1.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:15cb89f39fa6d0bdfb600ea24b250e5f1a3df23f901f51c8debaa6a5d122b2f0"}, + {file = "numpy-2.1.3-cp311-cp311-win32.whl", hash = 
"sha256:d9beb777a78c331580705326d2367488d5bc473b49a9bc3036c154832520aca9"}, + {file = "numpy-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:d89dd2b6da69c4fff5e39c28a382199ddedc3a5be5390115608345dec660b9e2"}, + {file = "numpy-2.1.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f55ba01150f52b1027829b50d70ef1dafd9821ea82905b63936668403c3b471e"}, + {file = "numpy-2.1.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:13138eadd4f4da03074851a698ffa7e405f41a0845a6b1ad135b81596e4e9958"}, + {file = "numpy-2.1.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:a6b46587b14b888e95e4a24d7b13ae91fa22386c199ee7b418f449032b2fa3b8"}, + {file = "numpy-2.1.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:0fa14563cc46422e99daef53d725d0c326e99e468a9320a240affffe87852564"}, + {file = "numpy-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8637dcd2caa676e475503d1f8fdb327bc495554e10838019651b76d17b98e512"}, + {file = "numpy-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2312b2aa89e1f43ecea6da6ea9a810d06aae08321609d8dc0d0eda6d946a541b"}, + {file = "numpy-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a38c19106902bb19351b83802531fea19dee18e5b37b36454f27f11ff956f7fc"}, + {file = "numpy-2.1.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:02135ade8b8a84011cbb67dc44e07c58f28575cf9ecf8ab304e51c05528c19f0"}, + {file = "numpy-2.1.3-cp312-cp312-win32.whl", hash = "sha256:e6988e90fcf617da2b5c78902fe8e668361b43b4fe26dbf2d7b0f8034d4cafb9"}, + {file = "numpy-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:0d30c543f02e84e92c4b1f415b7c6b5326cbe45ee7882b6b77db7195fb971e3a"}, + {file = "numpy-2.1.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:96fe52fcdb9345b7cd82ecd34547fca4321f7656d500eca497eb7ea5a926692f"}, + {file = "numpy-2.1.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f653490b33e9c3a4c1c01d41bc2aef08f9475af51146e4a7710c450cf9761598"}, + {file = 
"numpy-2.1.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:dc258a761a16daa791081d026f0ed4399b582712e6fc887a95af09df10c5ca57"}, + {file = "numpy-2.1.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:016d0f6f5e77b0f0d45d77387ffa4bb89816b57c835580c3ce8e099ef830befe"}, + {file = "numpy-2.1.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c181ba05ce8299c7aa3125c27b9c2167bca4a4445b7ce73d5febc411ca692e43"}, + {file = "numpy-2.1.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5641516794ca9e5f8a4d17bb45446998c6554704d888f86df9b200e66bdcce56"}, + {file = "numpy-2.1.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ea4dedd6e394a9c180b33c2c872b92f7ce0f8e7ad93e9585312b0c5a04777a4a"}, + {file = "numpy-2.1.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b0df3635b9c8ef48bd3be5f862cf71b0a4716fa0e702155c45067c6b711ddcef"}, + {file = "numpy-2.1.3-cp313-cp313-win32.whl", hash = "sha256:50ca6aba6e163363f132b5c101ba078b8cbd3fa92c7865fd7d4d62d9779ac29f"}, + {file = "numpy-2.1.3-cp313-cp313-win_amd64.whl", hash = "sha256:747641635d3d44bcb380d950679462fae44f54b131be347d5ec2bce47d3df9ed"}, + {file = "numpy-2.1.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:996bb9399059c5b82f76b53ff8bb686069c05acc94656bb259b1d63d04a9506f"}, + {file = "numpy-2.1.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:45966d859916ad02b779706bb43b954281db43e185015df6eb3323120188f9e4"}, + {file = "numpy-2.1.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:baed7e8d7481bfe0874b566850cb0b85243e982388b7b23348c6db2ee2b2ae8e"}, + {file = "numpy-2.1.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:a9f7f672a3388133335589cfca93ed468509cb7b93ba3105fce780d04a6576a0"}, + {file = "numpy-2.1.3-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7aac50327da5d208db2eec22eb11e491e3fe13d22653dce51b0f4109101b408"}, + {file = "numpy-2.1.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", 
hash = "sha256:4394bc0dbd074b7f9b52024832d16e019decebf86caf909d94f6b3f77a8ee3b6"}, + {file = "numpy-2.1.3-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:50d18c4358a0a8a53f12a8ba9d772ab2d460321e6a93d6064fc22443d189853f"}, + {file = "numpy-2.1.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:14e253bd43fc6b37af4921b10f6add6925878a42a0c5fe83daee390bca80bc17"}, + {file = "numpy-2.1.3-cp313-cp313t-win32.whl", hash = "sha256:08788d27a5fd867a663f6fc753fd7c3ad7e92747efc73c53bca2f19f8bc06f48"}, + {file = "numpy-2.1.3-cp313-cp313t-win_amd64.whl", hash = "sha256:2564fbdf2b99b3f815f2107c1bbc93e2de8ee655a69c261363a1172a79a257d4"}, + {file = "numpy-2.1.3-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:4f2015dfe437dfebbfce7c85c7b53d81ba49e71ba7eadbf1df40c915af75979f"}, + {file = "numpy-2.1.3-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:3522b0dfe983a575e6a9ab3a4a4dfe156c3e428468ff08ce582b9bb6bd1d71d4"}, + {file = "numpy-2.1.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c006b607a865b07cd981ccb218a04fc86b600411d83d6fc261357f1c0966755d"}, + {file = "numpy-2.1.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:e14e26956e6f1696070788252dcdff11b4aca4c3e8bd166e0df1bb8f315a67cb"}, + {file = "numpy-2.1.3.tar.gz", hash = "sha256:aa08e04e08aaf974d4458def539dece0d28146d866a39da5639596f4921fd761"}, +] + +[[package]] +name = "openai" +version = "1.109.1" +description = "The official Python library for the openai API" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "openai-1.109.1-py3-none-any.whl", hash = "sha256:6bcaf57086cf59159b8e27447e4e7dd019db5d29a438072fbd49c290c7e65315"}, + {file = "openai-1.109.1.tar.gz", hash = "sha256:d173ed8dbca665892a6db099b4a2dfac624f94d20a93f46eb0b56aae940ed869"}, +] + +[package.dependencies] +anyio = ">=3.5.0,<5" +distro = ">=1.7.0,<2" +httpx = ">=0.23.0,<1" +jiter = ">=0.4.0,<1" +pydantic = ">=1.9.0,<3" +sniffio = "*" +tqdm = ">4" 
+typing-extensions = ">=4.11,<5" + +[package.extras] +aiohttp = ["aiohttp", "httpx-aiohttp (>=0.1.8)"] +datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] +realtime = ["websockets (>=13,<16)"] +voice-helpers = ["numpy (>=2.0.2)", "sounddevice (>=0.5.1)"] + [[package]] name = "packaging" version = "25.0" @@ -763,6 +1094,162 @@ nodeenv = ">=0.11.1" pyyaml = ">=5.1" virtualenv = ">=20.10.0" +[[package]] +name = "pydantic" +version = "2.12.5" +description = "Data validation using Python type hints" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d"}, + {file = "pydantic-2.12.5.tar.gz", hash = "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49"}, +] + +[package.dependencies] +annotated-types = ">=0.6.0" +pydantic-core = "2.41.5" +typing-extensions = ">=4.14.1" +typing-inspection = ">=0.4.2" + +[package.extras] +email = ["email-validator (>=2.0.0)"] +timezone = ["tzdata ; python_version >= \"3.9\" and platform_system == \"Windows\""] + +[[package]] +name = "pydantic-core" +version = "2.41.5" +description = "Core functionality for Pydantic validation and serialization" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "pydantic_core-2.41.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:77b63866ca88d804225eaa4af3e664c5faf3568cea95360d21f4725ab6e07146"}, + {file = "pydantic_core-2.41.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dfa8a0c812ac681395907e71e1274819dec685fec28273a28905df579ef137e2"}, + {file = "pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5921a4d3ca3aee735d9fd163808f5e8dd6c6972101e4adbda9a4667908849b97"}, + {file = "pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:e25c479382d26a2a41b7ebea1043564a937db462816ea07afa8a44c0866d52f9"}, + {file = "pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f547144f2966e1e16ae626d8ce72b4cfa0caedc7fa28052001c94fb2fcaa1c52"}, + {file = "pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6f52298fbd394f9ed112d56f3d11aabd0d5bd27beb3084cc3d8ad069483b8941"}, + {file = "pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:100baa204bb412b74fe285fb0f3a385256dad1d1879f0a5cb1499ed2e83d132a"}, + {file = "pydantic_core-2.41.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:05a2c8852530ad2812cb7914dc61a1125dc4e06252ee98e5638a12da6cc6fb6c"}, + {file = "pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:29452c56df2ed968d18d7e21f4ab0ac55e71dc59524872f6fc57dcf4a3249ed2"}, + {file = "pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:d5160812ea7a8a2ffbe233d8da666880cad0cbaf5d4de74ae15c313213d62556"}, + {file = "pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:df3959765b553b9440adfd3c795617c352154e497a4eaf3752555cfb5da8fc49"}, + {file = "pydantic_core-2.41.5-cp310-cp310-win32.whl", hash = "sha256:1f8d33a7f4d5a7889e60dc39856d76d09333d8a6ed0f5f1190635cbec70ec4ba"}, + {file = "pydantic_core-2.41.5-cp310-cp310-win_amd64.whl", hash = "sha256:62de39db01b8d593e45871af2af9e497295db8d73b085f6bfd0b18c83c70a8f9"}, + {file = "pydantic_core-2.41.5-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:a3a52f6156e73e7ccb0f8cced536adccb7042be67cb45f9562e12b319c119da6"}, + {file = "pydantic_core-2.41.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7f3bf998340c6d4b0c9a2f02d6a400e51f123b59565d74dc60d252ce888c260b"}, + {file = "pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:378bec5c66998815d224c9ca994f1e14c0c21cb95d2f52b6021cc0b2a58f2a5a"}, 
+ {file = "pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e7b576130c69225432866fe2f4a469a85a54ade141d96fd396dffcf607b558f8"}, + {file = "pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6cb58b9c66f7e4179a2d5e0f849c48eff5c1fca560994d6eb6543abf955a149e"}, + {file = "pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:88942d3a3dff3afc8288c21e565e476fc278902ae4d6d134f1eeda118cc830b1"}, + {file = "pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f31d95a179f8d64d90f6831d71fa93290893a33148d890ba15de25642c5d075b"}, + {file = "pydantic_core-2.41.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c1df3d34aced70add6f867a8cf413e299177e0c22660cc767218373d0779487b"}, + {file = "pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4009935984bd36bd2c774e13f9a09563ce8de4abaa7226f5108262fa3e637284"}, + {file = "pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:34a64bc3441dc1213096a20fe27e8e128bd3ff89921706e83c0b1ac971276594"}, + {file = "pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c9e19dd6e28fdcaa5a1de679aec4141f691023916427ef9bae8584f9c2fb3b0e"}, + {file = "pydantic_core-2.41.5-cp311-cp311-win32.whl", hash = "sha256:2c010c6ded393148374c0f6f0bf89d206bf3217f201faa0635dcd56bd1520f6b"}, + {file = "pydantic_core-2.41.5-cp311-cp311-win_amd64.whl", hash = "sha256:76ee27c6e9c7f16f47db7a94157112a2f3a00e958bc626e2f4ee8bec5c328fbe"}, + {file = "pydantic_core-2.41.5-cp311-cp311-win_arm64.whl", hash = "sha256:4bc36bbc0b7584de96561184ad7f012478987882ebf9f9c389b23f432ea3d90f"}, + {file = "pydantic_core-2.41.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7"}, + {file = "pydantic_core-2.41.5-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0"}, + {file = "pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69"}, + {file = "pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75"}, + {file = "pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05"}, + {file = "pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc"}, + {file = "pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c"}, + {file = "pydantic_core-2.41.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5"}, + {file = "pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c"}, + {file = "pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294"}, + {file = "pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1"}, + {file = "pydantic_core-2.41.5-cp312-cp312-win32.whl", hash = "sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d"}, + {file = "pydantic_core-2.41.5-cp312-cp312-win_amd64.whl", hash = "sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815"}, + {file = "pydantic_core-2.41.5-cp312-cp312-win_arm64.whl", hash = 
"sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3"}, + {file = "pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9"}, + {file = "pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34"}, + {file = "pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0"}, + {file = "pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33"}, + {file = "pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e"}, + {file = "pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2"}, + {file = "pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586"}, + {file = "pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d"}, + {file = "pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740"}, + {file = "pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e"}, + {file = "pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858"}, + {file = "pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = 
"sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36"}, + {file = "pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = "sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11"}, + {file = "pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd"}, + {file = "pydantic_core-2.41.5-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a"}, + {file = "pydantic_core-2.41.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14"}, + {file = "pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1"}, + {file = "pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66"}, + {file = "pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869"}, + {file = "pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2"}, + {file = "pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375"}, + {file = "pydantic_core-2.41.5-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553"}, + {file = "pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90"}, + {file = "pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_armv7l.whl", hash = 
"sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07"}, + {file = "pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb"}, + {file = "pydantic_core-2.41.5-cp314-cp314-win32.whl", hash = "sha256:aec5cf2fd867b4ff45b9959f8b20ea3993fc93e63c7363fe6851424c8a7e7c23"}, + {file = "pydantic_core-2.41.5-cp314-cp314-win_amd64.whl", hash = "sha256:8e7c86f27c585ef37c35e56a96363ab8de4e549a95512445b85c96d3e2f7c1bf"}, + {file = "pydantic_core-2.41.5-cp314-cp314-win_arm64.whl", hash = "sha256:e672ba74fbc2dc8eea59fb6d4aed6845e6905fc2a8afe93175d94a83ba2a01a0"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = "sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008"}, + {file = "pydantic_core-2.41.5-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:8bfeaf8735be79f225f3fefab7f941c712aaca36f1128c9d7e2352ee1aa87bdf"}, + {file = "pydantic_core-2.41.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:346285d28e4c8017da95144c7f3acd42740d637ff41946af5ce6e5e420502dd5"}, + {file = "pydantic_core-2.41.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a75dafbf87d6276ddc5b2bf6fae5254e3d0876b626eb24969a574fff9149ee5d"}, + {file = "pydantic_core-2.41.5-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7b93a4d08587e2b7e7882de461e82b6ed76d9026ce91ca7915e740ecc7855f60"}, + {file = "pydantic_core-2.41.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e8465ab91a4bd96d36dde3263f06caa6a8a6019e4113f24dc753d79a8b3a3f82"}, + {file = "pydantic_core-2.41.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:299e0a22e7ae2b85c1a57f104538b2656e8ab1873511fd718a1c1c6f149b77b5"}, + {file = 
"pydantic_core-2.41.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:707625ef0983fcfb461acfaf14de2067c5942c6bb0f3b4c99158bed6fedd3cf3"}, + {file = "pydantic_core-2.41.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f41eb9797986d6ebac5e8edff36d5cef9de40def462311b3eb3eeded1431e425"}, + {file = "pydantic_core-2.41.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0384e2e1021894b1ff5a786dbf94771e2986ebe2869533874d7e43bc79c6f504"}, + {file = "pydantic_core-2.41.5-cp39-cp39-musllinux_1_1_armv7l.whl", hash = "sha256:f0cd744688278965817fd0839c4a4116add48d23890d468bc436f78beb28abf5"}, + {file = "pydantic_core-2.41.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:753e230374206729bf0a807954bcc6c150d3743928a73faffee51ac6557a03c3"}, + {file = "pydantic_core-2.41.5-cp39-cp39-win32.whl", hash = "sha256:873e0d5b4fb9b89ef7c2d2a963ea7d02879d9da0da8d9d4933dee8ee86a8b460"}, + {file = "pydantic_core-2.41.5-cp39-cp39-win_amd64.whl", hash = "sha256:e4f4a984405e91527a0d62649ee21138f8e3d0ef103be488c1dc11a80d7f184b"}, + {file = "pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:b96d5f26b05d03cc60f11a7761a5ded1741da411e7fe0909e27a5e6a0cb7b034"}, + {file = "pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:634e8609e89ceecea15e2d61bc9ac3718caaaa71963717bf3c8f38bfde64242c"}, + {file = "pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:93e8740d7503eb008aa2df04d3b9735f845d43ae845e6dcd2be0b55a2da43cd2"}, + {file = "pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f15489ba13d61f670dcc96772e733aad1a6f9c429cc27574c6cdaed82d0146ad"}, + {file = "pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:7da7087d756b19037bc2c06edc6c170eeef3c3bafcb8f532ff17d64dc427adfd"}, + {file = 
"pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc"}, + {file = "pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56"}, + {file = "pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b"}, + {file = "pydantic_core-2.41.5-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b5819cd790dbf0c5eb9f82c73c16b39a65dd6dd4d1439dcdea7816ec9adddab8"}, + {file = "pydantic_core-2.41.5-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:5a4e67afbc95fa5c34cf27d9089bca7fcab4e51e57278d710320a70b956d1b9a"}, + {file = "pydantic_core-2.41.5-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ece5c59f0ce7d001e017643d8d24da587ea1f74f6993467d85ae8a5ef9d4f42b"}, + {file = "pydantic_core-2.41.5-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:16f80f7abe3351f8ea6858914ddc8c77e02578544a0ebc15b4c2e1a0e813b0b2"}, + {file = "pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:33cb885e759a705b426baada1fe68cbb0a2e68e34c5d0d0289a364cf01709093"}, + {file = "pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:c8d8b4eb992936023be7dee581270af5c6e0697a8559895f527f5b7105ecd36a"}, + {file = "pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:242a206cd0318f95cd21bdacff3fcc3aab23e79bba5cac3db5a841c9ef9c6963"}, + {file = "pydantic_core-2.41.5-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d3a978c4f57a597908b7e697229d996d77a6d3c94901e9edee593adada95ce1a"}, + {file = "pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = 
"sha256:b2379fa7ed44ddecb5bfe4e48577d752db9fc10be00a6b7446e9663ba143de26"}, + {file = "pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:266fb4cbf5e3cbd0b53669a6d1b039c45e3ce651fd5442eff4d07c2cc8d66808"}, + {file = "pydantic_core-2.41.5-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58133647260ea01e4d0500089a8c4f07bd7aa6ce109682b1426394988d8aaacc"}, + {file = "pydantic_core-2.41.5-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:287dad91cfb551c363dc62899a80e9e14da1f0e2b6ebde82c806612ca2a13ef1"}, + {file = "pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:03b77d184b9eb40240ae9fd676ca364ce1085f203e1b1256f8ab9984dca80a84"}, + {file = "pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:a668ce24de96165bb239160b3d854943128f4334822900534f2fe947930e5770"}, + {file = "pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f14f8f046c14563f8eb3f45f499cc658ab8d10072961e07225e507adb700e93f"}, + {file = "pydantic_core-2.41.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:56121965f7a4dc965bff783d70b907ddf3d57f6eba29b6d2e5dabfaf07799c51"}, + {file = "pydantic_core-2.41.5.tar.gz", hash = "sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e"}, +] + +[package.dependencies] +typing-extensions = ">=4.14.1" + [[package]] name = "pygments" version = "2.19.2" @@ -819,6 +1306,26 @@ pygments = ">=2.7.2" [package.extras] dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests", "setuptools", "xmlschema"] +[[package]] +name = "pytest-asyncio" +version = "1.3.0" +description = "Pytest support for asyncio" +optional = false +python-versions = ">=3.10" +groups = ["dev"] +files = [ + {file = "pytest_asyncio-1.3.0-py3-none-any.whl", hash = "sha256:611e26147c7f77640e6d0a92a38ed17c3e9848063698d5c93d5aa7aa11cebff5"}, + {file = "pytest_asyncio-1.3.0.tar.gz", hash = 
"sha256:d7f52f36d231b80ee124cd216ffb19369aa168fc10095013c6b014a34d3ee9e5"}, +] + +[package.dependencies] +pytest = ">=8.2,<10" +typing-extensions = {version = ">=4.12", markers = "python_version < \"3.13\""} + +[package.extras] +docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1)"] +testing = ["coverage (>=6.2)", "hypothesis (>=5.7.1)"] + [[package]] name = "pytest-cov" version = "5.0.0" @@ -1000,6 +1507,136 @@ files = [ {file = "ruff-0.4.10.tar.gz", hash = "sha256:3aa4f2bc388a30d346c56524f7cacca85945ba124945fe489952aadb6b5cd804"}, ] +[[package]] +name = "scikit-learn" +version = "1.5.2" +description = "A set of python modules for machine learning and data mining" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "scikit_learn-1.5.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:299406827fb9a4f862626d0fe6c122f5f87f8910b86fe5daa4c32dcd742139b6"}, + {file = "scikit_learn-1.5.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:2d4cad1119c77930b235579ad0dc25e65c917e756fe80cab96aa3b9428bd3fb0"}, + {file = "scikit_learn-1.5.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c412ccc2ad9bf3755915e3908e677b367ebc8d010acbb3f182814524f2e5540"}, + {file = "scikit_learn-1.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a686885a4b3818d9e62904d91b57fa757fc2bed3e465c8b177be652f4dd37c8"}, + {file = "scikit_learn-1.5.2-cp310-cp310-win_amd64.whl", hash = "sha256:c15b1ca23d7c5f33cc2cb0a0d6aaacf893792271cddff0edbd6a40e8319bc113"}, + {file = "scikit_learn-1.5.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:03b6158efa3faaf1feea3faa884c840ebd61b6484167c711548fce208ea09445"}, + {file = "scikit_learn-1.5.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:1ff45e26928d3b4eb767a8f14a9a6efbf1cbff7c05d1fb0f95f211a89fd4f5de"}, + {file = "scikit_learn-1.5.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:f763897fe92d0e903aa4847b0aec0e68cadfff77e8a0687cabd946c89d17e675"}, + {file = "scikit_learn-1.5.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8b0ccd4a902836493e026c03256e8b206656f91fbcc4fde28c57a5b752561f1"}, + {file = "scikit_learn-1.5.2-cp311-cp311-win_amd64.whl", hash = "sha256:6c16d84a0d45e4894832b3c4d0bf73050939e21b99b01b6fd59cbb0cf39163b6"}, + {file = "scikit_learn-1.5.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f932a02c3f4956dfb981391ab24bda1dbd90fe3d628e4b42caef3e041c67707a"}, + {file = "scikit_learn-1.5.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:3b923d119d65b7bd555c73be5423bf06c0105678ce7e1f558cb4b40b0a5502b1"}, + {file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f60021ec1574e56632be2a36b946f8143bf4e5e6af4a06d85281adc22938e0dd"}, + {file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:394397841449853c2290a32050382edaec3da89e35b3e03d6cc966aebc6a8ae6"}, + {file = "scikit_learn-1.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:57cc1786cfd6bd118220a92ede80270132aa353647684efa385a74244a41e3b1"}, + {file = "scikit_learn-1.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e9a702e2de732bbb20d3bad29ebd77fc05a6b427dc49964300340e4c9328b3f5"}, + {file = "scikit_learn-1.5.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:b0768ad641981f5d3a198430a1d31c3e044ed2e8a6f22166b4d546a5116d7908"}, + {file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:178ddd0a5cb0044464fc1bfc4cca5b1833bfc7bb022d70b05db8530da4bb3dd3"}, + {file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7284ade780084d94505632241bf78c44ab3b6f1e8ccab3d2af58e0e950f9c12"}, + {file = "scikit_learn-1.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:b7b0f9a0b1040830d38c39b91b3a44e1b643f4b36e36567b80b7c6bd2202a27f"}, + {file = 
"scikit_learn-1.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:757c7d514ddb00ae249832fe87100d9c73c6ea91423802872d9e74970a0e40b9"}, + {file = "scikit_learn-1.5.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:52788f48b5d8bca5c0736c175fa6bdaab2ef00a8f536cda698db61bd89c551c1"}, + {file = "scikit_learn-1.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:643964678f4b5fbdc95cbf8aec638acc7aa70f5f79ee2cdad1eec3df4ba6ead8"}, + {file = "scikit_learn-1.5.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca64b3089a6d9b9363cd3546f8978229dcbb737aceb2c12144ee3f70f95684b7"}, + {file = "scikit_learn-1.5.2-cp39-cp39-win_amd64.whl", hash = "sha256:3bed4909ba187aca80580fe2ef370d9180dcf18e621a27c4cf2ef10d279a7efe"}, + {file = "scikit_learn-1.5.2.tar.gz", hash = "sha256:b4237ed7b3fdd0a4882792e68ef2545d5baa50aca3bb45aa7df468138ad8f94d"}, +] + +[package.dependencies] +joblib = ">=1.2.0" +numpy = ">=1.19.5" +scipy = ">=1.6.0" +threadpoolctl = ">=3.1.0" + +[package.extras] +benchmark = ["matplotlib (>=3.3.4)", "memory_profiler (>=0.57.0)", "pandas (>=1.1.5)"] +build = ["cython (>=3.0.10)", "meson-python (>=0.16.0)", "numpy (>=1.19.5)", "scipy (>=1.6.0)"] +docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.3.4)", "memory_profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "polars (>=0.20.30)", "pooch (>=1.6.0)", "pydata-sphinx-theme (>=0.15.3)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)", "sphinx (>=7.3.7)", "sphinx-copybutton (>=0.5.2)", "sphinx-design (>=0.5.0)", "sphinx-design (>=0.6.0)", "sphinx-gallery (>=0.16.0)", "sphinx-prompt (>=1.4.0)", "sphinx-remove-toctrees (>=1.0.0.post1)", "sphinxcontrib-sass (>=0.3.4)", "sphinxext-opengraph (>=0.9.1)"] +examples = ["matplotlib (>=3.3.4)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)"] +install = ["joblib (>=1.2.0)", "numpy (>=1.19.5)", "scipy (>=1.6.0)", "threadpoolctl (>=3.1.0)"] 
+maintenance = ["conda-lock (==2.5.6)"] +tests = ["black (>=24.3.0)", "matplotlib (>=3.3.4)", "mypy (>=1.9)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "polars (>=0.20.30)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pyarrow (>=12.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.2.1)", "scikit-image (>=0.17.2)"] + +[[package]] +name = "scipy" +version = "1.17.0" +description = "Fundamental algorithms for scientific computing in Python" +optional = false +python-versions = ">=3.11" +groups = ["main"] +files = [ + {file = "scipy-1.17.0-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:2abd71643797bd8a106dff97894ff7869eeeb0af0f7a5ce02e4227c6a2e9d6fd"}, + {file = "scipy-1.17.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:ef28d815f4d2686503e5f4f00edc387ae58dfd7a2f42e348bb53359538f01558"}, + {file = "scipy-1.17.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:272a9f16d6bb4667e8b50d25d71eddcc2158a214df1b566319298de0939d2ab7"}, + {file = "scipy-1.17.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:7204fddcbec2fe6598f1c5fdf027e9f259106d05202a959a9f1aecf036adc9f6"}, + {file = "scipy-1.17.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fc02c37a5639ee67d8fb646ffded6d793c06c5622d36b35cfa8fe5ececb8f042"}, + {file = "scipy-1.17.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dac97a27520d66c12a34fd90a4fe65f43766c18c0d6e1c0a80f114d2260080e4"}, + {file = "scipy-1.17.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ebb7446a39b3ae0fe8f416a9a3fdc6fba3f11c634f680f16a239c5187bc487c0"}, + {file = "scipy-1.17.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:474da16199f6af66601a01546144922ce402cb17362e07d82f5a6cf8f963e449"}, + {file = "scipy-1.17.0-cp311-cp311-win_amd64.whl", hash = "sha256:255c0da161bd7b32a6c898e7891509e8a9289f0b1c6c7d96142ee0d2b114c2ea"}, + {file = "scipy-1.17.0-cp311-cp311-win_arm64.whl", hash = 
"sha256:85b0ac3ad17fa3be50abd7e69d583d98792d7edc08367e01445a1e2076005379"}, + {file = "scipy-1.17.0-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:0d5018a57c24cb1dd828bcf51d7b10e65986d549f52ef5adb6b4d1ded3e32a57"}, + {file = "scipy-1.17.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:88c22af9e5d5a4f9e027e26772cc7b5922fab8bcc839edb3ae33de404feebd9e"}, + {file = "scipy-1.17.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:f3cd947f20fe17013d401b64e857c6b2da83cae567adbb75b9dcba865abc66d8"}, + {file = "scipy-1.17.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:e8c0b331c2c1f531eb51f1b4fc9ba709521a712cce58f1aa627bc007421a5306"}, + {file = "scipy-1.17.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5194c445d0a1c7a6c1a4a4681b6b7c71baad98ff66d96b949097e7513c9d6742"}, + {file = "scipy-1.17.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9eeb9b5f5997f75507814ed9d298ab23f62cf79f5a3ef90031b1ee2506abdb5b"}, + {file = "scipy-1.17.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:40052543f7bbe921df4408f46003d6f01c6af109b9e2c8a66dd1cf6cf57f7d5d"}, + {file = "scipy-1.17.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0cf46c8013fec9d3694dc572f0b54100c28405d55d3e2cb15e2895b25057996e"}, + {file = "scipy-1.17.0-cp312-cp312-win_amd64.whl", hash = "sha256:0937a0b0d8d593a198cededd4c439a0ea216a3f36653901ea1f3e4be949056f8"}, + {file = "scipy-1.17.0-cp312-cp312-win_arm64.whl", hash = "sha256:f603d8a5518c7426414d1d8f82e253e454471de682ce5e39c29adb0df1efb86b"}, + {file = "scipy-1.17.0-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:65ec32f3d32dfc48c72df4291345dae4f048749bc8d5203ee0a3f347f96c5ce6"}, + {file = "scipy-1.17.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:1f9586a58039d7229ce77b52f8472c972448cded5736eaf102d5658bbac4c269"}, + {file = "scipy-1.17.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:9fad7d3578c877d606b1150135c2639e9de9cecd3705caa37b66862977cc3e72"}, + {file = 
"scipy-1.17.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:423ca1f6584fc03936972b5f7c06961670dbba9f234e71676a7c7ccf938a0d61"}, + {file = "scipy-1.17.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fe508b5690e9eaaa9467fc047f833af58f1152ae51a0d0aed67aa5801f4dd7d6"}, + {file = "scipy-1.17.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6680f2dfd4f6182e7d6db161344537da644d1cf85cf293f015c60a17ecf08752"}, + {file = "scipy-1.17.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:eec3842ec9ac9de5917899b277428886042a93db0b227ebbe3a333b64ec7643d"}, + {file = "scipy-1.17.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d7425fcafbc09a03731e1bc05581f5fad988e48c6a861f441b7ab729a49a55ea"}, + {file = "scipy-1.17.0-cp313-cp313-win_amd64.whl", hash = "sha256:87b411e42b425b84777718cc41516b8a7e0795abfa8e8e1d573bf0ef014f0812"}, + {file = "scipy-1.17.0-cp313-cp313-win_arm64.whl", hash = "sha256:357ca001c6e37601066092e7c89cca2f1ce74e2a520ca78d063a6d2201101df2"}, + {file = "scipy-1.17.0-cp313-cp313t-macosx_10_14_x86_64.whl", hash = "sha256:ec0827aa4d36cb79ff1b81de898e948a51ac0b9b1c43e4a372c0508c38c0f9a3"}, + {file = "scipy-1.17.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:819fc26862b4b3c73a60d486dbb919202f3d6d98c87cf20c223511429f2d1a97"}, + {file = "scipy-1.17.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:363ad4ae2853d88ebcde3ae6ec46ccca903ea9835ee8ba543f12f575e7b07e4e"}, + {file = "scipy-1.17.0-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:979c3a0ff8e5ba254d45d59ebd38cde48fce4f10b5125c680c7a4bfe177aab07"}, + {file = "scipy-1.17.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:130d12926ae34399d157de777472bf82e9061c60cc081372b3118edacafe1d00"}, + {file = "scipy-1.17.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6e886000eb4919eae3a44f035e63f0fd8b651234117e8f6f29bad1cd26e7bc45"}, + {file = 
"scipy-1.17.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:13c4096ac6bc31d706018f06a49abe0485f96499deb82066b94d19b02f664209"}, + {file = "scipy-1.17.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:cacbaddd91fcffde703934897c5cd2c7cb0371fac195d383f4e1f1c5d3f3bd04"}, + {file = "scipy-1.17.0-cp313-cp313t-win_amd64.whl", hash = "sha256:edce1a1cf66298cccdc48a1bdf8fb10a3bf58e8b58d6c3883dd1530e103f87c0"}, + {file = "scipy-1.17.0-cp313-cp313t-win_arm64.whl", hash = "sha256:30509da9dbec1c2ed8f168b8d8aa853bc6723fede1dbc23c7d43a56f5ab72a67"}, + {file = "scipy-1.17.0-cp314-cp314-macosx_10_14_x86_64.whl", hash = "sha256:c17514d11b78be8f7e6331b983a65a7f5ca1fd037b95e27b280921fe5606286a"}, + {file = "scipy-1.17.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:4e00562e519c09da34c31685f6acc3aa384d4d50604db0f245c14e1b4488bfa2"}, + {file = "scipy-1.17.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:f7df7941d71314e60a481e02d5ebcb3f0185b8d799c70d03d8258f6c80f3d467"}, + {file = "scipy-1.17.0-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:aabf057c632798832f071a8dde013c2e26284043934f53b00489f1773b33527e"}, + {file = "scipy-1.17.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a38c3337e00be6fd8a95b4ed66b5d988bac4ec888fd922c2ea9fe5fb1603dd67"}, + {file = "scipy-1.17.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00fb5f8ec8398ad90215008d8b6009c9db9fa924fd4c7d6be307c6f945f9cd73"}, + {file = "scipy-1.17.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f2a4942b0f5f7c23c7cd641a0ca1955e2ae83dedcff537e3a0259096635e186b"}, + {file = "scipy-1.17.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:dbf133ced83889583156566d2bdf7a07ff89228fe0c0cb727f777de92092ec6b"}, + {file = "scipy-1.17.0-cp314-cp314-win_amd64.whl", hash = "sha256:3625c631a7acd7cfd929e4e31d2582cf00f42fcf06011f59281271746d77e061"}, + {file = "scipy-1.17.0-cp314-cp314-win_arm64.whl", hash = 
"sha256:9244608d27eafe02b20558523ba57f15c689357c85bdcfe920b1828750aa26eb"}, + {file = "scipy-1.17.0-cp314-cp314t-macosx_10_14_x86_64.whl", hash = "sha256:2b531f57e09c946f56ad0b4a3b2abee778789097871fc541e267d2eca081cff1"}, + {file = "scipy-1.17.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:13e861634a2c480bd237deb69333ac79ea1941b94568d4b0efa5db5e263d4fd1"}, + {file = "scipy-1.17.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:eb2651271135154aa24f6481cbae5cc8af1f0dd46e6533fb7b56aa9727b6a232"}, + {file = "scipy-1.17.0-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:c5e8647f60679790c2f5c76be17e2e9247dc6b98ad0d3b065861e082c56e078d"}, + {file = "scipy-1.17.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5fb10d17e649e1446410895639f3385fd2bf4c3c7dfc9bea937bddcbc3d7b9ba"}, + {file = "scipy-1.17.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8547e7c57f932e7354a2319fab613981cde910631979f74c9b542bb167a8b9db"}, + {file = "scipy-1.17.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:33af70d040e8af9d5e7a38b5ed3b772adddd281e3062ff23fec49e49681c38cf"}, + {file = "scipy-1.17.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:f9eb55bb97d00f8b7ab95cb64f873eb0bf54d9446264d9f3609130381233483f"}, + {file = "scipy-1.17.0-cp314-cp314t-win_amd64.whl", hash = "sha256:1ff269abf702f6c7e67a4b7aad981d42871a11b9dd83c58d2d2ea624efbd1088"}, + {file = "scipy-1.17.0-cp314-cp314t-win_arm64.whl", hash = "sha256:031121914e295d9791319a1875444d55079885bbae5bdc9c5e0f2ee5f09d34ff"}, + {file = "scipy-1.17.0.tar.gz", hash = "sha256:2591060c8e648d8b96439e111ac41fd8342fdeff1876be2e19dea3fe8930454e"}, +] + +[package.dependencies] +numpy = ">=1.26.4,<2.7" + +[package.extras] +dev = ["click (<8.3.0)", "cython-lint (>=0.12.2)", "mypy (==1.10.0)", "pycodestyle", "ruff (>=0.12.0)", "spin", "types-psutil", "typing_extensions"] +doc = ["intersphinx_registry", "jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.19.1)", 
"jupytext", "linkify-it-py", "matplotlib (>=3.5)", "myst-nb (>=1.2.0)", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0,<8.2.0)", "sphinx-copybutton", "sphinx-design (>=0.4.0)", "tabulate"] +test = ["Cython", "array-api-strict (>=2.3.1)", "asv", "gmpy2", "hypothesis (>=6.30)", "meson", "mpmath", "ninja ; sys_platform != \"emscripten\"", "pooch", "pytest (>=8.0.0)", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] + [[package]] name = "six" version = "1.17.0" @@ -1012,6 +1649,92 @@ files = [ {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, ] +[[package]] +name = "sniffio" +version = "1.3.1" +description = "Sniff out which async library your code is running under" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, + {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, +] + +[[package]] +name = "threadpoolctl" +version = "3.6.0" +description = "threadpoolctl" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb"}, + {file = "threadpoolctl-3.6.0.tar.gz", hash = "sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e"}, +] + +[[package]] +name = "tqdm" +version = "4.67.1" +description = "Fast, Extensible Progress Meter" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"}, + {file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"}, +] + +[package.dependencies] +colorama = 
{version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +dev = ["nbval", "pytest (>=6)", "pytest-asyncio (>=0.24)", "pytest-cov", "pytest-timeout"] +discord = ["requests"] +notebook = ["ipywidgets (>=6)"] +slack = ["slack-sdk"] +telegram = ["requests"] + +[[package]] +name = "types-aiofiles" +version = "25.1.0.20251011" +description = "Typing stubs for aiofiles" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "types_aiofiles-25.1.0.20251011-py3-none-any.whl", hash = "sha256:8ff8de7f9d42739d8f0dadcceeb781ce27cd8d8c4152d4a7c52f6b20edb8149c"}, + {file = "types_aiofiles-25.1.0.20251011.tar.gz", hash = "sha256:1c2b8ab260cb3cd40c15f9d10efdc05a6e1e6b02899304d80dfa0410e028d3ff"}, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +description = "Backported and Experimental Type Hints for Python 3.9+" +optional = false +python-versions = ">=3.9" +groups = ["main", "dev"] +files = [ + {file = "typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548"}, + {file = "typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466"}, +] +markers = {dev = "python_version == \"3.12\""} + +[[package]] +name = "typing-inspection" +version = "0.4.2" +description = "Runtime typing introspection tools" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7"}, + {file = "typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464"}, +] + +[package.dependencies] +typing-extensions = ">=4.12.0" + [[package]] name = "urllib3" version = "2.6.3" @@ -1113,4 +1836,4 @@ dev = ["black (>=19.3b0) ; python_version >= \"3.6\"", "pytest (>=4.6.2)"] [metadata] lock-version = "2.1" python-versions = ">=3.12, 
<3.15" -content-hash = "65231cd72c4d276f04baf0dab180af3345b163ce034c5fa98ecdb79ac4326be4" +content-hash = "0fcb8559e0f59a4650ca8f299c5654477f9c61e0e7eb9e9179828b5dc599a3e1" diff --git a/pyproject.toml b/pyproject.toml index 0eae2ea..a607580 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,6 +10,19 @@ packages = [{include = "coreason_optimizer", from = "src"}] [tool.poetry.dependencies] python = ">=3.12, <3.15" loguru = "^0.7.2" +pydantic = "^2.10" +jinja2 = "^3.1.4" +openai = "^1.50" +click = "^8.1.7" +numpy = "^2.1" +scikit-learn = "^1.5.2" +anyio = "*" +httpx = "*" +aiofiles = "*" +types-aiofiles = "*" + +[tool.poetry.scripts] +coreason-opt = "coreason_optimizer.main:cli" [tool.poetry.group.dev.dependencies] pytest = "^8.2.2" @@ -18,31 +31,12 @@ pre-commit = "^3.7.1" pytest-cov = "^5.0.0" mkdocs = "^1.6.0" mkdocs-material = "^9.5.26" +pytest-asyncio = "*" [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" -[project] -name = "coreason_optimizer" -version = "0.1.0" -description = "coreason-optimizer" -readme = "README.md" -requires-python = ">=3.11" -authors = [ - { name = "Gowtham A Rao", email = "gowtham.rao@coreason.ai" }, -] -license = { file = "LICENSE" } -classifiers = [ - "License :: Other/Proprietary License", - "Programming Language :: Python :: 3.12", - "Operating System :: OS Independent", -] - -[project.urls] -Homepage = "https://github.com/CoReason-AI/coreason_optimizer" -Repository = "https://github.com/CoReason-AI/coreason_optimizer" -Documentation = "https://github.com/CoReason-AI/coreason_optimizer" [tool.ruff] line-length = 120 @@ -52,12 +46,20 @@ target-version = "py312" select = ["E", "F", "B", "I"] ignore = [] +[tool.ruff.lint.isort] +known-first-party = ["coreason_optimizer"] + [tool.mypy] python_version = "3.12" strict = true ignore_missing_imports = true +# This is needed because sometimes mypy doesn't see Pydantic types correctly in this environment, +# treating BaseModel as Any. 
+disallow_subclassing_any = false +disallow_untyped_decorators = false [tool.pytest.ini_options] +asyncio_mode = "auto" addopts = "--cov=src --cov-report=term-missing --cov-report=html --cov-fail-under=100" testpaths = ["tests"] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..669f6c4 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +click>=8.0.0 +jinja2>=3.0.0 +loguru>=0.6.0 +numpy>=1.20.0 +openai>=1.0.0 +pydantic>=2.0.0 +scikit-learn>=1.0.0 diff --git a/src/coreason_optimizer/__init__.py b/src/coreason_optimizer/__init__.py index d638efa..f7fffca 100644 --- a/src/coreason_optimizer/__init__.py +++ b/src/coreason_optimizer/__init__.py @@ -9,13 +9,17 @@ # Source Code: https://github.com/CoReason-AI/coreason_optimizer """ -coreason-optimizer +coreason-optimizer package initialization. + +This package provides the "Compiler" for the CoReason Agentic Platform, +treating prompts as trainable weights and optimizing them against ground-truth datasets. """ +from coreason_optimizer.core.config import OptimizerConfig +from coreason_optimizer.core.interfaces import PromptOptimizer + __version__ = "0.1.0" __author__ = "Gowtham A Rao" __email__ = "gowtham.rao@coreason.ai" -from .main import hello_world - -__all__ = ["hello_world"] +__all__ = ["OptimizerConfig", "PromptOptimizer"] diff --git a/tests/test_main.py b/src/coreason_optimizer/core/__init__.py similarity index 76% rename from tests/test_main.py rename to src/coreason_optimizer/core/__init__.py index a737279..5fc7189 100644 --- a/tests/test_main.py +++ b/src/coreason_optimizer/core/__init__.py @@ -7,9 +7,3 @@ # Commercial use beyond a 30-day trial requires a separate license. # # Source Code: https://github.com/CoReason-AI/coreason_optimizer - -from coreason_optimizer.main import hello_world - - -def test_hello_world() -> None: - assert hello_world() == "Hello World!" 
diff --git a/src/coreason_optimizer/core/budget.py b/src/coreason_optimizer/core/budget.py new file mode 100644 index 0000000..14cc77d --- /dev/null +++ b/src/coreason_optimizer/core/budget.py @@ -0,0 +1,101 @@ +# Copyright (c) 2025 CoReason, Inc. +# +# This software is proprietary and dual-licensed. +# Licensed under the Prosperity Public License 3.0 (the "License"). +# A copy of the license is available at https://prosperitylicense.com/versions/3.0.0 +# For details, see the LICENSE file. +# Commercial use beyond a 30-day trial requires a separate license. +# +# Source Code: https://github.com/CoReason-AI/coreason_optimizer + +""" +Budget management logic to track and limit token spend. + +This module provides classes to track usage statistics (tokens, cost) +and enforce a maximum budget to prevent unexpected costs. +""" + +from coreason_optimizer.core.interfaces import UsageStats +from coreason_optimizer.utils.exceptions import BudgetExceededError +from coreason_optimizer.utils.logger import logger + + +class BudgetManager: + """ + Tracks token usage and cost, enforcing a budget limit. + + This manager accumulates costs from LLM and Embedding API calls and + raises a BudgetExceededError if the total cost surpasses the limit. + + Attributes: + budget_limit_usd: The maximum allowed cost in USD. + total_cost_usd: The total cost accumulated so far. + total_prompt_tokens: The total number of prompt tokens used. + total_completion_tokens: The total number of completion tokens used. + total_tokens: The sum of prompt and completion tokens. + """ + + def __init__(self, budget_limit_usd: float) -> None: + """ + Initialize the BudgetManager. + + Args: + budget_limit_usd: The maximum allowed budget in USD. Must be positive. + + Raises: + ValueError: If budget_limit_usd is not positive. 
+ """ + if budget_limit_usd <= 0: + raise ValueError("Budget limit must be positive.") + self.budget_limit_usd = budget_limit_usd + self.total_cost_usd = 0.0 + self.total_prompt_tokens = 0 + self.total_completion_tokens = 0 + self.total_tokens = 0 + + def consume(self, usage: UsageStats) -> None: + """ + Accumulate usage stats from an API call. + + Args: + usage: A UsageStats object containing token counts and cost. + + Raises: + ValueError: If usage stats contain negative values. + BudgetExceededError: If the new total cost exceeds the budget. + """ + if usage.cost_usd < 0 or usage.total_tokens < 0 or usage.prompt_tokens < 0 or usage.completion_tokens < 0: + raise ValueError("Usage stats cannot be negative.") + + self.total_cost_usd += usage.cost_usd + self.total_prompt_tokens += usage.prompt_tokens + self.total_completion_tokens += usage.completion_tokens + self.total_tokens += usage.total_tokens + + logger.debug( + f"Budget consumed: ${usage.cost_usd:.4f}. Total: ${self.total_cost_usd:.4f} / ${self.budget_limit_usd:.2f}" + ) + + self.check_budget() + + def check_budget(self) -> None: + """ + Check if the budget has been exceeded. + + Raises: + BudgetExceededError: If total cost is greater than budget limit. + """ + if self.total_cost_usd > self.budget_limit_usd: + msg = f"Budget exceeded! Spent ${self.total_cost_usd:.4f}, limit was ${self.budget_limit_usd:.2f}" + logger.error(msg) + raise BudgetExceededError(msg) + + def get_status(self) -> str: + """ + Return a string summary of the budget status. + + Returns: + A string indicating spent amount and percentage of budget. 
+ """ + percentage = (self.total_cost_usd / self.budget_limit_usd) * 100 if self.budget_limit_usd > 0 else 100.0 + return f"Spent ${self.total_cost_usd:.4f} / ${self.budget_limit_usd:.2f} ({percentage:.1f}%)" diff --git a/src/coreason_optimizer/core/client.py b/src/coreason_optimizer/core/client.py new file mode 100644 index 0000000..e6480db --- /dev/null +++ b/src/coreason_optimizer/core/client.py @@ -0,0 +1,464 @@ +# Copyright (c) 2025 CoReason, Inc. +# +# This software is proprietary and dual-licensed. +# Licensed under the Prosperity Public License 3.0 (the "License"). +# A copy of the license is available at https://prosperitylicense.com/versions/3.0.0 +# For details, see the LICENSE file. +# Commercial use beyond a 30-day trial requires a separate license. +# +# Source Code: https://github.com/CoReason-AI/coreason_optimizer + +""" +LLM Client implementations for interacting with OpenAI API. + +This module provides clients for generating text and embeddings, +along with wrappers for budget tracking. +""" + +import os +from typing import Any, Optional + +import anyio +import httpx +from openai import AsyncOpenAI + +from coreason_optimizer.core.budget import BudgetManager +from coreason_optimizer.core.interfaces import ( + AsyncEmbeddingProvider, + AsyncLLMClient, + EmbeddingProvider, + EmbeddingResponse, + LLMClient, + LLMResponse, + UsageStats, +) +from coreason_optimizer.utils.logger import logger + +# Pricing per 1M tokens (approximate as of early 2025) +PRICING = { + "gpt-4o": {"input": 5.00, "output": 15.00}, + "gpt-4o-2024-08-06": {"input": 2.50, "output": 10.00}, + "gpt-4o-mini": {"input": 0.15, "output": 0.60}, + "text-embedding-3-small": {"input": 0.02, "output": 0.0}, + "text-embedding-3-large": {"input": 0.13, "output": 0.0}, +} + + +def calculate_openai_cost(model: str, input_tokens: int, output_tokens: int) -> float: + """ + Calculate the cost of an OpenAI API call based on model and usage. + + Args: + model: The model identifier (e.g., 'gpt-4o'). 
class OpenAIClientAsync:
    """Async implementation of LLMClient using OpenAI.

    Resource ownership: on exit the instance closes only what it created
    itself. A caller-supplied ``client`` or ``http_client`` is left open, so a
    connection pool shared between several wrappers survives each wrapper's
    ``async with`` block.
    """

    def __init__(
        self,
        api_key: str | None = None,
        client: AsyncOpenAI | None = None,
        http_client: Optional[httpx.AsyncClient] = None,
    ):
        """
        Initialize the OpenAIClientAsync.

        Args:
            api_key: Optional API key. If not provided, reads from OPENAI_API_KEY env var.
                Unused when ``client`` is given.
            client: Optional pre-configured AsyncOpenAI client instance. When given it is
                used as-is and remains owned by the caller.
            http_client: Optional httpx.AsyncClient for connection pooling. When given it
                remains owned by the caller.
        """
        self._http_client: Optional[httpx.AsyncClient]

        if client is not None:
            # An injected SDK client already carries its own transport: do not
            # allocate a throwaway httpx pool that nothing would ever use.
            self._internal_client = False
            self._http_client = http_client
            self.client = client
            return

        # True only when this instance created the HTTP pool (and therefore must close it).
        self._internal_client = http_client is None
        self._http_client = httpx.AsyncClient() if http_client is None else http_client
        self.client = AsyncOpenAI(api_key=api_key or os.getenv("OPENAI_API_KEY"), http_client=self._http_client)

    async def __aenter__(self) -> "OpenAIClientAsync":
        return self

    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Release the HTTP pool and SDK client, but only if this instance created them."""
        if not self._internal_client or self._http_client is None:
            # Either the SDK client or the HTTP pool belongs to the caller.
            # Closing the SDK client would also shut down the pool it wraps,
            # so nothing is closed here.
            return

        await self._http_client.aclose()
        await self.client.close()

    async def generate(
        self,
        messages: list[dict[str, str]],
        model: str | None = None,
        temperature: float = 0.0,
        **kwargs: Any,
    ) -> LLMResponse:
        """
        Generate a response from the OpenAI LLM asynchronously.

        Args:
            messages: A list of message dictionaries (role, content).
            model: The model identifier to use (default: 'gpt-4o').
            temperature: Sampling temperature (default: 0.0).
            **kwargs: Additional arguments passed to the OpenAI API.

        Returns:
            LLMResponse containing the content and usage statistics. The content is an
            empty string when the API returns no message content; usage is all zeros
            when the API returns no usage block.

        Raises:
            ValueError: If streaming is requested (not supported).
            Exception: Any error raised by the OpenAI call is logged and re-raised unchanged.
        """
        model = model or "gpt-4o"

        if kwargs.get("stream"):
            raise ValueError("Streaming is not supported by OpenAIClientAsync.")

        try:
            response = await self.client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=temperature,
                **kwargs,
            )

            # Only the first choice is consumed.
            choice = response.choices[0]
            content = choice.message.content or ""

            usage = response.usage
            if usage:
                cost = calculate_openai_cost(model, usage.prompt_tokens, usage.completion_tokens)
                usage_stats = UsageStats(
                    prompt_tokens=usage.prompt_tokens,
                    completion_tokens=usage.completion_tokens,
                    total_tokens=usage.total_tokens,
                    cost_usd=cost,
                )
            else:
                usage_stats = UsageStats()

            return LLMResponse(content=content, usage=usage_stats)

        except Exception as e:
            logger.error(f"OpenAI API call failed: {e}")
            raise
class BudgetAwareLLMClientAsync:
    """Async decorator around an AsyncLLMClient that gates every call on a budget."""

    def __init__(self, client: AsyncLLMClient, budget_manager: BudgetManager):
        """
        Store the wrapped client and the budget tracker.

        Args:
            client: The AsyncLLMClient whose ``generate`` is delegated to.
            budget_manager: The BudgetManager consulted before, and updated after, each call.
        """
        self.budget_manager = budget_manager
        self.client = client

    async def generate(
        self,
        messages: list[dict[str, str]],
        model: str | None = None,
        temperature: float = 0.0,
        **kwargs: Any,
    ) -> LLMResponse:
        """
        Delegate to the wrapped client, bracketed by budget bookkeeping.

        ``check_budget()`` runs first; if it raises, the wrapped client is never
        called. After a successful call the response's usage is handed to
        ``consume()``. If the wrapped call raises, nothing is consumed.

        Args:
            messages: A list of message dictionaries.
            model: The model identifier.
            temperature: Sampling temperature.
            **kwargs: Additional arguments forwarded untouched.

        Returns:
            The LLMResponse produced by the wrapped client.
        """
        # Pre-flight: refuse to spend when the manager says so.
        self.budget_manager.check_budget()

        result = await self.client.generate(messages=messages, model=model, temperature=temperature, **kwargs)

        # Post-flight: record what the call actually used.
        self.budget_manager.consume(result.usage)
        return result
class OpenAIEmbeddingClientAsync:
    """Async Implementation of EmbeddingProvider using OpenAI.

    Resource ownership: on exit the instance closes only what it created
    itself. A caller-supplied ``client`` or ``http_client`` is left open, so a
    shared connection pool survives this wrapper's ``async with`` block.
    """

    def __init__(
        self,
        api_key: str | None = None,
        client: AsyncOpenAI | None = None,
        http_client: Optional[httpx.AsyncClient] = None,
    ):
        """
        Initialize the OpenAIEmbeddingClientAsync.

        Args:
            api_key: Optional API key. If not provided, reads from OPENAI_API_KEY env var.
                Unused when ``client`` is given.
            client: Optional pre-configured AsyncOpenAI client instance. When given it is
                used as-is and remains owned by the caller.
            http_client: Optional httpx.AsyncClient for connection pooling. When given it
                remains owned by the caller.
        """
        self._http_client: Optional[httpx.AsyncClient]

        if client is not None:
            # An injected SDK client already carries its own transport: do not
            # allocate a throwaway httpx pool that nothing would ever use.
            self._internal_client = False
            self._http_client = http_client
            self.client = client
            return

        # True only when this instance created the HTTP pool (and therefore must close it).
        self._internal_client = http_client is None
        self._http_client = httpx.AsyncClient() if http_client is None else http_client
        self.client = AsyncOpenAI(api_key=api_key or os.getenv("OPENAI_API_KEY"), http_client=self._http_client)

    async def __aenter__(self) -> "OpenAIEmbeddingClientAsync":
        return self

    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Release the HTTP pool and SDK client, but only if this instance created them."""
        if not self._internal_client or self._http_client is None:
            # Either the SDK client or the HTTP pool belongs to the caller.
            # Closing the SDK client would also shut down the pool it wraps,
            # so nothing is closed here.
            return

        await self._http_client.aclose()
        await self.client.close()

    async def embed(self, texts: list[str], model: str | None = None) -> EmbeddingResponse:
        """
        Generate embeddings for a list of texts (with batching) asynchronously.

        Texts are sent in consecutive batches of 500 and the resulting vectors are
        concatenated in request order. An empty ``texts`` list issues no request and
        yields an empty result with zero usage.

        Args:
            texts: List of strings to embed.
            model: The embedding model to use (default: 'text-embedding-3-small').

        Returns:
            EmbeddingResponse containing embeddings and usage stats summed over all batches.

        Raises:
            Exception: Any error raised by the OpenAI call is logged and re-raised unchanged.
        """
        model = model or "text-embedding-3-small"
        batch_size = 500
        all_embeddings: list[list[float]] = []
        total_prompt_tokens = 0
        total_cost = 0.0

        try:
            for start in range(0, len(texts), batch_size):
                batch = texts[start : start + batch_size]
                response = await self.client.embeddings.create(input=batch, model=model)

                all_embeddings.extend(item.embedding for item in response.data)

                if response.usage:
                    tokens = response.usage.prompt_tokens
                    total_prompt_tokens += tokens
                    # Embedding calls have no completion tokens.
                    total_cost += calculate_openai_cost(model, tokens, 0)

            return EmbeddingResponse(
                embeddings=all_embeddings,
                usage=UsageStats(
                    prompt_tokens=total_prompt_tokens,
                    total_tokens=total_prompt_tokens,
                    cost_usd=total_cost,
                ),
            )

        except Exception as e:
            logger.error(f"OpenAI Embedding failed: {e}")
            raise
class BudgetAwareEmbeddingProvider:
    """Synchronous decorator around an EmbeddingProvider that gates every call on a budget."""

    def __init__(self, provider: EmbeddingProvider, budget_manager: BudgetManager):
        """
        Store the wrapped provider and the budget tracker.

        Args:
            provider: The EmbeddingProvider whose ``embed`` is delegated to.
            budget_manager: The BudgetManager consulted before, and updated after, each call.
        """
        self.budget_manager = budget_manager
        self.provider = provider

    def embed(self, texts: list[str], model: str | None = None) -> EmbeddingResponse:
        """
        Delegate to the wrapped provider, bracketed by budget bookkeeping.

        ``check_budget()`` runs first; if it raises, the provider is never called.
        After a successful call the response's usage is handed to ``consume()``.

        Args:
            texts: List of strings to embed.
            model: The embedding model to use.

        Returns:
            The EmbeddingResponse produced by the wrapped provider.
        """
        self.budget_manager.check_budget()
        result = self.provider.embed(texts, model)
        self.budget_manager.consume(result.usage)
        return result
class OptimizerConfig(BaseModel):
    """
    Settings that drive a single prompt-optimization run.

    Attributes:
        target_model: Model the prompt is being tuned for (default "gpt-4o").
        meta_model: Model used to propose instructions (default "gpt-4o").
        metric: Name of the evaluation metric (default "exact_match").
        selector_type: Few-shot selection strategy, "random" or "semantic" (default "random").
        embedding_model: Embedding model, relevant when selector_type is "semantic".
        max_bootstrapped_demos: Upper bound on bootstrapped few-shot examples; must be >= 0.
        max_rounds: Upper bound on optimization rounds; must be > 0.
        budget_limit_usd: Spending cap in USD for the run; must be > 0.
    """

    # Defaults are passed positionally: Field's first argument is the default value.
    target_model: str = Field("gpt-4o", description="The identifier of the target LLM to optimize for.")
    meta_model: str = Field(
        "gpt-4o",
        description="The identifier of the meta-LLM used for instruction optimization.",
    )
    metric: str = Field("exact_match", description="The metric function identifier to use for evaluation.")
    selector_type: Literal["random", "semantic"] = Field(
        "random",
        description="The strategy to use for selecting few-shot examples.",
    )
    embedding_model: str = Field(
        "text-embedding-3-small",
        description="The identifier of the embedding model (used if selector_type is semantic).",
    )
    max_bootstrapped_demos: int = Field(4, ge=0, description="Maximum number of few-shot examples to bootstrap.")
    max_rounds: int = Field(10, gt=0, description="Maximum number of optimization rounds.")
    budget_limit_usd: float = Field(10.0, gt=0.0, description="Maximum budget in USD for the optimization run.")
def format_prompt(
    system_prompt: str,
    examples: list[TrainingExample],
    inputs: dict[str, Any],
) -> str:
    """
    Assemble the full prompt text from instruction, demonstrations, and the live query.

    Sections are separated by a blank line and appear in this order:

        ### System Instruction
        {system_prompt}

        ### Examples            (omitted entirely when ``examples`` is empty)
        Input: {k: v, k: v, ...}
        Output: {reference}

        ### User Input
        Input: {k: v, k: v, ...}

    Args:
        system_prompt: The system instruction text.
        examples: Few-shot demonstrations; each contributes one Input/Output pair.
        inputs: Input variables for the current query.

    Returns:
        The formatted prompt string.
    """

    def _flatten(mapping: dict[str, Any]) -> str:
        # "key: value" pairs in the mapping's own iteration order, comma separated.
        return ", ".join(f"{key}: {value}" for key, value in mapping.items())

    sections = [f"### System Instruction\n{system_prompt}"]

    if examples:
        sections.append("### Examples")
        sections.extend(f"Input: {_flatten(demo.inputs)}\nOutput: {demo.reference}" for demo in examples)

    sections += ["### User Input", f"Input: {_flatten(inputs)}"]

    return "\n\n".join(sections)
@runtime_checkable
class LLMClient(Protocol):
    """Structural interface for any synchronous chat-style LLM client.

    Being ``runtime_checkable``, ``isinstance(obj, LLMClient)`` succeeds for any
    object exposing a ``generate`` attribute; signatures are not verified at runtime.
    """

    def generate(
        self,
        messages: list[dict[str, str]],
        model: str | None = None,
        temperature: float = 0.0,
        **kwargs: Any,
    ) -> LLMResponse:
        """
        Produce one completion for the given conversation.

        Args:
            messages: Conversation turns as dictionaries (role, content).
            model: Model identifier; ``None`` leaves the choice to the implementation.
            temperature: Sampling temperature.
            **kwargs: Extra provider-specific arguments.

        Returns:
            An LLMResponse carrying the generated content and usage stats.
        """
        ...  # pragma: no cover
@runtime_checkable
class EmbeddingProvider(Protocol):
    """Structural interface for any synchronous text-embedding backend.

    Being ``runtime_checkable``, ``isinstance(obj, EmbeddingProvider)`` succeeds for
    any object exposing an ``embed`` attribute; signatures are not verified at runtime.
    """

    def embed(self, texts: list[str], model: str | None = None) -> EmbeddingResponse:
        """
        Turn each input text into an embedding vector.

        Args:
            texts: Strings to embed.
            model: Embedding model identifier; ``None`` leaves the choice to the implementation.

        Returns:
            An EmbeddingResponse containing vectors and usage stats.
        """
        ...  # pragma: no cover
def normalize_answer(s: str) -> str:
    """
    Canonicalize an answer string for lenient comparison.

    Steps, in order: lowercase, delete ASCII punctuation, replace the standalone
    words "a", "an" and "the" with a space, then collapse all whitespace runs to
    single spaces and trim the ends.

    Args:
        s: The input string.

    Returns:
        Normalized string.
    """
    lowered = s.lower()
    # Punctuation is deleted outright (not replaced by a space), so "U.S." becomes "us".
    unpunctuated = lowered.translate(str.maketrans("", "", string.punctuation))
    # Articles are matched only as whole words, after punctuation has gone.
    without_articles = re.sub(r"\b(a|an|the)\b", " ", unpunctuated)
    return " ".join(without_articles.split())
class JsonValidity(Metric):
    """Scores 1.0 when the prediction contains parseable JSON; the reference is not consulted."""

    def __call__(self, prediction: str, reference: Any, **kwargs: Any) -> float:
        """
        Decide whether the prediction holds valid JSON.

        Candidates are tried in this order, and the first one that parses wins:
        1. The contents of every fenced block tagged ``json`` (tag matched case-insensitively).
        2. The contents of every fenced block of the form fence, optional tag, newline, body.
        3. The whole stripped prediction.

        Args:
            prediction: The model's output string.
            reference: Ignored.

        Returns:
            1.0 if valid JSON, 0.0 otherwise.
        """
        stripped = prediction.strip()

        def parses(candidate: str) -> bool:
            try:
                json.loads(candidate)
            except json.JSONDecodeError:
                return False
            return True

        # Explicitly tagged blocks: any single valid block is enough.
        tagged_blocks = re.finditer(r"```json\s*(.*?)\s*```", stripped, re.DOTALL | re.IGNORECASE)
        if any(parses(found.group(1)) for found in tagged_blocks):
            return 1.0

        # Generic blocks: group 1 swallows whatever language tag sits on the fence
        # line, so only the block body (group 2) is parsed.
        generic_blocks = re.finditer(r"```([^\n]*)\n(.*?)\n?```", stripped, re.DOTALL)
        if any(parses(found.group(2)) for found in generic_blocks):
            return 1.0

        # No usable block: fall back to the raw text.
        return 1.0 if parses(stripped) else 0.0
class OptimizedManifest(BaseModel):
    """
    Result record emitted at the end of an optimization run.

    Attributes:
        agent_id: Identifier of the agent that was optimized (required).
        base_model: LLM the prompt was optimized against (required).
        optimized_instruction: The resulting system prompt (required).
        few_shot_examples: Demonstrations chosen for the prompt; empty list by default.
        performance_metric: Validation-set score of the optimized prompt (required).
        optimization_run_id: Identifier of the run that produced this manifest (required).
    """

    # default=... (Ellipsis) marks a field as required.
    agent_id: str = Field(default=..., description="The unique identifier of the agent.")
    base_model: str = Field(default=..., description="The base LLM model used.")
    optimized_instruction: str = Field(default=..., description="The optimized system prompt.")
    few_shot_examples: list[TrainingExample] = Field(
        default_factory=list,
        description="Selected few-shot examples.",
    )
    performance_metric: float = Field(default=..., description="The score achieved on the validation set.")
    optimization_run_id: str = Field(default=..., description="Unique ID for this optimization run.")
class Dataset:
    """A container for training data with loading and splitting capabilities."""

    def __init__(self, examples: list[TrainingExample]):
        """
        Initialize the Dataset.

        Args:
            examples: A list of TrainingExample objects. The list is stored as-is (not copied).
        """
        self.examples = examples

    def __len__(self) -> int:
        return len(self.examples)

    def __getitem__(self, idx: int) -> TrainingExample:
        return self.examples[idx]

    def __iter__(self) -> Iterator[TrainingExample]:
        return iter(self.examples)

    @classmethod
    def from_csv(cls, filepath: str | Path, input_cols: list[str], reference_col: str) -> "Dataset":
        """
        Load a dataset from a CSV file.

        Rows in which any requested input column, or the reference column, is
        missing or empty are skipped.

        Args:
            filepath: Path to the CSV file.
            input_cols: List of column names to treat as inputs.
            reference_col: Column name to treat as the reference output.

        Returns:
            A Dataset instance.

        Raises:
            FileNotFoundError: If the file does not exist.
        """
        path = Path(filepath)
        if not path.exists():
            raise FileNotFoundError(f"File not found: {path}")

        examples = []
        # newline="" is what the csv module asks for: it lets the reader handle
        # line endings itself, so newlines embedded in quoted fields survive.
        with path.open("r", encoding="utf-8", newline="") as f:
            for row in csv.DictReader(f):
                inputs = {col: row.get(col) for col in input_cols}
                if any(value is None or value == "" for value in inputs.values()):
                    continue

                reference = row.get(reference_col)
                if reference is None or reference == "":
                    continue

                examples.append(
                    TrainingExample(
                        inputs=inputs,
                        reference=reference,
                        metadata={"source": str(path)},
                    )
                )
        return cls(examples)

    @classmethod
    def from_jsonl(cls, filepath: str | Path) -> "Dataset":
        """
        Load a dataset from a JSONL file.

        Expected format per line:
        {"inputs": {...}, "reference": ...}
        or
        {"input": ..., "output": ...} (will be normalized)

        Any other JSON object is accepted if it has a "reference" or "output"
        key; all remaining keys become the inputs. Blank lines, lines whose JSON
        value is not an object, and objects without a usable reference are skipped.

        Args:
            filepath: Path to the JSONL file.

        Returns:
            A Dataset instance.

        Raises:
            FileNotFoundError: If the file does not exist.
            json.JSONDecodeError: If a non-blank line is not valid JSON.
        """
        path = Path(filepath)
        if not path.exists():
            raise FileNotFoundError(f"File not found: {path}")

        examples = []
        with path.open("r", encoding="utf-8") as f:
            for line in f:
                if not line.strip():
                    # Blank or whitespace-only lines carry no record; parsing them would raise.
                    continue

                data = json.loads(line)
                if not isinstance(data, dict):
                    # A bare scalar or array cannot be mapped to inputs/reference.
                    continue

                # Normalize typical formats
                if "inputs" in data and "reference" in data:
                    inputs = data["inputs"]
                    reference = data["reference"]
                elif "input" in data and "output" in data:
                    raw_input = data["input"]
                    inputs = raw_input if isinstance(raw_input, dict) else {"input": raw_input}
                    reference = data["output"]
                else:
                    # Generic fallback: both 'reference' and 'output' are removed from the
                    # record; 'reference' wins when both are present.
                    fallback = data.pop("output", None)
                    reference = data.pop("reference", fallback)
                    if reference is None:
                        # Skipping ambiguous lines
                        continue
                    inputs = data

                examples.append(
                    TrainingExample(
                        inputs=inputs,
                        reference=reference,
                        metadata={"source": str(path)},
                    )
                )
        return cls(examples)

    def split(
        self, train_ratio: float = 0.8, val_ratio: float = 0.1, seed: int = 42
    ) -> tuple["Dataset", "Dataset", "Dataset"]:
        """
        Split the dataset into Train, Validation, and Test sets.

        A shuffled copy is partitioned; ``self.examples`` is left untouched. The
        shuffle uses a private ``random.Random(seed)`` so the process-wide random
        state is not reseeded, while the permutation for a given seed is the same
        one a freshly seeded global generator would produce.

        Args:
            train_ratio: Fraction of data for training.
            val_ratio: Fraction of data for validation.
            seed: Random seed for shuffling.

        Returns:
            A tuple of (train_dataset, val_dataset, test_dataset). The test set
            receives whatever remains after train and validation.

        Raises:
            ValueError: If either ratio is negative or train_ratio + val_ratio > 1.0.
        """
        if train_ratio < 0 or val_ratio < 0:
            raise ValueError("train and val ratios must be non-negative")
        if train_ratio + val_ratio > 1.0:
            raise ValueError("Sum of train and val ratios must be <= 1.0")

        shuffled = list(self.examples)
        random.Random(seed).shuffle(shuffled)

        n = len(shuffled)
        # Boundaries are truncated toward zero, so rounding leftovers fall into the test set.
        train_end = int(n * train_ratio)
        val_end = int(n * (train_ratio + val_ratio))

        return (
            Dataset(shuffled[:train_end]),
            Dataset(shuffled[train_end:val_end]),
            Dataset(shuffled[val_end:]),
        )
@click.group()
def cli() -> None:
    """coreason-opt: The Compiler for the CoReason Agentic Platform."""
    pass


@cli.command()
@click.option(
    "--agent",
    required=True,
    help="Path to the agent file (e.g., src/agents/analyst.py[:var])",
)
@click.option("--dataset", required=True, help="Path to the dataset (CSV or JSONL)")
@click.option("--base-model", help="Target LLM model (overrides config)")
@click.option("--epochs", type=int, help="Max optimization rounds (overrides config)")
@click.option("--demos", type=int, help="Max bootstrapped demos (overrides config)")
@click.option(
    "--output",
    default="optimized_manifest.json",
    help="Output path for the manifest",
)
@click.option(
    "--strategy",
    type=click.Choice(["mipro", "bootstrap"]),
    default="mipro",
    help="Optimization strategy",
)
@click.option(
    "--selector",
    type=click.Choice(["random", "semantic"]),
    help="Selector strategy (random or semantic)",
)
def tune(
    agent: str,
    dataset: str,
    base_model: str | None,
    epochs: int | None,
    demos: int | None,
    output: str,
    strategy: str,
    selector: str | None,
) -> None:
    """
    Optimize an agent's prompt against a dataset.

    Args:
        agent: Path to the agent file (and optional variable name).
        dataset: Path to the dataset file (.csv or .jsonl).
        base_model: Target LLM model identifier.
        epochs: Maximum number of optimization rounds.
        demos: Maximum number of few-shot examples to include.
        output: Path to save the resulting OptimizedManifest JSON.
        strategy: Optimization strategy to use ('mipro' or 'bootstrap').
        selector: Few-shot example selection strategy ('random' or 'semantic').
    """
    # NOTE: the docstring above doubles as the `--help` text rendered by click.
    logger.info(f"Starting optimization for agent: {agent}")

    # Load Agent
    try:
        construct = load_agent_from_path(agent)
    except Exception as e:
        logger.error(f"Failed to load agent: {e}")
        raise click.ClickException(str(e)) from e

    # Load Dataset
    try:
        ds_path = Path(dataset)
        suffix = ds_path.suffix.lower()
        if suffix == ".jsonl":
            full_ds = Dataset.from_jsonl(ds_path)
        elif suffix == ".csv":
            # Assume reference col is 'reference' and inputs are from construct
            input_cols = construct.inputs
            full_ds = Dataset.from_csv(ds_path, input_cols=input_cols, reference_col="reference")
        else:
            raise click.ClickException("Unsupported file format. Use .csv or .jsonl")
    except click.ClickException:
        # Already a user-facing error; do not log it as a load failure or re-wrap it.
        raise
    except Exception as e:
        logger.error(f"Failed to load dataset: {e}")  # pragma: no cover
        raise click.ClickException(str(e)) from e  # pragma: no cover

    # Split Data (simple train/val): 80/20 leaves the third (test) split empty.
    train_set, val_set, _ = full_ds.split(train_ratio=0.8, val_ratio=0.2)
    # Convert Dataset back to list[TrainingExample] as required by compile
    train_list = list(train_set)
    val_list = list(val_set)

    # Config: start from defaults, then apply CLI overrides.
    config = OptimizerConfig()
    if base_model:
        config.target_model = base_model
    # Compare against None so that an explicit 0 (e.g. `--demos 0`) is honoured
    # instead of being silently replaced by the config default.
    if epochs is not None:
        config.max_rounds = epochs
    if demos is not None:
        config.max_bootstrapped_demos = demos
    if selector:
        config.selector_type = selector  # type: ignore

    # Client
    # Uses OPENAI_API_KEY env var
    try:
        client = OpenAIClient()
    except Exception as e:
        logger.error(f"Failed to initialize OpenAI Client: {e}")  # pragma: no cover
        raise click.ClickException(
            "Failed to initialize OpenAI Client. Check OPENAI_API_KEY."
        ) from e  # pragma: no cover

    # Metric
    try:
        metric = MetricFactory.get(config.metric)
    except ValueError as e:
        raise click.ClickException(str(e)) from e

    # Optimizer
    optimizer: PromptOptimizer
    if strategy == "bootstrap":
        optimizer = BootstrapFewShot(client, metric, config)
    else:
        embedding_provider = None
        if config.selector_type == "semantic":
            # Semantic selection needs an embedding backend.
            try:
                embedding_provider = OpenAIEmbeddingClient()
            except Exception as e:
                logger.error(f"Failed to initialize OpenAI Embedding Client: {e}")  # pragma: no cover
                raise click.ClickException(
                    "Failed to initialize OpenAI Embedding Client. Check OPENAI_API_KEY."
                ) from e  # pragma: no cover

        optimizer = MiproOptimizer(client, metric, config, embedding_provider=embedding_provider)

    # Run
    try:
        manifest = optimizer.compile(construct, train_list, val_list)
    except Exception as e:
        logger.exception("Optimization failed")  # pragma: no cover
        raise click.ClickException(f"Optimization failed: {e}") from e  # pragma: no cover

    # Save
    try:
        with open(output, "w", encoding="utf-8") as f:
            f.write(manifest.model_dump_json(indent=2))
        logger.info(f"Manifest saved to {output}")
        click.echo(f"Optimization complete. Score: {manifest.performance_metric:.4f}. Manifest saved to {output}")
    except Exception as e:
        logger.error(f"Failed to save manifest: {e}")  # pragma: no cover
        raise click.ClickException(str(e)) from e  # pragma: no cover


@cli.command()
@click.option("--manifest", required=True, help="Path to the optimized manifest JSON")
@click.option("--dataset", required=True, help="Path to the evaluation dataset")
@click.option("--metric", default="exact_match", help="Metric to use for evaluation")
def evaluate(manifest: str, dataset: str, metric: str) -> None:
    """
    Evaluate an optimized manifest against a dataset.

    Args:
        manifest: Path to the optimized manifest JSON file.
        dataset: Path to the evaluation dataset file.
        metric: The metric to use for scoring (e.g., 'exact_match').
    """
    # NOTE: the docstring above doubles as the `--help` text rendered by click.
    # Load Manifest
    try:
        with open(manifest, "r", encoding="utf-8") as f:
            data = json.load(f)
        manifest_obj = OptimizedManifest(**data)
    except Exception as e:
        raise click.ClickException(f"Failed to load manifest: {e}") from e

    # Load Dataset
    try:
        ds_path = Path(dataset)
        if ds_path.suffix.lower() == ".jsonl":
            eval_ds = Dataset.from_jsonl(ds_path)
        else:
            # Fallback for CSV: the manifest does not record the input schema, so
            # borrow the keys of the first few-shot example if there is one.
            if manifest_obj.few_shot_examples:
                input_cols = list(manifest_obj.few_shot_examples[0].inputs.keys())
                eval_ds = Dataset.from_csv(ds_path, input_cols=input_cols, reference_col="reference")
            else:
                raise click.ClickException(
                    "Cannot infer CSV schema for evaluation without few-shot examples in manifest. Use JSONL."
                )
    except Exception as e:
        raise click.ClickException(f"Failed to load dataset: {e}") from e

    # Setup Evaluation
    try:
        client = OpenAIClient()
    except Exception:
        raise click.ClickException(
            "Failed to initialize OpenAI Client. Check OPENAI_API_KEY."
        ) from None  # pragma: no cover

    try:
        metric_func = MetricFactory.get(metric)
    except ValueError as e:
        raise click.ClickException(str(e)) from e

    total_score = 0.0
    count = 0
    failures = 0

    logger.info("Starting evaluation...")
    # Convert to list to iterate with progress bar
    examples_list = list(eval_ds)

    with click.progressbar(examples_list, label="Evaluating") as bar:
        for example in bar:
            # Reconstruct prompt using optimized instruction and examples
            prompt = format_prompt(
                system_prompt=manifest_obj.optimized_instruction,
                examples=manifest_obj.few_shot_examples,
                inputs=example.inputs,
            )
            try:
                response = client.generate(
                    messages=[{"role": "user", "content": prompt}],
                    model=manifest_obj.base_model,
                    temperature=0.0,
                )
                score = metric_func(response.content, example.reference)
                total_score += score
                count += 1
            except Exception as e:
                failures += 1
                logger.warning(f"Error evaluating example: {e}")

    # The average covers only the examples that were actually scored.
    avg_score = total_score / count if count > 0 else 0.0
    click.echo(f"Evaluation Complete. Average {metric} Score: {avg_score:.4f}")
    if failures:
        # Surface skipped examples on stderr so the headline number is not
        # mistaken for a score over the whole dataset; stdout stays unchanged.
        click.echo(
            f"Warning: {failures} of {len(examples_list)} examples failed and were excluded from the average.",
            err=True,
        )


if __name__ == "__main__":  # pragma: no cover
    cli()  # pragma: no cover
class BootstrapFewShot(PromptOptimizer):
    """
    BootstrapFewShot strategy implementation.

    Process:
    1. Iterate through the training set.
    2. Attempt to solve each example using the current system prompt (zero-shot).
    3. Verify the prediction against the ground truth using the metric.
    4. If successful, collect the example as a candidate for few-shot learning.
    5. Select the first ``max_bootstrapped_demos`` candidates for the final manifest.
    """

    def __init__(
        self,
        llm_client: LLMClient,
        metric: Metric,
        config: OptimizerConfig,
    ):
        """
        Initialize the BootstrapFewShot optimizer.

        Args:
            llm_client: The LLM client to use for generation.
            metric: The metric to verify correctness.
            config: Optimization configuration.
        """
        self.llm_client = llm_client
        self.metric = metric
        self.config = config

    def _format_prompt(
        self,
        system_prompt: str,
        examples: list[TrainingExample],
        inputs: dict[str, Any],
    ) -> str:
        """
        Sensible default prompt formatter.

        Structure:
        ### System Instruction
        ...
        ### Examples
        Input: ...
        Output: ...
        ### User Input
        Input: ...

        Args:
            system_prompt: Instruction placed at the top of the prompt.
            examples: Few-shot demonstrations; the section is omitted when empty.
            inputs: The inputs of the example to be solved.

        Returns:
            The sections joined by blank lines.
        """
        parts = [f"### System Instruction\n{system_prompt}"]

        if examples:
            parts.append("### Examples")
            for ex in examples:
                # Input dicts are serialized as simple "key: value" pairs.
                input_str = ", ".join(f"{k}: {v}" for k, v in ex.inputs.items())
                parts.append(f"Input: {input_str}\nOutput: {ex.reference}")

        parts.append("### User Input")
        current_input_str = ", ".join(f"{k}: {v}" for k, v in inputs.items())
        parts.append(f"Input: {current_input_str}")

        return "\n\n".join(parts)

    def compile(
        self,
        agent: Construct,
        trainset: list[TrainingExample],
        valset: list[TrainingExample],
    ) -> OptimizedManifest:
        """
        Run the bootstrapping loop.

        Args:
            agent: The agent construct to optimize.
            trainset: Training examples to mine.
            valset: Validation examples for final scoring.

        Returns:
            An OptimizedManifest containing the best few-shot examples found.
            ``performance_metric`` is the mean validation score (0.0 when
            ``valset`` is empty); validation examples that error count as 0.

        Raises:
            BudgetExceededError: If the budget limit is reached.
        """
        # Local import: this module does not import the exception at file level.
        from coreason_optimizer.utils.exceptions import BudgetExceededError

        logger.info(
            "Starting BootstrapFewShot compilation",
            train_size=len(trainset),
            target_model=self.config.target_model,
        )

        successful_traces: list[TrainingExample] = []

        # 1. Mine successful traces
        for i, example in enumerate(trainset):
            # Zero-shot on purpose: examples the model already solves unaided
            # become the demonstrations for the harder ones.
            prompt = self._format_prompt(
                system_prompt=agent.system_prompt,
                examples=[],
                inputs=example.inputs,
            )

            # 2. Generate
            try:
                response = self.llm_client.generate(
                    messages=[{"role": "user", "content": prompt}],
                    model=self.config.target_model,
                    temperature=0.0,  # Deterministic for mining
                )
            except BudgetExceededError:
                # A spent budget is terminal; do not keep iterating.
                raise
            except Exception as e:
                logger.error(f"Error generating for example {i}: {e}")
                continue

            prediction = response.content

            # 3. Score
            score = self.metric(prediction, example.reference)

            # 4. Filter
            # exact_match returns 1.0 or 0.0; graded metrics return 0.0-1.0.
            # Only a perfect score counts as a successful trace.
            if score >= 1.0:
                logger.debug(f"Example {i} passed with score {score}")
                successful_traces.append(example)
            else:
                logger.debug(f"Example {i} failed with score {score}")

        # 5. Select Candidates: the first max_bootstrapped_demos successes.
        num_demos = min(len(successful_traces), self.config.max_bootstrapped_demos)
        selected_examples = successful_traces[:num_demos]

        logger.info(f"Bootstrapping complete. Selected {len(selected_examples)} examples.")

        # 6. Evaluate on Validation Set (to get performance_metric)
        # The agent is now run WITH the selected demonstrations.
        total_score = 0.0
        if valset:
            for i, example in enumerate(valset):
                prompt = self._format_prompt(
                    system_prompt=agent.system_prompt,
                    examples=selected_examples,
                    inputs=example.inputs,
                )
                try:
                    response = self.llm_client.generate(
                        messages=[{"role": "user", "content": prompt}],
                        model=self.config.target_model,
                        temperature=0.0,
                    )
                    total_score += self.metric(response.content, example.reference)
                except BudgetExceededError:
                    raise
                except Exception as e:
                    # Best effort: the example contributes 0 to the average,
                    # but the failure is no longer invisible.
                    logger.warning(f"Error evaluating validation example {i}: {e}")
            avg_score = total_score / len(valset)
        else:
            avg_score = 0.0

        # 7. Create Manifest
        return OptimizedManifest(
            agent_id="unknown_agent",  # Agent protocol doesn't have ID?
            base_model=self.config.target_model,
            optimized_instruction=agent.system_prompt,  # Instruction is unchanged in Bootstrap
            few_shot_examples=selected_examples,
            performance_metric=avg_score,
            optimization_run_id=f"opt_{uuid.uuid4().hex[:8]}",
        )
class MiproOptimizer(PromptOptimizer):
    """
    MIPRO (Multi-prompt Instruction PRoposal Optimizer) Strategy.

    This strategy:
    1. Generates N candidate system instructions using a mutator (Meta-LLM).
    2. Generates M candidate few-shot example sets using a selector.
    3. Performs a grid search over all (Instruction, ExampleSet) combinations.
    4. Selects the combination with the highest score on the training set.
    """

    def __init__(
        self,
        llm_client: LLMClient,
        metric: Metric,
        config: OptimizerConfig,
        embedding_provider: EmbeddingProvider | None = None,
        num_instruction_candidates: int = 10,
        num_fewshot_combinations: int = 5,
    ):
        """
        Initialize the MIPRO Optimizer.

        Args:
            llm_client: The LLM client for generation.
            metric: The metric for evaluation.
            config: Optimization configuration.
            embedding_provider: Optional provider for semantic selection.
            num_instruction_candidates: Number of instruction variations to generate.
            num_fewshot_combinations: Number of few-shot sets to sample.

        Raises:
            ValueError: If semantic selection is requested but no embedding provider is given.
        """
        self.metric = metric
        self.config = config
        self.num_instruction_candidates = num_instruction_candidates
        self.num_fewshot_combinations = num_fewshot_combinations

        # Wrap client with Budget Awareness
        self.budget_manager = BudgetManager(config.budget_limit_usd)
        self.llm_client = BudgetAwareLLMClient(llm_client, self.budget_manager)

        # The mutator goes through the budget-aware client as well.
        self.mutator = LLMInstructionMutator(self.llm_client, config)

        self.selector: BaseSelector
        if config.selector_type == "semantic":
            if not embedding_provider:
                raise ValueError("Embedding provider is required for semantic selection.")

            # Wrap embedding provider sharing the SAME budget manager
            wrapped_embedder = BudgetAwareEmbeddingProvider(embedding_provider, self.budget_manager)
            self.selector = SemanticSelector(wrapped_embedder, seed=42, embedding_model=config.embedding_model)
        else:
            self.selector = RandomSelector(seed=42)

    def _evaluate_candidate(
        self,
        instruction: str,
        examples: list[TrainingExample],
        dataset: list[TrainingExample],
    ) -> float:
        """
        Evaluate a single candidate (instruction + examples) on a dataset.

        Args:
            instruction: Candidate system instruction.
            examples: Candidate few-shot demonstrations.
            dataset: Examples to score the candidate on.

        Returns:
            The mean metric score over ``dataset`` (0.0 for an empty dataset).
            Examples whose generation or scoring fails contribute 0.

        Raises:
            BudgetExceededError: If budget is exceeded.
        """
        total_score = 0.0
        for example in dataset:
            prompt = format_prompt(instruction, examples, example.inputs)
            try:
                response = self.llm_client.generate(
                    messages=[{"role": "user", "content": prompt}],
                    model=self.config.target_model,
                    temperature=0.0,
                )
                total_score += self.metric(response.content, example.reference)
            except BudgetExceededError:
                raise
            except Exception as e:
                logger.warning(f"Error during evaluation: {e}")

        return total_score / len(dataset) if dataset else 0.0

    def compile(
        self,
        agent: Construct,
        trainset: list[TrainingExample],
        valset: list[TrainingExample],
    ) -> OptimizedManifest:
        """
        Run the MIPRO optimization loop.

        Args:
            agent: The agent construct.
            trainset: Training data.
            valset: Validation data.

        Returns:
            OptimizedManifest with best instruction and examples.

        Raises:
            BudgetExceededError: If budget is exceeded.
        """
        logger.info(
            "Starting MIPRO compilation",
            train_size=len(trainset),
            target_model=self.config.target_model,
        )

        # 1. Diagnosis: run the original instruction zero-shot to find failures.
        logger.info("Running baseline diagnosis...")
        dataset_obj = Dataset(trainset)
        failed_examples = []

        for example in trainset:
            prompt = format_prompt(agent.system_prompt, [], example.inputs)
            try:
                response = self.llm_client.generate(
                    messages=[{"role": "user", "content": prompt}],
                    model=self.config.target_model,
                    temperature=0.0,
                )
                score = self.metric(response.content, example.reference)
                if score < 1.0:  # Anything short of a perfect score counts as a failure
                    # The mutator reads the prediction from metadata.
                    # NOTE(review): this mutates the caller's example in place, so
                    # the prediction also travels with the example if it is later
                    # selected as a demonstration - confirm this is intended.
                    example.metadata["prediction"] = response.content
                    failed_examples.append(example)
            except BudgetExceededError:
                raise
            except Exception as e:
                logger.error(f"Error diagnosing example: {e}")

        logger.info(f"Diagnosis complete. Found {len(failed_examples)} failures.")

        # 2. Candidate Generation: Instructions
        instruction_candidates = {agent.system_prompt}  # Use set to avoid duplicates
        logger.info(f"Generating {self.num_instruction_candidates} instruction candidates...")

        for i in range(self.num_instruction_candidates):
            try:
                new_instruction = self.mutator.mutate(
                    current_instruction=agent.system_prompt,
                    failed_examples=failed_examples,
                )
                instruction_candidates.add(new_instruction)
            except BudgetExceededError:
                raise
            except Exception as e:
                logger.warning(f"Failed to generate instruction candidate {i}: {e}")

        instruction_list = list(instruction_candidates)
        logger.info(f"Generated {len(instruction_list)} unique instruction candidates.")

        # 3. Candidate Generation: Example Sets
        # Always include 0-shot.
        example_sets: list[list[TrainingExample]] = [[]]
        # Selectors are seeded and may return the same set on every call.
        # Evaluating an identical set again cannot change the winner (ties keep
        # the first candidate) but costs a full pass over the trainset, so
        # duplicates are dropped. Sets are compared by object identity because
        # selectors hand back the trainset's own objects.
        seen_sets: set[tuple[int, ...]] = {()}

        logger.info(f"Generating {self.num_fewshot_combinations} few-shot sets...")
        k = self.config.max_bootstrapped_demos
        for _ in range(self.num_fewshot_combinations):
            selected = self.selector.select(dataset_obj, k=k)
            signature = tuple(id(ex) for ex in selected)
            if signature in seen_sets:
                continue
            seen_sets.add(signature)
            example_sets.append(selected)
        logger.info(f"Kept {len(example_sets)} unique example sets (including 0-shot).")

        # 4. Grid Search
        best_score = -1.0
        best_instruction = agent.system_prompt
        best_examples: list[TrainingExample] = []

        logger.info(
            f"Starting Grid Search: {len(instruction_list)} inst x {len(example_sets)} example sets "
            f"= {len(instruction_list) * len(example_sets)} candidates."
        )

        for instr in instruction_list:
            for ex_set in example_sets:
                # The optimization objective is the score on the trainset itself;
                # a held-out dev split would guard against overfitting.
                score = self._evaluate_candidate(instr, ex_set, trainset)

                logger.debug(f"Candidate Score: {score:.4f}")

                # Strict '>' keeps the earliest candidate on ties.
                if score > best_score:
                    best_score = score
                    best_instruction = instr
                    best_examples = ex_set

        logger.info(f"Grid Search complete. Best Training Score: {best_score}")

        # 5. Final Evaluation on Validation Set
        # With a valset, 'performance_metric' is computed on it;
        # otherwise the best training score is reported.
        final_metric = best_score
        if valset:
            logger.info("Evaluating best candidate on Validation Set...")
            final_metric = self._evaluate_candidate(best_instruction, best_examples, valset)
            logger.info(f"Validation Score: {final_metric}")

        # 6. Create Manifest
        return OptimizedManifest(
            agent_id="unknown_agent",
            base_model=self.config.target_model,
            optimized_instruction=best_instruction,
            few_shot_examples=best_examples,
            performance_metric=final_metric,
            optimization_run_id=f"opt_mipro_{uuid.uuid4().hex[:8]}",
        )
# Jinja2 template for the Meta-LLM request; rendered by
# LLMInstructionMutator._build_meta_prompt.
META_PROMPT_TEMPLATE = """
You are an expert prompt engineer. Your goal is to improve the following system instruction
based on the provided failure cases.

Current Instruction:
"{{ instruction }}"

The instruction failed on the following examples:
{% for failure in failures %}
Example {{ loop.index }}:
Input: {{ failure.inputs }}
Expected Output: {{ failure.reference }}
Actual Output: {{ failure.prediction }}
{% endfor %}
{% if failures_hidden_count > 0 %}
... and {{ failures_hidden_count }} more failures.
{% endif %}

Please rewrite the system instruction to address these failures while maintaining
performance on general cases. Return ONLY the new instruction text.
"""


class BaseMutator(ABC):
    """Abstract base class for instruction mutation strategies."""

    def __init__(self, llm_client: LLMClient):
        """
        Initialize the BaseMutator.

        Args:
            llm_client: The LLM client to use for mutation.
        """
        self.llm_client = llm_client

    @abstractmethod
    def mutate(
        self,
        current_instruction: str,
        failed_examples: list[TrainingExample] | None = None,
    ) -> str:
        """
        Generate a new instruction based on the current one and optional failure cases.

        Args:
            current_instruction: The existing system prompt.
            failed_examples: A list of examples that the current instruction failed on.

        Returns:
            The new system instruction string.
        """
        pass  # pragma: no cover


class IdentityMutator(BaseMutator):
    """A mutator that returns the instruction unchanged. Useful for baselines."""

    def mutate(
        self,
        current_instruction: str,
        failed_examples: list[TrainingExample] | None = None,
    ) -> str:
        """
        Return the instruction as-is.

        Args:
            current_instruction: The instruction.
            failed_examples: Ignored.

        Returns:
            The same instruction.
        """
        return current_instruction


class LLMInstructionMutator(BaseMutator):
    """Mutates instructions using a Meta-LLM to address failures."""

    def __init__(self, llm_client: LLMClient, config: OptimizerConfig):
        """
        Initialize the LLMInstructionMutator.

        Args:
            llm_client: The LLM client for the meta-prompt.
            config: Configuration object (e.g., for meta_model name).
        """
        super().__init__(llm_client)
        self.config = config

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """
        Remove a markdown code fence wrapped around a model reply.

        Handles the multi-line form (opening fence line, optionally carrying a
        language tag, and a closing fence) as well as a reply fenced on a single
        line. Text that is not fenced on both ends is only whitespace-stripped.

        Args:
            text: Raw reply text.

        Returns:
            The unfenced, stripped text; empty if nothing is left.
        """
        fence = "```"
        stripped = text.strip()
        if not (stripped.startswith(fence) and stripped.endswith(fence)):
            return stripped

        lines = stripped.splitlines()
        if len(lines) == 1:
            # Single-line reply such as ```new instruction```; a bare fence
            # (fewer than two full fences) carries no content.
            inner = stripped[len(fence):-len(fence)] if len(stripped) >= 2 * len(fence) else ""
            return inner.strip()

        # Drop the opening fence line, then cut the closing fence whether it
        # sits on its own line or is glued to the last line of text.
        body = lines[1:]
        body[-1] = body[-1].rstrip()[: -len(fence)]
        return "\n".join(body).strip()

    def mutate(
        self,
        current_instruction: str,
        failed_examples: list[TrainingExample] | None = None,
    ) -> str:
        """
        Generate a new instruction by asking the Meta-LLM to analyze failures.

        Args:
            current_instruction: The current instruction.
            failed_examples: List of TrainingExample where the current instruction failed.

        Returns:
            A new, potentially improved instruction string. The current
            instruction is returned unchanged when there are no failures, when
            the Meta-LLM replies with nothing usable, or when the request fails.

        Raises:
            BudgetExceededError: Propagated from the LLM client.
        """
        if not failed_examples:
            logger.warning("No failed examples provided for mutation. Returning original instruction.")
            return current_instruction

        meta_prompt = self._build_meta_prompt(current_instruction, failed_examples)

        try:
            logger.info("Requesting instruction mutation from Meta-LLM.")
            response = self.llm_client.generate(
                messages=[{"role": "user", "content": meta_prompt}],
                model=self.config.meta_model,
                temperature=0.7,  # non-zero so repeated calls can yield different rewrites
            )
            # Basic cleanup if the model wraps its answer in a markdown fence.
            new_instruction = self._strip_code_fence(response.content)

            if not new_instruction:
                logger.warning("Meta-LLM returned empty instruction. Returning original.")
                return current_instruction

            return new_instruction
        except BudgetExceededError:
            raise
        except Exception as e:
            # Best effort: a failed mutation must not abort the optimization run.
            logger.error(f"Failed to mutate instruction: {e}")
            return current_instruction

    def _build_meta_prompt(self, instruction: str, failures: list[TrainingExample]) -> str:
        """
        Construct the meta-prompt for the LLM using Jinja2.

        Only the first 10 failures are rendered in full; the rest are
        summarized as a count.

        Args:
            instruction: Current instruction.
            failures: List of failure examples.

        Returns:
            The full prompt string for the Meta-LLM.
        """
        display_failures = failures[:10]
        failures_hidden_count = len(failures) - len(display_failures)

        formatted_failures = [
            {
                "inputs": json.dumps(ex.inputs, indent=2),
                "reference": str(ex.reference),
                # "N/A" when no prediction was recorded in the example's metadata.
                "prediction": str(ex.metadata.get("prediction", "N/A")),
            }
            for ex in display_failures
        ]

        template = Template(META_PROMPT_TEMPLATE)
        return str(
            template.render(
                instruction=instruction,
                failures=formatted_failures,
                failures_hidden_count=failures_hidden_count,
            )
        )
class BaseSelector(ABC):
    """Abstract base class for few-shot example selection strategies."""

    @abstractmethod
    def select(self, trainset: Dataset, k: int = 4) -> list[TrainingExample]:
        """
        Select k examples from the training set.

        Args:
            trainset: The source dataset.
            k: The number of examples to select.

        Returns:
            A list of selected TrainingExample objects.
        """
        pass  # pragma: no cover


class RandomSelector(BaseSelector):
    """Randomly selects examples from the training set."""

    def __init__(self, seed: int = 42):
        """
        Initialize RandomSelector.

        Args:
            seed: Random seed for reproducibility.
        """
        self.seed = seed
        # One generator for the lifetime of the selector: the sequence of draws
        # is reproducible for a given seed, but successive calls differ.
        # Re-seeding inside select() made every call return the same sample,
        # which defeats callers that ask for several candidate sets.
        self._rng = random.Random(seed)

    def select(self, trainset: Dataset, k: int = 4) -> list[TrainingExample]:
        """
        Select k random examples.

        Args:
            trainset: The source dataset.
            k: Number of examples to select.

        Returns:
            List of randomly selected examples; the whole dataset, in order,
            when it holds k examples or fewer.
        """
        if len(trainset) <= k:
            return list(trainset)

        return self._rng.sample(list(trainset), k)


class SemanticSelector(BaseSelector):
    """
    Selects diverse examples using K-Means clustering on embeddings.

    Logic:
    1. Embed all examples.
    2. Cluster into k clusters.
    3. Select the example closest to the centroid of each cluster.
    """

    def __init__(
        self,
        embedding_provider: EmbeddingProvider,
        seed: int = 42,
        embedding_model: str | None = None,
    ):
        """
        Initialize SemanticSelector.

        Args:
            embedding_provider: Provider to generate embeddings.
            seed: Random seed for clustering initialization.
            embedding_model: Optional specific model to use for embeddings.
        """
        self.embedding_provider = embedding_provider
        self.seed = seed
        self.embedding_model = embedding_model

    def select(self, trainset: Dataset, k: int = 4) -> list[TrainingExample]:
        """
        Select k diverse examples using clustering.

        Args:
            trainset: The source dataset.
            k: Number of examples to select.

        Returns:
            List of diverse examples in dataset order; the whole dataset when it
            holds k examples or fewer.
        """
        if len(trainset) <= k:
            return list(trainset)

        # 1. Prepare texts for embedding. sort_keys makes the text independent
        # of the key order of the inputs dict.
        texts = [json.dumps(ex.inputs, sort_keys=True) for ex in trainset]

        # 2. Get embeddings
        response = self.embedding_provider.embed(texts, model=self.embedding_model)
        X = np.array(response.embeddings)

        # 3. K-Means Clustering
        # n_init is pinned so the result does not depend on the installed
        # scikit-learn's default.
        kmeans = KMeans(n_clusters=k, random_state=self.seed, n_init=10)
        kmeans.fit(X)

        # 4. Select representatives (closest to centroid)
        chosen: set[int] = set()
        for cluster_id in range(k):
            centroid = kmeans.cluster_centers_[cluster_id]

            # Find points belonging to this cluster
            cluster_indices = np.where(kmeans.labels_ == cluster_id)[0]
            if len(cluster_indices) == 0:
                continue

            # Euclidean distance of every cluster member from the centroid
            distances = np.linalg.norm(X[cluster_indices] - centroid, axis=1)
            # Plain ints keep numpy scalar types out of the result indices.
            chosen.add(int(cluster_indices[np.argmin(distances)]))

        selected_indices = sorted(chosen)

        # Empty clusters leave fewer than k representatives: top up with a
        # seeded random draw from the unused examples.
        if len(selected_indices) < k:
            remaining_indices = [idx for idx in range(len(trainset)) if idx not in chosen]
            rng = random.Random(self.seed)
            needed = k - len(selected_indices)
            if remaining_indices:
                extra = rng.sample(remaining_indices, min(len(remaining_indices), needed))
                selected_indices.extend(extra)
                selected_indices.sort()

        return [trainset[idx] for idx in selected_indices]


class OptimizerError(Exception):
    """Base class for all optimizer exceptions."""

    pass


class BudgetExceededError(OptimizerError):
    """Raised when the optimization budget (USD) is exceeded."""

    pass


class ConfigurationError(OptimizerError):
    """Raised when the configuration is invalid."""

    pass
+""" + +import importlib.util +import sys +from pathlib import Path +from typing import Any, cast + +from coreason_optimizer.core.interfaces import Construct + + +def load_agent_from_path(agent_path_str: str) -> Construct: + """ + Load an agent (Construct) from a file path string. + + Format: "path/to/file.py" (defaults to variable 'agent') + "path/to/file.py:variable_name" + + Args: + agent_path_str: The path string to the python file and optional variable. + + Returns: + The loaded agent object which conforms to the Construct protocol. + + Raises: + FileNotFoundError: If the file does not exist. + ImportError: If the module cannot be imported. + AttributeError: If the specified variable is not found in the module. + TypeError: If the loaded object does not satisfy the Construct protocol. + """ + file_path: Path | None = None + variable_name = "agent" + + # 1. Try treating the whole string as a path + p = Path(agent_path_str) + if p.exists() and p.is_file(): + file_path = p + else: + # 2. Try splitting at the last colon (for path:variable) + # Note: We use rsplit to handle Windows drive letters (C:\...) correctly + # because a drive letter colon is near the start, and separator is near the end. 
+ if ":" in agent_path_str: + parts = agent_path_str.rsplit(":", 1) + # If split results in 2 parts, check if the first part is a valid file + if len(parts) == 2: + possible_path = Path(parts[0]) + if possible_path.exists() and possible_path.is_file(): + file_path = possible_path + variable_name = parts[1] + + if file_path is None: + raise FileNotFoundError(f"Agent file not found: {agent_path_str}") + + path = file_path + module_name = path.stem + spec = importlib.util.spec_from_file_location(module_name, path) + if spec is None or spec.loader is None: + raise ImportError(f"Could not load spec for file: {path}") # pragma: no cover + + module = importlib.util.module_from_spec(spec) + sys.modules[module_name] = module + try: + spec.loader.exec_module(module) + except Exception as e: + raise ImportError(f"Error executing module {path}: {e}") from e + + if not hasattr(module, variable_name): + raise AttributeError(f"Variable '{variable_name}' not found in {path}") + + agent_obj: Any = getattr(module, variable_name) + + # Basic Protocol check (runtime) + # Since Construct is @runtime_checkable, isinstance works for properties if implemented as properties. + # However, Protocols with properties are tricky with isinstance check on instances that + # implement them as instance vars. We will do a manual check for safety. + if not isinstance(agent_obj, Construct): + # Double check: maybe it has the attributes but isinstance failed due to some typing quirk? + # Let's check explicitly. + required_attrs = ["inputs", "outputs", "system_prompt"] + missing = [attr for attr in required_attrs if not hasattr(agent_obj, attr)] + if missing: + raise TypeError( + f"Agent object '{variable_name}' does not satisfy Construct protocol. 
Missing attributes: {missing}" + ) + + return cast(Construct, agent_obj) diff --git a/src/coreason_optimizer/utils/logger.py b/src/coreason_optimizer/utils/logger.py index 9524d10..37d71ff 100644 --- a/src/coreason_optimizer/utils/logger.py +++ b/src/coreason_optimizer/utils/logger.py @@ -8,6 +8,13 @@ # # Source Code: https://github.com/CoReason-AI/coreason_optimizer +""" +Logging configuration. + +This module configures the `loguru` logger for the application, setting up +console output and file rotation for logs. +""" + import sys from pathlib import Path diff --git a/tests/core/test_budget_edge.py b/tests/core/test_budget_edge.py new file mode 100644 index 0000000..c82b212 --- /dev/null +++ b/tests/core/test_budget_edge.py @@ -0,0 +1,40 @@ +# Copyright (c) 2025 CoReason, Inc. +# +# This software is proprietary and dual-licensed. +# Licensed under the Prosperity Public License 3.0 (the "License"). +# A copy of the license is available at https://prosperitylicense.com/versions/3.0.0 +# For details, see the LICENSE file. +# Commercial use beyond a 30-day trial requires a separate license. 
+# +# Source Code: https://github.com/CoReason-AI/coreason_optimizer + +import pytest + +from coreason_optimizer.core.budget import BudgetManager +from coreason_optimizer.core.interfaces import UsageStats + + +def test_budget_init_negative() -> None: + with pytest.raises(ValueError): + BudgetManager(-1.0) + with pytest.raises(ValueError): + BudgetManager(0.0) + + +def test_budget_consume_negative() -> None: + manager = BudgetManager(10.0) + stats = UsageStats(total_tokens=10, prompt_tokens=5, completion_tokens=5, cost_usd=-0.1) + with pytest.raises(ValueError): + manager.consume(stats) + + stats_bad_tokens = UsageStats(total_tokens=-1, prompt_tokens=5, completion_tokens=5, cost_usd=0.1) + with pytest.raises(ValueError): + manager.consume(stats_bad_tokens) + + +def test_budget_status_string() -> None: + manager = BudgetManager(10.0) + assert manager.get_status() == "Spent $0.0000 / $10.00 (0.0%)" + + manager.consume(UsageStats(total_tokens=10, prompt_tokens=5, completion_tokens=5, cost_usd=5.0)) + assert "50.0%" in manager.get_status() diff --git a/tests/core/test_client.py b/tests/core/test_client.py new file mode 100644 index 0000000..c736895 --- /dev/null +++ b/tests/core/test_client.py @@ -0,0 +1,138 @@ +# Copyright (c) 2025 CoReason, Inc. +# +# This software is proprietary and dual-licensed. +# Licensed under the Prosperity Public License 3.0 (the "License"). +# A copy of the license is available at https://prosperitylicense.com/versions/3.0.0 +# For details, see the LICENSE file. +# Commercial use beyond a 30-day trial requires a separate license. 
+# +# Source Code: https://github.com/CoReason-AI/coreason_optimizer + +import os +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from coreason_optimizer.core.client import OpenAIClient, OpenAIClientAsync +from coreason_optimizer.core.interfaces import LLMResponse + + +@pytest.fixture +def mock_openai_response() -> MagicMock: + mock_response = MagicMock() + mock_choice = MagicMock() + mock_choice.message.content = "Test response" + mock_response.choices = [mock_choice] + + mock_usage = MagicMock() + mock_usage.prompt_tokens = 100 + mock_usage.completion_tokens = 50 + mock_usage.total_tokens = 150 + mock_response.usage = mock_usage + + return mock_response + + +@pytest.mark.asyncio +async def test_openai_client_async_initialization_with_key() -> None: + with patch("coreason_optimizer.core.client.AsyncOpenAI") as MockAsyncOpenAI: + # Mock close to be awaitable + mock_instance = MockAsyncOpenAI.return_value + mock_instance.close = AsyncMock() + + async with OpenAIClientAsync(api_key="test_key"): + pass + # Check call args of the mock class constructor + call_args = MockAsyncOpenAI.call_args + assert call_args.kwargs["api_key"] == "test_key" + + +@pytest.mark.asyncio +async def test_openai_client_async_initialization_with_env_var() -> None: + with patch.dict(os.environ, {"OPENAI_API_KEY": "env_key"}): + with patch("coreason_optimizer.core.client.AsyncOpenAI") as MockAsyncOpenAI: + OpenAIClientAsync() + assert MockAsyncOpenAI.call_args.kwargs["api_key"] == "env_key" + + +@pytest.mark.asyncio +async def test_openai_client_async_generate(mock_openai_response: MagicMock) -> None: + # Fix: AsyncMock for nested calls + # Remove spec=AsyncOpenAI to allow dynamic attributes in newer openai versions + mock_client = AsyncMock() + # create needs to be an async mock that returns the response + mock_client.chat.completions.create = AsyncMock(return_value=mock_openai_response) + + client = OpenAIClientAsync(client=mock_client) + + messages = [{"role": 
"user", "content": "Hello"}] + response = await client.generate(messages, model="gpt-4o", temperature=0.5) + + assert isinstance(response, LLMResponse) + assert response.content == "Test response" + assert response.usage.prompt_tokens == 100 + assert response.usage.completion_tokens == 50 + assert response.usage.total_tokens == 150 + assert response.usage.cost_usd == pytest.approx(0.00125) + + mock_client.chat.completions.create.assert_called_once_with( + model="gpt-4o", + messages=messages, + temperature=0.5, + ) + + +def test_openai_client_sync_facade(mock_openai_response: MagicMock) -> None: + """Test the synchronous facade wrapping the async client.""" + mock_client = AsyncMock() + # Ensure create is awaitable + mock_client.chat.completions.create = AsyncMock(return_value=mock_openai_response) + + # Ensure close is awaitable because sync facade's __exit__ calls async __aexit__ which awaits client.close() + mock_client.close = AsyncMock() + + # Inject the mocked async client + with OpenAIClient(client=mock_client) as client: + messages = [{"role": "user", "content": "Hello"}] + response = client.generate(messages, model="gpt-4o", temperature=0.5) + + assert isinstance(response, LLMResponse) + assert response.content == "Test response" + assert response.usage.cost_usd == pytest.approx(0.00125) + + +@pytest.mark.asyncio +async def test_openai_client_async_generate_unknown_model(mock_openai_response: MagicMock) -> None: + mock_client = AsyncMock() + mock_client.chat.completions.create = AsyncMock(return_value=mock_openai_response) + + client = OpenAIClientAsync(client=mock_client) + + messages = [{"role": "user", "content": "Hello"}] + response = await client.generate(messages, model="unknown-model") + + assert response.usage.cost_usd == 0.0 + + +@pytest.mark.asyncio +async def test_openai_client_async_generate_no_usage(mock_openai_response: MagicMock) -> None: + mock_openai_response.usage = None + mock_client = AsyncMock() + mock_client.chat.completions.create = 
AsyncMock(return_value=mock_openai_response) + + client = OpenAIClientAsync(client=mock_client) + + response = await client.generate([{"role": "user", "content": "hi"}]) + assert response.usage.total_tokens == 0 + assert response.usage.cost_usd == 0.0 + + +@pytest.mark.asyncio +async def test_openai_client_async_generate_failure() -> None: + mock_client = AsyncMock() + mock_client.chat.completions.create = AsyncMock(side_effect=Exception("API Error")) + + client = OpenAIClientAsync(client=mock_client) + + with pytest.raises(Exception, match="API Error"): + await client.generate([{"role": "user", "content": "Fail"}]) diff --git a/tests/core/test_client_budget_wrapper.py b/tests/core/test_client_budget_wrapper.py new file mode 100644 index 0000000..de20c75 --- /dev/null +++ b/tests/core/test_client_budget_wrapper.py @@ -0,0 +1,212 @@ +# Copyright (c) 2025 CoReason, Inc. +# +# This software is proprietary and dual-licensed. +# Licensed under the Prosperity Public License 3.0 (the "License"). +# A copy of the license is available at https://prosperitylicense.com/versions/3.0.0 +# For details, see the LICENSE file. +# Commercial use beyond a 30-day trial requires a separate license. 
+# +# Source Code: https://github.com/CoReason-AI/coreason_optimizer + +from typing import Any +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from coreason_optimizer.core.budget import BudgetManager +from coreason_optimizer.core.client import ( + BudgetAwareEmbeddingProvider, + BudgetAwareEmbeddingProviderAsync, + BudgetAwareLLMClient, + BudgetAwareLLMClientAsync, + OpenAIClient, +) +from coreason_optimizer.core.interfaces import ( + AsyncEmbeddingProvider, + AsyncLLMClient, + EmbeddingProvider, + EmbeddingResponse, + LLMClient, + LLMResponse, + UsageStats, +) +from coreason_optimizer.utils.exceptions import BudgetExceededError + + +class MockLLMClient(LLMClient): + """Mock LLM Client for testing.""" + + def __init__(self, response_cost: float = 0.0): + self.response_cost = response_cost + + def generate( + self, + messages: list[dict[str, str]], + model: str | None = None, + temperature: float = 0.0, + **kwargs: Any, + ) -> LLMResponse: + usage = UsageStats(cost_usd=self.response_cost) + return LLMResponse(content="mock", usage=usage) + + +class MockLLMClientAsync(AsyncLLMClient): + """Mock Async LLM Client for testing.""" + + def __init__(self, response_cost: float = 0.0): + self.response_cost = response_cost + + async def generate( + self, + messages: list[dict[str, str]], + model: str | None = None, + temperature: float = 0.0, + **kwargs: Any, + ) -> LLMResponse: + usage = UsageStats(cost_usd=self.response_cost) + return LLMResponse(content="mock", usage=usage) + + +def test_wrapper_delegates_and_tracks_cost() -> None: + """Test that the wrapper calls inner client and updates budget.""" + inner = MockLLMClient(response_cost=1.0) + budget = BudgetManager(budget_limit_usd=10.0) + wrapper = BudgetAwareLLMClient(inner, budget) + + response = wrapper.generate([{"role": "user", "content": "hi"}]) + assert response.content == "mock" + assert budget.total_cost_usd == 1.0 + + wrapper.generate([{"role": "user", "content": "hi"}]) + assert 
budget.total_cost_usd == 2.0 + + +@pytest.mark.asyncio +async def test_async_wrapper_delegates_and_tracks_cost() -> None: + """Test that the async wrapper calls inner client and updates budget.""" + inner = MockLLMClientAsync(response_cost=1.0) + budget = BudgetManager(budget_limit_usd=10.0) + wrapper = BudgetAwareLLMClientAsync(inner, budget) + + response = await wrapper.generate([{"role": "user", "content": "hi"}]) + assert response.content == "mock" + assert budget.total_cost_usd == 1.0 + + +def test_wrapper_raises_budget_exceeded() -> None: + """Test that the wrapper raises exception when budget exceeded.""" + inner = MockLLMClient(response_cost=6.0) + budget = BudgetManager(budget_limit_usd=10.0) + wrapper = BudgetAwareLLMClient(inner, budget) + + # First call: 6.0 <= 10.0. OK. + wrapper.generate([]) + assert budget.total_cost_usd == 6.0 + + # Second call: 12.0 > 10.0. Fail. + with pytest.raises(BudgetExceededError): + wrapper.generate([]) + + +def test_wrapper_passes_args() -> None: + """Test that arguments are passed to inner client.""" + mock_inner = MagicMock() + mock_inner.generate.return_value = LLMResponse(content="", usage=UsageStats()) + + budget = BudgetManager(10.0) + wrapper = BudgetAwareLLMClient(mock_inner, budget) + + wrapper.generate(messages=[{"a": "b"}], model="gpt-test", temperature=0.7, extra="val") + + mock_inner.generate.assert_called_once_with(messages=[{"a": "b"}], model="gpt-test", temperature=0.7, extra="val") + + +def test_wrapper_blocks_call_if_already_exceeded() -> None: + """Test that generate is blocked if budget is already exceeded.""" + mock_inner = MagicMock() + budget = BudgetManager(5.0) + wrapper = BudgetAwareLLMClient(mock_inner, budget) + + # Manually consume budget to exceed it + try: + budget.consume(UsageStats(cost_usd=6.0)) + except BudgetExceededError: + pass # Expected + + # Try generate + with pytest.raises(BudgetExceededError): + wrapper.generate([]) + + # Ensure inner client was NOT called + 
mock_inner.generate.assert_not_called() + + +def test_boundary_conditions() -> None: + """Test exact budget match.""" + inner = MockLLMClient(response_cost=5.0) + budget = BudgetManager(10.0) + wrapper = BudgetAwareLLMClient(inner, budget) + + # 1. Spend exactly 5.0. Total 5.0 <= 10.0. OK. + wrapper.generate([]) + assert budget.total_cost_usd == 5.0 + + # 2. Spend another 5.0. Total 10.0 <= 10.0. OK. (Boundary) + wrapper.generate([]) + assert budget.total_cost_usd == 10.0 + + # 3. Spend 0.1 more. Total 10.1 > 10.0. Fail. + inner.response_cost = 0.1 + with pytest.raises(BudgetExceededError): + wrapper.generate([]) + + +class MockEmbeddingProvider(EmbeddingProvider): + def embed(self, texts: list[str], model: str | None = None) -> EmbeddingResponse: + return EmbeddingResponse(embeddings=[], usage=UsageStats(cost_usd=1.0)) + + +class MockEmbeddingProviderAsync(AsyncEmbeddingProvider): + async def embed(self, texts: list[str], model: str | None = None) -> EmbeddingResponse: + return EmbeddingResponse(embeddings=[], usage=UsageStats(cost_usd=1.0)) + + +def test_embedding_wrapper_sync() -> None: + inner = MockEmbeddingProvider() + budget = BudgetManager(10.0) + wrapper = BudgetAwareEmbeddingProvider(inner, budget) + + wrapper.embed(["test"]) + assert budget.total_cost_usd == 1.0 + + +@pytest.mark.asyncio +async def test_embedding_wrapper_async() -> None: + inner = MockEmbeddingProviderAsync() + budget = BudgetManager(10.0) + wrapper = BudgetAwareEmbeddingProviderAsync(inner, budget) + + await wrapper.embed(["test"]) + assert budget.total_cost_usd == 1.0 + + +def test_openai_client_exit_exception() -> None: + """Test that OpenAIClient.__exit__ propagates exceptions properly.""" + # We must patch AsyncOpenAI because OpenAIClient init will try to create one if not passed + with patch("coreason_optimizer.core.client.AsyncOpenAI") as MockAsyncOpenAI: + MockAsyncOpenAI.return_value = AsyncMock() + client = OpenAIClient(api_key="test") + + # Mock the internal async client 
behavior + mock_async = MagicMock() + + # Setup __aexit__ to raise + async def mock_aexit(*args: Any) -> None: + raise ValueError("Inner error") + + mock_async.__aexit__ = mock_aexit + client._async = mock_async + + with pytest.raises(ValueError, match="Inner error"): + with client: + pass diff --git a/tests/core/test_client_edge_cases.py b/tests/core/test_client_edge_cases.py new file mode 100644 index 0000000..313476c --- /dev/null +++ b/tests/core/test_client_edge_cases.py @@ -0,0 +1,78 @@ +# Copyright (c) 2025 CoReason, Inc. +# +# This software is proprietary and dual-licensed. +# Licensed under the Prosperity Public License 3.0 (the "License"). +# A copy of the license is available at https://prosperitylicense.com/versions/3.0.0 +# For details, see the LICENSE file. +# Commercial use beyond a 30-day trial requires a separate license. +# +# Source Code: https://github.com/CoReason-AI/coreason_optimizer + +import os +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from openai import OpenAIError + +from coreason_optimizer.core.client import OpenAIClient + + +@pytest.fixture +def mock_openai_response() -> MagicMock: + mock_response = MagicMock() + mock_choice = MagicMock() + mock_choice.message.content = "Test response" + mock_response.choices = [mock_choice] + + mock_usage = MagicMock() + mock_usage.prompt_tokens = 10 + mock_usage.completion_tokens = 10 + mock_usage.total_tokens = 20 + mock_response.usage = mock_usage + + return mock_response + + +def test_stream_raises_error(mock_openai_response: MagicMock) -> None: + # We now mock AsyncOpenAI because OpenAIClient uses OpenAIClientAsync internally + mock_client = AsyncMock() + client = OpenAIClient(client=mock_client) + + with pytest.raises(ValueError, match="Streaming is not supported"): + client.generate([], stream=True) + + +def test_empty_content_handled(mock_openai_response: MagicMock) -> None: + mock_openai_response.choices[0].message.content = None + mock_client = AsyncMock() + 
mock_client.chat.completions.create = AsyncMock(return_value=mock_openai_response) + + # Also need to mock close() because context manager might call it or we might need it if we used context manager + # OpenAIClient facade doesn't strictly need close() for generate(), but __exit__ does. + # Here we are just initializing and calling generate. + + client = OpenAIClient(client=mock_client) + + resp = client.generate([]) + assert resp.content == "" + + +def test_multiple_n_handled(mock_openai_response: MagicMock) -> None: + mock_client = AsyncMock() + mock_client.chat.completions.create = AsyncMock(return_value=mock_openai_response) + + client = OpenAIClient(client=mock_client) + + # We pass n=2, response still has 1 choice mocked but usage reflects total. + client.generate([], n=2) + mock_client.chat.completions.create.assert_called_once() + assert mock_client.chat.completions.create.call_args.kwargs["n"] == 2 + + +def test_missing_api_key_raises_error() -> None: + # Ensure environment is clean + with patch.dict(os.environ, {}, clear=True): + # OpenAIClient now initializes OpenAIClientAsync which initializes AsyncOpenAI + # OpenAI 1.0+ checks for API key at instantiation + with pytest.raises(OpenAIError): + OpenAIClient() diff --git a/tests/core/test_client_embedding.py b/tests/core/test_client_embedding.py new file mode 100644 index 0000000..be0fe56 --- /dev/null +++ b/tests/core/test_client_embedding.py @@ -0,0 +1,119 @@ +# Copyright (c) 2025 CoReason, Inc. +# +# This software is proprietary and dual-licensed. +# Licensed under the Prosperity Public License 3.0 (the "License"). +# A copy of the license is available at https://prosperitylicense.com/versions/3.0.0 +# For details, see the LICENSE file. +# Commercial use beyond a 30-day trial requires a separate license. 
+# +# Source Code: https://github.com/CoReason-AI/coreason_optimizer + +import os +from typing import Any +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from openai import OpenAIError + +from coreason_optimizer.core.client import ( + BudgetAwareEmbeddingProvider, + OpenAIEmbeddingClient, + OpenAIEmbeddingClientAsync, +) + + +def test_embed_success() -> None: + mock_client = AsyncMock() + # Mock response + mock_response = MagicMock() + mock_data = [MagicMock(embedding=[0.1, 0.2]), MagicMock(embedding=[0.3, 0.4])] + mock_response.data = mock_data + mock_response.usage.prompt_tokens = 10 + + mock_client.embeddings.create = AsyncMock(return_value=mock_response) + mock_client.close = AsyncMock() + + with OpenAIEmbeddingClient(client=mock_client) as client: + response = client.embed(["a", "b"]) + + assert len(response.embeddings) == 2 + assert response.embeddings[0] == [0.1, 0.2] + assert response.usage.prompt_tokens == 10 + assert response.usage.cost_usd > 0 # Should be calculated + + +def test_budget_aware_provider() -> None: + mock_provider = MagicMock() + # Mock usage + usage = MagicMock(prompt_tokens=10, cost_usd=0.01) + mock_provider.embed.return_value = MagicMock(embeddings=[[1.0]], usage=usage) + + budget_manager = MagicMock() + + wrapper = BudgetAwareEmbeddingProvider(provider=mock_provider, budget_manager=budget_manager) + wrapper.embed(["a"]) + + budget_manager.check_budget.assert_called_once() + budget_manager.consume.assert_called_with(usage) + + +def test_embed_error() -> None: + mock_client = AsyncMock() + mock_client.embeddings.create = AsyncMock(side_effect=RuntimeError("API Error")) + mock_client.close = AsyncMock() + + with OpenAIEmbeddingClient(client=mock_client) as client: + with pytest.raises(RuntimeError): + client.embed(["a"]) + + +@pytest.mark.asyncio +async def test_init_default() -> None: + # Test initialization without client (reads env var, assumes mock/env) + # If OPENAI_API_KEY is present, it succeeds. 
If not, it raises. + + # Force failure + with patch.dict(os.environ, {}, clear=True): + with pytest.raises(OpenAIError): + # Async init check (internal client creation fails without key) + OpenAIEmbeddingClientAsync() + # We need to close it if it succeeded, but it raises + + # Force success + with patch.dict(os.environ, {"OPENAI_API_KEY": "dummy"}): + async with OpenAIEmbeddingClientAsync() as c: + # Verify internal client is set + assert c.client is not None + + # If we pass client, it works + mock_client = AsyncMock() + async with OpenAIEmbeddingClientAsync(client=mock_client) as c: + assert c.client is not None + assert c.client == mock_client + + +def test_embed_large_batch() -> None: + # Test that client batches requests if input is larger than batch_size (500) + mock_client = AsyncMock() + mock_client.close = AsyncMock() + # We want 505 items. + # 1st call: 500 items. 2nd call: 5 items. + + # Setup response side_effect + async def side_effect(input: list[str], model: str) -> Any: + count = len(input) + resp = MagicMock() + resp.data = [MagicMock(embedding=[0.0] * 2) for _ in range(count)] + resp.usage.prompt_tokens = count + return resp + + mock_client.embeddings.create = AsyncMock(side_effect=side_effect) + + with OpenAIEmbeddingClient(client=mock_client) as client: + # Generate 505 items + inputs = [str(i) for i in range(505)] + response = client.embed(inputs) + + assert len(response.embeddings) == 505 + assert response.usage.prompt_tokens == 505 + assert mock_client.embeddings.create.call_count == 2 diff --git a/tests/core/test_config.py b/tests/core/test_config.py new file mode 100644 index 0000000..0a9a0db --- /dev/null +++ b/tests/core/test_config.py @@ -0,0 +1,55 @@ +# Copyright (c) 2025 CoReason, Inc. +# +# This software is proprietary and dual-licensed. +# Licensed under the Prosperity Public License 3.0 (the "License"). +# A copy of the license is available at https://prosperitylicense.com/versions/3.0.0 +# For details, see the LICENSE file. 
+# Commercial use beyond a 30-day trial requires a separate license. +# +# Source Code: https://github.com/CoReason-AI/coreason_optimizer + +import pytest +from pydantic import ValidationError + +from coreason_optimizer.core.config import OptimizerConfig + + +def test_default_config() -> None: + """Test that default values are set correctly.""" + config = OptimizerConfig() + assert config.target_model == "gpt-4o" + assert config.metric == "exact_match" + assert config.max_bootstrapped_demos == 4 + assert config.max_rounds == 10 + assert config.budget_limit_usd == 10.0 + + +def test_custom_config() -> None: + """Test that custom values are set correctly.""" + config = OptimizerConfig( + target_model="claude-3-opus", + metric="f1_score", + max_bootstrapped_demos=2, + max_rounds=5, + budget_limit_usd=50.0, + ) + assert config.target_model == "claude-3-opus" + assert config.metric == "f1_score" + assert config.max_bootstrapped_demos == 2 + assert config.max_rounds == 5 + assert config.budget_limit_usd == 50.0 + + +def test_validation_constraints() -> None: + """Test that validation constraints are enforced.""" + with pytest.raises(ValidationError): + # max_rounds must be > 0 + OptimizerConfig(max_rounds=0) + + with pytest.raises(ValidationError): + # max_bootstrapped_demos must be >= 0 + OptimizerConfig(max_bootstrapped_demos=-1) + + with pytest.raises(ValidationError): + # budget_limit_usd must be > 0 + OptimizerConfig(budget_limit_usd=0.0) diff --git a/tests/core/test_metrics.py b/tests/core/test_metrics.py new file mode 100644 index 0000000..98772bc --- /dev/null +++ b/tests/core/test_metrics.py @@ -0,0 +1,105 @@ +# Copyright (c) 2025 CoReason, Inc. +# +# This software is proprietary and dual-licensed. +# Licensed under the Prosperity Public License 3.0 (the "License"). +# A copy of the license is available at https://prosperitylicense.com/versions/3.0.0 +# For details, see the LICENSE file. +# Commercial use beyond a 30-day trial requires a separate license. 
+# +# Source Code: https://github.com/CoReason-AI/coreason_optimizer + +import pytest + +from coreason_optimizer.core.metrics import ExactMatch, F1Score, MetricFactory, normalize_answer + + +def test_normalize_answer() -> None: + assert normalize_answer("The quick Brown Fox!") == "quick brown fox" + assert normalize_answer(" spaces ") == "spaces" + assert normalize_answer("a an the") == "" + # Unicode and Special Chars + assert normalize_answer("café") == "café" + assert normalize_answer("ñandú") == "ñandú" + assert normalize_answer("Hello\u00a0World") == "hello world" # Non-breaking space + assert normalize_answer("😊") == "😊" # Emojis remain (not punctuation) + + +def test_exact_match() -> None: + em = ExactMatch() + assert em("Hello World", "hello world") == 1.0 + assert em("Hello World", "Hello World!") == 1.0 # Punctuation ignored + assert em("foo", "bar") == 0.0 + assert em("123", 123) == 1.0 + + +def test_exact_match_multiple_references() -> None: + em = ExactMatch() + # Matches one of the references + assert em("foo", ["bar", "foo", "baz"]) == 1.0 + # Matches none + assert em("qux", ["bar", "foo"]) == 0.0 + # Empty list + assert em("foo", []) == 0.0 + + +def test_f1_score() -> None: + f1 = F1Score() + # Perfect match + assert f1("hello world", "hello world") == 1.0 + # No match + assert f1("foo", "bar") == 0.0 + # Partial match + # pred: "cat sat" (2 tokens), ref: "cat sat mat" (3 tokens) + # common: 2. precision: 2/2=1.0. recall: 2/3=0.66. 
f1: 2*1*0.66 / 1.66 = 1.33 / 1.66 = 0.8 + assert f1("cat sat", "cat sat mat") == pytest.approx(0.8) + + # Empty cases + assert f1("", "") == 1.0 + assert f1("foo", "") == 0.0 + assert f1("", "foo") == 0.0 + + +def test_f1_score_multiple_references() -> None: + f1 = F1Score() + # Should take the max score + # "cat" vs "cat" -> 1.0 + # "cat" vs "dog" -> 0.0 + assert f1("cat", ["dog", "cat"]) == 1.0 + + # Partial matches + # "cat sat" vs "cat sat mat" -> 0.8 + # "cat sat" vs "cat sat" -> 1.0 + # Should pick 1.0 + assert f1("cat sat", ["cat sat mat", "cat sat"]) == 1.0 + + # Empty list + assert f1("cat", []) == 0.0 + + +def test_metric_factory() -> None: + assert isinstance(MetricFactory.get("exact_match"), ExactMatch) + assert isinstance(MetricFactory.get("f1_score"), F1Score) + + with pytest.raises(ValueError): + MetricFactory.get("unknown") + + +def test_non_string_types() -> None: + em = ExactMatch() + f1 = F1Score() + + # Integers + assert em("123", 123) == 1.0 + assert f1("123", 123) == 1.0 + + # Floats + assert em("123.45", 123.45) == 1.0 + + # None (prediction is typed as str, but if it comes in as None?) + # normalize_answer expects str. In Python runtime, this would raise AttributeError. + # We follow the type hint `prediction: str`. + # However, reference can be Any. + + # Reference as None -> str(None) -> "None" + assert em("None", None) == 1.0 + assert em("foo", None) == 0.0 diff --git a/tests/core/test_metrics_json.py b/tests/core/test_metrics_json.py new file mode 100644 index 0000000..2d1b1df --- /dev/null +++ b/tests/core/test_metrics_json.py @@ -0,0 +1,50 @@ +# Copyright (c) 2025 CoReason, Inc. +# +# This software is proprietary and dual-licensed. +# Licensed under the Prosperity Public License 3.0 (the "License"). +# A copy of the license is available at https://prosperitylicense.com/versions/3.0.0 +# For details, see the LICENSE file. +# Commercial use beyond a 30-day trial requires a separate license. 
+# +# Source Code: https://github.com/CoReason-AI/coreason_optimizer + +from coreason_optimizer.core.metrics import JsonValidity + + +def test_json_validity_simple() -> None: + metric = JsonValidity() + assert metric('{"a": 1}', None) == 1.0 + assert metric("invalid", None) == 0.0 + + +def test_json_validity_markdown_explicit() -> None: + metric = JsonValidity() + # Explicit json block + text = 'Here is the json:\n```json\n{"a": 1}\n```' + assert metric(text, None) == 1.0 + + # Invalid inside block + text_bad = "```json\n{a: 1}\n```" + assert metric(text_bad, None) == 0.0 + + +def test_json_validity_markdown_generic() -> None: + metric = JsonValidity() + # Generic block + text = '```\n{"a": 1}\n```' + assert metric(text, None) == 1.0 + + # Python block (should work if content is valid json) + text_py = '```python\n{"a": 1}\n```' + assert metric(text_py, None) == 1.0 + + +def test_json_validity_multiple_blocks() -> None: + metric = JsonValidity() + # First valid wins + text = '```json\n{"a": 1}\n```\nAnd another:\n```json\ninvalid\n```' + assert metric(text, None) == 1.0 + + # If first is invalid, keep looking + text_2 = '```json\ninvalid\n```\n```json\n{"b": 2}\n```' + assert metric(text_2, None) == 1.0 diff --git a/tests/core/test_models.py b/tests/core/test_models.py new file mode 100644 index 0000000..15ef43f --- /dev/null +++ b/tests/core/test_models.py @@ -0,0 +1,65 @@ +# Copyright (c) 2025 CoReason, Inc. +# +# This software is proprietary and dual-licensed. +# Licensed under the Prosperity Public License 3.0 (the "License"). +# A copy of the license is available at https://prosperitylicense.com/versions/3.0.0 +# For details, see the LICENSE file. +# Commercial use beyond a 30-day trial requires a separate license. 
+# +# Source Code: https://github.com/CoReason-AI/coreason_optimizer + +from coreason_optimizer.core.models import OptimizedManifest, TrainingExample + + +def test_training_example_creation() -> None: + """Test creating a TrainingExample.""" + example = TrainingExample( + inputs={"question": "What is 2+2?"}, + reference="4", + metadata={"source": "math_dataset"}, + ) + assert example.inputs["question"] == "What is 2+2?" + assert example.reference == "4" + assert example.metadata["source"] == "math_dataset" + + +def test_training_example_defaults() -> None: + """Test default values for TrainingExample.""" + example = TrainingExample(inputs={"q": "a"}, reference="b") + assert example.metadata == {} + + +def test_optimized_manifest_creation() -> None: + """Test creating an OptimizedManifest.""" + example = TrainingExample(inputs={"q": "foo"}, reference="bar") + manifest = OptimizedManifest( + agent_id="test_agent", + base_model="gpt-4o", + optimized_instruction="You are a helpful assistant.", + few_shot_examples=[example], + performance_metric=0.95, + optimization_run_id="run_123", + ) + + assert manifest.agent_id == "test_agent" + assert manifest.performance_metric == 0.95 + assert len(manifest.few_shot_examples) == 1 + assert manifest.few_shot_examples[0].inputs["q"] == "foo" + + +def test_manifest_serialization() -> None: + """Test JSON serialization of the manifest.""" + example = TrainingExample(inputs={"q": "foo"}, reference="bar") + manifest = OptimizedManifest( + agent_id="test_agent", + base_model="gpt-4o", + optimized_instruction="prompt", + few_shot_examples=[example], + performance_metric=1.0, + optimization_run_id="run_1", + ) + + json_str = manifest.model_dump_json() + assert "test_agent" in json_str + assert "foo" in json_str + assert "bar" in json_str diff --git a/tests/data/test_loader.py b/tests/data/test_loader.py new file mode 100644 index 0000000..e1f83ec --- /dev/null +++ b/tests/data/test_loader.py @@ -0,0 +1,115 @@ +# Copyright (c) 2025 
CoReason, Inc. +# +# This software is proprietary and dual-licensed. +# Licensed under the Prosperity Public License 3.0 (the "License"). +# A copy of the license is available at https://prosperitylicense.com/versions/3.0.0 +# For details, see the LICENSE file. +# Commercial use beyond a 30-day trial requires a separate license. +# +# Source Code: https://github.com/CoReason-AI/coreason_optimizer + +import csv +import json +import tempfile +from pathlib import Path + +import pytest + +from coreason_optimizer.core.models import TrainingExample +from coreason_optimizer.data.loader import Dataset + + +def test_dataset_initialization() -> None: + examples = [ + TrainingExample(inputs={"q": "1"}, reference="A"), + TrainingExample(inputs={"q": "2"}, reference="B"), + ] + ds = Dataset(examples) + assert len(ds) == 2 + assert ds[0].reference == "A" + assert [e.reference for e in ds] == ["A", "B"] + + +def test_load_from_csv() -> None: + with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f: + writer = csv.writer(f) + writer.writerow(["question", "answer", "extra"]) + writer.writerow(["What is 1+1?", "2", "ignore"]) + writer.writerow(["What is 2+2?", "4", "ignore"]) + # Add invalid rows + writer.writerow(["", "broken", "ignore"]) # Missing question + writer.writerow(["q", "", "ignore"]) # Missing answer + filepath = Path(f.name) + + try: + ds = Dataset.from_csv(filepath, input_cols=["question"], reference_col="answer") + assert len(ds) == 2 + assert ds[0].inputs["question"] == "What is 1+1?" 
+ assert ds[0].reference == "2" + assert ds[0].metadata["source"] == str(filepath) + finally: + filepath.unlink() + + +def test_load_from_csv_missing_file() -> None: + with pytest.raises(FileNotFoundError): + Dataset.from_csv("non_existent.csv", [], "") + + +def test_load_from_jsonl() -> None: + with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f: + f.write(json.dumps({"inputs": {"q": "foo"}, "reference": "bar"}) + "\n") + f.write(json.dumps({"input": {"q": "baz"}, "output": "qux"}) + "\n") + f.write(json.dumps({"q": "simple", "reference": "simple_ref"}) + "\n") + # Add invalid rows + f.write(json.dumps({"q": "no_ref"}) + "\n") # Missing reference + filepath = Path(f.name) + + try: + ds = Dataset.from_jsonl(filepath) + assert len(ds) == 3 + + # Check first format + assert ds[0].inputs["q"] == "foo" + assert ds[0].reference == "bar" + + # Check second format + assert ds[1].inputs["q"] == "baz" + assert ds[1].reference == "qux" + + # Check third format + assert ds[2].inputs["q"] == "simple" + assert ds[2].reference == "simple_ref" + finally: + filepath.unlink() + + +def test_load_from_jsonl_missing_file() -> None: + with pytest.raises(FileNotFoundError): + Dataset.from_jsonl("non_existent.jsonl") + + +def test_split_dataset() -> None: + examples = [TrainingExample(inputs={"q": str(i)}, reference=str(i)) for i in range(100)] + ds = Dataset(examples) + + train, val, test = ds.split(train_ratio=0.8, val_ratio=0.1) + + assert len(train) == 80 + assert len(val) == 10 + assert len(test) == 10 + + # Ensure no overlap (simple check on references) + train_refs = {e.reference for e in train} + val_refs = {e.reference for e in val} + test_refs = {e.reference for e in test} + + assert train_refs.isdisjoint(val_refs) + assert train_refs.isdisjoint(test_refs) + assert val_refs.isdisjoint(test_refs) + + +def test_split_invalid_ratios() -> None: + ds = Dataset([]) + with pytest.raises(ValueError): + ds.split(train_ratio=0.9, val_ratio=0.2) diff --git 
a/tests/data/test_loader_edge_cases.py b/tests/data/test_loader_edge_cases.py new file mode 100644 index 0000000..1ea3419 --- /dev/null +++ b/tests/data/test_loader_edge_cases.py @@ -0,0 +1,104 @@ +# Copyright (c) 2025 CoReason, Inc. +# +# This software is proprietary and dual-licensed. +# Licensed under the Prosperity Public License 3.0 (the "License"). +# A copy of the license is available at https://prosperitylicense.com/versions/3.0.0 +# For details, see the LICENSE file. +# Commercial use beyond a 30-day trial requires a separate license. +# +# Source Code: https://github.com/CoReason-AI/coreason_optimizer + +import csv +import json +import tempfile +from pathlib import Path + +import pytest + +from coreason_optimizer.data.loader import Dataset + + +def test_csv_missing_columns() -> None: + """Test that missing columns in CSV results in skipped rows or empty inputs.""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f: + writer = csv.writer(f) + writer.writerow(["q", "a"]) + # Row with missing value for 'a' (CSV reader might interpret this based on structure) + # If we ask for 'q' and 'a', but row has only 1 val, DictReader usually handles it mapping rest to None + writer.writerow(["only_q"]) + filepath = Path(f.name) + + try: + # csv.DictReader behavior: if row has fewer fields than fieldnames, values are None + ds = Dataset.from_csv(filepath, input_cols=["q"], reference_col="a") + # Should skip because reference 'a' is None + assert len(ds) == 0 + finally: + filepath.unlink() + + +def test_csv_empty_file() -> None: + """Test loading an empty CSV file (header only or completely empty).""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f: + f.write("q,a\n") # Header only + filepath = Path(f.name) + + try: + ds = Dataset.from_csv(filepath, input_cols=["q"], reference_col="a") + assert len(ds) == 0 + finally: + filepath.unlink() + + +def test_jsonl_malformed_line() -> None: + """Test that malformed JSON 
lines cause a failure (or check specific behavior).""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f: + f.write('{"q": "good", "reference": "ok"}\n') + f.write("INVALID JSON\n") + filepath = Path(f.name) + + try: + # Current implementation uses json.loads inside a loop without try-except block for parsing + # So it should raise JSONDecodeError + with pytest.raises(json.JSONDecodeError): + Dataset.from_jsonl(filepath) + finally: + filepath.unlink() + + +def test_split_tiny_dataset() -> None: + """Test splitting a dataset with 0 or 1 items.""" + # Empty + ds_empty = Dataset([]) + t, v, te = ds_empty.split() + assert len(t) == 0 + assert len(v) == 0 + assert len(te) == 0 + + # Single item + # 0.8 train -> 0.8 * 1 = 0.8 -> int is 0 + # 0.1 val -> 0.1 * 1 = 0.1 -> int(0.8+0.1) = 0 + # test -> rest -> 1 + # This behavior depends on int() truncation. + # 80% of 1 is 0. + + # If we want to ensure at least 1 in train if possible, we might need ceil or logic, + # but standard logic is usually strictly ratio based. + + from coreason_optimizer.core.models import TrainingExample + + ds_one = Dataset([TrainingExample(inputs={"a": 1}, reference=1)]) + t, v, te = ds_one.split(train_ratio=0.8, val_ratio=0.1) + + # Check strict math behavior: + # train_end = int(1 * 0.8) = 0 + # val_end = int(1 * 0.9) = 0 + # test = [0:] -> 1 item + + assert len(t) == 0 + assert len(v) == 0 + assert len(te) == 1 + + # Force into train + t, v, te = ds_one.split(train_ratio=1.0, val_ratio=0.0) + assert len(t) == 1 diff --git a/tests/integration/test_workflow.py b/tests/integration/test_workflow.py new file mode 100644 index 0000000..0df7cc5 --- /dev/null +++ b/tests/integration/test_workflow.py @@ -0,0 +1,62 @@ +# Copyright (c) 2025 CoReason, Inc. +# +# This software is proprietary and dual-licensed. +# Licensed under the Prosperity Public License 3.0 (the "License"). 
+# A copy of the license is available at https://prosperitylicense.com/versions/3.0.0 +# For details, see the LICENSE file. +# Commercial use beyond a 30-day trial requires a separate license. +# +# Source Code: https://github.com/CoReason-AI/coreason_optimizer + +from unittest.mock import Mock + +from coreason_optimizer.core.models import OptimizedManifest, TrainingExample +from coreason_optimizer.data.loader import Dataset +from coreason_optimizer.strategies.mutator import IdentityMutator +from coreason_optimizer.strategies.selector import RandomSelector + + +def test_optimization_workflow_simulation() -> None: + """Simulate a simple optimization loop (without the actual Loop class).""" + + # 1. Load Data + raw_examples = [TrainingExample(inputs={"q": f"q{i}"}, reference=f"a{i}") for i in range(20)] + dataset = Dataset(raw_examples) + + # 2. Split Data + train_set, val_set, test_set = dataset.split(train_ratio=0.5, val_ratio=0.25) + assert len(train_set) == 10 + assert len(val_set) == 5 + assert len(test_set) == 5 + + # 3. Select Few-Shot Examples (Strategy) + selector = RandomSelector(seed=999) + few_shot_examples = selector.select(train_set, k=3) + assert len(few_shot_examples) == 3 + + # 4. Mutate Instruction (Strategy) + llm_mock = Mock() + mutator = IdentityMutator(llm_client=llm_mock) + base_instruction = "Answer the question." + # Simulate finding failures in validation (mocked) + failures = [TrainingExample(inputs={"q": "q_fail_1"}, reference="ref")] + + optimized_instruction = mutator.mutate(current_instruction=base_instruction, failed_examples=failures) + assert optimized_instruction == base_instruction # Identity mutator + + # 5. 
Create Manifest (Artifact) + # Assume we evaluated it and got a score + score = 0.85 + + manifest = OptimizedManifest( + agent_id="test_agent_v1", + base_model="gpt-4o", + optimized_instruction=optimized_instruction, + few_shot_examples=few_shot_examples, + performance_metric=score, + optimization_run_id="run_sim_001", + ) + + assert manifest.performance_metric == 0.85 + assert len(manifest.few_shot_examples) == 3 + assert manifest.optimized_instruction == "Answer the question." diff --git a/tests/strategies/test_bootstrap.py b/tests/strategies/test_bootstrap.py new file mode 100644 index 0000000..8f1816a --- /dev/null +++ b/tests/strategies/test_bootstrap.py @@ -0,0 +1,190 @@ +# Copyright (c) 2025 CoReason, Inc. +# +# This software is proprietary and dual-licensed. +# Licensed under the Prosperity Public License 3.0 (the "License"). +# A copy of the license is available at https://prosperitylicense.com/versions/3.0.0 +# For details, see the LICENSE file. +# Commercial use beyond a 30-day trial requires a separate license. +# +# Source Code: https://github.com/CoReason-AI/coreason_optimizer + +from typing import Any + +from coreason_optimizer.core.config import OptimizerConfig +from coreason_optimizer.core.interfaces import LLMResponse, UsageStats +from coreason_optimizer.core.metrics import ExactMatch +from coreason_optimizer.core.models import TrainingExample +from coreason_optimizer.strategies.bootstrap import BootstrapFewShot + + +class MockAgent: + @property + def inputs(self) -> list[str]: + return ["question"] + + @property + def outputs(self) -> list[str]: + return ["answer"] + + @property + def system_prompt(self) -> str: + return "Answer the question." 
+ + +class MockLLMClient: + def __init__(self) -> None: + self.calls: list[Any] = [] + + def generate( + self, + messages: list[dict[str, str]], + model: str | None = None, + temperature: float = 0.0, + **kwargs: Any, + ) -> LLMResponse: + self.calls.append(messages) + prompt = messages[0]["content"] + + # Parse prompt to find the active user input + parts = prompt.split("### User Input") + user_input_part = parts[-1] if len(parts) > 1 else prompt + + # Simulate correct answer for "2+2" + if "Input: question: 2+2" in user_input_part: + return LLMResponse(content="4", usage=UsageStats()) + # Simulate incorrect answer for "3+3" + if "Input: question: 3+3" in user_input_part: + return LLMResponse(content="99", usage=UsageStats()) + # Simulate correct answer for val set "5+5" + if "Input: question: 5+5" in user_input_part: + return LLMResponse(content="10", usage=UsageStats()) + + return LLMResponse(content="unknown", usage=UsageStats()) + + +class FailingLLMClient: + def __init__(self, fail_on_train: bool = True) -> None: + self.fail_on_train = fail_on_train + + def generate( + self, + messages: list[dict[str, str]], + model: str | None = None, + temperature: float = 0.0, + **kwargs: Any, + ) -> LLMResponse: + prompt = messages[0]["content"] + + if self.fail_on_train: + # Check if this is the training call (no 5+5) + if "5+5" not in prompt: + raise RuntimeError("LLM Failure") + + if not self.fail_on_train: + # Train step: succeed to generate a demo + if "Input: q: 1" in prompt and "### Examples" not in prompt: + return LLMResponse(content="1", usage=UsageStats()) + + # Validation step: fail + if "Input: q: 2" in prompt: + raise RuntimeError("Validation Failure") + + return LLMResponse(content="42", usage=UsageStats()) + + +def test_bootstrap_few_shot_compile() -> None: + llm = MockLLMClient() + metric = ExactMatch() + config = OptimizerConfig(target_model="test-model", max_bootstrapped_demos=1) + optimizer = BootstrapFewShot(llm_client=llm, metric=metric, 
config=config) + + agent = MockAgent() + trainset = [ + TrainingExample(inputs={"question": "2+2"}, reference="4"), # Should pass + TrainingExample(inputs={"question": "3+3"}, reference="6"), # Should fail + ] + valset = [ + TrainingExample(inputs={"question": "5+5"}, reference="10"), + ] + + manifest = optimizer.compile(agent, trainset, valset) + + # 1. Verify successful traces mined + assert len(manifest.few_shot_examples) == 1 + assert manifest.few_shot_examples[0].inputs["question"] == "2+2" + + # 2. Verify manifest fields + assert manifest.base_model == "test-model" + assert manifest.performance_metric == 1.0 # Val set should pass + assert manifest.optimized_instruction == "Answer the question." + + # 3. Verify interaction with LLM + assert len(llm.calls) == 3 + + +def test_bootstrap_few_shot_empty_trainset() -> None: + llm = MockLLMClient() + metric = ExactMatch() + config = OptimizerConfig() + optimizer = BootstrapFewShot(llm_client=llm, metric=metric, config=config) + + agent = MockAgent() + manifest = optimizer.compile(agent, [], []) + + assert len(manifest.few_shot_examples) == 0 + assert manifest.performance_metric == 0.0 + + +def test_bootstrap_limit_demos() -> None: + """Test that max_bootstrapped_demos is respected.""" + llm = MockLLMClient() + metric = ExactMatch() + # Limit to 1 demo + config = OptimizerConfig(max_bootstrapped_demos=1) + optimizer = BootstrapFewShot(llm_client=llm, metric=metric, config=config) + + agent = MockAgent() + # Two passing examples + trainset = [ + TrainingExample(inputs={"question": "2+2"}, reference="4"), + TrainingExample(inputs={"question": "2+2"}, reference="4"), + ] + + manifest = optimizer.compile(agent, trainset, []) + + assert len(manifest.few_shot_examples) == 1 + + +def test_bootstrap_llm_exception_mining() -> None: + """Test exception handling during mining.""" + # This client fails on training + llm = FailingLLMClient(fail_on_train=True) + metric = ExactMatch() + config = OptimizerConfig() + optimizer = 
BootstrapFewShot(llm_client=llm, metric=metric, config=config) + + agent = MockAgent() + trainset = [TrainingExample(inputs={"q": "1"}, reference="1")] + + # Should not crash, just produce empty manifest + manifest = optimizer.compile(agent, trainset, []) + assert len(manifest.few_shot_examples) == 0 + + +def test_bootstrap_llm_exception_validation() -> None: + """Test exception handling during validation.""" + # This client fails on validation + llm = FailingLLMClient(fail_on_train=False) + metric = ExactMatch() + config = OptimizerConfig(max_bootstrapped_demos=1) + optimizer = BootstrapFewShot(llm_client=llm, metric=metric, config=config) + + agent = MockAgent() + trainset = [TrainingExample(inputs={"q": "1"}, reference="1")] + valset = [TrainingExample(inputs={"q": "2"}, reference="2")] + + manifest = optimizer.compile(agent, trainset, valset) + + # Should have 1 example, but score 0.0 because validation failed + assert len(manifest.few_shot_examples) == 1 + assert manifest.performance_metric == 0.0 diff --git a/tests/strategies/test_bootstrap_complex.py b/tests/strategies/test_bootstrap_complex.py new file mode 100644 index 0000000..a5bf8e2 --- /dev/null +++ b/tests/strategies/test_bootstrap_complex.py @@ -0,0 +1,157 @@ +# Copyright (c) 2025 CoReason, Inc. +# +# This software is proprietary and dual-licensed. +# Licensed under the Prosperity Public License 3.0 (the "License"). +# A copy of the license is available at https://prosperitylicense.com/versions/3.0.0 +# For details, see the LICENSE file. +# Commercial use beyond a 30-day trial requires a separate license. 
+# +# Source Code: https://github.com/CoReason-AI/coreason_optimizer + +from typing import Any + +from coreason_optimizer.core.config import OptimizerConfig +from coreason_optimizer.core.interfaces import LLMResponse, UsageStats +from coreason_optimizer.core.metrics import ExactMatch +from coreason_optimizer.core.models import TrainingExample +from coreason_optimizer.strategies.bootstrap import BootstrapFewShot + + +class GenericMockAgent: + def __init__(self, system_prompt: str = "Default system prompt") -> None: + self._system_prompt = system_prompt + + @property + def inputs(self) -> list[str]: + return ["input"] + + @property + def outputs(self) -> list[str]: + return ["output"] + + @property + def system_prompt(self) -> str: + return self._system_prompt + + +class ComplexMockLLMClient: + def __init__(self) -> None: + self.calls: list[Any] = [] + + def generate( + self, + messages: list[dict[str, str]], + model: str | None = None, + temperature: float = 0.0, + **kwargs: Any, + ) -> LLMResponse: + self.calls.append(messages) + prompt = messages[0]["content"] + + # Case 1: Non-string inputs + # Formatter should convert 42 to "42" + if "Input: count: 42" in prompt: + return LLMResponse(content="valid", usage=UsageStats()) + + # Case 2: List reference + if "Input: q: color" in prompt: + return LLMResponse(content="red", usage=UsageStats()) + + # Case 3: Multiline prompt + if "### System Instruction\nLine 1\nLine 2" in prompt: + # We just need to return something that matches reference to pass + if "Input: q: multi" in prompt: + return LLMResponse(content="yes", usage=UsageStats()) + + return LLMResponse(content="unknown", usage=UsageStats()) + + +def test_bootstrap_non_string_inputs() -> None: + """Test that integer/float inputs are correctly formatted and processed.""" + llm = ComplexMockLLMClient() + metric = ExactMatch() + config = OptimizerConfig(max_bootstrapped_demos=1) + optimizer = BootstrapFewShot(llm_client=llm, metric=metric, config=config) + + agent = 
GenericMockAgent() + # Input is an integer + trainset = [ + TrainingExample(inputs={"count": 42}, reference="valid"), + ] + + manifest = optimizer.compile(agent, trainset, []) + + assert len(manifest.few_shot_examples) == 1 + assert manifest.few_shot_examples[0].inputs["count"] == 42 + + +def test_bootstrap_list_reference() -> None: + """Test that mining works when reference is a list of valid options.""" + llm = ComplexMockLLMClient() + metric = ExactMatch() + config = OptimizerConfig() + optimizer = BootstrapFewShot(llm_client=llm, metric=metric, config=config) + + agent = GenericMockAgent() + # Reference allows "red" or "blue" + trainset = [ + TrainingExample(inputs={"q": "color"}, reference=["red", "blue"]), + ] + + manifest = optimizer.compile(agent, trainset, []) + + # LLM returns "red", which matches one of the options + assert len(manifest.few_shot_examples) == 1 + assert manifest.few_shot_examples[0].reference == ["red", "blue"] + + +def test_bootstrap_multiline_system_prompt() -> None: + """Test that multiline system prompts are handled correctly.""" + llm = ComplexMockLLMClient() + metric = ExactMatch() + config = OptimizerConfig() + optimizer = BootstrapFewShot(llm_client=llm, metric=metric, config=config) + + agent = GenericMockAgent(system_prompt="Line 1\nLine 2") + trainset = [ + TrainingExample(inputs={"q": "multi"}, reference="yes"), + ] + + manifest = optimizer.compile(agent, trainset, []) + + assert len(manifest.few_shot_examples) == 1 + assert manifest.optimized_instruction == "Line 1\nLine 2" + + +def test_bootstrap_duplicate_mining() -> None: + """Test behavior when multiple identical examples succeed.""" + + # We use a simple client for this + class EchoLLMClient: + def generate( + self, + messages: list[dict[str, str]], + model: str | None = None, + temperature: float = 0.0, + **kwargs: Any, + ) -> LLMResponse: + return LLMResponse(content="4", usage=UsageStats()) + + llm = EchoLLMClient() + metric = ExactMatch() + config = 
OptimizerConfig(max_bootstrapped_demos=5) # Allow enough + optimizer = BootstrapFewShot(llm_client=llm, metric=metric, config=config) + + agent = GenericMockAgent() + # Duplicate examples in trainset + trainset = [ + TrainingExample(inputs={"q": "2+2"}, reference="4"), + TrainingExample(inputs={"q": "2+2"}, reference="4"), + ] + + manifest = optimizer.compile(agent, trainset, []) + + # Current implementation does not deduplicate, so we expect 2 + assert len(manifest.few_shot_examples) == 2 + assert manifest.few_shot_examples[0].inputs == {"q": "2+2"} + assert manifest.few_shot_examples[1].inputs == {"q": "2+2"} diff --git a/tests/strategies/test_mipro.py b/tests/strategies/test_mipro.py new file mode 100644 index 0000000..afe0858 --- /dev/null +++ b/tests/strategies/test_mipro.py @@ -0,0 +1,180 @@ +# Copyright (c) 2025 CoReason, Inc. +# +# This software is proprietary and dual-licensed. +# Licensed under the Prosperity Public License 3.0 (the "License"). +# A copy of the license is available at https://prosperitylicense.com/versions/3.0.0 +# For details, see the LICENSE file. +# Commercial use beyond a 30-day trial requires a separate license. 
+# +# Source Code: https://github.com/CoReason-AI/coreason_optimizer + +from unittest.mock import MagicMock, patch + +import pytest + +from coreason_optimizer.core.config import OptimizerConfig +from coreason_optimizer.core.interfaces import Construct, LLMClient, LLMResponse +from coreason_optimizer.core.models import TrainingExample +from coreason_optimizer.strategies.mipro import MiproOptimizer +from coreason_optimizer.utils.exceptions import BudgetExceededError + + +@pytest.fixture +def mock_client() -> MagicMock: + client = MagicMock(spec=LLMClient) + client.generate.return_value = LLMResponse(content="Response", usage={"total_tokens": 10}, cost_usd=0.001) + return client + + +@pytest.fixture +def mock_metric() -> MagicMock: + return MagicMock(return_value=1.0) + + +@pytest.fixture +def config() -> OptimizerConfig: + return OptimizerConfig(target_model="gpt-4", meta_model="gpt-4") + + +@pytest.fixture +def agent() -> Construct: + class Agent(Construct): + system_prompt = "Sys" + inputs = ["q"] + outputs = ["a"] + + return Agent() + + +@pytest.fixture +def trainset() -> list[TrainingExample]: + return [TrainingExample(inputs={"q": "q1"}, reference="a1")] + + +def test_mipro_init_semantic_requires_embedding(mock_client: MagicMock, mock_metric: MagicMock) -> None: + conf = OptimizerConfig(selector_type="semantic") + with pytest.raises(ValueError, match="Embedding provider is required"): + MiproOptimizer(mock_client, mock_metric, conf) + + +def test_mipro_diagnosis_budget_exceeded( + mock_client: MagicMock, + mock_metric: MagicMock, + config: OptimizerConfig, + agent: Construct, + trainset: list[TrainingExample], +) -> None: + mock_client.generate.side_effect = BudgetExceededError("Budget") + optimizer = MiproOptimizer(mock_client, mock_metric, config) + + with pytest.raises(BudgetExceededError): + optimizer.compile(agent, trainset, []) + + +def test_mipro_diagnosis_failure_logging( + mock_client: MagicMock, + mock_metric: MagicMock, + config: OptimizerConfig, + 
agent: Construct, + trainset: list[TrainingExample], +) -> None: + # Simulate a generic error during diagnosis + mock_client.generate.side_effect = Exception("API Error") + optimizer = MiproOptimizer(mock_client, mock_metric, config) + + # We patch mutator to avoid network calls there too + with patch("coreason_optimizer.strategies.mipro.LLMInstructionMutator") as MockMutator: + mock_mutator_inst = MagicMock() + mock_mutator_inst.mutate.return_value = "New Instr" + MockMutator.return_value = mock_mutator_inst + + manifest = optimizer.compile(agent, trainset, []) + assert manifest.optimized_instruction == "Sys" # Default if nothing better found + + +def test_mipro_candidate_generation_budget_exceeded( + mock_client: MagicMock, + mock_metric: MagicMock, + config: OptimizerConfig, + agent: Construct, + trainset: list[TrainingExample], +) -> None: + optimizer = MiproOptimizer(mock_client, mock_metric, config) + + # Diagnosis succeeds + mock_client.generate.return_value = LLMResponse(content="wrong", usage={}, cost_usd=0.0) + mock_metric.return_value = 0.0 # Force failure + + # Let's manually replace the mutator on the instance using patch.object + with patch.object(optimizer.mutator, "mutate", side_effect=BudgetExceededError("Budget")): + with pytest.raises(BudgetExceededError): + optimizer.compile(agent, trainset, []) + + +def test_mipro_candidate_generation_failure( + mock_client: MagicMock, + mock_metric: MagicMock, + config: OptimizerConfig, + agent: Construct, + trainset: list[TrainingExample], +) -> None: + optimizer = MiproOptimizer(mock_client, mock_metric, config) + + # Diagnosis succeeds + mock_client.generate.return_value = LLMResponse(content="wrong", usage={}, cost_usd=0.0) + mock_metric.return_value = 0.0 + + # Mutator raises generic error + with patch.object(optimizer.mutator, "mutate", side_effect=Exception("Mutator Error")): + # Should continue and use base instruction + manifest = optimizer.compile(agent, trainset, []) + assert 
manifest.optimized_instruction == "Sys" + + +def test_mipro_grid_search_budget_exceeded( + mock_client: MagicMock, + mock_metric: MagicMock, + config: OptimizerConfig, + agent: Construct, + trainset: list[TrainingExample], +) -> None: + # Mock mutator to return 1 candidate + optimizer = MiproOptimizer(mock_client, mock_metric, config) + + with patch.object(optimizer.mutator, "mutate", return_value="New Instr"): + # Diagnosis succeeds + # We need to control the generate call. + # The FIRST calls are diagnosis (1 call per trainset example). + # Then mutation calls (controlled by mutator mock - we mocked mutate directly). + # Then grid search calls. + + call_count = 0 + + def side_effect(*args, **kwargs): # type: ignore + nonlocal call_count + call_count += 1 + if call_count > 1: # Fail after diagnosis (on grid search eval) + raise BudgetExceededError("Budget") + return LLMResponse(content="resp", usage={}, cost_usd=0.0) + + mock_client.generate.side_effect = side_effect + + with pytest.raises(BudgetExceededError): + optimizer.compile(agent, trainset, []) + + +def test_mipro_evaluate_candidate_generic_exception( + mock_client: MagicMock, + mock_metric: MagicMock, + config: OptimizerConfig, + agent: Construct, + trainset: list[TrainingExample], +) -> None: + optimizer = MiproOptimizer(mock_client, mock_metric, config) + + # Force evaluate_candidate to raise exception + mock_client.generate.side_effect = Exception("Grid Search Error") + + # Just check it returns 0.0 score essentially (doesn't crash) + score = optimizer._evaluate_candidate("Instr", [], trainset) + assert score == 0.0 diff --git a/tests/strategies/test_mutator.py b/tests/strategies/test_mutator.py new file mode 100644 index 0000000..ff40310 --- /dev/null +++ b/tests/strategies/test_mutator.py @@ -0,0 +1,28 @@ +# Copyright (c) 2025 CoReason, Inc. +# +# This software is proprietary and dual-licensed. +# Licensed under the Prosperity Public License 3.0 (the "License"). 
+# A copy of the license is available at https://prosperitylicense.com/versions/3.0.0 +# For details, see the LICENSE file. +# Commercial use beyond a 30-day trial requires a separate license. +# +# Source Code: https://github.com/CoReason-AI/coreason_optimizer + +from unittest.mock import Mock + +from coreason_optimizer.core.models import TrainingExample +from coreason_optimizer.strategies.mutator import IdentityMutator + + +def test_identity_mutator() -> None: + mock_llm = Mock() + mutator = IdentityMutator(llm_client=mock_llm) + + instruction = "You are a helpful assistant." + new_instruction = mutator.mutate(instruction) + + assert new_instruction == instruction + + failed_examples = [TrainingExample(inputs={"q": "fail1"}, reference="ref")] + new_instruction_with_failures = mutator.mutate(instruction, failed_examples=failed_examples) + assert new_instruction_with_failures == instruction diff --git a/tests/strategies/test_mutator_llm.py b/tests/strategies/test_mutator_llm.py new file mode 100644 index 0000000..15007a6 --- /dev/null +++ b/tests/strategies/test_mutator_llm.py @@ -0,0 +1,113 @@ +# Copyright (c) 2025 CoReason, Inc. +# +# This software is proprietary and dual-licensed. +# Licensed under the Prosperity Public License 3.0 (the "License"). +# A copy of the license is available at https://prosperitylicense.com/versions/3.0.0 +# For details, see the LICENSE file. +# Commercial use beyond a 30-day trial requires a separate license. 
def test_llm_mutator_no_failures() -> None:
    """With an empty failure list the instruction comes back as-is and the LLM is never called."""
    llm = MagicMock(spec=LLMClient)
    mutator = LLMInstructionMutator(llm, OptimizerConfig())

    assert mutator.mutate("original", []) == "original"
    llm.generate.assert_not_called()
def test_llm_mutator_cleanup_markdown() -> None:
    """A response wrapped in a Markdown code fence is returned without the fence."""
    fenced = "```\ncleaned instruction\n```"
    llm = MagicMock(spec=LLMClient)
    llm.generate.return_value = LLMResponse(content=fenced, usage={}, cost_usd=0.0)

    failing = [TrainingExample(inputs={"q": "fail"}, reference="ref")]
    rewritten = LLMInstructionMutator(llm, OptimizerConfig()).mutate("original", failing)

    assert rewritten == "cleaned instruction"
def test_random_selector_oversized_request() -> None:
    """Requesting more examples than the dataset holds returns every example exactly once."""
    examples = [
        TrainingExample(inputs={"q": "1"}, reference="1"),
        TrainingExample(inputs={"q": "2"}, reference="2"),
    ]
    ds = Dataset(examples)

    selected = RandomSelector().select(ds, k=5)

    # k > len(ds): the result is capped at the dataset size.
    assert len(selected) == len(examples)
    # Equal length plus "every (distinct) example is present" rules out both
    # dropped and duplicated items; ordering is deliberately not asserted.
    for ex in examples:
        assert ex in selected
def test_select_k_negative() -> None:
    """A negative k is rejected with ValueError."""
    single_item = Dataset([TrainingExample(inputs={"a": 1}, reference=1)])
    selector = RandomSelector()

    # random.sample raises ValueError for negative k
    with pytest.raises(ValueError):
        selector.select(single_item, k=-1)
def test_semantic_selector_clustering() -> None:
    """SemanticSelector fits the (patched) KMeans and returns k examples."""
    examples = [TrainingExample(inputs={"q": i}, reference=i) for i in range(10)]
    ds = Dataset(examples)

    # Ten 2-D vectors in two well-separated groups: five near the origin, five near (10, 10).
    near_origin = [[0.0 + i * 0.1, 0.0] for i in range(5)]
    far_away = [[10.0 + i * 0.1, 10.0] for i in range(5)]
    response = EmbeddingResponse(
        embeddings=near_origin + far_away, total_tokens=100, cost_usd=0.001, usage={}
    )

    provider = MagicMock(spec=EmbeddingProvider)
    provider.embed.return_value = response

    with patch("coreason_optimizer.strategies.selector.KMeans") as kmeans_cls:
        fitted = MagicMock()
        kmeans_cls.return_value = fitted

        # Simulated clustering result: indices 0-4 in cluster 0, indices 5-9 in cluster 1.
        fitted.labels_ = np.array([0] * 5 + [1] * 5)
        fitted.cluster_centers_ = np.array([[0.0, 0.0], [10.0, 10.0]])

        selector = SemanticSelector(embedding_provider=provider, seed=42)
        selected = selector.select(ds, k=2)

        assert len(selected) == 2
        assert fitted.fit.called
def test_logger_creates_directory_coverage() -> None:
    """
    Reloading the logger module must create the 'logs' directory when it is missing.

    This exercises the 'if not log_path.exists():' block of the module-level code.
    When the pre-existing directory cannot be removed (for example because a log
    file is held open), the test is skipped instead of passing without having
    exercised that block.
    """
    import pytest  # local import: this module's import block does not bring in pytest

    log_path = Path("logs")

    # 1. Start from a state in which the directory is absent.
    shutil.rmtree(log_path, ignore_errors=True)
    if log_path.exists():
        pytest.skip("could not remove the existing 'logs' directory; creation branch cannot be exercised")

    # 2. Reload the module so that its module-level code runs again.
    importlib.reload(coreason_optimizer.utils.logger)

    # 3. The reload must have created the directory (is_dir() implies exists()).
    assert log_path.is_dir()
def test_tune_mipro_success(runner: CliRunner, mock_agent_file: str, mock_dataset_file: str, tmp_path: Path) -> None:
    """`tune --strategy mipro` exits with code 0 when MiproOptimizer.compile returns a manifest."""
    output_file = str(tmp_path / "out_mipro.json")
    manifest = OptimizedManifest(
        agent_id="test",
        base_model="gpt-4",
        optimized_instruction="Opt",
        few_shot_examples=[],
        performance_metric=1.0,
        optimization_run_id="1",
    )

    # NOTE: OpenAIEmbeddingClient is not patched here; this invocation does not
    # request the semantic selector. A patch that closes before the CLI runs
    # would have no effect on it anyway.
    with (
        patch("coreason_optimizer.main.load_agent_from_path") as mock_load,
        patch("coreason_optimizer.main.OpenAIClient"),
        patch("coreason_optimizer.strategies.mipro.MiproOptimizer.compile") as mock_compile,
    ):
        mock_load.return_value = MagicMock()
        mock_compile.return_value = manifest

        result = runner.invoke(
            cli,
            [
                "tune",
                "--agent",
                mock_agent_file,
                "--dataset",
                mock_dataset_file,
                "--strategy",
                "mipro",
                "--output",
                output_file,
            ],
        )

    assert result.exit_code == 0
"--output", + "out_sem.json", + ], + ) + assert result.exit_code != 0 + assert "Failed to initialize OpenAI Embedding Client" in result.output + + +def test_tune_fail_load_agent(runner: CliRunner, mock_agent_file: str, mock_dataset_file: str) -> None: + with patch("coreason_optimizer.main.load_agent_from_path", side_effect=Exception("Load Error")): + result = runner.invoke(cli, ["tune", "--agent", mock_agent_file, "--dataset", mock_dataset_file]) + assert result.exit_code != 0 + # Check for part of exception message that propagates + assert "Load Error" in result.output + + +def test_tune_fail_dataset(runner: CliRunner, mock_agent_file: str) -> None: + with patch("coreason_optimizer.main.load_agent_from_path"): + result = runner.invoke(cli, ["tune", "--agent", mock_agent_file, "--dataset", "missing.jsonl"]) + assert result.exit_code != 0 + # Dataset.from_jsonl raises FileNotFoundError + # ClickException format: Error: ... + assert "missing.jsonl" in result.output or "File not found" in result.output + + +def test_tune_fail_dataset_invalid_ext(runner: CliRunner, mock_agent_file: str) -> None: + with patch("coreason_optimizer.main.load_agent_from_path"): + result = runner.invoke(cli, ["tune", "--agent", mock_agent_file, "--dataset", "invalid.txt"]) + assert result.exit_code != 0 + assert "Unsupported file format" in result.output + + +def test_tune_fail_client_init(runner: CliRunner, mock_agent_file: str, mock_dataset_file: str) -> None: + with patch("coreason_optimizer.main.load_agent_from_path"): + with patch("coreason_optimizer.main.OpenAIClient", side_effect=Exception("Auth Error")): + result = runner.invoke(cli, ["tune", "--agent", mock_agent_file, "--dataset", mock_dataset_file]) + assert result.exit_code != 0 + assert "Failed to initialize OpenAI Client" in result.output + + +def test_tune_compile_fail(runner: CliRunner, mock_agent_file: str, mock_dataset_file: str) -> None: + with patch("coreason_optimizer.main.load_agent_from_path"): + with 
def test_evaluate_fail_dataset_csv_inference(runner: CliRunner, mock_manifest_file: str, tmp_path: Path) -> None:
    """`evaluate` fails with a schema error for a CSV dataset when the manifest has no few-shot examples."""
    # The shared manifest fixture contains an example, so a separate manifest
    # without any examples is written for this test.
    out_dir = tmp_path / "out"
    # The mock_manifest_file fixture already creates this directory; exist_ok
    # makes the test independent of that side effect.
    out_dir.mkdir(exist_ok=True)
    manifest_path = out_dir / "manifest_empty.json"

    manifest = OptimizedManifest(
        agent_id="agent",
        base_model="gpt-4",
        optimized_instruction="Instr",
        few_shot_examples=[],  # EMPTY: nothing to infer CSV columns from
        performance_metric=1.0,
        optimization_run_id="run",
    )
    manifest_path.write_text(manifest.model_dump_json())

    # An empty CSV file is enough to reach the schema-inference step.
    csv_path = tmp_path / "data.csv"
    csv_path.touch()

    result = runner.invoke(cli, ["evaluate", "--manifest", str(manifest_path), "--dataset", str(csv_path)])
    assert result.exit_code != 0
    assert "Cannot infer CSV schema" in result.output
def test_evaluate_client_init_fail(runner: CliRunner, mock_manifest_file: str, mock_dataset_file: str) -> None:
    """`evaluate` reports a client-initialization failure when OpenAIClient cannot be constructed."""
    argv = ["evaluate", "--manifest", mock_manifest_file, "--dataset", mock_dataset_file]
    failing_client = patch("coreason_optimizer.main.OpenAIClient", side_effect=Exception("Client Error"))

    with failing_client:
        result = runner.invoke(cli, argv)

    assert result.exit_code != 0
    assert "Failed to initialize OpenAI Client" in result.output
def test_tune_metric_error(runner: CliRunner, mock_agent_file: str, mock_dataset_file: str) -> None:
    """`tune` surfaces the ValueError message raised while resolving the metric."""
    # No --metric flag is passed: the tune command does not define that option.
    argv = ["tune", "--agent", mock_agent_file, "--dataset", mock_dataset_file]

    with (
        patch("coreason_optimizer.main.load_agent_from_path"),
        patch("coreason_optimizer.main.OpenAIClient"),
        patch("coreason_optimizer.main.MetricFactory.get", side_effect=ValueError("Invalid Metric")),
    ):
        result = runner.invoke(cli, argv)

    assert result.exit_code != 0
    assert "Invalid Metric" in result.output
def test_evaluate_client_init_exception(runner: CliRunner, mock_manifest_file: str, mock_dataset_file: str) -> None:
    """`evaluate` exits non-zero with a client-initialization message when OpenAIClient raises."""
    argv = ["evaluate", "--manifest", mock_manifest_file, "--dataset", mock_dataset_file]
    broken_client = patch("coreason_optimizer.main.OpenAIClient", side_effect=Exception("Client Init Fail"))

    with broken_client:
        result = runner.invoke(cli, argv)

    assert result.exit_code != 0
    assert "Failed to initialize OpenAI Client" in result.output
CliRunner, mock_agent_file: str, mock_dataset_csv: str, tmp_path: Path) -> None: + """Test loading CSV dataset in tune.""" + output_file = str(tmp_path / "out_csv.json") + with patch("coreason_optimizer.main.load_agent_from_path") as mock_load: + # Construct needs to define inputs + construct = MagicMock(spec=Construct) + construct.inputs = ["input"] + mock_load.return_value = construct + + with patch("coreason_optimizer.main.OpenAIClient"): + with patch("coreason_optimizer.strategies.mipro.MiproOptimizer.compile") as mock_compile: + mock_compile.return_value = OptimizedManifest( + agent_id="test", + base_model="m", + optimized_instruction="i", + few_shot_examples=[], + performance_metric=1, + optimization_run_id="1", + ) + result = runner.invoke( + cli, ["tune", "--agent", mock_agent_file, "--dataset", mock_dataset_csv, "--output", output_file] + ) + assert result.exit_code == 0 + + +def test_tune_options_overrides( + runner: CliRunner, mock_agent_file: str, mock_dataset_file: str, tmp_path: Path +) -> None: + """Test overriding config options via CLI.""" + output_file = str(tmp_path / "out_opts.json") + with patch("coreason_optimizer.main.load_agent_from_path"): + with patch("coreason_optimizer.main.OpenAIClient"): + with patch("coreason_optimizer.strategies.mipro.MiproOptimizer.__init__", return_value=None) as mock_init: + # We mock init to check config passed + with patch("coreason_optimizer.strategies.mipro.MiproOptimizer.compile"): + runner.invoke( + cli, + [ + "tune", + "--agent", + mock_agent_file, + "--dataset", + mock_dataset_file, + "--base-model", + "gpt-3.5", + "--epochs", + "5", + "--demos", + "2", + "--output", + output_file, + ], + ) + # Retrieve the config object passed to MiproOptimizer + args, kwargs = mock_init.call_args + config = args[2] # 3rd positional arg is config + assert config.target_model == "gpt-3.5" + assert config.max_rounds == 5 + assert config.max_bootstrapped_demos == 2 + + +def test_evaluate_csv_dataset(runner: CliRunner, 
mock_manifest_file: str, mock_dataset_csv: str) -> None: + """Test evaluating with a CSV dataset (inferring schema from manifest).""" + with patch("coreason_optimizer.main.OpenAIClient") as MockClient: + mock_client = MagicMock() + mock_client.generate.return_value = LLMResponse(content="a1", usage={}) + MockClient.return_value = mock_client + + result = runner.invoke(cli, ["evaluate", "--manifest", mock_manifest_file, "--dataset", mock_dataset_csv]) + + assert result.exit_code == 0 + assert "Evaluation Complete" in result.output diff --git a/tests/test_readme_usage.py b/tests/test_readme_usage.py new file mode 100644 index 0000000..68a90b3 --- /dev/null +++ b/tests/test_readme_usage.py @@ -0,0 +1,138 @@ +# Copyright (c) 2025 CoReason, Inc. +# +# This software is proprietary and dual-licensed. +# Licensed under the Prosperity Public License 3.0 (the "License"). +# A copy of the license is available at https://prosperitylicense.com/versions/3.0.0 +# For details, see the LICENSE file. +# Commercial use beyond a 30-day trial requires a separate license. +# +# Source Code: https://github.com/CoReason-AI/coreason_optimizer + +from pathlib import Path +from typing import cast +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from coreason_optimizer.core.client import OpenAIClient +from coreason_optimizer.core.config import OptimizerConfig +from coreason_optimizer.core.interfaces import Construct +from coreason_optimizer.core.metrics import MetricFactory +from coreason_optimizer.core.models import OptimizedManifest +from coreason_optimizer.data.loader import Dataset +from coreason_optimizer.strategies.mipro import MiproOptimizer + + +class MockAgent: + """A valid agent for testing.""" + + system_prompt = "You are a helper." + inputs = ["question"] + outputs = ["answer"] + + +def test_readme_library_usage_flow(tmp_path: Path) -> None: + """ + Verify the code snippet in 'Library Usage' section of README.md works. 
+ """ + # Setup dummy data + train_csv = tmp_path / "train.csv" + train_csv.write_text("question,answer\nq1,a1\n", encoding="utf-8") + + val_csv = tmp_path / "val.csv" + val_csv.write_text("question,answer\nq2,a2\n", encoding="utf-8") + + # 1. Configure + config = OptimizerConfig( + target_model="gpt-4o", + budget_limit_usd=5.0, + max_rounds=1, # Reduced for test speed + ) + + # 2. Initialize Components + # Mock AsyncOpenAI because OpenAIClient uses it internally + with patch("coreason_optimizer.core.client.AsyncOpenAI") as MockAsyncOpenAI: + mock_client_instance = AsyncMock() + MockAsyncOpenAI.return_value = mock_client_instance + + # Mock generate response for Mipro diagnosis/eval + mock_resp = MagicMock() + mock_resp.content = "a1" + mock_resp.usage.prompt_tokens = 10 + mock_resp.usage.completion_tokens = 10 + mock_resp.usage.total_tokens = 20 + + # Ensure create is async mock + mock_client_instance.chat.completions.create = AsyncMock() + + # Setup return value of the async call + # The result of await create() should be the response object + mock_response_object = MagicMock() + mock_response_object.choices = [MagicMock(message=MagicMock(content="a1"))] + mock_response_object.usage = mock_resp.usage + + mock_client_instance.chat.completions.create.return_value = mock_response_object + + # Mock close + mock_client_instance.close = AsyncMock() + + client = OpenAIClient() + metric = MetricFactory.get("exact_match") + + optimizer = MiproOptimizer(client, metric, config) + + # 3. Load Data + train_set = Dataset.from_csv(train_csv, input_cols=["question"], reference_col="answer") + val_set = Dataset.from_csv(val_csv, input_cols=["question"], reference_col="answer") + + # 4. Compile + agent = MockAgent() + manifest = optimizer.compile(agent, list(train_set), list(val_set)) + + # 5. 
Save (Verify object) + assert isinstance(manifest, OptimizedManifest) + assert manifest.base_model == "gpt-4o" + assert manifest.performance_metric is not None + + +def test_invalid_agent_protocol() -> None: + """Test using an agent that does not satisfy the Construct protocol.""" + + class InvalidAgent: + # Missing inputs/outputs + system_prompt = "broken" + + config = OptimizerConfig() + with patch("coreason_optimizer.core.client.AsyncOpenAI"): + client = OpenAIClient() + metric = MetricFactory.get("exact_match") + optimizer = MiproOptimizer(client, metric, config) + + # MiproOptimizer.compile calls methods on agent. + # It handles missing attributes gracefully or crashes depending on usage. + # Since we use mocks and Empty dataset (implicit in diagnosis if we don't pass one? + # No, diagnosis iterates trainset). We need to pass a trainset to trigger agent usage. + + # Cast to Construct to bypass mypy check since we are testing runtime behavior + agent = cast(Construct, InvalidAgent()) + manifest = optimizer.compile(agent, [], []) + assert isinstance(manifest, OptimizedManifest) + + +def test_unknown_metric() -> None: + """Test requesting an unknown metric.""" + with pytest.raises(ValueError, match="Unknown metric: unknown"): + MetricFactory.get("unknown") + + +def test_mipro_missing_embeddings_for_semantic() -> None: + """Test error when semantic selector is requested but no embedding provider is given.""" + config = OptimizerConfig(selector_type="semantic") + + with patch("coreason_optimizer.core.client.AsyncOpenAI"): + client = OpenAIClient() + metric = MetricFactory.get("exact_match") + + # Should raise ValueError in __init__ + with pytest.raises(ValueError, match="Embedding provider is required for semantic selection"): + MiproOptimizer(client, metric, config, embedding_provider=None) diff --git a/tests/test_utils_import.py b/tests/test_utils_import.py new file mode 100644 index 0000000..a8e3e00 --- /dev/null +++ b/tests/test_utils_import.py @@ -0,0 +1,149 
@@ +# Copyright (c) 2025 CoReason, Inc. +# +# This software is proprietary and dual-licensed. +# Licensed under the Prosperity Public License 3.0 (the "License"). +# A copy of the license is available at https://prosperitylicense.com/versions/3.0.0 +# For details, see the LICENSE file. +# Commercial use beyond a 30-day trial requires a separate license. +# +# Source Code: https://github.com/CoReason-AI/coreason_optimizer + +import sys +from unittest.mock import MagicMock, patch + +import pytest + +from coreason_optimizer.core.interfaces import Construct +from coreason_optimizer.utils.import_utils import load_agent_from_path + + +# Mock Construct +class MockAgent(Construct): + system_prompt: str = "System" + inputs: list[str] = ["input"] + outputs: list[str] = ["output"] + + +def test_load_agent_from_path_success(tmp_path: MagicMock) -> None: + """Test successful loading of an agent from a file.""" + # Create a temporary python file + d = tmp_path / "agents" + d.mkdir() + p = d / "my_agent.py" + p.touch() # Create the file so existence check passes + + with patch("importlib.util.spec_from_file_location") as mock_spec_from_file: + mock_spec = MagicMock() + mock_spec_from_file.return_value = mock_spec + + mock_module = MagicMock() + # Set the agent attribute + mock_module.agent = MockAgent() + + with patch("importlib.util.module_from_spec", return_value=mock_module): + with patch.object(sys, "modules", {}): # Isolate sys.modules + agent = load_agent_from_path(str(p)) + assert isinstance(agent, MockAgent) + assert agent.system_prompt == "System" + + +def test_load_agent_from_path_with_variable(tmp_path: MagicMock) -> None: + """Test loading an agent with a specific variable name.""" + d = tmp_path / "agents" + d.mkdir() + p = d / "my_agent.py" + p.touch() + path_str = f"{p}:my_custom_agent" + + with patch("importlib.util.spec_from_file_location") as mock_spec_from_file: + mock_spec = MagicMock() + mock_spec_from_file.return_value = mock_spec + + mock_module = 
MagicMock() + mock_module.my_custom_agent = MockAgent() + + with patch("importlib.util.module_from_spec", return_value=mock_module): + agent = load_agent_from_path(path_str) + assert isinstance(agent, MockAgent) + + +def test_load_agent_file_not_found() -> None: + """Test FileNotFoundError.""" + with pytest.raises(FileNotFoundError): + load_agent_from_path("non_existent_file.py") + + +def test_load_agent_module_spec_failure(tmp_path: MagicMock) -> None: + """Test ImportError when spec cannot be loaded.""" + d = tmp_path / "agents" + d.mkdir() + p = d / "bad_agent.py" + p.touch() + + with patch("importlib.util.spec_from_file_location", return_value=None): + with pytest.raises(ImportError, match="Could not load spec"): + load_agent_from_path(str(p)) + + +def test_load_agent_execution_failure(tmp_path: MagicMock) -> None: + """Test ImportError when module execution fails.""" + d = tmp_path / "agents" + d.mkdir() + p = d / "error_agent.py" + p.touch() + + with patch("importlib.util.spec_from_file_location") as mock_spec_from_file: + mock_spec = MagicMock() + mock_spec_from_file.return_value = mock_spec + mock_spec.loader.exec_module.side_effect = Exception("Module Error") + + with patch("importlib.util.module_from_spec", return_value=MagicMock()): + with pytest.raises(ImportError, match="Error executing module"): + load_agent_from_path(str(p)) + + +def test_load_agent_variable_not_found(tmp_path: MagicMock) -> None: + """Test AttributeError when variable is missing.""" + d = tmp_path / "agents" + d.mkdir() + p = d / "no_var_agent.py" + p.touch() + + with patch("importlib.util.spec_from_file_location") as mock_spec_from_file: + mock_spec = MagicMock() + mock_spec_from_file.return_value = mock_spec + + # When module_from_spec returns a mock, hasattr(mock, "agent") is usually True (it creates a new mock). + # We need to configure the mock to NOT have 'agent'. 
+ mock_module = MagicMock() + del mock_module.agent # Explicitly delete it if it auto-created, or use spec + + # Alternative: use a real class instance + class EmptyModule: + pass + + with patch("importlib.util.module_from_spec", return_value=EmptyModule()): + with pytest.raises(AttributeError, match="Variable 'agent' not found"): + load_agent_from_path(str(p)) + + +def test_load_agent_protocol_mismatch(tmp_path: MagicMock) -> None: + """Test TypeError when object does not satisfy protocol.""" + d = tmp_path / "agents" + d.mkdir() + p = d / "bad_protocol.py" + p.touch() + + class BadAgent: + pass # Missing attributes + + with patch("importlib.util.spec_from_file_location") as mock_spec_from_file: + mock_spec = MagicMock() + mock_spec_from_file.return_value = mock_spec + + mock_module = MagicMock() + mock_module.agent = BadAgent() + + with patch("importlib.util.module_from_spec", return_value=mock_module): + with pytest.raises(TypeError, match="does not satisfy Construct protocol"): + load_agent_from_path(str(p))