From 5760593fcd731199139f896addef24a440ec1333 Mon Sep 17 00:00:00 2001 From: ethan-wickstrom Date: Mon, 23 Jun 2025 01:52:06 -0500 Subject: [PATCH 01/26] feat: upgrade README generator and update README Rewrite the README generation script using Pydantic and a multi-stage DSPy pipeline for a more robust, context-aware process. The README.md is regenerated with the new script, providing more comprehensive documentation. --- README.md | 266 +++++++++------- scripts/generate_readme.py | 621 +++++++++++++++++++++++-------------- uv.lock | 2 +- 3 files changed, 544 insertions(+), 345 deletions(-) diff --git a/README.md b/README.md index 7f01bca..e30f1b4 100644 --- a/README.md +++ b/README.md @@ -1,185 +1,213 @@ # Robofactor -The robot who refactors: /[^_^]\ +> The robot who refactors: /[^_^]\ -[![PyPI version](https://img.shields.io/pypi/v/robofactor)](https://pypi.org/project/robofactor) +[![PyPI Version](https://badge.fury.io/py/robofactor.svg)](https://pypi.org/project/robofactor/) [![Build Status](https://github.com/ethan-wickstrom/robofactor/actions/workflows/publish.yml/badge.svg)](https://github.com/ethan-wickstrom/robofactor/actions) -[![License](https://img.shields.io/pypi/l/robofactor)](https://github.com/ethan-wickstrom/robofactor) -[![Python versions](https://img.shields.io/pypi/pyversions/robofactor)](https://pypi.org/project/robofactor) +[![License](https://img.shields.io/github/license/ethan-wickstrom/robofactor)](https://github.com/ethan-wickstrom/robofactor/blob/main/LICENSE) -## Table of Contents +**Robofactor** is a DSPy-powered tool designed to analyze, plan, and refactor Python code. It leverages large language models to understand your code and suggest improvements, which are then programmatically verified for correctness and quality before being applied. 
-- [Overview](#overview) -- [Key Features](#key-features) -- [Installation](#installation) -- [Usage](#usage) -- [How It Works](#how-it-works) -- [Development](#development) -- [Contributing](#contributing) +Robofactor is a command-line tool powered by the [DSPy](https://github.com/stanford-futuredata/dspy) framework, designed to automatically analyze, refactor, and evaluate Python code. By leveraging the structured prompting capabilities of DSPy, it can understand your code, propose improvements, and verify the results. The ultimate goal is to serve as an AI-powered assistant that helps improve the quality, readability, and maintainability of your Python projects. ---- +## ✨ Features -## Overview +- 🤖 **AI-Powered Refactoring**: Leverages `dspy-ai` to analyze, plan, and refactor your Python code, improving readability, style, and structure. +- 🐶 **Self-Refactoring Mode**: Use the `--dog-food` flag to turn Robofactor on itself, continuously improving its own codebase. +- 📝 **In-Place File Writing**: Automatically write the improved code back to the source file with the `--write` option. +- 🔧 **Configurable AI Models**: Easily switch between different LLMs for refactoring tasks (`--task-llm`) and prompt generation (`--prompt-llm`). +- 📊 **Experiment Tracing**: Integrates seamlessly with MLflow to trace refactoring runs, monitor performance, and compare results. +- 🧠 **DSPy Model Optimization**: Force a re-optimization of the underlying DSPy model with the `--optimize` flag to fine-tune the refactoring logic. -Robofactor is a DSPy-powered tool to analyze, plan, and refactor Python code. It leverages a modern stack to programmatically assess and improve code quality through a structured, multi-step process. +## 🚀 Installation -The core technologies driving Robofactor include: +Follow these steps to get Robofactor set up on your local machine. 
-* **DSPy (`dspy-ai`):** The project is built on the DSPy framework, which provides a structured way to program with language models. It is used to generate refactoring plans and implement code changes. -* **Railway-Oriented Pipelines (`returns`):** The evaluation process is constructed as a robust pipeline using the `returns` library. This allows for a series of checks (syntax, quality, functional correctness) where any failure gracefully halts the process and returns a descriptive error. -* **Code Quality Analysis (`flake8`):** Code quality is programmatically measured using `flake8`, providing objective metrics to evaluate the effectiveness of the refactoring. -* **Rich CLI (`rich`):** All terminal output, from the refactoring process to the final evaluation results, is formatted for clarity and readability using the `rich` library. +### Prerequisites -## Key Features +- Python 3.12 or higher +- [uv](https://github.com/astral-sh/uv) package manager. If you don't have it, you can install it via pip: + ```bash + pip install uv + ``` -* **AI-Powered Refactoring**: Leverages a `CodeRefactor` module built with DSPy (`dspy_modules.py`) to intelligently analyze and generate refactoring suggestions for Python code snippets. -* **Comprehensive Evaluation Pipeline**: Ensures the quality and correctness of refactored code through a multi-stage process (`evaluation.py`). This pipeline includes syntax validation (`check_syntax`), quality scoring using `flake8` and AST analysis (`check_code_quality`), and functional correctness checks against provided test cases (`check_functional_correctness`). -* **Advanced Code Analysis**: Performs deep static analysis of Python code by parsing it into an Abstract Syntax Tree (AST). The `function_extraction.py` module is dedicated to extracting detailed information about functions, decorators, and parameters directly from the source code structure. 
-* **DSPy Model Optimization**: Features the ability to compile and optimize the underlying DSPy program for improved performance and accuracy. This can be triggered using the `--optimize` flag in the CLI (`main.py`). -* **Interactive CLI**: Provides a user-friendly command-line interface built with `typer`. It uses `rich` to deliver clear, well-formatted, and colorized output for refactoring plans and evaluation results (`main.py`, `ui.py`). -* **MLflow Integration**: Comes with built-in support for experiment tracing using MLflow. Users can configure the MLflow tracking URI and experiment name via CLI arguments (`--mlflow-uri`, `--mlflow-experiment`) to log and monitor refactoring runs (`main.py`). +### Step-by-Step Guide -## Installation +1. **Clone the repository:** -Before you begin, ensure you have Python 3.10 or newer installed on your system. This project uses `uv` for fast and efficient dependency management. + ```bash + git clone https://github.com/ethan-wickstrom/robofactor.git + cd robofactor + ``` -### Standard Installation +2. **Install dependencies:** -To install Robofactor for regular use, clone the repository and run the following command from the project root: + This project uses `uv` to manage dependencies and virtual environments. -```bash -make install -``` + - For a **standard installation** (to use the tool): + + ```bash + uv sync --no-dev + ``` -This command uses `uv` to install the package and its required dependencies. + - For a **development installation** (includes testing and linting tools): + ```bash + uv sync --all-groups + ``` -### Development Installation +## 🚀 Usage -If you plan to contribute to the project, you will need to install the development dependencies, which include tools for testing, linting, and type-checking. 
Use the following command: +To refactor a Python file, run Robofactor from your command line and provide the path to the file: ```bash -make install-dev +robofactor path/to/your/file.py ``` -This will install all dependencies, including the development-specific ones listed in `pyproject.toml`. - -## Usage +To have Robofactor refactor its own source code (a process often called "dogfooding"), use the `--dog-food` flag: -Robofactor is a command-line tool designed to analyze and refactor a single Python file. +```bash +robofactor --dog-food +``` -To refactor a Python file, run the tool with the path to your script. By default, it performs a dry run, printing the proposed changes to the console without modifying the original file. +By default, Robofactor prints the refactored code to the console without modifying the original file. To write the changes back to the source file, include the `--write` flag: ```bash -robofactor path/to/your/file.py +robofactor path/to/your/file.py --write ``` -### Example Workflow +For a complete list of all available commands and options, see the help text below. -1. **Analyze the Code (Dry Run)** +
+Full CLI Options - Run Robofactor on a script to see the proposed refactoring. The tool will display the original code, the refactoring plan, the refactored code, and an evaluation of the changes. +```bash + Usage: robofactor [OPTIONS] [PATH] + + A DSPy-powered tool to analyze, plan, and refactor Python code. + +╭─ Arguments ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ path [PATH] Path to the Python file to refactor. [default: None] │ +╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ --dog-food Self-refactor the script you are running. │ +│ --write Write the refactored code back to the file. │ +│ --optimize Force re-optimization of the DSPy model. │ +│ --task-llm TEXT Model for the main refactoring task. [default: gemini/gemini-2.5-flash-lite-preview-06-17] │ +│ --prompt-llm TEXT Model for generating prompts during optimization. [default: gemini/gemini-2.5-pro] │ +│ --tracing --no-tracing Enable MLflow tracing. [default: tracing] │ +│ --mlflow-uri TEXT MLflow tracking server URI. [default: http://127.0.0.1:5000] │ +│ --mlflow-experiment TEXT MLflow experiment name. [default: robofactor] │ +│ --install-completion Install completion for the current shell. │ +│ --show-completion Show completion for the current shell, to copy it or customize the installation. │ +│ --help Show this message and exit. │ +╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +``` - ```bash - robofactor src/my_app/utils.py - ``` +
-2. **Apply the Changes** +## ⚙️ How It Works - If you are satisfied with the proposed changes, you can write them back to the original file using the `--write` flag. +Robofactor operates through a systematic, three-stage pipeline to ensure that code is not only refactored but also improved in a safe and verifiable way. - ```bash - robofactor --write src/my_app/utils.py - ``` +### 1. Code Parsing & Analysis -### Command-Line Options +The process begins by deeply understanding the target Python file. Instead of treating the code as plain text, Robofactor uses Python's built-in `ast` (Abstract Syntax Tree) module to parse the source code into a structured tree. -Here are some of the key arguments and options available. The descriptions are based on the output of `robofactor --help`. +- **Module:** `src/robofactor/function_extraction.py` +- **Process:** This module traverses the AST to identify individual functions, their signatures (parameters, decorators, return types), docstrings, and body content. This granular understanding allows the AI to focus its efforts on a specific, well-defined piece of code. -| Argument / Option | Description | -| --- | --- | -| `PATH` | The path to the Python file you want to refactor. | -| `--write` | Write the refactored code back to the original file. | -| `--optimize` | Force re-optimization of the underlying DSPy model. | -| `--dog-food` | A special mode to make Robofactor refactor its own source code. | -| `--task-llm ` | Specify the language model for the main refactoring task. | -| `--tracing / --no-tracing` | Enable or disable MLflow tracing for experiment tracking. | -| `--mlflow-uri ` | Set the MLflow tracking server URI (default: `http://127.0.0.1:5000`). | -| `--mlflow-experiment ` | Set the MLflow experiment name (default: `robofactor`). | +### 2. AI-Powered Refactoring with DSPy -For a complete list of all available options, run: +Once a function is isolated, it's handed over to the AI core for refactoring. 
Robofactor leverages `dspy-ai`, a framework for programming with language models, to create a robust and optimizable refactoring program. -```bash -robofactor --help -``` +- **Module:** `src/robofactor/dspy_modules.py` +- **Process:** The parsed function code is fed into a compiled DSPy program. This program instructs a Large Language Model (LLM) to rewrite the code with specific goals: improving readability, adding missing type hints, generating comprehensive docstrings, and adhering to Python best practices. + +### 3. Rigorous Evaluation -## How It Works +The AI's suggested refactoring is never trusted blindly. Before any changes are accepted, the new code is subjected to a strict, multi-faceted evaluation pipeline. This pipeline is built using a railway-oriented approach with the `returns` library, ensuring that if any single check fails, the entire process halts safely. -Robofactor follows a structured, multi-stage process to analyze, refactor, and evaluate Python code. The architecture is designed to be robust and transparent, leveraging modern tools for each step. +- **Module:** `src/robofactor/evaluation.py` +- **Process:** The evaluation consists of several automated checks: + 1. **Syntax Check**: The refactored code is parsed again to ensure it is valid Python syntax. + 2. **Code Quality Analysis**: The code is linted using `flake8` to check for style guide violations, logical errors, and code smells. + 3. **Functional Correctness**: The original function's test cases are executed against the refactored code in a sandboxed environment. This critical step verifies that the refactoring did not alter the function's behavior or introduce regressions. -1. **Code Parsing & Extraction** - The process begins by parsing the target Python file. Using Python's built-in `ast` (Abstract Syntax Tree) module, the tool traverses the code's structure. 
As detailed in `src/robofactor/function_extraction.py`, it identifies every function and extracts comprehensive metadata, including its name, parameters, decorators, and docstring. This creates a structured representation of the code to be refactored. +Only if the refactored code passes all three checks is the process considered a success. -2. **LLM-Powered Refactoring with DSPy** - The extracted function code is then passed to a `dspy.Module`, specifically the `CodeRefactor` class found in `src/robofactor/dspy_modules.py`. This module contains a sophisticated prompt that instructs a Large Language Model (LLM) to analyze the provided code snippet, identify areas for improvement, and generate a refactored version. The LLM's goal is to enhance code quality, readability, and performance while preserving its original functionality. +## 🔧 Configuration -3. **Programmatic Evaluation Pipeline** - Once the LLM returns the refactored code, it undergoes a rigorous, automated evaluation pipeline defined in `src/robofactor/evaluation.py`. This pipeline, built using the `returns` library for robust error handling (railway-oriented programming), consists of several checks: - * **Syntax Check**: Verifies that the generated code is valid Python. - * **Quality Check**: Uses `flake8` to score the code against PEP 8 standards and other common issues. - * **Functional Correctness**: Executes the refactored code against a set of predefined test cases to ensure it still produces the correct output. - If any step fails, the pipeline short-circuits and reports the failure. +You can configure Robofactor's behavior using command-line options, particularly for setting the language models and connecting to an MLflow instance for experiment tracing. -4. **Rich Terminal Display** - Finally, the results of the refactoring and evaluation are presented to the user in the terminal. 
The `src/robofactor/ui.py` module uses the `rich` library to create clear, well-formatted tables and panels that display the original code, the refactored code, the LLM's reasoning, and the detailed evaluation scores. +### Language Models (LLMs) -## Development +Robofactor uses two distinct language models: one for the primary refactoring task and another, typically more powerful, model for the one-time optimization process that generates effective prompts. -To contribute to Robofactor, you'll need to set up a local development environment. This project uses `uv` for fast dependency management and a `Makefile` to provide convenient shortcuts for common tasks. +- `--task-llm`: Specifies the model used for the core refactoring task. + - **Default**: `gemini/gemini-2.5-flash-lite-preview-06-17` +- `--prompt-llm`: Specifies the model used during the DSPy optimization step (`--optimize`) to generate high-quality prompts. + - **Default**: `gemini/gemini-2.5-pro` -First, clone the repository: +**Example:** ```bash -git clone https://github.com/ethan-wickstrom/robofactor.git -cd robofactor +# Use OpenAI models for both tasks +robofactor --task-llm "openai/gpt-4o-mini" --prompt-llm "openai/gpt-4o" path/to/your/file.py ``` -### Setup +### MLflow Tracing -To install all dependencies, including development tools like `ruff`, `mypy`, and `pytest`, run the following command. This will create a virtual environment and install all required packages. +To monitor and debug the DSPy program's execution, Robofactor integrates with MLflow. Tracing is enabled by default. + +- `--no-tracing`: Use this flag to disable MLflow integration entirely. +- `--mlflow-uri`: Sets the URI for your MLflow tracking server. + - **Default**: `http://127.0.0.1:5000` +- `--mlflow-experiment`: Specifies the name of the MLflow experiment where runs will be logged. 
+ - **Default**: `robofactor` + +**Example:** ```bash -make install-dev +# Run with a custom MLflow server and experiment name +robofactor --mlflow-uri "http://your-mlflow-server:5001" --mlflow-experiment "refactor-audits" path/to/your/file.py + +# Run without any MLflow tracing +robofactor --no-tracing path/to/your/file.py ``` -### Common Development Tasks +## 🛠️ Technology Stack -The `Makefile` includes several targets to streamline the development workflow: +Robofactor is built on a modern stack of Python libraries, leveraging the power of LLMs, robust CLI frameworks, and functional programming principles. -* **Run all checks:** To ensure code quality before committing, run all linters, type-checkers, and tests at once. - ```bash - make check - ``` -* **Run tests:** Execute the test suite using pytest. - ```bash - make test - ``` -* **Linting:** Check for code style issues and automatically apply fixes using Ruff. - ```bash - make lint - ``` -* **Formatting:** Format the code using Ruff Formatter and isort. - ```bash - make format - ``` -* **Type-checking:** Perform static type analysis with mypy. - ```bash - make type-check - ``` +- **[DSPy-AI](https://github.com/stanford-futuredata/dspy)**: The core AI programming model used to create, optimize, and execute the refactoring logic with language models. +- **[Typer](https://typer.tiangolo.com/)**: Powers the clean, user-friendly command-line interface. +- **[Rich](https://github.com/Textualize/rich)**: Provides beautiful and informative terminal output, including progress spinners, tables, and syntax-highlighted code. +- **[MLflow](https://mlflow.org/)**: Tracks and visualizes the refactoring process as experiments, enabling detailed analysis of the LLM's behavior. +- **[Flake8](https://flake8.pycqa.org/en/latest/) & `ast`**: Used for static analysis of Python code, checking for syntax errors, code quality issues, and extracting function metadata. 
+- **[Returns](https://returns.readthedocs.io/en/latest/)**: Implements a robust, railway-oriented programming pipeline for evaluating refactored code, ensuring each step is handled safely and declaratively. + +## 🧑‍💻 Development + +Contributions are welcome! To set up the development environment, first clone the repository. This project uses `uv` for dependency management. Install all dependencies, including development tools, with: + +```bash +uv sync --all-groups +``` + +### Available Commands -## Contributing +The project includes several helper commands to streamline development, which can be executed with `uv run <command>`: -Contributions are welcome! If you find a bug, have a feature request, or want to contribute to the code, please open an issue on our GitHub repository. +- **`lint`**: Run linting checks. +- **`format`**: Format code with black and isort. +- **`type-check`**: Run mypy type checking. +- **`test`**: Run all tests. +- **`test-unit`**: Run unit tests only. +- **`test-integration`**: Run integration tests only. +- **`test-coverage`**: Run tests and generate an HTML coverage report. +- **`check`**: Run all checks (lint, type-check, test). +- **`readme`**: Generate README.md using DSPy. -- **Issues:** [https://github.com/ethan-wickstrom/robofactor/issues](https://github.com/ethan-wickstrom/robofactor/issues) +## 📜 License -Please check the existing issues to see if your suggestion has already been discussed. +This project is licensed under the Apache License, Version 2.0. See the `LICENSE` file for more details. diff --git a/scripts/generate_readme.py b/scripts/generate_readme.py index 4402b84..0e9e6c8 100644 --- a/scripts/generate_readme.py +++ b/scripts/generate_readme.py @@ -1,22 +1,29 @@ +#!/usr/bin/env python3 +""" +Intelligent README generator for the robofactor project. + +This module uses DSPy with Pydantic integration to analyze the project structure +and generate a comprehensive README based on extracted information rather than assumptions. 
+""" from __future__ import annotations -import enum -import json +import logging import sys -from dataclasses import asdict, dataclass, is_dataclass from pathlib import Path -from typing import Any +from typing import Protocol import dspy import toml import typer +from pydantic import BaseModel, Field from rich.console import Console from rich.progress import Progress, SpinnerColumn, TextColumn +# Add project root to path for imports try: project_root = Path(__file__).parent.parent.resolve() sys.path.insert(0, str(project_root / "src")) - from robofactor.function_extraction import FunctionInfo, parse_python_source + from robofactor.function_extraction import parse_python_source from robofactor.main import app as cli_app from robofactor.utils import suppress_pydantic_warnings except ImportError as e: @@ -27,112 +34,217 @@ ) sys.exit(1) -# --- Constants --- -PYPROJECT_TOML_FILENAME = "pyproject.toml" -MAKEFILE_FILENAME = "Makefile" -PYPROJECT_PROJECT_KEY = "project" -PYPROJECT_NAME_KEY = "name" -PYPROJECT_DESC_KEY = "description" +# Configure logging +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") +logger = logging.getLogger(__name__) + +# --- Data Models (Pydantic) --- +class FunctionMetadata(BaseModel): + """Metadata about a function extracted from source code.""" + name: str + file_path: str + docstring: str | None + is_async: bool + decorators: list[str] + parameters: list[str] -# --- Data Structures for Generation Pipeline --- -@dataclass(frozen=True) -class FileAnalysis: - """Immutable representation of a source file's content and structure.""" +class SourceFileAnalysis(BaseModel): + """Analysis of a single source file.""" relative_path: str - structure: tuple[FunctionInfo, ...] 
+ functions: list[FunctionMetadata] + imports: list[str] = Field(default_factory=list) + classes: list[str] = Field(default_factory=list) -@dataclass(frozen=True) -class ReadmeSection: - """Represents a proposed section for the README.""" +class ProjectMetadata(BaseModel): + """Basic project metadata from pyproject.toml.""" + name: str + description: str + version: str | None = None + authors: list[str] = Field(default_factory=list) + dependencies: list[str] = Field(default_factory=list) + dev_dependencies: list[str] = Field(default_factory=list) + homepage: str | None = None + repository: str | None = None + + +class DevelopmentEnvironment(BaseModel): + """Extracted development environment information.""" + package_manager: str = Field(description="The package manager used (e.g., uv, pip, poetry)") + install_command: str = Field(description="Command to install the package") + dev_install_command: str = Field(description="Command to install with dev dependencies") + available_commands: dict[str, str] = Field( + default_factory=dict, + description="Available make/task commands and their descriptions" + ) + python_version: str | None = None + + +class ProjectFeatures(BaseModel): + """High-level features extracted from the project.""" + core_technologies: list[str] = Field(description="Main technologies/libraries used") + cli_capabilities: list[str] = Field(description="CLI commands and options available") + key_modules: dict[str, str] = Field( + description="Key modules and their purposes", + default_factory=dict + ) + testing_framework: str | None = None + code_quality_tools: list[str] = Field(default_factory=list) + +class ExtractedContext(BaseModel): + """Complete extracted context for README generation.""" + metadata: ProjectMetadata + environment: DevelopmentEnvironment + features: ProjectFeatures + source_analyses: list[SourceFileAnalysis] + cli_help_text: str + + +class ReadmeSection(BaseModel): + """A section in the README outline.""" title: str description: 
str + priority: int = Field(default=5, ge=1, le=10) -@dataclass(frozen=True) -class GeneratedSection: - """Represents a fully generated section with its Markdown content.""" - +class GeneratedSection(BaseModel): + """A generated README section with content.""" title: str content: str -@dataclass(frozen=True) -class ProjectContext: - """Immutable snapshot of the entire project's state for generation.""" +# --- Service Interfaces (Dependency Injection) --- - project_name: str - project_description: str - source_analyses: tuple[FileAnalysis, ...] - config_files: dict[str, str] - cli_help_text: str +class FileReaderProtocol(Protocol): + """Protocol for file reading operations.""" + def read_file(self, path: Path) -> str: ... + def file_exists(self, path: Path) -> bool: ... -# --- Project Analysis Logic --- -class ProjectAnalyzer: - """Handles all file system I/O and static analysis of the project.""" +class CLIRunnerProtocol(Protocol): + """Protocol for running CLI commands.""" + def get_help_text(self) -> str: ... 
- def __init__(self, root: Path, console: Console): - """Initializes the analyzer.""" - self.root = root - self.console = console - self.source_dir = root / "src" / "robofactor" - def _read_file(self, path: Path) -> str: - """Reads a file, raising a FileNotFoundError on failure.""" +# --- Concrete Service Implementations --- + +class FileReader: + """Handles file system operations.""" + + def read_file(self, path: Path) -> str: + """Read a file's contents.""" try: return path.read_text(encoding="utf-8") except FileNotFoundError: - self.console.print(f"[bold red]Error: File not found at {path}[/]") + logger.error(f"File not found: {path}") raise except Exception as e: - self.console.print(f"[bold red]Error: Failed to read {path}: {e}[/]") + logger.error(f"Failed to read {path}: {e}") raise - def _analyze_source_file(self, path: Path) -> FileAnalysis: - """Parses a Python file to extract its structure.""" - content = self._read_file(path) + def file_exists(self, path: Path) -> bool: + """Check if a file exists.""" + return path.exists() + + +class CLIRunner: + """Handles CLI command execution.""" + + def get_help_text(self) -> str: + """Get the CLI help text.""" try: - # The `parse_python_source` function returns a `Result` container. - # We `unwrap()` it to get the value or propagate the exception on failure. 
- structure_result = parse_python_source(content, module_name=path.name) - structure_iterator = structure_result.unwrap() - return FileAnalysis( - relative_path=str(path.relative_to(self.root)), - structure=tuple(structure_iterator), - ) + from typer.testing import CliRunner + runner = CliRunner() + result = runner.invoke(cli_app, ["--help"], catch_exceptions=False) + + if result.exit_code != 0: + error_msg = f"CLI failed with exit code {result.exit_code}" + logger.error(error_msg) + raise RuntimeError(error_msg) + + return result.stdout except Exception as e: - self.console.print(f"[bold red]Error: Failed to parse AST for {path}: {e}[/]") + logger.error(f"Failed to get CLI help text: {e}") raise - def get_cli_help_text(self) -> str: - """Captures the --help output from the project's Typer CLI.""" - self.console.print("[dim]Capturing CLI help text...[/dim]") - try: - from typer.testing import CliRunner - runner = CliRunner() - cli_runner_result = runner.invoke(cli_app, ["--help"], catch_exceptions=False) +# --- Project Analyzer --- - if cli_runner_result.exit_code != 0: - error_message = f"CLI command failed with exit code {cli_runner_result.exit_code}:\n{cli_runner_result.stderr or cli_runner_result.stdout}" - raise RuntimeError(error_message) +class ProjectAnalyzer: + """Analyzes project structure and extracts information.""" + + def __init__( + self, + root: Path, + file_reader: FileReaderProtocol, + cli_runner: CLIRunnerProtocol, + console: Console | None = None + ): + self.root = root + self.file_reader = file_reader + self.cli_runner = cli_runner + self.console = console or Console() + self.source_dir = root / "src" / "robofactor" + + def analyze_source_file(self, path: Path) -> SourceFileAnalysis: + """Analyze a Python source file.""" + content = self.file_reader.read_file(path) - return cli_runner_result.stdout + try: + result = parse_python_source(content, module_name=path.name) + functions = list(result.unwrap()) + + # Convert FunctionInfo to our 
simplified FunctionMetadata + func_metadata = [ + FunctionMetadata( + name=f.name, + file_path=str(path.relative_to(self.root)), + docstring=f.docstring, + is_async=f.is_async, + decorators=[d.name for d in f.decorators], + parameters=[p.name for p in f.parameters] + ) + for f in functions + ] + return SourceFileAnalysis( + relative_path=str(path.relative_to(self.root)), + functions=func_metadata + ) except Exception as e: - self.console.print(f"[bold red]Error: Failed to get CLI help text: {e}[/]") + logger.error(f"Failed to parse {path}: {e}") raise - def analyze(self) -> ProjectContext: - """Performs a full analysis of the project.""" - self.console.print(f"[dim]Analyzing project at: {self.root}[/dim]") + def extract_project_metadata(self) -> ProjectMetadata: + """Extract metadata from pyproject.toml.""" + pyproject_path = self.root / "pyproject.toml" + content = self.file_reader.read_file(pyproject_path) + data = toml.loads(content) + + project = data.get("project", {}) + deps = project.get("dependencies", []) + dev_deps = data.get("dependency-groups", {}).get("dev", []) + urls = project.get("urls", {}) + + return ProjectMetadata( + name=project.get("name", "Unknown"), + description=project.get("description", ""), + version=project.get("version"), + authors=[a.get("name", "") for a in project.get("authors", [])], + dependencies=deps, + dev_dependencies=dev_deps, + homepage=urls.get("Homepage"), + repository=urls.get("Repository") + ) + def analyze_all_source_files(self) -> list[SourceFileAnalysis]: + """Analyze all Python source files.""" py_files = [p for p in self.source_dir.glob("*.py") if p.name != "__init__.py"] - analyses: list[FileAnalysis] = [] + analyses = [] + with Progress( SpinnerColumn(), TextColumn("[progress.description]{task.description}"), @@ -140,204 +252,212 @@ def analyze(self) -> ProjectContext: transient=True, ) as progress: task = progress.add_task("Analyzing source files...", total=len(py_files)) + for file_path in py_files: 
progress.update(task, description=f"Parsing {file_path.name}") - analyses.append(self._analyze_source_file(file_path)) + analyses.append(self.analyze_source_file(file_path)) progress.advance(task) - config_files: dict[str, str] = {} - required_configs = (PYPROJECT_TOML_FILENAME, MAKEFILE_FILENAME) - for filename in required_configs: - try: - config_files[filename] = self._read_file(self.root / filename) - except FileNotFoundError: - self.console.print(f"[yellow]Warning: Config file '{filename}' not found. Skipping.[/yellow]") - - pyproject_data = toml.loads(config_files.get(PYPROJECT_TOML_FILENAME, "")) - project_name = pyproject_data.get(PYPROJECT_PROJECT_KEY, {}).get( - PYPROJECT_NAME_KEY, "Unknown Project" - ) - project_desc = pyproject_data.get(PYPROJECT_PROJECT_KEY, {}).get( - PYPROJECT_DESC_KEY, "No description found." - ) - cli_help_text = self.get_cli_help_text() + return analyses - return ProjectContext( - project_name=project_name, - project_description=project_desc, - source_analyses=tuple(analyses), - config_files=config_files, - cli_help_text=cli_help_text, - ) + def get_cli_help(self) -> str: + """Get CLI help text.""" + self.console.print("[dim]Capturing CLI help text...[/dim]") + return self.cli_runner.get_help_text() -# --- JSON Serialization --- -def _custom_json_encoder(obj: Any) -> Any: - """A custom encoder to handle dataclasses and other special types.""" - if isinstance(obj, enum.Enum): - return obj.value - if is_dataclass(obj) and not isinstance(obj, type): - return asdict(obj) - if isinstance(obj, Path): - return str(obj) - raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable") +# --- DSPy Signatures with Pydantic --- +class ExtractPackageManager(dspy.Signature): + """Extract the package manager and installation commands from project files.""" -def to_json_string(data: Any) -> str: - """Converts a Python object (including dataclasses) to a JSON string.""" - return json.dumps(data, default=_custom_json_encoder, 
indent=2) + makefile_content: str = dspy.InputField( + desc="Content of the Makefile" + ) + pyproject_content: str = dspy.InputField( + desc="Content of pyproject.toml" + ) + package_manager: str = dspy.OutputField( + desc="The package manager used (e.g., 'uv', 'pip', 'poetry')" + ) + install_command: str = dspy.OutputField( + desc="The exact command to install the package" + ) + dev_install_command: str = dspy.OutputField( + desc="The exact command to install with dev dependencies" + ) -# --- DSPy Signatures for README Generation --- -class GenerateReadmeOutline(dspy.Signature): - """ - Generate a logical and comprehensive outline for a project's README.md file. - - IMPORTANT: You MUST prioritize information from the provided `project_context` - over your own knowledge. The context contains the ground truth for this project, - including file contents and configurations. - """ - - project_context: str = dspy.InputField( - desc=( - "A JSON object containing the project's ground truth. It includes: " - "'project_name', 'project_description', 'source_analyses' (AST parsing of source files), " - "'cli_help_text' (output of --help), and 'config_files'. The 'config_files' key holds the " - "full content of important files like 'pyproject.toml' and 'Makefile'. " - "Use 'Makefile' for installation and development commands. " - "Use 'pyproject.toml' for dependencies and project metadata." - ) +class ExtractDevelopmentCommands(dspy.Signature): + """Extract available development commands from Makefile.""" + + makefile_content: str = dspy.InputField( + desc="Content of the Makefile" ) - outline: list[dict] = dspy.OutputField( - desc=( - "A list of sections for the README. Each item should be a dictionary with 'title' and 'description' keys. " - "The description must specify what content to include in that section, referencing the ground truth from the project_context." 
- ) + commands: dict[str, str] = dspy.OutputField( + desc="Dictionary mapping command names to their descriptions" ) -class GenerateSectionContent(dspy.Signature): - """ - Generate the Markdown content for a single section of the README. - - IMPORTANT: You MUST prioritize information from the provided `project_context` - over your own knowledge. Adhere strictly to the file contents provided in the context. - For example, if the Makefile specifies using 'uv', you must use 'uv' in the installation instructions. - """ - - project_context: str = dspy.InputField( - desc=( - "A JSON object containing all analyzed information about the project. This is the ground truth. " - "It includes 'project_name', 'project_description', 'source_analyses', 'cli_help_text', and " - "'config_files' (containing the content of 'pyproject.toml' and 'Makefile')." - ) - ) - section_title: str = dspy.InputField(desc="The title of the section to generate.") - section_description: str = dspy.InputField( - desc="A description of the content that should be in this section, as determined by the outline." +class ExtractProjectFeatures(dspy.Signature): + """Extract key features and technologies from the project.""" + + metadata: ProjectMetadata = dspy.InputField() + source_analyses: list[SourceFileAnalysis] = dspy.InputField() + cli_help_text: str = dspy.InputField() + features: ProjectFeatures = dspy.OutputField() + + +class GenerateReadmeOutline(dspy.Signature): + """Generate a README outline based on extracted context.""" + + context: ExtractedContext = dspy.InputField() + sections: list[ReadmeSection] = dspy.OutputField( + desc="List of sections for the README, ordered by priority" ) - section_content: str = dspy.OutputField( - desc="The fully-formed Markdown content for this specific section, grounded in the provided context." 
+ + +class GenerateSectionContent(dspy.Signature): + """Generate content for a specific README section.""" + + context: ExtractedContext = dspy.InputField() + section: ReadmeSection = dspy.InputField() + content: str = dspy.OutputField( + desc="Markdown content for this section" ) class AssembleReadme(dspy.Signature): - """ - Assemble the final README.md from a list of generated sections. - - Ensure the final output is clean, well-formatted, and includes a table of contents. - """ + """Assemble the final README from generated sections.""" - project_name: str = dspy.InputField(desc="The name of the project.") - project_description: str = dspy.InputField(desc="A one-line description of the project.") - generated_sections: str = dspy.InputField( - desc="A JSON string of a list of generated sections, each with a 'title' and 'content'key." - ) + project_name: str = dspy.InputField() + project_description: str = dspy.InputField() + sections: list[GeneratedSection] = dspy.InputField() readme_content: str = dspy.OutputField( - desc=( - "The complete, final README.md content. It must include a title, the project description, " - "a table of contents, and all the provided sections formatted professionally with Markdown." 
- ) + desc="Complete README.md content with proper formatting" ) -# --- The Main DSPy Module --- +# --- DSPy Modules --- + +class ContextExtractor(dspy.Module): + """Extracts specific context from project files.""" + + def __init__(self): + super().__init__() + self.package_extractor = dspy.ChainOfThought(ExtractPackageManager) + self.commands_extractor = dspy.ChainOfThought(ExtractDevelopmentCommands) + self.features_extractor = dspy.ChainOfThought(ExtractProjectFeatures) + + def forward( + self, + metadata: ProjectMetadata, + source_analyses: list[SourceFileAnalysis], + makefile_content: str, + pyproject_content: str, + cli_help_text: str, + python_version: str | None = None + ) -> ExtractedContext: + """Extract all context from project files.""" + + # Extract package manager and install commands + pkg_result = self.package_extractor( + makefile_content=makefile_content, + pyproject_content=pyproject_content + ) + + # Extract development commands + cmd_result = self.commands_extractor( + makefile_content=makefile_content + ) + + # Create development environment + environment = DevelopmentEnvironment( + package_manager=pkg_result.package_manager, + install_command=pkg_result.install_command, + dev_install_command=pkg_result.dev_install_command, + available_commands=cmd_result.commands, + python_version=python_version + ) + + # Extract project features + features_result = self.features_extractor( + metadata=metadata, + source_analyses=source_analyses, + cli_help_text=cli_help_text + ) + + return ExtractedContext( + metadata=metadata, + environment=environment, + features=features_result.features, + source_analyses=source_analyses, + cli_help_text=cli_help_text + ) + + class ReadmeGenerator(dspy.Module): - """A DSPy module that orchestrates the entire README generation process.""" + """Generates README content from extracted context.""" def __init__(self): - """Initializes the sub-modules for each step of the generation pipeline.""" super().__init__() 
self.outline_generator = dspy.ChainOfThought(GenerateReadmeOutline) self.section_generator = dspy.ChainOfThought(GenerateSectionContent) self.assembler = dspy.ChainOfThought(AssembleReadme) - def forward(self, project_context: ProjectContext) -> dspy.Prediction: - """ - Executes the two-stage README generation pipeline. - - Args: - project_context: The analyzed state of the project. + def forward(self, context: ExtractedContext) -> dspy.Prediction: + """Generate complete README from context.""" - Returns: - A dspy.Prediction object containing the final readme_content and - intermediate artifacts for inspection. - """ - context_json = to_json_string(project_context) + # Generate outline + outline_result = self.outline_generator(context=context) + sections = sorted(outline_result.sections, key=lambda s: s.priority) - # Stage 1: Generate the README outline - outline_prediction = self.outline_generator(project_context=context_json) - readme_outline = [ - ReadmeSection(title=s["title"], description=s["description"]) - for s in outline_prediction.outline - ] - - # Stage 2: Generate content for each section in the outline + # Generate content for each section generated_sections = [] - for section in readme_outline: - section_content_prediction = self.section_generator( - project_context=context_json, - section_title=section.title, - section_description=section.description, + for section in sections: + section_result = self.section_generator( + context=context, + section=section ) generated_sections.append( GeneratedSection( title=section.title, - content=section_content_prediction.section_content, + content=section_result.content ) ) - # Stage 3: Assemble the final README - final_prediction = self.assembler( - project_name=project_context.project_name, - project_description=project_context.project_description, - generated_sections=to_json_string(generated_sections), + # Assemble final README + final_result = self.assembler( + project_name=context.metadata.name, + 
project_description=context.metadata.description, + sections=generated_sections ) return dspy.Prediction( - outline=readme_outline, + outline=sections, generated_sections=generated_sections, - readme_content=final_prediction.readme_content, + readme_content=final_result.readme_content ) -# --- CLI Application --- +# --- Main Application --- + app = typer.Typer( - help="An intelligent, context-aware README generator for the robofactor project.", + help="Intelligent README generator for the robofactor project", add_completion=False, no_args_is_help=True, pretty_exceptions_show_locals=False, ) -def configure_dspy(model_name: str, console: Console) -> None: - """Configures the DSPy framework with the specified language model.""" - console.print(f"[dim]Configuring LLM: [bold]{model_name}[/bold]...[/dim]") +def configure_dspy(model_name: str) -> None: + """Configure DSPy with the specified model.""" + logger.info(f"Configuring DSPy with model: {model_name}") try: llm = dspy.LM(model_name, max_tokens=64000) dspy.configure(lm=llm) except Exception as e: - console.print(f"[bold red]Error: Failed to configure DSPy with model '{model_name}': {e}[/]") + logger.error(f"Failed to configure DSPy: {e}") raise typer.Exit(code=1) @@ -348,7 +468,6 @@ def generate( "--output", "-o", help="Path to write the generated README.md file.", - show_default=True, writable=True, ), model: str = typer.Option( @@ -356,39 +475,91 @@ def generate( "--model", "-m", help="Language model to use for generation.", - show_default=True, + ), + verbose: bool = typer.Option( + False, + "--verbose", + "-v", + help="Enable verbose logging.", ), ) -> None: - """ - Analyzes the project and generates a comprehensive README.md. 
- """ + """Analyze the project and generate a comprehensive README.""" suppress_pydantic_warnings() + + if verbose: + logging.getLogger().setLevel(logging.DEBUG) + console = Console() console.print("\n[bold cyan]═══ Robofactor README Generator ═══[/bold cyan]\n") try: - configure_dspy(model, console) + # Configure DSPy + configure_dspy(model) + + # Initialize services + file_reader = FileReader() + cli_runner = CLIRunner() + + # Analyze project + console.print("[dim]Analyzing project structure...[/dim]") + analyzer = ProjectAnalyzer(project_root, file_reader, cli_runner, console) + + # Extract metadata + metadata = analyzer.extract_project_metadata() + logger.info(f"Extracted metadata for project: {metadata.name}") + + # Analyze source files + source_analyses = analyzer.analyze_all_source_files() + logger.info(f"Analyzed {len(source_analyses)} source files") + + # Get CLI help + cli_help_text = analyzer.get_cli_help() + + # Read additional files + makefile_content = file_reader.read_file(project_root / "Makefile") + pyproject_content = file_reader.read_file(project_root / "pyproject.toml") + + # Read Python version if available + python_version = None + python_version_file = project_root / ".python-version" + if file_reader.file_exists(python_version_file): + python_version = file_reader.read_file(python_version_file).strip() + + # Extract context + console.print("[bold blue]Extracting project context...[/bold blue]") + context_extractor = ContextExtractor() + context = context_extractor( + metadata=metadata, + source_analyses=source_analyses, + makefile_content=makefile_content, + pyproject_content=pyproject_content, + cli_help_text=cli_help_text, + python_version=python_version + ) - analyzer = ProjectAnalyzer(project_root, console) - project_context = analyzer.analyze() + logger.info(f"Extracted context - Package manager: {context.environment.package_manager}") + logger.info(f"Available commands: {list(context.environment.available_commands.keys())}") - 
console.print("[bold blue]Starting README generation pipeline...[/bold blue]") + # Generate README + console.print("[bold green]Generating README content...[/bold green]") readme_generator = ReadmeGenerator() + with console.status("[bold green]Synthesizing README with DSPy...[/]", spinner="dots"): - prediction = readme_generator(project_context=project_context) - console.print("[green]✓ Generation pipeline complete.[/green]") + result = readme_generator(context=context) + + console.print("[green]✓ Generation complete.[/green]") + # Write output console.print(f"[dim]Writing output to [bold]{output}[/bold]...[/dim]") - output.write_text(prediction.readme_content, encoding="utf-8") + output.write_text(result.readme_content, encoding="utf-8") + + console.print(f"\n[bold green]✅ README successfully generated at: {output}[/bold green]") except Exception as e: - console.print(f"\n[bold red]❌ An unexpected error occurred:[/bold red]\n{e}") + logger.error(f"Generation failed: {e}", exc_info=True) + console.print(f"\n[bold red]❌ An error occurred:[/bold red]\n{e}") raise typer.Exit(code=1) - console.print( - f"\n[bold green]✅ README successfully generated at: {output}[/bold green]" - ) - if __name__ == "__main__": app() diff --git a/uv.lock b/uv.lock index 8ba797a..9308b15 100644 --- a/uv.lock +++ b/uv.lock @@ -2436,7 +2436,7 @@ wheels = [ [[package]] name = "robofactor" -version = "0.1.0" +version = "0.1.1" source = { editable = "." 
} dependencies = [ { name = "dspy-ai" }, From 58da311fc54ad3a493093e22cf0544ea3b657391 Mon Sep 17 00:00:00 2001 From: ethan-wickstrom Date: Mon, 23 Jun 2025 21:13:04 -0500 Subject: [PATCH 02/26] chore(typings): add DSPy 2.6.19 type stubs for improved type checking Adds comprehensive .pyi type stub files for the DSPy library (version 2.6.19), including all core modules, adapters, clients, datasets, evaluation, experimental features, prediction modules, primitives, propose, retrieve, retrievers, signatures, streaming, teleprompt, and utility modules. These stubs enable static type checking and improved IDE support for DSPy-based projects. The stubs were generated by pyright and cover all public APIs, class methods, and function signatures, facilitating better integration and type safety for downstream users. --- typings/dspy/__init__.pyi | 45 ++++ typings/dspy/__metadata__.pyi | 10 + typings/dspy/adapters/__init__.pyi | 22 ++ typings/dspy/adapters/base.pyi | 207 ++++++++++++++++ typings/dspy/adapters/chat_adapter.pyi | 99 ++++++++ typings/dspy/adapters/json_adapter.pyi | 48 ++++ typings/dspy/adapters/two_step_adapter.pyi | 87 +++++++ typings/dspy/adapters/types/__init__.pyi | 11 + typings/dspy/adapters/types/audio.pyi | 56 +++++ typings/dspy/adapters/types/base_type.pyi | 72 ++++++ typings/dspy/adapters/types/history.pyi | 64 +++++ typings/dspy/adapters/types/image.pyi | 57 +++++ typings/dspy/adapters/types/tool.pyi | 168 +++++++++++++ typings/dspy/adapters/utils.pyi | 52 ++++ typings/dspy/clients/__init__.pyi | 59 +++++ typings/dspy/clients/base_lm.pyi | 77 ++++++ typings/dspy/clients/cache.pyi | 79 ++++++ typings/dspy/clients/databricks.pyi | 45 ++++ typings/dspy/clients/embedding.pyi | 98 ++++++++ typings/dspy/clients/lm.pyi | 89 +++++++ typings/dspy/clients/lm_local.pyi | 56 +++++ typings/dspy/clients/lm_local_arbor.pyi | 77 ++++++ typings/dspy/clients/openai.pyi | 47 ++++ typings/dspy/clients/provider.pyi | 64 +++++ typings/dspy/clients/utils_finetune.pyi | 49 
++++ typings/dspy/datasets/__init__.pyi | 12 + typings/dspy/datasets/alfworld/__init__.pyi | 5 + typings/dspy/datasets/alfworld/alfworld.pyi | 46 ++++ typings/dspy/datasets/colors.pyi | 12 + typings/dspy/datasets/dataloader.pyi | 47 ++++ typings/dspy/datasets/dataset.pyi | 38 +++ typings/dspy/datasets/hotpotqa.pyi | 14 ++ typings/dspy/datasets/math.pyi | 10 + typings/dspy/dsp/__init__.pyi | 3 + typings/dspy/dsp/colbertv2.pyi | 58 +++++ typings/dspy/dsp/utils/__init__.pyi | 8 + typings/dspy/dsp/utils/dpr.pyi | 121 +++++++++ typings/dspy/dsp/utils/metrics.pyi | 20 ++ typings/dspy/dsp/utils/settings.pyi | 71 ++++++ typings/dspy/dsp/utils/utils.pyi | 80 ++++++ typings/dspy/evaluate/__init__.pyi | 18 ++ typings/dspy/evaluate/auto_evaluation.pyi | 73 ++++++ typings/dspy/evaluate/evaluate.pyi | 133 ++++++++++ typings/dspy/evaluate/metrics.pyi | 8 + typings/dspy/experimental/__init__.pyi | 7 + typings/dspy/experimental/module_graph.pyi | 31 +++ .../experimental/synthesizer/__init__.pyi | 8 + .../dspy/experimental/synthesizer/config.pyi | 17 ++ .../synthesizer/instruction_suffixes.pyi | 6 + .../experimental/synthesizer/signatures.pyi | 48 ++++ .../experimental/synthesizer/synthesizer.pyi | 21 ++ .../dspy/experimental/synthesizer/utils.pyi | 8 + typings/dspy/experimental/synthetic_data.pyi | 28 +++ typings/dspy/predict/__init__.pyi | 32 +++ typings/dspy/predict/aggregation.pyi | 13 + typings/dspy/predict/avatar/__init__.pyi | 7 + typings/dspy/predict/avatar/avatar.pyi | 12 + typings/dspy/predict/avatar/models.pyi | 24 ++ typings/dspy/predict/avatar/signatures.pyi | 17 ++ typings/dspy/predict/best_of_n.pyi | 52 ++++ typings/dspy/predict/chain_of_thought.pyi | 31 +++ .../predict/chain_of_thought_with_hint.pyi | 9 + typings/dspy/predict/code_act.pyi | 42 ++++ typings/dspy/predict/knn.pyi | 42 ++++ .../dspy/predict/multi_chain_comparison.pyi | 9 + typings/dspy/predict/parallel.pyi | 21 ++ typings/dspy/predict/parameter.pyi | 6 + typings/dspy/predict/predict.pyi | 47 ++++ 
typings/dspy/predict/program_of_thought.pyi | 34 +++ typings/dspy/predict/react.pyi | 26 ++ typings/dspy/predict/refine.pyi | 86 +++++++ typings/dspy/predict/retry.pyi | 3 + typings/dspy/primitives/__init__.pyi | 21 ++ typings/dspy/primitives/assertions.pyi | 3 + typings/dspy/primitives/example.pyi | 45 ++++ typings/dspy/primitives/module.pyi | 84 +++++++ typings/dspy/primitives/prediction.pyi | 50 ++++ typings/dspy/primitives/program.pyi | 63 +++++ .../dspy/primitives/python_interpreter.pyi | 36 +++ typings/dspy/propose/__init__.pyi | 7 + .../propose/dataset_summary_generator.pyi | 39 +++ typings/dspy/propose/grounded_proposer.pyi | 91 +++++++ typings/dspy/propose/propose_base.pyi | 13 + typings/dspy/propose/utils.pyi | 20 ++ typings/dspy/retrieve/__init__.pyi | 7 + typings/dspy/retrieve/azureaisearch_rm.pyi | 231 ++++++++++++++++++ typings/dspy/retrieve/chromadb_rm.pyi | 79 ++++++ typings/dspy/retrieve/clarifai_rm.pyi | 53 ++++ typings/dspy/retrieve/databricks_rm.pyi | 149 +++++++++++ typings/dspy/retrieve/deeplake_rm.pyi | 55 +++++ typings/dspy/retrieve/epsilla_rm.pyi | 21 ++ typings/dspy/retrieve/faiss_rm.pyi | 3 + typings/dspy/retrieve/falkordb_rm.pyi | 87 +++++++ typings/dspy/retrieve/lancedb_rm.pyi | 58 +++++ typings/dspy/retrieve/llama_index_rm.pyi | 58 +++++ typings/dspy/retrieve/marqo_rm.pyi | 57 +++++ typings/dspy/retrieve/milvus_rm.pyi | 64 +++++ typings/dspy/retrieve/mongodb_atlas_rm.pyi | 34 +++ typings/dspy/retrieve/my_scale_rm.pyi | 3 + typings/dspy/retrieve/neo4j_rm.pyi | 78 ++++++ typings/dspy/retrieve/pgvector_rm.pyi | 81 ++++++ typings/dspy/retrieve/pinecone_rm.pyi | 70 ++++++ typings/dspy/retrieve/qdrant_rm.pyi | 3 + typings/dspy/retrieve/ragatouille_rm.pyi | 37 +++ typings/dspy/retrieve/retrieve.pyi | 29 +++ typings/dspy/retrieve/snowflake_rm.pyi | 104 ++++++++ typings/dspy/retrieve/vectara_rm.pyi | 57 +++++ typings/dspy/retrieve/watson_discovery_rm.pyi | 43 ++++ typings/dspy/retrieve/weaviate_rm.pyi | 69 ++++++ 
typings/dspy/retrieve/you_rm.pyi | 40 +++ typings/dspy/retrievers/__init__.pyi | 7 + typings/dspy/retrievers/embeddings.pyi | 21 ++ typings/dspy/signatures/__init__.pyi | 25 ++ typings/dspy/signatures/field.pyi | 32 +++ typings/dspy/signatures/signature.pyi | 153 ++++++++++++ typings/dspy/signatures/utils.pyi | 8 + typings/dspy/streaming/__init__.pyi | 17 ++ typings/dspy/streaming/messages.pyi | 89 +++++++ typings/dspy/streaming/streamify.pyi | 164 +++++++++++++ typings/dspy/streaming/streaming_listener.pyi | 46 ++++ typings/dspy/teleprompt/__init__.pyi | 34 +++ typings/dspy/teleprompt/avatar_optimizer.pyi | 64 +++++ typings/dspy/teleprompt/bettertogether.pyi | 23 ++ typings/dspy/teleprompt/bootstrap.pyi | 42 ++++ .../dspy/teleprompt/bootstrap_finetune.pyi | 75 ++++++ typings/dspy/teleprompt/copro_optimizer.pyi | 50 ++++ typings/dspy/teleprompt/ensemble.pyi | 13 + typings/dspy/teleprompt/grpo.pyi | 72 ++++++ typings/dspy/teleprompt/infer_rules.pyi | 30 +++ typings/dspy/teleprompt/knn_fewshot.pyi | 54 ++++ .../dspy/teleprompt/mipro_optimizer_v2.pyi | 61 +++++ typings/dspy/teleprompt/random_search.pyi | 23 ++ typings/dspy/teleprompt/signature_opt.pyi | 18 ++ typings/dspy/teleprompt/simba.pyi | 51 ++++ typings/dspy/teleprompt/simba_utils.pyi | 55 +++++ typings/dspy/teleprompt/teleprompt.pyi | 40 +++ typings/dspy/teleprompt/teleprompt_optuna.pyi | 23 ++ typings/dspy/teleprompt/utils.pyi | 103 ++++++++ typings/dspy/teleprompt/vanilla.pyi | 9 + typings/dspy/utils/__init__.pyi | 27 ++ typings/dspy/utils/asyncify.pyi | 29 +++ typings/dspy/utils/caching.pyi | 10 + typings/dspy/utils/callback.pyi | 211 ++++++++++++++++ typings/dspy/utils/dummies.pyi | 84 +++++++ typings/dspy/utils/exceptions.pyi | 17 ++ typings/dspy/utils/inspect_history.pyi | 7 + typings/dspy/utils/langchain_tool.pyi | 23 ++ typings/dspy/utils/logging_utils.pyi | 45 ++++ typings/dspy/utils/mcp.pyi | 21 ++ typings/dspy/utils/parallelizer.pyi | 25 ++ typings/dspy/utils/saving.pyi | 20 ++ 
typings/dspy/utils/unbatchify.pyi | 58 +++++ typings/dspy/utils/usage_tracker.pyi | 24 ++ 153 files changed, 7247 insertions(+) create mode 100644 typings/dspy/__init__.pyi create mode 100644 typings/dspy/__metadata__.pyi create mode 100644 typings/dspy/adapters/__init__.pyi create mode 100644 typings/dspy/adapters/base.pyi create mode 100644 typings/dspy/adapters/chat_adapter.pyi create mode 100644 typings/dspy/adapters/json_adapter.pyi create mode 100644 typings/dspy/adapters/two_step_adapter.pyi create mode 100644 typings/dspy/adapters/types/__init__.pyi create mode 100644 typings/dspy/adapters/types/audio.pyi create mode 100644 typings/dspy/adapters/types/base_type.pyi create mode 100644 typings/dspy/adapters/types/history.pyi create mode 100644 typings/dspy/adapters/types/image.pyi create mode 100644 typings/dspy/adapters/types/tool.pyi create mode 100644 typings/dspy/adapters/utils.pyi create mode 100644 typings/dspy/clients/__init__.pyi create mode 100644 typings/dspy/clients/base_lm.pyi create mode 100644 typings/dspy/clients/cache.pyi create mode 100644 typings/dspy/clients/databricks.pyi create mode 100644 typings/dspy/clients/embedding.pyi create mode 100644 typings/dspy/clients/lm.pyi create mode 100644 typings/dspy/clients/lm_local.pyi create mode 100644 typings/dspy/clients/lm_local_arbor.pyi create mode 100644 typings/dspy/clients/openai.pyi create mode 100644 typings/dspy/clients/provider.pyi create mode 100644 typings/dspy/clients/utils_finetune.pyi create mode 100644 typings/dspy/datasets/__init__.pyi create mode 100644 typings/dspy/datasets/alfworld/__init__.pyi create mode 100644 typings/dspy/datasets/alfworld/alfworld.pyi create mode 100644 typings/dspy/datasets/colors.pyi create mode 100644 typings/dspy/datasets/dataloader.pyi create mode 100644 typings/dspy/datasets/dataset.pyi create mode 100644 typings/dspy/datasets/hotpotqa.pyi create mode 100644 typings/dspy/datasets/math.pyi create mode 100644 typings/dspy/dsp/__init__.pyi create mode 
100644 typings/dspy/dsp/colbertv2.pyi create mode 100644 typings/dspy/dsp/utils/__init__.pyi create mode 100644 typings/dspy/dsp/utils/dpr.pyi create mode 100644 typings/dspy/dsp/utils/metrics.pyi create mode 100644 typings/dspy/dsp/utils/settings.pyi create mode 100644 typings/dspy/dsp/utils/utils.pyi create mode 100644 typings/dspy/evaluate/__init__.pyi create mode 100644 typings/dspy/evaluate/auto_evaluation.pyi create mode 100644 typings/dspy/evaluate/evaluate.pyi create mode 100644 typings/dspy/evaluate/metrics.pyi create mode 100644 typings/dspy/experimental/__init__.pyi create mode 100644 typings/dspy/experimental/module_graph.pyi create mode 100644 typings/dspy/experimental/synthesizer/__init__.pyi create mode 100644 typings/dspy/experimental/synthesizer/config.pyi create mode 100644 typings/dspy/experimental/synthesizer/instruction_suffixes.pyi create mode 100644 typings/dspy/experimental/synthesizer/signatures.pyi create mode 100644 typings/dspy/experimental/synthesizer/synthesizer.pyi create mode 100644 typings/dspy/experimental/synthesizer/utils.pyi create mode 100644 typings/dspy/experimental/synthetic_data.pyi create mode 100644 typings/dspy/predict/__init__.pyi create mode 100644 typings/dspy/predict/aggregation.pyi create mode 100644 typings/dspy/predict/avatar/__init__.pyi create mode 100644 typings/dspy/predict/avatar/avatar.pyi create mode 100644 typings/dspy/predict/avatar/models.pyi create mode 100644 typings/dspy/predict/avatar/signatures.pyi create mode 100644 typings/dspy/predict/best_of_n.pyi create mode 100644 typings/dspy/predict/chain_of_thought.pyi create mode 100644 typings/dspy/predict/chain_of_thought_with_hint.pyi create mode 100644 typings/dspy/predict/code_act.pyi create mode 100644 typings/dspy/predict/knn.pyi create mode 100644 typings/dspy/predict/multi_chain_comparison.pyi create mode 100644 typings/dspy/predict/parallel.pyi create mode 100644 typings/dspy/predict/parameter.pyi create mode 100644 
typings/dspy/predict/predict.pyi create mode 100644 typings/dspy/predict/program_of_thought.pyi create mode 100644 typings/dspy/predict/react.pyi create mode 100644 typings/dspy/predict/refine.pyi create mode 100644 typings/dspy/predict/retry.pyi create mode 100644 typings/dspy/primitives/__init__.pyi create mode 100644 typings/dspy/primitives/assertions.pyi create mode 100644 typings/dspy/primitives/example.pyi create mode 100644 typings/dspy/primitives/module.pyi create mode 100644 typings/dspy/primitives/prediction.pyi create mode 100644 typings/dspy/primitives/program.pyi create mode 100644 typings/dspy/primitives/python_interpreter.pyi create mode 100644 typings/dspy/propose/__init__.pyi create mode 100644 typings/dspy/propose/dataset_summary_generator.pyi create mode 100644 typings/dspy/propose/grounded_proposer.pyi create mode 100644 typings/dspy/propose/propose_base.pyi create mode 100644 typings/dspy/propose/utils.pyi create mode 100644 typings/dspy/retrieve/__init__.pyi create mode 100644 typings/dspy/retrieve/azureaisearch_rm.pyi create mode 100644 typings/dspy/retrieve/chromadb_rm.pyi create mode 100644 typings/dspy/retrieve/clarifai_rm.pyi create mode 100644 typings/dspy/retrieve/databricks_rm.pyi create mode 100644 typings/dspy/retrieve/deeplake_rm.pyi create mode 100644 typings/dspy/retrieve/epsilla_rm.pyi create mode 100644 typings/dspy/retrieve/faiss_rm.pyi create mode 100644 typings/dspy/retrieve/falkordb_rm.pyi create mode 100644 typings/dspy/retrieve/lancedb_rm.pyi create mode 100644 typings/dspy/retrieve/llama_index_rm.pyi create mode 100644 typings/dspy/retrieve/marqo_rm.pyi create mode 100644 typings/dspy/retrieve/milvus_rm.pyi create mode 100644 typings/dspy/retrieve/mongodb_atlas_rm.pyi create mode 100644 typings/dspy/retrieve/my_scale_rm.pyi create mode 100644 typings/dspy/retrieve/neo4j_rm.pyi create mode 100644 typings/dspy/retrieve/pgvector_rm.pyi create mode 100644 typings/dspy/retrieve/pinecone_rm.pyi create mode 100644 
typings/dspy/retrieve/qdrant_rm.pyi create mode 100644 typings/dspy/retrieve/ragatouille_rm.pyi create mode 100644 typings/dspy/retrieve/retrieve.pyi create mode 100644 typings/dspy/retrieve/snowflake_rm.pyi create mode 100644 typings/dspy/retrieve/vectara_rm.pyi create mode 100644 typings/dspy/retrieve/watson_discovery_rm.pyi create mode 100644 typings/dspy/retrieve/weaviate_rm.pyi create mode 100644 typings/dspy/retrieve/you_rm.pyi create mode 100644 typings/dspy/retrievers/__init__.pyi create mode 100644 typings/dspy/retrievers/embeddings.pyi create mode 100644 typings/dspy/signatures/__init__.pyi create mode 100644 typings/dspy/signatures/field.pyi create mode 100644 typings/dspy/signatures/signature.pyi create mode 100644 typings/dspy/signatures/utils.pyi create mode 100644 typings/dspy/streaming/__init__.pyi create mode 100644 typings/dspy/streaming/messages.pyi create mode 100644 typings/dspy/streaming/streamify.pyi create mode 100644 typings/dspy/streaming/streaming_listener.pyi create mode 100644 typings/dspy/teleprompt/__init__.pyi create mode 100644 typings/dspy/teleprompt/avatar_optimizer.pyi create mode 100644 typings/dspy/teleprompt/bettertogether.pyi create mode 100644 typings/dspy/teleprompt/bootstrap.pyi create mode 100644 typings/dspy/teleprompt/bootstrap_finetune.pyi create mode 100644 typings/dspy/teleprompt/copro_optimizer.pyi create mode 100644 typings/dspy/teleprompt/ensemble.pyi create mode 100644 typings/dspy/teleprompt/grpo.pyi create mode 100644 typings/dspy/teleprompt/infer_rules.pyi create mode 100644 typings/dspy/teleprompt/knn_fewshot.pyi create mode 100644 typings/dspy/teleprompt/mipro_optimizer_v2.pyi create mode 100644 typings/dspy/teleprompt/random_search.pyi create mode 100644 typings/dspy/teleprompt/signature_opt.pyi create mode 100644 typings/dspy/teleprompt/simba.pyi create mode 100644 typings/dspy/teleprompt/simba_utils.pyi create mode 100644 typings/dspy/teleprompt/teleprompt.pyi create mode 100644 
typings/dspy/teleprompt/teleprompt_optuna.pyi create mode 100644 typings/dspy/teleprompt/utils.pyi create mode 100644 typings/dspy/teleprompt/vanilla.pyi create mode 100644 typings/dspy/utils/__init__.pyi create mode 100644 typings/dspy/utils/asyncify.pyi create mode 100644 typings/dspy/utils/caching.pyi create mode 100644 typings/dspy/utils/callback.pyi create mode 100644 typings/dspy/utils/dummies.pyi create mode 100644 typings/dspy/utils/exceptions.pyi create mode 100644 typings/dspy/utils/inspect_history.pyi create mode 100644 typings/dspy/utils/langchain_tool.pyi create mode 100644 typings/dspy/utils/logging_utils.pyi create mode 100644 typings/dspy/utils/mcp.pyi create mode 100644 typings/dspy/utils/parallelizer.pyi create mode 100644 typings/dspy/utils/saving.pyi create mode 100644 typings/dspy/utils/unbatchify.pyi create mode 100644 typings/dspy/utils/usage_tracker.pyi diff --git a/typings/dspy/__init__.pyi b/typings/dspy/__init__.pyi new file mode 100644 index 0000000..6d3ec82 --- /dev/null +++ b/typings/dspy/__init__.pyi @@ -0,0 +1,45 @@ +""" +This type stub file was generated by pyright. 
+""" + +import dspy.retrievers +from dspy.predict import * +from dspy.primitives import * +from dspy.retrieve import * +from dspy.signatures import * +from dspy.teleprompt import * +from dspy.evaluate import Evaluate +from dspy.clients import * +from dspy.clients import DSPY_CACHE +from dspy.adapters import ( + Adapter, + Audio, + BaseType, + ChatAdapter, + History, + Image, + JSONAdapter, + Tool, + ToolCalls, + TwoStepAdapter, +) +from dspy.utils.logging_utils import configure_dspy_loggers, disable_logging, enable_logging +from dspy.utils.asyncify import asyncify +from dspy.utils.saving import load +from dspy.streaming.streamify import streamify +from dspy.utils.usage_tracker import track_usage +from dspy.dsp.utils.settings import settings +from dspy.dsp.colbertv2 import ColBERTv2 +from .__metadata__ import ( + __author__, + __author_email__, + __description__, + __name__, + __url__, + __version__, +) + +configure = ... +context = ... +BootstrapRS = BootstrapFewShotWithRandomSearch +cache = ... diff --git a/typings/dspy/__metadata__.pyi b/typings/dspy/__metadata__.pyi new file mode 100644 index 0000000..446283b --- /dev/null +++ b/typings/dspy/__metadata__.pyi @@ -0,0 +1,10 @@ +""" +This type stub file was generated by pyright. +""" + +__name__ = ... +__version__ = ... +__description__ = ... +__url__ = ... +__author__ = ... +__author_email__ = ... diff --git a/typings/dspy/adapters/__init__.pyi b/typings/dspy/adapters/__init__.pyi new file mode 100644 index 0000000..50e175a --- /dev/null +++ b/typings/dspy/adapters/__init__.pyi @@ -0,0 +1,22 @@ +""" +This type stub file was generated by pyright. 
+""" + +from dspy.adapters.base import Adapter +from dspy.adapters.chat_adapter import ChatAdapter +from dspy.adapters.json_adapter import JSONAdapter +from dspy.adapters.two_step_adapter import TwoStepAdapter +from dspy.adapters.types import Audio, BaseType, History, Image, Tool, ToolCalls + +__all__ = [ + "Adapter", + "ChatAdapter", + "BaseType", + "History", + "Image", + "Audio", + "JSONAdapter", + "TwoStepAdapter", + "Tool", + "ToolCalls", +] diff --git a/typings/dspy/adapters/base.pyi b/typings/dspy/adapters/base.pyi new file mode 100644 index 0000000..f68b40d --- /dev/null +++ b/typings/dspy/adapters/base.pyi @@ -0,0 +1,207 @@ +""" +This type stub file was generated by pyright. +""" + +from typing import Any, Optional, TYPE_CHECKING, Type +from dspy.signatures.signature import Signature +from dspy.utils.callback import BaseCallback +from dspy.clients.lm import LM + +logger = ... +if TYPE_CHECKING: ... + +class Adapter: + def __init__(self, callbacks: Optional[list[BaseCallback]] = ...) -> None: ... + def __init_subclass__(cls, **kwargs) -> None: ... + def __call__( + self, + lm: LM, + lm_kwargs: dict[str, Any], + signature: Type[Signature], + demos: list[dict[str, Any]], + inputs: dict[str, Any], + ) -> list[dict[str, Any]]: ... + async def acall( + self, + lm: LM, + lm_kwargs: dict[str, Any], + signature: Type[Signature], + demos: list[dict[str, Any]], + inputs: dict[str, Any], + ) -> list[dict[str, Any]]: ... + def format( + self, signature: Type[Signature], demos: list[dict[str, Any]], inputs: dict[str, Any] + ) -> list[dict[str, Any]]: + """Format the input messages for the LM call. + + This method converts the DSPy structured input along with few-shot examples and conversation history into + multiturn messages as expected by the LM. For custom adapters, this method can be overridden to customize + the formatting of the input messages. 
+ + In general we recommend the messages to have the following structure: + ``` + [ + {"role": "system", "content": system_message}, + # Begin few-shot examples + {"role": "user", "content": few_shot_example_1_input}, + {"role": "assistant", "content": few_shot_example_1_output}, + {"role": "user", "content": few_shot_example_2_input}, + {"role": "assistant", "content": few_shot_example_2_output}, + ... + # End few-shot examples + # Begin conversation history + {"role": "user", "content": conversation_history_1_input}, + {"role": "assistant", "content": conversation_history_1_output}, + {"role": "user", "content": conversation_history_2_input}, + {"role": "assistant", "content": conversation_history_2_output}, + ... + # End conversation history + {"role": "user", "content": current_input}, + ] + + And system message should contain the field description, field structure, and task description. + ``` + + + Args: + signature: The DSPy signature for which to format the input messages. + demos: A list of few-shot examples. + inputs: The input arguments to the DSPy module. + + Returns: + A list of multiturn messages as expected by the LM. + """ + ... + + def format_field_description(self, signature: Type[Signature]) -> str: + """Format the field description for the system message. + + This method formats the field description for the system message. It should return a string that contains + the field description for the input fields and the output fields. + + Args: + signature: The DSPy signature for which to format the field description. + + Returns: + A string that contains the field description for the input fields and the output fields. + """ + ... + + def format_field_structure(self, signature: Type[Signature]) -> str: + """Format the field structure for the system message. + + This method formats the field structure for the system message. 
It should return a string that dictates the + format the input fields should be provided to the LM, and the format the output fields will be in the response. + Refer to the ChatAdapter and JSONAdapter for an example. + + Args: + signature: The DSPy signature for which to format the field structure. + """ + ... + + def format_task_description(self, signature: Type[Signature]) -> str: + """Format the task description for the system message. + + This method formats the task description for the system message. In most cases this is just a thin wrapper + over `signature.instructions`. + + Args: + signature: The DSPy signature of the DSPy module. + + Returns: + A string that describes the task. + """ + ... + + def format_user_message_content( + self, + signature: Type[Signature], + inputs: dict[str, Any], + prefix: str = ..., + suffix: str = ..., + main_request: bool = ..., + ) -> str: + """Format the user message content. + + This method formats the user message content, which can be used in formatting few-shot examples, conversation + history, and the current input. + + Args: + signature: The DSPy signature for which to format the user message content. + inputs: The input arguments to the DSPy module. + prefix: A prefix to the user message content. + suffix: A suffix to the user message content. + + Returns: + A string that contains the user message content. + """ + ... + + def format_assistant_message_content( + self, + signature: Type[Signature], + outputs: dict[str, Any], + missing_field_message: Optional[str] = ..., + ) -> str: + """Format the assistant message content. + + This method formats the assistant message content, which can be used in formatting few-shot examples, + conversation history. + + Args: + signature: The DSPy signature for which to format the assistant message content. + outputs: The output fields to be formatted. + missing_field_message: A message to be used when a field is missing.
+ + Returns: + A string that contains the assistant message content. + """ + ... + + def format_demos( + self, signature: Type[Signature], demos: list[dict[str, Any]] + ) -> list[dict[str, Any]]: + """Format the few-shot examples. + + This method formats the few-shot examples as multiturn messages. + + Args: + signature: The DSPy signature for which to format the few-shot examples. + demos: A list of few-shot examples, each element is a dictionary with keys of the input and output fields of + the signature. + + Returns: + A list of multiturn messages. + """ + ... + + def format_conversation_history( + self, signature: Type[Signature], history_field_name: str, inputs: dict[str, Any] + ) -> list[dict[str, Any]]: + """Format the conversation history. + + This method formats the conversation history and the current input as multiturn messages. + + Args: + signature: The DSPy signature for which to format the conversation history. + history_field_name: The name of the history field in the signature. + inputs: The input arguments to the DSPy module. + + Returns: + A list of multiturn messages. + """ + ... + + def parse(self, signature: Type[Signature], completion: str) -> dict[str, Any]: + """Parse the LM output into a dictionary of the output fields. + + This method parses the LM output into a dictionary of the output fields. + + Args: + signature: The DSPy signature for which to parse the LM output. + completion: The LM output to be parsed. + + Returns: + A dictionary of the output fields. + """ + ... diff --git a/typings/dspy/adapters/chat_adapter.pyi b/typings/dspy/adapters/chat_adapter.pyi new file mode 100644 index 0000000..8af8283 --- /dev/null +++ b/typings/dspy/adapters/chat_adapter.pyi @@ -0,0 +1,99 @@ +""" +This type stub file was generated by pyright. 
+""" + +from typing import Any, Dict, NamedTuple, Optional, Type +from pydantic.fields import FieldInfo +from dspy.adapters.base import Adapter +from dspy.clients.lm import LM +from dspy.signatures.signature import Signature +from dspy.utils.callback import BaseCallback + +field_header_pattern = ... + +class FieldInfoWithName(NamedTuple): + name: str + info: FieldInfo + ... + +class ChatAdapter(Adapter): + def __init__(self, callbacks: Optional[list[BaseCallback]] = ...) -> None: ... + def __call__( + self, + lm: LM, + lm_kwargs: dict[str, Any], + signature: Type[Signature], + demos: list[dict[str, Any]], + inputs: dict[str, Any], + ) -> list[dict[str, Any]]: ... + def format_field_description(self, signature: Type[Signature]) -> str: ... + def format_field_structure(self, signature: Type[Signature]) -> str: + """ + `ChatAdapter` requires input and output fields to be in their own sections, with section header using markers + `[[ ## field_name ## ]]`. An arbitrary field `completed` ([[ ## completed ## ]]) is added to the end of the + output fields section to indicate the end of the output fields. + """ + ... + + def format_task_description(self, signature: Type[Signature]) -> str: ... + def format_user_message_content( + self, + signature: Type[Signature], + inputs: dict[str, Any], + prefix: str = ..., + suffix: str = ..., + main_request: bool = ..., + ) -> str: ... + def user_message_output_requirements(self, signature: Type[Signature]) -> str: + """Returns a simplified format reminder for the language model. + + In chat-based interactions, language models may lose track of the required output format + as the conversation context grows longer. This method generates a concise reminder of + the expected output structure that can be included in user messages. + + Args: + signature (Type[Signature]): The DSPy signature defining the expected input/output fields. + + Returns: + str: A simplified description of the required output format. 
+ + Note: + This is a more lightweight version of `format_field_structure` specifically designed + for inline reminders within chat messages. + """ + ... + + def format_assistant_message_content( + self, signature: Type[Signature], outputs: dict[str, Any], missing_field_message=... + ) -> str: ... + def parse(self, signature: Type[Signature], completion: str) -> dict[str, Any]: ... + def format_field_with_value(self, fields_with_values: Dict[FieldInfoWithName, Any]) -> str: + """ + Formats the values of the specified fields according to the field's DSPy type (input or output), + annotation (e.g. str, int, etc.), and the type of the value itself. Joins the formatted values + into a single string, which is a multiline string if there are multiple fields. + + Args: + fields_with_values: A dictionary mapping information about a field to its corresponding + value. + + Returns: + The joined formatted values of the fields, represented as a string. + """ + ... + + def format_finetune_data( + self, + signature: Type[Signature], + demos: list[dict[str, Any]], + inputs: dict[str, Any], + outputs: dict[str, Any], + ) -> dict[str, list[Any]]: + """ + Format the call data into finetuning data according to the OpenAI API specifications. + + For the chat adapter, this means formatting the data as a list of messages, where each message is a dictionary + with a "role" and "content" key. The role can be "system", "user", or "assistant". Then, the messages are + wrapped in a dictionary with a "messages" key. + """ + ... diff --git a/typings/dspy/adapters/json_adapter.pyi b/typings/dspy/adapters/json_adapter.pyi new file mode 100644 index 0000000..e29ef42 --- /dev/null +++ b/typings/dspy/adapters/json_adapter.pyi @@ -0,0 +1,48 @@ +""" +This type stub file was generated by pyright.
+""" + +from typing import Any, Dict, Type +from dspy.adapters.chat_adapter import ChatAdapter, FieldInfoWithName +from dspy.clients.lm import LM +from dspy.signatures.signature import Signature + +logger = ... + +class JSONAdapter(ChatAdapter): + def __call__( + self, + lm: LM, + lm_kwargs: dict[str, Any], + signature: Type[Signature], + demos: list[dict[str, Any]], + inputs: dict[str, Any], + ) -> list[dict[str, Any]]: ... + def format_field_structure(self, signature: Type[Signature]) -> str: ... + def user_message_output_requirements(self, signature: Type[Signature]) -> str: ... + def format_assistant_message_content( + self, signature: Type[Signature], outputs: dict[str, Any], missing_field_message=... + ) -> str: ... + def parse(self, signature: Type[Signature], completion: str) -> dict[str, Any]: ... + def format_field_with_value( + self, fields_with_values: Dict[FieldInfoWithName, Any], role: str = ... + ) -> str: + """ + Formats the values of the specified fields according to the field's DSPy type (input or output), + annotation (e.g. str, int, etc.), and the type of the value itself. Joins the formatted values + into a single string, which is a multiline string if there are multiple fields. + + Args: + fields_with_values: A dictionary mapping information about a field to its corresponding value. + Returns: + The joined formatted values of the fields, represented as a string. + """ + ... + + def format_finetune_data( + self, + signature: Type[Signature], + demos: list[dict[str, Any]], + inputs: dict[str, Any], + outputs: dict[str, Any], + ) -> dict[str, list[Any]]: ... diff --git a/typings/dspy/adapters/two_step_adapter.pyi b/typings/dspy/adapters/two_step_adapter.pyi new file mode 100644 index 0000000..3a26d87 --- /dev/null +++ b/typings/dspy/adapters/two_step_adapter.pyi @@ -0,0 +1,87 @@ +""" +This type stub file was generated by pyright. 
+""" + +from typing import Any, Optional, Type +from dspy.adapters.base import Adapter +from dspy.clients import LM +from dspy.signatures.signature import Signature + +class TwoStepAdapter(Adapter): + """ + A two-stage adapter that: + 1. Uses a simpler, more natural prompt for the main LM + 2. Uses a smaller LM with chat adapter to extract structured data from the response of main LM + This adapter uses a common __call__ logic defined in base Adapter class. + This class is particularly useful when interacting with reasoning models as the main LM since reasoning models + are known to struggle with structured outputs. + + Example: + ``` + import dspy + lm = dspy.LM(model="openai/o3-mini", max_tokens=10000, temperature = 1.0) + adapter = dspy.TwoStepAdapter(dspy.LM("openai/gpt-4o-mini")) + dspy.configure(lm=lm, adapter=adapter) + program = dspy.ChainOfThought("question->answer") + result = program("What is the capital of France?") + print(result) + ``` + """ + def __init__(self, extraction_model: LM) -> None: ... + def format( + self, signature: Type[Signature], demos: list[dict[str, Any]], inputs: dict[str, Any] + ) -> list[dict[str, Any]]: + """ + Format a prompt for the first stage with the main LM. + Since no specific structure is required for the main LM, we customize the format method + instead of format_field_description or format_field_structure. + + Args: + signature: The signature of the original task + demos: A list of demo examples + inputs: The current input + + Returns: + A list of messages to be passed to the main LM. + """ + ... + + def parse(self, signature: Signature, completion: str) -> dict[str, Any]: + """ + Use a smaller LM (extraction_model) with chat adapter to extract structured data + from the raw completion text of the main LM. + + Args: + signature: The signature of the original task + completion: The completion from the main LM + + Returns: + A dictionary containing the extracted structured data. + """ + ...
+ + async def acall( + self, + lm: LM, + lm_kwargs: dict[str, Any], + signature: Type[Signature], + demos: list[dict[str, Any]], + inputs: dict[str, Any], + ) -> list[dict[str, Any]]: ... + def format_task_description(self, signature: Signature) -> str: + """Create a description of the task based on the signature""" + ... + + def format_user_message_content( + self, + signature: Type[Signature], + inputs: dict[str, Any], + prefix: str = ..., + suffix: str = ..., + ) -> str: ... + def format_assistant_message_content( + self, + signature: Type[Signature], + outputs: dict[str, Any], + missing_field_message: Optional[str] = ..., + ) -> str: ... diff --git a/typings/dspy/adapters/types/__init__.pyi b/typings/dspy/adapters/types/__init__.pyi new file mode 100644 index 0000000..9e00508 --- /dev/null +++ b/typings/dspy/adapters/types/__init__.pyi @@ -0,0 +1,11 @@ +""" +This type stub file was generated by pyright. +""" + +from dspy.adapters.types.audio import Audio +from dspy.adapters.types.base_type import BaseType +from dspy.adapters.types.history import History +from dspy.adapters.types.image import Image +from dspy.adapters.types.tool import Tool, ToolCalls + +__all__ = ["History", "Image", "Audio", "BaseType", "Tool", "ToolCalls"] diff --git a/typings/dspy/adapters/types/audio.pyi b/typings/dspy/adapters/types/audio.pyi new file mode 100644 index 0000000..8ebfe4f --- /dev/null +++ b/typings/dspy/adapters/types/audio.pyi @@ -0,0 +1,56 @@ +""" +This type stub file was generated by pyright. +""" + +import pydantic +from typing import Any, Union +from dspy.adapters.types.base_type import BaseType + +SF_AVAILABLE = ... + +class Audio(BaseType): + data: str + audio_format: str + model_config = ... + def format(self) -> list[dict[str, Any]]: ... + @pydantic.model_validator(mode="before") + @classmethod + def validate_input(cls, values: Any) -> Any: + """ + Validate input for Audio, expecting 'data' and 'format' keys in dictionary. + """ + ... 
+ + @classmethod + def from_url(cls, url: str) -> Audio: + """ + Download an audio file from URL and encode it as base64. + """ + ... + + @classmethod + def from_file(cls, file_path: str) -> Audio: + """ + Read local audio file and encode it as base64. + """ + ... + + @classmethod + def from_array(cls, array: Any, sampling_rate: int, format: str = ...) -> Audio: + """ + Process numpy-like array and encode it as base64. Uses sampling rate and audio format for encoding. + """ + ... + + def __str__(self) -> str: ... + def __repr__(self) -> str: ... + +def encode_audio( + audio: Union[str, bytes, dict, Audio, Any], sampling_rate: int = ..., format: str = ... +) -> dict: + """ + Encode audio to a dict with 'data' and 'format'. + + Accepts: local file path, URL, data URI, dict, Audio instance, numpy array, or bytes (with known format). + """ + ... diff --git a/typings/dspy/adapters/types/base_type.pyi b/typings/dspy/adapters/types/base_type.pyi new file mode 100644 index 0000000..78250a8 --- /dev/null +++ b/typings/dspy/adapters/types/base_type.pyi @@ -0,0 +1,72 @@ +""" +This type stub file was generated by pyright. +""" + +import pydantic +from typing import Any, Union + +CUSTOM_TYPE_START_IDENTIFIER = ... +CUSTOM_TYPE_END_IDENTIFIER = ... + +class BaseType(pydantic.BaseModel): + """Base class to support creating custom types for DSPy signatures. + + This is the parent class of DSPy custom types, e.g, dspy.Image. Subclasses must implement the `format` method to + return a list of dictionaries (same as the Array of content parts in the OpenAI API user message's content field). + + Example: + + ```python + class Image(BaseType): + url: str + + def format(self) -> list[dict[str, Any]]: + return [{"type": "image_url", "image_url": {"url": self.url}}] + ``` + """ + def format(self) -> Union[list[dict[str, Any]], str]: ... + @classmethod + def description(cls) -> str: + """Description of the custom type""" + ... 
+ + @classmethod + def extract_custom_type_from_annotation(cls, annotation): # -> list[type[Self]] | list[Any]: + """Extract all custom types from the annotation. + + This is used to extract all custom types from the annotation of a field, while the annotation can + have arbitrary level of nesting. For example, we detect `Tool` is in `list[dict[str, Tool]]`. + """ + ... + + @pydantic.model_serializer() + def serialize_model(self): # -> str: + ... + +def split_message_content_for_custom_types(messages: list[dict[str, Any]]) -> list[dict[str, Any]]: + """Split user message content into a list of content blocks. + + This method splits each user message's content in the `messages` list to be a list of content blocks, so that + the custom types like `dspy.Image` can be properly formatted for better quality. For example, the split content + may look like below if the user message has a `dspy.Image` object: + + ``` + [ + {"type": "text", "text": "{text_before_image}"}, + {"type": "image_url", "image_url": {"url": "{image_url}"}}, + {"type": "text", "text": "{text_after_image}"}, + ] + ``` + + This is implemented by finding the `CUSTOM_TYPE_START_IDENTIFIER` and `CUSTOM_TYPE_END_IDENTIFIER` markers + in the user message content and splitting the content around them. `CUSTOM_TYPE_START_IDENTIFIER` + and `CUSTOM_TYPE_END_IDENTIFIER` are the reserved identifiers for the custom types as in `dspy.BaseType`. + + Args: + messages: a list of messages sent to the LM. The format is the same as [OpenAI API's messages + format](https://platform.openai.com/docs/guides/chat-completions/response-format). + + Returns: + A list of messages with the content split into a list of content blocks around custom types content. + """ + ... diff --git a/typings/dspy/adapters/types/history.pyi b/typings/dspy/adapters/types/history.pyi new file mode 100644 index 0000000..572f7ed --- /dev/null +++ b/typings/dspy/adapters/types/history.pyi @@ -0,0 +1,64 @@ +""" +This type stub file was generated by pyright.
+""" + +import pydantic +from typing import Any + +class History(pydantic.BaseModel): + """Class representing the conversation history. + + The conversation history is a list of messages, each message entity should have keys from the associated signature. + For example, if you have the following signature: + + ``` + class MySignature(dspy.Signature): + question: str = dspy.InputField() + history: dspy.History = dspy.InputField() + answer: str = dspy.OutputField() + ``` + + Then the history should be a list of dictionaries with keys "question" and "answer". + + Example: + ``` + import dspy + + dspy.settings.configure(lm=dspy.LM("openai/gpt-4o-mini")) + + class MySignature(dspy.Signature): + question: str = dspy.InputField() + history: dspy.History = dspy.InputField() + answer: str = dspy.OutputField() + + history = dspy.History( + messages=[ + {"question": "What is the capital of France?", "answer": "Paris"}, + {"question": "What is the capital of Germany?", "answer": "Berlin"}, + ] + ) + + predict = dspy.Predict(MySignature) + outputs = predict(question="What is the capital of France?", history=history) + ``` + + Example of capturing the conversation history: + ``` + import dspy + + dspy.settings.configure(lm=dspy.LM("openai/gpt-4o-mini")) + + class MySignature(dspy.Signature): + question: str = dspy.InputField() + history: dspy.History = dspy.InputField() + answer: str = dspy.OutputField() + + predict = dspy.Predict(MySignature) + outputs = predict(question="What is the capital of France?") + history = dspy.History(messages=[{"question": "What is the capital of France?", **outputs}]) + outputs_with_history = predict(question="Are you sure?", history=history) + ``` + """ + + messages: list[dict[str, Any]] + model_config = ... 
diff --git a/typings/dspy/adapters/types/image.pyi b/typings/dspy/adapters/types/image.pyi new file mode 100644 index 0000000..1b2e827 --- /dev/null +++ b/typings/dspy/adapters/types/image.pyi @@ -0,0 +1,57 @@ +""" +This type stub file was generated by pyright. +""" + +import pydantic +from typing import Any, Union +from dspy.adapters.types.base_type import BaseType +from PIL import Image as PILImage + +PIL_AVAILABLE = ... + +class Image(BaseType): + url: str + model_config = ... + def format(self) -> Union[list[dict[str, Any]], str]: ... + @pydantic.model_validator(mode="before") + @classmethod + def validate_input(cls, values): # -> dict[str, str] | dict[Any, Any] | dict[str, Any]: + ... + @classmethod + def from_url(cls, url: str, download: bool = ...): # -> Self: + ... + @classmethod + def from_file(cls, file_path: str): # -> Self: + ... + @classmethod + def from_PIL(cls, pil_image): # -> Self: + ... + def __str__(self) -> str: ... + def __repr__(self): # -> str: + ... + +def is_url(string: str) -> bool: + """Check if a string is a valid URL.""" + ... + +def encode_image( + image: Union[str, bytes, PILImage.Image, dict], download_images: bool = ... +) -> str: + """ + Encode an image or file to a base64 data URI. + + Args: + image: The image or file to encode. Can be a PIL Image, file path, URL, or data URI. + download_images: Whether to download images from URLs. + + Returns: + str: The data URI of the file or the URL if download_images is False. + + Raises: + ValueError: If the file type is not supported. + """ + ... + +def is_image(obj) -> bool: + """Check if the object is an image or a valid media file reference.""" + ... diff --git a/typings/dspy/adapters/types/tool.pyi b/typings/dspy/adapters/types/tool.pyi new file mode 100644 index 0000000..a80297b --- /dev/null +++ b/typings/dspy/adapters/types/tool.pyi @@ -0,0 +1,168 @@ +""" +This type stub file was generated by pyright. 
+""" + +import mcp +from typing import Any, Callable, Optional, TYPE_CHECKING, Tuple, Type +from pydantic import BaseModel +from dspy.adapters.types.base_type import BaseType +from dspy.utils.callback import with_callbacks +from langchain.tools import BaseTool + +if TYPE_CHECKING: ... +_TYPE_MAPPING = ... + +class Tool(BaseType): + """Tool class. + + This class is used to simplify the creation of tools for tool calling (function calling) in LLMs. Only supports + functions for now. + """ + + func: Callable + name: Optional[str] = ... + desc: Optional[str] = ... + args: Optional[dict[str, Any]] = ... + arg_types: Optional[dict[str, Any]] = ... + arg_desc: Optional[dict[str, str]] = ... + has_kwargs: bool = ... + def __init__( + self, + func: Callable, + name: Optional[str] = ..., + desc: Optional[str] = ..., + args: Optional[dict[str, Any]] = ..., + arg_types: Optional[dict[str, Any]] = ..., + arg_desc: Optional[dict[str, str]] = ..., + ) -> None: + """Initialize the Tool class. + + Users can choose to specify the `name`, `desc`, `args`, and `arg_types`, or let the `dspy.Tool` + automatically infer the values from the function. For values that are specified by the user, automatic inference + will not be performed on them. + + Args: + func (Callable): The actual function that is being wrapped by the tool. + name (Optional[str], optional): The name of the tool. Defaults to None. + desc (Optional[str], optional): The description of the tool. Defaults to None. + args (Optional[dict[str, Any]], optional): The args and their schema of the tool, represented as a + dictionary from arg name to arg's json schema. Defaults to None. + arg_types (Optional[dict[str, Any]], optional): The argument types of the tool, represented as a dictionary + from arg name to the type of the argument. Defaults to None. + arg_desc (Optional[dict[str, str]], optional): Descriptions for each arg, represented as a + dictionary from arg name to description string. Defaults to None. 
+ + Example: + + ```python + def foo(x: int, y: str = "hello"): + return str(x) + y + + tool = Tool(foo) + print(tool.args) + # Expected output: {'x': {'type': 'integer'}, 'y': {'type': 'string', 'default': 'hello'}} + ``` + """ + ... + + def format(self): # -> str: + ... + def format_as_litellm_function_call( + self, + ): # -> dict[str, str | dict[str, str | dict[str, str | dict[str, Any] | list[str] | None] | None]]: + ... + @with_callbacks + def __call__(self, **kwargs): ... + @with_callbacks + async def acall(self, **kwargs): # -> Any: + ... + @classmethod + def from_mcp_tool(cls, session: mcp.client.session.ClientSession, tool: mcp.types.Tool) -> Tool: + """ + Build a DSPy tool from an MCP tool and a ClientSession. + + Args: + session: The MCP session to use. + tool: The MCP tool to convert. + + Returns: + A Tool object. + """ + ... + + @classmethod + def from_langchain(cls, tool: BaseTool) -> Tool: + """ + Build a DSPy tool from a LangChain tool. + + Args: + tool: The LangChain tool to convert. + + Returns: + A Tool object. + + Example: + ```python + from langchain.tools import tool + import dspy + + @tool + def add(x: int, y: int): + "Add two numbers together." + return x + y + + tool = dspy.Tool.from_langchain(add) + print(await tool.acall(x=1, y=2)) + # 3 + ``` + """ + ... + + def __repr__(self): # -> str: + ... + def __str__(self) -> str: ... + +class ToolCalls(BaseType): + class ToolCall(BaseModel): + name: str + args: dict[str, Any] + ... + + tool_calls: list[ToolCall] + @classmethod + def from_dict_list(cls, tool_calls_dicts: list[dict[str, Any]]) -> ToolCalls: + """Convert a list of dictionaries to a ToolCalls instance. + + Args: + tool_calls_dicts: A list of dictionaries, where each dictionary should have 'name' and 'args' keys. + + Returns: + A ToolCalls instance.
+ + Example: + + ```python + tool_calls_dict = [ + {"name": "search", "args": {"query": "hello"}}, + {"name": "translate", "args": {"text": "world"}} + ] + tool_calls = ToolCalls.from_dict_list(tool_calls_dict) + ``` + """ + ... + + @classmethod + def description(cls) -> str: ... + +def convert_input_schema_to_tool_args( + schema: dict[str, Any], +) -> Tuple[dict[str, Any], dict[str, Type], dict[str, str]]: + """Convert an input json schema to tool arguments compatible with DSPy Tool. + + Args: + schema: An input json schema describing the tool's input parameters + + Returns: + A tuple of (args, arg_types, arg_desc) for DSPy Tool definition. + """ + ... diff --git a/typings/dspy/adapters/utils.pyi b/typings/dspy/adapters/utils.pyi new file mode 100644 index 0000000..4d6638e --- /dev/null +++ b/typings/dspy/adapters/utils.pyi @@ -0,0 +1,52 @@ +""" +This type stub file was generated by pyright. +""" + +from typing import Any, Union +from pydantic.fields import FieldInfo + +def serialize_for_json(value: Any) -> Any: + """ + Formats the specified value so that it can be serialized as a JSON string. + + Args: + value: The value to format as a JSON string. + Returns: + The formatted value, which is serializable as a JSON string. + """ + ... + +def format_field_value(field_info: FieldInfo, value: Any, assume_text=...) -> Union[str, dict]: + """ + Formats the value of the specified field according to the field's DSPy type (input or output), + annotation (e.g. str, int, etc.), and the type of the value itself. + + Args: + field_info: Information about the field, including its DSPy field type and annotation. + value: The value of the field. + Returns: + The formatted value of the field, represented as a string. + """ + ... + +def translate_field_type(field_name, field_info): # -> str: + ... +def find_enum_member(enum, identifier): + """ + Finds the enum member corresponding to the specified identifier, which may be the + enum member's name or value. 
+ + Args: + enum: The enum to search for the member. + identifier: If the enum is explicitly-valued, this is the value of the enum member to find. + If the enum is auto-valued, this is the name of the enum member to find. + Returns: + The enum member corresponding to the specified identifier. + """ + ... + +def parse_value(value, annotation): # -> str | EnumMeta: + ... +def get_annotation_name(annotation): # -> str: + ... +def get_field_description_string(fields: dict) -> str: ... diff --git a/typings/dspy/clients/__init__.pyi b/typings/dspy/clients/__init__.pyi new file mode 100644 index 0000000..ec0340e --- /dev/null +++ b/typings/dspy/clients/__init__.pyi @@ -0,0 +1,59 @@ +""" +This type stub file was generated by pyright. +""" + +import logging +import os +import litellm +from pathlib import Path +from typing import Optional +from litellm.caching.caching import Cache as LitellmCache +from dspy.clients.base_lm import BaseLM, inspect_history +from dspy.clients.cache import Cache +from dspy.clients.embedding import Embedder +from dspy.clients.lm import LM +from dspy.clients.provider import Provider, TrainingJob + +logger = ... +DISK_CACHE_DIR = ... +DISK_CACHE_LIMIT = ... + +def configure_cache( + enable_disk_cache: Optional[bool] = ..., + enable_memory_cache: Optional[bool] = ..., + disk_cache_dir: Optional[str] = ..., + disk_size_limit_bytes: Optional[int] = ..., + memory_max_entries: Optional[int] = ..., + enable_litellm_cache: bool = ..., +): # -> None: + """Configure the cache for DSPy. + + Args: + enable_disk_cache: Whether to enable on-disk cache. + enable_memory_cache: Whether to enable in-memory cache. + disk_cache_dir: The directory to store the on-disk cache. + disk_size_limit_bytes: The size limit of the on-disk cache. + memory_max_entries: The maximum number of entries in the in-memory cache. + enable_litellm_cache: Whether to enable LiteLLM cache. + """ + ... + +DSPY_CACHE = ... +if "LITELLM_LOCAL_MODEL_COST_MAP" not in os.environ: ... 
+ +def enable_litellm_logging(): # -> None: + ... +def disable_litellm_logging(): # -> None: + ... + +__all__ = [ + "BaseLM", + "LM", + "Provider", + "TrainingJob", + "inspect_history", + "Embedder", + "enable_litellm_logging", + "disable_litellm_logging", + "configure_cache", +] diff --git a/typings/dspy/clients/base_lm.pyi b/typings/dspy/clients/base_lm.pyi new file mode 100644 index 0000000..1962a8e --- /dev/null +++ b/typings/dspy/clients/base_lm.pyi @@ -0,0 +1,77 @@ +""" +This type stub file was generated by pyright. +""" + +from dspy.utils.callback import with_callbacks + +MAX_HISTORY_SIZE = ... +GLOBAL_HISTORY = ... + +class BaseLM: + """Base class for handling LLM calls. + + Most users can directly use the `dspy.LM` class, which is a subclass of `BaseLM`. Users can also implement their + own subclasses of `BaseLM` to support custom LLM providers and inject custom logic. To do so, simply override the + `forward` method and make sure the return format is identical to the + [OpenAI response format](https://platform.openai.com/docs/api-reference/responses/object). + + Example: + + ```python + from openai import OpenAI + + import dspy + + + class MyLM(dspy.BaseLM): + def forward(self, prompt, messages=None, **kwargs): + client = OpenAI() + return client.chat.completions.create( + model=self.model, + messages=messages or [{"role": "user", "content": prompt}], + **self.kwargs, + ) + + + lm = MyLM(model="gpt-4o-mini") + dspy.configure(lm=lm) + print(dspy.Predict("q->a")(q="Why did the chicken cross the kitchen?")) + ``` + """ + def __init__( + self, model, model_type=..., temperature=..., max_tokens=..., cache=..., **kwargs + ) -> None: ... + @with_callbacks + def __call__(self, prompt=..., messages=..., **kwargs): # -> list[Any]: + ... + @with_callbacks + async def acall(self, prompt=..., messages=..., **kwargs): # -> list[Any]: + ... + def forward(self, prompt=..., messages=..., **kwargs): + """Forward pass for the language model. 
+ + Subclasses must implement this method, and the response should be identical to + [OpenAI response format](https://platform.openai.com/docs/api-reference/responses/object). + """ + ... + + async def aforward(self, prompt=..., messages=..., **kwargs): + """Async forward pass for the language model. + + Subclasses that support async should implement this method, and the response should be identical to + [OpenAI response format](https://platform.openai.com/docs/api-reference/responses/object). + """ + ... + + def copy(self, **kwargs): # -> Self: + """Returns a copy of the language model with possibly updated parameters.""" + ... + + def inspect_history(self, n: int = ...): # -> None: + ... + def update_global_history(self, entry): # -> None: + ... + +def inspect_history(n: int = ...): # -> None: + """The global history shared across all LMs.""" + ... diff --git a/typings/dspy/clients/cache.pyi b/typings/dspy/clients/cache.pyi new file mode 100644 index 0000000..e600838 --- /dev/null +++ b/typings/dspy/clients/cache.pyi @@ -0,0 +1,79 @@ +""" +This type stub file was generated by pyright. +""" + +from typing import Any, Dict, Optional + +logger = ... + +class Cache: + """DSPy Cache + + `Cache` provides 2 levels of caching (in the given order): + 1. In-memory cache - implemented with cachetools.LRUCache + 2. On-disk cache - implemented with diskcache.FanoutCache + """ + def __init__( + self, + enable_disk_cache: bool, + enable_memory_cache: bool, + disk_cache_dir: str, + disk_size_limit_bytes: Optional[int] = ..., + memory_max_entries: Optional[int] = ..., + ) -> None: + """ + Args: + enable_disk_cache: Whether to enable on-disk cache. + enable_memory_cache: Whether to enable in-memory cache. + disk_cache_dir: The directory where the disk cache is stored. + disk_size_limit_bytes: The maximum size of the disk cache (in bytes). + memory_max_entries: The maximum size of the in-memory cache (in number of items). + """ + ... 
+ + def __contains__(self, key: str) -> bool: + """Check if a key is in the cache.""" + ... + + def cache_key( + self, request: Dict[str, Any], ignored_args_for_cache_key: Optional[list[str]] = ... + ) -> str: + """ + Obtain a unique cache key for the given request dictionary by hashing its JSON + representation. For request fields having types that are known to be JSON-incompatible, + convert them to a JSON-serializable format before hashing. + """ + ... + + def get( + self, request: Dict[str, Any], ignored_args_for_cache_key: Optional[list[str]] = ... + ) -> Any: ... + def put( + self, + request: Dict[str, Any], + value: Any, + ignored_args_for_cache_key: Optional[list[str]] = ..., + enable_memory_cache: bool = ..., + ) -> None: ... + def reset_memory_cache(self) -> None: ... + def save_memory_cache(self, filepath: str) -> None: ... + def load_memory_cache(self, filepath: str) -> None: ... + +def request_cache( + cache_arg_name: Optional[str] = ..., + ignored_args_for_cache_key: Optional[list[str]] = ..., + enable_memory_cache: bool = ..., + *, + maxsize: Optional[int] = ..., +): # -> Callable[..., _Wrapped[..., Any, ..., CoroutineType[Any, Any, Any]] | _Wrapped[..., Any, ..., Any]]: + """ + Decorator for applying caching to a function based on the request argument. + + Args: + cache_arg_name: The name of the argument that contains the request. If not provided, the entire kwargs is used + as the request. + ignored_args_for_cache_key: A list of arguments to ignore when computing the cache key from the request. + enable_memory_cache: Whether to enable in-memory cache at call time. If False, the memory cache will not be + written to on new data. + """ + ... diff --git a/typings/dspy/clients/databricks.pyi b/typings/dspy/clients/databricks.pyi new file mode 100644 index 0000000..1013482 --- /dev/null +++ b/typings/dspy/clients/databricks.pyi @@ -0,0 +1,45 @@ +""" +This type stub file was generated by pyright. 
+""" + +from typing import Any, Dict, List, Optional, TYPE_CHECKING, Union +from dspy.clients.provider import Provider, TrainingJob +from dspy.clients.utils_finetune import TrainDataFormat + +if TYPE_CHECKING: ... +logger = ... + +class TrainingJobDatabricks(TrainingJob): + def __init__(self, finetuning_run=..., *args, **kwargs) -> None: ... + def status(self): # -> None: + ... + +class DatabricksProvider(Provider): + finetunable = ... + TrainingJob = TrainingJobDatabricks + @staticmethod + def is_provider_model(model: str) -> bool: ... + @staticmethod + def deploy_finetuned_model( + model: str, + data_format: Optional[TrainDataFormat] = ..., + databricks_host: Optional[str] = ..., + databricks_token: Optional[str] = ..., + deploy_timeout: int = ..., + ): # -> None: + ... + @staticmethod + def finetune( + job: TrainingJobDatabricks, + model: str, + train_data: List[Dict[str, Any]], + train_data_format: Optional[Union[TrainDataFormat, str]] = ..., + train_kwargs: Optional[Dict[str, Any]] = ..., + ) -> str: ... + @staticmethod + def upload_data( + train_data: List[Dict[str, Any]], + databricks_unity_catalog_path: str, + data_format: TrainDataFormat, + ): # -> str: + ... diff --git a/typings/dspy/clients/embedding.pyi b/typings/dspy/clients/embedding.pyi new file mode 100644 index 0000000..c56d7b9 --- /dev/null +++ b/typings/dspy/clients/embedding.pyi @@ -0,0 +1,98 @@ +""" +This type stub file was generated by pyright. +""" + +class Embedder: + """DSPy embedding class. + + The class for computing embeddings for text inputs. This class provides a unified interface for both: + + 1. Hosted embedding models (e.g. OpenAI's text-embedding-3-small) via litellm integration + 2. Custom embedding functions that you provide + + For hosted models, simply pass the model name as a string (e.g., "openai/text-embedding-3-small"). The class will use + litellm to handle the API calls and caching. 
+ + For custom embedding models, pass a callable function that: + - Takes a list of strings as input. + - Returns embeddings as either: + - A 2D numpy array of float32 values + - A 2D list of float32 values + - Each row should represent one embedding vector + + Args: + model: The embedding model to use. This can be either a string (representing the name of the hosted embedding + model, must be an embedding model supported by litellm) or a callable that represents a custom embedding + model. + batch_size (int, optional): The default batch size for processing inputs in batches. Defaults to 200. + caching (bool, optional): Whether to cache the embedding response when using a hosted model. Defaults to True. + **kwargs: Additional default keyword arguments to pass to the embedding model. + + Examples: + Example 1: Using a hosted model. + + ```python + import dspy + + embedder = dspy.Embedder("openai/text-embedding-3-small", batch_size=100) + embeddings = embedder(["hello", "world"]) + + assert embeddings.shape == (2, 1536) + ``` + + Example 2: Using any local embedding model, e.g. from https://huggingface.co/models?library=sentence-transformers. + + ```python + # pip install sentence_transformers + import dspy + from sentence_transformers import SentenceTransformer + + # Load an extremely efficient local model for retrieval + model = SentenceTransformer("sentence-transformers/static-retrieval-mrl-en-v1", device="cpu") + + embedder = dspy.Embedder(model.encode) + embeddings = embedder(["hello", "world"], batch_size=1) + + assert embeddings.shape == (2, 1024) + ``` + + Example 3: Using a custom function. + + ```python + import dspy + import numpy as np + + def my_embedder(texts): + return np.random.rand(len(texts), 10) + + embedder = dspy.Embedder(my_embedder) + embeddings = embedder(["hello", "world"], batch_size=1) + + assert embeddings.shape == (2, 10) + ``` + """ + def __init__(self, model, batch_size=..., caching=..., **kwargs) -> None: ... 
+ def __call__( + self, inputs, batch_size=..., caching=..., **kwargs + ): # -> Any | NDArray[floating[_32Bit]]: + """Compute embeddings for the given inputs. + + Args: + inputs: The inputs to compute embeddings for, can be a single string or a list of strings. + batch_size (int, optional): The batch size for processing inputs. If None, defaults to the batch_size set + during initialization. + caching (bool, optional): Whether to cache the embedding response when using a hosted model. If None, + defaults to the caching setting from initialization. + **kwargs: Additional keyword arguments to pass to the embedding model. These will override the default + kwargs provided during initialization. + + Returns: + numpy.ndarray: If the input is a single string, returns a 1D numpy array representing the embedding. + If the input is a list of strings, returns a 2D numpy array of embeddings, one embedding per row. + """ + ... + + async def acall( + self, inputs, batch_size=..., caching=..., **kwargs + ): # -> Any | NDArray[floating[_32Bit]]: + ... diff --git a/typings/dspy/clients/lm.pyi b/typings/dspy/clients/lm.pyi new file mode 100644 index 0000000..566be67 --- /dev/null +++ b/typings/dspy/clients/lm.pyi @@ -0,0 +1,89 @@ +""" +This type stub file was generated by pyright. +""" + +from typing import Any, Dict, List, Literal, Optional +from dspy.clients.provider import Provider, ReinforceJob, TrainingJob +from dspy.clients.utils_finetune import TrainDataFormat +from dspy.utils.callback import BaseCallback +from .base_lm import BaseLM + +logger = ... + +class LM(BaseLM): + """ + A language model supporting chat or text completion requests for use with DSPy modules. 
+ """ + def __init__( + self, + model: str, + model_type: Literal["chat", "text"] = ..., + temperature: float = ..., + max_tokens: int = ..., + cache: bool = ..., + cache_in_memory: bool = ..., + callbacks: Optional[List[BaseCallback]] = ..., + num_retries: int = ..., + provider=..., + finetuning_model: Optional[str] = ..., + launch_kwargs: Optional[dict[str, Any]] = ..., + train_kwargs: Optional[dict[str, Any]] = ..., + **kwargs, + ) -> None: + """ + Create a new language model instance for use with DSPy modules and programs. + + Args: + model: The model to use. This should be a string of the form ``"llm_provider/llm_name"`` + supported by LiteLLM. For example, ``"openai/gpt-4o"``. + model_type: The type of the model, either ``"chat"`` or ``"text"``. + temperature: The sampling temperature to use when generating responses. + max_tokens: The maximum number of tokens to generate per response. + cache: Whether to cache the model responses for reuse to improve performance + and reduce costs. + cache_in_memory (deprecated): To enable additional caching with LRU in memory. + callbacks: A list of callback functions to run before and after each request. + num_retries: The number of times to retry a request if it fails transiently due to + network error, rate limiting, etc. Requests are retried with exponential + backoff. + provider: The provider to use. If not specified, the provider will be inferred from the model. + finetuning_model: The model to finetune. In some providers, the models available for finetuning is different + from the models available for inference. + """ + ... + + def forward(self, prompt=..., messages=..., **kwargs): # -> Any | CoroutineType[Any, Any, Any]: + ... + async def aforward(self, prompt=..., messages=..., **kwargs): # -> Any: + ... + def launch(self, launch_kwargs: Optional[Dict[str, Any]] = ...): # -> None: + ... + def kill(self, launch_kwargs: Optional[Dict[str, Any]] = ...): # -> None: + ... 
+ def finetune( + self, + train_data: List[Dict[str, Any]], + train_data_format: Optional[TrainDataFormat], + train_kwargs: Optional[Dict[str, Any]] = ..., + ) -> TrainingJob: ... + def reinforce(self, train_kwargs) -> ReinforceJob: ... + def infer_provider(self) -> Provider: ... + def dump_state(self): # -> dict[str, Any]: + ... + +def litellm_completion( + request: Dict[str, Any], num_retries: int, cache: Optional[Dict[str, Any]] = ... +): # -> ModelResponse | CustomStreamWrapper | TextCompletionResponse | CoroutineType[Any, Any, ModelResponse | TextCompletionResponse | None] | None: + ... +def litellm_text_completion( + request: Dict[str, Any], num_retries: int, cache: Optional[Dict[str, Any]] = ... +): # -> TextCompletionResponse | ModelResponse | CustomStreamWrapper | TextCompletionStreamWrapper | | : + ... +async def alitellm_completion( + request: Dict[str, Any], num_retries: int, cache: Optional[Dict[str, Any]] = ... +): # -> ModelResponse | CustomStreamWrapper | TextCompletionResponse | None: + ... +async def alitellm_text_completion( + request: Dict[str, Any], num_retries: int, cache: Optional[Dict[str, Any]] = ... +): # -> TextCompletionResponse | TextCompletionStreamWrapper: + ... diff --git a/typings/dspy/clients/lm_local.pyi b/typings/dspy/clients/lm_local.pyi new file mode 100644 index 0000000..298fd4e --- /dev/null +++ b/typings/dspy/clients/lm_local.pyi @@ -0,0 +1,56 @@ +""" +This type stub file was generated by pyright. +""" + +from typing import Any, Dict, List, Optional, TYPE_CHECKING +from dspy.clients.provider import Provider, TrainingJob +from dspy.clients.utils_finetune import TrainDataFormat +from dspy.clients.lm import LM + +if TYPE_CHECKING: ... +logger = ... + +class LocalProvider(Provider): + def __init__(self) -> None: ... + @staticmethod + def launch(lm: LM, launch_kwargs: Optional[Dict[str, Any]] = ...): # -> None: + ... + @staticmethod + def kill(lm: LM, launch_kwargs: Optional[Dict[str, Any]] = ...): # -> None: + ... 
+ @staticmethod + def finetune( + job: TrainingJob, + model: str, + train_data: List[Dict[str, Any]], + train_data_format: Optional[TrainDataFormat], + train_kwargs: Optional[Dict[str, Any]] = ..., + ) -> str: ... + +def create_output_dir(model_name, data_path): ... +def train_sft_locally(model_name, train_data, train_kwargs): ... +def get_free_port() -> int: + """ + Return a free TCP port on localhost. + """ + ... + +def wait_for_server(base_url: str, timeout: Optional[int] = ...) -> None: + """ + Wait for the server to be ready by polling the /v1/models endpoint. + + Args: + base_url: The base URL of the server (e.g. http://localhost:1234) + timeout: Maximum time to wait in seconds. None means wait forever. + """ + ... + +def encode_sft_example(example, tokenizer, max_seq_length): # -> dict[str, Any]: + """ + This function encodes a single example into a format that can be used for sft training. + Here, we assume each example has a 'messages' field. Each message in it is a dict with 'role' and 'content' fields. + We use the `apply_chat_template` function from the tokenizer to tokenize the messages and prepare the input and label tensors. + + Code obtained from the allenai/open-instruct repository: https://github.com/allenai/open-instruct/blob/4365dea3d1a6111e8b2712af06b22a4512a0df88/open_instruct/finetune.py + """ + ... diff --git a/typings/dspy/clients/lm_local_arbor.pyi b/typings/dspy/clients/lm_local_arbor.pyi new file mode 100644 index 0000000..4f389d9 --- /dev/null +++ b/typings/dspy/clients/lm_local_arbor.pyi @@ -0,0 +1,77 @@ +""" +This type stub file was generated by pyright. +""" + +from typing import Any, Dict, List, Optional, TYPE_CHECKING, TypedDict, Union +from dspy.clients.provider import Provider, ReinforceJob, TrainingJob +from dspy.clients.utils_finetune import GRPOGroup, TrainDataFormat, TrainingStatus +from dspy.clients.lm import LM + +if TYPE_CHECKING: ... + +class GRPOTrainKwargs(TypedDict): + num_generations: int + ... 
+ +class ArborTrainingJob(TrainingJob): + def __init__(self, *args, **kwargs) -> None: ... + def cancel(self): # -> None: + ... + def status(self) -> TrainingStatus: ... + +class ArborReinforceJob(ReinforceJob): + DEFAULT_TRAIN_KWARGS = ... + def __init__(self, lm: LM, train_kwargs: GRPOTrainKwargs) -> None: ... + def initialize(self): # -> None: + ... + def step( + self, train_data: List[GRPOGroup], train_data_format: Optional[Union[TrainDataFormat, str]] + ): # -> None: + ... + def update_model(self): # -> None: + ... + def save_checkpoint(self, checkpoint_name: str, score: Optional[float] = ...): # -> None: + ... + def terminate(self): # -> None: + ... + def cancel(self): # -> None: + ... + def status(self) -> TrainingStatus: ... + +class ArborProvider(Provider): + def __init__(self) -> None: ... + @staticmethod + def launch(lm: LM, launch_kwargs: Optional[Dict[str, Any]] = ...): # -> None: + ... + @staticmethod + def kill(lm: LM, launch_kwargs: Optional[Dict[str, Any]] = ...): # -> None: + ... + @staticmethod + def finetune( + job: ArborTrainingJob, + model: str, + train_data: List[Dict[str, Any]], + train_data_format: Optional[TrainDataFormat], + train_kwargs: Optional[Dict[str, Any]] = ..., + ) -> str: ... + @staticmethod + def does_job_exist(job_id: str, training_kwargs: Dict[str, Any]) -> bool: ... + @staticmethod + def does_file_exist(file_id: str, training_kwargs: Dict[str, Any]) -> bool: ... + @staticmethod + def is_terminal_training_status(status: TrainingStatus) -> bool: ... + @staticmethod + def get_training_status(job_id: str, training_kwargs: Dict[str, Any]) -> TrainingStatus: ... + @staticmethod + def validate_data_format(data_format: TrainDataFormat): # -> None: + ... + @staticmethod + def upload_data(data_path: str, training_kwargs: Dict[str, Any]) -> str: ... + @staticmethod + def wait_for_job( + job: TrainingJob, training_kwargs: Dict[str, Any], poll_frequency: int = ... + ): # -> None: + ... 
+ @staticmethod + def get_trained_model(job, training_kwargs: Dict[str, Any]): # -> str | None: + ... diff --git a/typings/dspy/clients/openai.pyi b/typings/dspy/clients/openai.pyi new file mode 100644 index 0000000..207364c --- /dev/null +++ b/typings/dspy/clients/openai.pyi @@ -0,0 +1,47 @@ +""" +This type stub file was generated by pyright. +""" + +from typing import Any, Dict, List, Optional +from dspy.clients.provider import Provider, TrainingJob +from dspy.clients.utils_finetune import TrainDataFormat, TrainingStatus + +_OPENAI_MODELS = ... + +class TrainingJobOpenAI(TrainingJob): + def __init__(self, *args, **kwargs) -> None: ... + def cancel(self): # -> None: + ... + def status(self) -> TrainingStatus: ... + +class OpenAIProvider(Provider): + def __init__(self) -> None: ... + @staticmethod + def is_provider_model(model: str) -> bool: ... + @staticmethod + def finetune( + job: TrainingJobOpenAI, + model: str, + train_data: List[Dict[str, Any]], + train_data_format: Optional[TrainDataFormat], + train_kwargs: Optional[Dict[str, Any]] = ..., + ) -> str: ... + @staticmethod + def does_job_exist(job_id: str) -> bool: ... + @staticmethod + def does_file_exist(file_id: str) -> bool: ... + @staticmethod + def is_terminal_training_status(status: TrainingStatus) -> bool: ... + @staticmethod + def get_training_status(job_id: str) -> TrainingStatus: ... + @staticmethod + def validate_data_format(data_format: TrainDataFormat): # -> None: + ... + @staticmethod + def upload_data(data_path: str) -> str: ... + @staticmethod + def wait_for_job(job: TrainingJobOpenAI, poll_frequency: int = ...): # -> None: + ... + @staticmethod + def get_trained_model(job): # -> str | None: + ... diff --git a/typings/dspy/clients/provider.pyi b/typings/dspy/clients/provider.pyi new file mode 100644 index 0000000..68a0f8f --- /dev/null +++ b/typings/dspy/clients/provider.pyi @@ -0,0 +1,64 @@ +""" +This type stub file was generated by pyright. 
+""" + +from abc import abstractmethod +from concurrent.futures import Future +from threading import Thread +from typing import Any, Dict, List, Optional, TYPE_CHECKING, Union +from dspy.clients.utils_finetune import TrainDataFormat +from dspy.clients.lm import LM + +if TYPE_CHECKING: ... + +class TrainingJob(Future): + def __init__( + self, + thread: Optional[Thread] = ..., + model: Optional[str] = ..., + train_data: Optional[List[Dict[str, Any]]] = ..., + train_data_format: Optional[TrainDataFormat] = ..., + train_kwargs: Optional[Dict[str, Any]] = ..., + ) -> None: ... + def cancel(self): # -> None: + ... + @abstractmethod + def status(self): ... + +class ReinforceJob: + def __init__(self, lm: LM, train_kwargs: Optional[Dict[str, Any]] = ...) -> None: ... + @abstractmethod + def initialize(self): ... + @abstractmethod + def step( + self, + train_data: List[Dict[str, Any]], + train_data_format: Optional[Union[TrainDataFormat, str]] = ..., + ): ... + @abstractmethod + def terminate(self): ... + @abstractmethod + def update_model(self): ... + @abstractmethod + def save_checkpoint(self, checkpoint_name: str): ... + def cancel(self): ... + def status(self): ... + +class Provider: + def __init__(self) -> None: ... + @staticmethod + def is_provider_model(model: str) -> bool: ... + @staticmethod + def launch(lm: LM, launch_kwargs: Optional[Dict[str, Any]] = ...): # -> None: + ... + @staticmethod + def kill(lm: LM, launch_kwargs: Optional[Dict[str, Any]] = ...): # -> None: + ... + @staticmethod + def finetune( + job: TrainingJob, + model: str, + train_data: List[Dict[str, Any]], + train_data_format: Optional[Union[TrainDataFormat, str]], + train_kwargs: Optional[Dict[str, Any]] = ..., + ) -> str: ... diff --git a/typings/dspy/clients/utils_finetune.pyi b/typings/dspy/clients/utils_finetune.pyi new file mode 100644 index 0000000..f747703 --- /dev/null +++ b/typings/dspy/clients/utils_finetune.pyi @@ -0,0 +1,49 @@ +""" +This type stub file was generated by pyright. 
+""" + +from enum import Enum +from typing import Any, Dict, List, Literal, Optional, TypedDict, Union +from dspy.adapters.base import Adapter + +class TrainingStatus(str, Enum): + not_started = ... + pending = ... + running = ... + succeeded = ... + failed = ... + cancelled = ... + +class TrainDataFormat(str, Enum): + CHAT = ... + COMPLETION = ... + GRPO_CHAT = ... + +class Message(TypedDict): + role: Union[Literal["user"], Literal["assistant"], Literal["system"]] + content: str + ... + +class MessageAssistant(TypedDict): + role: Literal["assistant"] + content: str + ... + +class GRPOChatData(TypedDict): + messages: List[Message] + completion: MessageAssistant + reward: float + ... + +GRPOGroup = List[GRPOChatData] + +def infer_data_format(adapter: Adapter) -> str: ... +def get_finetune_directory() -> str: ... +def write_lines(file_path, data): # -> None: + ... +def save_data(data: List[Dict[str, Any]]) -> str: ... +def validate_data_format(data: List[Dict[str, Any]], data_format: TrainDataFormat): # -> None: + ... +def find_data_errors_completion(data_dict: Dict[str, str]) -> Optional[str]: ... +def find_data_error_chat(messages: Dict[str, Any]) -> Optional[str]: ... +def find_data_error_chat_message(message: Dict[str, Any]) -> Optional[str]: ... diff --git a/typings/dspy/datasets/__init__.pyi b/typings/dspy/datasets/__init__.pyi new file mode 100644 index 0000000..22ac983 --- /dev/null +++ b/typings/dspy/datasets/__init__.pyi @@ -0,0 +1,12 @@ +""" +This type stub file was generated by pyright. 
+""" + +from dspy.datasets.alfworld import AlfWorld +from dspy.datasets.colors import Colors +from dspy.datasets.dataloader import DataLoader +from dspy.datasets.dataset import Dataset +from dspy.datasets.hotpotqa import HotPotQA +from dspy.datasets.math import MATH + +__all__ = ["Colors", "DataLoader", "Dataset", "HotPotQA", "MATH"] diff --git a/typings/dspy/datasets/alfworld/__init__.pyi b/typings/dspy/datasets/alfworld/__init__.pyi new file mode 100644 index 0000000..9fe206d --- /dev/null +++ b/typings/dspy/datasets/alfworld/__init__.pyi @@ -0,0 +1,5 @@ +""" +This type stub file was generated by pyright. +""" + +from dspy.datasets.alfworld.alfworld import AlfWorld diff --git a/typings/dspy/datasets/alfworld/alfworld.pyi b/typings/dspy/datasets/alfworld/alfworld.pyi new file mode 100644 index 0000000..5b956b9 --- /dev/null +++ b/typings/dspy/datasets/alfworld/alfworld.pyi @@ -0,0 +1,46 @@ +""" +This type stub file was generated by pyright. +""" + +def env_worker(inq, outq): # -> None: + """ + Worker process: creates a single AlfredTWEnv instance, + handles 'init' (with task idx) and 'step' (with action). + """ + ... + +class EnvPool: + """ + Pool of processes, each with a unique env_worker. + Acquire a worker using a context manager for safe usage: + with pool.session() as sess: + sess.init(5) # init with idx=5 + obs, rew, done, info = sess.step("go north") + ... + """ + def __init__(self, size=...) -> None: ... + def close_all(self): # -> None: + """Close all processes in the pool.""" + ... + + def session(self): # -> _EnvSession: + """Context manager that acquires/releases a single worker.""" + ... + +class _EnvSession: + """ + A context manager that acquires a worker from the pool, + provides .init(idx) and .step(action), then releases the worker. + """ + def __init__(self, pool: EnvPool) -> None: ... + def __enter__(self): # -> Self: + ... + def __exit__(self, exc_type, exc_val, exc_tb): # -> None: + ... + def init(self, idx): ... 
+ def step(self, action): ... + +class AlfWorld: + def __init__(self, max_threads=...) -> None: ... + def __del__(self): # -> None: + ... diff --git a/typings/dspy/datasets/colors.pyi b/typings/dspy/datasets/colors.pyi new file mode 100644 index 0000000..04e07f3 --- /dev/null +++ b/typings/dspy/datasets/colors.pyi @@ -0,0 +1,12 @@ +""" +This type stub file was generated by pyright. +""" + +from dspy.datasets.dataset import Dataset + +all_colors = ... + +class Colors(Dataset): + def __init__(self, sort_by_suffix=..., *args, **kwargs) -> None: ... + def sorted_by_suffix(self, colors): # -> list[Any]: + ... diff --git a/typings/dspy/datasets/dataloader.pyi b/typings/dspy/datasets/dataloader.pyi new file mode 100644 index 0000000..0585a39 --- /dev/null +++ b/typings/dspy/datasets/dataloader.pyi @@ -0,0 +1,47 @@ +""" +This type stub file was generated by pyright. +""" + +import dspy +import pandas as pd +from collections.abc import Mapping +from typing import List, Optional, TYPE_CHECKING, Tuple, Union +from dspy.datasets.dataset import Dataset + +if TYPE_CHECKING: ... + +class DataLoader(Dataset): + def __init__(self) -> None: ... + def from_huggingface( + self, + dataset_name: str, + *args, + input_keys: Tuple[str] = ..., + fields: Optional[Tuple[str]] = ..., + **kwargs, + ) -> Union[Mapping[str, List[dspy.Example]], List[dspy.Example]]: ... + def from_csv( + self, file_path: str, fields: Optional[List[str]] = ..., input_keys: Tuple[str] = ... + ) -> List[dspy.Example]: ... + def from_pandas( + self, df: pd.DataFrame, fields: Optional[List[str]] = ..., input_keys: tuple[str] = ... + ) -> list[dspy.Example]: ... + def from_json( + self, file_path: str, fields: Optional[List[str]] = ..., input_keys: Tuple[str] = ... + ) -> List[dspy.Example]: ... + def from_parquet( + self, file_path: str, fields: Optional[List[str]] = ..., input_keys: Tuple[str] = ... + ) -> List[dspy.Example]: ... 
+ def from_rm( + self, num_samples: int, fields: List[str], input_keys: List[str] + ) -> List[dspy.Example]: ... + def sample( + self, dataset: List[dspy.Example], n: int, *args, **kwargs + ) -> List[dspy.Example]: ... + def train_test_split( + self, + dataset: List[dspy.Example], + train_size: Union[int, float] = ..., + test_size: Optional[Union[int, float]] = ..., + random_state: Optional[int] = ..., + ) -> Mapping[str, List[dspy.Example]]: ... diff --git a/typings/dspy/datasets/dataset.pyi b/typings/dspy/datasets/dataset.pyi new file mode 100644 index 0000000..6b337e8 --- /dev/null +++ b/typings/dspy/datasets/dataset.pyi @@ -0,0 +1,38 @@ +""" +This type stub file was generated by pyright. +""" + +class Dataset: + def __init__( + self, + train_seed=..., + train_size=..., + eval_seed=..., + dev_size=..., + test_size=..., + input_keys=..., + ) -> None: ... + def reset_seeds( + self, train_seed=..., train_size=..., eval_seed=..., dev_size=..., test_size=... + ): # -> None: + ... + @property + def train(self): # -> list[Any]: + ... + @property + def dev(self): # -> list[Any]: + ... + @property + def test(self): # -> list[Any]: + ... + @classmethod + def prepare_by_seed( + cls, + train_seeds=..., + train_size=..., + dev_size=..., + divide_eval_per_seed=..., + eval_seed=..., + **kwargs, + ): # -> dotdict: + ... diff --git a/typings/dspy/datasets/hotpotqa.pyi b/typings/dspy/datasets/hotpotqa.pyi new file mode 100644 index 0000000..91daeb3 --- /dev/null +++ b/typings/dspy/datasets/hotpotqa.pyi @@ -0,0 +1,14 @@ +""" +This type stub file was generated by pyright. +""" + +from dspy.datasets.dataset import Dataset + +class HotPotQA(Dataset): + def __init__( + self, *args, only_hard_examples=..., keep_details=..., unofficial_dev=..., **kwargs + ) -> None: ... + +if __name__ == "__main__": + data_args = ... + dataset = ... 
diff --git a/typings/dspy/datasets/math.pyi b/typings/dspy/datasets/math.pyi new file mode 100644 index 0000000..2a72785 --- /dev/null +++ b/typings/dspy/datasets/math.pyi @@ -0,0 +1,10 @@ +""" +This type stub file was generated by pyright. +""" + +class MATH: + def __init__(self, subset) -> None: ... + def metric(self, example, pred, trace=...): ... + +def extract_answer(s): # -> str | None: + ... diff --git a/typings/dspy/dsp/__init__.pyi b/typings/dspy/dsp/__init__.pyi new file mode 100644 index 0000000..cea7ef9 --- /dev/null +++ b/typings/dspy/dsp/__init__.pyi @@ -0,0 +1,3 @@ +""" +This type stub file was generated by pyright. +""" diff --git a/typings/dspy/dsp/colbertv2.pyi b/typings/dspy/dsp/colbertv2.pyi new file mode 100644 index 0000000..703d87e --- /dev/null +++ b/typings/dspy/dsp/colbertv2.pyi @@ -0,0 +1,58 @@ +""" +This type stub file was generated by pyright. +""" + +from typing import Any, List, Optional, Union +from dspy.clients.cache import request_cache +from dspy.dsp.utils import dotdict + +class ColBERTv2: + """Wrapper for the ColBERTv2 Retrieval.""" + def __init__( + self, url: str = ..., port: Optional[Union[str, int]] = ..., post_requests: bool = ... + ) -> None: ... + def __call__( + self, query: str, k: int = ..., simplify: bool = ... + ) -> Union[list[str], list[dotdict]]: ... + +@request_cache() +def colbertv2_get_request_v2(url: str, query: str, k: int): # -> list[dict[Any | str, Any]]: + ... +@request_cache() +def colbertv2_get_request_v2_wrapped(*args, **kwargs): # -> list[dict[Any | str, Any]]: + ... + +colbertv2_get_request = ... + +@request_cache() +def colbertv2_post_request_v2(url: str, query: str, k: int): # -> Any: + ... +@request_cache() +def colbertv2_post_request_v2_wrapped(*args, **kwargs): # -> Any: + ... + +colbertv2_post_request = ... + +class ColBERTv2RetrieverLocal: + def __init__(self, passages: List[str], colbert_config=..., load_only: bool = ...) 
-> None: + """Colbertv2 retriever module + + Args: + passages (List[str]): list of passages + colbert_config (ColBERTConfig, optional): colbert config for building and searching. Defaults to None. + load_only (bool, optional): whether to load the index or build and then load. Defaults to False. + """ + ... + + def build_index(self): # -> None: + ... + def get_index(self): ... + def __call__(self, *args: Any, **kwargs: Any) -> Any: ... + def forward(self, query: str, k: int = ..., **kwargs): # -> list[Any]: + ... + +class ColBERTv2RerankerLocal: + def __init__(self, colbert_config=..., checkpoint: str = ...) -> None: ... + def __call__(self, *args: Any, **kwargs: Any) -> Any: ... + def forward(self, query: str, passages: Optional[List[str]] = ...): # -> NDArray[Any]: + ... diff --git a/typings/dspy/dsp/utils/__init__.pyi b/typings/dspy/dsp/utils/__init__.pyi new file mode 100644 index 0000000..25c730d --- /dev/null +++ b/typings/dspy/dsp/utils/__init__.pyi @@ -0,0 +1,8 @@ +""" +This type stub file was generated by pyright. +""" + +from dspy.dsp.utils.dpr import * +from dspy.dsp.utils.metrics import * +from dspy.dsp.utils.settings import * +from dspy.dsp.utils.utils import * diff --git a/typings/dspy/dsp/utils/dpr.pyi b/typings/dspy/dsp/utils/dpr.pyi new file mode 100644 index 0000000..3ae7664 --- /dev/null +++ b/typings/dspy/dsp/utils/dpr.pyi @@ -0,0 +1,121 @@ +""" +This type stub file was generated by pyright. +""" + +""" +Source: DPR Implementation from Facebook Research +https://github.com/facebookresearch/DPR/tree/master/dpr +Original license: https://github.com/facebookresearch/DPR/blob/main/LICENSE +""" +logger = ... + +class Tokens: + """A class to represent a list of tokenized text.""" + + TEXT = ... + TEXT_WS = ... + SPAN = ... + POS = ... + LEMMA = ... + NER = ... + def __init__(self, data, annotators, opts=...) -> None: ... + def __len__(self): # -> int: + """The number of tokens.""" + ... 
+ + def slice(self, i=..., j=...): # -> Self: + """Return a view of the list of tokens from [i, j).""" + ... + + def untokenize(self): # -> LiteralString: + """Returns the original text (with whitespace reinserted).""" + ... + + def words(self, uncased=...): # -> list[Any]: + """Returns a list of the text of each token + + Args: + uncased: lower cases text + """ + ... + + def offsets(self): # -> list[Any]: + """Returns a list of [start, end) character offsets of each token.""" + ... + + def pos(self): # -> list[Any] | None: + """Returns a list of part-of-speech tags of each token. + Returns None if this annotation was not included. + """ + ... + + def lemmas(self): # -> list[Any] | None: + """Returns a list of the lemmatized text of each token. + Returns None if this annotation was not included. + """ + ... + + def entities(self): # -> list[Any] | None: + """Returns a list of named-entity-recognition tags of each token. + Returns None if this annotation was not included. + """ + ... + + def ngrams( + self, n=..., uncased=..., filter_fn=..., as_strings=... + ): # -> list[str] | list[tuple[int, int]]: + """Returns a list of all ngrams from length 1 to n. + + Args: + n: upper limit of ngram length + uncased: lower cases text + filter_fn: user function that takes in an ngram list and returns + True or False to keep or not keep the ngram + as_strings: return the ngram as a string vs list + """ + ... + + def entity_groups(self): # -> list[Any] | None: + """Group consecutive entity tokens with the same NER tag.""" + ... + +class Tokenizer: + """Base tokenizer class. + Tokenizers implement tokenize, which should return a Tokens class. + """ + def tokenize(self, text): ... + def shutdown(self): # -> None: + ... + def __del__(self): # -> None: + ... + +class SimpleTokenizer(Tokenizer): + ALPHA_NUM = ... + NON_WS = ... + def __init__(self, **kwargs) -> None: + """ + Args: + annotators: None or empty set (only tokenizes). + """ + ...
+ + def tokenize(self, text): # -> Tokens: + ... + +def has_answer(tokenized_answers, text): # -> bool: + ... +def locate_answers(tokenized_answers, text): # -> list[Any]: + """ + Returns each occurrence of an answer as (offset, endpos) in terms of *characters*. + """ + ... + +STokenizer = ... + +def DPR_tokenize(text): # -> Tokens: + ... +def DPR_normalize(text): # -> list[Any]: + ... +def strip_accents(text): # -> LiteralString: + """Strips accents from a piece of text.""" + ... diff --git a/typings/dspy/dsp/utils/metrics.pyi b/typings/dspy/dsp/utils/metrics.pyi new file mode 100644 index 0000000..2bfeff9 --- /dev/null +++ b/typings/dspy/dsp/utils/metrics.pyi @@ -0,0 +1,20 @@ +""" +This type stub file was generated by pyright. +""" + +def EM(prediction, answers_list): # -> bool: + ... +def F1(prediction, answers_list): # -> float | int: + ... +def HotPotF1(prediction, answers_list): # -> float | int: + ... +def normalize_text(s): # -> str: + ... +def em_score(prediction, ground_truth): # -> bool: + ... +def f1_score(prediction, ground_truth): # -> float | Literal[0]: + ... +def hotpot_f1_score(prediction, ground_truth): # -> float | Literal[0]: + ... +def precision_score(prediction, ground_truth): # -> float | Literal[0]: + ... diff --git a/typings/dspy/dsp/utils/settings.pyi b/typings/dspy/dsp/utils/settings.pyi new file mode 100644 index 0000000..3148244 --- /dev/null +++ b/typings/dspy/dsp/utils/settings.pyi @@ -0,0 +1,71 @@ +""" +This type stub file was generated by pyright. +""" + +import threading +from contextlib import contextmanager + +DEFAULT_CONFIG = ... +main_thread_config = ... +config_owner_thread_id = ... +global_lock = ... + +class ThreadLocalOverrides(threading.local): + def __init__(self) -> None: ... + +thread_local_overrides = ... + +class Settings: + """ + A singleton class for DSPy configuration settings. + Thread-safe global configuration. + - 'configure' can be called by only one 'owner' thread (the first thread that calls it). 
+ - Other threads see the configured global values from 'main_thread_config'. + - 'context' sets thread-local overrides. These overrides propagate to threads spawned + inside that context block, when (and only when!) using a ParallelExecutor that copies overrides. + + 1. Only one unique thread (which can be any thread!) can call dspy.configure. + 2. It affects a global state, visible to all. As a result, user threads work, but they shouldn't be + mixed with concurrent changes to dspy.configure from the "main" thread. + (TODO: In the future, add warnings: if there are near-in-time user-thread reads followed by .configure calls.) + 3. Any thread can use dspy.context. It propagates to child threads created with DSPy primitives: Parallel, asyncify, etc. + """ + + _instance = ... + def __new__(cls): # -> Self: + ... + @property + def lock(self): # -> lock: + ... + def __getattr__(self, name): # -> Any: + ... + def __setattr__(self, name, value): # -> None: + ... + def __getitem__(self, key): # -> Any: + ... + def __setitem__(self, key, value): # -> None: + ... + def __contains__(self, key): # -> bool: + ... + def get(self, key, default=...): # -> Any | None: + ... + def copy(self): # -> dotdict: + ... + @property + def config(self): # -> dotdict: + ... + def configure(self, **kwargs): # -> None: + ... + @contextmanager + def context(self, **kwargs): # -> Generator[None, Any, None]: + """ + Context manager for temporary configuration changes at the thread level. + Does not affect global configuration. Changes only apply to the current thread. + If threads are spawned inside this block using ParallelExecutor, they will inherit these overrides. + """ + ... + + def __repr__(self): # -> str: + ... + +settings = ... diff --git a/typings/dspy/dsp/utils/utils.pyi b/typings/dspy/dsp/utils/utils.pyi new file mode 100644 index 0000000..fc4da30 --- /dev/null +++ b/typings/dspy/dsp/utils/utils.pyi @@ -0,0 +1,80 @@ +""" +This type stub file was generated by pyright. 
+""" + +def print_message(*s, condition=..., pad=..., sep=...): # -> str: + ... +def timestamp(daydir=...): # -> str: + ... +def file_tqdm(file): # -> Generator[Any, Any, None]: + ... +def create_directory(path): # -> None: + ... +def deduplicate(seq: list[str]) -> list[str]: + """ + Source: https://stackoverflow.com/a/480227/1493011 + """ + ... + +def batch(group, bsize, provide_offset=...): # -> Generator[tuple[int, Any] | Any, Any, None]: + ... + +class dotdict(dict): + def __getattr__(self, key): ... + def __setattr__(self, key, value): # -> None: + ... + def __delattr__(self, key): # -> None: + ... + def __deepcopy__(self, memo): # -> dotdict: + ... + +class dotdict_lax(dict): + __getattr__ = ... + __setattr__ = ... + __delattr__ = ... + +def flatten(data_list): # -> list[Any]: + ... +def zipstar( + data_list, lazy=... +): # -> list[list[Any]] | zip[tuple[Any, ...]] | list[tuple[Any, ...]]: + """ + A much faster A, B, C = zip(*[(a, b, c), (a, b, c), ...]) + May return lists or tuples. + """ + ... + +def zip_first(list1, list2): # -> list[tuple[Any, Any]]: + ... +def int_or_float(val): # -> float | int: + ... +def groupby_first_item(lst): # -> defaultdict[Any, list[Any]]: + ... +def process_grouped_by_first_item( + lst, +): # -> Generator[tuple[Any | None, list[Any]], Any, defaultdict[Any, list[Any]]]: + """ + Requires items in list to already be grouped by first item. + """ + ... + +def grouper(iterable, n, fillvalue=...): # -> zip_longest[tuple[Any | None, ...]]: + """ + Collect data into fixed-length chunks or blocks + Example: grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx" + Source: https://docs.python.org/3/library/itertools.html#itertools-recipes + """ + ... + +def lengths2offsets(lengths): # -> Generator[tuple[Any | Literal[0], Any], Any, None]: + ... + +class NullContextManager: + def __init__(self, dummy_resource=...) -> None: ... + def __enter__(self): # -> None: + ... + def __exit__(self, *args): # -> None: + ... 
+ +def load_batch_backgrounds(args, qids): # -> list[Any] | None: + ... diff --git a/typings/dspy/evaluate/__init__.pyi b/typings/dspy/evaluate/__init__.pyi new file mode 100644 index 0000000..e6846ed --- /dev/null +++ b/typings/dspy/evaluate/__init__.pyi @@ -0,0 +1,18 @@ +""" +This type stub file was generated by pyright. +""" + +from dspy.dsp.utils import EM, normalize_text +from dspy.evaluate.auto_evaluation import CompleteAndGrounded, SemanticF1 +from dspy.evaluate.evaluate import Evaluate +from dspy.evaluate.metrics import answer_exact_match, answer_passage_match + +__all__ = [ + "EM", + "normalize_text", + "answer_exact_match", + "answer_passage_match", + "Evaluate", + "SemanticF1", + "CompleteAndGrounded", +] diff --git a/typings/dspy/evaluate/auto_evaluation.pyi b/typings/dspy/evaluate/auto_evaluation.pyi new file mode 100644 index 0000000..23e194f --- /dev/null +++ b/typings/dspy/evaluate/auto_evaluation.pyi @@ -0,0 +1,73 @@ +""" +This type stub file was generated by pyright. +""" + +import dspy + +class SemanticRecallPrecision(dspy.Signature): + """ + Compare a system's response to the ground truth to compute its recall and precision. + If asked to reason, enumerate key ideas in each response, and whether they are present in the other response. + """ + + question: str = ... + ground_truth: str = ... + system_response: str = ... + recall: float = ... + precision: float = ... + +class DecompositionalSemanticRecallPrecision(dspy.Signature): + """ + Compare a system's response to the ground truth to compute recall and precision of key ideas. + You will first enumerate key ideas in each response, discuss their overlap, and then report recall and precision. + """ + + question: str = ... + ground_truth: str = ... + system_response: str = ... + ground_truth_key_ideas: str = ... + system_response_key_ideas: str = ... + discussion: str = ... + recall: float = ... + precision: float = ... + +def f1_score(precision, recall): # -> float: + ... 
+ +class SemanticF1(dspy.Module): + def __init__(self, threshold=..., decompositional=...) -> None: ... + def forward(self, example, pred, trace=...): # -> float | bool: + ... + +class AnswerCompleteness(dspy.Signature): + """ + Estimate the completeness of a system's responses, against the ground truth. + You will first enumerate key ideas in each response, discuss their overlap, and then report completeness. + """ + + question: str = ... + ground_truth: str = ... + system_response: str = ... + ground_truth_key_ideas: str = ... + system_response_key_ideas: str = ... + discussion: str = ... + completeness: float = ... + +class AnswerGroundedness(dspy.Signature): + """ + Estimate the groundedness of a system's responses, against real retrieved documents written by people. + You will first enumerate whatever non-trivial or check-worthy claims are made in the system response, and then + discuss the extent to which some or all of them can be deduced from the retrieved context and basic commonsense. + """ + + question: str = ... + retrieved_context: str = ... + system_response: str = ... + system_response_claims: str = ... + discussion: str = ... + groundedness: float = ... + +class CompleteAndGrounded(dspy.Module): + def __init__(self, threshold=...) -> None: ... + def forward(self, example, pred, trace=...): # -> float | bool: + ... diff --git a/typings/dspy/evaluate/evaluate.pyi b/typings/dspy/evaluate/evaluate.pyi new file mode 100644 index 0000000..b8ff8e1 --- /dev/null +++ b/typings/dspy/evaluate/evaluate.pyi @@ -0,0 +1,133 @@ +""" +This type stub file was generated by pyright. +""" + +import pandas as pd +import dspy +from typing import Any, Callable, List, Optional, TYPE_CHECKING, Union +from dspy.utils.callback import with_callbacks + +if TYPE_CHECKING: ... +logger = ... + +class Evaluate: + """DSPy Evaluate class. + + This class is used to evaluate the performance of a DSPy program. 
Users need to provide an evaluation dataset and + a metric function in order to use this class. This class supports parallel evaluation on the provided dataset. + """ + def __init__( + self, + *, + devset: List[dspy.Example], + metric: Optional[Callable] = ..., + num_threads: Optional[int] = ..., + display_progress: bool = ..., + display_table: Union[bool, int] = ..., + max_errors: int = ..., + return_all_scores: bool = ..., + return_outputs: bool = ..., + provide_traceback: Optional[bool] = ..., + failure_score: float = ..., + **kwargs, + ) -> None: + """ + Args: + devset (List[dspy.Example]): the evaluation dataset. + metric (Callable): The metric function to use for evaluation. + num_threads (Optional[int]): The number of threads to use for parallel evaluation. + display_progress (bool): Whether to display progress during evaluation. + display_table (Union[bool, int]): Whether to display the evaluation results in a table. + If a number is passed, the evaluation results will be truncated to that number before being displayed. + max_errors (int): The maximum number of errors to allow before stopping evaluation. + return_all_scores (bool): Whether to return scores for every data record in `devset`. + return_outputs (bool): Whether to return the dspy program's outputs for every data in `devset`. + provide_traceback (Optional[bool]): Whether to provide traceback information during evaluation. + failure_score (float): The default score to use if evaluation fails due to an exception. + """ + ...
+ + @with_callbacks + def __call__( + self, + program: dspy.Module, + metric: Optional[Callable] = ..., + devset: Optional[List[dspy.Example]] = ..., + num_threads: Optional[int] = ..., + display_progress: Optional[bool] = ..., + display_table: Optional[Union[bool, int]] = ..., + return_all_scores: Optional[bool] = ..., + return_outputs: Optional[bool] = ..., + callback_metadata: Optional[dict[str, Any]] = ..., + ): # -> tuple[float, list[tuple[Example, Prediction, float]], list[float]] | tuple[float, list[float]] | tuple[float, list[tuple[Example, Prediction, float]]] | float: + """ + Args: + program (dspy.Module): The DSPy program to evaluate. + metric (Callable): The metric function to use for evaluation. if not provided, use `self.metric`. + devset (List[dspy.Example]): the evaluation dataset. if not provided, use `self.devset`. + num_threads (Optional[int]): The number of threads to use for parallel evaluation. if not provided, use + `self.num_threads`. + display_progress (bool): Whether to display progress during evaluation. if not provided, use + `self.display_progress`. + display_table (Union[bool, int]): Whether to display the evaluation results in a table. if not provided, use + `self.display_table`. If a number is passed, the evaluation results will be truncated to that number before displayed. + return_all_scores (bool): Whether to return scores for every data record in `devset`. if not provided, + use `self.return_all_scores`. + return_outputs (bool): Whether to return the dspy program's outputs for every data in `devset`. if not + provided, use `self.return_outputs`. + callback_metadata (dict): Metadata to be used for evaluate callback handlers. 
+ + Returns: + The evaluation results are returned in different formats based on the flags: + + - Base return: A float percentage score (e.g., 67.30) representing overall performance + + - With `return_all_scores=True`: + Returns (overall_score, individual_scores) where individual_scores is a list of + float scores for each example in devset + + - With `return_outputs=True`: + Returns (overall_score, result_triples) where result_triples is a list of + (example, prediction, score) tuples for each example in devset + + - With both flags=True: + Returns (overall_score, result_triples, individual_scores) + + """ + ... + +def prediction_is_dictlike(prediction): # -> TypeIs[Callable[..., object]] | Literal[False]: + ... +def merge_dicts(d1, d2) -> dict: ... +def truncate_cell(content) -> str: + """Truncate content of a cell to 25 words.""" + ... + +def stylize_metric_name(df: pd.DataFrame, metric_name: str) -> pd.DataFrame: + """ + Stylize the cell contents of a pandas DataFrame corresponding to the specified metric name. + + :param df: The pandas DataFrame for which to stylize cell contents. + :param metric_name: The name of the metric for which to stylize DataFrame cell contents. + """ + ... + +def display_dataframe(df: pd.DataFrame): # -> None: + """ + Display the specified Pandas DataFrame in the console. + + :param df: The Pandas DataFrame to display. + """ + ... + +def configure_dataframe_for_ipython_notebook_display(df: pd.DataFrame) -> pd.DataFrame: + """Set various pandas display options for DataFrame in an IPython notebook environment.""" + ... + +def is_in_ipython_notebook_environment(): # -> bool: + """ + Check if the current environment is an IPython notebook environment. + + :return: True if the current environment is an IPython notebook environment, False otherwise. + """ + ... 
diff --git a/typings/dspy/evaluate/metrics.pyi b/typings/dspy/evaluate/metrics.pyi new file mode 100644 index 0000000..440314b --- /dev/null +++ b/typings/dspy/evaluate/metrics.pyi @@ -0,0 +1,8 @@ +""" +This type stub file was generated by pyright. +""" + +def answer_exact_match(example, pred, trace=..., frac=...): # -> bool: + ... +def answer_passage_match(example, pred, trace=...): # -> bool: + ... diff --git a/typings/dspy/experimental/__init__.pyi b/typings/dspy/experimental/__init__.pyi new file mode 100644 index 0000000..e45e824 --- /dev/null +++ b/typings/dspy/experimental/__init__.pyi @@ -0,0 +1,7 @@ +""" +This type stub file was generated by pyright. +""" + +from dspy.experimental.module_graph import * +from dspy.experimental.synthesizer import * +from dspy.experimental.synthetic_data import * diff --git a/typings/dspy/experimental/module_graph.pyi b/typings/dspy/experimental/module_graph.pyi new file mode 100644 index 0000000..a6b5487 --- /dev/null +++ b/typings/dspy/experimental/module_graph.pyi @@ -0,0 +1,31 @@ +""" +This type stub file was generated by pyright. +""" + +graphviz_available = ... + +class ModuleGraph: + def __init__(self, module_name, module) -> None: ... + def inspect_settings(self, settings): # -> None: + """Check for the existence and configuration of LM and RM and add them to the graph.""" + ... + + def add_module(self, module_name, module): # -> None: + """Add a module to the graph""" + ... + + def generate_module_name(self, base_name, module_type): + """Generate a module name based on the module type""" + ... + + def process_submodules(self, module_name, module): # -> None: + """Process submodules of a module and add them to the graph""" + ... + + def process_submodule(self, sub_module_name, sub_module): # -> None: + """Process a submodule and add it to the graph""" + ... + + def render_graph(self, filename=...): # -> None: + """Render the graph to a file(png)""" + ... 
diff --git a/typings/dspy/experimental/synthesizer/__init__.pyi b/typings/dspy/experimental/synthesizer/__init__.pyi new file mode 100644 index 0000000..8e2d062 --- /dev/null +++ b/typings/dspy/experimental/synthesizer/__init__.pyi @@ -0,0 +1,8 @@ +""" +This type stub file was generated by pyright. +""" + +from dspy.experimental.synthesizer.config import SynthesizerArguments +from dspy.experimental.synthesizer.synthesizer import Synthesizer + +__all__ = ["Synthesizer", "SynthesizerArguments"] diff --git a/typings/dspy/experimental/synthesizer/config.pyi b/typings/dspy/experimental/synthesizer/config.pyi new file mode 100644 index 0000000..fe3a6c2 --- /dev/null +++ b/typings/dspy/experimental/synthesizer/config.pyi @@ -0,0 +1,17 @@ +""" +This type stub file was generated by pyright. +""" + +from typing import Any, Optional +from pydantic import BaseModel, model_validator + +class SynthesizerArguments(BaseModel): + feedback_mode: Optional[str] = ... + num_example_for_feedback: Optional[int] = ... + input_lm_model: Optional[Any] = ... + output_lm_model: Optional[Any] = ... + output_teacher_module: Optional[Any] = ... + num_example_for_optim: Optional[int] = ... + @model_validator(mode="after") + def validate_feedback_mode(self): # -> Self: + ... diff --git a/typings/dspy/experimental/synthesizer/instruction_suffixes.pyi b/typings/dspy/experimental/synthesizer/instruction_suffixes.pyi new file mode 100644 index 0000000..d89b6aa --- /dev/null +++ b/typings/dspy/experimental/synthesizer/instruction_suffixes.pyi @@ -0,0 +1,6 @@ +""" +This type stub file was generated by pyright. +""" + +INPUT_GENERATION_TASK_WITH_EXAMPLES_SUFFIX = ... +INPUT_GENERATION_TASK_WITH_FEEDBACK_SUFFIX = ... 
diff --git a/typings/dspy/experimental/synthesizer/signatures.pyi b/typings/dspy/experimental/synthesizer/signatures.pyi new file mode 100644 index 0000000..d2aa72d --- /dev/null +++ b/typings/dspy/experimental/synthesizer/signatures.pyi @@ -0,0 +1,48 @@ +""" +This type stub file was generated by pyright. +""" + +import dspy + +class UnderstandTask(dspy.Signature): + """I'll be providing you a task description. Your task is to prepare a concise, comprehensible summary that captures the broad essence and purpose of this task description. Your summary should illuminate the general objective and the type of problem being solved, offering a clear picture of what the task entails at a high level. Avoid getting into the nuances or specifics of individual datapoints, models, examples, algorithms, or any intricate technicalities. Your explanation should serve to clarify the task's overall goal and its basic premise without touching on methodologies or solutions.""" + + task_description = ... + explanation = ... + +class ExplainTask(dspy.Signature): + """Analyze the provided set of datapoints carefully, and prepare a concise, comprehensible summary that captures the broad essence and purpose of the task these datapoints aim to address. Your summary should illuminate the general objective and the type of problem being solved, offering a clear picture of what the task entails at a high level. Avoid getting into the nuances of individual datapoints, specifics about models, examples, algorithms, or any intricate technicalities. Your explanation should serve to clarify the task's overall goal and its basic premise, without touching on methodologies or solutions.""" + + examples = ... + explanation = ... + +class UpdateTaskDescriptionBasedOnFeedback(dspy.Signature): + """Update the task description based on the feedback provided. Ensure that the revised task description incorporates the feedback to improve its overall clarity and effectiveness. 
Focus on enhancing the task's goal and basic premise, without delving into specific data points, models, examples, algorithms, or technical intricacies. Your explanation should aim to clarify the task's fundamental objective and purpose.""" + + task_description = ... + feedback = ... + updated_task_description = ... + +class GetFeedbackOnGeneration(dspy.Signature): + """Provide constructive feedback on the synthetic data generated, focusing on its quality, relevance, and diversity. Highlight any areas that require improvement and offer suggestions for enhancement. The feedback should center on the overall effectiveness of the synthetic data in aligning with the task description and knowledge seed. Avoid delving into specific data points, models, examples, algorithms, or technical intricacies. Your feedback should be critical but constructive, aiming to improve the synthetic data and the task description.""" + + synthetic_data = ... + task_description = ... + feedback = ... + +class GenerateFieldDescription(dspy.Signature): + """Generate a concise and informative description for a given field based on the provided name and task description. This description should be no longer than 10 words and should be in simple english.""" + + task_description = ... + field_name = ... + field_description = ... + +class GenerateInputFieldsData(dspy.Signature): + """Create synthetic data using the task description and the provided knowledge seed. Your task is to generate diverse and imaginative data that aligns with the given task description and knowledge seed. You are encouraged to be creative and not limit yourself, allowing for a wide range of synthetic data that reflects the characteristics and details provided in the task description. The data should be unique and varied, showcasing originality and creativity while maintaining relevance to the task and knowledge seed. 
+ + A knowledge seed is the index of the knowledge base you have, each index represents a different knowledge base.""" + + knowledge_seed = ... + task_description = ... + +class GenerateOutputFieldsData(dspy.Signature): ... diff --git a/typings/dspy/experimental/synthesizer/synthesizer.pyi b/typings/dspy/experimental/synthesizer/synthesizer.pyi new file mode 100644 index 0000000..a52b4be --- /dev/null +++ b/typings/dspy/experimental/synthesizer/synthesizer.pyi @@ -0,0 +1,21 @@ +""" +This type stub file was generated by pyright. +""" + +import dspy +from typing import List, Optional, Union +from dspy.experimental.synthesizer.config import SynthesizerArguments + +class Synthesizer: + def __init__(self, config: SynthesizerArguments) -> None: ... + def generate( + self, + ground_source: Union[List[dspy.Example], dspy.Signature], + num_data: int, + batch_size: int = ..., + ): # -> list[Any]: + ... + def export( + self, data: List[dspy.Example], path: str, mode: Optional[str] = ..., **kwargs + ): # -> None: + ... diff --git a/typings/dspy/experimental/synthesizer/utils.pyi b/typings/dspy/experimental/synthesizer/utils.pyi new file mode 100644 index 0000000..e719418 --- /dev/null +++ b/typings/dspy/experimental/synthesizer/utils.pyi @@ -0,0 +1,8 @@ +""" +This type stub file was generated by pyright. +""" + +import dspy +from typing import List + +def format_examples(examples: List[dspy.Example]) -> str: ... diff --git a/typings/dspy/experimental/synthetic_data.pyi b/typings/dspy/experimental/synthetic_data.pyi new file mode 100644 index 0000000..1a7c2e3 --- /dev/null +++ b/typings/dspy/experimental/synthetic_data.pyi @@ -0,0 +1,28 @@ +""" +This type stub file was generated by pyright. +""" + +import dspy +from typing import List, Optional +from pydantic import BaseModel + +class DescriptionSignature(dspy.Signature): + field_name = ... + example = ... + description = ... 
+ +class SyntheticDataGenerator: + def __init__( + self, schema_class: Optional[BaseModel] = ..., examples: Optional[List[dspy.Example]] = ... + ) -> None: ... + def generate(self, sample_size: int) -> List[dspy.Example]: + """Generate synthetic examples. + + Args: + sample_size (int): number of examples to generate + Raises: + ValueError: either a schema_class or examples should be provided + Returns: + List[dspy.Example]: list of synthetic examples generated + """ + ... diff --git a/typings/dspy/predict/__init__.pyi b/typings/dspy/predict/__init__.pyi new file mode 100644 index 0000000..ef8ec69 --- /dev/null +++ b/typings/dspy/predict/__init__.pyi @@ -0,0 +1,32 @@ +""" +This type stub file was generated by pyright. +""" + +from dspy.predict.aggregation import majority +from dspy.predict.best_of_n import BestOfN +from dspy.predict.chain_of_thought import ChainOfThought +from dspy.predict.chain_of_thought_with_hint import ChainOfThoughtWithHint +from dspy.predict.code_act import CodeAct +from dspy.predict.knn import KNN +from dspy.predict.multi_chain_comparison import MultiChainComparison +from dspy.predict.parallel import Parallel +from dspy.predict.predict import Predict +from dspy.predict.program_of_thought import ProgramOfThought +from dspy.predict.react import ReAct, Tool +from dspy.predict.refine import Refine + +__all__ = [ + "majority", + "BestOfN", + "ChainOfThought", + "ChainOfThoughtWithHint", + "CodeAct", + "KNN", + "MultiChainComparison", + "Predict", + "ProgramOfThought", + "ReAct", + "Refine", + "Tool", + "Parallel", +] diff --git a/typings/dspy/predict/aggregation.pyi b/typings/dspy/predict/aggregation.pyi new file mode 100644 index 0000000..ce4eb76 --- /dev/null +++ b/typings/dspy/predict/aggregation.pyi @@ -0,0 +1,13 @@ +""" +This type stub file was generated by pyright. +""" + +def default_normalize(s): # -> str | None: + ... 
+def majority(prediction_or_completions, normalize=..., field=...): # -> Prediction: + """ + Returns the most common completion for the target field (or the last field) in the signature. + When normalize returns None, that completion is ignored. + In case of a tie, earlier completions are prioritized. + """ + ... diff --git a/typings/dspy/predict/avatar/__init__.pyi b/typings/dspy/predict/avatar/__init__.pyi new file mode 100644 index 0000000..f45abe9 --- /dev/null +++ b/typings/dspy/predict/avatar/__init__.pyi @@ -0,0 +1,7 @@ +""" +This type stub file was generated by pyright. +""" + +from dspy.predict.avatar.avatar import * +from dspy.predict.avatar.models import * +from dspy.predict.avatar.signatures import * diff --git a/typings/dspy/predict/avatar/avatar.pyi b/typings/dspy/predict/avatar/avatar.pyi new file mode 100644 index 0000000..8c95d49 --- /dev/null +++ b/typings/dspy/predict/avatar/avatar.pyi @@ -0,0 +1,12 @@ +""" +This type stub file was generated by pyright. +""" + +import dspy + +def get_number_with_suffix(number: int) -> str: ... + +class Avatar(dspy.Module): + def __init__(self, signature, tools, max_iters=..., verbose=...) -> None: ... + def forward(self, **kwargs): # -> Prediction: + ... diff --git a/typings/dspy/predict/avatar/models.pyi b/typings/dspy/predict/avatar/models.pyi new file mode 100644 index 0000000..fbcd0b6 --- /dev/null +++ b/typings/dspy/predict/avatar/models.pyi @@ -0,0 +1,24 @@ +""" +This type stub file was generated by pyright. +""" + +from typing import Any, Optional +from pydantic import BaseModel + +class Tool(BaseModel): + tool: Any + name: str + desc: Optional[str] + input_type: Optional[str] = ... + def __str__(self) -> str: ... + def __repr__(self) -> str: ... + +class Action(BaseModel): + tool_name: Any = ... + tool_input_query: Any = ... + +class ActionOutput(BaseModel): + tool_name: str + tool_input_query: str + tool_output: str + ...
diff --git a/typings/dspy/predict/avatar/signatures.pyi b/typings/dspy/predict/avatar/signatures.pyi new file mode 100644 index 0000000..6986aaa --- /dev/null +++ b/typings/dspy/predict/avatar/signatures.pyi @@ -0,0 +1,17 @@ +""" +This type stub file was generated by pyright. +""" + +import dspy +from dspy.predict.avatar.models import Action + +class Actor(dspy.Signature): + """You will be given `Tools` which will be a list of tools to use to accomplish the `Goal`. Given the user query, your task is to decide which tool to use and what input values to provide. + + You will output action needed to accomplish the `Goal`. `Action` should have a tool to use and the input query to pass to the tool. + + Note: You can opt to use no tools and provide the final answer directly. You can also one tool multiple times with different input queries if applicable.""" + + goal: str = ... + tools: list[str] = ... + action_1: Action = ... diff --git a/typings/dspy/predict/best_of_n.pyi b/typings/dspy/predict/best_of_n.pyi new file mode 100644 index 0000000..690e360 --- /dev/null +++ b/typings/dspy/predict/best_of_n.pyi @@ -0,0 +1,52 @@ +""" +This type stub file was generated by pyright. +""" + +from typing import Callable, Optional +from dspy.predict.predict import Module, Prediction + +class BestOfN(Module): + def __init__( + self, + module: Module, + N: int, + reward_fn: Callable[[dict, Prediction], float], + threshold: float, + fail_count: Optional[int] = ..., + ) -> None: + """ + Runs a module up to `N` times with different temperatures and returns the best prediction + out of `N` attempts or the first prediction that passes the `threshold`. + + Args: + module (Module): The module to run. + N (int): The number of times to run the module. + reward_fn (Callable[[dict, Prediction], float]): The reward function which takes in the args passed to the module, the resulting prediction, and returns a scalar reward. + threshold (float): The threshold for the reward function. 
+ fail_count (Optional[int], optional): The number of times the module can fail before raising an error. Defaults to N if not provided. + + Example: + ```python + import dspy + + dspy.settings.configure(lm=dspy.LM("openai/gpt-4o-mini")) + + # Define a QA module with chain of thought + qa = dspy.ChainOfThought("question -> answer") + + # Define a reward function that checks for one-word answers + def one_word_answer(args, pred): + return 1.0 if len(pred.answer.split()) == 1 else 0.0 + + # Create a refined module that tries up to 3 times + best_of_3 = dspy.BestOfN(module=qa, N=3, reward_fn=one_word_answer, threshold=1.0) + + # Use the refined module + result = best_of_3(question="What is the capital of Belgium?").answer + # Returns: Brussels + ``` + """ + ... + + def forward(self, **kwargs): # -> None: + ... diff --git a/typings/dspy/predict/chain_of_thought.pyi b/typings/dspy/predict/chain_of_thought.pyi new file mode 100644 index 0000000..8da7c62 --- /dev/null +++ b/typings/dspy/predict/chain_of_thought.pyi @@ -0,0 +1,31 @@ +""" +This type stub file was generated by pyright. +""" + +from typing import Optional, Type, Union +from pydantic.fields import FieldInfo +from dspy.primitives.program import Module +from dspy.signatures.field import OutputField +from dspy.signatures.signature import Signature + +class ChainOfThought(Module): + def __init__( + self, + signature: Type[Signature], + rationale_field: Optional[Union[OutputField, FieldInfo]] = ..., + rationale_field_type: Type = ..., + **config, + ) -> None: + """ + A module that reasons step by step in order to predict the output of a task. + + Args: + signature (Type[dspy.Signature]): The signature of the module. + rationale_field (Optional[Union[dspy.OutputField, pydantic.fields.FieldInfo]]): The field that will contain the reasoning. + rationale_field_type (Type): The type of the rationale field. + **config: The configuration for the module. + """ + ... + + def forward(self, **kwargs): ... 
+ async def aforward(self, **kwargs): ... diff --git a/typings/dspy/predict/chain_of_thought_with_hint.pyi b/typings/dspy/predict/chain_of_thought_with_hint.pyi new file mode 100644 index 0000000..ff83d81 --- /dev/null +++ b/typings/dspy/predict/chain_of_thought_with_hint.pyi @@ -0,0 +1,9 @@ +""" +This type stub file was generated by pyright. +""" + +from .predict import Module + +class ChainOfThoughtWithHint(Module): + def __init__(self, signature, rationale_type=..., **config) -> None: ... + def forward(self, **kwargs): ... diff --git a/typings/dspy/predict/code_act.pyi b/typings/dspy/predict/code_act.pyi new file mode 100644 index 0000000..7352d18 --- /dev/null +++ b/typings/dspy/predict/code_act.pyi @@ -0,0 +1,42 @@ +""" +This type stub file was generated by pyright. +""" + +from inspect import Signature +from typing import Callable, Type, Union +from dspy.predict.program_of_thought import ProgramOfThought +from dspy.predict.react import ReAct + +logger = ... + +class CodeAct(ReAct, ProgramOfThought): + """ + CodeAct is a module that utilizes the Code Interpreter and predefined tools to solve the problem. + """ + def __init__( + self, signature: Union[str, Type[Signature]], tools: list[Callable], max_iters: int = ... + ) -> None: + """ + Initializes the CodeAct class with the specified model, temperature, and max tokens. + + Args: + signature (Union[str, Type[Signature]]): The signature of the module. + tools (list[Callable]): The tool callables to be used. CodeAct only accepts functions and not callable objects. + max_iters (int): The maximum number of iterations to generate the answer. + + Example: + ```python + from dspy.predict import CodeAct + def factorial(n): + if n == 1: + return 1 + return n * factorial(n-1) + + act = CodeAct("n->factorial", tools=[factorial]) + act(n=5) # 120 + ``` + """ + ... + + def forward(self, **kwargs): # -> Prediction: + ... 
diff --git a/typings/dspy/predict/knn.pyi b/typings/dspy/predict/knn.pyi new file mode 100644 index 0000000..1a2fa00 --- /dev/null +++ b/typings/dspy/predict/knn.pyi @@ -0,0 +1,42 @@ +""" +This type stub file was generated by pyright. +""" + +from dspy.clients import Embedder +from dspy.primitives import Example + +class KNN: + def __init__(self, k: int, trainset: list[Example], vectorizer: Embedder) -> None: + """ + A k-nearest neighbors retriever that finds similar examples from a training set. + + Args: + k: Number of nearest neighbors to retrieve + trainset: List of training examples to search through + vectorizer: The `Embedder` to use for vectorization + + Example: + ```python + import dspy + from sentence_transformers import SentenceTransformer + + # Create a training dataset with examples + trainset = [ + dspy.Example(input="hello", output="world"), + # ... more examples ... + ] + + # Initialize KNN with a sentence transformer model + knn = KNN( + k=3, + trainset=trainset, + vectorizer=dspy.Embedder(SentenceTransformer("all-MiniLM-L6-v2").encode) + ) + + # Find similar examples + similar_examples = knn(input="hello") + ``` + """ + ... + + def __call__(self, **kwargs) -> list: ... diff --git a/typings/dspy/predict/multi_chain_comparison.pyi b/typings/dspy/predict/multi_chain_comparison.pyi new file mode 100644 index 0000000..28b10dd --- /dev/null +++ b/typings/dspy/predict/multi_chain_comparison.pyi @@ -0,0 +1,9 @@ +""" +This type stub file was generated by pyright. +""" + +from dspy.primitives.program import Module + +class MultiChainComparison(Module): + def __init__(self, signature, M=..., temperature=..., **config) -> None: ... + def forward(self, completions, **kwargs): ... diff --git a/typings/dspy/predict/parallel.pyi b/typings/dspy/predict/parallel.pyi new file mode 100644 index 0000000..85f166a --- /dev/null +++ b/typings/dspy/predict/parallel.pyi @@ -0,0 +1,21 @@ +""" +This type stub file was generated by pyright. 
+""" + +from typing import Any, List, Optional, Tuple +from dspy.primitives.example import Example + +class Parallel: + def __init__( + self, + num_threads: Optional[int] = ..., + max_errors: int = ..., + access_examples: bool = ..., + return_failed_examples: bool = ..., + provide_traceback: Optional[bool] = ..., + disable_progress_bar: bool = ..., + ) -> None: ... + def forward( + self, exec_pairs: List[Tuple[Any, Example]], num_threads: Optional[int] = ... + ) -> List[Any]: ... + def __call__(self, *args: Any, **kwargs: Any) -> Any: ... diff --git a/typings/dspy/predict/parameter.pyi b/typings/dspy/predict/parameter.pyi new file mode 100644 index 0000000..7dcd22f --- /dev/null +++ b/typings/dspy/predict/parameter.pyi @@ -0,0 +1,6 @@ +""" +This type stub file was generated by pyright. +""" + +class Parameter: ... +class Hyperparameter: ... diff --git a/typings/dspy/predict/predict.pyi b/typings/dspy/predict/predict.pyi new file mode 100644 index 0000000..206b1e7 --- /dev/null +++ b/typings/dspy/predict/predict.pyi @@ -0,0 +1,47 @@ +""" +This type stub file was generated by pyright. +""" + +from dspy.predict.parameter import Parameter +from dspy.primitives.program import Module + +logger = ... + +class Predict(Module, Parameter): + def __init__(self, signature, callbacks=..., **config) -> None: ... + def reset(self): # -> None: + ... + def dump_state(self): # -> dict[str, Any]: + ... + def load_state(self, state): # -> Self: + """Load the saved state of a `Predict` object. + + Args: + state (dict): The saved state of a `Predict` object. + + Returns: + self: Returns self to allow method chaining + """ + ... + + def __call__(self, *args, **kwargs): ... + async def acall(self, *args, **kwargs): ... + def forward(self, **kwargs): # -> Prediction: + ... + async def aforward(self, **kwargs): # -> Prediction: + ... + def update_config(self, **kwargs): # -> None: + ... + def get_config(self): # -> dict[str, Any]: + ... + def __repr__(self): # -> str: + ... 
+ +def serialize_object( + obj, +): # -> dict[str, Any] | list[dict[str, Any] | list[Any] | tuple[dict[str, Any] | list[Any] | tuple[Any, ...] | dict[Any, Any] | Any, ...] | dict[Any, Any] | Any] | tuple[dict[str, Any] | list[Any] | tuple[Any, ...] | dict[Any, Any] | Any, ...] | dict[Any, Any]: + """ + Recursively serialize a given object into a JSON-compatible format. + Supports Pydantic models, lists, dicts, and primitive types. + """ + ... diff --git a/typings/dspy/predict/program_of_thought.pyi b/typings/dspy/predict/program_of_thought.pyi new file mode 100644 index 0000000..12017fa --- /dev/null +++ b/typings/dspy/predict/program_of_thought.pyi @@ -0,0 +1,34 @@ +""" +This type stub file was generated by pyright. +""" + +from typing import Type, Union +from dspy.primitives.program import Module +from dspy.signatures.signature import Signature + +logger = ... + +class ProgramOfThought(Module): + """ + A DSPy module that runs Python programs to solve a problem. + This module reuires deno to be installed. Please install deno following https://docs.deno.com/runtime/getting_started/installation/ + + Example: + ``` + import dspy + + lm = dspy.LM('openai/gpt-4o-mini') + dspy.configure(lm=lm) + pot = dspy.ProgramOfThought("question -> answer") + pot(question="what is 1+1?") + ``` + """ + def __init__(self, signature: Union[str, Type[Signature]], max_iters=...) -> None: + """ + Args: + signature: The signature of the module. + max_iters: The maximum number of iterations to retry code generation and execution. + """ + ... + + def forward(self, **kwargs): ... diff --git a/typings/dspy/predict/react.pyi b/typings/dspy/predict/react.pyi new file mode 100644 index 0000000..74d5376 --- /dev/null +++ b/typings/dspy/predict/react.pyi @@ -0,0 +1,26 @@ +""" +This type stub file was generated by pyright. +""" + +from typing import Callable +from dspy.primitives.program import Module + +logger = ... 
+ +class ReAct(Module): + def __init__(self, signature, tools: list[Callable], max_iters=...) -> None: + """ + `tools` is either a list of functions, callable classes, or `dspy.Tool` instances. + """ + ... + + def forward(self, **input_args): # -> Prediction: + ... + async def aforward(self, **input_args): # -> Prediction: + ... + def truncate_trajectory(self, trajectory): + """Truncates the trajectory so that it fits in the context window. + + Users can override this method to implement their own truncation logic. + """ + ... diff --git a/typings/dspy/predict/refine.pyi b/typings/dspy/predict/refine.pyi new file mode 100644 index 0000000..d34f35d --- /dev/null +++ b/typings/dspy/predict/refine.pyi @@ -0,0 +1,86 @@ +""" +This type stub file was generated by pyright. +""" + +from typing import Callable, Optional +from dspy.predict.predict import Prediction +from dspy.signatures import Signature +from .predict import Module + +class OfferFeedback(Signature): + """ + In the discussion, assign blame to each module that contributed to the final reward being below the threshold, if + any. Then, prescribe concrete advice of how the module should act on its future input when we retry the process, if + it were to receive the same or similar inputs. If a module is not to blame, the advice should be N/A. + The module will not see its own history, so it needs to rely on entirely concrete and actionable advice from you + to avoid the same mistake on the same or similar inputs. + """ + + program_code: str = ... + modules_defn: str = ... + program_inputs: str = ... + program_trajectory: str = ... + program_outputs: str = ... + reward_code: str = ... + target_threshold: float = ... + reward_value: float = ... + module_names: list[str] = ... + discussion: str = ... + advice: dict[str, str] = ... 
+ +class Refine(Module): + def __init__( + self, + module: Module, + N: int, + reward_fn: Callable[[dict, Prediction], float], + threshold: float, + fail_count: Optional[int] = ..., + ) -> None: + """ + Refines a module by running it up to N times with different temperatures and returns the best prediction. + + This module runs the provided module multiple times with varying temperature settings and selects + either the first prediction that exceeds the specified threshold or the one with the highest reward. + If no prediction meets the threshold, it automatically generates feedback to improve future predictions. + + + Args: + module (Module): The module to refine. + N (int): The number of times to run the module. must + reward_fn (Callable): The reward function. + threshold (float): The threshold for the reward function. + fail_count (Optional[int], optional): The number of times the module can fail before raising an error + + Example: + ```python + import dspy + + dspy.settings.configure(lm=dspy.LM("openai/gpt-4o-mini")) + + # Define a QA module with chain of thought + qa = dspy.ChainOfThought("question -> answer") + + # Define a reward function that checks for one-word answers + def one_word_answer(args, pred): + return 1.0 if len(pred.answer.split()) == 1 else 0.0 + + # Create a refined module that tries up to 3 times + best_of_3 = dspy.Refine(module=qa, N=3, reward_fn=one_word_answer, threshold=1.0) + + # Use the refined module + result = best_of_3(question="What is the capital of Belgium?").answer + # Returns: Brussels + ``` + """ + ... + + def forward(self, **kwargs): # -> None: + ... + +def inspect_modules(program): # -> str: + ... +def recursive_mask( + o, +): # -> dict[Any, Any | dict[Any, Any] | list[Any | dict[Any, Any] | list[Any] | tuple[Any, ...] | str] | tuple[Any, ...] | str] | list[Any | dict[Any, Any | dict[Any, Any] | list[Any] | tuple[Any, ...] | str] | list[Any] | tuple[Any, ...] | str] | tuple[Any, ...] | str: + ... 
diff --git a/typings/dspy/predict/retry.pyi b/typings/dspy/predict/retry.pyi new file mode 100644 index 0000000..cea7ef9 --- /dev/null +++ b/typings/dspy/predict/retry.pyi @@ -0,0 +1,3 @@ +""" +This type stub file was generated by pyright. +""" diff --git a/typings/dspy/primitives/__init__.pyi b/typings/dspy/primitives/__init__.pyi new file mode 100644 index 0000000..cd1ae6d --- /dev/null +++ b/typings/dspy/primitives/__init__.pyi @@ -0,0 +1,21 @@ +""" +This type stub file was generated by pyright. +""" + +from dspy.primitives import assertions +from dspy.primitives.example import Example +from dspy.primitives.module import BaseModule +from dspy.primitives.prediction import Completions, Prediction +from dspy.primitives.program import Module, Program +from dspy.primitives.python_interpreter import PythonInterpreter + +__all__ = [ + "assertions", + "Example", + "BaseModule", + "Prediction", + "Completions", + "Program", + "Module", + "PythonInterpreter", +] diff --git a/typings/dspy/primitives/assertions.pyi b/typings/dspy/primitives/assertions.pyi new file mode 100644 index 0000000..cea7ef9 --- /dev/null +++ b/typings/dspy/primitives/assertions.pyi @@ -0,0 +1,3 @@ +""" +This type stub file was generated by pyright. +""" diff --git a/typings/dspy/primitives/example.pyi b/typings/dspy/primitives/example.pyi new file mode 100644 index 0000000..b3ae28c --- /dev/null +++ b/typings/dspy/primitives/example.pyi @@ -0,0 +1,45 @@ +""" +This type stub file was generated by pyright. +""" + +class Example: + def __init__(self, base=..., **kwargs) -> None: ... + def __getattr__(self, key): ... + def __setattr__(self, key, value): # -> None: + ... + def __getitem__(self, key): ... + def __setitem__(self, key, value): # -> None: + ... + def __delitem__(self, key): # -> None: + ... + def __contains__(self, key): # -> bool: + ... + def __len__(self): # -> int: + ... + def __repr__(self): # -> str: + ... + def __str__(self) -> str: ... + def __eq__(self, other) -> bool: ... 
+ def __hash__(self) -> int: ... + def keys(self, include_dspy=...): # -> list[Any]: + ... + def values(self, include_dspy=...): # -> list[Any]: + ... + def items(self, include_dspy=...): # -> list[tuple[Any, Any]]: + ... + def get(self, key, default=...): # -> None: + ... + def with_inputs(self, *keys): # -> Self: + ... + def inputs(self): # -> Self: + ... + def labels(self): # -> Self: + ... + def __iter__(self): # -> Iterator[Any]: + ... + def copy(self, **kwargs): # -> Self: + ... + def without(self, *keys): # -> Self: + ... + def toDict(self): # -> dict[Any, Any]: + ... diff --git a/typings/dspy/primitives/module.pyi b/typings/dspy/primitives/module.pyi new file mode 100644 index 0000000..1858011 --- /dev/null +++ b/typings/dspy/primitives/module.pyi @@ -0,0 +1,84 @@ +""" +This type stub file was generated by pyright. +""" + +from collections.abc import Generator + +logger = ... + +class BaseModule: + def __init__(self) -> None: ... + def named_parameters(self): # -> list[Any]: + """ + Unlike PyTorch, handles (non-recursive) lists of parameters too. + """ + ... + + def named_sub_modules( + self, type_=..., skip_compiled=... + ) -> Generator[tuple[str, BaseModule], None, None]: + """Find all sub-modules in the module, as well as their names. + + Say self.children[4]['key'].sub_module is a sub-module. Then the name will be + 'children[4][key].sub_module'. But if the sub-module is accessible at different + paths, only one of the paths will be returned. + """ + ... + + def parameters(self): # -> list[Any]: + ... + def deepcopy(self): # -> Self: + """Deep copy the module. + + This is a tweak to the default python deepcopy that only deep copies `self.parameters()`, and for other + attributes, we just do the shallow copy. + """ + ... + + def reset_copy(self): # -> Self: + """Deep copy the module and reset all parameters.""" + ... + + def dump_state(self): # -> dict[Any, Any]: + ... + def load_state(self, state): # -> None: + ... 
+ def save(self, path, save_program=..., modules_to_serialize=...): # -> None: + """Save the module. + + Save the module to a directory or a file. There are two modes: + - `save_program=False`: Save only the state of the module to a json or pickle file, based on the value of + the file extension. + - `save_program=True`: Save the whole module to a directory via cloudpickle, which contains both the state and + architecture of the model. + + If `save_program=True` and `modules_to_serialize` are provided, it will register those modules for serialization + with cloudpickle's `register_pickle_by_value`. This causes cloudpickle to serialize the module by value rather + than by reference, ensuring the module is fully preserved along with the saved program. This is useful + when you have custom modules that need to be serialized alongside your program. If None, then no modules + will be registered for serialization. + + We also save the dependency versions, so that the loaded model can check if there is a version mismatch on + critical dependencies or DSPy version. + + Args: + path (str): Path to the saved state file, which should be a .json or .pkl file when `save_program=False`, + and a directory when `save_program=True`. + save_program (bool): If True, save the whole module to a directory via cloudpickle, otherwise only save + the state. + modules_to_serialize (list): A list of modules to serialize with cloudpickle's `register_pickle_by_value`. + If None, then no modules will be registered for serialization. + + """ + ... + + def load(self, path): # -> None: + """Load the saved module. You may also want to check out dspy.load, if you want to + load an entire program, not just the state for an existing program. + + Args: + path (str): Path to the saved state file, which should be a .json or a .pkl file + """ + ... + +def postprocess_parameter_name(name, value): ... 
diff --git a/typings/dspy/primitives/prediction.pyi b/typings/dspy/primitives/prediction.pyi new file mode 100644 index 0000000..ccf7bd9 --- /dev/null +++ b/typings/dspy/primitives/prediction.pyi @@ -0,0 +1,50 @@ +""" +This type stub file was generated by pyright. +""" + +from dspy.primitives.example import Example + +class Prediction(Example): + def __init__(self, *args, **kwargs) -> None: ... + def get_lm_usage(self): # -> None: + ... + def set_lm_usage(self, value): # -> None: + ... + @classmethod + def from_completions(cls, list_or_dict, signature=...): # -> Self: + ... + def __repr__(self): # -> str: + ... + def __str__(self) -> str: ... + def __float__(self): # -> float: + ... + def __add__(self, other): # -> float: + ... + def __radd__(self, other): # -> float: + ... + def __truediv__(self, other): # -> float: + ... + def __rtruediv__(self, other): # -> float: + ... + def __lt__(self, other) -> bool: ... + def __le__(self, other) -> bool: ... + def __gt__(self, other) -> bool: ... + def __ge__(self, other) -> bool: ... + @property + def completions(self): # -> None: + ... + +class Completions: + def __init__(self, list_or_dict, signature=...) -> None: ... + def items(self): # -> dict_items[Any, Any]: + ... + def __getitem__(self, key): # -> Prediction: + ... + def __getattr__(self, name): ... + def __len__(self): # -> int: + ... + def __contains__(self, key): # -> bool: + ... + def __repr__(self): # -> str: + ... + def __str__(self) -> str: ... diff --git a/typings/dspy/primitives/program.pyi b/typings/dspy/primitives/program.pyi new file mode 100644 index 0000000..1c3dd4a --- /dev/null +++ b/typings/dspy/primitives/program.pyi @@ -0,0 +1,63 @@ +""" +This type stub file was generated by pyright. 
+""" + +from typing import Optional +from dspy.primitives.module import BaseModule +from dspy.utils.callback import with_callbacks + +class ProgramMeta(type): + """Metaclass ensuring every ``dspy.Module`` instance is properly initialised.""" + def __call__(cls, *args, **kwargs): ... + +class Module(BaseModule, metaclass=ProgramMeta): + def __init__(self, callbacks=...) -> None: ... + @with_callbacks + def __call__(self, *args, **kwargs): ... + @with_callbacks + async def acall(self, *args, **kwargs): ... + def named_predictors(self): # -> list[tuple[Any, Predict]]: + ... + def predictors(self): # -> list[Predict]: + ... + def set_lm(self, lm): # -> None: + ... + def get_lm(self): # -> LM | None: + ... + def __repr__(self): # -> LiteralString: + ... + def map_named_predictors(self, func): # -> Self: + """Applies a function to all named predictors.""" + ... + + def inspect_history(self, n: int = ...): # -> None: + ... + def batch( + self, + examples, + num_threads: Optional[int] = ..., + max_errors: int = ..., + return_failed_examples: bool = ..., + provide_traceback: Optional[bool] = ..., + disable_progress_bar: bool = ..., + ): # -> tuple[Any, Any, Any] | List[Any]: + """ + Processes a list of dspy.Example instances in parallel using the Parallel module. + + Args: + examples: List of dspy.Example instances to process. + num_threads: Number of threads to use for parallel processing. + max_errors: Maximum number of errors allowed before stopping execution. + return_failed_examples: Whether to return failed examples and exceptions. + provide_traceback: Whether to include traceback information in error logs. + disable_progress_bar: Whether to display the progress bar. + + Returns: + List of results, and optionally failed examples and exceptions. + """ + ... + +def set_attribute_by_name(obj, name, value): # -> None: + ... 
+ +Program = Module diff --git a/typings/dspy/primitives/python_interpreter.pyi b/typings/dspy/primitives/python_interpreter.pyi new file mode 100644 index 0000000..23ff130 --- /dev/null +++ b/typings/dspy/primitives/python_interpreter.pyi @@ -0,0 +1,36 @@ +""" +This type stub file was generated by pyright. +""" + +from types import TracebackType +from typing import Any, Dict, List, Optional + +class InterpreterError(RuntimeError): ... + +class PythonInterpreter: + r""" + PythonInterpreter that runs code in a sandboxed environment using Deno and Pyodide. + + Prerequisites: + - Deno (https://docs.deno.com/runtime/getting_started/installation/). + + Example Usage: + ```python + code_string = "print('Hello'); 1 + 2" + with PythonInterpreter() as interp: + output = interp(code_string) # If final statement is non-None, prints the numeric result, else prints captured output + ``` + """ + def __init__(self, deno_command: Optional[List[str]] = ...) -> None: ... + def execute(self, code: str, variables: Optional[Dict[str, Any]] = ...) -> Any: ... + def __enter__(self): # -> Self: + ... + def __exit__( + self, + _exc_type: Optional[type[BaseException]], + _exc_val: Optional[BaseException], + _exc_tb: Optional[TracebackType], + ): # -> None: + ... + def __call__(self, code: str, variables: Optional[Dict[str, Any]] = ...) -> Any: ... + def shutdown(self) -> None: ... diff --git a/typings/dspy/propose/__init__.pyi b/typings/dspy/propose/__init__.pyi new file mode 100644 index 0000000..45e8d87 --- /dev/null +++ b/typings/dspy/propose/__init__.pyi @@ -0,0 +1,7 @@ +""" +This type stub file was generated by pyright. 
+""" + +from dspy.propose.grounded_proposer import GroundedProposer + +__all__ = ["GroundedProposer"] diff --git a/typings/dspy/propose/dataset_summary_generator.pyi b/typings/dspy/propose/dataset_summary_generator.pyi new file mode 100644 index 0000000..e983e02 --- /dev/null +++ b/typings/dspy/propose/dataset_summary_generator.pyi @@ -0,0 +1,39 @@ +""" +This type stub file was generated by pyright. +""" + +import dspy + +class ObservationSummarizer(dspy.Signature): + """Given a series of observations I have made about my dataset, please summarize them into a brief 2-3 sentence summary which highlights only the most important details.""" + + observations = ... + summary = ... + +class DatasetDescriptor(dspy.Signature): + ( + """Given several examples from a dataset please write observations about trends that hold for most or all of the samples. """ + """Some areas you may consider in your observations: topics, content, syntax, conciceness, etc. """ + """It will be useful to make an educated guess as to the nature of the task this dataset will enable. Don't be afraid to be creative""" + ) + + examples = ... + observations = ... + +class DatasetDescriptorWithPriorObservations(dspy.Signature): + ( + """Given several examples from a dataset please write observations about trends that hold for most or all of the samples. """ + """I will also provide you with a few observations I have already made. Please add your own observations or if you feel the observations are comprehensive say 'COMPLETE' """ + """Some areas you may consider in your observations: topics, content, syntax, conciceness, etc. """ + """It will be useful to make an educated guess as to the nature of the task this dataset will enable. Don't be afraid to be creative""" + ) + + examples = ... + prior_observations = ... + observations = ... + +def order_input_keys_in_string(unordered_repr): # -> str: + ... +def create_dataset_summary( + trainset, view_data_batch_size, prompt_model, log_file=..., verbose=... 
+): ... diff --git a/typings/dspy/propose/grounded_proposer.pyi b/typings/dspy/propose/grounded_proposer.pyi new file mode 100644 index 0000000..af0b8ef --- /dev/null +++ b/typings/dspy/propose/grounded_proposer.pyi @@ -0,0 +1,91 @@ +""" +This type stub file was generated by pyright. +""" + +import dspy +from dspy.propose.propose_base import Proposer + +MAX_INSTRUCT_IN_HISTORY = ... +TIPS = ... + +class DescribeProgram(dspy.Signature): + """Below is some pseudo-code for a pipeline that solves tasks with calls to language models. Please describe what type of task this program appears to be designed to solve, and how it appears to work.""" + + program_code = ... + program_example = ... + program_description = ... + +class DescribeModule(dspy.Signature): + """Below is some pseudo-code for a pipeline that solves tasks with calls to language models. Please describe the purpose of one of the specified module in this pipeline.""" + + program_code = ... + program_example = ... + program_description = ... + module = ... + module_description = ... + +def generate_instruction_class( + use_dataset_summary=..., + program_aware=..., + use_task_demos=..., + use_instruct_history=..., + use_tip=..., +): # -> Predict: + class GenerateSingleModuleInstruction(dspy.Signature): + """Use the information below to learn about a task that we are trying to solve using calls to an LM, then generate a new instruction that will be used to prompt a Language Model to better solve the task.""" + + ... + +class GenerateModuleInstruction(dspy.Module): + def __init__( + self, + program_code_string=..., + use_dataset_summary=..., + program_aware=..., + use_task_demos=..., + use_instruct_history=..., + use_tip=..., + verbose=..., + ) -> None: ... + def forward( + self, + demo_candidates, + pred_i, + demo_set_i, + program, + previous_instructions, + data_summary, + num_demos_in_context=..., + tip=..., + ): # -> Prediction: + ... 
+ +class GroundedProposer(Proposer): + def __init__( + self, + prompt_model, + program, + trainset, + view_data_batch_size=..., + use_dataset_summary=..., + program_aware=..., + use_task_demos=..., + num_demos_in_context=..., + use_instruct_history=..., + use_tip=..., + set_tip_randomly=..., + set_history_randomly=..., + verbose=..., + rng=..., + ) -> None: ... + def propose_instructions_for_program( + self, trainset, program, demo_candidates, trial_logs, N, T + ) -> list[str]: + """This method is responsible for returning the full set of new instructions for our program, given the specified criteria.""" + ... + + def propose_instruction_for_predictor( + self, program, predictor, pred_i, T, demo_candidates, demo_set_i, trial_logs, tip=... + ) -> str: + """This method is responsible for returning a single instruction for a given predictor, using the specified criteria.""" + ... diff --git a/typings/dspy/propose/propose_base.pyi b/typings/dspy/propose/propose_base.pyi new file mode 100644 index 0000000..1a92cbe --- /dev/null +++ b/typings/dspy/propose/propose_base.pyi @@ -0,0 +1,13 @@ +""" +This type stub file was generated by pyright. +""" + +from abc import ABC, abstractmethod + +class Proposer(ABC): + def __init__(self) -> None: ... + @abstractmethod + def propose_instructions_for_program(self): # -> None: + ... + def propose_instruction_for_predictor(self): # -> None: + ... diff --git a/typings/dspy/propose/utils.pyi b/typings/dspy/propose/utils.pyi new file mode 100644 index 0000000..a90a853 --- /dev/null +++ b/typings/dspy/propose/utils.pyi @@ -0,0 +1,20 @@ +""" +This type stub file was generated by pyright. +""" + +def strip_prefix(text): # -> str: + ... +def create_instruction_set_history_string(base_program, trial_logs, top_n): # -> str: + ... +def parse_list_of_instructions(instruction_string): # -> Any | list[Any]: + ... +def get_program_instruction_set_string(program): # -> LiteralString: + ... 
+def create_predictor_level_history_string( + base_program, predictor_i, trial_logs, top_n +): # -> Literal['']: + ... +def create_example_string(fields, example): # -> LiteralString: + ... +def get_dspy_source_code(module): # -> str: + ... diff --git a/typings/dspy/retrieve/__init__.pyi b/typings/dspy/retrieve/__init__.pyi new file mode 100644 index 0000000..b68692c --- /dev/null +++ b/typings/dspy/retrieve/__init__.pyi @@ -0,0 +1,7 @@ +""" +This type stub file was generated by pyright. +""" + +from dspy.retrieve.retrieve import Retrieve + +__all__ = ["Retrieve"] diff --git a/typings/dspy/retrieve/azureaisearch_rm.pyi b/typings/dspy/retrieve/azureaisearch_rm.pyi new file mode 100644 index 0000000..19516fa --- /dev/null +++ b/typings/dspy/retrieve/azureaisearch_rm.pyi @@ -0,0 +1,231 @@ +""" +This type stub file was generated by pyright. +""" + +import dspy +import openai +from typing import Any, Callable, List, Optional, Union +from azure.search.documents import SearchClient +from azure.search.documents._paging import SearchItemPaged +from azure.search.documents.models import QueryType, VectorFilterMode + +""" +Retriever module for Azure AI Search +Author: Prajapati Harishkumar Kishorkumar (@HARISHKUMAR1112001) +""" + +class AzureAISearchRM(dspy.Retrieve): + """ + A retrieval module that utilizes Azure AI Search to retrieve top passages for a given query. + + Args: + search_service_name (str): The name of the Azure AI Search service. + search_api_key (str): The API key for accessing the Azure AI Search service. + search_index_name (str): The name of the search index in the Azure AI Search service. + field_text (str): The name of the field containing text content in the search index. This field will be mapped to the "content" field in the dsp framework. + field_vector (Optional[str]): The name of the field containing vector content in the search index. Defaults to None. + k (int, optional): The default number of top passages to retrieve. Defaults to 3. 
+ azure_openai_client (Optional[openai.AzureOpenAI]): An instance of the AzureOpenAI client. Either openai_client or embedding_func must be provided. Defaults to None. + openai_embed_model (Optional[str]): The name of the OpenAI embedding model. Defaults to "text-embedding-ada-002". + embedding_func (Optional[Callable]): A function for generating embeddings. Either openai_client or embedding_func must be provided. Defaults to None. + semantic_ranker (bool, optional): Whether to use semantic ranking. Defaults to False. + filter (str, optional): Additional filter query. Defaults to None. + query_language (str, optional): The language of the query. Defaults to "en-Us". + query_speller (str, optional): The speller mode. Defaults to "lexicon". + use_semantic_captions (bool, optional): Whether to use semantic captions. Defaults to False. + query_type (Optional[QueryType], optional): The type of query. Defaults to QueryType.FULL. + semantic_configuration_name (str, optional): The name of the semantic configuration. Defaults to None. + is_vector_search (Optional[bool]): Whether to enable vector search. Defaults to False. + is_hybrid_search (Optional[bool]): Whether to enable hybrid search. Defaults to False. + is_fulltext_search (Optional[bool]): Whether to enable fulltext search. Defaults to True. + vector_filter_mode (Optional[VectorFilterMode]): The vector filter mode. Defaults to None. + + Examples: + Below is a code snippet that demonstrates how to instantiate and use the AzureAISearchRM class: + ```python + search_service_name = "your_search_service_name" + search_api_key = "your_search_api_key" + search_index_name = "your_search_index_name" + field_text = "text_content_field" + + azure_search_retriever = AzureAISearchRM(search_service_name, search_api_key, search_index_name, field_text) + ``` + + Attributes: + search_service_name (str): The name of the Azure AI Search service. + search_api_key (str): The API key for accessing the Azure AI Search service. 
+ search_index_name (str): The name of the search index in the Azure AI Search service. + endpoint (str): The endpoint URL for the Azure AI Search service. + field_text (str): The name of the field containing text content in the search index. + field_vector (Optional[str]): The name of the field containing vector content in the search index. + azure_openai_client (Optional[openai.AzureOpenAI]): An instance of the AzureOpenAI client. + openai_embed_model (Optional[str]): The name of the OpenAI embedding model. + embedding_func (Optional[Callable]): A function for generating embeddings. + credential (AzureKeyCredential): The Azure key credential for accessing the service. + client (SearchClient): The Azure AI Search client instance. + semantic_ranker (bool): Whether to use semantic ranking. + filter (str): Additional filter query. + query_language (str): The language of the query. + query_speller (str): The speller mode. + use_semantic_captions (bool): Whether to use semantic captions. + query_type (Optional[QueryType]): The type of query. + semantic_configuration_name (str): The name of the semantic configuration. + is_vector_search (Optional[bool]): Whether to enable vector search. + is_hybrid_search (Optional[bool]): Whether to enable hybrid search. + is_fulltext_search (Optional[bool]): Whether to enable fulltext search. + vector_filter_mode (Optional[VectorFilterMode]): The vector filter mode. + + Methods: + forward(query_or_queries: Union[str, List[str]], k: Optional[int]) -> dspy.Prediction: + Search for the top passages corresponding to the given query or queries. 
+ + azure_search_request( + self, + key_content: str, + client: SearchClient, + query: str, + top: int, + semantic_ranker: bool, + filter: str, + query_language: str, + query_speller: str, + use_semantic_captions: bool, + query_type: QueryType, + semantic_configuration_name: str, + is_vector_search: bool, + is_hybrid_search: bool, + is_fulltext_search: bool, + field_vector: str, + vector_filter_mode: VectorFilterMode + ) -> List[dict]: + Perform a search request to the Azure AI Search service. + + process_azure_result( + self, + results:SearchItemPaged, + content_key:str, + content_score: str + ) -> List[dict]: + Process the results received from the Azure AI Search service and map them to the correct format. + + get_embeddings( + self, + query: str, + k_nearest_neighbors: int, + field_vector: str + ) -> List | Any: + Returns embeddings for the given query. + + check_semantic_configuration( + self, + semantic_configuration_name, + query_type + ): + Checks semantic configuration. + + Raises: + ImportError: If the required Azure AI Search libraries are not installed. + + Note: + This class relies on the 'azure-search-documents' library for interacting with the Azure AI Search service. + Ensure that you have the necessary permissions and correct configurations set up in Azure before using this class. 
+ """ + def __init__( + self, + search_service_name: str, + search_api_key: str, + search_index_name: str, + field_text: str, + field_vector: Optional[str] = ..., + k: int = ..., + azure_openai_client: Optional[openai.AzureOpenAI] = ..., + openai_embed_model: Optional[str] = ..., + embedding_func: Optional[Callable] = ..., + semantic_ranker: bool = ..., + filter: str = ..., + query_language: str = ..., + query_speller: str = ..., + use_semantic_captions: bool = ..., + query_type: Optional[QueryType] = ..., + semantic_configuration_name: str = ..., + is_vector_search: Optional[bool] = ..., + is_hybrid_search: Optional[bool] = ..., + is_fulltext_search: Optional[bool] = ..., + vector_filter_mode: Optional[VectorFilterMode.PRE_FILTER] = ..., + ) -> None: ... + def azure_search_request( + self, + key_content: str, + client: SearchClient, + query: str, + top: int, + semantic_ranker: bool, + filter: str, + query_language: str, + query_speller: str, + use_semantic_captions: bool, + query_type: QueryType, + semantic_configuration_name: str, + is_vector_search: bool, + is_hybrid_search: bool, + is_fulltext_search: bool, + field_vector: str, + vector_filter_mode: VectorFilterMode, + ): # -> list[Any]: + """ + Search in Azure AI Search Index + """ + ... + + def process_azure_result( + self, results: SearchItemPaged, content_key: str, content_score: str + ): # -> list[Any]: + """ + process received result from Azure AI Search as dictionary array and map content and score to correct format + """ + ... + + def forward(self, query_or_queries: Union[str, List[str]], k: Optional[int]) -> dspy.Prediction: + """ + Search with pinecone for self.k top passages for query + + Args: + query_or_queries (Union[str, List[str]]): The query or queries to search for. + + Returns: + dspy.Prediction: An object containing the retrieved passages. + """ + ... 
+ + def get_embeddings(self, query: str, k_nearest_neighbors: int, field_vector: str) -> List | Any: + """ + Returns embeddings for the given query. + + Args: + query (str): The query for which embeddings are to be retrieved. + k_nearest_neighbors (int): The number of nearest neighbors to consider. + field_vector (str): The field vector to use for embeddings. + + Returns: + list: A list containing the vectorized query. + Any: The result of embedding_func if azure_openai_client is not provided. + + Raises: + AssertionError: If neither azure_openai_client nor embedding_func is provided, + or if field_vector is not provided. + """ + ... + + def check_semantic_configuration(self, semantic_configuration_name, query_type): # -> None: + """ + Checks semantic configuration. + + Args: + semantic_configuration_name: The name of the semantic configuration. + query_type: The type of the query. + + Raises: + AssertionError: If semantic_configuration_name is not provided + or if query_type is not QueryType.SEMANTIC. + """ + ... diff --git a/typings/dspy/retrieve/chromadb_rm.pyi b/typings/dspy/retrieve/chromadb_rm.pyi new file mode 100644 index 0000000..b628a41 --- /dev/null +++ b/typings/dspy/retrieve/chromadb_rm.pyi @@ -0,0 +1,79 @@ +""" +This type stub file was generated by pyright. +""" + +import chromadb +from typing import List, Optional, Union +from dspy import Prediction, Retrieve +from chromadb.api.types import Embeddable, EmbeddingFunction + +""" +Retriever model for chromadb +""" +ERRORS = ... + +class ChromadbRM(Retrieve): + """ + A retrieval module that uses chromadb to return the top passages for a given query. + + Assumes that the chromadb index has been created and populated with the following metadata: + - documents: The text of the passage + + Args: + collection_name (str): chromadb collection name + persist_directory (str): chromadb persist directory + embedding_function (Optional[EmbeddingFunction[Embeddable]]): Optional function to use to embed documents. 
Defaults to DefaultEmbeddingFunction. + k (int, optional): The number of top passages to retrieve. Defaults to 7. + client(Optional[chromadb.Client]): Optional chromadb client provided by user, default to None + + Returns: + dspy.Prediction: An object containing the retrieved passages. + + Examples: + Below is a code snippet that shows how to use this as the default retriever: + ```python + llm = dspy.OpenAI(model="gpt-3.5-turbo") + # using default chromadb client + retriever_model = ChromadbRM('collection_name', 'db_path') + dspy.settings.configure(lm=llm, rm=retriever_model) + # to test the retriever with "my query" + retriever_model("my query") + ``` + + Use provided chromadb client + ```python + import chromadb + llm = dspy.OpenAI(model="gpt-3.5-turbo") + # say you have a chromadb running on a different port + client = chromadb.HttpClient(host='localhost', port=8889) + retriever_model = ChromadbRM('collection_name', 'db_path', client=client) + dspy.settings.configure(lm=llm, rm=retriever_model) + # to test the retriever with "my query" + retriever_model("my query") + ``` + + Below is a code snippet that shows how to use this in the forward() function of a module + ```python + self.retrieve = ChromadbRM('collection_name', 'db_path', k=num_passages) + ``` + """ + def __init__( + self, + collection_name: str, + persist_directory: str, + embedding_function: Optional[EmbeddingFunction[Embeddable]] = ..., + client: Optional[chromadb.Client] = ..., + k: int = ..., + ) -> None: ... + def forward( + self, query_or_queries: Union[str, List[str]], k: Optional[int] = ..., **kwargs + ) -> Prediction: + """Search with db for self.k top passages for query + + Args: + query_or_queries (Union[str, List[str]]): The query or queries to search for. + + Returns: + dspy.Prediction: An object containing the retrieved passages. + """ + ... 
diff --git a/typings/dspy/retrieve/clarifai_rm.pyi b/typings/dspy/retrieve/clarifai_rm.pyi new file mode 100644 index 0000000..68f7dec --- /dev/null +++ b/typings/dspy/retrieve/clarifai_rm.pyi @@ -0,0 +1,53 @@ +""" +This type stub file was generated by pyright. +""" + +import dspy +from typing import List, Optional, Union + +"""Clarifai as retriver to retrieve hits""" + +class ClarifaiRM(dspy.Retrieve): + """ + Retrieval module uses clarifai to return the Top K relevant pasages for the given query. + Assuming that you have ingested the source documents into clarifai App, where it is indexed and stored. + + Args: + clarifai_user_id (str): Clarifai unique user_id. + clarfiai_app_id (str): Clarifai App ID, where the documents are stored. + clarifai_pat (str): Clarifai PAT key. + k (int): Top K documents to retrieve. + + Examples: + TODO + """ + def __init__( + self, + clarifai_user_id: str, + clarfiai_app_id: str, + clarifai_pat: Optional[str] = ..., + k: int = ..., + ) -> None: ... + def retrieve_hits(self, hits): # -> str: + ... + def forward( + self, query_or_queries: Union[str, List[str]], k: Optional[int] = ..., **kwargs + ) -> dspy.Prediction: + """Uses clarifai-python SDK search function and retrieves top_k similar passages for given query, + Args: + query_or_queries : single query or list of queries + k : Top K relevant documents to return + + Returns: + passages in format of dotdict + + Examples: + Below is a code snippet that shows how to use Marqo as the default retriver: + ```python + import clarifai + llm = dspy.Clarifai(model=MODEL_URL, api_key="YOUR CLARIFAI_PAT") + retriever_model = ClarifaiRM(clarifai_user_id="USER_ID", clarfiai_app_id="APP_ID", clarifai_pat="YOUR CLARIFAI_PAT") + dspy.settings.configure(lm=llm, rm=retriever_model) + ``` + """ + ... 
diff --git a/typings/dspy/retrieve/databricks_rm.pyi b/typings/dspy/retrieve/databricks_rm.pyi new file mode 100644 index 0000000..f0492ef --- /dev/null +++ b/typings/dspy/retrieve/databricks_rm.pyi @@ -0,0 +1,149 @@ +""" +This type stub file was generated by pyright. +""" + +import dspy +from dataclasses import dataclass +from typing import Any, Dict, List, Optional, Union +from dspy.primitives.prediction import Prediction + +_databricks_sdk_installed = ... + +@dataclass +class Document: + page_content: str + metadata: Dict[str, Any] + type: str + def to_dict(self) -> Dict[str, Any]: ... + +class DatabricksRM(dspy.Retrieve): + """ + A retriever module that uses a Databricks Mosaic AI Vector Search Index to return the top-k + embeddings for a given query. + + Examples: + Below is a code snippet that shows how to set up a Databricks Vector Search Index + and configure a DatabricksRM DSPy retriever module to query the index. + + (example adapted from "Databricks: How to create and query a Vector Search Index: + https://docs.databricks.com/en/generative-ai/create-query-vector-search.html#create-a-vector-search-index) + + ```python + from databricks.vector_search.client import VectorSearchClient + + # Create a Databricks Vector Search Endpoint + client = VectorSearchClient() + client.create_endpoint( + name="your_vector_search_endpoint_name", + endpoint_type="STANDARD" + ) + + # Create a Databricks Direct Access Vector Search Index + index = client.create_direct_access_index( + endpoint_name="your_vector_search_endpoint_name", + index_name="your_index_name", + primary_key="id", + embedding_dimension=1024, + embedding_vector_column="text_vector", + schema={ + "id": "int", + "field2": "str", + "field3": "float", + "text_vector": "array" + } + ) + + # Create a DatabricksRM retriever module to query the Databricks Direct Access Vector + # Search Index + retriever = DatabricksRM( + databricks_index_name = "your_index_name", + docs_id_column_name="id", + 
text_column_name="field2", + k=3 + ) + ``` + + Below is a code snippet that shows how to query the Databricks Direct Access Vector + Search Index using the DatabricksRM retriever module: + + ```python + retrieved_results = DatabricksRM(query="Example query text")) + ``` + """ + def __init__( + self, + databricks_index_name: str, + databricks_endpoint: Optional[str] = ..., + databricks_token: Optional[str] = ..., + databricks_client_id: Optional[str] = ..., + databricks_client_secret: Optional[str] = ..., + columns: Optional[List[str]] = ..., + filters_json: Optional[str] = ..., + k: int = ..., + docs_id_column_name: str = ..., + docs_uri_column_name: Optional[str] = ..., + text_column_name: str = ..., + use_with_databricks_agent_framework: bool = ..., + ) -> None: + """ + Args: + databricks_index_name (str): The name of the Databricks Vector Search Index to query. + databricks_endpoint (Optional[str]): The URL of the Databricks Workspace containing + the Vector Search Index. Defaults to the value of the ``DATABRICKS_HOST`` + environment variable. If unspecified, the Databricks SDK is used to identify the + endpoint based on the current environment. + databricks_token (Optional[str]): The Databricks Workspace authentication token to use + when querying the Vector Search Index. Defaults to the value of the + ``DATABRICKS_TOKEN`` environment variable. If unspecified, the Databricks SDK is + used to identify the token based on the current environment. + databricks_client_id (str): Databricks service principal id. If not specified, + the token is resolved from the current environment (DATABRICKS_CLIENT_ID). + databricks_client_secret (str): Databricks service principal secret. If not specified, + the endpoint is resolved from the current environment (DATABRICKS_CLIENT_SECRET). + columns (Optional[List[str]]): Extra column names to include in response, + in addition to the document id and text columns specified by + ``docs_id_column_name`` and ``text_column_name``. 
+ filters_json (Optional[str]): A JSON string specifying additional query filters. + Example filters: ``{"id <": 5}`` selects records that have an ``id`` column value + less than 5, and ``{"id >=": 5, "id <": 10}`` selects records that have an ``id`` + column value greater than or equal to 5 and less than 10. + k (int): The number of documents to retrieve. + docs_id_column_name (str): The name of the column in the Databricks Vector Search Index + containing document IDs. + docs_uri_column_name (Optional[str]): The name of the column in the Databricks Vector Search Index + containing document URI. + text_column_name (str): The name of the column in the Databricks Vector Search Index + containing document text to retrieve. + use_with_databricks_agent_framework (bool): Whether to use the `DatabricksRM` in a way that is + compatible with the Databricks Mosaic Agent Framework. + """ + ... + + def forward( + self, + query: Union[str, List[float]], + query_type: str = ..., + filters_json: Optional[str] = ..., + ) -> Union[dspy.Prediction, List[Dict[str, Any]]]: + """ + Retrieve documents from a Databricks Mosaic AI Vector Search Index that are relevant to the + specified query. + + Args: + query (Union[str, List[float]]): The query text or numeric query vector for which to + retrieve relevant documents. + query_type (str): The type of search query to perform against the Databricks Vector + Search Index. Must be either 'ANN' (approximate nearest neighbor) or 'HYBRID' + (hybrid search). + filters_json (Optional[str]): A JSON string specifying additional query filters. + Example filters: ``{"id <": 5}`` selects records that have an ``id`` column value + less than 5, and ``{"id >=": 5, "id <": 10}`` selects records that have an ``id`` + column value greater than or equal to 5 and less than 10. If specified, this + parameter overrides the `filters_json` parameter passed to the constructor. 
+ + Returns: + A list of dictionaries when ``use_with_databricks_agent_framework`` is ``True``, + or a ``dspy.Prediction`` object when ``use_with_databricks_agent_framework`` is + ``False``. + """ + ... diff --git a/typings/dspy/retrieve/deeplake_rm.pyi b/typings/dspy/retrieve/deeplake_rm.pyi new file mode 100644 index 0000000..be840ae --- /dev/null +++ b/typings/dspy/retrieve/deeplake_rm.pyi @@ -0,0 +1,55 @@ +""" +This type stub file was generated by pyright. +""" + +import dspy +from typing import List, Optional, Union + +""" +Retriever model for deeplake +""" +ERRORS = ... + +class DeeplakeRM(dspy.Retrieve): + """ + A retriever module that uses deeplake to return the top passages for a given query. + + Assumes that a Deep Lake Vector Store has been created and populated with the following payload: + - text: The text of the passage + + Args: + deeplake_vectorstore_name (str): The name or path of the Deep Lake Vector Store. + deeplake_client (VectorStore): An instance of the Deep Lake client. + k (int, optional): The default number of top passages to retrieve. Defaults to 3. + + Examples: + Below is a code snippet that shows how to use Deep Lake as the default retriver: + ```python + from deeplake import VectorStore + llm = dspy.OpenAI(model="gpt-3.5-turbo") + deeplake_client = VectorStore + retriever_model = DeeplakeRM("my_vectorstore_path", deeplake_client=deeplake_client) + dspy.settings.configure(lm=llm, rm=retriever_model) + ``` + + Below is a code snippet that shows how to use Deep Lake in the forward() function of a module + ```python + self.retrieve = DeeplakeRM("my_vectorstore_path", deeplake_client=deeplake_client, k=num_passages) + ``` + """ + def __init__(self, deeplake_vectorstore_name: str, deeplake_client, k: int = ...) -> None: ... + def embedding_function(self, texts, model=...): # -> list[List[float]]: + ... 
+ def forward( + self, query_or_queries: Union[str, List[str]], k: Optional[int], **kwargs + ) -> dspy.Prediction: + """Search with DeepLake for self.k top passages for query + + Args: + query_or_queries (Union[str, List[str]]): The query or queries to search for. + k (Optional[int]): The number of top passages to retrieve. Defaults to self.k. + + Returns: + dspy.Prediction: An object containing the retrieved passages. + """ + ... diff --git a/typings/dspy/retrieve/epsilla_rm.pyi b/typings/dspy/retrieve/epsilla_rm.pyi new file mode 100644 index 0000000..7cec062 --- /dev/null +++ b/typings/dspy/retrieve/epsilla_rm.pyi @@ -0,0 +1,21 @@ +""" +This type stub file was generated by pyright. +""" + +import dspy +from typing import List, Union +from pyepsilla import vectordb + +class EpsillaRM(dspy.Retrieve): + def __init__( + self, + epsilla_client: vectordb.Client, + db_name: str, + db_path: str, + table_name: str, + k: int = ..., + page_content: str = ..., + ) -> None: ... + def forward( + self, query_or_queries: Union[str, List[str]], k: Union[int, None] = ..., **kwargs + ) -> dspy.Prediction: ... diff --git a/typings/dspy/retrieve/faiss_rm.pyi b/typings/dspy/retrieve/faiss_rm.pyi new file mode 100644 index 0000000..cea7ef9 --- /dev/null +++ b/typings/dspy/retrieve/faiss_rm.pyi @@ -0,0 +1,3 @@ +""" +This type stub file was generated by pyright. +""" diff --git a/typings/dspy/retrieve/falkordb_rm.pyi b/typings/dspy/retrieve/falkordb_rm.pyi new file mode 100644 index 0000000..b7eecba --- /dev/null +++ b/typings/dspy/retrieve/falkordb_rm.pyi @@ -0,0 +1,87 @@ +""" +This type stub file was generated by pyright. +""" + +import backoff +from typing import List, Optional, Union +from openai import APITimeoutError, InternalServerError, RateLimitError, UnprocessableEntityError +from dspy import Prediction, Retrieve +from dspy.dsp.utils.settings import settings + +def generate_random_string(length: int) -> str: ... 
+ +class Embedder: + def __init__(self, provider: str, model: str) -> None: ... + @backoff.on_exception( + backoff.expo, + (APITimeoutError, InternalServerError, RateLimitError, UnprocessableEntityError), + max_time=settings.backoff_time, + ) + def __call__(self, queries: Union[str, List[str]]) -> List[List[float]]: ... + +DEFAULT_INDEX_QUERY = ... + +class FalkordbRM(Retrieve): + """ + Implements a retriever that utilizes FalkorDB for retrieving passages. + This class manages a connection to a FalkorDB database using official FalkorDB Python drivers and requires + the database credentials. That is, if using a local FalkorDB session, host and port else if using a FalkorDB cloud session, + host, port, username, and password to be set as environment variables and optionally the database name. + Additionally, it utilizes an embedding provider (defaulting to OpenAI's services) to compute query embeddings, + which are then used to find the most relevant nodes in the FalkorDB graph based on the specified node property or custom retrieval query. + + Returns a list of passages in the form of `dspy.Prediction` objects + + Args: + Args: + node_label (str): The label of the node in the FalkorDB database to query against + text_node_property (str): The property of the node containing the text. + embedding_node_property (List[float]): The property of the node containing the embeddings. + k (Optional[int]): The default number of top passages to retrieve. Defaults to 5. + retrieval_query (Optional[str]): Custom Cypher query for retrieving passages. + embedding_provider (str): The provider of the embedding service. Defaults to "openai". + embedding_model (str): The model identifier for generating embeddings. Defaults to "text-embedding-ada-002". 
+ + Examples: + Below is a code snippet showcasing how to initialize FalkordbRM with environment variables for the database connection and OpenAI as the embedding provider: + + ```python + import os + + import dspy + import openai + + os.environ["FALKORDB_HOST"] = "localhost" + os.environ["FALORDB_PORT"] = "6379" + os.environ["OPENAI_API_KEY"] = "sk-" (Only if using openai as embedding's provider) + + # Uncomment and set the following if you are using FalkorDB cloud + # os.environ["FALKORDB_USERNAME"] = "falkordb" + # os.environ["FALKORDB_PASSWORD"] = "password" + + + falkordb_retriever = FalkordbRM( + node_label="myIndex", + text_node_property="text", + k=10, + embedding_provider="openai", + embedding_model="text-embedding-ada-002", + ) + + dspy.settings.configure(rm=falkordb_retriever) + ``` + + In this example, `FalkordbRM` is configured to retrieve nodes based on the "text" property from an index on a node labeled "myIndex", + using embeddings computed by OpenAI's "text-embedding-ada-002" model. + """ + def __init__( + self, + node_label: str, + text_node_property: str = ..., + embedding_node_property: str = ..., + k: int = ..., + retrieval_query: Optional[str] = ..., + embedding_provider: str = ..., + embedding_model: str = ..., + ) -> None: ... + def forward(self, query_or_queries: Union[str, List[str]], k: Optional[int]) -> Prediction: ... diff --git a/typings/dspy/retrieve/lancedb_rm.pyi b/typings/dspy/retrieve/lancedb_rm.pyi new file mode 100644 index 0000000..152899c --- /dev/null +++ b/typings/dspy/retrieve/lancedb_rm.pyi @@ -0,0 +1,58 @@ +""" +This type stub file was generated by pyright. +""" + +import backoff +import lancedb +from typing import List, Union +from dspy import Prediction, Retrieve +from dspy.dsp.utils.settings import settings + +""" +Retriever model for LanceDB +Author: Prashant Dixit (@PrashantDixit0) +""" +if lancedb is None: ... +OPENAI_LEGACY = ... +ERRORS = ... 
+ +class LancedbRM(Retrieve): + """ + A retrieval module that uses LanceDB to return the top passages for a given query. + + Assumes that the LanceDB table has been created and populated with the following metadata: + - text: The text of the passage + + Args: + table_name (str): The name of the table to query against. + persist_directory (str): directory where database is stored. + k (int, optional): The number of top passages to retrieve. Defaults to 3. + + Returns: + dspy.Prediction: An object containing the retrieved passages. + + Examples: + Below is a code snippet that shows how to use this as the default retriever: + ```python + llm = dspy.OpenAI(model="gpt-3.5-turbo") + retriever_model = LancedbRM() + dspy.settings.configure(lm=llm, rm=retriever_model) + ``` + + Below is a code snippet that shows how to use this in the forward() function of a module + ```python + self.retrieve = LancedbRM(k=num_passages) + ``` + """ + def __init__(self, table_name: str, persist_directory: str, k: int = ...) -> None: ... + @backoff.on_exception(backoff.expo, ERRORS, max_time=settings.backoff_time) + def forward(self, query_or_queries: Union[str, List[str]]) -> Prediction: + """Search with Lancedb for self.k top passages for query + + Args: + query_or_queries (Union[str, List[str]]): The query or queries to search for. + + Returns: + dspy.Prediction: An object containing the retrieved passages. + """ + ... diff --git a/typings/dspy/retrieve/llama_index_rm.pyi b/typings/dspy/retrieve/llama_index_rm.pyi new file mode 100644 index 0000000..af2715e --- /dev/null +++ b/typings/dspy/retrieve/llama_index_rm.pyi @@ -0,0 +1,58 @@ +""" +This type stub file was generated by pyright. +""" + +import dspy +from typing import Optional +from llama_index.core.base.base_retriever import BaseRetriever + +NO_TOP_K_WARNING = ... + +class LlamaIndexRM(dspy.Retrieve): + """Implements a retriever which wraps over a LlamaIndex retriever. 
+ + This is done to bridge LlamaIndex and DSPy and allow the various retrieval + abstractions in LlamaIndex to be used in DSPy. + + To-do (maybe): + - Async support (DSPy lacks this entirely it seems, so not a priority until the rest of the repo catches on) + - Text/video retrieval (Available in LI, not sure if this will be a priority in DSPy) + + Args: + retriever (BaseRetriever): A LlamaIndex retriever object - text based only + k (int): Optional; the number of examples to retrieve (similarity_top_k) + + If the underlying LI retriever does not have the property similarity_top_k, k will be ignored. + + Returns: + DSPy RM Object - this is a retriever object that can be used in DSPy + """ + + retriever: BaseRetriever + def __init__(self, retriever: BaseRetriever, k: Optional[int] = ...) -> None: ... + @property + def k(self) -> Optional[int]: + """Get similarity top k of retriever.""" + ... + + @k.setter + def k(self, k: int) -> None: + """Set similarity top k of retriever.""" + ... + + def forward(self, query: str, k: Optional[int] = ...) -> list[dspy.Example]: + """Forward function for the LI retriever. + + This is the function that is called to retrieve the top k examples for a given query. + Top k is set via the setter similarity_top_k or at LI instantiation. + + Args: + query (str): The query to retrieve examples for + k (int): Optional; the number of examples to retrieve (similarity_top_k) + + If the underlying LI retriever does not have the property similarity_top_k, k will be ignored. + + Returns: + List[dspy.Example]: A list of examples retrieved by the retriever + """ + ... diff --git a/typings/dspy/retrieve/marqo_rm.pyi b/typings/dspy/retrieve/marqo_rm.pyi new file mode 100644 index 0000000..2a3f2ec --- /dev/null +++ b/typings/dspy/retrieve/marqo_rm.pyi @@ -0,0 +1,57 @@ +""" +This type stub file was generated by pyright. 
+""" + +import dspy +import marqo +from typing import List, Union + +class MarqoRM(dspy.Retrieve): + """ + A retrieval module that uses Marqo to return the top passages for a given query. + + Assumes that a Marqo index has been created and populated with the following payload: + - document: The text of the passage + + Args: + marqo_index_name (str): The name of the marqo index. + marqo_client (marqo.client.Client): A marqo client instance. + k (int, optional): The number of top passages to retrieve. Defaults to 3. + page_content (str, optional): The name of the field in the marqo index that contains the text of the passage. Defaults to 'document'. + filter_string (str, optional): A filter string to use when searching. Defaults to None. + **kwargs: Additional keyword arguments to pass to the marqo search function. + + Examples: + Below is a code snippet that shows how to use Marqo as the default retriver: + ```python + import marqo + marqo_client = marqo.Client(url="http://0.0.0.0:8882") + + llm = dspy.OpenAI(model="gpt-3.5-turbo") + retriever_model = MarqoRM("my_index_name", marqo_client=marqo_client) + dspy.settings.configure(lm=llm, rm=retriever_model) + ``` + + Below is a code snippet that shows how to use Marqo in the forward() function of a module + ```python + self.retrieve = MarqoRM("my_index_name", marqo_client=marqo_client, k=num_passages) + ``` + """ + def __init__( + self, + marqo_index_name: str, + marqo_client: marqo.client.Client, + k: int = ..., + page_content: str = ..., + filter_string: str = ..., + ) -> None: ... + def forward(self, query_or_queries: Union[str, List[str]], k=..., **kwargs) -> dspy.Prediction: + """Search with Marqo for self.k top passages for query + + Args: + query_or_queries (Union[str, List[str]]): The query or queries to search for. + + Returns: + dspy.Prediction: An object containing the retrieved passages. + """ + ... 
diff --git a/typings/dspy/retrieve/milvus_rm.pyi b/typings/dspy/retrieve/milvus_rm.pyi new file mode 100644 index 0000000..f7fd667 --- /dev/null +++ b/typings/dspy/retrieve/milvus_rm.pyi @@ -0,0 +1,64 @@ +""" +This type stub file was generated by pyright. +""" + +import dspy +from typing import Callable, List, Optional, Union + +""" +Retriever model for Milvus or Zilliz Cloud +""" + +def openai_embedding_function(texts: List[str]): # -> list[List[float]]: + ... + +class MilvusRM(dspy.Retrieve): + """ + A retrieval module that uses Milvus to return passages for a given query. + + Assumes that a Milvus collection has been created and populated with the following field: + - text: The text of the passage + + Args: + collection_name (str): The name of the Milvus collection to query against. + uri (str, optional): The Milvus connection uri. Defaults to "http://localhost:19530". + token (str, optional): The Milvus connection token. Defaults to None. + db_name (str, optional): The Milvus database name. Defaults to "default". + embedding_function (callable, optional): The function to convert a list of text to embeddings. + The embedding function should take a list of text strings as input and output a list of embeddings. + Defaults to None. By default, it will get OpenAI client by the environment variable OPENAI_API_KEY + and use OpenAI's embedding model "text-embedding-3-small" with the default dimension. + k (int, optional): The number of top passages to retrieve. Defaults to 3. + + Returns: + dspy.Prediction: An object containing the retrieved passages. 
+ + Examples: + Below is a code snippet that shows how to use this as the default retriever: + ```python + llm = dspy.OpenAI(model="gpt-3.5-turbo") + retriever_model = MilvusRM( + collection_name="", + uri="", + token="" + ) + dspy.settings.configure(lm=llm, rm=retriever_model) + ``` + + Below is a code snippet that shows how to use this in the forward() function of a module + ```python + self.retrieve = MilvusRM(k=num_passages) + ``` + """ + def __init__( + self, + collection_name: str, + uri: Optional[str] = ..., + token: Optional[str] = ..., + db_name: Optional[str] = ..., + embedding_function: Optional[Callable] = ..., + k: int = ..., + ) -> None: ... + def forward( + self, query_or_queries: Union[str, List[str]], k: Optional[int] = ... + ) -> dspy.Prediction: ... diff --git a/typings/dspy/retrieve/mongodb_atlas_rm.pyi b/typings/dspy/retrieve/mongodb_atlas_rm.pyi new file mode 100644 index 0000000..e635f72 --- /dev/null +++ b/typings/dspy/retrieve/mongodb_atlas_rm.pyi @@ -0,0 +1,34 @@ +""" +This type stub file was generated by pyright. +""" + +import backoff +from typing import Any, List +from openai import APITimeoutError, InternalServerError, RateLimitError, UnprocessableEntityError +from dspy.dsp.utils.settings import settings +from dspy import Prediction, Retrieve + +def build_vector_search_pipeline( + index_name: str, query_vector: List[float], num_candidates: int, limit: int +) -> List[dict[str, Any]]: ... + +class Embedder: + def __init__(self, provider: str, model: str) -> None: ... + @backoff.on_exception( + backoff.expo, + (APITimeoutError, InternalServerError, RateLimitError, UnprocessableEntityError), + max_time=settings.backoff_time, + ) + def __call__(self, queries) -> Any: ... + +class MongoDBAtlasRM(Retrieve): + def __init__( + self, + db_name: str, + collection_name: str, + index_name: str, + k: int = ..., + embedding_provider: str = ..., + embedding_model: str = ..., + ) -> None: ... 
+ def forward(self, query_or_queries: str) -> Prediction: ... diff --git a/typings/dspy/retrieve/my_scale_rm.pyi b/typings/dspy/retrieve/my_scale_rm.pyi new file mode 100644 index 0000000..cea7ef9 --- /dev/null +++ b/typings/dspy/retrieve/my_scale_rm.pyi @@ -0,0 +1,3 @@ +""" +This type stub file was generated by pyright. +""" diff --git a/typings/dspy/retrieve/neo4j_rm.pyi b/typings/dspy/retrieve/neo4j_rm.pyi new file mode 100644 index 0000000..0cae2c2 --- /dev/null +++ b/typings/dspy/retrieve/neo4j_rm.pyi @@ -0,0 +1,78 @@ +""" +This type stub file was generated by pyright. +""" + +import backoff +from typing import Any, Callable, List, Optional, Union +from openai import APITimeoutError, InternalServerError, RateLimitError, UnprocessableEntityError +from dspy import Prediction, Retrieve +from dspy.dsp.utils.settings import settings + +class Embedder: + def __init__(self, provider: str, model: str) -> None: ... + @backoff.on_exception( + backoff.expo, + (APITimeoutError, InternalServerError, RateLimitError, UnprocessableEntityError), + max_time=settings.backoff_time, + ) + def __call__(self, queries) -> Any: ... + +DEFAULT_INDEX_QUERY = ... + +class Neo4jRM(Retrieve): + """ + Implements a retriever that utilizes Neo4j for retrieving passages. + This class manages a connection to a Neo4j database using official Neo4j Python drivers and requires + the database credentials (username, password, URI, and optionally the database name) to be set as environment variables. + Additionally, it utilizes an embedding provider (defaulting to OpenAI's services) to compute query embeddings, + which are then used to find the most relevant nodes in the Neo4j graph based on the specified node property or custom retrieval query. + + Returns a list of passages in the form of `dspy.Prediction` objects. + + Args: + index_name (str): The name of the vector index in the Neo4j database to query against. + text_node_property (Optional[str]): The property of the node containing the text. 
Required if `retrieval_query` is not set. + k (Optional[int]): The default number of top passages to retrieve. Defaults to 5. + retrieval_query (Optional[str]): Custom Cypher query for retrieving passages. Required if `text_node_property` is not set. + embedding_provider (str): The provider of the embedding service. Defaults to "openai". + embedding_model (str): The model identifier for generating embeddings. Defaults to "text-embedding-ada-002". + + Examples: + Below is a code snippet showcasing how to initialize Neo4jRM with environment variables for the database connection and OpenAI as the embedding provider: + + ```python + import os + + import dspy + import openai + + os.environ["NEO4J_URI"] = "bolt://localhost:7687" + os.environ["NEO4J_USERNAME"] = "neo4j" + os.environ["NEO4J_PASSWORD"] = "password" + os.environ["OPENAI_API_KEY"] = "sk-" + + neo4j_retriever = Neo4jRM( + index_name="myIndex", + text_node_property="text", + k=10, + embedding_provider="openai", + embedding_model="text-embedding-ada-002", + ) + + dspy.settings.configure(rm=neo4j_retriever) + ``` + + In this example, `Neo4jRM` is configured to retrieve nodes based on the "text" property from an index named "myIndex", + using embeddings computed by OpenAI's "text-embedding-ada-002" model. + """ + def __init__( + self, + index_name: str, + text_node_property: str = ..., + k: int = ..., + retrieval_query: str = ..., + embedding_provider: str = ..., + embedding_model: str = ..., + embedding_function: Optional[Callable] = ..., + ) -> None: ... + def forward(self, query_or_queries: Union[str, List[str]], k: Optional[int]) -> Prediction: ... diff --git a/typings/dspy/retrieve/pgvector_rm.pyi b/typings/dspy/retrieve/pgvector_rm.pyi new file mode 100644 index 0000000..82bd141 --- /dev/null +++ b/typings/dspy/retrieve/pgvector_rm.pyi @@ -0,0 +1,81 @@ +""" +This type stub file was generated by pyright. 
+""" + +import dspy +import openai +from typing import Callable, Optional + +class PgVectorRM(dspy.Retrieve): + """ + Implements a retriever that (as the name suggests) uses pgvector to retrieve passages, + using a raw SQL query and a postgresql connection managed by psycopg2. + + It needs to register the pgvector extension with the psycopg2 connection + + Returns a list of dspy.Example objects + + Args: + db_url (str): A PostgreSQL database URL in psycopg2's DSN format + pg_table_name (Optional[str]): name of the table containing passages + openai_client (openai.OpenAI): OpenAI client to use for computing query embeddings. Either openai_client or embedding_func must be provided. + embedding_func (Callable): A function to use for computing query embeddings. Either openai_client or embedding_func must be provided. + content_field (str = "text"): Field containing the passage text. Defaults to "text" + k (Optional[int]): Default number of top passages to retrieve. Defaults to 20 + embedding_field (str = "embedding"): Field containing passage embeddings. Defaults to "embedding" + fields (List[str] = ['text']): Fields to retrieve from the table. Defaults to "text" + embedding_model (str = "text-embedding-ada-002"): Field containing the OpenAI embedding model to use. 
Defaults to "text-embedding-ada-002" + + Examples: + Below is a code snippet that shows how to use PgVector as the default retriever + + ```python + import dspy + import openai + import psycopg2 + + openai.api_key = os.environ.get("OPENAI_API_KEY", None) + openai_client = openai.OpenAI() + + llm = dspy.OpenAI(model="gpt-3.5-turbo") + + DATABASE_URL should be in the format postgresql://user:password@host/database + db_url=os.getenv("DATABASE_URL") + + retriever_model = PgVectorRM(conn, openai_client=openai_client, "paragraphs", fields=["text", "document_id"], k=20) + dspy.settings.configure(lm=llm, rm=retriever_model) + ``` + + Below is a code snippet that shows how to use PgVector in the forward() function of a module + ```python + self.retrieve = PgVectorRM(db_url, openai_client=openai_client, "paragraphs", fields=["text", "document_id"], k=20) + ``` + """ + def __init__( + self, + db_url: str, + pg_table_name: str, + openai_client: Optional[openai.OpenAI] = ..., + embedding_func: Optional[Callable] = ..., + k: int = ..., + embedding_field: str = ..., + fields: Optional[list[str]] = ..., + content_field: str = ..., + embedding_model: str = ..., + include_similarity: bool = ..., + ) -> None: + """ + k = 20 is the number of paragraphs to retrieve + """ + ... + + def forward(self, query: str, k: int = ...): # -> list[Any]: + """Search with PgVector for k top passages for query using cosine similarity + + Args: + query (str): The query to search for + k (int): The number of top passages to retrieve. Defaults to the value set in the constructor. + Returns: + dspy.Prediction: an object containing the retrieved passages. + """ + ... diff --git a/typings/dspy/retrieve/pinecone_rm.pyi b/typings/dspy/retrieve/pinecone_rm.pyi new file mode 100644 index 0000000..6e816dc --- /dev/null +++ b/typings/dspy/retrieve/pinecone_rm.pyi @@ -0,0 +1,70 @@ +""" +This type stub file was generated by pyright. 
+""" + +import pinecone +from typing import List, Optional, Union +from dspy import Prediction, Retrieve + +""" +Retriever model for Pinecone +Author: Dhar Rawal (@drawal1) +""" +if pinecone is None: ... +OPENAI_LEGACY = ... +ERRORS = ... + +class PineconeRM(Retrieve): + """ + A retrieval module that uses Pinecone to return the top passages for a given query. + + Assumes that the Pinecone index has been created and populated with the following metadata: + - text: The text of the passage + + Args: + pinecone_index_name (str): The name of the Pinecone index to query against. + pinecone_api_key (str, optional): The Pinecone API key. Defaults to None. + pinecone_env (str, optional): The Pinecone environment. Defaults to None. + local_embed_model (str, optional): The local embedding model to use. A popular default is "sentence-transformers/all-mpnet-base-v2". + openai_embed_model (str, optional): The OpenAI embedding model to use. Defaults to "text-embedding-ada-002". + openai_api_key (str, optional): The API key for OpenAI. Defaults to None. + openai_org (str, optional): The organization for OpenAI. Defaults to None. + k (int, optional): The number of top passages to retrieve. Defaults to 3. + + Returns: + dspy.Prediction: An object containing the retrieved passages. 
+ + Examples: + Below is a code snippet that shows how to use this as the default retriever: + ```python + llm = dspy.OpenAI(model="gpt-3.5-turbo") + retriever_model = PineconeRM(openai.api_key) + dspy.settings.configure(lm=llm, rm=retriever_model) + ``` + + Below is a code snippet that shows how to use this in the forward() function of a module + ```python + self.retrieve = PineconeRM(k=num_passages) + ``` + """ + def __init__( + self, + pinecone_index_name: str, + pinecone_api_key: Optional[str] = ..., + pinecone_env: Optional[str] = ..., + local_embed_model: Optional[str] = ..., + openai_embed_model: Optional[str] = ..., + openai_api_key: Optional[str] = ..., + openai_org: Optional[str] = ..., + k: int = ..., + ) -> None: ... + def forward(self, query_or_queries: Union[str, List[str]]) -> Prediction: + """Search with pinecone for self.k top passages for query + + Args: + query_or_queries (Union[str, List[str]]): The query or queries to search for. + + Returns: + dspy.Prediction: An object containing the retrieved passages. + """ + ... diff --git a/typings/dspy/retrieve/qdrant_rm.pyi b/typings/dspy/retrieve/qdrant_rm.pyi new file mode 100644 index 0000000..cea7ef9 --- /dev/null +++ b/typings/dspy/retrieve/qdrant_rm.pyi @@ -0,0 +1,3 @@ +""" +This type stub file was generated by pyright. +""" diff --git a/typings/dspy/retrieve/ragatouille_rm.pyi b/typings/dspy/retrieve/ragatouille_rm.pyi new file mode 100644 index 0000000..9d0b5a9 --- /dev/null +++ b/typings/dspy/retrieve/ragatouille_rm.pyi @@ -0,0 +1,37 @@ +""" +This type stub file was generated by pyright. +""" + +import dspy +from typing import Optional, Union + +class RAGatouilleRM(dspy.Retrieve): + """A retrieval module that uses RAGatouille library to return the top passages for a given query. + + Assumes that you already have an index created with RAGatouille. + Reference: https://github.com/bclavie/RAGatouille + + Args: + index_root (str): Folder path where your index is stored. 
+ index_name (str): Name of the index you want to retrieve from. + k (int, optional): The default number of passages to retrieve. Defaults to 3. + + Examples: + Below is a code snippet that shows how to use RAGatouille index as the default retriever: + ```python + llm = dspy.OpenAI(model="gpt-3.5-turbo") + rm = RAGatouilleRM(index_root="ragatouille/colbert/indexes", index_name="my_index") + dspy.settings.configure(lm=llm, rm=rm) + ``` + """ + def __init__(self, index_root: str, index_name: str, k: int = ...) -> None: ... + def forward(self, query_or_queries: Union[str, list[str]], k: Optional[int]) -> dspy.Prediction: + """Search with RAGatouille based index for self.k top passages for query + + Args: + query_or_queries (Union[str, List[str]]): The query or queries to search for. + k (Optional[int]): The number of top passages to retrieve. Defaults to self.k. + Returns: + dspy.Prediction: An object containing the retrieved passages. + """ + ... diff --git a/typings/dspy/retrieve/retrieve.pyi b/typings/dspy/retrieve/retrieve.pyi new file mode 100644 index 0000000..7cefe3d --- /dev/null +++ b/typings/dspy/retrieve/retrieve.pyi @@ -0,0 +1,29 @@ +""" +This type stub file was generated by pyright. +""" + +from typing import List, Optional, Union +from dspy.predict.parameter import Parameter +from dspy.primitives.prediction import Prediction +from dspy.utils.callback import with_callbacks + +def single_query_passage(passages): # -> Prediction: + ... + +class Retrieve(Parameter): + name = ... + input_variable = ... + desc = ... + def __init__(self, k=..., callbacks=...) -> None: ... + def reset(self): # -> None: + ... + def dump_state(self): # -> dict[str, Any]: + ... + def load_state(self, state): # -> None: + ... + @with_callbacks + def __call__(self, *args, **kwargs): # -> List[str] | Prediction | List[Prediction]: + ... + def forward( + self, query: str, k: Optional[int] = ..., **kwargs + ) -> Union[List[str], Prediction, List[Prediction]]: ... 
diff --git a/typings/dspy/retrieve/snowflake_rm.pyi b/typings/dspy/retrieve/snowflake_rm.pyi new file mode 100644 index 0000000..c475241 --- /dev/null +++ b/typings/dspy/retrieve/snowflake_rm.pyi @@ -0,0 +1,104 @@ +""" +This type stub file was generated by pyright. +""" + +import dspy +from typing import Any, Optional, Type, Union +from pydantic import BaseModel + +class SnowflakeRM(dspy.Retrieve): + """A retrieval module that uses Snowflake's Cortex Search service to return the top relevant passages for a given query. + + Assumes that a Snowflake Cortex Search endpoint has been configured by the user. + + For more information on configuring the Cortex Search service, visit: https://docs.snowflake.com/en/user-guide/snowflake-cortex/cortex-search/cortex-search-overview + + Args: + snowflake_session (object): Snowflake Snowpark session for accessing the service. + cortex_search_service (str): Name of the Cortex Search service to be used. + snowflake_database (str): The name of the Snowflake table containing document embeddings. + snowflake_schema (str): The name of the Snowflake table containing document embeddings. + auto_filter (bool): Auto generate metadata filter based on user query and push it down prior to retrieving Cortex Search results. + k (int, optional): The default number of top passages to retrieve. Defaults to 3. + """ + def __init__( + self, + snowflake_session: object, + cortex_search_service: str, + snowflake_database: str, + snowflake_schema: str, + auto_filter=..., + k: int = ..., + max_retries=..., + ) -> None: ... + def forward( + self, + query_or_queries: Union[str, list[str]], + retrieval_columns: list[str], + filter: Optional[dict] = ..., + k: Optional[int] = ..., + ) -> dspy.Prediction: + """Query Cortex Search endpoint for top k relevant passages. + Args: + query_or_queries (Union[str, List[str]]): The query or queries to search for. + retrieval_columns (List[str]): Columns to include in response. + filter (Optional[json]): Filter query. 
+ k (Optional[int]): The number of top passages to retrieve. Defaults to self.k. + Returns: + dspy.Prediction: An object containing the retrieved passages. + """ + ... + +class JSONFilter(BaseModel): + answer: str = ... + @classmethod + def model_validate_json( + cls, json_data: str, *, strict: bool | None = ..., context: dict[str, Any] | None = ... + ): # -> Any: + ... + +class GenerateFilter(dspy.Signature): + """ + Given a query, attributes in the data, and example values of each attribute, generate a filter in valid JSON format. + Ensure the filter only uses valid operators: @eq, @contains,@and,@or,@not + Ensure only the valid JSON is output with no other reasoning. + + --- + Query: What was the sentiment of CEOs between 2021 and 2024? + Attributes: industry,hq,date + Sample Values: {"industry":["biotechnology","healthcare","agriculture"],"HQ":["NY, US","CA,US","FL,US"],"date":["01/01,1999","01/01/2024"]} + Answer: {"@or":[{"@eq":{"year":"2021"}},{"@eq":{"year":"2022"}},{"@eq":{"year":"2023"}},{"@eq":{"year":"2024"}}]} + + Query: What is the sentiment of Biotech CEO's of companies based in New York? + Attributes: industry,hq,date + Sample Values: {"industry":["biotechnology","healthcare","agriculture"],"HQ":["NY, US","CA,US","FL,US"],"date":["01/01,1999","01/01/2024"]} + Answer: {"@and": [ { "@eq": { "industry"": "biotechnology" } }, { "@eq": { "HQ": "NY,US" } }]} + + Query: What is the sentiment of Biotech CEOs outside of California? + Attributes: industry,hq,date + Sample Values: {"industry":["biotechnology","healthcare","agriculture"],"HQ":["NY, US","CA,US","FL,US"],"date":["01/01,1999","01/01/2024"]} + Answer: {"@and":[{ "@eq": { "industry": "biotechnology" } },{"@not":{"@eq":{"HQ":"CA,US"}}}]} + + Query: What is the sentiment of Biotech CEOs outside of California? 
+ Attributes: industry,hq,date + Sample Values: {"industry":["biotechnology","healthcare","agriculture"],"HQ":["NY, US","CA,US","FL,US"],"date":["01/01,1999","01/01/2024"]} + Answer: {"@and":[{ "@eq": { "industry": "biotechnology" } },{"@not":{"@eq":{"HQ":"CA,US"}}}]} + + Query: What is sentiment towards ag and biotech companies based outside of the US? + Attributes: industry,hq,date + Sample Values: {"industry"":["biotechnology","healthcare","agriculture"],"COUNTRY":["United States","Ireland","Russia","Georgia","Spain"],"month":["01","02","03","06","11","12""],""year"":["2022","2023","2024"]} + Answer:{"@and": [{ "@or": [{"@eq":{ "industry": "biotechnology" } },{"@eq":{"industry":"agriculture"}}]},{ "@not": {"@eq": { "COUNTRY": "United States" } }}]} + + """ + + query = ... + attributes = ... + sample_values = ... + answer: JSONFilter = ... + +class SmartSearch(dspy.Module): + def __init__(self) -> None: ... + def forward(self, query, attributes, sample_values): ... + +def get_min_length(model: Type[BaseModel]): # -> int: + ... diff --git a/typings/dspy/retrieve/vectara_rm.pyi b/typings/dspy/retrieve/vectara_rm.pyi new file mode 100644 index 0000000..a99ce4c --- /dev/null +++ b/typings/dspy/retrieve/vectara_rm.pyi @@ -0,0 +1,57 @@ +""" +This type stub file was generated by pyright. +""" + +import dspy +from typing import List, Optional, Union + +START_SNIPPET = ... +END_SNIPPET = ... + +def remove_snippet(s: str) -> str: ... + +class VectaraRM(dspy.Retrieve): + """ + A retrieval module that uses Vectara to return the top passages for a given query. + + Assumes that a Vectara corpora have been created and populated with the following payload: + - document: The text of the passage + + Args: + vectara_customer_id (str): Vectara Customer ID. defaults to VECTARA_CUSTOMER_ID environment variable + vectara_corpus_id (str): Vectara Corpus ID. defaults to VECTARA_CORPUS_ID environment variable + vectara_api_key (str): Vectara API Key. 
defaults to VECTARA_API_KEY environment variable + k (int, optional): The default number of top passages to retrieve. Defaults to 3. + + Examples: + Below is a code snippet that shows how to use Vectara as the default retriever: + ```python + from vectara_client import vectaraClient + + llm = dspy.OpenAI(model="gpt-3.5-turbo") + retriever_model = VectaraRM("", "", "") + dspy.settings.configure(lm=llm, rm=retriever_model) + ``` + + Below is a code snippet that shows how to use Vectara in the forward() function of a module + ```python + self.retrieve = VectaraRM("", "", "", k=num_passages) + ``` + """ + def __init__( + self, + vectara_customer_id: Optional[str] = ..., + vectara_corpus_id: Optional[str] = ..., + vectara_api_key: Optional[str] = ..., + k: int = ..., + ) -> None: ... + def forward(self, query_or_queries: Union[str, List[str]], k: Optional[int]) -> dspy.Prediction: + """Search with Vectara for self.k top passages for query + + Args: + query_or_queries (Union[str, List[str]]): The query or queries to search for. + k (Optional[int]): The number of top passages to retrieve. Defaults to self.k. + Returns: + dspy.Prediction: An object containing the retrieved passages. + """ + ... diff --git a/typings/dspy/retrieve/watson_discovery_rm.pyi b/typings/dspy/retrieve/watson_discovery_rm.pyi new file mode 100644 index 0000000..b07a1a5 --- /dev/null +++ b/typings/dspy/retrieve/watson_discovery_rm.pyi @@ -0,0 +1,43 @@ +""" +This type stub file was generated by pyright. +""" + +import dspy +from typing import Optional, Union + +class WatsonDiscoveryRM(dspy.Retrieve): + """A retrieval module that uses Watson Discovery to return the top passages for a given query. + + Args: + apikey (str): apikey for authentication purposes, + url (str): endpoint URL that includes the service instance ID + version (str): Release date of the version of the API you want to use. Specify dates in YYYY-MM-DD format. + project_id (str): The Universally Unique Identifier (UUID) of the project. 
+ collection_ids (list): An array containing the collections on which the search will be executed. + k (int, optional): The number of top passages to retrieve. Defaults to 5. + + Returns: + dspy.Prediction: An object containing the retrieved passages. + """ + def __init__( + self, + apikey: str, + url: str, + version: str, + project_id: str, + collection_ids: list = ..., + k: int = ..., + ) -> None: ... + def forward( + self, query_or_queries: Union[str, list[str]], k: Optional[int] = ... + ) -> dspy.Prediction: + """Search with Watson Discovery for self.k top passages for query. + + Args: + query_or_queries (Union[str, List[str]]): The query or queries to search for. + k (int, optional): The number of top passages to retrieve. + + Returns: + dspy.Prediction: An object containing the retrieved passages. + """ + ... diff --git a/typings/dspy/retrieve/weaviate_rm.pyi b/typings/dspy/retrieve/weaviate_rm.pyi new file mode 100644 index 0000000..0877571 --- /dev/null +++ b/typings/dspy/retrieve/weaviate_rm.pyi @@ -0,0 +1,69 @@ +""" +This type stub file was generated by pyright. +""" + +import dspy +import weaviate +from typing import List, Optional, Union +from dspy.primitives.prediction import Prediction + +class WeaviateRM(dspy.Retrieve): + """A retrieval module that uses Weaviate to return the top passages for a given query. + + Assumes that a Weaviate collection has been created and populated with the following payload: + - content: The text of the passage + + Args: + weaviate_collection_name (str): The name of the Weaviate collection. + weaviate_client (WeaviateClient): An instance of the Weaviate client. + k (int, optional): The default number of top passages to retrieve. Default to 3. + tenant_id (str, optional): The tenant to retrieve objects from. 
+ + Examples: + Below is a code snippet that shows how to use Weaviate as the default retriever: + ```python + import weaviate + + llm = dspy.Cohere(model="command-r-plus", api_key=api_key) + weaviate_client = weaviate.connect_to_[local, wcs, custom, embedded]("your-path-here") + retriever_model = WeaviateRM("my_collection_name", weaviate_client=weaviate_client) + dspy.settings.configure(lm=llm, rm=retriever_model) + + retrieve = dspy.Retrieve(k=1) + topK_passages = retrieve("what are the stages in planning, sanctioning and execution of public works").passages + ``` + + Below is a code snippet that shows how to use Weaviate in the forward() function of a module + ```python + self.retrieve = WeaviateRM("my_collection_name", weaviate_client=weaviate_client, k=num_passages) + ``` + """ + def __init__( + self, + weaviate_collection_name: str, + weaviate_client: Union[weaviate.WeaviateClient, weaviate.Client], + weaviate_collection_text_key: Optional[str] = ..., + k: int = ..., + tenant_id: Optional[str] = ..., + ) -> None: ... + def forward( + self, query_or_queries: Union[str, List[str]], k: Optional[int] = ..., **kwargs + ) -> Prediction: + """Search with Weaviate for self.k top passages for query or queries. + + Args: + query_or_queries (Union[str, List[str]]): The query or queries to search for. + k (Optional[int]): The number of top passages to retrieve. Defaults to self.k. + kwargs : + + Returns: + dspy.Prediction: An object containing the retrieved passages. + """ + ... + + def get_objects(self, num_samples: int, fields: List[str]) -> List[dict]: + """Get objects from Weaviate using the cursor API.""" + ... + + def insert(self, new_object_properties: dict): # -> None: + ... diff --git a/typings/dspy/retrieve/you_rm.pyi b/typings/dspy/retrieve/you_rm.pyi new file mode 100644 index 0000000..5b338bc --- /dev/null +++ b/typings/dspy/retrieve/you_rm.pyi @@ -0,0 +1,40 @@ +""" +This type stub file was generated by pyright. 
+""" + +import dspy +from typing import Literal, Optional, Union + +class YouRM(dspy.Retrieve): + """Retriever for You.com's Search and News API. + + [API reference](https://documentation.you.com/api-reference/) + + Args: + ydc_api_key: you.com API key, if `YDC_API_KEY` is not set in the environment + k: If ``endpoint="search"``, the max snippets to return per search hit. + If ``endpoint="news"``, the max articles to return. + endpoint: you.com endpoints + num_web_results: The max number of web results to return, must be under 20 + safesearch: Safesearch settings, one of "off", "moderate", "strict", defaults to moderate + country: Country code, ex: 'US' for United States, see API reference for more info + search_lang: (News API) Language codes, ex: 'en' for English, see API reference for more info + ui_lang: (News API) User interface language for the response, ex: 'en' for English. + See API reference for more info + spellcheck: (News API) Whether to spell check query or not, defaults to True + """ + def __init__( + self, + ydc_api_key: Optional[str] = ..., + k: int = ..., + endpoint: Literal["search", "news"] = ..., + num_web_results: Optional[int] = ..., + safesearch: Optional[Literal["off", "moderate", "strict"]] = ..., + country: Optional[str] = ..., + search_lang: Optional[str] = ..., + ui_lang: Optional[str] = ..., + spellcheck: Optional[bool] = ..., + ) -> None: ... + def forward( + self, query_or_queries: Union[str, list[str]], k: Optional[int] = ... + ) -> dspy.Prediction: ... diff --git a/typings/dspy/retrievers/__init__.pyi b/typings/dspy/retrievers/__init__.pyi new file mode 100644 index 0000000..86f85fb --- /dev/null +++ b/typings/dspy/retrievers/__init__.pyi @@ -0,0 +1,7 @@ +""" +This type stub file was generated by pyright. 
+""" + +from dspy.retrievers.embeddings import Embeddings + +__all__ = ["Embeddings"] diff --git a/typings/dspy/retrievers/embeddings.pyi b/typings/dspy/retrievers/embeddings.pyi new file mode 100644 index 0000000..838f0e2 --- /dev/null +++ b/typings/dspy/retrievers/embeddings.pyi @@ -0,0 +1,21 @@ +""" +This type stub file was generated by pyright. +""" + +from typing import Any, List, Optional + +class Embeddings: + def __init__( + self, + corpus: List[str], + embedder, + k: int = ..., + callbacks: Optional[List[Any]] = ..., + cache: bool = ..., + brute_force_threshold: int = ..., + normalize: bool = ..., + ) -> None: ... + def __call__(self, query: str): # -> Prediction: + ... + def forward(self, query: str): # -> Prediction: + ... diff --git a/typings/dspy/signatures/__init__.pyi b/typings/dspy/signatures/__init__.pyi new file mode 100644 index 0000000..db3fe87 --- /dev/null +++ b/typings/dspy/signatures/__init__.pyi @@ -0,0 +1,25 @@ +""" +This type stub file was generated by pyright. +""" + +from dspy.signatures.field import InputField, OldField, OldInputField, OldOutputField, OutputField +from dspy.signatures.signature import ( + Signature, + SignatureMeta, + ensure_signature, + infer_prefix, + make_signature, +) + +__all__ = [ + "InputField", + "OutputField", + "OldField", + "OldInputField", + "OldOutputField", + "SignatureMeta", + "Signature", + "infer_prefix", + "ensure_signature", + "make_signature", +] diff --git a/typings/dspy/signatures/field.pyi b/typings/dspy/signatures/field.pyi new file mode 100644 index 0000000..a33889e --- /dev/null +++ b/typings/dspy/signatures/field.pyi @@ -0,0 +1,32 @@ +""" +This type stub file was generated by pyright. +""" + +DSPY_FIELD_ARG_NAMES = ... +PYDANTIC_CONSTRAINT_MAP = ... + +def move_kwargs(**kwargs): # -> dict[Any, Any]: + ... +def InputField(**kwargs): # -> Any: + ... +def OutputField(**kwargs): # -> Any: + ... +def new_to_old_field(field): # -> OldInputField | OldOutputField: + ... 
+ +class OldField: + """A more ergonomic datatype that infers prefix and desc if omitted.""" + def __init__(self, *, prefix=..., desc=..., input, format=...) -> None: ... + def finalize(self, key, inferred_prefix): # -> None: + """Set the prefix if it's not provided explicitly.""" + ... + + def __repr__(self): # -> str: + ... + def __eq__(self, __value: object) -> bool: ... + +class OldInputField(OldField): + def __init__(self, *, prefix=..., desc=..., format=...) -> None: ... + +class OldOutputField(OldField): + def __init__(self, *, prefix=..., desc=..., format=...) -> None: ... diff --git a/typings/dspy/signatures/signature.pyi b/typings/dspy/signatures/signature.pyi new file mode 100644 index 0000000..905c988 --- /dev/null +++ b/typings/dspy/signatures/signature.pyi @@ -0,0 +1,153 @@ +""" +This type stub file was generated by pyright. +""" + +from typing import Dict, Optional, Tuple, Type, Union +from pydantic import BaseModel +from pydantic.fields import FieldInfo + +"""Signature class for DSPy. + +You typically subclass the Signature class, like this: + class MySignature(dspy.Signature): + input: str = InputField(desc="...") + output: int = OutputField(desc="...") + +You can call Signature("input1, input2 -> output1, output2") to create a new signature type. +You can also include instructions, Signature("input -> output", "This is a test"). +But it's generally better to use the make_signature function. + +If you are not sure if your input is a string representation, (like "input1, input2 -> output1, output2"), +or a signature, you can use the ensure_signature function. + +For compatibility with the legacy dsp format, you can use the signature_to_template function. +""" + +class SignatureMeta(type(BaseModel)): + def __call__(cls, *args, **kwargs): # -> type[Signature] | Any: + ... + def __new__(mcs, signature_name, bases, namespace, **kwargs): # -> type: + ... + @property + def instructions(cls) -> str: ... 
+ @instructions.setter + def instructions(cls, instructions: str) -> None: ... + @property + def input_fields(cls) -> dict[str, FieldInfo]: ... + @property + def output_fields(cls) -> dict[str, FieldInfo]: ... + @property + def fields(cls) -> dict[str, FieldInfo]: ... + @property + def signature(cls) -> str: + """The string representation of the signature.""" + ... + + def __repr__(cls): # -> str: + """Output a representation of the signature. + + Uses the form: + Signature(question, context -> answer + question: str = InputField(desc="..."), + context: List[str] = InputField(desc="..."), + answer: int = OutputField(desc="..."), + ). + """ + ... + +class Signature(BaseModel, metaclass=SignatureMeta): + "" + @classmethod + def with_instructions(cls, instructions: str) -> Type[Signature]: ... + @classmethod + def with_updated_fields(cls, name, type_=..., **kwargs) -> Type[Signature]: + """Create a new Signature class with the updated field information. + + Returns a new Signature class with the field, name, updated + with fields[name].json_schema_extra[key] = value. + + Args: + name: The name of the field to update. + type_: The new type of the field. + **kwargs: The new values for the field. + + Returns: + A new Signature class (not an instance) with the updated field information. + """ + ... + + @classmethod + def prepend(cls, name, field, type_=...) -> Type[Signature]: ... + @classmethod + def append(cls, name, field, type_=...) -> Type[Signature]: ... + @classmethod + def delete(cls, name) -> Type[Signature]: ... + @classmethod + def insert( + cls, index: int, name: str, field, type_: Optional[Type] = ... + ) -> Type[Signature]: ... + @classmethod + def equals(cls, other) -> bool: + """Compare the JSON schema of two Signature classes.""" + ... + + @classmethod + def dump_state(cls): # -> dict[str, str | list[Any]]: + ... + @classmethod + def load_state(cls, state): # -> Signature: + ... 
+ +def ensure_signature(signature: Union[str, Type[Signature]], instructions=...) -> Signature: ... +def make_signature( + signature: Union[str, Dict[str, Tuple[type, FieldInfo]]], + instructions: Optional[str] = ..., + signature_name: str = ..., + custom_types: Optional[Dict[str, Type]] = ..., +) -> Type[Signature]: + """Create a new Signature subclass with the specified fields and instructions. + + Args: + signature: Either a string in the format "input1, input2 -> output1, output2" + or a dictionary mapping field names to tuples of (type, FieldInfo). + instructions: Optional string containing instructions/prompt for the signature. + If not provided, defaults to a basic description of inputs and outputs. + signature_name: Optional string to name the generated Signature subclass. + Defaults to "StringSignature". + custom_types: Optional dictionary mapping type names to their actual type objects. + Useful for resolving custom types that aren't built-ins or in the typing module. + + Returns: + A new signature class with the specified fields and instructions. + + Examples: + + ``` + # Using string format + sig1 = make_signature("question, context -> answer") + + # Using dictionary format + sig2 = make_signature({ + "question": (str, InputField()), + "answer": (str, OutputField()) + }) + + # Using custom types + class MyType: + pass + + sig3 = make_signature("input: MyType -> output", custom_types={"MyType": MyType}) + ``` + """ + ... + +def infer_prefix(attribute_name: str) -> str: + """Infer a prefix from an attribute name by converting it to a human-readable format. + + Examples: + "camelCaseText" -> "Camel Case Text" + "snake_case_text" -> "Snake Case Text" + "text2number" -> "Text 2 Number" + "HTMLParser" -> "HTML Parser" + """ + ... 
diff --git a/typings/dspy/signatures/utils.pyi b/typings/dspy/signatures/utils.pyi new file mode 100644 index 0000000..2cff8ed --- /dev/null +++ b/typings/dspy/signatures/utils.pyi @@ -0,0 +1,8 @@ +""" +This type stub file was generated by pyright. +""" + +from typing import Literal +from pydantic.fields import FieldInfo + +def get_dspy_field_type(field: FieldInfo) -> Literal["input", "output"]: ... diff --git a/typings/dspy/streaming/__init__.pyi b/typings/dspy/streaming/__init__.pyi new file mode 100644 index 0000000..b5ac15c --- /dev/null +++ b/typings/dspy/streaming/__init__.pyi @@ -0,0 +1,17 @@ +""" +This type stub file was generated by pyright. +""" + +from dspy.streaming.messages import StatusMessage, StatusMessageProvider, StreamResponse +from dspy.streaming.streamify import apply_sync_streaming, streamify, streaming_response +from dspy.streaming.streaming_listener import StreamListener + +__all__ = [ + "StatusMessage", + "StatusMessageProvider", + "streamify", + "StreamListener", + "StreamResponse", + "streaming_response", + "apply_sync_streaming", +] diff --git a/typings/dspy/streaming/messages.pyi b/typings/dspy/streaming/messages.pyi new file mode 100644 index 0000000..a1ed0f4 --- /dev/null +++ b/typings/dspy/streaming/messages.pyi @@ -0,0 +1,89 @@ +""" +This type stub file was generated by pyright. +""" + +from dataclasses import dataclass +from typing import Any, Dict, Optional +from dspy.utils.callback import BaseCallback + +@dataclass +class StreamResponse: + predict_name: str + signature_field_name: str + chunk: str + ... + +@dataclass +class StatusMessage: + """Dataclass that wraps a status message for status streaming.""" + + message: str + ... + +def sync_send_to_stream(stream, message): + """Send message to stream in a sync context, regardless of whether the caller is async or not.""" + ... + +class StatusMessageProvider: + """Provides customizable status message streaming for DSPy programs. 
+ + This class serves as a base for creating custom status message providers. Users can subclass + and override its methods to define specific status messages for different stages of program execution, + each method must return a string. + + Example: + ```python + class MyStatusMessageProvider(StatusMessageProvider): + def lm_start_status_message(self, instance, inputs): + return f"Calling LM with inputs {inputs}..." + + def module_end_status_message(self, outputs): + return f"Module finished with output: {outputs}!" + + program = dspy.streamify(dspy.Predict("q->a"), status_message_provider=MyStatusMessageProvider()) + ``` + """ + def tool_start_status_message(self, instance: Any, inputs: Dict[str, Any]): # -> str: + """Status message before a `dspy.Tool` is called.""" + ... + + def tool_end_status_message(self, outputs: Any): # -> LiteralString: + """Status message after a `dspy.Tool` is called.""" + ... + + def module_start_status_message(self, instance: Any, inputs: Dict[str, Any]): # -> None: + """Status message before a `dspy.Module` or `dspy.Predict` is called.""" + ... + + def module_end_status_message(self, outputs: Any): # -> None: + """Status message after a `dspy.Module` or `dspy.Predict` is called.""" + ... + + def lm_start_status_message(self, instance: Any, inputs: Dict[str, Any]): # -> None: + """Status message before a `dspy.LM` is called.""" + ... + + def lm_end_status_message(self, outputs: Any): # -> None: + """Status message after a `dspy.LM` is called.""" + ... + +class StatusStreamingCallback(BaseCallback): + def __init__(self, status_message_provider: Optional[StatusMessageProvider] = ...) -> None: ... + def on_tool_start(self, call_id: str, instance: Any, inputs: Dict[str, Any]): # -> None: + ... + def on_tool_end( + self, call_id: str, outputs: Optional[Dict[str, Any]], exception: Optional[Exception] = ... + ): # -> None: + ... + def on_lm_start(self, call_id: str, instance: Any, inputs: Dict[str, Any]): # -> None: + ... 
+ def on_lm_end( + self, call_id: str, outputs: Optional[Dict[str, Any]], exception: Optional[Exception] = ... + ): # -> None: + ... + def on_module_start(self, call_id: str, instance: Any, inputs: Dict[str, Any]): # -> None: + ... + def on_module_end( + self, call_id: str, outputs: Optional[Dict[str, Any]], exception: Optional[Exception] = ... + ): # -> None: + ... diff --git a/typings/dspy/streaming/streamify.pyi b/typings/dspy/streaming/streamify.pyi new file mode 100644 index 0000000..7387226 --- /dev/null +++ b/typings/dspy/streaming/streamify.pyi @@ -0,0 +1,164 @@ +""" +This type stub file was generated by pyright. +""" + +from typing import ( + Any, + AsyncGenerator, + Awaitable, + Callable, + Generator, + List, + Optional, + TYPE_CHECKING, +) +from dspy.streaming.messages import StatusMessageProvider +from dspy.streaming.streaming_listener import StreamListener +from dspy.primitives.program import Module + +logger = ... +if TYPE_CHECKING: ... + +def streamify( + program: Module, + status_message_provider: Optional[StatusMessageProvider] = ..., + stream_listeners: Optional[List[StreamListener]] = ..., + include_final_prediction_in_output_stream: bool = ..., + is_async_program: bool = ..., + async_streaming: bool = ..., +) -> Callable[[Any, Any], Awaitable[Any]]: + """ + Wrap a DSPy program so that it streams its outputs incrementally, rather than returning them + all at once. It also provides status messages to the user to indicate the progress of the program, and users + can implement their own status message provider to customize the status messages and what module to generate + status messages for. + + Args: + program: The DSPy program to wrap with streaming functionality. + status_message_provider: A custom status message generator to use instead of the default one. Users can + implement their own status message generator to customize the status messages and what module to generate + status messages for. 
+ stream_listeners: A list of stream listeners to capture the streaming output of specific fields of sub predicts + in the program. When provided, only the target fields in the target predict will be streamed to the user. + include_final_prediction_in_output_stream: Whether to include the final prediction in the output stream, only + useful when `stream_listeners` is provided. If `False`, the final prediction will not be included in the + output stream. When the program hit cache, or no listeners captured anything, the final prediction will + still be included in the output stream even if this is `False`. + is_async_program: Whether the program is async. If `False`, the program will be wrapped with `asyncify`, + otherwise the program will be called with `acall`. + async_streaming: Whether to return an async generator or a sync generator. If `False`, the streaming will be + converted to a sync generator. + + Returns: + A function that takes the same arguments as the original program, but returns an async + generator that yields the program's outputs incrementally. + + Example: + + ```python + import asyncio + import dspy + + dspy.settings.configure(lm=dspy.LM("openai/gpt-4o-mini")) + # Create the program and wrap it with streaming functionality + program = dspy.streamify(dspy.Predict("q->a")) + + # Use the program with streaming output + async def use_streaming(): + output = program(q="Why did a chicken cross the kitchen?") + return_value = None + async for value in output: + if isinstance(value, dspy.Prediction): + return_value = value + else: + print(value) + return return_value + + output = asyncio.run(use_streaming()) + print(output) + ``` + + Example with custom status message provider: + ```python + import asyncio + import dspy + + dspy.settings.configure(lm=dspy.LM("openai/gpt-4o-mini")) + + class MyStatusMessageProvider(StatusMessageProvider): + def module_start_status_message(self, instance, inputs): + return f"Predicting..." 
+ + def tool_end_status_message(self, outputs): + return f"Tool calling finished with output: {outputs}!" + + # Create the program and wrap it with streaming functionality + program = dspy.streamify(dspy.Predict("q->a"), status_message_provider=MyStatusMessageProvider()) + + # Use the program with streaming output + async def use_streaming(): + output = program(q="Why did a chicken cross the kitchen?") + return_value = None + async for value in output: + if isinstance(value, dspy.Prediction): + return_value = value + else: + print(value) + return return_value + + output = asyncio.run(use_streaming()) + print(output) + ``` + + Example with stream listeners: + + ```python + import asyncio + import dspy + + dspy.settings.configure(lm=dspy.LM("openai/gpt-4o-mini", cache=False)) + + # Create the program and wrap it with streaming functionality + predict = dspy.Predict("question->answer, reasoning") + stream_listeners = [ + dspy.streaming.StreamListener(signature_field_name="answer"), + dspy.streaming.StreamListener(signature_field_name="reasoning"), + ] + stream_predict = dspy.streamify(predict, stream_listeners=stream_listeners) + + async def use_streaming(): + output = stream_predict( + question="why did a chicken cross the kitchen?", + include_final_prediction_in_output_stream=False, + ) + return_value = None + async for value in output: + if isinstance(value, dspy.Prediction): + return_value = value + else: + print(value) + return return_value + + output = asyncio.run(use_streaming()) + print(output) + ``` + + You should see the streaming chunks (in the format of `dspy.streaming.StreamResponse`) in the console output. + """ + ... + +def apply_sync_streaming(async_generator: AsyncGenerator) -> Generator: + """Convert the async streaming generator to a sync generator.""" + ... 
+ +async def streaming_response(streamer: AsyncGenerator) -> AsyncGenerator: + """ + Convert a DSPy program output stream to an OpenAI-compatible output stream that can be + used by a service as an API response to a streaming request. + + Args: + streamer: An async generator that yields values from a DSPy program output stream. + Returns: + An async generator that yields OpenAI-compatible streaming response chunks. + """ + ... diff --git a/typings/dspy/streaming/streaming_listener.pyi b/typings/dspy/streaming/streaming_listener.pyi new file mode 100644 index 0000000..f26e2f7 --- /dev/null +++ b/typings/dspy/streaming/streaming_listener.pyi @@ -0,0 +1,46 @@ +""" +This type stub file was generated by pyright. +""" + +from typing import Any, List, Optional, TYPE_CHECKING +from litellm import ModelResponseStream +from dspy.primitives.program import Module + +if TYPE_CHECKING: ... + +class StreamListener: + """Class that listens to the stream to capture the streeaming of a specific output field of a predictor.""" + def __init__( + self, signature_field_name: str, predict: Any = ..., predict_name: Optional[str] = ... + ) -> None: + """ + Args: + signature_field_name: The name of the field to listen to. + predict: The predictor to listen to. If None, when calling `streamify()` it will automatically look for + the predictor that has the `signature_field_name` in its signature. + predict_name: The name of the predictor to listen to. If None, when calling `streamify()` it will + automatically look for the predictor that has the `signature_field_name` in its signature. + """ + ... + + def receive(self, chunk: ModelResponseStream): # -> StreamResponse | None: + ... + def flush(self) -> str: + """Flush all tokens in the field end queue. + + This method is called to flush out the last a few tokens when the stream is ended. 
These tokens + are in the buffer because we don't directly yield the tokens received by the stream listener + with the purpose to not yield the end_identifier tokens, e.g., "[[ ## ... ## ]]" for ChatAdapter. + """ + ... + +def find_predictor_for_stream_listeners( + program: Module, stream_listeners: List[StreamListener] +): # -> defaultdict[Any, list[Any]]: + """Find the predictor for each stream listener. + + This is a utility function to automatically find the predictor for each stream listener. It is used when some + listeners don't specify the predictor they want to listen to. If a listener's `signature_field_name` is not + unique in the program, this function will raise an error. + """ + ... diff --git a/typings/dspy/teleprompt/__init__.pyi b/typings/dspy/teleprompt/__init__.pyi new file mode 100644 index 0000000..1005c41 --- /dev/null +++ b/typings/dspy/teleprompt/__init__.pyi @@ -0,0 +1,34 @@ +""" +This type stub file was generated by pyright. +""" + +from dspy.teleprompt.avatar_optimizer import AvatarOptimizer +from dspy.teleprompt.bettertogether import BetterTogether +from dspy.teleprompt.bootstrap import BootstrapFewShot +from dspy.teleprompt.bootstrap_finetune import BootstrapFinetune +from dspy.teleprompt.copro_optimizer import COPRO +from dspy.teleprompt.ensemble import Ensemble +from dspy.teleprompt.infer_rules import InferRules +from dspy.teleprompt.knn_fewshot import KNNFewShot +from dspy.teleprompt.mipro_optimizer_v2 import MIPROv2 +from dspy.teleprompt.random_search import BootstrapFewShotWithRandomSearch +from dspy.teleprompt.simba import SIMBA +from dspy.teleprompt.teleprompt import Teleprompter +from dspy.teleprompt.teleprompt_optuna import BootstrapFewShotWithOptuna +from dspy.teleprompt.vanilla import LabeledFewShot + +__all__ = [ + "AvatarOptimizer", + "BetterTogether", + "BootstrapFewShot", + "BootstrapFinetune", + "COPRO", + "Ensemble", + "KNNFewShot", + "MIPROv2", + "BootstrapFewShotWithRandomSearch", + "BootstrapFewShotWithOptuna", + 
"LabeledFewShot", + "InferRules", + "SIMBA", +] diff --git a/typings/dspy/teleprompt/avatar_optimizer.pyi b/typings/dspy/teleprompt/avatar_optimizer.pyi new file mode 100644 index 0000000..cc57b36 --- /dev/null +++ b/typings/dspy/teleprompt/avatar_optimizer.pyi @@ -0,0 +1,64 @@ +""" +This type stub file was generated by pyright. +""" + +import dspy +from typing import Callable, List, Optional +from pydantic import BaseModel +from dspy.predict.avatar import ActionOutput +from dspy.teleprompt.teleprompt import Teleprompter + +DEFAULT_MAX_EXAMPLES = ... + +class EvalResult(BaseModel): + example: dict + score: float + actions: Optional[List[ActionOutput]] = ... + +class Comparator(dspy.Signature): + """After executing the given actions on user inputs using the given instruction, some inputs have yielded good, results, while others have not. I'll provide you the inputs along with their, corresponding evaluation metrics: + + Task: + (1) Firstly, identify and contrast the patterns of inputs that have achieved good results with those that have not. + (2) Then, review the computational logic for any inconsistencies in the previous actions. + (3) Lastly, specify the modification in tools used that can lead to improved performance on the negative inputs.""" + + instruction: str = ... + actions: List[str] = ... + pos_input_with_metrics: List[EvalResult] = ... + neg_input_with_metrics: List[EvalResult] = ... + feedback: str = ... + +class FeedbackBasedInstruction(dspy.Signature): + """There is a task that needs to be completed for which one can use multiple tools to achieve the desired outcome. A group's performance was evaluated on a dataset of inputs, the inputs that did well are positive inputs, and the inputs that did not do well are negative inputs. + + You received feedback on how they can better use the tools to improve your performance on the negative inputs. 
You have been provided with the previous instruction, that they followed to use tools to complete the task, and the feedback on your performance. + + Your task is to incorporate the feedback and generate a detailed instruction for the group to follow to improve their performance on the task. + + Make sure that the new instruction talks about how to use the tools effectively and should be no more than 3 paragraphs long. The previous instruction contains general guidelines that you must retain in the new instruction.""" + + previous_instruction: str = ... + feedback: str = ... + new_instruction: str = ... + +class AvatarOptimizer(Teleprompter): + def __init__( + self, + metric: Callable, + max_iters: int = ..., + lower_bound: int = ..., + upper_bound: int = ..., + max_positive_inputs: Optional[int] = ..., + max_negative_inputs: Optional[int] = ..., + optimize_for: str = ..., + ) -> None: ... + def process_example( + self, actor, example, return_outputs + ): # -> tuple[Any, Any, Any] | tuple[Any, None, Literal[0]] | Literal[0]: + ... + def thread_safe_evaluator( + self, devset, actor, return_outputs=..., num_threads=... + ): # -> tuple[Any | float, list[Any]] | float: + ... + def compile(self, student, *, trainset): ... diff --git a/typings/dspy/teleprompt/bettertogether.pyi b/typings/dspy/teleprompt/bettertogether.pyi new file mode 100644 index 0000000..c7a964b --- /dev/null +++ b/typings/dspy/teleprompt/bettertogether.pyi @@ -0,0 +1,23 @@ +""" +This type stub file was generated by pyright. +""" + +from typing import Callable, List, Optional +from dspy.primitives.example import Example +from dspy.primitives.program import Program +from dspy.teleprompt.teleprompt import Teleprompter + +logger = ... + +class BetterTogether(Teleprompter): + STRAT_SEP = ... + def __init__( + self, + metric: Callable, + prompt_optimizer: Optional[Teleprompter] = ..., + weight_optimizer: Optional[Teleprompter] = ..., + seed: Optional[int] = ..., + ) -> None: ... 
+ def compile( + self, student: Program, trainset: List[Example], strategy: str = ..., valset_ratio=... + ) -> Program: ... diff --git a/typings/dspy/teleprompt/bootstrap.pyi b/typings/dspy/teleprompt/bootstrap.pyi new file mode 100644 index 0000000..070ee64 --- /dev/null +++ b/typings/dspy/teleprompt/bootstrap.pyi @@ -0,0 +1,42 @@ +""" +This type stub file was generated by pyright. +""" + +from typing import Dict, Optional +from dspy.teleprompt.teleprompt import Teleprompter + +logger = ... + +class BootstrapFewShot(Teleprompter): + def __init__( + self, + metric=..., + metric_threshold=..., + teacher_settings: Optional[Dict] = ..., + max_bootstrapped_demos=..., + max_labeled_demos=..., + max_rounds=..., + max_errors=..., + ) -> None: + """A Teleprompter class that composes a set of demos/examples to go into a predictor's prompt. + These demos come from a combination of labeled examples in the training set, and bootstrapped demos. + + Args: + metric (Callable): A function that compares an expected value and predicted value, + outputting the result of that comparison. + metric_threshold (float, optional): If the metric yields a numerical value, then check it + against this threshold when deciding whether or not to accept a bootstrap example. + Defaults to None. + teacher_settings (dict, optional): Settings for the `teacher` model. + Defaults to None. + max_bootstrapped_demos (int): Maximum number of bootstrapped demonstrations to include. + Defaults to 4. + max_labeled_demos (int): Maximum number of labeled demonstrations to include. + Defaults to 16. + max_rounds (int): Number of iterations to attempt generating the required bootstrap + examples. If unsuccessful after `max_rounds`, the program ends. Defaults to 1. + max_errors (int): Maximum number of errors until program ends. Defaults to 5. + """ + ... + + def compile(self, student, *, teacher=..., trainset): ... 
diff --git a/typings/dspy/teleprompt/bootstrap_finetune.pyi b/typings/dspy/teleprompt/bootstrap_finetune.pyi new file mode 100644 index 0000000..34f3c21 --- /dev/null +++ b/typings/dspy/teleprompt/bootstrap_finetune.pyi @@ -0,0 +1,75 @@ +""" +This type stub file was generated by pyright. +""" + +from dataclasses import dataclass +from typing import Any, Callable, Dict, List, Optional, Union +from dspy.adapters.base import Adapter +from dspy.clients.lm import LM +from dspy.primitives.example import Example +from dspy.primitives.program import Program +from dspy.teleprompt.teleprompt import Teleprompter + +logger = ... + +class FinetuneTeleprompter(Teleprompter): + def __init__( + self, train_kwargs: Optional[Union[Dict[str, Any], Dict[LM, Dict[str, Any]]]] = ... + ) -> None: ... + @staticmethod + def convert_to_lm_dict(arg) -> Dict[LM, Any]: ... + +class BootstrapFinetune(FinetuneTeleprompter): + def __init__( + self, + metric: Optional[Callable] = ..., + multitask: bool = ..., + train_kwargs: Optional[Union[Dict[str, Any], Dict[LM, Dict[str, Any]]]] = ..., + adapter: Optional[Union[Adapter, Dict[LM, Adapter]]] = ..., + exclude_demos: bool = ..., + num_threads: Optional[int] = ..., + ) -> None: ... + def compile( + self, + student: Program, + trainset: List[Example], + teacher: Optional[Union[Program, List[Program]]] = ..., + ) -> Program: ... + @staticmethod + def finetune_lms(finetune_dict) -> Dict[Any, LM]: ... + +def build_call_data_from_trace( + trace: List[Dict], pred_ind: int, adapter: Adapter, exclude_demos: bool = ... +) -> Dict[str, List[Dict[str, Any]]]: ... +@dataclass +class FailedPrediction: + completion_text: str + format_reward: Union[float, None] = ... 
+ +def bootstrap_trace_data( + program: Program, + dataset: List[Example], + metric: Optional[Callable] = ..., + num_threads: Optional[int] = ..., + raise_on_error=..., + capture_failed_parses=..., + failure_score: float = ..., + format_failure_score: float = ..., + log_format_failures: bool = ..., +) -> List[Dict[str, Any]]: ... +def all_predictors_have_lms(program: Program) -> bool: + """Return True if all predictors in the program have an LM set.""" + ... + +def copy_program_with_lms(program: Program) -> Program: ... +def prepare_student(student: Program) -> Program: ... +def prepare_teacher(student: Program, teacher: Optional[Program] = ...) -> Program: ... +def assert_structural_equivalency(program1: object, program2: object): # -> None: + ... +def assert_no_shared_predictor(program1: Program, program2: Program): # -> None: + ... +def get_unique_lms(program: Program) -> List[LM]: ... +def launch_lms(program: Program): # -> None: + ... +def kill_lms(program: Program): # -> None: + ... diff --git a/typings/dspy/teleprompt/copro_optimizer.pyi b/typings/dspy/teleprompt/copro_optimizer.pyi new file mode 100644 index 0000000..6e7404f --- /dev/null +++ b/typings/dspy/teleprompt/copro_optimizer.pyi @@ -0,0 +1,50 @@ +""" +This type stub file was generated by pyright. +""" + +import dspy +from dspy.signatures import Signature +from dspy.teleprompt.teleprompt import Teleprompter + +logger = ... + +class BasicGenerateInstruction(Signature): + """You are an instruction optimizer for large language models. I will give you a ``signature`` of fields (inputs and outputs) in English. Your task is to propose an instruction that will lead a good language model to perform the task well. Don't be afraid to be creative.""" + + basic_instruction = ... + proposed_instruction = ... + proposed_prefix_for_output_field = ... + +class GenerateInstructionGivenAttempts(dspy.Signature): + """You are an instruction optimizer for large language models. 
I will give some task instructions I've tried, along with their corresponding validation scores. The instructions are arranged in increasing order based on their scores, where higher scores indicate better quality. + + Your task is to propose a new instruction that will lead a good language model to perform the task even better. Don't be afraid to be creative.""" + + attempted_instructions = ... + proposed_instruction = ... + proposed_prefix_for_output_field = ... + +class COPRO(Teleprompter): + def __init__( + self, + prompt_model=..., + metric=..., + breadth=..., + depth=..., + init_temperature=..., + track_stats=..., + **_kwargs, + ) -> None: ... + def compile(self, student, *, trainset, eval_kwargs): + """ + optimizes `signature` of `student` program - note that it may be zero-shot or already pre-optimized (demos already chosen - `demos != []`) + + parameters: + student: program to optimize and left modified. + trainset: iterable of `Example`s + eval_kwargs: optional, dict + Additional keywords to go into `Evaluate` for the metric. + + Returns optimized version of `student`. + """ + ... diff --git a/typings/dspy/teleprompt/ensemble.pyi b/typings/dspy/teleprompt/ensemble.pyi new file mode 100644 index 0000000..4ebde0d --- /dev/null +++ b/typings/dspy/teleprompt/ensemble.pyi @@ -0,0 +1,13 @@ +""" +This type stub file was generated by pyright. +""" + +from dspy.teleprompt.teleprompt import Teleprompter + +class Ensemble(Teleprompter): + def __init__(self, *, reduce_fn=..., size=..., deterministic=...) -> None: + """A common reduce_fn is dspy.majority.""" + ... + + def compile(self, programs): # -> EnsembledProgram: + class EnsembledProgram(dspy.Module): ... diff --git a/typings/dspy/teleprompt/grpo.pyi b/typings/dspy/teleprompt/grpo.pyi new file mode 100644 index 0000000..9496685 --- /dev/null +++ b/typings/dspy/teleprompt/grpo.pyi @@ -0,0 +1,72 @@ +""" +This type stub file was generated by pyright. 
+""" + +from typing import Any, Callable, Dict, List, Literal, Optional, Union +from dspy.adapters.base import Adapter +from dspy.clients.lm import LM +from dspy.primitives.example import Example +from dspy.primitives.program import Program +from dspy.teleprompt.bootstrap_finetune import FinetuneTeleprompter + +logger = ... + +class GRPO(FinetuneTeleprompter): + def __init__( + self, + metric: Optional[Callable] = ..., + multitask: bool = ..., + train_kwargs: Optional[Union[Dict[str, Any], Dict[LM, Dict[str, Any]]]] = ..., + adapter: Optional[Union[Adapter, Dict[LM, Adapter]]] = ..., + exclude_demos: bool = ..., + num_threads: int = ..., + num_train_steps: int = ..., + seed: int = ..., + num_dspy_examples_per_grpo_step: int = ..., + num_rollouts_per_grpo_step: int = ..., + use_train_as_val: bool = ..., + num_steps_for_val: int = ..., + report_train_scores: bool = ..., + failure_score: float = ..., + format_failure_score: float = ..., + variably_invoked_predictor_grouping_mode: Union[ + Literal["truncate"], Literal["fill"], Literal["ragged"] + ] = ..., + variably_invoked_predictor_fill_strategy: Optional[ + Union[Literal["randint"], Literal["max"]] + ] = ..., + ) -> None: ... + def validate_trace_data_and_log_issues( + self, + trace_data: List[List[List[Dict[str, Any]]]], + subsample_training_dataset: List[Example], + num_teachers: int, + num_samples_per_input: int, + pred_signature_hash_to_ind: Dict[int, int], + ): # -> None: + ... + def report_validation_metrics( + self, student, trainset, valset, logger, step_idx=... + ): # -> None: + ... + def update_shuffled_trainset(self, original_trainset): # -> None: + ... + def select_training_sample_and_update_shuffled_trainset( + self, original_trainset: List[Example], train_step_idx: int + ) -> List[Example]: ... + def compile( + self, + student: Program, + trainset: List[Example], + teacher: Optional[Union[Program, List[Program]]] = ..., + valset: Optional[List[Example]] = ..., + **kwargs, + ) -> Program: ... 
+ +def disable_lm_cache(program: Program, lm_cache_dict: dict): # -> None: + """Disable the LM cache for all predictors in the program.""" + ... + +def recover_lm_cache(program: Program, lm_cache_dict: dict): # -> None: + """Recover the LM caches for all predictors in the program to their original state.""" + ... diff --git a/typings/dspy/teleprompt/infer_rules.pyi b/typings/dspy/teleprompt/infer_rules.pyi new file mode 100644 index 0000000..b63d264 --- /dev/null +++ b/typings/dspy/teleprompt/infer_rules.pyi @@ -0,0 +1,30 @@ +""" +This type stub file was generated by pyright. +""" + +import dspy +from dspy.teleprompt import BootstrapFewShot + +logger = ... + +class InferRules(BootstrapFewShot): + def __init__( + self, num_candidates=..., num_rules=..., num_threads=..., teacher_settings=..., **kwargs + ) -> None: ... + def compile(self, student, *, teacher=..., trainset, valset=...): # -> None: + ... + def induce_natural_language_rules(self, predictor, trainset): ... + def update_program_instructions(self, predictor, natural_language_rules): # -> None: + ... + def format_examples(self, demos, signature): # -> str: + ... + def get_predictor_demos(self, trainset, predictor): # -> list[dict[Any, Any]]: + ... + def evaluate_program(self, program, dataset): # -> float: + ... + +class RulesInductionProgram(dspy.Module): + def __init__(self, num_rules, teacher_settings=...) -> None: + class CustomRulesInduction(dspy.Signature): ... + + def forward(self, examples_text): ... diff --git a/typings/dspy/teleprompt/knn_fewshot.pyi b/typings/dspy/teleprompt/knn_fewshot.pyi new file mode 100644 index 0000000..4ed2d4a --- /dev/null +++ b/typings/dspy/teleprompt/knn_fewshot.pyi @@ -0,0 +1,54 @@ +""" +This type stub file was generated by pyright. 
+""" + +from dspy.clients import Embedder +from dspy.primitives import Example +from dspy.teleprompt.teleprompt import Teleprompter + +class KNNFewShot(Teleprompter): + def __init__( + self, k: int, trainset: list[Example], vectorizer: Embedder, **few_shot_bootstrap_args + ) -> None: + """ + KNNFewShot is an optimizer that uses an in-memory KNN retriever to find the k nearest neighbors + in a trainset at test time. For each input example in a forward call, it identifies the k most + similar examples from the trainset and attaches them as demonstrations to the student module. + + Args: + k: The number of nearest neighbors to attach to the student model. + trainset: The training set to use for few-shot prompting. + vectorizer: The `Embedder` to use for vectorization + **few_shot_bootstrap_args: Additional arguments for the `BootstrapFewShot` optimizer. + + Example: + ```python + import dspy + from sentence_transformers import SentenceTransformer + + # Define a QA module with chain of thought + qa = dspy.ChainOfThought("question -> answer") + + # Create a training dataset with examples + trainset = [ + dspy.Example(question="What is the capital of France?", answer="Paris").with_inputs("question"), + # ... more examples ... + ] + + # Initialize KNNFewShot with a sentence transformer model + knn_few_shot = KNNFewShot( + k=3, + trainset=trainset, + vectorizer=dspy.Embedder(SentenceTransformer("all-MiniLM-L6-v2").encode) + ) + + # Compile the QA module with few-shot learning + compiled_qa = knn_few_shot.compile(qa) + + # Use the compiled module + result = compiled_qa("What is the capital of Belgium?") + ``` + """ + ... + + def compile(self, student, *, teacher=...): ... diff --git a/typings/dspy/teleprompt/mipro_optimizer_v2.pyi b/typings/dspy/teleprompt/mipro_optimizer_v2.pyi new file mode 100644 index 0000000..afe9536 --- /dev/null +++ b/typings/dspy/teleprompt/mipro_optimizer_v2.pyi @@ -0,0 +1,61 @@ +""" +This type stub file was generated by pyright. 
+""" + +from typing import Any, Callable, List, Literal, Optional, TYPE_CHECKING +from dspy.teleprompt.teleprompt import Teleprompter + +if TYPE_CHECKING: ... +logger = ... +BOOTSTRAPPED_FEWSHOT_EXAMPLES_IN_CONTEXT = ... +LABELED_FEWSHOT_EXAMPLES_IN_CONTEXT = ... +MIN_MINIBATCH_SIZE = ... +AUTO_RUN_SETTINGS = ... +YELLOW = ... +GREEN = ... +BLUE = ... +BOLD = ... +ENDC = ... + +class MIPROv2(Teleprompter): + def __init__( + self, + metric: Callable, + prompt_model: Optional[Any] = ..., + task_model: Optional[Any] = ..., + teacher_settings: Optional[dict] = ..., + max_bootstrapped_demos: int = ..., + max_labeled_demos: int = ..., + auto: Optional[Literal["light", "medium", "heavy"]] = ..., + num_candidates: Optional[int] = ..., + num_threads: Optional[int] = ..., + max_errors: int = ..., + seed: int = ..., + init_temperature: float = ..., + verbose: bool = ..., + track_stats: bool = ..., + log_dir: Optional[str] = ..., + metric_threshold: Optional[float] = ..., + ) -> None: ... + def compile( + self, + student: Any, + *, + trainset: List, + teacher: Any = ..., + valset: Optional[List] = ..., + num_trials: Optional[int] = ..., + max_bootstrapped_demos: Optional[int] = ..., + max_labeled_demos: Optional[int] = ..., + seed: Optional[int] = ..., + minibatch: bool = ..., + minibatch_size: int = ..., + minibatch_full_eval_steps: int = ..., + program_aware_proposer: bool = ..., + data_aware_proposer: bool = ..., + view_data_batch_size: int = ..., + tip_aware_proposer: bool = ..., + fewshot_aware_proposer: bool = ..., + requires_permission_to_run: bool = ..., + provide_traceback: Optional[bool] = ..., + ) -> Any: ... diff --git a/typings/dspy/teleprompt/random_search.pyi b/typings/dspy/teleprompt/random_search.pyi new file mode 100644 index 0000000..b3a7c68 --- /dev/null +++ b/typings/dspy/teleprompt/random_search.pyi @@ -0,0 +1,23 @@ +""" +This type stub file was generated by pyright. 
+""" + +from dspy.teleprompt.teleprompt import Teleprompter + +class BootstrapFewShotWithRandomSearch(Teleprompter): + def __init__( + self, + metric, + teacher_settings=..., + max_bootstrapped_demos=..., + max_labeled_demos=..., + max_rounds=..., + num_candidate_programs=..., + num_threads=..., + max_errors=..., + stop_at_score=..., + metric_threshold=..., + ) -> None: ... + def compile( + self, student, *, teacher=..., trainset, valset=..., restrict=..., labeled_sample=... + ): ... diff --git a/typings/dspy/teleprompt/signature_opt.pyi b/typings/dspy/teleprompt/signature_opt.pyi new file mode 100644 index 0000000..5f0981f --- /dev/null +++ b/typings/dspy/teleprompt/signature_opt.pyi @@ -0,0 +1,18 @@ +""" +This type stub file was generated by pyright. +""" + +from .copro_optimizer import COPRO + +class SignatureOptimizer(COPRO): + def __init__( + self, + prompt_model=..., + metric=..., + breadth=..., + depth=..., + init_temperature=..., + verbose=..., + track_stats=..., + ) -> None: ... + def compile(self, student, *, devset, eval_kwargs): ... diff --git a/typings/dspy/teleprompt/simba.pyi b/typings/dspy/teleprompt/simba.pyi new file mode 100644 index 0000000..232a26f --- /dev/null +++ b/typings/dspy/teleprompt/simba.pyi @@ -0,0 +1,51 @@ +""" +This type stub file was generated by pyright. +""" + +import dspy +from typing import Callable +from dspy.teleprompt.teleprompt import Teleprompter + +logger = ... + +class SIMBA(Teleprompter): + def __init__( + self, + *, + metric: Callable, + bsize=..., + num_candidates=..., + max_steps=..., + max_demos=..., + demo_input_field_maxlen=..., + num_threads=..., + temperature_for_sampling=..., + temperature_for_candidates=..., + ) -> None: + """ + Initializes SIMBA. + + Args: + metric (Callable): A function that takes an Example and a prediction_dict + as input and returns a float. + bsize (int, optional): Mini-batch size. Defaults to 32. 
+ num_candidates (int, optional): Number of new candidate programs to produce + per iteration. Defaults to 6. + max_steps (int, optional): Number of optimization steps to run. Defaults to 8. + max_demos (int, optional): Maximum number of demos a predictor can hold + before dropping some. Defaults to 4. + demo_input_field_maxlen (int, optional): Maximum number of characters to keep + in an input field when building a new demo. Defaults to 100,000. + num_threads (int, optional): Number of threads for parallel execution. + Defaults to None. + temperature_for_sampling (float, optional): Temperature used for picking + programs during the trajectory-sampling step. Defaults to 0.2. + temperature_for_candidates (float, optional): Temperature used for picking + the source program for building new candidates. Defaults to 0.2. + """ + ... + + def compile( + self, student: dspy.Module, *, trainset: list[dspy.Example], seed: int = ... + ): # -> Module: + ... diff --git a/typings/dspy/teleprompt/simba_utils.pyi b/typings/dspy/teleprompt/simba_utils.pyi new file mode 100644 index 0000000..ea1732d --- /dev/null +++ b/typings/dspy/teleprompt/simba_utils.pyi @@ -0,0 +1,55 @@ +""" +This type stub file was generated by pyright. +""" + +import dspy +from typing import Callable + +logger = ... + +def prepare_models_for_resampling(program: dspy.Module, n: int): # -> list[LM | Any]: + ... +def wrap_program( + program: dspy.Module, metric: Callable +): # -> Callable[..., dict[str, Any | float | None]]: + ... +def append_a_demo(demo_input_field_maxlen): # -> Callable[..., Literal[True]]: + ... +def append_a_rule(bucket, system, **kwargs): # -> bool: + ... + +class OfferFeedback(dspy.Signature): + """ + You will be given two trajectories of an LLM-driven program's execution. Your goal is to help the program's modules + build up experience on how to maximize the reward value assigned to the program's outputs if it were to receive + similar inputs in the future. 
+ + The module won't see its own history. It will rely on your advice balancing being concrete and being generalizable. + + In your advice: + - Avoid boilerplate. Offer advice that would change the module's behavior for the better in the future. + - Ensure that advice offered to a module M is specific to that M's specific sub-task, not the overall program. + - Rely on contrasting the behavior of the worse trajectory against the better trajectory in making recommendations. + - Ensure each unique module name appears exactly once as a key in the advice dictionary. + """ + + program_code: str = ... + modules_defn: str = ... + program_inputs: str = ... + oracle_metadata: str = ... + worse_program_trajectory: str = ... + worse_program_outputs: str = ... + worse_reward_value: float = ... + better_program_trajectory: str = ... + better_program_outputs: str = ... + better_reward_value: float = ... + module_names: list[str] = ... + discussion: str = ... + module_advice: dict[str, str] = ... + +def inspect_modules(program): # -> str: + ... +def recursive_mask( + o, +): # -> dict[Any, Any | dict[Any, Any] | list[Any | dict[Any, Any] | list[Any] | tuple[Any, ...] | str] | tuple[Any, ...] | str] | list[Any | dict[Any, Any | dict[Any, Any] | list[Any] | tuple[Any, ...] | str] | list[Any] | tuple[Any, ...] | str] | tuple[Any, ...] | str: + ... diff --git a/typings/dspy/teleprompt/teleprompt.pyi b/typings/dspy/teleprompt/teleprompt.pyi new file mode 100644 index 0000000..53ecc05 --- /dev/null +++ b/typings/dspy/teleprompt/teleprompt.pyi @@ -0,0 +1,40 @@ +""" +This type stub file was generated by pyright. +""" + +from typing import Any, Optional +from dspy.primitives import Example, Module + +class Teleprompter: + def __init__(self) -> None: ... + def compile( + self, + student: Module, + *, + trainset: list[Example], + teacher: Optional[Module] = ..., + valset: Optional[list[Example]] = ..., + **kwargs, + ) -> Module: + """ + Optimize the student program. 
+ + Args: + student: The student program to optimize. + trainset: The training set to use for optimization. + teacher: The teacher program to use for optimization. + valset: The validation set to use for optimization. + + Returns: + The optimized student program. + """ + ... + + def get_params(self) -> dict[str, Any]: + """ + Get the parameters of the teleprompter. + + Returns: + The parameters of the teleprompter. + """ + ... diff --git a/typings/dspy/teleprompt/teleprompt_optuna.pyi b/typings/dspy/teleprompt/teleprompt_optuna.pyi new file mode 100644 index 0000000..ae16597 --- /dev/null +++ b/typings/dspy/teleprompt/teleprompt_optuna.pyi @@ -0,0 +1,23 @@ +""" +This type stub file was generated by pyright. +""" + +from dspy.teleprompt.teleprompt import Teleprompter + +class BootstrapFewShotWithOptuna(Teleprompter): + def __init__( + self, + metric, + teacher_settings=..., + max_bootstrapped_demos=..., + max_labeled_demos=..., + max_rounds=..., + num_candidate_programs=..., + num_threads=..., + ) -> None: ... + def objective( + self, trial + ): # -> tuple[float, list[tuple[Example, Prediction, float]], list[float]] | tuple[float, list[float]] | tuple[float, list[tuple[Example, Prediction, float]]] | float: + ... + def compile(self, student, *, teacher=..., max_demos, trainset, valset=...): # -> Any: + ... diff --git a/typings/dspy/teleprompt/utils.pyi b/typings/dspy/teleprompt/utils.pyi new file mode 100644 index 0000000..daad69a --- /dev/null +++ b/typings/dspy/teleprompt/utils.pyi @@ -0,0 +1,103 @@ +""" +This type stub file was generated by pyright. +""" + +logger = ... + +def create_minibatch(trainset, batch_size=..., rng=...): # -> list[Any]: + """Create a minibatch from the trainset.""" + ... + +def eval_candidate_program( + batch_size, trainset, candidate_program, evaluate, rng=..., return_all_scores=... +): # -> tuple[float, list[float]] | float: + """Evaluate a candidate program on the trainset, using the specified batch size.""" + ... 
+ +def eval_candidate_program_with_pruning( + trial, trial_logs, trainset, candidate_program, evaluate, trial_num, batch_size=... +): # -> tuple[Any, Any, int, Literal[True]] | tuple[Any, Any, int, Literal[False]]: + """Evaluation of candidate_program with pruning implemented""" + ... + +def get_program_with_highest_avg_score( + param_score_dict, fully_evaled_param_combos +): # -> tuple[Any, Any, Any, Any] | tuple[Any, Any | floating[Any], Any, Any]: + """Used as a helper function for bayesian + minibatching optimizers. Returns the program with the highest average score from the batches evaluated so far.""" + ... + +def calculate_last_n_proposed_quality( + base_program, trial_logs, evaluate, trainset, devset, n +): # -> tuple[Any | Literal[0], Any, Any | Literal[0], Any]: + """ + Calculate the average and best quality of the last n programs proposed. This is useful for seeing if our proposals + are actually 'improving' over time or not. + """ + ... + +def get_task_model_history_for_full_example(candidate_program, task_model, devset, evaluate): + """Get a full trace of the task model's history for a given candidate program.""" + ... + +def print_full_program(program): # -> None: + """Print out the program's instructions & prefixes for each module.""" + ... + +def save_candidate_program(program, log_dir, trial_num, note=...): # -> str | None: + """Save the candidate program to the log directory.""" + ... + +def save_file_to_log_dir(source_file_path, log_dir): # -> None: + ... +def setup_logging(log_dir): # -> None: + """Setup logger, which will log our print statements to a txt file at our log_dir for later viewing""" + ... + +def get_token_usage(model) -> tuple[int, int]: + """ + Extract total input tokens and output tokens from a model's interaction history. + Returns (total_input_tokens, total_output_tokens). + """ + ...
+ +def log_token_usage(trial_logs, trial_num, model_dict): # -> None: + """ + Extract total input and output tokens used by each model and log to trial_logs[trial_num]["token_usage"]. + """ + ... + +def get_prompt_model(prompt_model): # -> Any: + ... +def get_signature(predictor): ... +def set_signature(predictor, updated_signature): # -> None: + ... +def create_n_fewshot_demo_sets( + student, + num_candidate_sets, + trainset, + max_labeled_demos, + max_bootstrapped_demos, + metric, + teacher_settings, + max_errors=..., + max_rounds=..., + labeled_sample=..., + min_num_samples=..., + metric_threshold=..., + teacher=..., + include_non_bootstrapped=..., + seed=..., + rng=..., +): # -> dict[Any, Any]: + """ + This function is copied from random_search.py, and creates fewshot examples in the same way that random search does. + This allows us to take advantage of using the same fewshot examples when we use the same random seed in our optimizers. + """ + ... + +def old_getfile(object): # -> str | None: + """Work out which source or compiled file an object was defined in.""" + ... + +def new_getfile(object): # -> str | None: + ... diff --git a/typings/dspy/teleprompt/vanilla.pyi b/typings/dspy/teleprompt/vanilla.pyi new file mode 100644 index 0000000..391fab2 --- /dev/null +++ b/typings/dspy/teleprompt/vanilla.pyi @@ -0,0 +1,9 @@ +""" +This type stub file was generated by pyright. +""" + +from dspy.teleprompt.teleprompt import Teleprompter + +class LabeledFewShot(Teleprompter): + def __init__(self, k=...) -> None: ... + def compile(self, student, *, trainset, sample=...): ... diff --git a/typings/dspy/utils/__init__.pyi b/typings/dspy/utils/__init__.pyi new file mode 100644 index 0000000..d0a1a4d --- /dev/null +++ b/typings/dspy/utils/__init__.pyi @@ -0,0 +1,27 @@ +""" +This type stub file was generated by pyright. 
+""" + +import os +import requests +from dspy.streaming.messages import StatusMessage, StatusMessageProvider +from dspy.utils import exceptions +from dspy.utils.callback import BaseCallback, with_callbacks +from dspy.utils.dummies import DummyLM, DummyVectorizer, dummy_rm +from dspy.utils.inspect_history import pretty_print_history + +def download(url): # -> None: + ... + +__all__ = [ + "download", + "exceptions", + "BaseCallback", + "with_callbacks", + "DummyLM", + "DummyVectorizer", + "dummy_rm", + "StatusMessage", + "StatusMessageProvider", + "pretty_print_history", +] diff --git a/typings/dspy/utils/asyncify.pyi b/typings/dspy/utils/asyncify.pyi new file mode 100644 index 0000000..9a55926 --- /dev/null +++ b/typings/dspy/utils/asyncify.pyi @@ -0,0 +1,29 @@ +""" +This type stub file was generated by pyright. +""" + +from typing import Any, Awaitable, Callable, TYPE_CHECKING +from dspy.primitives.program import Module + +if TYPE_CHECKING: ... +_limiter = ... + +def get_async_max_workers(): # -> Any: + ... +def get_limiter(): # -> CapacityLimiter: + ... +def asyncify(program: Module) -> Callable[[Any, Any], Awaitable[Any]]: + """ + Wraps a DSPy program so that it can be called asynchronously. This is useful for running a + program in parallel with another task (e.g., another DSPy program). + + This implementation propagates the current thread's configuration context to the worker thread. + + Args: + program: The DSPy program to be wrapped for asynchronous execution. + + Returns: + An async function: An async function that, when awaited, runs the program in a worker thread. + The current thread's configuration context is inherited for each call. + """ + ... diff --git a/typings/dspy/utils/caching.pyi b/typings/dspy/utils/caching.pyi new file mode 100644 index 0000000..6512b2b --- /dev/null +++ b/typings/dspy/utils/caching.pyi @@ -0,0 +1,10 @@ +""" +This type stub file was generated by pyright. +""" + +_DEFAULT_CACHE_DIR = ... +DSPY_CACHEDIR = ... 
+ +def create_subdir_in_cachedir(subdir: str) -> str: + """Create a subdirectory in the DSPy cache directory.""" + ... diff --git a/typings/dspy/utils/callback.pyi b/typings/dspy/utils/callback.pyi new file mode 100644 index 0000000..1483b8b --- /dev/null +++ b/typings/dspy/utils/callback.pyi @@ -0,0 +1,211 @@ +""" +This type stub file was generated by pyright. +""" + +from typing import Any, Dict, Optional + +ACTIVE_CALL_ID = ... +logger = ... + +class BaseCallback: + """A base class for defining callback handlers for DSPy components. + + To use a callback, subclass this class and implement the desired handlers. Each handler + will be called at the appropriate time before/after the execution of the corresponding component. For example, if + you want to print a message before and after an LM is called, implement the `on_lm_start` and `on_lm_end` handlers. + Users can set the callback globally using `dspy.settings.configure` or locally by passing it to the component + constructor. + + + Example 1: Set a global callback using `dspy.settings.configure`. + + ``` + import dspy + from dspy.utils.callback import BaseCallback + + class LoggingCallback(BaseCallback): + + def on_lm_start(self, call_id, instance, inputs): + print(f"LM is called with inputs: {inputs}") + + def on_lm_end(self, call_id, outputs, exception): + print(f"LM is finished with outputs: {outputs}") + + dspy.settings.configure( + callbacks=[LoggingCallback()] + ) + + cot = dspy.ChainOfThought("question -> answer") + cot(question="What is the meaning of life?") + + # > LM is called with inputs: {'question': 'What is the meaning of life?'} + # > LM is finished with outputs: {'answer': '42'} + ``` + + Example 2: Set a local callback by passing it to the component constructor.
+ + ``` + lm_1 = dspy.LM("gpt-3.5-turbo", callbacks=[LoggingCallback()]) + lm_1(question="What is the meaning of life?") + + # > LM is called with inputs: {'question': 'What is the meaning of life?'} + # > LM is finished with outputs: {'answer': '42'} + + lm_2 = dspy.LM("gpt-3.5-turbo") + lm_2(question="What is the meaning of life?") + # No logging here because only `lm_1` has the callback set. + ``` + """ + def on_module_start(self, call_id: str, instance: Any, inputs: Dict[str, Any]): # -> None: + """A handler triggered when forward() method of a module (subclass of dspy.Module) is called. + + Args: + call_id: A unique identifier for the call. Can be used to connect start/end handlers. + instance: The Module instance. + inputs: The inputs to the module's forward() method. Each argument is stored as + a key-value pair in a dictionary. + """ + ... + + def on_module_end( + self, call_id: str, outputs: Optional[Any], exception: Optional[Exception] = ... + ): # -> None: + """A handler triggered after forward() method of a module (subclass of dspy.Module) is executed. + + Args: + call_id: A unique identifier for the call. Can be used to connect start/end handlers. + outputs: The outputs of the module's forward() method. If the method is interrupted by + an exception, this will be None. + exception: If an exception is raised during the execution, it will be stored here. + """ + ... + + def on_lm_start(self, call_id: str, instance: Any, inputs: Dict[str, Any]): # -> None: + """A handler triggered when __call__ method of dspy.LM instance is called. + + Args: + call_id: A unique identifier for the call. Can be used to connect start/end handlers. + instance: The LM instance. + inputs: The inputs to the LM's __call__ method. Each argument is stored as + a key-value pair in a dictionary. + """ + ... + + def on_lm_end( + self, call_id: str, outputs: Optional[Dict[str, Any]], exception: Optional[Exception] = ...
+ ): # -> None: + """A handler triggered after __call__ method of dspy.LM instance is executed. + + Args: + call_id: A unique identifier for the call. Can be used to connect start/end handlers. + outputs: The outputs of the LM's __call__ method. If the method is interrupted by + an exception, this will be None. + exception: If an exception is raised during the execution, it will be stored here. + """ + ... + + def on_adapter_format_start( + self, call_id: str, instance: Any, inputs: Dict[str, Any] + ): # -> None: + """A handler triggered when format() method of an adapter (subclass of dspy.Adapter) is called. + + Args: + call_id: A unique identifier for the call. Can be used to connect start/end handlers. + instance: The Adapter instance. + inputs: The inputs to the Adapter's format() method. Each argument is stored as + a key-value pair in a dictionary. + """ + ... + + def on_adapter_format_end( + self, call_id: str, outputs: Optional[Dict[str, Any]], exception: Optional[Exception] = ... + ): # -> None: + """A handler triggered after format() method of an adapter (subclass of dspy.Adapter) is called. + + Args: + call_id: A unique identifier for the call. Can be used to connect start/end handlers. + outputs: The outputs of the Adapter's format() method. If the method is interrupted + by an exception, this will be None. + exception: If an exception is raised during the execution, it will be stored here. + """ + ... + + def on_adapter_parse_start( + self, call_id: str, instance: Any, inputs: Dict[str, Any] + ): # -> None: + """A handler triggered when parse() method of an adapter (subclass of dspy.Adapter) is called. + + Args: + call_id: A unique identifier for the call. Can be used to connect start/end handlers. + instance: The Adapter instance. + inputs: The inputs to the Adapter's parse() method. Each argument is stored as + a key-value pair in a dictionary. + """ + ...
+ + def on_adapter_parse_end( + self, call_id: str, outputs: Optional[Dict[str, Any]], exception: Optional[Exception] = ... + ): # -> None: + """A handler triggered after parse() method of an adapter (subclass of dspy.Adapter) is called. + + Args: + call_id: A unique identifier for the call. Can be used to connect start/end handlers. + outputs: The outputs of the Adapter's parse() method. If the method is interrupted + by an exception, this will be None. + exception: If an exception is raised during the execution, it will be stored here. + """ + ... + + def on_tool_start(self, call_id: str, instance: Any, inputs: Dict[str, Any]): # -> None: + """A handler triggered when a tool is called. + + Args: + call_id: A unique identifier for the call. Can be used to connect start/end handlers. + instance: The Tool instance. + inputs: The inputs to the Tool's __call__ method. Each argument is stored as + a key-value pair in a dictionary. + """ + ... + + def on_tool_end( + self, call_id: str, outputs: Optional[Dict[str, Any]], exception: Optional[Exception] = ... + ): # -> None: + """A handler triggered after a tool is executed. + + Args: + call_id: A unique identifier for the call. Can be used to connect start/end handlers. + outputs: The outputs of the Tool's __call__ method. If the method is interrupted by + an exception, this will be None. + exception: If an exception is raised during the execution, it will be stored here. + """ + ... + + def on_evaluate_start(self, call_id: str, instance: Any, inputs: Dict[str, Any]): # -> None: + """A handler triggered when evaluation is started. + + Args: + call_id: A unique identifier for the call. Can be used to connect start/end handlers. + instance: The Evaluate instance. + inputs: The inputs to the Evaluate's __call__ method. Each argument is stored as + a key-value pair in a dictionary. + """ + ... + + def on_evaluate_end( + self, call_id: str, outputs: Optional[Any], exception: Optional[Exception] = ...
+ ): # -> None: + """A handler triggered after evaluation is executed. + + Args: + call_id: A unique identifier for the call. Can be used to connect start/end handlers. + outputs: The outputs of the Evaluate's __call__ method. If the method is interrupted by + an exception, this will be None. + exception: If an exception is raised during the execution, it will be stored here. + """ + ... + +def with_callbacks( + fn, +): # -> _Wrapped[..., Any, ..., CoroutineType[Any, Any, Any]] | _Wrapped[..., Any, ..., Any]: + """Decorator to add callback functionality to instance methods.""" + ... diff --git a/typings/dspy/utils/dummies.pyi b/typings/dspy/utils/dummies.pyi new file mode 100644 index 0000000..b0c0634 --- /dev/null +++ b/typings/dspy/utils/dummies.pyi @@ -0,0 +1,84 @@ +""" +This type stub file was generated by pyright. +""" + +import numpy as np +from typing import Union +from dspy.clients.lm import LM +from dspy.utils.callback import with_callbacks + +class DummyLM(LM): + """Dummy language model for unit testing purposes. + + Three modes of operation: + + Mode 1: List of dictionaries + + If a list of dictionaries is provided, the dummy model will return the next dictionary + in the list for each request, formatted according to the `format_field_with_value` function. + + Example: + + ``` + lm = DummyLM([{"answer": "red"}, {"answer": "blue"}]) + dspy.settings.configure(lm=lm) + predictor("What color is the sky?") + # Output: + # [[## answer ##]] + # red + predictor("What color is the sky?") + # Output: + # [[## answer ##]] + # blue + ``` + + Mode 2: Dictionary of dictionaries + + If a dictionary of dictionaries is provided, the dummy model will return the value + corresponding to the key which is contained with the final message of the prompt, + formatted according to the `format_field_with_value` function from the chat adapter. 
+ + ``` + lm = DummyLM({"What color is the sky?": {"answer": "blue"}}) + dspy.settings.configure(lm=lm) + predictor("What color is the sky?") + # Output: + # [[## answer ##]] + # blue + ``` + + Mode 3: Follow examples + + If `follow_examples` is set to True, and the prompt contains an example input exactly equal to the prompt, + the dummy model will return the output from that example. + + ``` + lm = DummyLM([{"answer": "red"}], follow_examples=True) + dspy.settings.configure(lm=lm) + predictor("What color is the sky?", demos=dspy.Example(input="What color is the sky?", output="blue")) + # Output: + # [[## answer ##]] + # blue + ``` + + """ + def __init__( + self, + answers: Union[list[dict[str, str]], dict[str, dict[str, str]]], + follow_examples: bool = ..., + ) -> None: ... + @with_callbacks + def __call__(self, prompt=..., messages=..., **kwargs): # -> list[Any]: + ... + async def acall(self, prompt=..., messages=..., **kwargs): # -> list[Any]: + ... + def get_convo(self, index): # -> tuple[Any, Any]: + """Get the prompt + answer from the ith message.""" + ... + +def dummy_rm(passages=...) -> callable: ... + +class DummyVectorizer: + """Simple vectorizer based on n-grams.""" + def __init__(self, max_length=..., n_gram=...) -> None: ... + def __call__(self, texts: list[str]) -> np.ndarray: ... diff --git a/typings/dspy/utils/exceptions.pyi b/typings/dspy/utils/exceptions.pyi new file mode 100644 index 0000000..4ccd2ee --- /dev/null +++ b/typings/dspy/utils/exceptions.pyi @@ -0,0 +1,17 @@ +""" +This type stub file was generated by pyright. +""" + +from typing import Optional +from dspy.signatures.signature import Signature + +class AdapterParseError(Exception): + """Exception raised when adapter cannot parse the LM response.""" + def __init__( + self, + adapter_name: str, + signature: Signature, + lm_response: str, + message: Optional[str] = ..., + parsed_result: Optional[str] = ..., + ) -> None: ...
diff --git a/typings/dspy/utils/inspect_history.pyi b/typings/dspy/utils/inspect_history.pyi new file mode 100644 index 0000000..c740d09 --- /dev/null +++ b/typings/dspy/utils/inspect_history.pyi @@ -0,0 +1,7 @@ +""" +This type stub file was generated by pyright. +""" + +def pretty_print_history(history, n: int = ...): # -> None: + """Prints the last n prompts and their completions.""" + ... diff --git a/typings/dspy/utils/langchain_tool.pyi b/typings/dspy/utils/langchain_tool.pyi new file mode 100644 index 0000000..7bd7b46 --- /dev/null +++ b/typings/dspy/utils/langchain_tool.pyi @@ -0,0 +1,23 @@ +""" +This type stub file was generated by pyright. +""" + +from typing import TYPE_CHECKING +from dspy.adapters.types.tool import Tool +from langchain.tools import BaseTool + +if TYPE_CHECKING: ... + +def convert_langchain_tool(tool: BaseTool) -> Tool: + """Build a DSPy tool from a LangChain tool. + + This function converts a LangChain tool (either created with @tool decorator + or by subclassing BaseTool) into a DSPy Tool. + + Args: + tool: The LangChain tool to convert. + + Returns: + A DSPy Tool object. + """ + ... diff --git a/typings/dspy/utils/logging_utils.pyi b/typings/dspy/utils/logging_utils.pyi new file mode 100644 index 0000000..b0b1818 --- /dev/null +++ b/typings/dspy/utils/logging_utils.pyi @@ -0,0 +1,45 @@ +""" +This type stub file was generated by pyright. +""" + +LOGGING_LINE_FORMAT = ... +LOGGING_DATETIME_FORMAT = ... + +class DSPyLoggingStream: + """ + A Python stream for use with event logging APIs throughout DSPy (`eprint()`, + `logger.info()`, etc.). This stream wraps `sys.stderr`, forwarding `write()` and + `flush()` calls to the stream referred to by `sys.stderr` at the time of the call. + It also provides capabilities for disabling the stream to silence event logs. + """ + def __init__(self) -> None: ... + def write(self, text): # -> None: + ... + def flush(self): # -> None: + ... + @property + def enabled(self): # -> bool: + ... 
+ @enabled.setter + def enabled(self, value): # -> None: + ... + +DSPY_LOGGING_STREAM = ... + +def disable_logging(): # -> None: + """ + Disables the `DSPyLoggingStream` used by event logging APIs throughout DSPy + (`eprint()`, `logger.info()`, etc), silencing all subsequent event logs. + """ + ... + +def enable_logging(): # -> None: + """ + Enables the `DSPyLoggingStream` used by event logging APIs throughout DSPy + (`eprint()`, `logger.info()`, etc), emitting all subsequent event logs. This + reverses the effects of `disable_logging()`. + """ + ... + +def configure_dspy_loggers(root_module_name): # -> None: + ... diff --git a/typings/dspy/utils/mcp.pyi b/typings/dspy/utils/mcp.pyi new file mode 100644 index 0000000..6d52fb3 --- /dev/null +++ b/typings/dspy/utils/mcp.pyi @@ -0,0 +1,21 @@ +""" +This type stub file was generated by pyright. +""" + +import mcp +from typing import TYPE_CHECKING +from dspy.adapters.types.tool import Tool + +if TYPE_CHECKING: ... + +def convert_mcp_tool(session: mcp.client.session.ClientSession, tool: mcp.types.Tool) -> Tool: + """Build a DSPy tool from an MCP tool. + + Args: + session: The MCP session to use. + tool: The MCP tool to convert. + + Returns: + A dspy Tool object. + """ + ... diff --git a/typings/dspy/utils/parallelizer.pyi b/typings/dspy/utils/parallelizer.pyi new file mode 100644 index 0000000..f92a52a --- /dev/null +++ b/typings/dspy/utils/parallelizer.pyi @@ -0,0 +1,25 @@ +""" +This type stub file was generated by pyright. +""" + +logger = ... + +class ParallelExecutor: + def __init__( + self, + num_threads=..., + max_errors=..., + disable_progress_bar=..., + provide_traceback=..., + compare_results=..., + timeout=..., + straggler_limit=..., + ) -> None: + """ + Offers isolation between the tasks (dspy.settings) irrespective of whether num_threads == 1 or > 1. + Handles also straggler timeouts. + """ + ... + + def execute(self, function, data): # -> list[None]: + ... 
diff --git a/typings/dspy/utils/saving.pyi b/typings/dspy/utils/saving.pyi new file mode 100644 index 0000000..bc6d49c --- /dev/null +++ b/typings/dspy/utils/saving.pyi @@ -0,0 +1,20 @@ +""" +This type stub file was generated by pyright. +""" + +logger = ... + +def get_dependency_versions(): # -> dict[str, str]: + ... +def load(path): # -> Any: + """Load saved DSPy model. + + This method is used to load a saved DSPy model with `save_program=True`, i.e., the model is saved with cloudpickle. + + Args: + path (str): Path to the saved model. + + Returns: + The loaded model, a `dspy.Module` instance. + """ + ... diff --git a/typings/dspy/utils/unbatchify.pyi b/typings/dspy/utils/unbatchify.pyi new file mode 100644 index 0000000..22c89c4 --- /dev/null +++ b/typings/dspy/utils/unbatchify.pyi @@ -0,0 +1,58 @@ +""" +This type stub file was generated by pyright. +""" + +from typing import Any, Callable, List + +class Unbatchify: + def __init__( + self, + batch_fn: Callable[[List[Any]], List[Any]], + max_batch_size: int = ..., + max_wait_time: float = ..., + ) -> None: + """ + Initializes the Unbatchify. + + Args: + batch_fn: The batch-processing function that accepts a list of inputs and returns a list of outputs. + max_batch_size: The maximum number of items to include in a batch. + max_wait_time: The maximum time (in seconds) to wait for batch to fill before processing. + """ + ... + + def __call__(self, input_item: Any) -> Any: + """ + Thread-safe function that accepts a single input and returns the corresponding output. + + Args: + input_item: The single input item to process. + + Returns: + The output corresponding to the input_item after processing through batch_fn. + """ + ... + + def close(self): # -> None: + """ + Stops the worker thread and cleans up resources. + """ + ... + + def __enter__(self): # -> Self: + """ + Enables use as a context manager. + """ + ... 
+ + def __exit__(self, exc_type, exc_value, traceback): # -> None: + """ + Ensures resources are cleaned up when exiting context. + """ + ... + + def __del__(self): # -> None: + """ + Ensures the worker thread is terminated when the object is garbage collected. + """ + ... diff --git a/typings/dspy/utils/usage_tracker.pyi b/typings/dspy/utils/usage_tracker.pyi new file mode 100644 index 0000000..6d12ded --- /dev/null +++ b/typings/dspy/utils/usage_tracker.pyi @@ -0,0 +1,24 @@ +""" +This type stub file was generated by pyright. +""" + +from contextlib import contextmanager +from typing import Any + +"""Usage tracking utilities for DSPy.""" + +class UsageTracker: + """Tracks LM usage data within a context.""" + def __init__(self) -> None: ... + def add_usage(self, lm: str, usage_entry: dict): # -> None: + """Add a usage entry to the tracker.""" + ... + + def get_total_tokens(self) -> dict[str, dict[str, Any]]: + """Calculate total tokens from all tracked usage.""" + ... + +@contextmanager +def track_usage(): # -> Generator[UsageTracker, Any, None]: + """Context manager for tracking LM usage.""" + ... From a82bf50070d4462fe2bc119b3dfaf2be5b7b995d Mon Sep 17 00:00:00 2001 From: ethan-wickstrom Date: Mon, 23 Jun 2025 21:14:13 -0500 Subject: [PATCH 03/26] build(deps): migrate to basedpyright and Python 3.13 Replaces mypy and pyright with basedpyright for type checking, and updates the project's target Python version from 3.12 to 3.13. This includes updating pyproject.toml, .python-version, and adding VS Code settings for basedpyright integration. 
--- .python-version | 2 +- .vscode/settings.json | 13 ++++++++++++ pyproject.toml | 48 +++++++++++++++++++------------------------ 3 files changed, 35 insertions(+), 28 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.python-version b/.python-version index e4fba21..24ee5b1 100644 --- a/.python-version +++ b/.python-version @@ -1 +1 @@ -3.12 +3.13 diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..aa20fa4 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,13 @@ +{ + "python.languageServer": "Default", + "windsurfPyright.analysis.typeCheckingMode": "basedpyright", + "editor.defaultFormatter": "Codeium.windsurfPyright", + "windsurfPyright.analysis.autoSearchPaths": true, + "windsurfPyright.analysis.stubPath": "typings", + "windsurfPyright.analysis.useLibraryCodeForTypes": true, + "python.terminal.shellIntegration.enabled": true, + "windsurfPyright.analysis.inlayHints.callArgumentNames": false, + "windsurfPyright.analysis.inlayHints.functionReturnTypes": false, + "windsurfPyright.analysis.inlayHints.genericTypes": false, + "windsurfPyright.analysis.inlayHints.variableTypes": false +} diff --git a/pyproject.toml b/pyproject.toml index 183d7a1..dcbff97 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,6 @@ classifiers = [ "Topic :: Software Development :: Libraries :: Application Frameworks", ] dependencies = [ - "pyright>=1.1.402", "typer>=0.16.0", "rich>=13.7.1", "pyflakes>=3.3.2", @@ -44,11 +43,16 @@ build-backend = "hatchling.build" packages = ["src/robofactor"] [dependency-groups] -dev = ["isort>=6.0.1", "mypy>=1.16.1", "ruff>=0.11.13", "toml>=0.10.2"] +dev = [ + "basedpyright>=1.29.4", + "isort>=6.0.1", + "ruff>=0.11.13", + "toml>=0.10.2", +] [tool.ruff] line-length = 100 -target-version = "py312" +target-version = "py313" [tool.ruff.lint] select = [ @@ -65,28 +69,22 @@ ignore = [ "E501", # line too long (handled by formatter) ] -[tool.ruff.lint.per-file-ignores] -"tests/*" = ["S101"] # 
allow assert in tests +[tool.basedpyright] +include = ["src"] +exclude = [ + "**/node_modules", + "**/__pycache__", + "src/experimental", + "src/typestubs" +] +stubPath = "typings" +reportUnknownMemberType = false +reportUnknownVariableType = false +reportUnknownArgumentType = false -[tool.mypy] -python_version = "3.12" -plugins = ["returns.contrib.mypy.returns_plugin"] -warn_return_any = true -warn_unused_configs = true -disallow_untyped_defs = true -disallow_incomplete_defs = true -check_untyped_defs = true -disallow_untyped_decorators = false -no_implicit_optional = true -warn_redundant_casts = true -warn_unused_ignores = true -warn_no_return = true -warn_unreachable = true -strict_equality = true +[tool.basedpyright.defineConstant] +DEBUG = true -[[tool.mypy.overrides]] -module = "dspy.*" -ignore_missing_imports = true [tool.pytest.ini_options] minversion = "8.0" @@ -122,9 +120,5 @@ line_length = 100 multi_line_output = 3 include_trailing_comma = true -[tool.pyright] -venvPath = "." -venv = ".venv" - [tool.uv.sources] dspy = { git = "https://github.com/stanfordnlp/dspy.git" } From fce046ddf1253f6415551e03e8563ca1a635498f Mon Sep 17 00:00:00 2001 From: ethan-wickstrom Date: Mon, 23 Jun 2025 21:15:34 -0500 Subject: [PATCH 04/26] docs(typer): document Annotated Option default issue Adds a new documentation page detailing a `TypeError: Name 'X' defined twice` issue encountered with Typer 0.16.0 when using `Annotated` with `typer.Option()` and specifying the default value as the first argument to `Option()`. The document provides a root cause analysis, explains why the issue occurs due to Typer/Click's parameter processing, and offers best practices and alternative patterns to avoid it. 
--- docs/issues-in-typer.md | 211 ++++++++++++++++++++++++++++++++++++++++ uv.lock | 164 +++++++++++-------------------- 2 files changed, 268 insertions(+), 107 deletions(-) create mode 100644 docs/issues-in-typer.md diff --git a/docs/issues-in-typer.md b/docs/issues-in-typer.md new file mode 100644 index 0000000..2e03002 --- /dev/null +++ b/docs/issues-in-typer.md @@ -0,0 +1,211 @@ +# Typer 0.16.0 "Name defined twice" Issue with Annotated and Option + +## Executive Summary + +In Typer 0.16.0 with Python 3.13, using `Annotated` with `Option()` can trigger a `TypeError: Name 'X' defined twice` error when the default value is specified as the first argument to `Option()`. This occurs due to how Typer and Click process parameter declarations internally. + +## The Problem + +### Failing Code + +```python +@app.command() +def main( + mode: Annotated[ + DiffMode, + typer.Option( + DiffMode.both, # ❌ Default value as first argument + "-m", + "--mode", + help="Which mode to use", + ), + ] = DiffMode.both, +): + pass +``` + +**Error:** `TypeError: Name 'mode' defined twice` + +### Working Code + +```python +@app.command() +def main( + mode: Annotated[ + DiffMode, + typer.Option( + "-m", + "--mode", + help="Which mode to use", + ), + ] = DiffMode.both, # ✅ Default only as parameter default +): + pass +``` + +## Root Cause Analysis + +### 1. Option() Function Signature + +The `Option()` function in `typer/params.py` has this signature: + +```python +def Option( + default: Optional[Any] = ..., + *param_decls: str, + # ... other parameters +) -> OptionInfo +``` + +When you call: + +- `Option(DiffMode.both, "-m", "--mode")` → `default=DiffMode.both`, `param_decls=('-m', '--mode')` +- `Option("-m", "--mode")` → `default='-m'`, `param_decls=('--mode',)` + +### 2. 
Typer's Parameter Processing + +In `typer/main.py` at line 895, when processing parameters: + +```python +param_decls = [param.name] # Adds the parameter name first +if parameter_info.param_decls: + param_decls.extend(parameter_info.param_decls) +``` + +For a parameter named `mode`: + +- With `Option(DiffMode.both, "-m", "--mode")`: `param_decls = ['mode', '-m', '--mode']` +- With `Option("-m", "--mode")`: `param_decls = ['mode', '--mode']` + +### 3. Click's \_parse_decls Method + +Click's `_parse_decls` method in `click/core.py` (line 2683) processes these declarations: + +```python +def _parse_decls(self, decls, expose_value): + name = None + for decl in decls: + if decl.isidentifier(): + if name is not None: + raise TypeError(f"Name '{name}' defined twice") + name = decl +``` + +The method: + +1. Iterates through each declaration +2. If it's an identifier (passes `.isidentifier()`), it sets it as the parameter name +3. If a name was already set, it raises the "defined twice" error + +### 4. The Conflict + +When `param_decls = ['mode', '-m', '--mode']`: + +1. `'mode'` is processed → `name = 'mode'` (it's an identifier) +2. `'-m'` is processed → treated as option flag (not an identifier) +3. `'--mode'` is processed → Click extracts `'mode'` from it internally +4. Since `'mode'` was already set as the name, the error is raised + +## Full Traceability + +### Call Stack Flow + +1. **User code**: Defines function with `Annotated[Type, Option(...)]` +2. **typer/main.py:341**: `Typer.__call__()` is invoked +3. **typer/main.py:377**: `get_command()` processes the command +4. **typer/main.py:586**: `get_command_from_info()` extracts command info +5. **typer/main.py:562**: `get_params_convertors_ctx_param_name_from_function()` processes parameters +6. **typer/main.py:901**: `get_click_param()` creates Click parameters + - Line 895: Prepends parameter name to `param_decls` +7. **typer/core.py:444**: `TyperOption.__init__()` is called +8. 
**click/core.py:2558**: `click.Option.__init__()` is called +9. **click/core.py:2098**: `click.Parameter.__init__()` is called +10. **click/core.py:2694**: `_parse_decls()` raises the error + +### Environment Details + +- **Python**: 3.13 +- **Typer**: 0.16.0 +- **Click**: (bundled with Typer) +- **Platform**: darwin (macOS) + +## Solution + +### Best Practice + +When using `Annotated` with `Option()`, never specify the default value as the first argument to `Option()`: + +```python +# ❌ WRONG - Causes "Name defined twice" error +mode: Annotated[Type, typer.Option(default_value, "-m", "--mode")] = default_value + +# ✅ CORRECT - Default only as parameter default +mode: Annotated[Type, typer.Option("-m", "--mode")] = default_value +``` + +### Why This Works + +- Without a default in `Option()`, the first argument becomes a param_decl +- This prevents the parameter name from appearing twice in the declarations +- The default value is properly handled through Python's parameter default mechanism + +## Alternative Patterns + +### 1. Direct Option Usage (No Annotated) + +```python +def main( + mode: DiffMode = typer.Option( + DiffMode.both, # Can specify default here + "-m", + "--mode", + help="Which mode to use", + ), +): + pass +``` + +### 2. Argument Instead of Option + +```python +def main( + mode: Annotated[ + DiffMode, + typer.Argument(help="Which mode to use"), + ] = DiffMode.both, +): + pass +``` + +## Impact and Considerations + +### When This Issue Occurs + +- Using Typer 0.16.0 +- Using `Annotated` type hints +- Specifying default value as first argument to `Option()` +- The parameter name matches (after transformation) an option name + +### When This Issue Does NOT Occur + +- Using direct assignment pattern (no `Annotated`) +- Not specifying default in `Option()` constructor +- Using `Argument()` instead of `Option()` + +## Recommendations + +1. **For New Code**: Always omit the default value from `Option()` when using `Annotated` +2. 
**For Migration**: Remove default values from `Option()` calls in `Annotated` contexts +3. **For Teams**: Establish coding standards that enforce this pattern +4. **For Tooling**: Consider linters or pre-commit hooks to catch this pattern + +## Related Issues + +This issue is specific to the interaction between: + +- Typer's parameter processing +- Click's declaration parsing +- Python's `Annotated` type hints +- The overlapping namespace between parameter names and option names + +The error message "Name 'X' defined twice" is misleading as it doesn't clearly indicate the source of the duplication. diff --git a/uv.lock b/uv.lock index 9308b15..2d0076e 100644 --- a/uv.lock +++ b/uv.lock @@ -191,6 +191,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/df/73/b6e24bd22e6720ca8ee9a85a0c4a2971af8497d8f3193fa05390cbd46e09/backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8", size = 15148 }, ] +[[package]] +name = "basedpyright" +version = "1.29.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nodejs-wheel-binaries" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/80/fb/bd92196a07e3b4ccee4ff2761a26a05bff77d4da089b67b4b1a547868099/basedpyright-1.29.4.tar.gz", hash = "sha256:2df1976f8591eedf4b4ce8f9d123f43e810cc8cb7cc83c53eec0e2f8044073d0", size = 21961481 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d5/dc/180fe721a2574fb3aad4051adcca196ac2d18adaf75122f5eeb47436cca2/basedpyright-1.29.4-py3-none-any.whl", hash = "sha256:e087513979972f83010639c6c1a1c13dd3b1d24ee45f8ecff747962cc2063d6f", size = 11476859 }, +] + [[package]] name = "blinker" version = "1.9.0" @@ -557,16 +569,16 @@ wheels = [ [[package]] name = "flake8" -version = "7.2.0" +version = "7.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "mccabe" }, { name = "pycodestyle" }, { name = "pyflakes" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/e7/c4/5842fc9fc94584c455543540af62fd9900faade32511fab650e9891ec225/flake8-7.2.0.tar.gz", hash = "sha256:fa558ae3f6f7dbf2b4f22663e5343b6b6023620461f8d4ff2019ef4b5ee70426", size = 48177 } +sdist = { url = "https://files.pythonhosted.org/packages/9b/af/fbfe3c4b5a657d79e5c47a2827a362f9e1b763336a52f926126aa6dc7123/flake8-7.3.0.tar.gz", hash = "sha256:fe044858146b9fc69b551a4b490d69cf960fcb78ad1edcb84e7fbb1b4a8e3872", size = 48326 } wheels = [ - { url = "https://files.pythonhosted.org/packages/83/5c/0627be4c9976d56b1217cb5187b7504e7fd7d3503f8bfd312a04077bd4f7/flake8-7.2.0-py2.py3-none-any.whl", hash = "sha256:93b92ba5bdb60754a6da14fa3b93a9361fd00a59632ada61fd7b130436c40343", size = 57786 }, + { url = "https://files.pythonhosted.org/packages/9f/56/13ab06b4f93ca7cac71078fbe37fcea175d3216f31f85c3168a6bbd0bb9a/flake8-7.3.0-py2.py3-none-any.whl", hash = "sha256:b9696257b9ce8beb888cdbe31cf885c90d31928fe202be0889a7cdafad32f01e", size = 57922 }, ] [[package]] @@ -883,17 +895,17 @@ wheels = [ [[package]] name = "hf-xet" -version = "1.1.4" +version = "1.1.5" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/8d/11/b480bb7515db97d5b2b703927a59bbdd3f87e68d47dff5591aada467b4a9/hf_xet-1.1.4.tar.gz", hash = "sha256:875158df90cb13547752532ed73cad9dfaad3b29e203143838f67178418d08a4", size = 492082 } +sdist = { url = "https://files.pythonhosted.org/packages/ed/d4/7685999e85945ed0d7f0762b686ae7015035390de1161dcea9d5276c134c/hf_xet-1.1.5.tar.gz", hash = "sha256:69ebbcfd9ec44fdc2af73441619eeb06b94ee34511bbcf57cd423820090f5694", size = 495969 } wheels = [ - { url = "https://files.pythonhosted.org/packages/c4/62/3b41a7439930996530c64955874445012fd9044c82c60b34c5891c34fec6/hf_xet-1.1.4-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:6591ab9f61ea82d261107ed90237e2ece972f6a7577d96f5f071208bbf255d1c", size = 2643151 }, - { url = 
"https://files.pythonhosted.org/packages/9b/9f/1744fb1d79e0ac147578b193ce29208ebb9f4636e8cdf505638f6f0a6874/hf_xet-1.1.4-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:071b0b4d4698990f746edd666c7cc42555833d22035d88db0df936677fb57d29", size = 2510687 }, - { url = "https://files.pythonhosted.org/packages/d1/a8/49a81d4f81b0d21cc758b6fca3880a85ca0d209e8425c8b3a6ef694881ca/hf_xet-1.1.4-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b5b610831e92e41182d4c028653978b844d332d492cdcba1b920d3aca4a0207e", size = 3057631 }, - { url = "https://files.pythonhosted.org/packages/bf/8b/65fa08273789dafbc38d0f0bdd20df56b63ebc6566981bbaa255d9d84a33/hf_xet-1.1.4-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:f6578bcd71393abfd60395279cc160ca808b61f5f9d535b922fcdcd3f77a708d", size = 2949250 }, - { url = "https://files.pythonhosted.org/packages/8b/4b/224340bb1d5c63b6e03e04095b4e42230848454bf4293c45cd7bdaa0c208/hf_xet-1.1.4-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:fb2bbfa2aae0e4f0baca988e7ba8d8c1a39a25adf5317461eb7069ad00505b3e", size = 3124670 }, - { url = "https://files.pythonhosted.org/packages/4a/b7/4be010014de6585401c32a04c46b09a4a842d66bd16ed549a401e973b74b/hf_xet-1.1.4-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:73346ba3e2e15ea8909a26b0862b458f15b003e6277935e3fba5bf273508d698", size = 3234131 }, - { url = "https://files.pythonhosted.org/packages/c2/2d/cf148d532f741fbf93f380ff038a33c1309d1e24ea629dc39d11dca08c92/hf_xet-1.1.4-cp37-abi3-win_amd64.whl", hash = "sha256:52e8f8bc2029d8b911493f43cea131ac3fa1f0dc6a13c50b593c4516f02c6fc3", size = 2695589 }, + { url = "https://files.pythonhosted.org/packages/00/89/a1119eebe2836cb25758e7661d6410d3eae982e2b5e974bcc4d250be9012/hf_xet-1.1.5-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:f52c2fa3635b8c37c7764d8796dfa72706cc4eded19d638331161e82b0792e23", size = 2687929 }, + { url = 
"https://files.pythonhosted.org/packages/de/5f/2c78e28f309396e71ec8e4e9304a6483dcbc36172b5cea8f291994163425/hf_xet-1.1.5-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:9fa6e3ee5d61912c4a113e0708eaaef987047616465ac7aa30f7121a48fc1af8", size = 2556338 }, + { url = "https://files.pythonhosted.org/packages/6d/2f/6cad7b5fe86b7652579346cb7f85156c11761df26435651cbba89376cd2c/hf_xet-1.1.5-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc874b5c843e642f45fd85cda1ce599e123308ad2901ead23d3510a47ff506d1", size = 3102894 }, + { url = "https://files.pythonhosted.org/packages/d0/54/0fcf2b619720a26fbb6cc941e89f2472a522cd963a776c089b189559447f/hf_xet-1.1.5-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:dbba1660e5d810bd0ea77c511a99e9242d920790d0e63c0e4673ed36c4022d18", size = 3002134 }, + { url = "https://files.pythonhosted.org/packages/f3/92/1d351ac6cef7c4ba8c85744d37ffbfac2d53d0a6c04d2cabeba614640a78/hf_xet-1.1.5-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ab34c4c3104133c495785d5d8bba3b1efc99de52c02e759cf711a91fd39d3a14", size = 3171009 }, + { url = "https://files.pythonhosted.org/packages/c9/65/4b2ddb0e3e983f2508528eb4501288ae2f84963586fbdfae596836d5e57a/hf_xet-1.1.5-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:83088ecea236d5113de478acb2339f92c95b4fb0462acaa30621fac02f5a534a", size = 3279245 }, + { url = "https://files.pythonhosted.org/packages/f0/55/ef77a85ee443ae05a9e9cba1c9f0dd9241eb42da2aeba1dc50f51154c81a/hf_xet-1.1.5-cp37-abi3-win_amd64.whl", hash = "sha256:73e167d9807d166596b4b2f0b585c6d5bd84a26dea32843665a8b58f6edba245", size = 2738931 }, ] [[package]] @@ -1077,11 +1089,11 @@ wheels = [ [[package]] name = "json-repair" -version = "0.47.1" +version = "0.47.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/51/83/46f16a17e7a12e81719da3c17eefceb39c7dc076cf8ae48aef421cf81504/json_repair-0.47.1.tar.gz", hash = 
"sha256:e232ae4def2cb5ce00900f74ca8c0a05cab148bd2ec7eba85506aef26e750805", size = 33882 } +sdist = { url = "https://files.pythonhosted.org/packages/20/b3/4d27ddc50faf1900b5e6230212e802549f19821cea88c934bc6ca9a5d7b9/json_repair-0.47.2.tar.gz", hash = "sha256:4c5db08c2af384b96125beba0e59dbe154b13406a2b5eee8089f7e4faec3ebea", size = 33832 } wheels = [ - { url = "https://files.pythonhosted.org/packages/c3/bb/616395c4e06ce6ffdd4b2b73afc7bbb072391d8b0824294a1a3419a4770b/json_repair-0.47.1-py3-none-any.whl", hash = "sha256:0b9c0b3ae400e83317efdaab91983a2b63faebebda2924dcfdd16080839673c7", size = 22475 }, + { url = "https://files.pythonhosted.org/packages/c9/b9/d6fb6d0e415282168bc16bc31de3c762c07916f75a9b6ecc346eb6a89e09/json_repair-0.47.2-py3-none-any.whl", hash = "sha256:7387f318f2b982e04d6f3ab70c314324b15d57c8d2bfe17b44215b98fbd71caf", size = 22423 }, ] [[package]] @@ -1200,7 +1212,7 @@ wheels = [ [[package]] name = "litellm" -version = "1.72.6.post2" +version = "1.73.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, @@ -1215,7 +1227,10 @@ dependencies = [ { name = "tiktoken" }, { name = "tokenizers" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8f/50/e594d8978362796e44c9643befac6dd27dcc113aa700723697bae849ed72/litellm-1.72.6.post2.tar.gz", hash = "sha256:24dbe0efaeca0712d2e18795a6734d1678af086ed9ea1893721c426fe984398d", size = 8363431 } +sdist = { url = "https://files.pythonhosted.org/packages/3e/5e/e110a45916b18ac93234e1b76a6ca57e7a3f9d38c4b9c004b68aedcddf41/litellm-1.73.0.tar.gz", hash = "sha256:4fdbb86f349be2038068827517786f6f7e7f761528d8f5d4b941b406d33bb8c3", size = 8553383 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8c/ef/a788e2aca00e1afa97e07a14e85b9a1189498bbd424be7e1aea57cc5a831/litellm-1.73.0-py3-none-any.whl", hash = "sha256:0a0a14c2f2522ffaf6cfbea043108d837047ee2b8dff5fb1dc14a7bd3cea0118", size = 8358192 }, +] [[package]] name = "magicattr" @@ -1551,60 +1566,19 @@ wheels = [ ] 
[[package]] -name = "mypy" -version = "1.16.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "mypy-extensions" }, - { name = "pathspec" }, - { name = "tomli", marker = "python_full_version < '3.11'" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/81/69/92c7fa98112e4d9eb075a239caa4ef4649ad7d441545ccffbd5e34607cbb/mypy-1.16.1.tar.gz", hash = "sha256:6bd00a0a2094841c5e47e7374bb42b83d64c527a502e3334e1173a0c24437bab", size = 3324747 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8e/12/2bf23a80fcef5edb75de9a1e295d778e0f46ea89eb8b115818b663eff42b/mypy-1.16.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b4f0fed1022a63c6fec38f28b7fc77fca47fd490445c69d0a66266c59dd0b88a", size = 10958644 }, - { url = "https://files.pythonhosted.org/packages/08/50/bfe47b3b278eacf348291742fd5e6613bbc4b3434b72ce9361896417cfe5/mypy-1.16.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:86042bbf9f5a05ea000d3203cf87aa9d0ccf9a01f73f71c58979eb9249f46d72", size = 10087033 }, - { url = "https://files.pythonhosted.org/packages/21/de/40307c12fe25675a0776aaa2cdd2879cf30d99eec91b898de00228dc3ab5/mypy-1.16.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ea7469ee5902c95542bea7ee545f7006508c65c8c54b06dc2c92676ce526f3ea", size = 11875645 }, - { url = "https://files.pythonhosted.org/packages/a6/d8/85bdb59e4a98b7a31495bd8f1a4445d8ffc86cde4ab1f8c11d247c11aedc/mypy-1.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:352025753ef6a83cb9e7f2427319bb7875d1fdda8439d1e23de12ab164179574", size = 12616986 }, - { url = "https://files.pythonhosted.org/packages/0e/d0/bb25731158fa8f8ee9e068d3e94fcceb4971fedf1424248496292512afe9/mypy-1.16.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ff9fa5b16e4c1364eb89a4d16bcda9987f05d39604e1e6c35378a2987c1aac2d", size = 12878632 }, - { url = 
"https://files.pythonhosted.org/packages/2d/11/822a9beb7a2b825c0cb06132ca0a5183f8327a5e23ef89717c9474ba0bc6/mypy-1.16.1-cp310-cp310-win_amd64.whl", hash = "sha256:1256688e284632382f8f3b9e2123df7d279f603c561f099758e66dd6ed4e8bd6", size = 9484391 }, - { url = "https://files.pythonhosted.org/packages/9a/61/ec1245aa1c325cb7a6c0f8570a2eee3bfc40fa90d19b1267f8e50b5c8645/mypy-1.16.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:472e4e4c100062488ec643f6162dd0d5208e33e2f34544e1fc931372e806c0cc", size = 10890557 }, - { url = "https://files.pythonhosted.org/packages/6b/bb/6eccc0ba0aa0c7a87df24e73f0ad34170514abd8162eb0c75fd7128171fb/mypy-1.16.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ea16e2a7d2714277e349e24d19a782a663a34ed60864006e8585db08f8ad1782", size = 10012921 }, - { url = "https://files.pythonhosted.org/packages/5f/80/b337a12e2006715f99f529e732c5f6a8c143bb58c92bb142d5ab380963a5/mypy-1.16.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:08e850ea22adc4d8a4014651575567b0318ede51e8e9fe7a68f25391af699507", size = 11802887 }, - { url = "https://files.pythonhosted.org/packages/d9/59/f7af072d09793d581a745a25737c7c0a945760036b16aeb620f658a017af/mypy-1.16.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:22d76a63a42619bfb90122889b903519149879ddbf2ba4251834727944c8baca", size = 12531658 }, - { url = "https://files.pythonhosted.org/packages/82/c4/607672f2d6c0254b94a646cfc45ad589dd71b04aa1f3d642b840f7cce06c/mypy-1.16.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:2c7ce0662b6b9dc8f4ed86eb7a5d505ee3298c04b40ec13b30e572c0e5ae17c4", size = 12732486 }, - { url = "https://files.pythonhosted.org/packages/b6/5e/136555ec1d80df877a707cebf9081bd3a9f397dedc1ab9750518d87489ec/mypy-1.16.1-cp311-cp311-win_amd64.whl", hash = "sha256:211287e98e05352a2e1d4e8759c5490925a7c784ddc84207f4714822f8cf99b6", size = 9479482 }, - { url = 
"https://files.pythonhosted.org/packages/b4/d6/39482e5fcc724c15bf6280ff5806548c7185e0c090712a3736ed4d07e8b7/mypy-1.16.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:af4792433f09575d9eeca5c63d7d90ca4aeceda9d8355e136f80f8967639183d", size = 11066493 }, - { url = "https://files.pythonhosted.org/packages/e6/e5/26c347890efc6b757f4d5bb83f4a0cf5958b8cf49c938ac99b8b72b420a6/mypy-1.16.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:66df38405fd8466ce3517eda1f6640611a0b8e70895e2a9462d1d4323c5eb4b9", size = 10081687 }, - { url = "https://files.pythonhosted.org/packages/44/c7/b5cb264c97b86914487d6a24bd8688c0172e37ec0f43e93b9691cae9468b/mypy-1.16.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:44e7acddb3c48bd2713994d098729494117803616e116032af192871aed80b79", size = 11839723 }, - { url = "https://files.pythonhosted.org/packages/15/f8/491997a9b8a554204f834ed4816bda813aefda31cf873bb099deee3c9a99/mypy-1.16.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0ab5eca37b50188163fa7c1b73c685ac66c4e9bdee4a85c9adac0e91d8895e15", size = 12722980 }, - { url = "https://files.pythonhosted.org/packages/df/f0/2bd41e174b5fd93bc9de9a28e4fb673113633b8a7f3a607fa4a73595e468/mypy-1.16.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dedb6229b2c9086247e21a83c309754b9058b438704ad2f6807f0d8227f6ebdd", size = 12903328 }, - { url = "https://files.pythonhosted.org/packages/61/81/5572108a7bec2c46b8aff7e9b524f371fe6ab5efb534d38d6b37b5490da8/mypy-1.16.1-cp312-cp312-win_amd64.whl", hash = "sha256:1f0435cf920e287ff68af3d10a118a73f212deb2ce087619eb4e648116d1fe9b", size = 9562321 }, - { url = "https://files.pythonhosted.org/packages/28/e3/96964af4a75a949e67df4b95318fe2b7427ac8189bbc3ef28f92a1c5bc56/mypy-1.16.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ddc91eb318c8751c69ddb200a5937f1232ee8efb4e64e9f4bc475a33719de438", size = 11063480 }, - { url = 
"https://files.pythonhosted.org/packages/f5/4d/cd1a42b8e5be278fab7010fb289d9307a63e07153f0ae1510a3d7b703193/mypy-1.16.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:87ff2c13d58bdc4bbe7dc0dedfe622c0f04e2cb2a492269f3b418df2de05c536", size = 10090538 }, - { url = "https://files.pythonhosted.org/packages/c9/4f/c3c6b4b66374b5f68bab07c8cabd63a049ff69796b844bc759a0ca99bb2a/mypy-1.16.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0a7cfb0fe29fe5a9841b7c8ee6dffb52382c45acdf68f032145b75620acfbd6f", size = 11836839 }, - { url = "https://files.pythonhosted.org/packages/b4/7e/81ca3b074021ad9775e5cb97ebe0089c0f13684b066a750b7dc208438403/mypy-1.16.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:051e1677689c9d9578b9c7f4d206d763f9bbd95723cd1416fad50db49d52f359", size = 12715634 }, - { url = "https://files.pythonhosted.org/packages/e9/95/bdd40c8be346fa4c70edb4081d727a54d0a05382d84966869738cfa8a497/mypy-1.16.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d5d2309511cc56c021b4b4e462907c2b12f669b2dbeb68300110ec27723971be", size = 12895584 }, - { url = "https://files.pythonhosted.org/packages/5a/fd/d486a0827a1c597b3b48b1bdef47228a6e9ee8102ab8c28f944cb83b65dc/mypy-1.16.1-cp313-cp313-win_amd64.whl", hash = "sha256:4f58ac32771341e38a853c5d0ec0dfe27e18e27da9cdb8bbc882d2249c71a3ee", size = 9573886 }, - { url = "https://files.pythonhosted.org/packages/cf/d3/53e684e78e07c1a2bf7105715e5edd09ce951fc3f47cf9ed095ec1b7a037/mypy-1.16.1-py3-none-any.whl", hash = "sha256:5fc2ac4027d0ef28d6ba69a0343737a23c4d1b83672bf38d1fe237bdc0643b37", size = 2265923 }, -] - -[[package]] -name = "mypy-extensions" -version = "1.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = 
"sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963 }, -] - -[[package]] -name = "nodeenv" -version = "1.9.1" +name = "nodejs-wheel-binaries" +version = "22.16.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/43/16/fc88b08840de0e0a72a2f9d8c6bae36be573e475a6326ae854bcc549fc45/nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f", size = 47437 } +sdist = { url = "https://files.pythonhosted.org/packages/0f/c6/66f36b7b0d528660dfb4a59cb9b8dd6a3f4c0a3939cd49c404a775ea4a63/nodejs_wheel_binaries-22.16.0.tar.gz", hash = "sha256:d695832f026df3a0cf9a089d222225939de9d1b67f8f0a353b79f015aabbe7e2", size = 8061 } wheels = [ - { url = "https://files.pythonhosted.org/packages/d2/1d/1b658dbd2b9fa9c4c9f32accbfc0205d532c8c6194dc0f2a4c0428e7128a/nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9", size = 22314 }, + { url = "https://files.pythonhosted.org/packages/d7/dc/417a5c5f99e53a5d2b3be122506312731eb90fb9630c248e327e2e38cc6b/nodejs_wheel_binaries-22.16.0-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:986b715a96ed703f8ce0c15712f76fc42895cf09067d72b6ef29e8b334eccf64", size = 50957501 }, + { url = "https://files.pythonhosted.org/packages/0e/dd/d6ce48209ed15f5d1fccb29eeaa111f962557123eaf4fd03a7316c42734c/nodejs_wheel_binaries-22.16.0-py2.py3-none-macosx_11_0_x86_64.whl", hash = "sha256:4ae3cf22138891cb44c3ee952862a257ce082b098b29024d7175684a9a77b0c0", size = 51891634 }, + { url = 
"https://files.pythonhosted.org/packages/80/fa/a07e622fd87717eec3e5cff41575f85ad62717e8698884d28ca809266ca1/nodejs_wheel_binaries-22.16.0-py2.py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71f2de4dc0b64ae43e146897ce811f80ac4f9acfbae6ccf814226282bf4ef174", size = 57857862 }, + { url = "https://files.pythonhosted.org/packages/1f/80/52736f9570a93f8e6b7942981dc9770eca2bc7aa1d200c1d54198374a6ca/nodejs_wheel_binaries-22.16.0-py2.py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dbfccbcd558d2f142ccf66d8c3a098022bf4436db9525b5b8d32169ce185d99e", size = 58395868 }, + { url = "https://files.pythonhosted.org/packages/0f/0e/53616a5ed8fc1fbe9e48bf132862da5a9abf5cc7f8483dab1722ec257187/nodejs_wheel_binaries-22.16.0-py2.py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:447ad796850eb52ca20356ad39b2d296ed8fef3f214921f84a1ccdad49f2eba1", size = 59712469 }, + { url = "https://files.pythonhosted.org/packages/4a/cd/e2b5083df581fc1d08eb93feb6f8fbd3d56b113cef9b59d8e0fb7d4dd4f3/nodejs_wheel_binaries-22.16.0-py2.py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:7f526ca6a132b0caf633566a2a78c6985fe92857e7bfdb37380f76205a10b808", size = 60763005 }, + { url = "https://files.pythonhosted.org/packages/71/8d/57112b49214e8bd636f3cc3386eba6be4d23552ec8a0f6efbe814013caa7/nodejs_wheel_binaries-22.16.0-py2.py3-none-win_amd64.whl", hash = "sha256:2fffb4bf1066fb5f660da20819d754f1b424bca1b234ba0f4fa901c52e3975fb", size = 41313324 }, + { url = "https://files.pythonhosted.org/packages/91/03/a852711aec73dfb965844592dfe226024c0da28e37d1ee54083342e38f57/nodejs_wheel_binaries-22.16.0-py2.py3-none-win_arm64.whl", hash = "sha256:2728972d336d436d39ee45988978d8b5d963509e06f063e80fe41b203ee80b28", size = 38828154 }, ] [[package]] @@ -1671,7 +1645,7 @@ wheels = [ [[package]] name = "openai" -version = "1.88.0" +version = "1.91.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -1683,9 +1657,9 @@ dependencies = [ { name = 
"tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5a/ea/bbeef604d1fe0f7e9111745bb8a81362973a95713b28855beb9a9832ab12/openai-1.88.0.tar.gz", hash = "sha256:122d35e42998255cf1fc84560f6ee49a844e65c054cd05d3e42fda506b832bb1", size = 470963 } +sdist = { url = "https://files.pythonhosted.org/packages/0f/e2/a22f2973b729eff3f1f429017bdf717930c5de0fbf9e14017bae330e4e7a/openai-1.91.0.tar.gz", hash = "sha256:d6b07730d2f7c6745d0991997c16f85cddfc90ddcde8d569c862c30716b9fc90", size = 472529 } wheels = [ - { url = "https://files.pythonhosted.org/packages/f4/03/ef68d77a38dd383cbed7fc898857d394d5a8b0520a35f054e7fe05dc3ac1/openai-1.88.0-py3-none-any.whl", hash = "sha256:7edd7826b3b83f5846562a6f310f040c79576278bf8e3687b30ba05bb5dff978", size = 734293 }, + { url = "https://files.pythonhosted.org/packages/7a/d2/f99bdd6fc737d6b3cf0df895508d621fc9a386b375a1230ee81d46c5436e/openai-1.91.0-py3-none-any.whl", hash = "sha256:207f87aa3bc49365e014fac2f7e291b99929f4fe126c4654143440e0ad446a5f", size = 735837 }, ] [[package]] @@ -1803,15 +1777,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/39/c2/646d2e93e0af70f4e5359d870a63584dacbc324b54d73e6b3267920ff117/pandas-2.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:bb3be958022198531eb7ec2008cfc78c5b1eed51af8600c6c5d9160d89d8d249", size = 13231847 }, ] -[[package]] -name = "pathspec" -version = "0.12.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191 }, -] - [[package]] 
name = "pillow" version = "11.2.1" @@ -2068,11 +2033,11 @@ wheels = [ [[package]] name = "pycodestyle" -version = "2.13.0" +version = "2.14.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/04/6e/1f4a62078e4d95d82367f24e685aef3a672abfd27d1a868068fed4ed2254/pycodestyle-2.13.0.tar.gz", hash = "sha256:c8415bf09abe81d9c7f872502a6eee881fbe85d8763dd5b9924bb0a01d67efae", size = 39312 } +sdist = { url = "https://files.pythonhosted.org/packages/11/e0/abfd2a0d2efe47670df87f3e3a0e2edda42f055053c85361f19c0e2c1ca8/pycodestyle-2.14.0.tar.gz", hash = "sha256:c4b5b517d278089ff9d0abdec919cd97262a3367449ea1c8b49b91529167b783", size = 39472 } wheels = [ - { url = "https://files.pythonhosted.org/packages/07/be/b00116df1bfb3e0bb5b45e29d604799f7b91dd861637e4d448b4e09e6a3e/pycodestyle-2.13.0-py2.py3-none-any.whl", hash = "sha256:35863c5974a271c7a726ed228a14a4f6daf49df369d8c50cd9a6f58a5e143ba9", size = 31424 }, + { url = "https://files.pythonhosted.org/packages/d7/27/a58ddaf8c588a3ef080db9d0b7e0b97215cee3a45df74f3a94dbbf5c893a/pycodestyle-2.14.0-py2.py3-none-any.whl", hash = "sha256:dd6bf7cb4ee77f8e016f9c8e74a35ddd9f67e1d5fd4184d86c3b98e07099f42d", size = 31594 }, ] [[package]] @@ -2179,20 +2144,20 @@ wheels = [ [[package]] name = "pyflakes" -version = "3.3.2" +version = "3.4.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/af/cc/1df338bd7ed1fa7c317081dcf29bf2f01266603b301e6858856d346a12b3/pyflakes-3.3.2.tar.gz", hash = "sha256:6dfd61d87b97fba5dcfaaf781171ac16be16453be6d816147989e7f6e6a9576b", size = 64175 } +sdist = { url = "https://files.pythonhosted.org/packages/45/dc/fd034dc20b4b264b3d015808458391acbf9df40b1e54750ef175d39180b1/pyflakes-3.4.0.tar.gz", hash = "sha256:b24f96fafb7d2ab0ec5075b7350b3d2d2218eab42003821c06344973d3ea2f58", size = 64669 } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/15/40/b293a4fa769f3b02ab9e387c707c4cbdc34f073f945de0386107d4e669e6/pyflakes-3.3.2-py2.py3-none-any.whl", hash = "sha256:5039c8339cbb1944045f4ee5466908906180f13cc99cc9949348d10f82a5c32a", size = 63164 }, + { url = "https://files.pythonhosted.org/packages/c2/2f/81d580a0fb83baeb066698975cb14a618bdbed7720678566f1b046a95fe8/pyflakes-3.4.0-py2.py3-none-any.whl", hash = "sha256:f742a7dbd0d9cb9ea41e9a24a918996e8170c799fa528688d40dd582c8265f4f", size = 63551 }, ] [[package]] name = "pygments" -version = "2.19.1" +version = "2.19.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/7c/2d/c3338d48ea6cc0feb8446d8e6937e1408088a72a39937982cc6111d17f84/pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f", size = 4968581 } +sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631 } wheels = [ - { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 }, + { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217 }, ] [[package]] @@ -2204,19 +2169,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/05/e7/df2285f3d08fee213f2d041540fa4fc9ca6c2d44cf36d3a035bf2a8d2bcc/pyparsing-3.2.3-py3-none-any.whl", hash = "sha256:a749938e02d6fd0b59b356ca504a24982314bb090c383e3cf201c95ef7e2bfcf", size = 111120 }, ] -[[package]] -name = "pyright" -version = "1.1.402" -source = { 
registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nodeenv" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/aa/04/ce0c132d00e20f2d2fb3b3e7c125264ca8b909e693841210534b1ea1752f/pyright-1.1.402.tar.gz", hash = "sha256:85a33c2d40cd4439c66aa946fd4ce71ab2f3f5b8c22ce36a623f59ac22937683", size = 3888207 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/fe/37/1a1c62d955e82adae588be8e374c7f77b165b6cb4203f7d581269959abbc/pyright-1.1.402-py3-none-any.whl", hash = "sha256:2c721f11869baac1884e846232800fe021c33f1b4acb3929cff321f7ea4e2982", size = 5624004 }, -] - [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -2443,7 +2395,6 @@ dependencies = [ { name = "flake8" }, { name = "mlflow" }, { name = "pyflakes" }, - { name = "pyright" }, { name = "returns" }, { name = "rich" }, { name = "toml" }, @@ -2452,8 +2403,8 @@ dependencies = [ [package.dev-dependencies] dev = [ + { name = "basedpyright" }, { name = "isort" }, - { name = "mypy" }, { name = "ruff" }, { name = "toml" }, ] @@ -2464,7 +2415,6 @@ requires-dist = [ { name = "flake8", specifier = ">=7.2.0" }, { name = "mlflow", specifier = ">=3.1.0" }, { name = "pyflakes", specifier = ">=3.3.2" }, - { name = "pyright", specifier = ">=1.1.402" }, { name = "returns", specifier = ">=0.25.0" }, { name = "rich", specifier = ">=13.7.1" }, { name = "toml", specifier = ">=0.10.2" }, @@ -2473,8 +2423,8 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ + { name = "basedpyright", specifier = ">=1.29.4" }, { name = "isort", specifier = ">=6.0.1" }, - { name = "mypy", specifier = ">=1.16.1" }, { name = "ruff", specifier = ">=0.11.13" }, { name = "toml", specifier = ">=0.10.2" }, ] From f0b4679c9fd5cd556faa3b6549ebd9905168df8b Mon Sep 17 00:00:00 2001 From: ethan-wickstrom Date: Mon, 23 Jun 2025 21:17:14 -0500 Subject: [PATCH 05/26] refactor(structure): reorganize modules and improve type safety Extracted shared models (TestCase, 
CodeQualityScores) into a dedicated models.py module to reduce circular dependencies. Created json and training subpackages to better organize related functionality. Replaced list type annotations with Sequence throughout for improved immutability guarantees. Added explicit type annotations for JSON data handling and module attributes. Removed redundant module docstrings and cleaned up imports across all affected files. The changes improve code organization by grouping related functionality, enhance type safety with more precise annotations, and establish clearer module boundaries without altering any external behavior. --- src/robofactor/analysis.py | 23 ++-- src/robofactor/config.py | 1 + src/robofactor/dspy_modules.py | 102 +++++++----------- src/robofactor/evaluation.py | 34 ++---- src/robofactor/function_extraction.py | 12 +-- src/robofactor/json/__init__.py | 0 src/robofactor/json/is_json_list.py | 8 ++ src/robofactor/json/types.py | 6 ++ src/robofactor/main.py | 35 ++++-- src/robofactor/models.py | 19 ++++ src/robofactor/training/__init__.py | 0 .../{ => training}/training_data.json | 0 src/robofactor/training/training_loader.py | 64 +++++++++++ src/robofactor/ui.py | 9 +- src/robofactor/utils.py | 3 +- 15 files changed, 191 insertions(+), 125 deletions(-) create mode 100644 src/robofactor/json/__init__.py create mode 100644 src/robofactor/json/is_json_list.py create mode 100644 src/robofactor/json/types.py create mode 100644 src/robofactor/models.py create mode 100644 src/robofactor/training/__init__.py rename src/robofactor/{ => training}/training_data.json (100%) create mode 100644 src/robofactor/training/training_loader.py diff --git a/src/robofactor/analysis.py b/src/robofactor/analysis.py index 0414d64..cbace56 100644 --- a/src/robofactor/analysis.py +++ b/src/robofactor/analysis.py @@ -1,12 +1,3 @@ -""" -Utility functions for static and dynamic code analysis. 
- -This module includes functions for syntax validation, quality scoring (linting, -complexity, typing, docstrings), and functional correctness checking. These -functions are designed to be pure or to have their side effects managed by -callers, often using decorators like `@safe` from the `returns` library. -""" - import ast import json import os @@ -14,12 +5,16 @@ import subprocess import tempfile import textwrap +from collections.abc import Sequence from pathlib import Path +from .models import TestCase import dspy +from .models import CodeQualityScores + from . import config -from .evaluation import CodeQualityScores, TestCase +from .json.types import JSON def extract_python_code(text: str) -> str: @@ -77,7 +72,7 @@ def check_code_quality(code: str, func_name: str | None = None) -> CodeQualitySc or `@impure_safe` to handle potential exceptions. """ with tempfile.NamedTemporaryFile("w", suffix=".py", delete=False, encoding="utf-8") as tmp: - tmp.write(code) + _ = tmp.write(code) tmp_path = Path(tmp.name) try: @@ -143,7 +138,7 @@ def _build_execution_script(func_name: str, test_case: TestCase) -> str: ) -def check_functional_correctness(code: str, func_name: str, test_cases: list[TestCase]) -> int: +def check_functional_correctness(code: str, func_name: str, test_cases: Sequence[TestCase]) -> int: """ Executes test cases against code in a sandboxed Python interpreter. @@ -164,10 +159,10 @@ def check_functional_correctness(code: str, func_name: str, test_cases: list[Tes try: exec_script = _build_execution_script(func_name, test) actual_output_json = interp.execute(exec_script) - actual_output = json.loads(actual_output_json) + actual_output: JSON = json.loads(actual_output_json) # Normalize expected output to ensure consistent comparison. 
- normalized_expected_output = json.loads(json.dumps(test.expected_output)) + normalized_expected_output: JSON = json.loads(json.dumps(test.expected_output)) if actual_output == normalized_expected_output: passed_count += 1 except Exception: diff --git a/src/robofactor/config.py b/src/robofactor/config.py index 962c5f8..e4bb2c1 100644 --- a/src/robofactor/config.py +++ b/src/robofactor/config.py @@ -9,6 +9,7 @@ # --- File Paths --- OPTIMIZER_FILENAME: Path = Path("optimized/") +TRAINING_DATA_FILE = Path("training/training_data.json") # --- DSPy Model Configuration --- DEFAULT_TASK_LLM: str = "gemini/gemini-2.5-flash-lite-preview-06-17" diff --git a/src/robofactor/dspy_modules.py b/src/robofactor/dspy_modules.py index 7f8b4b8..53a0ec1 100644 --- a/src/robofactor/dspy_modules.py +++ b/src/robofactor/dspy_modules.py @@ -1,61 +1,56 @@ -""" -Refactored DSPy modules and Pydantic models for Python code refactoring agent. -Improved with type safety, error handling, and separation of concerns. -""" - -import json import logging -from pathlib import Path +from collections.abc import Sequence import dspy from pydantic import BaseModel, Field, field_validator, model_validator from returns.result import Result, Success from . 
import analysis, evaluation -from .evaluation import EvaluationResult, TestCase +from .evaluation import EvaluationResult # --- Constants --- FAILURE_SCORE = 0.0 -TRAINING_DATA_FILE = "training_data.json" logger = logging.getLogger(__name__) + # --- Pydantic Models --- class AnalysisOutput(BaseModel): """Structured analysis of Python code functionality and improvement opportunities.""" - analysis: str = Field(description="Concise summary of functionality, complexity, and dependencies") - refactoring_opportunities: list[str] = Field( + + analysis: str = Field( + description="Concise summary of functionality, complexity, and dependencies" + ) + refactoring_opportunities: Sequence[str] = Field( description="Actionable bullet points for refactoring" ) + class PlanOutput(BaseModel): """Step-by-step refactoring execution plan.""" + refactoring_summary: str = Field(description="High-level refactoring objective") - plan_steps: list[str] = Field(description="Sequential actions to achieve refactoring") + plan_steps: Sequence[str] = Field(description="Sequential actions to achieve refactoring") + class ImplementationOutput(BaseModel): """Final refactored code with change explanations.""" + refactored_code: str = Field( description="PEP8-compliant Python code with type hints and docstrings" ) - implementation_explanation: str = Field( - description="Rationale for implemented changes" - ) + implementation_explanation: str = Field(description="Rationale for implemented changes") @field_validator("refactored_code") @classmethod def extract_from_markdown(cls, v: str) -> str: return analysis.extract_python_code(v) + class EvaluationOutput(BaseModel): """Holistic assessment of refactoring quality.""" - final_score: float = Field( - description="Weighted quality score (0.0-1.0)", - ge=0.0, - le=1.0 - ) - final_suggestion: str = Field( - description="Improvement recommendations or approval" - ) + + final_score: float = Field(description="Weighted quality score (0.0-1.0)", ge=0.0, 
le=1.0) + final_suggestion: str = Field(description="Improvement recommendations or approval") @model_validator(mode="after") def validate_score_precision(self) -> "EvaluationOutput": @@ -63,46 +58,55 @@ def validate_score_precision(self) -> "EvaluationOutput": self.final_score = round(self.final_score, 2) return self + # --- DSPy Signatures --- class CodeAnalysis(dspy.Signature): """Analyze Python code for functionality and improvement areas.""" + code_snippet: str = dspy.InputField(desc="Python code to analyze") analysis: AnalysisOutput = dspy.OutputField() + class RefactoringPlan(dspy.Signature): """Create refactoring plan based on code analysis.""" + code_snippet: str = dspy.InputField(desc="Original Python code") analysis: str = dspy.InputField(desc="Code analysis summary") plan: PlanOutput = dspy.OutputField() + class RefactoredCode(dspy.Signature): """Generate refactored code from execution plan.""" + original_code: str = dspy.InputField(desc="Unmodified source code") refactoring_summary: str = dspy.InputField(desc="Refactoring objective") - plan_steps: list[str] = dspy.InputField(desc="Step-by-step refactoring actions") + plan_steps: Sequence[str] = dspy.InputField(desc="Step-by-step refactoring actions") implementation: ImplementationOutput = dspy.OutputField() + class FinalEvaluation(dspy.Signature): """Assess refactored code quality with quantitative metrics.""" + code_snippet: str = dspy.InputField(desc="Refactored Python code") quality_scores: str = dspy.InputField(desc="JSON quality metrics") functional_score: float = dspy.InputField(desc="Test pass rate (0.0-1.0)") evaluation: EvaluationOutput = dspy.OutputField() + # --- DSPy Modules --- class CodeRefactor(dspy.Module): """Orchestrates code analysis, planning, and refactoring.""" + def __init__(self): super().__init__() - self.analyzer = dspy.Predict(CodeAnalysis) - self.planner = dspy.Predict(RefactoringPlan) - self.implementer = dspy.Predict(RefactoredCode) + self.analyzer: dspy.Module = 
dspy.Predict(CodeAnalysis) + self.planner: dspy.Module = dspy.Predict(RefactoringPlan) + self.implementer: dspy.Module = dspy.Predict(RefactoredCode) def forward(self, code_snippet: str) -> dspy.Prediction: analysis_result = self.analyzer(code_snippet=code_snippet) plan_result = self.planner( - code_snippet=code_snippet, - analysis=analysis_result.analysis.analysis + code_snippet=code_snippet, analysis=analysis_result.analysis.analysis ) impl_result = self.implementer( original_code=code_snippet, @@ -119,16 +123,16 @@ def forward(self, code_snippet: str) -> dspy.Prediction: implementation_explanation=impl_result.implementation.implementation_explanation, ) + class RefactoringEvaluator(dspy.Module): """Evaluates refactored code through automated checks and LLM assessment.""" + def __init__(self): super().__init__() - self.evaluator = dspy.Predict(FinalEvaluation) + self.evaluator: dspy.Module = dspy.Predict(FinalEvaluation) def _handle_evaluation_success( - self, - eval_data: EvaluationResult, - refactored_code: str + self, eval_data: EvaluationResult, refactored_code: str ) -> float: """Process successful programmatic evaluation.""" functional_score = ( @@ -148,11 +152,7 @@ def _handle_evaluation_success( logger.error(f"LLM evaluation failed: {e}", exc_info=True) return FAILURE_SCORE - def forward( - self, - original_example: dspy.Example, - prediction: dspy.Prediction - ) -> float: + def forward(self, original_example: dspy.Example, prediction: dspy.Prediction) -> float: refactored_code = getattr(prediction, "refactored_code", "") if not refactored_code: logger.warning("Evaluation aborted: Missing refactored code") @@ -164,8 +164,8 @@ def forward( return FAILURE_SCORE test_cases = getattr(original_example, "test_cases", []) - eval_result: Result[EvaluationResult, str] = ( - evaluation.evaluate_refactored_code(code_to_evaluate, test_cases) + eval_result: Result[EvaluationResult, str] = evaluation.evaluate_refactored_code( + code_to_evaluate, test_cases ) if 
isinstance(eval_result, Success): @@ -173,25 +173,3 @@ def forward( else: logger.warning(f"Programmatic evaluation failed: {eval_result.failure()}") return FAILURE_SCORE - -# --- Data Loading --- -def load_training_data() -> list[dspy.Example]: - """Load training examples from external JSON file.""" - data_path = Path(__file__).parent / TRAINING_DATA_FILE - try: - with data_path.open("r", encoding="utf-8") as f: - return [ - dspy.Example( - code_snippet=item["code_snippet"], - test_cases=[TestCase(**tc) for tc in item.get("test_cases", [])] - ).with_inputs("code_snippet") - for item in json.load(f) - ] - except FileNotFoundError: - logger.error(f"Training data file not found: {data_path}") - except json.JSONDecodeError as e: - logger.error(f"Invalid JSON in training data: {e}") - except KeyError as e: - logger.error(f"Missing required key in training data: {e}") - - return [] diff --git a/src/robofactor/evaluation.py b/src/robofactor/evaluation.py index eb109c6..f09580a 100644 --- a/src/robofactor/evaluation.py +++ b/src/robofactor/evaluation.py @@ -1,38 +1,18 @@ -""" -Data models and core logic for evaluating refactored code. - -This module defines the structures for test cases, quality scores, and -evaluation results, and contains the pure function for performing the evaluation. -It leverages the 'returns' library for robust, type-safe error handling using -a railway-oriented programming approach. -""" - from __future__ import annotations -from typing import Any, NamedTuple +from collections.abc import Sequence +from typing import NamedTuple from pydantic import BaseModel, Field from returns.result import Failure, Result, Success, safe from . 
import analysis +from .json.types import JSON +from .models import TestCase, CodeQualityScores -class TestCase(BaseModel): - """A single, executable test case for a function.""" - - args: list[Any] = Field(default_factory=list) - kwargs: dict[str, Any] = Field(default_factory=dict) - expected_output: Any - -class CodeQualityScores(BaseModel): - """Holds various code quality metrics.""" - linting_score: float - complexity_score: float - typing_score: float - docstring_score: float - linting_issues: list[str] = Field(default_factory=list) class FunctionalCheckResult(NamedTuple): @@ -78,7 +58,7 @@ def _check_quality(code: str, func_name: str) -> CodeQualityScores: @safe def _check_functional_correctness( - code: str, func_name: str, tests: list[TestCase] + code: str, func_name: str, tests: Sequence[TestCase] ) -> FunctionalCheckResult: """ Runs functional tests and returns the pass rate. @@ -93,7 +73,7 @@ def _check_functional_correctness( def evaluate_refactored_code( - code: str, tests: list[TestCase] + code: str, tests: Sequence[TestCase] ) -> Result[EvaluationResult, str]: """ Performs a full evaluation of the refactored code. @@ -105,7 +85,7 @@ def evaluate_refactored_code( Args: code: The refactored Python code to evaluate. - tests: A list of test cases to verify functional correctness. + tests: A Sequence of test cases to verify functional correctness. 
Returns: A `Result` container: diff --git a/src/robofactor/function_extraction.py b/src/robofactor/function_extraction.py index 45aa9e2..5617703 100644 --- a/src/robofactor/function_extraction.py +++ b/src/robofactor/function_extraction.py @@ -1,6 +1,6 @@ import ast import enum -from collections.abc import Iterator +from collections.abc import Iterator, Sequence from dataclasses import dataclass from pathlib import Path @@ -162,7 +162,7 @@ def from_ast_node( is_async=isinstance(node, ast.AsyncFunctionDef), context=context, docstring=extract_docstring(node), - return_annotation=ast_node_to_source(node.returns) if node.returns else None, + return_annotation=(ast_node_to_source(node.returns) if node.returns else None), ) @@ -184,12 +184,12 @@ def ast_node_to_source(node: ast.AST) -> str: return repr(node) -def extract_decorators(decorators: list[ast.expr]) -> tuple[Decorator, ...]: +def extract_decorators(decorators: Sequence[ast.expr]) -> tuple[Decorator, ...]: """ - Extract decorator information from an AST decorator list. + Extract decorator information from an AST decorator Sequence. Args: - decorators: A list of decorator nodes from an AST function definition. + decorators: A Sequence of decorator nodes from an AST function definition. Returns: A tuple of Decorator objects. 
@@ -527,7 +527,7 @@ def format_param(p: Parameter) -> str: res += f" = {p.default}" return res - param_parts: list[str] = [] + param_parts: Sequence[str] = [] pos_only_ended = False var_pos_added = False diff --git a/src/robofactor/json/__init__.py b/src/robofactor/json/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/robofactor/json/is_json_list.py b/src/robofactor/json/is_json_list.py new file mode 100644 index 0000000..affa90b --- /dev/null +++ b/src/robofactor/json/is_json_list.py @@ -0,0 +1,8 @@ +from collections.abc import Sequence +from typing import TypeGuard + +from .types import JSON + + +def is_json_list(x: JSON) -> TypeGuard[Sequence[JSON]]: + return isinstance(x, Sequence) diff --git a/src/robofactor/json/types.py b/src/robofactor/json/types.py new file mode 100644 index 0000000..2f21e4f --- /dev/null +++ b/src/robofactor/json/types.py @@ -0,0 +1,6 @@ +from collections.abc import Mapping, Sequence + +type JSONPrimitive = None | bool | int | float | str +type JSONSequence = Sequence["JSON"] +type JSONObject = Mapping[str, "JSON"] +type JSON = JSONPrimitive | JSONSequence | JSONObject diff --git a/src/robofactor/main.py b/src/robofactor/main.py index fd98233..617f704 100644 --- a/src/robofactor/main.py +++ b/src/robofactor/main.py @@ -1,6 +1,7 @@ """ Main entry point for the command-line interface (CLI) of the refactoring tool. """ + from pathlib import Path from typing import Annotated @@ -15,8 +16,10 @@ from . 
import config, ui, utils from .analysis import extract_python_code -from .dspy_modules import CodeRefactor, RefactoringEvaluator, load_training_data -from .evaluation import TestCase, evaluate_refactored_code +from .dspy_modules import CodeRefactor, RefactoringEvaluator +from .models import TestCase +from .evaluation import evaluate_refactored_code +from .training.training_loader import load_training_data app = typer.Typer() @@ -28,13 +31,17 @@ def _setup_environment(tracing: bool, mlflow_uri: str, mlflow_experiment: str) - if tracing: console.print(f"[bold yellow]MLflow tracing enabled. URI: {mlflow_uri}[/bold yellow]") mlflow.set_tracking_uri(mlflow_uri) - mlflow.set_experiment(mlflow_experiment) - mlflow.dspy.autolog(log_compiles=True, log_traces=True) + _ = mlflow.set_experiment(mlflow_experiment) + _ = mlflow.autolog() return console def _load_or_compile_model( - optimizer_path: Path, optimize: bool, console: Console, prompt_llm: dspy.LM, task_llm: dspy.LM + optimizer_path: Path, + optimize: bool, + console: Console, + prompt_llm: dspy.LM, + task_llm: dspy.LM, ) -> dspy.Module: """Loads an optimized DSPy model or compiles a new one.""" refactorer = CodeRefactor() @@ -57,7 +64,7 @@ def _load_or_compile_model( num_threads=8, ) teleprompter.compile( - refactorer, trainset=load_training_data(), requires_permission_to_run=False + refactorer, trainset=list(load_training_data()), requires_permission_to_run=False ) console.print(f"Optimization complete. 
Saving to {optimizer_path}...") self_correcting_refactorer.save(str(optimizer_path), save_program=True) @@ -96,14 +103,14 @@ def _run_refactoring_on_file( evaluation = evaluate_refactored_code(refactored_code, tests) - match evaluation: + match evaluation: # type: ignore[reportMatchNotExhaustive] case Success(eval_data): ui.display_evaluation_results(console, eval_data) if write: console.print( f"[yellow]Writing refactored code back to {script_path.name}...[/yellow]" ) - script_path.write_text(refactored_code, encoding="utf-8") + _ = script_path.write_text(refactored_code, encoding="utf-8") console.print(f"[green]Refactoring of {script_path.name} complete.[/green]") case Failure(error_message): console.print( @@ -141,7 +148,9 @@ def main( False, "--optimize", help="Force re-optimization of the DSPy model." ), task_llm_model: str = typer.Option( - config.DEFAULT_TASK_LLM, "--task-llm", help="Model for the main refactoring task." + config.DEFAULT_TASK_LLM, + "--task-llm", + help="Model for the main refactoring task.", ), prompt_llm_model: str = typer.Option( config.DEFAULT_PROMPT_LLM, @@ -150,10 +159,14 @@ def main( ), tracing: bool = typer.Option(True, "--tracing/--no-tracing", help="Enable MLflow tracing."), mlflow_uri: str = typer.Option( - config.DEFAULT_MLFLOW_TRACKING_URI, "--mlflow-uri", help="MLflow tracking server URI." + config.DEFAULT_MLFLOW_TRACKING_URI, + "--mlflow-uri", + help="MLflow tracking server URI.", ), mlflow_experiment: str = typer.Option( - config.DEFAULT_MLFLOW_EXPERIMENT_NAME, "--mlflow-experiment", help="MLflow experiment name." 
+ config.DEFAULT_MLFLOW_EXPERIMENT_NAME, + "--mlflow-experiment", + help="MLflow experiment name.", ), ): """A DSPy-powered tool to analyze, plan, and refactor Python code.""" diff --git a/src/robofactor/models.py b/src/robofactor/models.py new file mode 100644 index 0000000..5805a6f --- /dev/null +++ b/src/robofactor/models.py @@ -0,0 +1,19 @@ +from .json.types import JSON +from collections.abc import Sequence, Mapping +from pydantic import BaseModel, Field + + +class TestCase(BaseModel): + """Represents a single test case with positional and keyword args and expected output.""" + args: JSON = Field() + kwargs: JSON = Field() + expected_output: JSON + + +class CodeQualityScores(BaseModel): + """Holds various code quality metrics.""" + linting_score: float + complexity_score: float + typing_score: float + docstring_score: float + linting_issues: Sequence[str] = Field(default_factory=list) diff --git a/src/robofactor/training/__init__.py b/src/robofactor/training/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/robofactor/training_data.json b/src/robofactor/training/training_data.json similarity index 100% rename from src/robofactor/training_data.json rename to src/robofactor/training/training_data.json diff --git a/src/robofactor/training/training_loader.py b/src/robofactor/training/training_loader.py new file mode 100644 index 0000000..b053b5d --- /dev/null +++ b/src/robofactor/training/training_loader.py @@ -0,0 +1,64 @@ +import json +from collections.abc import Sequence +from logging import getLogger +from pathlib import Path +from typing import TypeGuard, cast + +import dspy + +from ..config import TRAINING_DATA_FILE +from ..models import TestCase +from ..json.is_json_list import is_json_list +from ..json.types import JSON, JSONObject + +FAILURE_SCORE = 0.0 +logger = getLogger(__name__) + + +def is_training_item(x: JSON) -> TypeGuard[JSONObject]: + return ( + isinstance(x, dict) + and "code_snippet" in x + and 
isinstance(x["code_snippet"], str) + and ( + "test_cases" not in x + or ( + isinstance(x["test_cases"], Sequence) + and all(isinstance(tc, dict) for tc in x["test_cases"]) + ) + ) + ) + + +def load_training_data() -> Sequence[dspy.Example]: + data_path = Path(__file__).parent / TRAINING_DATA_FILE + try: + # CAST the untyped json.loads → JSON + raw = cast(JSON, json.loads(data_path.read_text(encoding="utf-8"))) + except FileNotFoundError: + logger.error(f"Training data file not found: {data_path}") + return [] + except json.JSONDecodeError as e: + logger.error(f"Invalid JSON in training data: {e}") + return [] + + # NARROW to actual Sequence[JSON] + if not is_json_list(raw): + logger.error(f"Expected top-level array, got {type(raw).__name__}") + return [] + + items: Sequence[dspy.Example] = [] + for idx, entry in enumerate(raw): + if not is_training_item(entry): + logger.error(f"Invalid training entry at index {idx}: {entry!r}") + continue + code = entry["code_snippet"] + raw_tcs = entry.get("test_cases", []) + tcs = cast(Sequence[JSONObject], raw_tcs) + items.append( + dspy.Example( + code_snippet=code, + test_cases=[TestCase(**tc) for tc in tcs], + ).with_inputs("code_snippet") + ) + return items diff --git a/src/robofactor/ui.py b/src/robofactor/ui.py index a28986b..3326bf1 100644 --- a/src/robofactor/ui.py +++ b/src/robofactor/ui.py @@ -1,3 +1,4 @@ + """ Presentation logic for displaying results in the console. @@ -22,11 +23,11 @@ def display_refactoring_process(console: Console, prediction: dspy.Prediction) - console.print(Panel(prediction.analysis, title="[bold cyan]Analysis[/bold cyan]", expand=False)) plan_text = Text() - plan_text.append("Summary: ", style="bold") - plan_text.append(prediction.refactoring_summary) - plan_text.append("\n\n") + _ = plan_text.append("Summary: ", style="bold") + _ = plan_text.append(prediction.refactoring_summary) + _ = plan_text.append("\n\n") for i, step in enumerate(prediction.plan_steps, 1): - plan_text.append(f"{i}. 
{step}\n") + _ = plan_text.append(f"{i}. {step}\n") console.print(Panel(plan_text, title="[bold cyan]Refactoring Plan[/bold cyan]")) console.print( diff --git a/src/robofactor/utils.py b/src/robofactor/utils.py index a2d548e..2543157 100644 --- a/src/robofactor/utils.py +++ b/src/robofactor/utils.py @@ -1,5 +1,6 @@ import warnings + def suppress_pydantic_warnings(): # TODO: Remove this warning suppression once DSPy fixes Pydantic serialization compatibility. # @@ -31,4 +32,4 @@ def suppress_pydantic_warnings(): # # Last checked: 19 June 2025 # DSPy version when added: 2.6.27 - warnings.filterwarnings('ignore', category=UserWarning, module='pydantic.main') + warnings.filterwarnings("ignore", category=UserWarning, module="pydantic.main") From 3a09bad23ebcb396e4f427609a7d0988fd8cfa1e Mon Sep 17 00:00:00 2001 From: ethan-wickstrom Date: Mon, 23 Jun 2025 21:18:07 -0500 Subject: [PATCH 06/26] docs: add comprehensive coding standards and style guide Establishes project-wide development guidelines covering type system usage, CLI patterns, error handling, function design, data structures, testing, architecture, performance, async patterns, documentation, security, logging, database practices, API design, and refactoring standards. The .windsurfrules file provides detailed best practices with GOOD/BAD examples for consistent code quality across the robofactor project. 
--- .windsurfrules | 202 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 202 insertions(+) create mode 100644 .windsurfrules diff --git a/.windsurfrules b/.windsurfrules new file mode 100644 index 0000000..83ca2f8 --- /dev/null +++ b/.windsurfrules @@ -0,0 +1,202 @@ +# [!SYSTEM] INSTRUCTIONS + +## Type System and Imports + +- Always use `Sequence` over list; GOOD: always use `Sequence` by importing using `from collections.abc import Sequence` and BAD: never import using `from typing import Sequence` +- Always use `Mapping` over `dict`; GOOD: always use `Mapping` by importing using `from collections.abc import Mapping` and BAD: never import using `from typing import Mapping` +- Always use `Result` over `Optional` or `None`; GOOD: always use `Result` by importing using `from returns.result import Result` and BAD: never import using `from typing import Optional` or `None` +- Prefer `Callable` over `typing.Callable`; GOOD: always use `Callable` by importing using `from collections.abc import Callable` and BAD: never import using `from typing import Callable` +- Use `Iterable` for read-only iteration; GOOD: always use `Iterable` by importing using `from collections.abc import Iterable` and BAD: never import using `from typing import Iterable` +- Avoid `Any`; use explicit types or `TypeVar`; GOOD: always use specific types and BAD: never import using `from typing import Any` +- Use `TypeVar` for generic type parameters; GOOD: `T = TypeVar('T')` and BAD: never use `Any` as a generic placeholder +- Always use `Final` for constants; GOOD: `from typing import Final` then `MAX_RETRIES: Final[int] = 3` and BAD: never use mutable constants +- Use `Literal` for specific string/int values; GOOD: `from typing import Literal` then `Mode = Literal['read', 'write']` and BAD: never use plain `str` for enums +- Always prefer `TypeAlias` for complex types; GOOD: `from typing import TypeAlias` then `UserID: TypeAlias = int` and BAD: never repeat complex type annotations 
+- Use `Protocol` for structural subtyping; GOOD: `from typing import Protocol` for interfaces and BAD: never use ABC classes just for typing +- Always use `TypedDict` for structured dicts; GOOD: `from typing import TypedDict` and BAD: never use `dict[str, Any]` +- Prefer `frozenset` over `set` for immutable collections; GOOD: `frozenset({1, 2, 3})` and BAD: avoid mutable sets in function returns +- Use `tuple` for fixed-length sequences; GOOD: `tuple[int, str, bool]` and BAD: never use `list` for fixed-size data +- Always specify variance in TypeVars; GOOD: `TypeVar('T', covariant=True)` when appropriate and BAD: never leave variance implicit +- Use `NewType` for semantic distinctions; GOOD: `UserId = NewType('UserId', int)` and BAD: never conflate different semantic types + +## CLI and Command Line Tools + +- For CLIs, always use Typer; GOOD: always use `typer` by importing using `import typer` and BAD: never import using `import click` +- When using Annotated, never put the default value in Option(); only use it as the parameter default +- Always use Annotated for CLI parameters; GOOD: `name: Annotated[str, typer.Option("--name")]` and BAD: never use direct assignment +- Group related commands using Typer sub-apps; GOOD: `app.add_typer(users_app, name="users")` and BAD: never have flat command structures +- Always provide help text for commands and options; GOOD: `typer.Option(help="User name")` and BAD: never leave options undocumented +- Use rich for enhanced CLI output; GOOD: `from rich.console import Console` and BAD: never use plain print for complex output +- Implement proper exit codes; GOOD: `raise typer.Exit(code=1)` and BAD: never use `sys.exit()` in CLI commands +- Always validate CLI inputs early; GOOD: validate in the command function and BAD: never defer validation to business logic +- Use typer.echo for output; GOOD: `typer.echo("message")` and BAD: never use print() in CLI commands +- Provide shell completion; GOOD: implement custom completion 
functions and BAD: never ignore shell integration + +## Error Handling and Result Types + +- Always use Result for fallible operations; GOOD: `Result[Success, Error]` and BAD: never throw exceptions for expected errors +- Chain Results with map/bind; GOOD: `result.map(transform).bind(validate)` and BAD: never unwrap Results prematurely +- Use specific error types; GOOD: `Result[User, UserNotFoundError]` and BAD: never use generic `Exception` +- Always handle both Success and Failure cases; GOOD: pattern match or use `result.fold()` and BAD: never ignore error cases +- Create error hierarchies with dataclasses; GOOD: `@dataclass class ValidationError` and BAD: never use string errors +- Use `returns.maybe.Maybe` for nullable values; GOOD: `Maybe[User]` and BAD: never use raw `None` +- Compose error-prone operations; GOOD: `returns.pipeline.pipe` and BAD: never nest try-except blocks +- Log errors at boundaries only; GOOD: log at service edges and BAD: never log deep in business logic +- Always provide error context; GOOD: include relevant ids/values in errors and BAD: never raise context-free errors +- Use `returns.io.IO` for side effects; GOOD: `IO[str]` for file reads and BAD: never perform I/O in pure functions + +## Function Design and Composition + +- Keep functions under 20 lines; GOOD: extract helper functions and BAD: never write long procedural functions +- Single responsibility per function; GOOD: one clear purpose and BAD: never mix concerns +- Use descriptive names; GOOD: `calculate_tax_rate()` and BAD: never use `calc()` or `process()` +- Parameters should be immutable; GOOD: accept `Sequence` and BAD: never mutate input parameters +- Return new values; GOOD: `return dataclasses.replace(obj, field=new_value)` and BAD: never modify and return same object +- Limit function parameters to 4; GOOD: use parameter objects and BAD: never have functions with 7+ parameters +- Always type annotate; GOOD: full annotations for params and returns and BAD: never rely 
on type inference +- Use keyword-only arguments; GOOD: `def fn(*, name: str)` and BAD: never rely on positional args for clarity +- Compose small functions; GOOD: `pipe(data, parse, validate, transform)` and BAD: never write monolithic functions +- Cache pure computations; GOOD: `@functools.cache` for expensive pure functions and BAD: never recompute identical results + +## Data Structures and Immutability + +- Always use frozen dataclasses; GOOD: `@dataclass(frozen=True)` and BAD: never use mutable dataclasses +- Prefer immutable collections; GOOD: `tuple`, `frozenset`, `MappingProxyType` and BAD: avoid `list`, `set`, `dict` in APIs +- Use `copy.deepcopy` sparingly; GOOD: design for immutability and BAD: never rely on deep copying for safety +- Return new instances; GOOD: `dataclasses.replace()` and BAD: never mutate and return +- Use `__slots__` for performance; GOOD: `__slots__ = ('x', 'y')` in classes and BAD: never ignore memory efficiency +- Implement `__eq__` and `__hash__` properly; GOOD: use `@dataclass` or implement both and BAD: never implement just one +- Use enums for fixed choices; GOOD: `class Status(Enum)` and BAD: never use string constants +- Design algebraic data types; GOOD: sum types with Union and BAD: never use inheritance for variants +- Validate at construction; GOOD: `__post_init__` validation and BAD: never allow invalid states +- Use builders for complex objects; GOOD: builder pattern with validation and BAD: never use complex constructors + +## Testing and Validation + +- Write tests first; GOOD: TDD approach and BAD: never write tests after implementation +- One assertion per test; GOOD: focused test cases and BAD: never test multiple behaviors +- Use descriptive test names; GOOD: `test_calculate_tax_returns_zero_for_negative_income` and BAD: never use `test_1` +- Test edge cases; GOOD: empty, null, boundary values and BAD: never test only happy path +- Use property-based testing; GOOD: `hypothesis` for invariants and BAD: never rely 
only on examples +- Mock at boundaries; GOOD: mock external services and BAD: never mock internal functions +- Use fixtures properly; GOOD: `@pytest.fixture` for reusable setup and BAD: never duplicate test setup +- Test error conditions; GOOD: verify error types and messages and BAD: never ignore failure cases +- Keep tests independent; GOOD: each test runs in isolation and BAD: never depend on test order +- Use test doubles correctly; GOOD: stubs for queries, mocks for commands and BAD: never overuse mocks + +## Code Organization and Architecture + +- One module per concept; GOOD: `user.py`, `payment.py` and BAD: never have `utils.py` or `helpers.py` +- Clear module boundaries; GOOD: explicit public APIs and BAD: never expose internals +- Use `__init__.py` wisely; GOOD: re-export public API and BAD: never leave empty +- Separate concerns; GOOD: business logic, I/O, presentation and BAD: never mix layers +- Dependency injection; GOOD: pass dependencies explicitly and BAD: never use global state +- Use protocols for dependencies; GOOD: depend on protocols and BAD: never depend on concrete types +- Group by feature; GOOD: `features/user/`, `features/payment/` and BAD: never group by type +- Keep imports at top; GOOD: standard, third-party, local order and BAD: never import inside functions +- Use relative imports carefully; GOOD: absolute for public API and BAD: never use relative in public modules +- Define clear interfaces; GOOD: protocol or ABC at module boundary and BAD: never leak implementation + +## Performance and Optimization + +- Measure before optimizing; GOOD: use profiling tools and BAD: never optimize prematurely +- Use generators for large datasets; GOOD: `yield` for streaming and BAD: never load everything in memory +- Cache expensive computations; GOOD: `@lru_cache` or `@cache` and BAD: never recompute unchanged results +- Use appropriate data structures; GOOD: `set` for membership, `deque` for queues and BAD: never use wrong structure +- Batch I/O 
operations; GOOD: bulk reads/writes and BAD: never do I/O in loops +- Use `__slots__` for many instances; GOOD: memory efficiency and BAD: never ignore memory usage +- Prefer `bisect` for sorted data; GOOD: O(log n) operations and BAD: never linear search sorted data +- Use `itertools` effectively; GOOD: `chain`, `groupby`, etc. and BAD: never reinvent iterations +- Compile regex once; GOOD: `PATTERN = re.compile()` and BAD: never compile in loops +- Use numpy for numerical work; GOOD: vectorized operations and BAD: never loop over arrays + +## Async and Concurrency + +- Use async for I/O bound tasks; GOOD: `async def` for I/O and BAD: never block event loop +- Prefer asyncio over threading; GOOD: async/await pattern and BAD: never use threads for I/O +- Use `asyncio.gather` for parallel tasks; GOOD: concurrent execution and BAD: never await sequentially +- Handle async errors properly; GOOD: try/except in async and BAD: never ignore async exceptions +- Use async context managers; GOOD: `async with` for resources and BAD: never leak async resources +- Limit concurrent operations; GOOD: use semaphores and BAD: never overwhelm resources +- Use `asyncio.create_task` wisely; GOOD: for fire-and-forget and BAD: never lose task references +- Test async code properly; GOOD: `pytest-asyncio` and BAD: never test async as sync +- Avoid blocking calls; GOOD: use async libraries and BAD: never call blocking functions in async +- Use queues for task distribution; GOOD: `asyncio.Queue` and BAD: never share mutable state + +## Documentation and Comments + +- Write docstrings for public APIs; GOOD: Google/NumPy style and BAD: never leave public functions undocumented +- Document why, not what; GOOD: explain decisions and BAD: never state the obvious +- Keep docs in sync; GOOD: update with code and BAD: never have outdated docs +- Use type hints as documentation; GOOD: self-documenting types and BAD: never rely only on docstrings +- Document exceptions; GOOD: list raised exceptions 
and BAD: never hide error conditions +- Provide examples; GOOD: doctest examples and BAD: never have abstract docs only +- Link to references; GOOD: cite algorithms/papers and BAD: never leave knowledge implicit +- Document assumptions; GOOD: state preconditions and BAD: never assume context +- Use meaningful variable names; GOOD: self-documenting code and BAD: never use cryptic names +- Keep README updated; GOOD: current setup/usage and BAD: never have stale instructions + +## Security and Safety + +- Validate all inputs; GOOD: whitelist validation and BAD: never trust user input +- Use parameterized queries; GOOD: prepared statements and BAD: never concatenate SQL +- Hash passwords properly; GOOD: bcrypt/argon2 and BAD: never store plain text +- Use secrets management; GOOD: environment variables and BAD: never hardcode secrets +- Implement rate limiting; GOOD: protect endpoints and BAD: never allow unlimited requests +- Log security events; GOOD: audit trail and BAD: never ignore suspicious activity +- Use HTTPS everywhere; GOOD: TLS for all connections and BAD: never send data unencrypted +- Validate file uploads; GOOD: check type/size and BAD: never trust file extensions +- Implement CSRF protection; GOOD: tokens for state changes and BAD: never rely on cookies alone +- Keep dependencies updated; GOOD: regular updates and BAD: never ignore security advisories + +## Logging and Monitoring + +- Use structured logging; GOOD: JSON logs with context and BAD: never use print statements +- Log at appropriate levels; GOOD: ERROR for errors, INFO for events and BAD: never log everything as DEBUG +- Include correlation IDs; GOOD: trace requests and BAD: never log without context +- Log at boundaries; GOOD: entry/exit points and BAD: never log in business logic +- Avoid logging sensitive data; GOOD: mask PII and BAD: never log passwords/tokens +- Use log aggregation; GOOD: centralized logging and BAD: never rely on local logs +- Set up alerts; GOOD: monitor error rates 
and BAD: never ignore errors +- Log performance metrics; GOOD: timing, counts and BAD: never fly blind +- Implement health checks; GOOD: `/health` endpoint and BAD: never assume service health +- Use distributed tracing; GOOD: trace across services and BAD: never lose request context + +## Database and Persistence + +- Use migrations; GOOD: version control schema and BAD: never modify schema manually +- Write idempotent migrations; GOOD: safe to rerun and BAD: never assume migration state +- Use transactions properly; GOOD: ACID guarantees and BAD: never leave data inconsistent +- Implement retry logic; GOOD: handle transient failures and BAD: never fail on first error +- Use connection pooling; GOOD: reuse connections and BAD: never create per-request +- Index foreign keys; GOOD: performance optimization and BAD: never ignore query patterns +- Use EXPLAIN for queries; GOOD: understand performance and BAD: never deploy untested queries +- Implement soft deletes; GOOD: audit trail and BAD: never lose data permanently +- Use read replicas; GOOD: scale read operations and BAD: never overload primary +- Backup regularly; GOOD: automated backups and BAD: never rely on manual process + +## API Design + +- Use consistent naming; GOOD: REST conventions and BAD: never mix styles +- Version your APIs; GOOD: `/v1/`, `/v2/` and BAD: never break clients +- Use proper HTTP methods; GOOD: GET for reads, POST for creates and BAD: never use GET for mutations +- Return appropriate status codes; GOOD: 201 for created, 404 for not found and BAD: never return 200 for errors +- Implement pagination; GOOD: limit/offset or cursor and BAD: never return unbounded lists +- Use content negotiation; GOOD: Accept headers and BAD: never hardcode formats +- Implement rate limiting; GOOD: protect resources and BAD: never allow abuse +- Document with OpenAPI; GOOD: machine-readable specs and BAD: never rely on prose +- Use HATEOAS principles; GOOD: discoverable APIs and BAD: never require hardcoded 
URLs +- Implement idempotency; GOOD: safe retries and BAD: never have side effects on retry + +## Refactoring and Maintenance + +- Refactor in small steps; GOOD: incremental changes and BAD: never big bang refactors +- Keep tests green; GOOD: refactor with confidence and BAD: never break tests +- Use feature flags; GOOD: gradual rollout and BAD: never deploy all at once +- Document refactoring decisions; GOOD: ADRs and BAD: never lose context +- Remove dead code; GOOD: clean as you go and BAD: never leave commented code +- Update dependencies gradually; GOOD: one at a time and BAD: never update all at once +- Use deprecation warnings; GOOD: give users time and BAD: never break without warning +- Measure impact; GOOD: performance metrics and BAD: never assume improvement +- Refactor tests too; GOOD: maintain test quality and BAD: never let tests rot +- Keep refactoring atomic; GOOD: one concept per commit and BAD: never mix refactors with features From 13df166e05da609afb7a87af582538a4f2244114 Mon Sep 17 00:00:00 2001 From: ethan-wickstrom Date: Mon, 23 Jun 2025 21:19:29 -0500 Subject: [PATCH 07/26] feat(scripts): add git diff visualization tool and code utilities Introduces create_diffs.py for displaying staged/unstaged git diffs in Markdown format with flexible output options. Also adds utility script for automated list-to-Sequence refactoring and applies consistent code style formatting to the README generator. The diff tool supports multiple modes (staged/unstaged/both), output formats (stat/patch), and customizable context lines with word-level diff capabilities. 
--- scripts/create_diffs.py | 162 ++++++++++++++++++++++++++ scripts/generate_readme.py | 94 +++++++-------- scripts/replace_list_with_sequence.py | 64 ++++++++++ 3 files changed, 270 insertions(+), 50 deletions(-) create mode 100644 scripts/create_diffs.py create mode 100644 scripts/replace_list_with_sequence.py diff --git a/scripts/create_diffs.py b/scripts/create_diffs.py new file mode 100644 index 0000000..a8a8608 --- /dev/null +++ b/scripts/create_diffs.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python3 +"""Show staged/unstaged git diffs in smart Markdown format.""" + +import subprocess +from collections.abc import Sequence +from dataclasses import dataclass +from enum import Enum +from typing import Annotated + +import typer +from returns.result import Result, Success, Failure + +app = typer.Typer(help="Show staged/unstaged git diffs in Markdown") + +DiffOptions = Sequence[str] +DiffText = str + + +class DiffMode(str, Enum): + staged = "staged" + unstaged = "unstaged" + both = "both" + + +class DiffType(str, Enum): + stat = "stat" + patch = "patch" + + +@dataclass(frozen=True) +class GitError: + command: tuple[str, ...] + return_code: int + stderr: str + + +def build_diff_opts(is_stat: bool, context: int, word_diff: bool) -> DiffOptions: + """Return git-diff options for stat-only or patch with context.""" + if is_stat: + return ("--stat",) + base: tuple[str, ...] = ("--minimal", f"-U{context}", "--color=never") + return base + (("--word-diff",) if word_diff else ()) + + +def run_git_diff(opts: DiffOptions) -> Result[DiffText, GitError]: + """ + Runs `git` with the given options. 
+ - return code >1 ⇒ Failure(GitError) + - return code ==1 ⇒ Success("") (no changes) + - return code ==0 ⇒ Success(stdout) + """ + command = tuple(["git", *opts]) + proc = subprocess.run( + command, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + rc = proc.returncode + if rc > 1: + return Failure(GitError(command=command, return_code=rc, stderr=proc.stderr)) + if rc == 1: + return Success("") # no changes + return Success(proc.stdout) + + +def format_section(title: str, diff_text: str) -> str: + """Render a Markdown section for a diff under the given title.""" + header = f"### {title}\n\n" + if not diff_text.strip(): + return header + "_No changes_\n\n" + fence = "```diff" if diff_text.startswith(("diff --", "@@")) else "```text" + body = f"{fence}\n{diff_text.rstrip()}\n```\n\n" + return header + body + + +@app.command() +def main( + which: Annotated[ + DiffMode, + typer.Option( + "-m", + "--mode", + help="Which diffs to show: 'staged', 'unstaged', or 'both' (default: both)", + ), + ] = DiffMode.both, + stat: Annotated[ + bool, + typer.Option("--stat", help="Show only stat for all sections"), + ] = False, + word_diff: Annotated[ + bool, + typer.Option("--word-diff", help="Enable word-level patch diff"), + ] = False, + context: Annotated[ + int, + typer.Option("-c", "--context", help="Context lines (ignored if --stat)"), + ] = 3, + staged_type: Annotated[ + DiffType | None, + typer.Option("--staged-type", help="Override mode for staged changes"), + ] = None, + unstaged_type: Annotated[ + DiffType | None, + typer.Option("--unstaged-type", help="Override mode for unstaged changes"), + ] = None, +) -> None: + """ + Show staged/unstaged git diffs formatted as Markdown. + Errors are printed to stderr and exit with the git return code. 
+ """ + + def section_is_stat(section_mode: DiffMode) -> bool: + if stat: + return True + override = staged_type if section_mode is DiffMode.staged else unstaged_type + return override is not None and override is DiffType.stat + + staged_opts: DiffOptions = ("diff", "--cached") + tuple( + build_diff_opts(section_is_stat(DiffMode.staged), context, word_diff) + ) + unstaged_opts: DiffOptions = ("diff",) + tuple( + build_diff_opts(section_is_stat(DiffMode.unstaged), context, word_diff) + ) + + # Staged + staged_diff = "" + if which is not DiffMode.unstaged: + res = run_git_diff(staged_opts) + if isinstance(res, Failure): + err = res.failure() + typer.secho( + f"Error running git {' '.join(err.command)}:\n{err.stderr}", + err=True, + fg="red", + ) + raise typer.Exit(err.return_code) + staged_diff = res.unwrap() + + # Unstaged + unstaged_diff = "" + if which is not DiffMode.staged: + res = run_git_diff(unstaged_opts) + if isinstance(res, Failure): + err = res.failure() + typer.secho( + f"Error running git {' '.join(err.command)}:\n{err.stderr}", + err=True, + fg="red", + ) + raise typer.Exit(err.return_code) + unstaged_diff = res.unwrap() + + # Print results + if which in (DiffMode.both, DiffMode.staged) and staged_diff: + print(format_section("Staged", staged_diff)) + if which in (DiffMode.both, DiffMode.unstaged) and unstaged_diff: + print(format_section("Unstaged", unstaged_diff)) + + +if __name__ == "__main__": + app() diff --git a/scripts/generate_readme.py b/scripts/generate_readme.py index 0e9e6c8..e4131b9 100644 --- a/scripts/generate_readme.py +++ b/scripts/generate_readme.py @@ -5,6 +5,7 @@ This module uses DSPy with Pydantic integration to analyze the project structure and generate a comprehensive README based on extracted information rather than assumptions. 
""" + from __future__ import annotations import logging @@ -35,13 +36,17 @@ sys.exit(1) # Configure logging -logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" +) logger = logging.getLogger(__name__) # --- Data Models (Pydantic) --- + class FunctionMetadata(BaseModel): """Metadata about a function extracted from source code.""" + name: str file_path: str docstring: str | None @@ -52,6 +57,7 @@ class FunctionMetadata(BaseModel): class SourceFileAnalysis(BaseModel): """Analysis of a single source file.""" + relative_path: str functions: list[FunctionMetadata] imports: list[str] = Field(default_factory=list) @@ -60,6 +66,7 @@ class SourceFileAnalysis(BaseModel): class ProjectMetadata(BaseModel): """Basic project metadata from pyproject.toml.""" + name: str description: str version: str | None = None @@ -72,23 +79,23 @@ class ProjectMetadata(BaseModel): class DevelopmentEnvironment(BaseModel): """Extracted development environment information.""" + package_manager: str = Field(description="The package manager used (e.g., uv, pip, poetry)") install_command: str = Field(description="Command to install the package") dev_install_command: str = Field(description="Command to install with dev dependencies") available_commands: dict[str, str] = Field( - default_factory=dict, - description="Available make/task commands and their descriptions" + default_factory=dict, description="Available make/task commands and their descriptions" ) python_version: str | None = None class ProjectFeatures(BaseModel): """High-level features extracted from the project.""" + core_technologies: list[str] = Field(description="Main technologies/libraries used") cli_capabilities: list[str] = Field(description="CLI commands and options available") key_modules: dict[str, str] = Field( - description="Key modules and their purposes", - 
default_factory=dict + description="Key modules and their purposes", default_factory=dict ) testing_framework: str | None = None code_quality_tools: list[str] = Field(default_factory=list) @@ -96,6 +103,7 @@ class ProjectFeatures(BaseModel): class ExtractedContext(BaseModel): """Complete extracted context for README generation.""" + metadata: ProjectMetadata environment: DevelopmentEnvironment features: ProjectFeatures @@ -105,6 +113,7 @@ class ExtractedContext(BaseModel): class ReadmeSection(BaseModel): """A section in the README outline.""" + title: str description: str priority: int = Field(default=5, ge=1, le=10) @@ -112,25 +121,30 @@ class ReadmeSection(BaseModel): class GeneratedSection(BaseModel): """A generated README section with content.""" + title: str content: str # --- Service Interfaces (Dependency Injection) --- + class FileReaderProtocol(Protocol): """Protocol for file reading operations.""" + def read_file(self, path: Path) -> str: ... def file_exists(self, path: Path) -> bool: ... class CLIRunnerProtocol(Protocol): """Protocol for running CLI commands.""" + def get_help_text(self) -> str: ... 
# --- Concrete Service Implementations --- + class FileReader: """Handles file system operations.""" @@ -157,6 +171,7 @@ def get_help_text(self) -> str: """Get the CLI help text.""" try: from typer.testing import CliRunner + runner = CliRunner() result = runner.invoke(cli_app, ["--help"], catch_exceptions=False) @@ -173,6 +188,7 @@ def get_help_text(self) -> str: # --- Project Analyzer --- + class ProjectAnalyzer: """Analyzes project structure and extracts information.""" @@ -181,7 +197,7 @@ def __init__( root: Path, file_reader: FileReaderProtocol, cli_runner: CLIRunnerProtocol, - console: Console | None = None + console: Console | None = None, ): self.root = root self.file_reader = file_reader @@ -205,14 +221,13 @@ def analyze_source_file(self, path: Path) -> SourceFileAnalysis: docstring=f.docstring, is_async=f.is_async, decorators=[d.name for d in f.decorators], - parameters=[p.name for p in f.parameters] + parameters=[p.name for p in f.parameters], ) for f in functions ] return SourceFileAnalysis( - relative_path=str(path.relative_to(self.root)), - functions=func_metadata + relative_path=str(path.relative_to(self.root)), functions=func_metadata ) except Exception as e: logger.error(f"Failed to parse {path}: {e}") @@ -237,7 +252,7 @@ def extract_project_metadata(self) -> ProjectMetadata: dependencies=deps, dev_dependencies=dev_deps, homepage=urls.get("Homepage"), - repository=urls.get("Repository") + repository=urls.get("Repository"), ) def analyze_all_source_files(self) -> list[SourceFileAnalysis]: @@ -268,21 +283,16 @@ def get_cli_help(self) -> str: # --- DSPy Signatures with Pydantic --- + class ExtractPackageManager(dspy.Signature): """Extract the package manager and installation commands from project files.""" - makefile_content: str = dspy.InputField( - desc="Content of the Makefile" - ) - pyproject_content: str = dspy.InputField( - desc="Content of pyproject.toml" - ) + makefile_content: str = dspy.InputField(desc="Content of the Makefile") + 
pyproject_content: str = dspy.InputField(desc="Content of pyproject.toml") package_manager: str = dspy.OutputField( desc="The package manager used (e.g., 'uv', 'pip', 'poetry')" ) - install_command: str = dspy.OutputField( - desc="The exact command to install the package" - ) + install_command: str = dspy.OutputField(desc="The exact command to install the package") dev_install_command: str = dspy.OutputField( desc="The exact command to install with dev dependencies" ) @@ -291,9 +301,7 @@ class ExtractPackageManager(dspy.Signature): class ExtractDevelopmentCommands(dspy.Signature): """Extract available development commands from Makefile.""" - makefile_content: str = dspy.InputField( - desc="Content of the Makefile" - ) + makefile_content: str = dspy.InputField(desc="Content of the Makefile") commands: dict[str, str] = dspy.OutputField( desc="Dictionary mapping command names to their descriptions" ) @@ -322,9 +330,7 @@ class GenerateSectionContent(dspy.Signature): context: ExtractedContext = dspy.InputField() section: ReadmeSection = dspy.InputField() - content: str = dspy.OutputField( - desc="Markdown content for this section" - ) + content: str = dspy.OutputField(desc="Markdown content for this section") class AssembleReadme(dspy.Signature): @@ -333,13 +339,12 @@ class AssembleReadme(dspy.Signature): project_name: str = dspy.InputField() project_description: str = dspy.InputField() sections: list[GeneratedSection] = dspy.InputField() - readme_content: str = dspy.OutputField( - desc="Complete README.md content with proper formatting" - ) + readme_content: str = dspy.OutputField(desc="Complete README.md content with proper formatting") # --- DSPy Modules --- + class ContextExtractor(dspy.Module): """Extracts specific context from project files.""" @@ -356,20 +361,17 @@ def forward( makefile_content: str, pyproject_content: str, cli_help_text: str, - python_version: str | None = None + python_version: str | None = None, ) -> ExtractedContext: """Extract all context 
from project files.""" # Extract package manager and install commands pkg_result = self.package_extractor( - makefile_content=makefile_content, - pyproject_content=pyproject_content + makefile_content=makefile_content, pyproject_content=pyproject_content ) # Extract development commands - cmd_result = self.commands_extractor( - makefile_content=makefile_content - ) + cmd_result = self.commands_extractor(makefile_content=makefile_content) # Create development environment environment = DevelopmentEnvironment( @@ -377,14 +379,12 @@ def forward( install_command=pkg_result.install_command, dev_install_command=pkg_result.dev_install_command, available_commands=cmd_result.commands, - python_version=python_version + python_version=python_version, ) # Extract project features features_result = self.features_extractor( - metadata=metadata, - source_analyses=source_analyses, - cli_help_text=cli_help_text + metadata=metadata, source_analyses=source_analyses, cli_help_text=cli_help_text ) return ExtractedContext( @@ -392,7 +392,7 @@ def forward( environment=environment, features=features_result.features, source_analyses=source_analyses, - cli_help_text=cli_help_text + cli_help_text=cli_help_text, ) @@ -415,28 +415,22 @@ def forward(self, context: ExtractedContext) -> dspy.Prediction: # Generate content for each section generated_sections = [] for section in sections: - section_result = self.section_generator( - context=context, - section=section - ) + section_result = self.section_generator(context=context, section=section) generated_sections.append( - GeneratedSection( - title=section.title, - content=section_result.content - ) + GeneratedSection(title=section.title, content=section_result.content) ) # Assemble final README final_result = self.assembler( project_name=context.metadata.name, project_description=context.metadata.description, - sections=generated_sections + sections=generated_sections, ) return dspy.Prediction( outline=sections, 
generated_sections=generated_sections, - readme_content=final_result.readme_content + readme_content=final_result.readme_content, ) @@ -534,7 +528,7 @@ def generate( makefile_content=makefile_content, pyproject_content=pyproject_content, cli_help_text=cli_help_text, - python_version=python_version + python_version=python_version, ) logger.info(f"Extracted context - Package manager: {context.environment.package_manager}") diff --git a/scripts/replace_list_with_sequence.py b/scripts/replace_list_with_sequence.py new file mode 100644 index 0000000..d20ba4b --- /dev/null +++ b/scripts/replace_list_with_sequence.py @@ -0,0 +1,64 @@ +import os +import re +import shutil +from pathlib import Path +import logging +import sys + +# Configure logging +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) + + +def replace_list_with_sequence(file_path: Path) -> None: + """Replace list/List with Sequence and add import if needed""" + try: + # Read the entire file + content = file_path.read_text(encoding="utf-8") + + # Check if Sequence import exists + has_sequence_import = any( + re.search(r"from\s+collections\.abc\s+import\s+Sequence", line) + for line in content.splitlines() + ) + + # Replace list/List with Sequence + new_content = re.sub(r"\blist\b", "Sequence", content) + new_content = re.sub(r"\bList\b", "Sequence", new_content) + + # Add import if needed + if not has_sequence_import: + new_content = f"from collections.abc import Sequence\n{new_content}" + + # Create backup + backup_path = file_path.with_suffix(file_path.suffix + ".bak") + shutil.copy2(file_path, backup_path) + logger.info(f"Created backup: {backup_path}") + + # Write changes + file_path.write_text(new_content, encoding="utf-8") + logger.info(f"Updated: {file_path}") + + except Exception as e: + logger.error(f"Error processing {file_path}: {e}") + + +def process_directory(directory: Path) -> None: + """Process all Python files 
in directory""" + for root, _, files in os.walk(directory): + for file in files: + if file.endswith(".py"): + file_path = Path(root) / file + replace_list_with_sequence(file_path) + + +if __name__ == "__main__": + target_dir = Path("src/robofactor") + if not target_dir.exists(): + logger.error(f"Target directory not found: {target_dir}") + sys.exit(1) + + process_directory(target_dir) + logger.info(f"Successfully updated files in {target_dir}") From a60ab297250f29a112360178eed4e4081f742b08 Mon Sep 17 00:00:00 2001 From: ethan-wickstrom Date: Tue, 24 Jun 2025 02:56:23 -0500 Subject: [PATCH 08/26] refactor(structure): reorganize codebase into modular package architecture Split monolithic modules into focused packages with clear responsibilities: - Created app package for CLI and UI components - Established evaluation package for code quality and correctness checks - Introduced parsing package for AST operations and code analysis - Added refactoring package for AI-powered transformation logic Migrated from relative to absolute imports throughout the codebase for better maintainability. Updated entry point in pyproject.toml to reflect new app.main module location. 
--- pyproject.toml | 11 +- src/robofactor/app/__init__.py | 3 + src/robofactor/{ => app}/config.py | 0 src/robofactor/{ => app}/main.py | 14 +- src/robofactor/{ => app}/ui.py | 5 +- src/robofactor/dspy_modules.py | 175 ------------------ src/robofactor/evaluation/__init__.py | 10 + .../{analysis.py => evaluation/checkers.py} | 98 ++++++---- .../{evaluation.py => evaluation/pipeline.py} | 12 +- src/robofactor/models.py | 19 -- src/robofactor/parsing/__init__.py | 7 + src/robofactor/parsing/analysis.py | 22 +++ .../ast_parser.py} | 102 ++-------- src/robofactor/parsing/models.py | 114 ++++++++++++ src/robofactor/refactoring/__init__.py | 7 + src/robofactor/refactoring/evaluator.py | 90 +++++++++ src/robofactor/refactoring/module.py | 48 +++++ src/robofactor/refactoring/signatures.py | 129 +++++++++++++ src/robofactor/training/training_loader.py | 4 +- 19 files changed, 522 insertions(+), 348 deletions(-) create mode 100644 src/robofactor/app/__init__.py rename src/robofactor/{ => app}/config.py (100%) rename src/robofactor/{ => app}/main.py (94%) rename src/robofactor/{ => app}/ui.py (96%) delete mode 100644 src/robofactor/dspy_modules.py create mode 100644 src/robofactor/evaluation/__init__.py rename src/robofactor/{analysis.py => evaluation/checkers.py} (64%) rename src/robofactor/{evaluation.py => evaluation/pipeline.py} (90%) delete mode 100644 src/robofactor/models.py create mode 100644 src/robofactor/parsing/__init__.py create mode 100644 src/robofactor/parsing/analysis.py rename src/robofactor/{function_extraction.py => parsing/ast_parser.py} (88%) create mode 100644 src/robofactor/parsing/models.py create mode 100644 src/robofactor/refactoring/__init__.py create mode 100644 src/robofactor/refactoring/evaluator.py create mode 100644 src/robofactor/refactoring/module.py create mode 100644 src/robofactor/refactoring/signatures.py diff --git a/pyproject.toml b/pyproject.toml index dcbff97..b523315 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ 
Issues = "https://github.com/ethan-wickstrom/robofactor/issues" Documentation = "https://github.com/ethan-wickstrom/robofactor#readme" [project.scripts] -robofactor = "robofactor.main:app" +robofactor = "robofactor.app.main:app" [build-system] requires = ["hatchling"] @@ -43,12 +43,7 @@ build-backend = "hatchling.build" packages = ["src/robofactor"] [dependency-groups] -dev = [ - "basedpyright>=1.29.4", - "isort>=6.0.1", - "ruff>=0.11.13", - "toml>=0.10.2", -] +dev = ["basedpyright>=1.29.4", "isort>=6.0.1", "ruff>=0.11.13", "toml>=0.10.2"] [tool.ruff] line-length = 100 @@ -75,7 +70,7 @@ exclude = [ "**/node_modules", "**/__pycache__", "src/experimental", - "src/typestubs" + "src/typestubs", ] stubPath = "typings" reportUnknownMemberType = false diff --git a/src/robofactor/app/__init__.py b/src/robofactor/app/__init__.py new file mode 100644 index 0000000..bf79120 --- /dev/null +++ b/src/robofactor/app/__init__.py @@ -0,0 +1,3 @@ +""" +The app package contains the main entry point and user interface components. +""" diff --git a/src/robofactor/config.py b/src/robofactor/app/config.py similarity index 100% rename from src/robofactor/config.py rename to src/robofactor/app/config.py diff --git a/src/robofactor/main.py b/src/robofactor/app/main.py similarity index 94% rename from src/robofactor/main.py rename to src/robofactor/app/main.py index 617f704..79fb562 100644 --- a/src/robofactor/main.py +++ b/src/robofactor/app/main.py @@ -14,12 +14,14 @@ from rich.rule import Rule from rich.syntax import Syntax -from . 
import config, ui, utils -from .analysis import extract_python_code -from .dspy_modules import CodeRefactor, RefactoringEvaluator -from .models import TestCase -from .evaluation import evaluate_refactored_code -from .training.training_loader import load_training_data +from robofactor import utils +from robofactor.app import config, ui +from robofactor.evaluation import evaluate_refactored_code +from robofactor.parsing.analysis import extract_python_code +from robofactor.parsing.models import TestCase +from robofactor.refactoring.evaluator import RefactoringEvaluator +from robofactor.refactoring.module import CodeRefactor +from robofactor.training.training_loader import load_training_data app = typer.Typer() diff --git a/src/robofactor/ui.py b/src/robofactor/app/ui.py similarity index 96% rename from src/robofactor/ui.py rename to src/robofactor/app/ui.py index 3326bf1..6e3bb05 100644 --- a/src/robofactor/ui.py +++ b/src/robofactor/app/ui.py @@ -14,8 +14,9 @@ from rich.table import Table from rich.text import Text -from . import analysis, config -from .evaluation import EvaluationResult +from ..parsing import analysis +from . import config +from ..evaluation import EvaluationResult def display_refactoring_process(console: Console, prediction: dspy.Prediction) -> None: diff --git a/src/robofactor/dspy_modules.py b/src/robofactor/dspy_modules.py deleted file mode 100644 index 53a0ec1..0000000 --- a/src/robofactor/dspy_modules.py +++ /dev/null @@ -1,175 +0,0 @@ -import logging -from collections.abc import Sequence - -import dspy -from pydantic import BaseModel, Field, field_validator, model_validator -from returns.result import Result, Success - -from . 
import analysis, evaluation -from .evaluation import EvaluationResult - -# --- Constants --- -FAILURE_SCORE = 0.0 -logger = logging.getLogger(__name__) - - -# --- Pydantic Models --- -class AnalysisOutput(BaseModel): - """Structured analysis of Python code functionality and improvement opportunities.""" - - analysis: str = Field( - description="Concise summary of functionality, complexity, and dependencies" - ) - refactoring_opportunities: Sequence[str] = Field( - description="Actionable bullet points for refactoring" - ) - - -class PlanOutput(BaseModel): - """Step-by-step refactoring execution plan.""" - - refactoring_summary: str = Field(description="High-level refactoring objective") - plan_steps: Sequence[str] = Field(description="Sequential actions to achieve refactoring") - - -class ImplementationOutput(BaseModel): - """Final refactored code with change explanations.""" - - refactored_code: str = Field( - description="PEP8-compliant Python code with type hints and docstrings" - ) - implementation_explanation: str = Field(description="Rationale for implemented changes") - - @field_validator("refactored_code") - @classmethod - def extract_from_markdown(cls, v: str) -> str: - return analysis.extract_python_code(v) - - -class EvaluationOutput(BaseModel): - """Holistic assessment of refactoring quality.""" - - final_score: float = Field(description="Weighted quality score (0.0-1.0)", ge=0.0, le=1.0) - final_suggestion: str = Field(description="Improvement recommendations or approval") - - @model_validator(mode="after") - def validate_score_precision(self) -> "EvaluationOutput": - if isinstance(self.final_score, float): - self.final_score = round(self.final_score, 2) - return self - - -# --- DSPy Signatures --- -class CodeAnalysis(dspy.Signature): - """Analyze Python code for functionality and improvement areas.""" - - code_snippet: str = dspy.InputField(desc="Python code to analyze") - analysis: AnalysisOutput = dspy.OutputField() - - -class 
RefactoringPlan(dspy.Signature): - """Create refactoring plan based on code analysis.""" - - code_snippet: str = dspy.InputField(desc="Original Python code") - analysis: str = dspy.InputField(desc="Code analysis summary") - plan: PlanOutput = dspy.OutputField() - - -class RefactoredCode(dspy.Signature): - """Generate refactored code from execution plan.""" - - original_code: str = dspy.InputField(desc="Unmodified source code") - refactoring_summary: str = dspy.InputField(desc="Refactoring objective") - plan_steps: Sequence[str] = dspy.InputField(desc="Step-by-step refactoring actions") - implementation: ImplementationOutput = dspy.OutputField() - - -class FinalEvaluation(dspy.Signature): - """Assess refactored code quality with quantitative metrics.""" - - code_snippet: str = dspy.InputField(desc="Refactored Python code") - quality_scores: str = dspy.InputField(desc="JSON quality metrics") - functional_score: float = dspy.InputField(desc="Test pass rate (0.0-1.0)") - evaluation: EvaluationOutput = dspy.OutputField() - - -# --- DSPy Modules --- -class CodeRefactor(dspy.Module): - """Orchestrates code analysis, planning, and refactoring.""" - - def __init__(self): - super().__init__() - self.analyzer: dspy.Module = dspy.Predict(CodeAnalysis) - self.planner: dspy.Module = dspy.Predict(RefactoringPlan) - self.implementer: dspy.Module = dspy.Predict(RefactoredCode) - - def forward(self, code_snippet: str) -> dspy.Prediction: - analysis_result = self.analyzer(code_snippet=code_snippet) - plan_result = self.planner( - code_snippet=code_snippet, analysis=analysis_result.analysis.analysis - ) - impl_result = self.implementer( - original_code=code_snippet, - refactoring_summary=plan_result.plan.refactoring_summary, - plan_steps=plan_result.plan.plan_steps, - ) - - return dspy.Prediction( - analysis=analysis_result.analysis.analysis, - refactoring_opportunities=analysis_result.analysis.refactoring_opportunities, - refactoring_summary=plan_result.plan.refactoring_summary, - 
plan_steps=plan_result.plan.plan_steps, - refactored_code=impl_result.implementation.refactored_code, - implementation_explanation=impl_result.implementation.implementation_explanation, - ) - - -class RefactoringEvaluator(dspy.Module): - """Evaluates refactored code through automated checks and LLM assessment.""" - - def __init__(self): - super().__init__() - self.evaluator: dspy.Module = dspy.Predict(FinalEvaluation) - - def _handle_evaluation_success( - self, eval_data: EvaluationResult, refactored_code: str - ) -> float: - """Process successful programmatic evaluation.""" - functional_score = ( - eval_data.functional_check.passed_tests / eval_data.functional_check.total_tests - if eval_data.functional_check.total_tests > 0 - else 1.0 - ) - - try: - llm_evaluation = self.evaluator( - code_snippet=refactored_code, - quality_scores=eval_data.quality_scores.model_dump_json(), - functional_score=functional_score, - ) - return llm_evaluation.evaluation.final_score - except Exception as e: - logger.error(f"LLM evaluation failed: {e}", exc_info=True) - return FAILURE_SCORE - - def forward(self, original_example: dspy.Example, prediction: dspy.Prediction) -> float: - refactored_code = getattr(prediction, "refactored_code", "") - if not refactored_code: - logger.warning("Evaluation aborted: Missing refactored code") - return FAILURE_SCORE - - code_to_evaluate = analysis.extract_python_code(refactored_code) - if not code_to_evaluate: - logger.warning("Evaluation aborted: Empty code extraction") - return FAILURE_SCORE - - test_cases = getattr(original_example, "test_cases", []) - eval_result: Result[EvaluationResult, str] = evaluation.evaluate_refactored_code( - code_to_evaluate, test_cases - ) - - if isinstance(eval_result, Success): - return self._handle_evaluation_success(eval_result.unwrap(), code_to_evaluate) - else: - logger.warning(f"Programmatic evaluation failed: {eval_result.failure()}") - return FAILURE_SCORE diff --git a/src/robofactor/evaluation/__init__.py 
b/src/robofactor/evaluation/__init__.py new file mode 100644 index 0000000..7abcb98 --- /dev/null +++ b/src/robofactor/evaluation/__init__.py @@ -0,0 +1,10 @@ +""" +The evaluation package is responsible for assessing the quality and correctness +of refactored code. It includes a multi-stage pipeline that performs syntax, +quality, and functional checks to ensure that AI-generated code is safe, +reliable, and adheres to best practices. +""" +from . import checkers, pipeline +from .pipeline import evaluate_refactored_code, EvaluationResult + +__all__ = ["evaluate_refactored_code", "EvaluationResult", "checkers", "pipeline"] diff --git a/src/robofactor/analysis.py b/src/robofactor/evaluation/checkers.py similarity index 64% rename from src/robofactor/analysis.py rename to src/robofactor/evaluation/checkers.py index cbace56..ff6bd71 100644 --- a/src/robofactor/analysis.py +++ b/src/robofactor/evaluation/checkers.py @@ -1,34 +1,38 @@ +""" +Provides functions for checking the quality and correctness of Python code. + +This module contains checkers for syntax validity, code quality (linting, +complexity, docstrings, typing), and functional correctness against test cases. +These functions are designed to be composed into an evaluation pipeline. +""" + import ast import json import os -import re import subprocess import tempfile import textwrap from collections.abc import Sequence from pathlib import Path -from .models import TestCase import dspy -from .models import CodeQualityScores - -from . 
import config -from .json.types import JSON - - -def extract_python_code(text: str) -> str: - """Extracts Python code from a markdown block, returns original text if no block is found.""" - match = re.search(r"```python\n(.*?)\n```", text, re.DOTALL) - return match.group(1).strip() if match else text +from ..app import config +from ..parsing.models import CodeQualityScores, TestCase def check_syntax(code: str) -> tuple[bool, str | None, str | None]: """ Checks for valid Python syntax and a top-level function definition. - Returns a tuple indicating validity, the function name, and an error message. - This format is consumed by a wrapper that converts it into a `Result` monad. + Args: + code: The Python source code to check. + + Returns: + A tuple containing: + - A boolean indicating if the syntax is valid. + - The name of the top-level function if found, otherwise None. + - An error message if the syntax is invalid, otherwise None. """ try: tree = ast.parse(code) @@ -46,11 +50,15 @@ def _get_ast_based_scores(tree: ast.AST, func_name: str | None) -> tuple[float, if not all_funcs: return 0.0, 0.0 - target_funcs = [f for f in all_funcs if f.name == func_name] if func_name else all_funcs + target_funcs = ( + [f for f in all_funcs if f.name == func_name] if func_name else all_funcs + ) if not target_funcs: return 0.0, 0.0 - docstring_score = sum(1.0 for f in target_funcs if ast.get_docstring(f)) / len(target_funcs) + docstring_score = sum(1.0 for f in target_funcs if ast.get_docstring(f)) / len( + target_funcs + ) typed_elements, typeable_elements = 0, 0 for func_node in target_funcs: @@ -69,14 +77,26 @@ def check_code_quality(code: str, func_name: str | None = None) -> CodeQualitySc This function performs I/O by creating a temporary file and running a subprocess. It is designed to be wrapped by a decorator like `@safe` - or `@impure_safe` to handle potential exceptions. + to handle potential exceptions. + + Args: + code: The Python code to analyze. 
+ func_name: The specific function name to target for AST-based checks. + + Returns: + A CodeQualityScores object with the analysis results. + + Raises: + subprocess.CalledProcessError: If the flake8 command fails. + SyntaxError: If the code cannot be parsed into an AST. """ - with tempfile.NamedTemporaryFile("w", suffix=".py", delete=False, encoding="utf-8") as tmp: + with tempfile.NamedTemporaryFile( + "w", suffix=".py", delete=False, encoding="utf-8" + ) as tmp: _ = tmp.write(code) tmp_path = Path(tmp.name) try: - # Exceptions from subprocess.run will be caught by the @safe wrapper in the caller. result = subprocess.run( [ "flake8", @@ -85,7 +105,7 @@ def check_code_quality(code: str, func_name: str | None = None) -> CodeQualitySc ], capture_output=True, text=True, - check=False, # We manually check output, not exit code. + check=False, ) all_issues = result.stdout.strip().splitlines() if result.stdout else [] @@ -97,9 +117,10 @@ def check_code_quality(code: str, func_name: str | None = None) -> CodeQualitySc ] complexity_score = 1.0 if not complexity_warnings else 0.0 - linting_score = max(0.0, 1.0 - (config.LINTING_PENALTY_PER_ISSUE * len(linting_issues))) + linting_score = max( + 0.0, 1.0 - (config.LINTING_PENALTY_PER_ISSUE * len(linting_issues)) + ) - # A SyntaxError here will be caught by the @safe wrapper in the caller. tree = ast.parse(code) docstring_score, typing_score = _get_ast_based_scores(tree, func_name) @@ -111,7 +132,6 @@ def check_code_quality(code: str, func_name: str | None = None) -> CodeQualitySc linting_issues=linting_issues, ) finally: - # Ensure the temporary file is always cleaned up. if tmp_path.exists(): os.unlink(tmp_path) @@ -126,9 +146,6 @@ def _build_execution_script(func_name: str, test_case: TestCase) -> str: import json import sys - # This script assumes the function '{func_name}' has been defined in the - # execution context by the dspy.PythonInterpreter. 
- args = json.loads('''{args_json}''') kwargs = json.loads('''{kwargs_json}''') @@ -138,34 +155,39 @@ def _build_execution_script(func_name: str, test_case: TestCase) -> str: ) -def check_functional_correctness(code: str, func_name: str, test_cases: Sequence[TestCase]) -> int: +def check_functional_correctness( + code: str, func_name: str, test_cases: Sequence[TestCase] +) -> int: """ Executes test cases against code in a sandboxed Python interpreter. - This function can raise exceptions if the provided code is invalid or if - the test execution fails unexpectedly. It is designed to be wrapped by a + Args: + code: The Python source code containing the function. + func_name: The name of the function to test. + test_cases: A sequence of TestCase objects to run. + + Returns: + The number of test cases that passed. - decorator like `@safe` to handle these failures gracefully. + Raises: + Exception: If the PythonInterpreter fails during setup or execution. """ if not test_cases: return 0 passed_count = 0 - # A failure in the interpreter setup will be caught by the @safe wrapper. with dspy.PythonInterpreter() as interp: - interp.execute(code) # Define the function in the interpreter's scope. + interp.execute(code) for test in test_cases: - # Handle failures for individual test cases gracefully to allow others to run. try: exec_script = _build_execution_script(func_name, test) actual_output_json = interp.execute(exec_script) - actual_output: JSON = json.loads(actual_output_json) - - # Normalize expected output to ensure consistent comparison. - normalized_expected_output: JSON = json.loads(json.dumps(test.expected_output)) + actual_output = json.loads(actual_output_json) + normalized_expected_output = json.loads( + json.dumps(test.expected_output) + ) if actual_output == normalized_expected_output: passed_count += 1 except Exception: - # If a single test case fails to execute or assert, continue to the next. 
continue return passed_count diff --git a/src/robofactor/evaluation.py b/src/robofactor/evaluation/pipeline.py similarity index 90% rename from src/robofactor/evaluation.py rename to src/robofactor/evaluation/pipeline.py index f09580a..b535e8b 100644 --- a/src/robofactor/evaluation.py +++ b/src/robofactor/evaluation/pipeline.py @@ -3,12 +3,10 @@ from collections.abc import Sequence from typing import NamedTuple -from pydantic import BaseModel, Field from returns.result import Failure, Result, Success, safe -from . import analysis -from .json.types import JSON -from .models import TestCase, CodeQualityScores +from . import checkers +from ..parsing.models import CodeQualityScores, TestCase @@ -39,7 +37,7 @@ def _check_syntax(code: str) -> Result[str, str]: output into a `Result` monad, which is more suitable for functional pipelines. """ - is_valid, func_name, err = analysis.check_syntax(code) + is_valid, func_name, err = checkers.check_syntax(code) if not is_valid or not func_name: return Failure(f"Syntax Check Failed: {err or 'No function found.'}") return Success(func_name) @@ -53,7 +51,7 @@ def _check_quality(code: str, func_name: str) -> CodeQualityScores: The `@safe` decorator automatically wraps this function's execution in a `Result` container, capturing any exceptions as a `Failure`. 
""" - return analysis.check_code_quality(code, func_name) + return checkers.check_code_quality(code, func_name) @safe @@ -68,7 +66,7 @@ def _check_functional_correctness( if not tests: return FunctionalCheckResult(passed_tests=0, total_tests=0) - passed_tests = analysis.check_functional_correctness(code, func_name, tests) + passed_tests = checkers.check_functional_correctness(code, func_name, tests) return FunctionalCheckResult(passed_tests=passed_tests, total_tests=len(tests)) diff --git a/src/robofactor/models.py b/src/robofactor/models.py deleted file mode 100644 index 5805a6f..0000000 --- a/src/robofactor/models.py +++ /dev/null @@ -1,19 +0,0 @@ -from .json.types import JSON -from collections.abc import Sequence, Mapping -from pydantic import BaseModel, Field - - -class TestCase(BaseModel): - """Represents a single test case with positional and keyword args and expected output.""" - args: JSON = Field() - kwargs: JSON = Field() - expected_output: JSON - - -class CodeQualityScores(BaseModel): - """Holds various code quality metrics.""" - linting_score: float - complexity_score: float - typing_score: float - docstring_score: float - linting_issues: Sequence[str] = Field(default_factory=list) diff --git a/src/robofactor/parsing/__init__.py b/src/robofactor/parsing/__init__.py new file mode 100644 index 0000000..0b5dd23 --- /dev/null +++ b/src/robofactor/parsing/__init__.py @@ -0,0 +1,7 @@ +""" +The parsing package is responsible for analyzing and extracting information +from Python source code. It uses Abstract Syntax Trees (AST) to deconstruct +code into a structured format, making it easier for other parts of the +application to understand and manipulate. +""" +from . 
import analysis, ast_parser, models diff --git a/src/robofactor/parsing/analysis.py b/src/robofactor/parsing/analysis.py new file mode 100644 index 0000000..8ffe58e --- /dev/null +++ b/src/robofactor/parsing/analysis.py @@ -0,0 +1,22 @@ +""" +Provides utility functions for parsing and extracting code from text. +""" + +import re + + +def extract_python_code(text: str) -> str: + """ + Extracts Python code from a markdown block. + + If a python markdown block (```python...```) is found, its content is + returned. Otherwise, the original text is returned. + + Args: + text: The string to search for a Python code block. + + Returns: + The extracted Python code, or the original text if no block is found. + """ + match = re.search(r"```python\n(.*?)\n```", text, re.DOTALL) + return match.group(1).strip() if match else text diff --git a/src/robofactor/function_extraction.py b/src/robofactor/parsing/ast_parser.py similarity index 88% rename from src/robofactor/function_extraction.py rename to src/robofactor/parsing/ast_parser.py index 5617703..840d713 100644 --- a/src/robofactor/function_extraction.py +++ b/src/robofactor/parsing/ast_parser.py @@ -1,5 +1,4 @@ import ast -import enum from collections.abc import Iterator, Sequence from dataclasses import dataclass from pathlib import Path @@ -7,97 +6,18 @@ from returns.io import impure_safe from returns.result import safe -# Type alias for function definition AST nodes to improve readability. -FunctionDefNode = ast.FunctionDef | ast.AsyncFunctionDef - - -class ParameterKind(enum.Enum): - """Enumeration for the different kinds of function parameters.""" - - POSITIONAL_ONLY = "positional_only" - POSITIONAL_OR_KEYWORD = "positional_or_keyword" - VAR_POSITIONAL = "var_positional" - KEYWORD_ONLY = "keyword_only" - VAR_KEYWORD = "var_keyword" - - -@dataclass(frozen=True) -class Parameter: - """ - Represents a function parameter with its name, kind, and optional details. - - Attributes: - name: The name of the parameter. 
- kind: The kind of the parameter (e.g., positional-only). - annotation: The type annotation as a string, if present. - default: The default value as a string, if present. - """ - - name: str - kind: ParameterKind - annotation: str | None = None - default: str | None = None - - -@dataclass(frozen=True) -class Decorator: - """ - Represents a function decorator. - - Attributes: - name: The name of the decorator. - args: A tuple of arguments passed to the decorator, as strings. - """ - - name: str - args: tuple[str, ...] = () - - -@dataclass(frozen=True) -class FunctionContext: - """Represents the context where a function is defined (base class).""" +from .models import ( + ClassContext, + Decorator, + FunctionContext, + ModuleContext, + NestedContext, + Parameter, + ParameterKind, +) - pass - - -@dataclass(frozen=True) -class ModuleContext(FunctionContext): - """ - Represents a function defined at the module level. - - Attributes: - module_name: The name of the module. - """ - - module_name: str - - -@dataclass(frozen=True) -class ClassContext(FunctionContext): - """ - Represents a function defined within a class. - - Attributes: - class_name: The name of the class. - parent_context: The context in which the class is defined. - """ - - class_name: str - parent_context: FunctionContext - - -@dataclass(frozen=True) -class NestedContext(FunctionContext): - """ - Represents a function defined within another function. - - Attributes: - parent_function: The name of the enclosing function. - parent_context: The context of the enclosing function. - """ - - parent_function: str - parent_context: FunctionContext +# Type alias for function definition AST nodes to improve readability. 
+type FunctionDefNode = ast.FunctionDef | ast.AsyncFunctionDef @dataclass(frozen=True) diff --git a/src/robofactor/parsing/models.py b/src/robofactor/parsing/models.py new file mode 100644 index 0000000..20eade0 --- /dev/null +++ b/src/robofactor/parsing/models.py @@ -0,0 +1,114 @@ +import enum +from collections.abc import Sequence +from dataclasses import dataclass + +from pydantic import BaseModel, Field + +from ..json.types import JSON + + +class TestCase(BaseModel): + """Represents a single test case with positional and keyword args and expected output.""" + + args: JSON = Field() + kwargs: JSON = Field() + expected_output: JSON + + +class CodeQualityScores(BaseModel): + """Holds various code quality metrics.""" + + linting_score: float + complexity_score: float + typing_score: float + docstring_score: float + linting_issues: Sequence[str] = Field(default_factory=list) + + +class ParameterKind(enum.Enum): + """Enumeration for the different kinds of function parameters.""" + + POSITIONAL_ONLY = "positional_only" + POSITIONAL_OR_KEYWORD = "positional_or_keyword" + VAR_POSITIONAL = "var_positional" + KEYWORD_ONLY = "keyword_only" + VAR_KEYWORD = "var_keyword" + + +@dataclass(frozen=True) +class Parameter: + """ + Represents a function parameter with its name, kind, and optional details. + + Attributes: + name: The name of the parameter. + kind: The kind of the parameter (e.g., positional-only). + annotation: The type annotation as a string, if present. + default: The default value as a string, if present. + """ + + name: str + kind: ParameterKind + annotation: str | None = None + default: str | None = None + + +@dataclass(frozen=True) +class Decorator: + """ + Represents a function decorator. + + Attributes: + name: The name of the decorator. + args: A tuple of arguments passed to the decorator, as strings. + """ + + name: str + args: tuple[str, ...] 
= () + + +@dataclass(frozen=True) +class FunctionContext: + """Represents the context where a function is defined (base class).""" + + pass + + +@dataclass(frozen=True) +class ModuleContext(FunctionContext): + """ + Represents a function defined at the module level. + + Attributes: + module_name: The name of the module. + """ + + module_name: str + + +@dataclass(frozen=True) +class ClassContext(FunctionContext): + """ + Represents a function defined within a class. + + Attributes: + class_name: The name of the class. + parent_context: The context in which the class is defined. + """ + + class_name: str + parent_context: FunctionContext + + +@dataclass(frozen=True) +class NestedContext(FunctionContext): + """ + Represents a function defined within another function. + + Attributes: + parent_function: The name of the enclosing function. + parent_context: The context of the enclosing function. + """ + + parent_function: str + parent_context: FunctionContext diff --git a/src/robofactor/refactoring/__init__.py b/src/robofactor/refactoring/__init__.py new file mode 100644 index 0000000..2ca476c --- /dev/null +++ b/src/robofactor/refactoring/__init__.py @@ -0,0 +1,7 @@ +""" +The refactoring package contains the core AI-powered logic for code +transformation. It defines the DSPy modules, signatures, and evaluators +that work together to analyze, plan, and execute code improvements. +""" + +from . import evaluator, module, signatures diff --git a/src/robofactor/refactoring/evaluator.py b/src/robofactor/refactoring/evaluator.py new file mode 100644 index 0000000..ff26b57 --- /dev/null +++ b/src/robofactor/refactoring/evaluator.py @@ -0,0 +1,90 @@ +""" +Defines the DSPy module for evaluating refactored code. 
+""" + +import logging + +import dspy +from returns.result import Result, Success + +from ..evaluation import EvaluationResult, evaluate_refactored_code +from ..parsing import analysis +from .signatures import FinalEvaluation + +# --- Constants --- +FAILURE_SCORE = 0.0 +logger = logging.getLogger(__name__) + + +class RefactoringEvaluator(dspy.Module): + """Evaluates refactored code through automated checks and LLM assessment.""" + + def __init__(self) -> None: + """Initializes the evaluator module.""" + super().__init__() + self.evaluator: dspy.Module = dspy.Predict(FinalEvaluation) + + def _handle_evaluation_success( + self, eval_data: EvaluationResult, refactored_code: str + ) -> float: + """ + Process a successful programmatic evaluation by sending results to an LLM. + + Args: + eval_data: The structured results from the programmatic checks. + refactored_code: The code that was evaluated. + + Returns: + The final score from the LLM assessment, or a failure score. + """ + functional_score = ( + eval_data.functional_check.passed_tests / eval_data.functional_check.total_tests + if eval_data.functional_check.total_tests > 0 + else 1.0 + ) + + try: + llm_evaluation = self.evaluator( + code_snippet=refactored_code, + quality_scores=eval_data.quality_scores.model_dump_json(), + functional_score=functional_score, + ) + return llm_evaluation.evaluation.final_score + except Exception as e: + logger.error(f"LLM evaluation failed: {e}", exc_info=True) + return FAILURE_SCORE + + def forward(self, original_example: dspy.Example, prediction: dspy.Prediction) -> float: + """ + Executes the full evaluation pipeline for a refactoring prediction. + + This function serves as the metric for the DSPy teleprompter. It first + runs programmatic checks and then uses an LLM for a final assessment. + + Args: + original_example: The original data point, containing test cases. + prediction: The output from the CodeRefactor module. + + Returns: + A final score between 0.0 and 1.0. 
+ """ + refactored_code = getattr(prediction, "refactored_code", "") + if not refactored_code: + logger.warning("Evaluation aborted: Missing refactored code") + return FAILURE_SCORE + + code_to_evaluate = analysis.extract_python_code(refactored_code) + if not code_to_evaluate: + logger.warning("Evaluation aborted: Empty code extraction") + return FAILURE_SCORE + + test_cases = getattr(original_example, "test_cases", []) + eval_result: Result[EvaluationResult, str] = evaluate_refactored_code( + code_to_evaluate, test_cases + ) + + if isinstance(eval_result, Success): + return self._handle_evaluation_success(eval_result.unwrap(), code_to_evaluate) + + logger.warning(f"Programmatic evaluation failed: {eval_result.failure()}") + return FAILURE_SCORE diff --git a/src/robofactor/refactoring/module.py b/src/robofactor/refactoring/module.py new file mode 100644 index 0000000..fce5aa0 --- /dev/null +++ b/src/robofactor/refactoring/module.py @@ -0,0 +1,48 @@ +""" +Defines the core DSPy module for the code refactoring pipeline. +""" + +import dspy + +from .signatures import CodeAnalysis, RefactoringPlan, RefactoredCode + + +class CodeRefactor(dspy.Module): + """Orchestrates code analysis, planning, and refactoring.""" + + def __init__(self) -> None: + """Initializes the multi-stage refactoring module.""" + super().__init__() + self.analyzer: dspy.Module = dspy.Predict(CodeAnalysis) + self.planner: dspy.Module = dspy.Predict(RefactoringPlan) + self.implementer: dspy.Module = dspy.Predict(RefactoredCode) + + def forward(self, code_snippet: str) -> dspy.Prediction: + """ + Executes the analysis, planning, and implementation pipeline. + + Args: + code_snippet: The Python code to be refactored. + + Returns: + A dspy.Prediction object containing the full trace of the + refactoring process, from analysis to final code. 
+ """ + analysis_result = self.analyzer(code_snippet=code_snippet) + plan_result = self.planner( + code_snippet=code_snippet, analysis=analysis_result.analysis.analysis + ) + impl_result = self.implementer( + original_code=code_snippet, + refactoring_summary=plan_result.plan.refactoring_summary, + plan_steps=plan_result.plan.plan_steps, + ) + + return dspy.Prediction( + analysis=analysis_result.analysis.analysis, + refactoring_opportunities=analysis_result.analysis.refactoring_opportunities, + refactoring_summary=plan_result.plan.refactoring_summary, + plan_steps=plan_result.plan.plan_steps, + refactored_code=impl_result.implementation.refactored_code, + implementation_explanation=impl_result.implementation.implementation_explanation, + ) diff --git a/src/robofactor/refactoring/signatures.py b/src/robofactor/refactoring/signatures.py new file mode 100644 index 0000000..1dc1d04 --- /dev/null +++ b/src/robofactor/refactoring/signatures.py @@ -0,0 +1,129 @@ +""" +Defines the Pydantic data models and DSPy Signatures for the refactoring process. + +This module specifies the structured inputs and outputs for each step of the +AI-powered refactoring pipeline, including analysis, planning, implementation, +and evaluation. 
+""" + +from collections.abc import Sequence + +import dspy +from pydantic import BaseModel, Field, field_validator, model_validator + +from ..parsing.analysis import extract_python_code + + +# --- Pydantic Models --- +class AnalysisOutput(BaseModel): + """Structured analysis of Python code functionality and improvement opportunities.""" + + analysis: str = Field( + description="Concise summary of functionality, complexity, and dependencies" + ) + refactoring_opportunities: Sequence[str] = Field( + description="Actionable bullet points for refactoring" + ) + + +class PlanOutput(BaseModel): + """Step-by-step refactoring execution plan.""" + + refactoring_summary: str = Field(description="High-level refactoring objective") + plan_steps: Sequence[str] = Field(description="Sequential actions to achieve refactoring") + + +class ImplementationOutput(BaseModel): + """Final refactored code with change explanations.""" + + refactored_code: str = Field( + description="PEP8-compliant Python code with type hints and docstrings" + ) + implementation_explanation: str = Field(description="Rationale for implemented changes") + + @field_validator("refactored_code") + @classmethod + def extract_from_markdown(cls, v: str) -> str: + """Extracts Python code from a markdown code block.""" + return extract_python_code(v) + + +class EvaluationOutput(BaseModel): + """Holistic assessment of refactoring quality.""" + + final_score: float = Field(description="Weighted quality score (0.0-1.0)", ge=0.0, le=1.0) + final_suggestion: str = Field(description="Improvement recommendations or approval") + + @model_validator(mode="after") + def validate_score_precision(self) -> "EvaluationOutput": + """Rounds the final score to two decimal places.""" + if isinstance(self.final_score, float): + self.final_score = round(self.final_score, 2) + return self + + +# --- DSPy Signatures --- +class CodeAnalysis(dspy.Signature): + """ + Analyze Python code for functionality and improvement areas. 
+ + **Instruction**: You are an expert code analyst. Your task is to thoroughly + examine the provided Python code snippet. Identify its core functionality, + dependencies, and complexity. Then, suggest concrete, actionable refactoring + opportunities. The analysis should be concise, and the opportunities should be + clear and directly implementable. + """ + + code_snippet: str = dspy.InputField(desc="Python code to analyze") + analysis: AnalysisOutput = dspy.OutputField() + + +class RefactoringPlan(dspy.Signature): + """ + Create a refactoring plan based on code analysis. + + **Instruction**: You are a senior software architect. Based on the provided code + and its analysis, create a high-level refactoring plan. Define a clear + objective for the refactoring and then break it down into a sequence of + specific, logical steps. The plan should be easy to follow and lead to a + measurably better version of the code. + """ + + code_snippet: str = dspy.InputField(desc="Original Python code") + analysis: str = dspy.InputField(desc="Code analysis summary") + plan: PlanOutput = dspy.OutputField() + + +class RefactoredCode(dspy.Signature): + """ + Generate refactored code from an execution plan. + + **Instruction**: You are a world-class Python programmer. Your task is to + implement the refactoring plan for the given code. The final code must be + 100% PEP8 compliant, include comprehensive docstrings (Google-style), + and have full type hints. Provide a clear explanation of the changes you made + and why. The refactored code must be enclosed in a single Python markdown block. 
+ """ + + original_code: str = dspy.InputField(desc="Unmodified source code") + refactoring_summary: str = dspy.InputField(desc="Refactoring objective") + plan_steps: Sequence[str] = dspy.InputField(desc="Step-by-step refactoring actions") + implementation: ImplementationOutput = dspy.OutputField() + + +class FinalEvaluation(dspy.Signature): + """ + Assess refactored code quality with quantitative metrics. + + **Instruction**: You are a quality assurance automation bot. Evaluate the + refactored code based on the provided quality and functional scores. + Your assessment must result in a final score between 0.0 and 1.0, + where 1.0 is a perfect refactoring. Provide a concluding suggestion, + either approving the code or recommending specific further improvements. + The final score should be a weighted average of the inputs. + """ + + code_snippet: str = dspy.InputField(desc="Refactored Python code") + quality_scores: str = dspy.InputField(desc="JSON quality metrics") + functional_score: float = dspy.InputField(desc="Test pass rate (0.0-1.0)") + evaluation: EvaluationOutput = dspy.OutputField() diff --git a/src/robofactor/training/training_loader.py b/src/robofactor/training/training_loader.py index b053b5d..2f199ed 100644 --- a/src/robofactor/training/training_loader.py +++ b/src/robofactor/training/training_loader.py @@ -6,8 +6,8 @@ import dspy -from ..config import TRAINING_DATA_FILE -from ..models import TestCase +from ..app.config import TRAINING_DATA_FILE +from ..parsing.models import TestCase from ..json.is_json_list import is_json_list from ..json.types import JSON, JSONObject From c6116c4b24ea4c3f9d4c930bffe6a3d278eb082f Mon Sep 17 00:00:00 2001 From: ethan-wickstrom Date: Tue, 24 Jun 2025 02:56:44 -0500 Subject: [PATCH 09/26] chore: update type hints to use type[] syntax --- typings/dspy/adapters/json_adapter.pyi | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/typings/dspy/adapters/json_adapter.pyi 
b/typings/dspy/adapters/json_adapter.pyi index e29ef42..4e559b2 100644 --- a/typings/dspy/adapters/json_adapter.pyi +++ b/typings/dspy/adapters/json_adapter.pyi @@ -2,7 +2,7 @@ This type stub file was generated by pyright. """ -from typing import Any, Dict, Type +from typing import Any, Dict from dspy.adapters.chat_adapter import ChatAdapter, FieldInfoWithName from dspy.clients.lm import LM from dspy.signatures.signature import Signature @@ -14,16 +14,19 @@ class JSONAdapter(ChatAdapter): self, lm: LM, lm_kwargs: dict[str, Any], - signature: Type[Signature], + signature: type[Signature], demos: list[dict[str, Any]], inputs: dict[str, Any], ) -> list[dict[str, Any]]: ... - def format_field_structure(self, signature: Type[Signature]) -> str: ... - def user_message_output_requirements(self, signature: Type[Signature]) -> str: ... + def format_field_structure(self, signature: type[Signature]) -> str: ... + def user_message_output_requirements(self, signature: type[Signature]) -> str: ... def format_assistant_message_content( - self, signature: Type[Signature], outputs: dict[str, Any], missing_field_message=... + self, + signature: type[Signature], + outputs: dict[str, Any], + missing_field_message=..., ) -> str: ... - def parse(self, signature: Type[Signature], completion: str) -> dict[str, Any]: ... + def parse(self, signature: type[Signature], completion: str) -> dict[str, Any]: ... def format_field_with_value( self, fields_with_values: Dict[FieldInfoWithName, Any], role: str = ... 
) -> str: @@ -41,7 +44,7 @@ class JSONAdapter(ChatAdapter): def format_finetune_data( self, - signature: Type[Signature], + signature: type[Signature], demos: list[dict[str, Any]], inputs: dict[str, Any], outputs: dict[str, Any], From dcdfbaeaaa9602bfcc8e6ea8782b0741d694f78d Mon Sep 17 00:00:00 2001 From: ethan-wickstrom Date: Tue, 24 Jun 2025 04:10:13 -0500 Subject: [PATCH 10/26] refactor: replace list/List with Sequence in type annotations --- scripts/generate_readme.py | 47 ++- scripts/replace_list_with_sequence.py | 417 ++++++++++++++++++--- src/robofactor/app/main.py | 30 +- src/robofactor/parsing/models.py | 2 +- src/robofactor/training/training_loader.py | 4 +- 5 files changed, 428 insertions(+), 72 deletions(-) diff --git a/scripts/generate_readme.py b/scripts/generate_readme.py index e4131b9..ca27592 100644 --- a/scripts/generate_readme.py +++ b/scripts/generate_readme.py @@ -24,8 +24,8 @@ try: project_root = Path(__file__).parent.parent.resolve() sys.path.insert(0, str(project_root / "src")) - from robofactor.function_extraction import parse_python_source - from robofactor.main import app as cli_app + from robofactor.parsing.ast_parser import parse_python_source + from robofactor.app.main import app as cli_app from robofactor.utils import suppress_pydantic_warnings except ImportError as e: print( @@ -80,11 +80,16 @@ class ProjectMetadata(BaseModel): class DevelopmentEnvironment(BaseModel): """Extracted development environment information.""" - package_manager: str = Field(description="The package manager used (e.g., uv, pip, poetry)") + package_manager: str = Field( + description="The package manager used (e.g., uv, pip, poetry)" + ) install_command: str = Field(description="Command to install the package") - dev_install_command: str = Field(description="Command to install with dev dependencies") + dev_install_command: str = Field( + description="Command to install with dev dependencies" + ) available_commands: dict[str, str] = Field( - 
default_factory=dict, description="Available make/task commands and their descriptions" + default_factory=dict, + description="Available make/task commands and their descriptions", ) python_version: str | None = None @@ -93,7 +98,9 @@ class ProjectFeatures(BaseModel): """High-level features extracted from the project.""" core_technologies: list[str] = Field(description="Main technologies/libraries used") - cli_capabilities: list[str] = Field(description="CLI commands and options available") + cli_capabilities: list[str] = Field( + description="CLI commands and options available" + ) key_modules: dict[str, str] = Field( description="Key modules and their purposes", default_factory=dict ) @@ -292,7 +299,9 @@ class ExtractPackageManager(dspy.Signature): package_manager: str = dspy.OutputField( desc="The package manager used (e.g., 'uv', 'pip', 'poetry')" ) - install_command: str = dspy.OutputField(desc="The exact command to install the package") + install_command: str = dspy.OutputField( + desc="The exact command to install the package" + ) dev_install_command: str = dspy.OutputField( desc="The exact command to install with dev dependencies" ) @@ -339,7 +348,9 @@ class AssembleReadme(dspy.Signature): project_name: str = dspy.InputField() project_description: str = dspy.InputField() sections: list[GeneratedSection] = dspy.InputField() - readme_content: str = dspy.OutputField(desc="Complete README.md content with proper formatting") + readme_content: str = dspy.OutputField( + desc="Complete README.md content with proper formatting" + ) # --- DSPy Modules --- @@ -384,7 +395,9 @@ def forward( # Extract project features features_result = self.features_extractor( - metadata=metadata, source_analyses=source_analyses, cli_help_text=cli_help_text + metadata=metadata, + source_analyses=source_analyses, + cli_help_text=cli_help_text, ) return ExtractedContext( @@ -531,14 +544,20 @@ def generate( python_version=python_version, ) - logger.info(f"Extracted context - Package 
manager: {context.environment.package_manager}") - logger.info(f"Available commands: {list(context.environment.available_commands.keys())}") + logger.info( + f"Extracted context - Package manager: {context.environment.package_manager}" + ) + logger.info( + f"Available commands: {list(context.environment.available_commands.keys())}" + ) # Generate README console.print("[bold green]Generating README content...[/bold green]") readme_generator = ReadmeGenerator() - with console.status("[bold green]Synthesizing README with DSPy...[/]", spinner="dots"): + with console.status( + "[bold green]Synthesizing README with DSPy...[/]", spinner="dots" + ): result = readme_generator(context=context) console.print("[green]✓ Generation complete.[/green]") @@ -547,7 +566,9 @@ def generate( console.print(f"[dim]Writing output to [bold]{output}[/bold]...[/dim]") output.write_text(result.readme_content, encoding="utf-8") - console.print(f"\n[bold green]✅ README successfully generated at: {output}[/bold green]") + console.print( + f"\n[bold green]✅ README successfully generated at: {output}[/bold green]" + ) except Exception as e: logger.error(f"Generation failed: {e}", exc_info=True) diff --git a/scripts/replace_list_with_sequence.py b/scripts/replace_list_with_sequence.py index d20ba4b..b62a025 100644 --- a/scripts/replace_list_with_sequence.py +++ b/scripts/replace_list_with_sequence.py @@ -1,64 +1,387 @@ -import os +""" +Functional code transformation script for replacing list/List with Sequence. + +Implements functional programming principles with clear service boundaries, +error handling as values, and immutable data transformations. 
+""" + +from __future__ import annotations + import re import shutil +from collections.abc import Sequence as SeqType +from dataclasses import dataclass, replace from pathlib import Path -import logging -import sys +from typing import Protocol -# Configure logging -logging.basicConfig( - level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" -) -logger = logging.getLogger(__name__) +from returns.result import Result, Success, Failure, safe -def replace_list_with_sequence(file_path: Path) -> None: - """Replace list/List with Sequence and add import if needed""" - try: - # Read the entire file - content = file_path.read_text(encoding="utf-8") +# ============================================================================ +# Domain Models +# ============================================================================ - # Check if Sequence import exists - has_sequence_import = any( - re.search(r"from\s+collections\.abc\s+import\s+Sequence", line) - for line in content.splitlines() - ) +@dataclass(frozen=True) +class FileContent: + """Immutable representation of file content.""" + path: Path + content: str + encoding: str = "utf-8" - # Replace list/List with Sequence - new_content = re.sub(r"\blist\b", "Sequence", content) - new_content = re.sub(r"\bList\b", "Sequence", new_content) - # Add import if needed - if not has_sequence_import: - new_content = f"from collections.abc import Sequence\n{new_content}" +@dataclass(frozen=True) +class TransformationRule: + """Immutable rule for text transformation.""" + pattern: re.Pattern[str] + replacement: str + description: str - # Create backup - backup_path = file_path.with_suffix(file_path.suffix + ".bak") - shutil.copy2(file_path, backup_path) - logger.info(f"Created backup: {backup_path}") - # Write changes - file_path.write_text(new_content, encoding="utf-8") - logger.info(f"Updated: {file_path}") +@dataclass(frozen=True) +class ImportStatement: + """Immutable representation of an import statement.""" + 
module: str + imports: tuple[str, ...] + line: str - except Exception as e: - logger.error(f"Error processing {file_path}: {e}") +@dataclass(frozen=True) +class TransformationResult: + """Result of applying transformations to file content.""" + original: FileContent + transformed: FileContent + rules_applied: tuple[TransformationRule, ...] + import_added: ImportStatement | None = None -def process_directory(directory: Path) -> None: - """Process all Python files in directory""" - for root, _, files in os.walk(directory): - for file in files: - if file.endswith(".py"): - file_path = Path(root) / file - replace_list_with_sequence(file_path) +# ============================================================================ +# Service Interfaces +# ============================================================================ -if __name__ == "__main__": - target_dir = Path("src/robofactor") - if not target_dir.exists(): - logger.error(f"Target directory not found: {target_dir}") - sys.exit(1) +class FileOperations(Protocol): + """Interface for file I/O operations.""" + + def read_file(self, path: Path) -> Result[FileContent, Exception]: + """Read file content safely.""" + ... + + def write_file(self, content: FileContent) -> Result[Path, Exception]: + """Write content to file safely.""" + ... + + def create_backup(self, path: Path) -> Result[Path, Exception]: + """Create backup of file.""" + ... + + +class TextTransformer(Protocol): + """Interface for text transformation operations.""" + + def apply_transformations( + self, + content: FileContent, + rules: SeqType[TransformationRule] + ) -> Result[TransformationResult, str]: + """Apply transformation rules to content.""" + ... + + def ensure_import( + self, + content: FileContent, + import_statement: ImportStatement + ) -> Result[FileContent, str]: + """Ensure import statement exists in content.""" + ... 
+ + +class DirectoryProcessor(Protocol): + """Interface for directory traversal operations.""" + + def find_python_files(self, directory: Path) -> Result[tuple[Path, ...], str]: + """Find all Python files in directory recursively.""" + ... + + +# ============================================================================ +# Implementation Services +# ============================================================================ + +class SafeFileOperations: + """Safe file operations implementation using functional patterns.""" + + @safe + def read_file(self, path: Path) -> FileContent: + """Read file content with automatic error handling.""" + content = path.read_text(encoding="utf-8") + return FileContent(path=path, content=content) + + @safe + def write_file(self, content: FileContent) -> Path: + """Write content to file with automatic error handling.""" + _ = content.path.write_text(content.content, encoding=content.encoding) + return content.path + + @safe + def create_backup(self, path: Path) -> Path: + """Create backup file with automatic error handling.""" + backup_path = path.with_suffix(path.suffix + ".bak") + shutil.copy2(path, backup_path) + return backup_path + + +class FunctionalTextTransformer: + """Functional text transformation implementation.""" + + def apply_transformations( + self, + content: FileContent, + rules: SeqType[TransformationRule] + ) -> Result[TransformationResult, str]: + """Apply transformation rules functionally.""" + def _apply_rule(text: str, rule: TransformationRule) -> str: + return rule.pattern.sub(rule.replacement, text) + + try: + # Apply transformations immutably + transformed_content = content.content + applied_rules: list[TransformationRule] = [] + + for rule in rules: + original_content = transformed_content + transformed_content = _apply_rule(transformed_content, rule) + + # Track which rules were actually applied + if original_content != transformed_content: + applied_rules.append(rule) + + transformed_file = 
replace(content, content=transformed_content) + + return Success(TransformationResult( + original=content, + transformed=transformed_file, + rules_applied=tuple(applied_rules) + )) + + except Exception as e: + return Failure(f"Transformation failed: {e}") + + def ensure_import( + self, + content: FileContent, + import_statement: ImportStatement + ) -> Result[FileContent, str]: + """Ensure import statement exists, adding if necessary.""" + try: + lines = content.content.splitlines() + + # Check if import already exists + has_import = any( + import_statement.module in line and + all(imp in line for imp in import_statement.imports) + for line in lines + ) + + if has_import: + return Success(content) + + # Add import at the top after any existing imports + import_line = import_statement.line + + # Find insertion point (after last import or at beginning) + insert_index = 0 + for i, line in enumerate(lines): + if line.strip().startswith(('import ', 'from ')): + insert_index = i + 1 + elif line.strip() and not line.startswith('#'): + break + + new_lines = lines[:insert_index] + [import_line] + lines[insert_index:] + new_content = '\n'.join(new_lines) + + return Success(replace(content, content=new_content)) + + except Exception as e: + return Failure(f"Import addition failed: {e}") + + +class RecursiveDirectoryProcessor: + """Directory processing implementation.""" + + def find_python_files(self, directory: Path) -> Result[tuple[Path, ...], str]: + """Find Python files recursively with error handling.""" + try: + if not directory.exists(): + return Failure(f"Directory does not exist: {directory}") + + if not directory.is_dir(): + return Failure(f"Path is not a directory: {directory}") + + python_files = tuple( + path for path in directory.rglob("*.py") + if path.is_file() + ) + + return Success(python_files) + + except Exception as e: + return Failure(f"Directory traversal failed: {e}") - process_directory(target_dir) - logger.info(f"Successfully updated files in 
{target_dir}") + +# ============================================================================ +# Configuration and Rules +# ============================================================================ + +# Transformation rules for list -> Sequence replacement +LIST_TO_SEQUENCE_RULES: tuple[TransformationRule, ...] = ( + TransformationRule( + pattern=re.compile(r'\blist\b'), + replacement="Sequence", + description="Replace 'list' with 'Sequence'" + ), + TransformationRule( + pattern=re.compile(r'\bList\b'), + replacement="Sequence", + description="Replace 'List' with 'Sequence'" + ), +) + +# Import statement to add +SEQUENCE_IMPORT = ImportStatement( + module="collections.abc", + imports=("Sequence",), + line="from collections.abc import Sequence" +) + + +# ============================================================================ +# Application Service +# ============================================================================ + +@dataclass(frozen=True) +class CodeTransformationService: + """Main application service with dependency injection.""" + + file_ops: FileOperations + text_transformer: TextTransformer + directory_processor: DirectoryProcessor + + def transform_file(self, file_path: Path) -> Result[TransformationResult, str]: + """Transform a single file with full error handling.""" + def _process_content(content: FileContent) -> Result[TransformationResult, str]: + # Apply transformations + transform_result = self.text_transformer.apply_transformations( + content, LIST_TO_SEQUENCE_RULES + ) + + match transform_result: + case Success(result): + # Ensure import if transformations were applied + if result.rules_applied: + import_result = self.text_transformer.ensure_import( + result.transformed, SEQUENCE_IMPORT + ) + match import_result: + case Success(updated_content): + # Create backup and write + backup_result = self.file_ops.create_backup(content.path) + match backup_result: + case Success(_): + write_result = 
self.file_ops.write_file(updated_content) + match write_result: + case Success(_): + return Success(replace(result, transformed=updated_content)) + case Failure(error): + return Failure(f"Write failed: {error}") + case _: + return Failure("Unknown write error") + case Failure(error): + return Failure(f"Backup failed: {error}") + case _: + return Failure("Unknown backup error") + case Failure(error): + return Failure(f"Import failed: {error}") + case _: + return Failure("Unknown import error") + else: + # No changes needed, return original + return Success(result) + case Failure(error): + return Failure(error) + case _: + return Failure("Unknown transformation error") + + # Read file and process + read_result = self.file_ops.read_file(file_path) + match read_result: + case Success(content): + return _process_content(content) + case Failure(error): + return Failure(f"Read failed: {error}") + case _: + return Failure("Unknown read error") + + def transform_directory(self, directory: Path) -> Result[tuple[TransformationResult, ...], str]: + """Transform all Python files in directory.""" + def _transform_files(files: tuple[Path, ...]) -> Result[tuple[TransformationResult, ...], str]: + results: list[TransformationResult] = [] + errors: list[str] = [] + + for file_path in files: + result = self.transform_file(file_path) + match result: + case Success(transformation_result): + results.append(transformation_result) + case Failure(error): + errors.append(f"Failed to transform {file_path}: {error}") + case _: + errors.append(f"Unknown error for {file_path}") + + if errors: + return Failure(f"Errors occurred: {'; '.join(errors)}") + + return Success(tuple(results)) + + files_result = self.directory_processor.find_python_files(directory) + match files_result: + case Success(files): + return _transform_files(files) + case Failure(error): + return Failure(error) + case _: + return Failure("Unknown directory processing error") + + +# 
============================================================================ +# Application Entry Point +# ============================================================================ + +def create_application() -> CodeTransformationService: + """Factory function for creating the application with dependencies.""" + return CodeTransformationService( + file_ops=SafeFileOperations(), + text_transformer=FunctionalTextTransformer(), + directory_processor=RecursiveDirectoryProcessor() + ) + + +def main() -> None: + """Main application entry point.""" + app = create_application() + target_directory = Path("src/robofactor") + + match app.transform_directory(target_directory): + case Success(results): + print(f"✅ Successfully transformed {len(results)} files in {target_directory}") + for result in results: + if result.rules_applied: + print(f" 📝 Transformed: {result.transformed.path}") + case Failure(error): + print(f"❌ Transformation failed: {error}") + _ = exit(1) # Explicitly ignore return value + case _: + print("❌ Unknown error occurred") + _ = exit(1) + + +if __name__ == "__main__": + main() diff --git a/src/robofactor/app/main.py b/src/robofactor/app/main.py index 79fb562..5d15b87 100644 --- a/src/robofactor/app/main.py +++ b/src/robofactor/app/main.py @@ -26,12 +26,16 @@ app = typer.Typer() -def _setup_environment(tracing: bool, mlflow_uri: str, mlflow_experiment: str) -> Console: +def _setup_environment( + tracing: bool, mlflow_uri: str, mlflow_experiment: str +) -> Console: """Configures warnings, MLflow, and returns a rich Console.""" utils.suppress_pydantic_warnings() console = Console() if tracing: - console.print(f"[bold yellow]MLflow tracing enabled. URI: {mlflow_uri}[/bold yellow]") + console.print( + f"[bold yellow]MLflow tracing enabled. 
URI: {mlflow_uri}[/bold yellow]" + ) mlflow.set_tracking_uri(mlflow_uri) _ = mlflow.set_experiment(mlflow_experiment) _ = mlflow.autolog() @@ -66,7 +70,9 @@ def _load_or_compile_model( num_threads=8, ) teleprompter.compile( - refactorer, trainset=list(load_training_data()), requires_permission_to_run=False + refactorer, + trainset=load_training_data(), + requires_permission_to_run=False, ) console.print(f"Optimization complete. Saving to {optimizer_path}...") self_correcting_refactorer.save(str(optimizer_path), save_program=True) @@ -87,15 +93,17 @@ def _run_refactoring_on_file( console.print( Panel( - Syntax(source_code, "python", theme=config.RICH_SYNTAX_THEME, line_numbers=True), + Syntax( + source_code, "python", theme=config.RICH_SYNTAX_THEME, line_numbers=True + ), title=f"[bold]Original Code: {script_path.name}[/bold]", border_style="blue", ) ) - refactor_example = dspy.Example(code_snippet=source_code, test_cases=[]).with_inputs( - "code_snippet" - ) + refactor_example = dspy.Example( + code_snippet=source_code, test_cases=[] + ).with_inputs("code_snippet") prediction = refactorer(**refactor_example.inputs()) ui.display_refactoring_process(console, prediction) @@ -113,7 +121,9 @@ def _run_refactoring_on_file( f"[yellow]Writing refactored code back to {script_path.name}...[/yellow]" ) _ = script_path.write_text(refactored_code, encoding="utf-8") - console.print(f"[green]Refactoring of {script_path.name} complete.[/green]") + console.print( + f"[green]Refactoring of {script_path.name} complete.[/green]" + ) case Failure(error_message): console.print( Panel( @@ -159,7 +169,9 @@ def main( "--prompt-llm", help="Model for generating prompts during optimization.", ), - tracing: bool = typer.Option(True, "--tracing/--no-tracing", help="Enable MLflow tracing."), + tracing: bool = typer.Option( + True, "--tracing/--no-tracing", help="Enable MLflow tracing." 
+ ), mlflow_uri: str = typer.Option( config.DEFAULT_MLFLOW_TRACKING_URI, "--mlflow-uri", diff --git a/src/robofactor/parsing/models.py b/src/robofactor/parsing/models.py index 20eade0..97ff30d 100644 --- a/src/robofactor/parsing/models.py +++ b/src/robofactor/parsing/models.py @@ -22,7 +22,7 @@ class CodeQualityScores(BaseModel): complexity_score: float typing_score: float docstring_score: float - linting_issues: Sequence[str] = Field(default_factory=list) + linting_issues: Sequence[str] = Field(default_factory=Sequence) class ParameterKind(enum.Enum): diff --git a/src/robofactor/training/training_loader.py b/src/robofactor/training/training_loader.py index 2f199ed..59afbdd 100644 --- a/src/robofactor/training/training_loader.py +++ b/src/robofactor/training/training_loader.py @@ -30,7 +30,7 @@ def is_training_item(x: JSON) -> TypeGuard[JSONObject]: ) -def load_training_data() -> Sequence[dspy.Example]: +def load_training_data() -> list[dspy.Example]: data_path = Path(__file__).parent / TRAINING_DATA_FILE try: # CAST the untyped json.loads → JSON @@ -47,7 +47,7 @@ def load_training_data() -> Sequence[dspy.Example]: logger.error(f"Expected top-level array, got {type(raw).__name__}") return [] - items: Sequence[dspy.Example] = [] + items: list[dspy.Example] = [] for idx, entry in enumerate(raw): if not is_training_item(entry): logger.error(f"Invalid training entry at index {idx}: {entry!r}") From bb9570cbc4748929a0aed663a72acb3055bd8602 Mon Sep 17 00:00:00 2001 From: ethan-wickstrom Date: Tue, 24 Jun 2025 04:15:21 -0500 Subject: [PATCH 11/26] chore: regen type stubs for DSPy --- typings/dspy/__init__.pyi | 25 +- typings/dspy/adapters/__init__.pyi | 14 +- typings/dspy/adapters/base.pyi | 81 +++--- typings/dspy/adapters/chat_adapter.pyi | 66 +++-- typings/dspy/adapters/json_adapter.pyi | 54 ++-- typings/dspy/adapters/two_step_adapter.pyi | 44 ++-- typings/dspy/adapters/types/audio.pyi | 31 ++- typings/dspy/adapters/types/base_type.pyi | 16 +- 
typings/dspy/adapters/types/history.pyi | 3 +- typings/dspy/adapters/types/image.pyi | 30 ++- typings/dspy/adapters/types/tool.pyi | 73 +++--- typings/dspy/adapters/utils.pyi | 13 +- typings/dspy/adapters/xml_adapter.pyi | 24 ++ typings/dspy/clients/__init__.pyi | 31 +-- typings/dspy/clients/base_lm.pyi | 31 ++- typings/dspy/clients/cache.pyi | 56 ++--- typings/dspy/clients/databricks.pyi | 44 ++-- typings/dspy/clients/embedding.pyi | 22 +- typings/dspy/clients/lm.pyi | 73 +++--- typings/dspy/clients/lm_local.pyi | 38 +-- typings/dspy/clients/lm_local_arbor.pyi | 96 +++++--- typings/dspy/clients/openai.pyi | 62 +++-- typings/dspy/clients/provider.pyi | 86 ++++--- typings/dspy/clients/utils_finetune.pyi | 34 ++- typings/dspy/datasets/alfworld/__init__.pyi | 1 + typings/dspy/datasets/alfworld/alfworld.pyi | 42 +++- typings/dspy/datasets/colors.pyi | 10 +- typings/dspy/datasets/dataloader.pyi | 67 +++-- typings/dspy/datasets/dataset.pyi | 39 ++- typings/dspy/datasets/hotpotqa.pyi | 7 +- typings/dspy/datasets/math.pyi | 12 +- typings/dspy/dsp/__init__.pyi | 1 + typings/dspy/dsp/colbertv2.pyi | 55 +++-- typings/dspy/dsp/utils/__init__.pyi | 2 +- typings/dspy/dsp/utils/dpr.pyi | 80 +++--- typings/dspy/dsp/utils/metrics.pyi | 20 -- typings/dspy/dsp/utils/settings.pyi | 49 ++-- typings/dspy/dsp/utils/utils.pyi | 67 +++-- typings/dspy/evaluate/__init__.pyi | 13 +- typings/dspy/evaluate/auto_evaluation.pyi | 42 ++-- typings/dspy/evaluate/evaluate.pyi | 95 +++---- typings/dspy/evaluate/metrics.pyi | 30 ++- typings/dspy/experimental/__init__.pyi | 7 - typings/dspy/experimental/module_graph.pyi | 31 --- .../experimental/synthesizer/__init__.pyi | 8 - .../dspy/experimental/synthesizer/config.pyi | 17 -- .../synthesizer/instruction_suffixes.pyi | 6 - .../experimental/synthesizer/signatures.pyi | 48 ---- .../experimental/synthesizer/synthesizer.pyi | 21 -- .../dspy/experimental/synthesizer/utils.pyi | 8 - typings/dspy/experimental/synthetic_data.pyi | 28 --- 
typings/dspy/predict/__init__.pyi | 17 +- typings/dspy/predict/aggregation.pyi | 6 +- typings/dspy/predict/avatar/__init__.pyi | 1 + typings/dspy/predict/avatar/avatar.pyi | 12 +- typings/dspy/predict/avatar/models.pyi | 12 +- typings/dspy/predict/avatar/signatures.pyi | 3 +- typings/dspy/predict/best_of_n.pyi | 16 +- typings/dspy/predict/chain_of_thought.pyi | 22 +- .../predict/chain_of_thought_with_hint.pyi | 9 - typings/dspy/predict/code_act.pyi | 19 +- typings/dspy/predict/knn.pyi | 6 +- .../dspy/predict/multi_chain_comparison.pyi | 12 +- typings/dspy/predict/parallel.pyi | 24 +- typings/dspy/predict/parameter.pyi | 6 +- typings/dspy/predict/predict.pyi | 53 ++-- typings/dspy/predict/program_of_thought.pyi | 15 +- typings/dspy/predict/react.pyi | 42 +++- typings/dspy/predict/refine.pyi | 25 +- typings/dspy/predict/retry.pyi | 1 + typings/dspy/primitives/__init__.pyi | 16 +- typings/dspy/primitives/assertions.pyi | 3 - typings/dspy/primitives/base_module.pyi | 87 +++++++ typings/dspy/primitives/example.pyi | 77 ++++-- typings/dspy/primitives/module.pyi | 117 +++++---- typings/dspy/primitives/prediction.pyi | 96 ++++++-- typings/dspy/primitives/program.pyi | 63 ----- .../dspy/primitives/python_interpreter.pyi | 45 +++- .../propose/dataset_summary_generator.pyi | 28 +-- typings/dspy/propose/grounded_proposer.pyi | 76 ++---- typings/dspy/propose/propose_base.pyi | 12 +- typings/dspy/propose/utils.pyi | 23 +- typings/dspy/retrieve/__init__.pyi | 7 - typings/dspy/retrieve/azureaisearch_rm.pyi | 231 ------------------ typings/dspy/retrieve/chromadb_rm.pyi | 79 ------ typings/dspy/retrieve/clarifai_rm.pyi | 53 ---- typings/dspy/retrieve/deeplake_rm.pyi | 55 ----- typings/dspy/retrieve/epsilla_rm.pyi | 21 -- typings/dspy/retrieve/faiss_rm.pyi | 3 - typings/dspy/retrieve/falkordb_rm.pyi | 87 ------- typings/dspy/retrieve/lancedb_rm.pyi | 58 ----- typings/dspy/retrieve/llama_index_rm.pyi | 58 ----- typings/dspy/retrieve/marqo_rm.pyi | 57 ----- 
typings/dspy/retrieve/milvus_rm.pyi | 64 ----- typings/dspy/retrieve/mongodb_atlas_rm.pyi | 34 --- typings/dspy/retrieve/my_scale_rm.pyi | 3 - typings/dspy/retrieve/neo4j_rm.pyi | 78 ------ typings/dspy/retrieve/pgvector_rm.pyi | 81 ------ typings/dspy/retrieve/pinecone_rm.pyi | 70 ------ typings/dspy/retrieve/qdrant_rm.pyi | 3 - typings/dspy/retrieve/ragatouille_rm.pyi | 37 --- typings/dspy/retrieve/retrieve.pyi | 29 --- typings/dspy/retrieve/snowflake_rm.pyi | 104 -------- typings/dspy/retrieve/vectara_rm.pyi | 57 ----- typings/dspy/retrieve/watson_discovery_rm.pyi | 43 ---- typings/dspy/retrieve/you_rm.pyi | 40 --- typings/dspy/retrievers/__init__.pyi | 3 +- .../databricks_rm.pyi | 34 +-- typings/dspy/retrievers/embeddings.pyi | 21 +- typings/dspy/retrievers/retrieve.pyi | 37 +++ .../{retrieve => retrievers}/weaviate_rm.pyi | 24 +- typings/dspy/signatures/__init__.pyi | 21 +- typings/dspy/signatures/field.pyi | 39 ++- typings/dspy/signatures/signature.pyi | 84 ++++--- typings/dspy/signatures/utils.pyi | 4 +- typings/dspy/streaming/__init__.pyi | 10 +- typings/dspy/streaming/messages.pyi | 61 +++-- typings/dspy/streaming/streamify.pyi | 27 +- typings/dspy/streaming/streaming_listener.pyi | 22 +- typings/dspy/teleprompt/__init__.pyi | 16 +- typings/dspy/teleprompt/avatar_optimizer.pyi | 49 ++-- typings/dspy/teleprompt/bettertogether.pyi | 21 +- typings/dspy/teleprompt/bootstrap.pyi | 21 +- .../dspy/teleprompt/bootstrap_finetune.pyi | 91 +++---- typings/dspy/teleprompt/copro_optimizer.pyi | 21 +- typings/dspy/teleprompt/ensemble.pyi | 10 +- typings/dspy/teleprompt/grpo.pyi | 71 ++---- typings/dspy/teleprompt/infer_rules.pyi | 38 ++- typings/dspy/teleprompt/knn_fewshot.pyi | 11 +- .../dspy/teleprompt/mipro_optimizer_v2.pyi | 53 +--- typings/dspy/teleprompt/random_search.pyi | 24 +- typings/dspy/teleprompt/signature_opt.pyi | 19 +- typings/dspy/teleprompt/simba.pyi | 24 +- typings/dspy/teleprompt/simba_utils.pyi | 24 +- typings/dspy/teleprompt/teleprompt.pyi | 19 +- 
typings/dspy/teleprompt/teleprompt_optuna.pyi | 23 +- typings/dspy/teleprompt/utils.pyi | 68 ++---- typings/dspy/teleprompt/vanilla.pyi | 10 +- typings/dspy/utils/__init__.pyi | 15 +- typings/dspy/utils/asyncify.pyi | 13 +- typings/dspy/utils/caching.pyi | 2 +- typings/dspy/utils/callback.pyi | 70 ++---- typings/dspy/utils/dummies.pyi | 31 ++- typings/dspy/utils/exceptions.pyi | 13 +- typings/dspy/utils/inspect_history.pyi | 3 +- typings/dspy/utils/langchain_tool.pyi | 7 +- typings/dspy/utils/logging_utils.pyi | 26 +- typings/dspy/utils/mcp.pyi | 5 +- typings/dspy/utils/parallelizer.pyi | 19 +- typings/dspy/utils/saving.pyi | 12 +- typings/dspy/utils/unbatchify.pyi | 28 +-- typings/dspy/utils/usage_tracker.pyi | 14 +- 152 files changed, 1992 insertions(+), 3415 deletions(-) create mode 100644 typings/dspy/adapters/xml_adapter.pyi delete mode 100644 typings/dspy/dsp/utils/metrics.pyi delete mode 100644 typings/dspy/experimental/__init__.pyi delete mode 100644 typings/dspy/experimental/module_graph.pyi delete mode 100644 typings/dspy/experimental/synthesizer/__init__.pyi delete mode 100644 typings/dspy/experimental/synthesizer/config.pyi delete mode 100644 typings/dspy/experimental/synthesizer/instruction_suffixes.pyi delete mode 100644 typings/dspy/experimental/synthesizer/signatures.pyi delete mode 100644 typings/dspy/experimental/synthesizer/synthesizer.pyi delete mode 100644 typings/dspy/experimental/synthesizer/utils.pyi delete mode 100644 typings/dspy/experimental/synthetic_data.pyi delete mode 100644 typings/dspy/predict/chain_of_thought_with_hint.pyi delete mode 100644 typings/dspy/primitives/assertions.pyi create mode 100644 typings/dspy/primitives/base_module.pyi delete mode 100644 typings/dspy/primitives/program.pyi delete mode 100644 typings/dspy/retrieve/__init__.pyi delete mode 100644 typings/dspy/retrieve/azureaisearch_rm.pyi delete mode 100644 typings/dspy/retrieve/chromadb_rm.pyi delete mode 100644 typings/dspy/retrieve/clarifai_rm.pyi delete mode 
100644 typings/dspy/retrieve/deeplake_rm.pyi delete mode 100644 typings/dspy/retrieve/epsilla_rm.pyi delete mode 100644 typings/dspy/retrieve/faiss_rm.pyi delete mode 100644 typings/dspy/retrieve/falkordb_rm.pyi delete mode 100644 typings/dspy/retrieve/lancedb_rm.pyi delete mode 100644 typings/dspy/retrieve/llama_index_rm.pyi delete mode 100644 typings/dspy/retrieve/marqo_rm.pyi delete mode 100644 typings/dspy/retrieve/milvus_rm.pyi delete mode 100644 typings/dspy/retrieve/mongodb_atlas_rm.pyi delete mode 100644 typings/dspy/retrieve/my_scale_rm.pyi delete mode 100644 typings/dspy/retrieve/neo4j_rm.pyi delete mode 100644 typings/dspy/retrieve/pgvector_rm.pyi delete mode 100644 typings/dspy/retrieve/pinecone_rm.pyi delete mode 100644 typings/dspy/retrieve/qdrant_rm.pyi delete mode 100644 typings/dspy/retrieve/ragatouille_rm.pyi delete mode 100644 typings/dspy/retrieve/retrieve.pyi delete mode 100644 typings/dspy/retrieve/snowflake_rm.pyi delete mode 100644 typings/dspy/retrieve/vectara_rm.pyi delete mode 100644 typings/dspy/retrieve/watson_discovery_rm.pyi delete mode 100644 typings/dspy/retrieve/you_rm.pyi rename typings/dspy/{retrieve => retrievers}/databricks_rm.pyi (88%) create mode 100644 typings/dspy/retrievers/retrieve.pyi rename typings/dspy/{retrieve => retrievers}/weaviate_rm.pyi (82%) diff --git a/typings/dspy/__init__.pyi b/typings/dspy/__init__.pyi index 6d3ec82..bd3e92a 100644 --- a/typings/dspy/__init__.pyi +++ b/typings/dspy/__init__.pyi @@ -2,27 +2,15 @@ This type stub file was generated by pyright. 
""" -import dspy.retrievers from dspy.predict import * from dspy.primitives import * -from dspy.retrieve import * +from dspy.retrievers import * from dspy.signatures import * from dspy.teleprompt import * from dspy.evaluate import Evaluate from dspy.clients import * from dspy.clients import DSPY_CACHE -from dspy.adapters import ( - Adapter, - Audio, - BaseType, - ChatAdapter, - History, - Image, - JSONAdapter, - Tool, - ToolCalls, - TwoStepAdapter, -) +from dspy.adapters import Adapter, Audio, BaseType, ChatAdapter, History, Image, JSONAdapter, Tool, ToolCalls, TwoStepAdapter, XMLAdapter from dspy.utils.logging_utils import configure_dspy_loggers, disable_logging, enable_logging from dspy.utils.asyncify import asyncify from dspy.utils.saving import load @@ -30,14 +18,7 @@ from dspy.streaming.streamify import streamify from dspy.utils.usage_tracker import track_usage from dspy.dsp.utils.settings import settings from dspy.dsp.colbertv2 import ColBERTv2 -from .__metadata__ import ( - __author__, - __author_email__, - __description__, - __name__, - __url__, - __version__, -) +from dspy.__metadata__ import __author__, __author_email__, __description__, __name__, __url__, __version__ configure = ... context = ... 
diff --git a/typings/dspy/adapters/__init__.pyi b/typings/dspy/adapters/__init__.pyi index 50e175a..3188d8f 100644 --- a/typings/dspy/adapters/__init__.pyi +++ b/typings/dspy/adapters/__init__.pyi @@ -7,16 +7,6 @@ from dspy.adapters.chat_adapter import ChatAdapter from dspy.adapters.json_adapter import JSONAdapter from dspy.adapters.two_step_adapter import TwoStepAdapter from dspy.adapters.types import Audio, BaseType, History, Image, Tool, ToolCalls +from dspy.adapters.xml_adapter import XMLAdapter -__all__ = [ - "Adapter", - "ChatAdapter", - "BaseType", - "History", - "Image", - "Audio", - "JSONAdapter", - "TwoStepAdapter", - "Tool", - "ToolCalls", -] +__all__ = ["Adapter", "ChatAdapter", "BaseType", "History", "Image", "Audio", "JSONAdapter", "XMLAdapter", "TwoStepAdapter", "Tool", "ToolCalls"] diff --git a/typings/dspy/adapters/base.pyi b/typings/dspy/adapters/base.pyi index f68b40d..abeafe5 100644 --- a/typings/dspy/adapters/base.pyi +++ b/typings/dspy/adapters/base.pyi @@ -8,30 +8,22 @@ from dspy.utils.callback import BaseCallback from dspy.clients.lm import LM logger = ... -if TYPE_CHECKING: ... - +if TYPE_CHECKING: + ... class Adapter: - def __init__(self, callbacks: Optional[list[BaseCallback]] = ...) -> None: ... - def __init_subclass__(cls, **kwargs) -> None: ... - def __call__( - self, - lm: LM, - lm_kwargs: dict[str, Any], - signature: Type[Signature], - demos: list[dict[str, Any]], - inputs: dict[str, Any], - ) -> list[dict[str, Any]]: ... - async def acall( - self, - lm: LM, - lm_kwargs: dict[str, Any], - signature: Type[Signature], - demos: list[dict[str, Any]], - inputs: dict[str, Any], - ) -> list[dict[str, Any]]: ... - def format( - self, signature: Type[Signature], demos: list[dict[str, Any]], inputs: dict[str, Any] - ) -> list[dict[str, Any]]: + def __init__(self, callbacks: Optional[list[BaseCallback]] = ...) -> None: + ... + + def __init_subclass__(cls, **kwargs) -> None: + ... 
+ + def __call__(self, lm: LM, lm_kwargs: dict[str, Any], signature: Type[Signature], demos: list[dict[str, Any]], inputs: dict[str, Any]) -> list[dict[str, Any]]: + ... + + async def acall(self, lm: LM, lm_kwargs: dict[str, Any], signature: Type[Signature], demos: list[dict[str, Any]], inputs: dict[str, Any]) -> list[dict[str, Any]]: + ... + + def format(self, signature: Type[Signature], demos: list[dict[str, Any]], inputs: dict[str, Any]) -> list[dict[str, Any]]: """Format the input messages for the LM call. This method converts the DSPy structured input along with few-shot examples and conversation history into @@ -72,7 +64,7 @@ class Adapter: A list of multiturn messages as expected by the LM. """ ... - + def format_field_description(self, signature: Type[Signature]) -> str: """Format the field description for the system message. @@ -86,7 +78,7 @@ class Adapter: A string that contains the field description for the input fields and the output fields. """ ... - + def format_field_structure(self, signature: Type[Signature]) -> str: """Format the field structure for the system message. @@ -98,7 +90,7 @@ class Adapter: signature: The DSPy signature for which to format the field structure. """ ... - + def format_task_description(self, signature: Type[Signature]) -> str: """Format the task description for the system message. @@ -112,15 +104,8 @@ class Adapter: A string that describes the task. """ ... - - def format_user_message_content( - self, - signature: Type[Signature], - inputs: dict[str, Any], - prefix: str = ..., - suffix: str = ..., - main_request: bool = ..., - ) -> str: + + def format_user_message_content(self, signature: Type[Signature], inputs: dict[str, Any], prefix: str = ..., suffix: str = ..., main_request: bool = ...) -> str: """Format the user message content. 
This method formats the user message content, which can be used in formatting few-shot examples, conversation @@ -136,13 +121,8 @@ class Adapter: A string that contains the user message content. """ ... - - def format_assistant_message_content( - self, - signature: Type[Signature], - outputs: dict[str, Any], - missing_field_message: Optional[str] = ..., - ) -> str: + + def format_assistant_message_content(self, signature: Type[Signature], outputs: dict[str, Any], missing_field_message: Optional[str] = ...) -> str: """Format the assistant message content. This method formats the assistant message content, which can be used in formatting few-shot examples, @@ -157,10 +137,8 @@ class Adapter: A string that contains the assistant message content. """ ... - - def format_demos( - self, signature: Type[Signature], demos: list[dict[str, Any]] - ) -> list[dict[str, Any]]: + + def format_demos(self, signature: Type[Signature], demos: list[dict[str, Any]]) -> list[dict[str, Any]]: """Format the few-shot examples. This method formats the few-shot examples as multiturn messages. @@ -174,10 +152,8 @@ class Adapter: A list of multiturn messages. """ ... - - def format_conversation_history( - self, signature: Type[Signature], history_field_name: str, inputs: dict[str, Any] - ) -> list[dict[str, Any]]: + + def format_conversation_history(self, signature: Type[Signature], history_field_name: str, inputs: dict[str, Any]) -> list[dict[str, Any]]: """Format the conversation history. This method formats the conversation history and the current input as multiturn messages. @@ -191,7 +167,7 @@ class Adapter: A list of multiturn messages. """ ... - + def parse(self, signature: Type[Signature], completion: str) -> dict[str, Any]: """Parse the LM output into a dictionary of the output fields. @@ -205,3 +181,6 @@ class Adapter: A dictionary of the output fields. """ ... 
+ + + diff --git a/typings/dspy/adapters/chat_adapter.pyi b/typings/dspy/adapters/chat_adapter.pyi index 8af8283..97ee019 100644 --- a/typings/dspy/adapters/chat_adapter.pyi +++ b/typings/dspy/adapters/chat_adapter.pyi @@ -10,23 +10,25 @@ from dspy.signatures.signature import Signature from dspy.utils.callback import BaseCallback field_header_pattern = ... - class FieldInfoWithName(NamedTuple): name: str info: FieldInfo ... + class ChatAdapter(Adapter): - def __init__(self, callbacks: Optional[list[BaseCallback]] = ...) -> None: ... - def __call__( - self, - lm: LM, - lm_kwargs: dict[str, Any], - signature: Type[Signature], - demos: list[dict[str, Any]], - inputs: dict[str, Any], - ) -> list[dict[str, Any]]: ... - def format_field_description(self, signature: Type[Signature]) -> str: ... + def __init__(self, callbacks: Optional[list[BaseCallback]] = ...) -> None: + ... + + def __call__(self, lm: LM, lm_kwargs: dict[str, Any], signature: Type[Signature], demos: list[dict[str, Any]], inputs: dict[str, Any]) -> list[dict[str, Any]]: + ... + + async def acall(self, lm: LM, lm_kwargs: dict[str, Any], signature: Type[Signature], demos: list[dict[str, Any]], inputs: dict[str, Any]) -> list[dict[str, Any]]: + ... + + def format_field_description(self, signature: Type[Signature]) -> str: + ... + def format_field_structure(self, signature: Type[Signature]) -> str: """ `ChatAdapter` requires input and output fields to be in their own sections, with section header using markers @@ -34,16 +36,13 @@ class ChatAdapter(Adapter): output fields section to indicate the end of the output fields. """ ... - - def format_task_description(self, signature: Type[Signature]) -> str: ... - def format_user_message_content( - self, - signature: Type[Signature], - inputs: dict[str, Any], - prefix: str = ..., - suffix: str = ..., - main_request: bool = ..., - ) -> str: ... + + def format_task_description(self, signature: Type[Signature]) -> str: + ... 
+ + def format_user_message_content(self, signature: Type[Signature], inputs: dict[str, Any], prefix: str = ..., suffix: str = ..., main_request: bool = ...) -> str: + ... + def user_message_output_requirements(self, signature: Type[Signature]) -> str: """Returns a simplified format reminder for the language model. @@ -62,11 +61,13 @@ class ChatAdapter(Adapter): for inline reminders within chat messages. """ ... - - def format_assistant_message_content( - self, signature: Type[Signature], outputs: dict[str, Any], missing_field_message=... - ) -> str: ... - def parse(self, signature: Type[Signature], completion: str) -> dict[str, Any]: ... + + def format_assistant_message_content(self, signature: Type[Signature], outputs: dict[str, Any], missing_field_message=...) -> str: + ... + + def parse(self, signature: Type[Signature], completion: str) -> dict[str, Any]: + ... + def format_field_with_value(self, fields_with_values: Dict[FieldInfoWithName, Any]) -> str: """ Formats the values of the specified fields according to the field's DSPy type (input or output), @@ -81,14 +82,8 @@ class ChatAdapter(Adapter): The joined formatted values of the fields, represented as a string """ ... - - def format_finetune_data( - self, - signature: Type[Signature], - demos: list[dict[str, Any]], - inputs: dict[str, Any], - outputs: dict[str, Any], - ) -> dict[str, list[Any]]: + + def format_finetune_data(self, signature: Type[Signature], demos: list[dict[str, Any]], inputs: dict[str, Any], outputs: dict[str, Any]) -> dict[str, list[Any]]: """ Format the call data into finetuning data according to the OpenAI API specifications. @@ -97,3 +92,6 @@ class ChatAdapter(Adapter): wrapped in a dictionary with a "messages" key. """ ... 
+ + + diff --git a/typings/dspy/adapters/json_adapter.pyi b/typings/dspy/adapters/json_adapter.pyi index 4e559b2..13775f0 100644 --- a/typings/dspy/adapters/json_adapter.pyi +++ b/typings/dspy/adapters/json_adapter.pyi @@ -2,34 +2,32 @@ This type stub file was generated by pyright. """ -from typing import Any, Dict +from typing import Any, Dict, Type from dspy.adapters.chat_adapter import ChatAdapter, FieldInfoWithName from dspy.clients.lm import LM from dspy.signatures.signature import Signature logger = ... - class JSONAdapter(ChatAdapter): - def __call__( - self, - lm: LM, - lm_kwargs: dict[str, Any], - signature: type[Signature], - demos: list[dict[str, Any]], - inputs: dict[str, Any], - ) -> list[dict[str, Any]]: ... - def format_field_structure(self, signature: type[Signature]) -> str: ... - def user_message_output_requirements(self, signature: type[Signature]) -> str: ... - def format_assistant_message_content( - self, - signature: type[Signature], - outputs: dict[str, Any], - missing_field_message=..., - ) -> str: ... - def parse(self, signature: type[Signature], completion: str) -> dict[str, Any]: ... - def format_field_with_value( - self, fields_with_values: Dict[FieldInfoWithName, Any], role: str = ... - ) -> str: + def __call__(self, lm: LM, lm_kwargs: dict[str, Any], signature: Type[Signature], demos: list[dict[str, Any]], inputs: dict[str, Any]) -> list[dict[str, Any]]: + ... + + async def acall(self, lm: LM, lm_kwargs: dict[str, Any], signature: Type[Signature], demos: list[dict[str, Any]], inputs: dict[str, Any]) -> list[dict[str, Any]]: + ... + + def format_field_structure(self, signature: Type[Signature]) -> str: + ... + + def user_message_output_requirements(self, signature: Type[Signature]) -> str: + ... + + def format_assistant_message_content(self, signature: Type[Signature], outputs: dict[str, Any], missing_field_message=...) -> str: + ... + + def parse(self, signature: Type[Signature], completion: str) -> dict[str, Any]: + ... 
+ + def format_field_with_value(self, fields_with_values: Dict[FieldInfoWithName, Any], role: str = ...) -> str: """ Formats the values of the specified fields according to the field's DSPy type (input or output), annotation (e.g. str, int, etc.), and the type of the value itself. Joins the formatted values @@ -41,11 +39,9 @@ class JSONAdapter(ChatAdapter): The joined formatted values of the fields, represented as a string. """ ... + + def format_finetune_data(self, signature: Type[Signature], demos: list[dict[str, Any]], inputs: dict[str, Any], outputs: dict[str, Any]) -> dict[str, list[Any]]: + ... + + - def format_finetune_data( - self, - signature: type[Signature], - demos: list[dict[str, Any]], - inputs: dict[str, Any], - outputs: dict[str, Any], - ) -> dict[str, list[Any]]: ... diff --git a/typings/dspy/adapters/two_step_adapter.pyi b/typings/dspy/adapters/two_step_adapter.pyi index 3a26d87..2245be2 100644 --- a/typings/dspy/adapters/two_step_adapter.pyi +++ b/typings/dspy/adapters/two_step_adapter.pyi @@ -27,10 +27,10 @@ class TwoStepAdapter(Adapter): print(result) ``` """ - def __init__(self, extraction_model: LM) -> None: ... - def format( - self, signature: Type[Signature], demos: list[dict[str, Any]], inputs: dict[str, Any] - ) -> list[dict[str, Any]]: + def __init__(self, extraction_model: LM) -> None: + ... + + def format(self, signature: Type[Signature], demos: list[dict[str, Any]], inputs: dict[str, Any]) -> list[dict[str, Any]]: """ Format a prompt for the first stage with the main LM. This no specific structure is required for the main LM, we customize the format method @@ -45,7 +45,7 @@ class TwoStepAdapter(Adapter): A list of messages to be passed to the main LM. """ ... - + def parse(self, signature: Signature, completion: str) -> dict[str, Any]: """ Use a smaller LM (extraction_model) with chat adapter to extract structured data @@ -59,29 +59,19 @@ class TwoStepAdapter(Adapter): A dictionary containing the extracted structured data. """ ... 
- - async def acall( - self, - lm: LM, - lm_kwargs: dict[str, Any], - signature: Type[Signature], - demos: list[dict[str, Any]], - inputs: dict[str, Any], - ) -> list[dict[str, Any]]: ... + + async def acall(self, lm: LM, lm_kwargs: dict[str, Any], signature: Type[Signature], demos: list[dict[str, Any]], inputs: dict[str, Any]) -> list[dict[str, Any]]: + ... + def format_task_description(self, signature: Signature) -> str: """Create a description of the task based on the signature""" ... + + def format_user_message_content(self, signature: Type[Signature], inputs: dict[str, Any], prefix: str = ..., suffix: str = ...) -> str: + ... + + def format_assistant_message_content(self, signature: Type[Signature], outputs: dict[str, Any], missing_field_message: Optional[str] = ...) -> str: + ... + + - def format_user_message_content( - self, - signature: Type[Signature], - inputs: dict[str, Any], - prefix: str = ..., - suffix: str = ..., - ) -> str: ... - def format_assistant_message_content( - self, - signature: Type[Signature], - outputs: dict[str, Any], - missing_field_message: Optional[str] = ..., - ) -> str: ... diff --git a/typings/dspy/adapters/types/audio.pyi b/typings/dspy/adapters/types/audio.pyi index 8ebfe4f..d21adc1 100644 --- a/typings/dspy/adapters/types/audio.pyi +++ b/typings/dspy/adapters/types/audio.pyi @@ -7,50 +7,55 @@ from typing import Any, Union from dspy.adapters.types.base_type import BaseType SF_AVAILABLE = ... - class Audio(BaseType): data: str audio_format: str model_config = ... - def format(self) -> list[dict[str, Any]]: ... + def format(self) -> list[dict[str, Any]]: + ... + @pydantic.model_validator(mode="before") @classmethod def validate_input(cls, values: Any) -> Any: """ - Validate input for Audio, expecting 'data' and 'format' keys in dictionary. + Validate input for Audio, expecting 'data' and 'audio_format' keys in dictionary. """ ... 
- + @classmethod def from_url(cls, url: str) -> Audio: """ Download an audio file from URL and encode it as base64. """ ... - + @classmethod def from_file(cls, file_path: str) -> Audio: """ Read local audio file and encode it as base64. """ ... - + @classmethod def from_array(cls, array: Any, sampling_rate: int, format: str = ...) -> Audio: """ Process numpy-like array and encode it as base64. Uses sampling rate and audio format for encoding. """ ... + + def __str__(self) -> str: + ... + + def __repr__(self) -> str: + ... + - def __str__(self) -> str: ... - def __repr__(self) -> str: ... -def encode_audio( - audio: Union[str, bytes, dict, Audio, Any], sampling_rate: int = ..., format: str = ... -) -> dict: +def encode_audio(audio: Union[str, bytes, dict, Audio, Any], sampling_rate: int = ..., format: str = ...) -> dict: """ - Encode audio to a dict with 'data' and 'format'. - + Encode audio to a dict with 'data' and 'audio_format'. + Accepts: local file path, URL, data URI, dict, Audio instance, numpy array, or bytes (with known format). """ ... + diff --git a/typings/dspy/adapters/types/base_type.pyi b/typings/dspy/adapters/types/base_type.pyi index 78250a8..19ab9ad 100644 --- a/typings/dspy/adapters/types/base_type.pyi +++ b/typings/dspy/adapters/types/base_type.pyi @@ -7,7 +7,6 @@ from typing import Any, Union CUSTOM_TYPE_START_IDENTIFIER = ... CUSTOM_TYPE_END_IDENTIFIER = ... - class BaseType(pydantic.BaseModel): """Base class to support creating custom types for DSPy signatures. @@ -24,24 +23,28 @@ class BaseType(pydantic.BaseModel): return [{"type": "image_url", "image_url": {"url": self.url}}] ``` """ - def format(self) -> Union[list[dict[str, Any]], str]: ... + def format(self) -> Union[list[dict[str, Any]], str]: + ... + @classmethod def description(cls) -> str: """Description of the custom type""" ... 
- + @classmethod - def extract_custom_type_from_annotation(cls, annotation): # -> list[type[Self]] | list[Any]: + def extract_custom_type_from_annotation(cls, annotation): # -> list[type[Self]] | list[Any]: """Extract all custom types from the annotation. This is used to extract all custom types from the annotation of a field, while the annotation can have arbitrary level of nesting. For example, we detect `Tool` is in `list[dict[str, Tool]]`. """ ... - + @pydantic.model_serializer() - def serialize_model(self): # -> str: + def serialize_model(self): # -> str: ... + + def split_message_content_for_custom_types(messages: list[dict[str, Any]]) -> list[dict[str, Any]]: """Split user message content into a list of content blocks. @@ -70,3 +73,4 @@ def split_message_content_for_custom_types(messages: list[dict[str, Any]]) -> li A list of messages with the content split into a list of content blocks around custom types content. """ ... + diff --git a/typings/dspy/adapters/types/history.pyi b/typings/dspy/adapters/types/history.pyi index 572f7ed..86256cd 100644 --- a/typings/dspy/adapters/types/history.pyi +++ b/typings/dspy/adapters/types/history.pyi @@ -59,6 +59,7 @@ class History(pydantic.BaseModel): outputs_with_history = predict(question="Are you sure?", history=history) ``` """ - messages: list[dict[str, Any]] model_config = ... + + diff --git a/typings/dspy/adapters/types/image.pyi b/typings/dspy/adapters/types/image.pyi index 1b2e827..5c4140e 100644 --- a/typings/dspy/adapters/types/image.pyi +++ b/typings/dspy/adapters/types/image.pyi @@ -8,35 +8,42 @@ from dspy.adapters.types.base_type import BaseType from PIL import Image as PILImage PIL_AVAILABLE = ... - class Image(BaseType): url: str model_config = ... - def format(self) -> Union[list[dict[str, Any]], str]: ... + def format(self) -> Union[list[dict[str, Any]], str]: + ... 
+ @pydantic.model_validator(mode="before") @classmethod - def validate_input(cls, values): # -> dict[str, str] | dict[Any, Any] | dict[str, Any]: + def validate_input(cls, values): # -> dict[str, str] | dict[Any, Any] | dict[str, Any]: ... + @classmethod - def from_url(cls, url: str, download: bool = ...): # -> Self: + def from_url(cls, url: str, download: bool = ...): # -> Self: ... + @classmethod - def from_file(cls, file_path: str): # -> Self: + def from_file(cls, file_path: str): # -> Self: ... + @classmethod - def from_PIL(cls, pil_image): # -> Self: + def from_PIL(cls, pil_image): # -> Self: + ... + + def __str__(self) -> str: ... - def __str__(self) -> str: ... - def __repr__(self): # -> str: + + def __repr__(self): # -> str: ... + + def is_url(string: str) -> bool: """Check if a string is a valid URL.""" ... -def encode_image( - image: Union[str, bytes, PILImage.Image, dict], download_images: bool = ... -) -> str: +def encode_image(image: Union[str, bytes, PILImage.Image, dict], download_images: bool = ...) -> str: """ Encode an image or file to a base64 data URI. @@ -55,3 +62,4 @@ def encode_image( def is_image(obj) -> bool: """Check if the object is an image or a valid media file reference.""" ... + diff --git a/typings/dspy/adapters/types/tool.pyi b/typings/dspy/adapters/types/tool.pyi index a80297b..18ab60c 100644 --- a/typings/dspy/adapters/types/tool.pyi +++ b/typings/dspy/adapters/types/tool.pyi @@ -9,16 +9,15 @@ from dspy.adapters.types.base_type import BaseType from dspy.utils.callback import with_callbacks from langchain.tools import BaseTool -if TYPE_CHECKING: ... +if TYPE_CHECKING: + ... _TYPE_MAPPING = ... - class Tool(BaseType): """Tool class. This class is used to simplify the creation of tools for tool calling (function calling) in LLMs. Only supports functions for now. """ - func: Callable name: Optional[str] = ... desc: Optional[str] = ... @@ -26,15 +25,7 @@ class Tool(BaseType): arg_types: Optional[dict[str, Any]] = ... 
arg_desc: Optional[dict[str, str]] = ... has_kwargs: bool = ... - def __init__( - self, - func: Callable, - name: Optional[str] = ..., - desc: Optional[str] = ..., - args: Optional[dict[str, Any]] = ..., - arg_types: Optional[dict[str, Any]] = ..., - arg_desc: Optional[dict[str, str]] = ..., - ) -> None: + def __init__(self, func: Callable, name: Optional[str] = ..., desc: Optional[str] = ..., args: Optional[dict[str, Any]] = ..., arg_types: Optional[dict[str, Any]] = ..., arg_desc: Optional[dict[str, str]] = ...) -> None: """Initialize the Tool class. Users can choose to specify the `name`, `desc`, `args`, and `arg_types`, or let the `dspy.Tool` @@ -64,18 +55,21 @@ class Tool(BaseType): ``` """ ... - - def format(self): # -> str: + + def format(self): # -> str: ... - def format_as_litellm_function_call( - self, - ): # -> dict[str, str | dict[str, str | dict[str, str | dict[str, Any] | list[str] | None] | None]]: + + def format_as_litellm_function_call(self): # -> dict[str, str | dict[str, str | dict[str, str | dict[str, Any] | list[str] | None] | None]]: ... + @with_callbacks - def __call__(self, **kwargs): ... + def __call__(self, **kwargs): # -> Any: + ... + @with_callbacks - async def acall(self, **kwargs): # -> Any: + async def acall(self, **kwargs): # -> Any: ... + @classmethod def from_mcp_tool(cls, session: mcp.client.session.ClientSession, tool: mcp.types.Tool) -> Tool: """ @@ -89,7 +83,7 @@ class Tool(BaseType): A Tool object. """ ... - + @classmethod def from_langchain(cls, tool: BaseTool) -> Tool: """ @@ -102,32 +96,43 @@ class Tool(BaseType): A Tool object. Example: + ```python - from langchain.tools import tool + import asyncio import dspy + from langchain.tools import tool as lc_tool - @tool + @lc_tool def add(x: int, y: int): "Add two numbers together." 
return x + y - tool = dspy.Tool.from_langchain(add) - print(await tool.acall(x=1, y=2)) + dspy_tool = dspy.Tool.from_langchain(add) + + async def run_tool(): + return await dspy_tool.acall(x=1, y=2) + + print(asyncio.run(run_tool())) # 3 ``` """ ... - - def __repr__(self): # -> str: + + def __repr__(self): # -> str: + ... + + def __str__(self) -> str: ... - def __str__(self) -> str: ... + + class ToolCalls(BaseType): class ToolCall(BaseModel): name: str args: dict[str, Any] ... - + + tool_calls: list[ToolCall] @classmethod def from_dict_list(cls, tool_calls_dicts: list[dict[str, Any]]) -> ToolCalls: @@ -150,13 +155,14 @@ class ToolCalls(BaseType): ``` """ ... - + @classmethod - def description(cls) -> str: ... + def description(cls) -> str: + ... + -def convert_input_schema_to_tool_args( - schema: dict[str, Any], -) -> Tuple[dict[str, Any], dict[str, Type], dict[str, str]]: + +def convert_input_schema_to_tool_args(schema: dict[str, Any]) -> Tuple[dict[str, Any], dict[str, Type], dict[str, str]]: """Convert an input json schema to tool arguments compatible with DSPy Tool. Args: @@ -166,3 +172,4 @@ def convert_input_schema_to_tool_args( A tuple of (args, arg_types, arg_desc) for DSPy Tool definition. """ ... + diff --git a/typings/dspy/adapters/utils.pyi b/typings/dspy/adapters/utils.pyi index 4d6638e..36b9a99 100644 --- a/typings/dspy/adapters/utils.pyi +++ b/typings/dspy/adapters/utils.pyi @@ -29,8 +29,9 @@ def format_field_value(field_info: FieldInfo, value: Any, assume_text=...) -> Un """ ... -def translate_field_type(field_name, field_info): # -> str: +def translate_field_type(field_name, field_info): # -> str: ... + def find_enum_member(enum, identifier): """ Finds the enum member corresponding to the specified identifier, which may be the @@ -45,8 +46,12 @@ def find_enum_member(enum, identifier): """ ... -def parse_value(value, annotation): # -> str | EnumMeta: +def parse_value(value, annotation): # -> str | EnumMeta: ... 
-def get_annotation_name(annotation): # -> str: + +def get_annotation_name(annotation): # -> str: ... -def get_field_description_string(fields: dict) -> str: ... + +def get_field_description_string(fields: dict) -> str: + ... + diff --git a/typings/dspy/adapters/xml_adapter.pyi b/typings/dspy/adapters/xml_adapter.pyi new file mode 100644 index 0000000..da4fb63 --- /dev/null +++ b/typings/dspy/adapters/xml_adapter.pyi @@ -0,0 +1,24 @@ +""" +This type stub file was generated by pyright. +""" + +from typing import Any, Dict, Optional, Type +from dspy.adapters.chat_adapter import ChatAdapter, FieldInfoWithName +from dspy.signatures.signature import Signature +from dspy.utils.callback import BaseCallback + +class XMLAdapter(ChatAdapter): + def __init__(self, callbacks: Optional[list[BaseCallback]] = ...) -> None: + ... + + def format_field_with_value(self, fields_with_values: Dict[FieldInfoWithName, Any]) -> str: + ... + + def user_message_output_requirements(self, signature: Type[Signature]) -> str: + ... + + def parse(self, signature: Type[Signature], completion: str) -> dict[str, Any]: + ... + + + diff --git a/typings/dspy/clients/__init__.pyi b/typings/dspy/clients/__init__.pyi index ec0340e..9ee8265 100644 --- a/typings/dspy/clients/__init__.pyi +++ b/typings/dspy/clients/__init__.pyi @@ -17,15 +17,7 @@ from dspy.clients.provider import Provider, TrainingJob logger = ... DISK_CACHE_DIR = ... DISK_CACHE_LIMIT = ... 
- -def configure_cache( - enable_disk_cache: Optional[bool] = ..., - enable_memory_cache: Optional[bool] = ..., - disk_cache_dir: Optional[str] = ..., - disk_size_limit_bytes: Optional[int] = ..., - memory_max_entries: Optional[int] = ..., - enable_litellm_cache: bool = ..., -): # -> None: +def configure_cache(enable_disk_cache: Optional[bool] = ..., enable_memory_cache: Optional[bool] = ..., disk_cache_dir: Optional[str] = ..., disk_size_limit_bytes: Optional[int] = ..., memory_max_entries: Optional[int] = ..., enable_litellm_cache: bool = ...): # -> None: """Configure the cache for DSPy. Args: @@ -39,21 +31,12 @@ def configure_cache( ... DSPY_CACHE = ... -if "LITELLM_LOCAL_MODEL_COST_MAP" not in os.environ: ... - -def enable_litellm_logging(): # -> None: +if "LITELLM_LOCAL_MODEL_COST_MAP" not in os.environ: ... -def disable_litellm_logging(): # -> None: +def enable_litellm_logging(): # -> None: + ... + +def disable_litellm_logging(): # -> None: ... -__all__ = [ - "BaseLM", - "LM", - "Provider", - "TrainingJob", - "inspect_history", - "Embedder", - "enable_litellm_logging", - "disable_litellm_logging", - "configure_cache", -] +__all__ = ["BaseLM", "LM", "Provider", "TrainingJob", "inspect_history", "Embedder", "enable_litellm_logging", "disable_litellm_logging", "configure_cache"] diff --git a/typings/dspy/clients/base_lm.pyi b/typings/dspy/clients/base_lm.pyi index 1962a8e..6cfa32a 100644 --- a/typings/dspy/clients/base_lm.pyi +++ b/typings/dspy/clients/base_lm.pyi @@ -6,7 +6,6 @@ from dspy.utils.callback import with_callbacks MAX_HISTORY_SIZE = ... GLOBAL_HISTORY = ... - class BaseLM: """Base class for handling LLM calls. @@ -38,15 +37,17 @@ class BaseLM: print(dspy.Predict("q->a")(q="Why did the chicken cross the kitchen?")) ``` """ - def __init__( - self, model, model_type=..., temperature=..., max_tokens=..., cache=..., **kwargs - ) -> None: ... + def __init__(self, model, model_type=..., temperature=..., max_tokens=..., cache=..., **kwargs) -> None: + ... 
+ @with_callbacks - def __call__(self, prompt=..., messages=..., **kwargs): # -> list[Any]: + def __call__(self, prompt=..., messages=..., **kwargs): # -> list[Any]: ... + @with_callbacks - async def acall(self, prompt=..., messages=..., **kwargs): # -> list[Any]: + async def acall(self, prompt=..., messages=..., **kwargs): # -> list[Any]: ... + def forward(self, prompt=..., messages=..., **kwargs): """Forward pass for the language model. @@ -54,7 +55,7 @@ class BaseLM: [OpenAI response format](https://platform.openai.com/docs/api-reference/responses/object). """ ... - + async def aforward(self, prompt=..., messages=..., **kwargs): """Async forward pass for the language model. @@ -62,16 +63,20 @@ class BaseLM: [OpenAI response format](https://platform.openai.com/docs/api-reference/responses/object). """ ... - - def copy(self, **kwargs): # -> Self: + + def copy(self, **kwargs): # -> Self: """Returns a copy of the language model with possibly updated parameters.""" ... - - def inspect_history(self, n: int = ...): # -> None: + + def inspect_history(self, n: int = ...): # -> None: ... - def update_global_history(self, entry): # -> None: + + def update_global_history(self, entry): # -> None: ... + + -def inspect_history(n: int = ...): # -> None: +def inspect_history(n: int = ...): # -> None: """The global history shared across all LMs.""" ... + diff --git a/typings/dspy/clients/cache.pyi b/typings/dspy/clients/cache.pyi index e600838..a6e4853 100644 --- a/typings/dspy/clients/cache.pyi +++ b/typings/dspy/clients/cache.pyi @@ -5,7 +5,6 @@ This type stub file was generated by pyright. from typing import Any, Dict, Optional logger = ... - class Cache: """DSPy Cache @@ -13,14 +12,7 @@ class Cache: 1. In-memory cache - implemented with cachetools.LRUCache 2. 
On-disk cache - implemented with diskcache.FanoutCache """ - def __init__( - self, - enable_disk_cache: bool, - enable_memory_cache: bool, - disk_cache_dir: str, - disk_size_limit_bytes: Optional[int] = ..., - memory_max_entries: Optional[int] = ..., - ) -> None: + def __init__(self, enable_disk_cache: bool, enable_memory_cache: bool, disk_cache_dir: str, disk_size_limit_bytes: Optional[int] = ..., memory_max_entries: Optional[int] = ...) -> None: """ Args: enable_disk_cache: Whether to enable on-disk cache. @@ -30,42 +22,37 @@ class Cache: memory_max_entries: The maximum size of the in-memory cache (in number of items). """ ... - + def __contains__(self, key: str) -> bool: """Check if a key is in the cache.""" ... - - def cache_key( - self, request: Dict[str, Any], ignored_args_for_cache_key: Optional[list[str]] = ... - ) -> str: + + def cache_key(self, request: Dict[str, Any], ignored_args_for_cache_key: Optional[list[str]] = ...) -> str: """ Obtain a unique cache key for the given request dictionary by hashing its JSON representation. For request fields having types that are known to be JSON-incompatible, convert them to a JSON-serializable format before hashing. """ ... + + def get(self, request: Dict[str, Any], ignored_args_for_cache_key: Optional[list[str]] = ...) -> Any: + ... + + def put(self, request: Dict[str, Any], value: Any, ignored_args_for_cache_key: Optional[list[str]] = ..., enable_memory_cache: bool = ...) -> None: + ... + + def reset_memory_cache(self) -> None: + ... + + def save_memory_cache(self, filepath: str) -> None: + ... + + def load_memory_cache(self, filepath: str) -> None: + ... + - def get( - self, request: Dict[str, Any], ignored_args_for_cache_key: Optional[list[str]] = ... - ) -> Any: ... - def put( - self, - request: Dict[str, Any], - value: Any, - ignored_args_for_cache_key: Optional[list[str]] = ..., - enable_memory_cache: bool = ..., - ) -> None: ... - def reset_memory_cache(self) -> None: ... 
- def save_memory_cache(self, filepath: str) -> None: ... - def load_memory_cache(self, filepath: str) -> None: ... -def request_cache( - cache_arg_name: Optional[str] = ..., - ignored_args_for_cache_key: Optional[list[str]] = ..., - enable_memory_cache: bool = ..., - *, - maxsize: Optional[int] = ..., -): # -> Callable[..., _Wrapped[..., Any, ..., CoroutineType[Any, Any, Any]] | _Wrapped[..., Any, ..., Any]]: +def request_cache(cache_arg_name: Optional[str] = ..., ignored_args_for_cache_key: Optional[list[str]] = ..., enable_memory_cache: bool = ..., *, maxsize: Optional[int] = ...): # -> Callable[..., _Wrapped[..., Any, ..., CoroutineType[Any, Any, Any]] | _Wrapped[..., Any, ..., Any]]: """ Decorator for applying caching to a function based on the request argument. @@ -77,3 +64,4 @@ def request_cache( written to on new data. """ ... + diff --git a/typings/dspy/clients/databricks.pyi b/typings/dspy/clients/databricks.pyi index 1013482..264de31 100644 --- a/typings/dspy/clients/databricks.pyi +++ b/typings/dspy/clients/databricks.pyi @@ -6,40 +6,36 @@ from typing import Any, Dict, List, Optional, TYPE_CHECKING, Union from dspy.clients.provider import Provider, TrainingJob from dspy.clients.utils_finetune import TrainDataFormat -if TYPE_CHECKING: ... +if TYPE_CHECKING: + ... logger = ... - class TrainingJobDatabricks(TrainingJob): - def __init__(self, finetuning_run=..., *args, **kwargs) -> None: ... - def status(self): # -> None: + def __init__(self, finetuning_run=..., *args, **kwargs) -> None: + ... + + def status(self): # -> None: ... + + class DatabricksProvider(Provider): finetunable = ... TrainingJob = TrainingJobDatabricks @staticmethod - def is_provider_model(model: str) -> bool: ... + def is_provider_model(model: str) -> bool: + ... 
+ @staticmethod - def deploy_finetuned_model( - model: str, - data_format: Optional[TrainDataFormat] = ..., - databricks_host: Optional[str] = ..., - databricks_token: Optional[str] = ..., - deploy_timeout: int = ..., - ): # -> None: + def deploy_finetuned_model(model: str, data_format: Optional[TrainDataFormat] = ..., databricks_host: Optional[str] = ..., databricks_token: Optional[str] = ..., deploy_timeout: int = ...): # -> None: ... + @staticmethod - def finetune( - job: TrainingJobDatabricks, - model: str, - train_data: List[Dict[str, Any]], - train_data_format: Optional[Union[TrainDataFormat, str]] = ..., - train_kwargs: Optional[Dict[str, Any]] = ..., - ) -> str: ... + def finetune(job: TrainingJobDatabricks, model: str, train_data: List[Dict[str, Any]], train_data_format: Optional[Union[TrainDataFormat, str]] = ..., train_kwargs: Optional[Dict[str, Any]] = ...) -> str: + ... + @staticmethod - def upload_data( - train_data: List[Dict[str, Any]], - databricks_unity_catalog_path: str, - data_format: TrainDataFormat, - ): # -> str: + def upload_data(train_data: List[Dict[str, Any]], databricks_unity_catalog_path: str, data_format: TrainDataFormat): # -> str: ... + + + diff --git a/typings/dspy/clients/embedding.pyi b/typings/dspy/clients/embedding.pyi index c56d7b9..29162a0 100644 --- a/typings/dspy/clients/embedding.pyi +++ b/typings/dspy/clients/embedding.pyi @@ -2,6 +2,9 @@ This type stub file was generated by pyright. """ +import numpy as np +from typing import Any, Callable, Optional, Union + class Embedder: """DSPy embedding class. @@ -71,10 +74,10 @@ class Embedder: assert embeddings.shape == (2, 10) ``` """ - def __init__(self, model, batch_size=..., caching=..., **kwargs) -> None: ... - def __call__( - self, inputs, batch_size=..., caching=..., **kwargs - ): # -> Any | NDArray[floating[_32Bit]]: + def __init__(self, model: Union[str, Callable], batch_size: int = ..., caching: bool = ..., **kwargs: dict[str, Any]) -> None: + ... 
+ + def __call__(self, inputs: Union[str, list[str]], batch_size: Optional[int] = ..., caching: Optional[bool] = ..., **kwargs: dict[str, Any]) -> np.ndarray: """Compute embeddings for the given inputs. Args: @@ -83,7 +86,7 @@ class Embedder: during initialization. caching (bool, optional): Whether to cache the embedding response when using a hosted model. If None, defaults to the caching setting from initialization. - **kwargs: Additional keyword arguments to pass to the embedding model. These will override the default + kwargs: Additional keyword arguments to pass to the embedding model. These will override the default kwargs provided during initialization. Returns: @@ -91,8 +94,9 @@ class Embedder: If the input is a list of strings, returns a 2D numpy array of embeddings, one embedding per row. """ ... - - async def acall( - self, inputs, batch_size=..., caching=..., **kwargs - ): # -> Any | NDArray[floating[_32Bit]]: + + async def acall(self, inputs, batch_size=..., caching=..., **kwargs): # -> Any | NDArray[floating[_32Bit]]: ... + + + diff --git a/typings/dspy/clients/lm.pyi b/typings/dspy/clients/lm.pyi index 566be67..ff26701 100644 --- a/typings/dspy/clients/lm.pyi +++ b/typings/dspy/clients/lm.pyi @@ -9,27 +9,11 @@ from dspy.utils.callback import BaseCallback from .base_lm import BaseLM logger = ... - class LM(BaseLM): """ A language model supporting chat or text completion requests for use with DSPy modules. 
""" - def __init__( - self, - model: str, - model_type: Literal["chat", "text"] = ..., - temperature: float = ..., - max_tokens: int = ..., - cache: bool = ..., - cache_in_memory: bool = ..., - callbacks: Optional[List[BaseCallback]] = ..., - num_retries: int = ..., - provider=..., - finetuning_model: Optional[str] = ..., - launch_kwargs: Optional[dict[str, Any]] = ..., - train_kwargs: Optional[dict[str, Any]] = ..., - **kwargs, - ) -> None: + def __init__(self, model: str, model_type: Literal["chat", "text"] = ..., temperature: float = ..., max_tokens: int = ..., cache: bool = ..., cache_in_memory: bool = ..., callbacks: Optional[List[BaseCallback]] = ..., num_retries: int = ..., provider: Optional[Provider] = ..., finetuning_model: Optional[str] = ..., launch_kwargs: Optional[dict[str, Any]] = ..., train_kwargs: Optional[dict[str, Any]] = ..., **kwargs) -> None: """ Create a new language model instance for use with DSPy modules and programs. @@ -51,39 +35,42 @@ class LM(BaseLM): from the models available for inference. """ ... - - def forward(self, prompt=..., messages=..., **kwargs): # -> Any | CoroutineType[Any, Any, Any]: + + def forward(self, prompt=..., messages=..., **kwargs): # -> Any | CoroutineType[Any, Any, Any]: + ... + + async def aforward(self, prompt=..., messages=..., **kwargs): # -> Any: + ... + + def launch(self, launch_kwargs: Optional[Dict[str, Any]] = ...): # -> None: + ... + + def kill(self, launch_kwargs: Optional[Dict[str, Any]] = ...): # -> None: ... - async def aforward(self, prompt=..., messages=..., **kwargs): # -> Any: + + def finetune(self, train_data: List[Dict[str, Any]], train_data_format: Optional[TrainDataFormat], train_kwargs: Optional[Dict[str, Any]] = ...) -> TrainingJob: ... - def launch(self, launch_kwargs: Optional[Dict[str, Any]] = ...): # -> None: + + def reinforce(self, train_kwargs) -> ReinforceJob: ... 
- def kill(self, launch_kwargs: Optional[Dict[str, Any]] = ...): # -> None: + + def infer_provider(self) -> Provider: ... - def finetune( - self, - train_data: List[Dict[str, Any]], - train_data_format: Optional[TrainDataFormat], - train_kwargs: Optional[Dict[str, Any]] = ..., - ) -> TrainingJob: ... - def reinforce(self, train_kwargs) -> ReinforceJob: ... - def infer_provider(self) -> Provider: ... - def dump_state(self): # -> dict[str, Any]: + + def dump_state(self): # -> dict[str, Any]: ... + -def litellm_completion( - request: Dict[str, Any], num_retries: int, cache: Optional[Dict[str, Any]] = ... -): # -> ModelResponse | CustomStreamWrapper | TextCompletionResponse | CoroutineType[Any, Any, ModelResponse | TextCompletionResponse | None] | None: + +def litellm_completion(request: Dict[str, Any], num_retries: int, cache: Optional[Dict[str, Any]] = ...): # -> ModelResponse | CustomStreamWrapper | TextCompletionResponse | CoroutineType[Any, Any, ModelResponse | TextCompletionResponse | None] | None: ... -def litellm_text_completion( - request: Dict[str, Any], num_retries: int, cache: Optional[Dict[str, Any]] = ... -): # -> TextCompletionResponse | ModelResponse | CustomStreamWrapper | TextCompletionStreamWrapper | | : + +def litellm_text_completion(request: Dict[str, Any], num_retries: int, cache: Optional[Dict[str, Any]] = ...): # -> TextCompletionResponse | ModelResponse | CustomStreamWrapper | TextCompletionStreamWrapper | | : ... -async def alitellm_completion( - request: Dict[str, Any], num_retries: int, cache: Optional[Dict[str, Any]] = ... -): # -> ModelResponse | CustomStreamWrapper | TextCompletionResponse | None: + +async def alitellm_completion(request: Dict[str, Any], num_retries: int, cache: Optional[Dict[str, Any]] = ...): # -> ModelResponse | CustomStreamWrapper | TextCompletionResponse | None: ... -async def alitellm_text_completion( - request: Dict[str, Any], num_retries: int, cache: Optional[Dict[str, Any]] = ... 
-): # -> TextCompletionResponse | TextCompletionStreamWrapper: + +async def alitellm_text_completion(request: Dict[str, Any], num_retries: int, cache: Optional[Dict[str, Any]] = ...): # -> TextCompletionResponse | TextCompletionStreamWrapper: ... + diff --git a/typings/dspy/clients/lm_local.pyi b/typings/dspy/clients/lm_local.pyi index 298fd4e..31f3cee 100644 --- a/typings/dspy/clients/lm_local.pyi +++ b/typings/dspy/clients/lm_local.pyi @@ -7,28 +7,33 @@ from dspy.clients.provider import Provider, TrainingJob from dspy.clients.utils_finetune import TrainDataFormat from dspy.clients.lm import LM -if TYPE_CHECKING: ... +if TYPE_CHECKING: + ... logger = ... - class LocalProvider(Provider): - def __init__(self) -> None: ... + def __init__(self) -> None: + ... + @staticmethod - def launch(lm: LM, launch_kwargs: Optional[Dict[str, Any]] = ...): # -> None: + def launch(lm: LM, launch_kwargs: Optional[Dict[str, Any]] = ...): # -> None: ... + @staticmethod - def kill(lm: LM, launch_kwargs: Optional[Dict[str, Any]] = ...): # -> None: + def kill(lm: LM, launch_kwargs: Optional[Dict[str, Any]] = ...): # -> None: ... + @staticmethod - def finetune( - job: TrainingJob, - model: str, - train_data: List[Dict[str, Any]], - train_data_format: Optional[TrainDataFormat], - train_kwargs: Optional[Dict[str, Any]] = ..., - ) -> str: ... - -def create_output_dir(model_name, data_path): ... -def train_sft_locally(model_name, train_data, train_kwargs): ... + def finetune(job: TrainingJob, model: str, train_data: List[Dict[str, Any]], train_data_format: Optional[TrainDataFormat], train_kwargs: Optional[Dict[str, Any]] = ...) -> str: + ... + + + +def create_output_dir(model_name, data_path): + ... + +def train_sft_locally(model_name, train_data, train_kwargs): + ... + def get_free_port() -> int: """ Return a free TCP port on localhost. @@ -45,7 +50,7 @@ def wait_for_server(base_url: str, timeout: Optional[int] = ...) -> None: """ ... 
-def encode_sft_example(example, tokenizer, max_seq_length): # -> dict[str, Any]: +def encode_sft_example(example, tokenizer, max_seq_length): # -> dict[str, Any]: """ This function encodes a single example into a format that can be used for sft training. Here, we assume each example has a 'messages' field. Each message in it is a dict with 'role' and 'content' fields. @@ -54,3 +59,4 @@ def encode_sft_example(example, tokenizer, max_seq_length): # -> dict[str, Any] Code obtained from the allenai/open-instruct repository: https://github.com/allenai/open-instruct/blob/4365dea3d1a6111e8b2712af06b22a4512a0df88/open_instruct/finetune.py """ ... + diff --git a/typings/dspy/clients/lm_local_arbor.pyi b/typings/dspy/clients/lm_local_arbor.pyi index 4f389d9..2cd53e0 100644 --- a/typings/dspy/clients/lm_local_arbor.pyi +++ b/typings/dspy/clients/lm_local_arbor.pyi @@ -7,71 +7,97 @@ from dspy.clients.provider import Provider, ReinforceJob, TrainingJob from dspy.clients.utils_finetune import GRPOGroup, TrainDataFormat, TrainingStatus from dspy.clients.lm import LM -if TYPE_CHECKING: ... - +if TYPE_CHECKING: + ... class GRPOTrainKwargs(TypedDict): num_generations: int ... + class ArborTrainingJob(TrainingJob): - def __init__(self, *args, **kwargs) -> None: ... - def cancel(self): # -> None: + def __init__(self, *args, **kwargs) -> None: + ... + + def cancel(self): # -> None: + ... + + def status(self) -> TrainingStatus: ... - def status(self) -> TrainingStatus: ... + + class ArborReinforceJob(ReinforceJob): DEFAULT_TRAIN_KWARGS = ... - def __init__(self, lm: LM, train_kwargs: GRPOTrainKwargs) -> None: ... - def initialize(self): # -> None: + def __init__(self, lm: LM, train_kwargs: GRPOTrainKwargs) -> None: + ... + + def initialize(self): # -> None: ... 
- def step( - self, train_data: List[GRPOGroup], train_data_format: Optional[Union[TrainDataFormat, str]] - ): # -> None: + + def step(self, train_data: List[GRPOGroup], train_data_format: Optional[Union[TrainDataFormat, str]]): # -> None: ... - def update_model(self): # -> None: + + def save_checkpoint(self, checkpoint_name: str, score: Optional[float] = ...): # -> None: ... - def save_checkpoint(self, checkpoint_name: str, score: Optional[float] = ...): # -> None: + + def terminate(self): # -> None: ... - def terminate(self): # -> None: + + def cancel(self): # -> None: ... - def cancel(self): # -> None: + + def status(self) -> TrainingStatus: ... - def status(self) -> TrainingStatus: ... + + class ArborProvider(Provider): - def __init__(self) -> None: ... + def __init__(self) -> None: + ... + @staticmethod - def launch(lm: LM, launch_kwargs: Optional[Dict[str, Any]] = ...): # -> None: + def launch(lm: LM, launch_kwargs: Optional[Dict[str, Any]] = ...): # -> None: ... + @staticmethod - def kill(lm: LM, launch_kwargs: Optional[Dict[str, Any]] = ...): # -> None: + def kill(lm: LM, launch_kwargs: Optional[Dict[str, Any]] = ...): # -> None: ... + @staticmethod - def finetune( - job: ArborTrainingJob, - model: str, - train_data: List[Dict[str, Any]], - train_data_format: Optional[TrainDataFormat], - train_kwargs: Optional[Dict[str, Any]] = ..., - ) -> str: ... + def finetune(job: ArborTrainingJob, model: str, train_data: List[Dict[str, Any]], train_data_format: Optional[TrainDataFormat], train_kwargs: Optional[Dict[str, Any]] = ...) -> str: + ... + @staticmethod - def does_job_exist(job_id: str, training_kwargs: Dict[str, Any]) -> bool: ... + def does_job_exist(job_id: str, training_kwargs: Dict[str, Any]) -> bool: + ... + @staticmethod - def does_file_exist(file_id: str, training_kwargs: Dict[str, Any]) -> bool: ... + def does_file_exist(file_id: str, training_kwargs: Dict[str, Any]) -> bool: + ... 
+ @staticmethod - def is_terminal_training_status(status: TrainingStatus) -> bool: ... + def is_terminal_training_status(status: TrainingStatus) -> bool: + ... + @staticmethod - def get_training_status(job_id: str, training_kwargs: Dict[str, Any]) -> TrainingStatus: ... + def get_training_status(job_id: str, training_kwargs: Dict[str, Any]) -> TrainingStatus: + ... + @staticmethod - def validate_data_format(data_format: TrainDataFormat): # -> None: + def validate_data_format(data_format: TrainDataFormat): # -> None: ... + @staticmethod - def upload_data(data_path: str, training_kwargs: Dict[str, Any]) -> str: ... + def upload_data(data_path: str, training_kwargs: Dict[str, Any]) -> str: + ... + @staticmethod - def wait_for_job( - job: TrainingJob, training_kwargs: Dict[str, Any], poll_frequency: int = ... - ): # -> None: + def wait_for_job(job: TrainingJob, training_kwargs: Dict[str, Any], poll_frequency: int = ...): # -> None: ... + @staticmethod - def get_trained_model(job, training_kwargs: Dict[str, Any]): # -> str | None: + def get_trained_model(job, training_kwargs: Dict[str, Any]): # -> str | None: ... + + + diff --git a/typings/dspy/clients/openai.pyi b/typings/dspy/clients/openai.pyi index 207364c..d4a7b5d 100644 --- a/typings/dspy/clients/openai.pyi +++ b/typings/dspy/clients/openai.pyi @@ -7,41 +7,61 @@ from dspy.clients.provider import Provider, TrainingJob from dspy.clients.utils_finetune import TrainDataFormat, TrainingStatus _OPENAI_MODELS = ... - class TrainingJobOpenAI(TrainingJob): - def __init__(self, *args, **kwargs) -> None: ... - def cancel(self): # -> None: + def __init__(self, *args, **kwargs) -> None: + ... + + def cancel(self): # -> None: ... - def status(self) -> TrainingStatus: ... + + def status(self) -> TrainingStatus: + ... + + class OpenAIProvider(Provider): - def __init__(self) -> None: ... + def __init__(self) -> None: + ... + @staticmethod - def is_provider_model(model: str) -> bool: ... 
+ def is_provider_model(model: str) -> bool: + ... + @staticmethod - def finetune( - job: TrainingJobOpenAI, - model: str, - train_data: List[Dict[str, Any]], - train_data_format: Optional[TrainDataFormat], - train_kwargs: Optional[Dict[str, Any]] = ..., - ) -> str: ... + def finetune(job: TrainingJobOpenAI, model: str, train_data: List[Dict[str, Any]], train_data_format: Optional[TrainDataFormat], train_kwargs: Optional[Dict[str, Any]] = ...) -> str: + ... + @staticmethod - def does_job_exist(job_id: str) -> bool: ... + def does_job_exist(job_id: str) -> bool: + ... + @staticmethod - def does_file_exist(file_id: str) -> bool: ... + def does_file_exist(file_id: str) -> bool: + ... + @staticmethod - def is_terminal_training_status(status: TrainingStatus) -> bool: ... + def is_terminal_training_status(status: TrainingStatus) -> bool: + ... + @staticmethod - def get_training_status(job_id: str) -> TrainingStatus: ... + def get_training_status(job_id: str) -> TrainingStatus: + ... + @staticmethod - def validate_data_format(data_format: TrainDataFormat): # -> None: + def validate_data_format(data_format: TrainDataFormat): # -> None: ... + @staticmethod - def upload_data(data_path: str) -> str: ... + def upload_data(data_path: str) -> str: + ... + @staticmethod - def wait_for_job(job: TrainingJobOpenAI, poll_frequency: int = ...): # -> None: + def wait_for_job(job: TrainingJobOpenAI, poll_frequency: int = ...): # -> None: ... + @staticmethod - def get_trained_model(job): # -> str | None: + def get_trained_model(job): # -> str | None: ... + + + diff --git a/typings/dspy/clients/provider.pyi b/typings/dspy/clients/provider.pyi index 68a0f8f..01818de 100644 --- a/typings/dspy/clients/provider.pyi +++ b/typings/dspy/clients/provider.pyi @@ -9,56 +9,72 @@ from typing import Any, Dict, List, Optional, TYPE_CHECKING, Union from dspy.clients.utils_finetune import TrainDataFormat from dspy.clients.lm import LM -if TYPE_CHECKING: ... - +if TYPE_CHECKING: + ... 
class TrainingJob(Future): - def __init__( - self, - thread: Optional[Thread] = ..., - model: Optional[str] = ..., - train_data: Optional[List[Dict[str, Any]]] = ..., - train_data_format: Optional[TrainDataFormat] = ..., - train_kwargs: Optional[Dict[str, Any]] = ..., - ) -> None: ... - def cancel(self): # -> None: + def __init__(self, thread: Optional[Thread] = ..., model: Optional[str] = ..., train_data: Optional[List[Dict[str, Any]]] = ..., train_data_format: Optional[TrainDataFormat] = ..., train_kwargs: Optional[Dict[str, Any]] = ...) -> None: + ... + + def cancel(self): # -> None: ... + @abstractmethod - def status(self): ... + def status(self): + ... + + class ReinforceJob: - def __init__(self, lm: LM, train_kwargs: Optional[Dict[str, Any]] = ...) -> None: ... + def __init__(self, lm: LM, train_kwargs: Optional[Dict[str, Any]] = ...) -> None: + ... + @abstractmethod - def initialize(self): ... + def initialize(self): + ... + @abstractmethod - def step( - self, - train_data: List[Dict[str, Any]], - train_data_format: Optional[Union[TrainDataFormat, str]] = ..., - ): ... + def step(self, train_data: List[Dict[str, Any]], train_data_format: Optional[Union[TrainDataFormat, str]] = ...): + ... + @abstractmethod - def terminate(self): ... + def terminate(self): + ... + @abstractmethod - def update_model(self): ... + def update_model(self): + ... + @abstractmethod - def save_checkpoint(self, checkpoint_name: str): ... - def cancel(self): ... - def status(self): ... + def save_checkpoint(self, checkpoint_name: str): + ... + + def cancel(self): + ... + + def status(self): + ... + + class Provider: - def __init__(self) -> None: ... + def __init__(self) -> None: + ... + @staticmethod - def is_provider_model(model: str) -> bool: ... + def is_provider_model(model: str) -> bool: + ... + @staticmethod - def launch(lm: LM, launch_kwargs: Optional[Dict[str, Any]] = ...): # -> None: + def launch(lm: LM, launch_kwargs: Optional[Dict[str, Any]] = ...): # -> None: ... 
+ @staticmethod - def kill(lm: LM, launch_kwargs: Optional[Dict[str, Any]] = ...): # -> None: + def kill(lm: LM, launch_kwargs: Optional[Dict[str, Any]] = ...): # -> None: ... + @staticmethod - def finetune( - job: TrainingJob, - model: str, - train_data: List[Dict[str, Any]], - train_data_format: Optional[Union[TrainDataFormat, str]], - train_kwargs: Optional[Dict[str, Any]] = ..., - ) -> str: ... + def finetune(job: TrainingJob, model: str, train_data: List[Dict[str, Any]], train_data_format: Optional[Union[TrainDataFormat, str]], train_kwargs: Optional[Dict[str, Any]] = ...) -> str: + ... + + + diff --git a/typings/dspy/clients/utils_finetune.pyi b/typings/dspy/clients/utils_finetune.pyi index f747703..1cfcccd 100644 --- a/typings/dspy/clients/utils_finetune.pyi +++ b/typings/dspy/clients/utils_finetune.pyi @@ -14,36 +14,54 @@ class TrainingStatus(str, Enum): failed = ... cancelled = ... + class TrainDataFormat(str, Enum): CHAT = ... COMPLETION = ... GRPO_CHAT = ... + class Message(TypedDict): role: Union[Literal["user"], Literal["assistant"], Literal["system"]] content: str ... + class MessageAssistant(TypedDict): role: Literal["assistant"] content: str ... + class GRPOChatData(TypedDict): messages: List[Message] completion: MessageAssistant reward: float ... + GRPOGroup = List[GRPOChatData] +def infer_data_format(adapter: Adapter) -> str: + ... + +def get_finetune_directory() -> str: + ... + +def write_lines(file_path, data): # -> None: + ... -def infer_data_format(adapter: Adapter) -> str: ... -def get_finetune_directory() -> str: ... -def write_lines(file_path, data): # -> None: +def save_data(data: List[Dict[str, Any]]) -> str: ... -def save_data(data: List[Dict[str, Any]]) -> str: ... -def validate_data_format(data: List[Dict[str, Any]], data_format: TrainDataFormat): # -> None: + +def validate_data_format(data: List[Dict[str, Any]], data_format: TrainDataFormat): # -> None: ... 
-def find_data_errors_completion(data_dict: Dict[str, str]) -> Optional[str]: ... -def find_data_error_chat(messages: Dict[str, Any]) -> Optional[str]: ... -def find_data_error_chat_message(message: Dict[str, Any]) -> Optional[str]: ... + +def find_data_errors_completion(data_dict: Dict[str, str]) -> Optional[str]: + ... + +def find_data_error_chat(messages: Dict[str, Any]) -> Optional[str]: + ... + +def find_data_error_chat_message(message: Dict[str, Any]) -> Optional[str]: + ... + diff --git a/typings/dspy/datasets/alfworld/__init__.pyi b/typings/dspy/datasets/alfworld/__init__.pyi index 9fe206d..406446d 100644 --- a/typings/dspy/datasets/alfworld/__init__.pyi +++ b/typings/dspy/datasets/alfworld/__init__.pyi @@ -3,3 +3,4 @@ This type stub file was generated by pyright. """ from dspy.datasets.alfworld.alfworld import AlfWorld + diff --git a/typings/dspy/datasets/alfworld/alfworld.pyi b/typings/dspy/datasets/alfworld/alfworld.pyi index 5b956b9..31b429f 100644 --- a/typings/dspy/datasets/alfworld/alfworld.pyi +++ b/typings/dspy/datasets/alfworld/alfworld.pyi @@ -2,7 +2,7 @@ This type stub file was generated by pyright. """ -def env_worker(inq, outq): # -> None: +def env_worker(inq, outq): # -> None: """ Worker process: creates a single AlfredTWEnv instance, handles 'init' (with task idx) and 'step' (with action). @@ -18,29 +18,47 @@ class EnvPool: obs, rew, done, info = sess.step("go north") ... """ - def __init__(self, size=...) -> None: ... - def close_all(self): # -> None: + def __init__(self, size=...) -> None: + ... + + def close_all(self): # -> None: """Close all processes in the pool.""" ... - - def session(self): # -> _EnvSession: + + def session(self): # -> _EnvSession: """Context manager that acquires/releases a single worker.""" ... + + class _EnvSession: """ A context manager that acquires a worker from the pool, provides .init(idx) and .step(action), then releases the worker. """ - def __init__(self, pool: EnvPool) -> None: ... 
- def __enter__(self): # -> Self: + def __init__(self, pool: EnvPool) -> None: + ... + + def __enter__(self): # -> Self: + ... + + def __exit__(self, exc_type, exc_val, exc_tb): # -> None: ... - def __exit__(self, exc_type, exc_val, exc_tb): # -> None: + + def init(self, idx): ... - def init(self, idx): ... - def step(self, action): ... + + def step(self, action): + ... + + class AlfWorld: - def __init__(self, max_threads=...) -> None: ... - def __del__(self): # -> None: + def __init__(self, max_threads=...) -> None: + ... + + def __del__(self): # -> None: ... + + + diff --git a/typings/dspy/datasets/colors.pyi b/typings/dspy/datasets/colors.pyi index 04e07f3..7fa2279 100644 --- a/typings/dspy/datasets/colors.pyi +++ b/typings/dspy/datasets/colors.pyi @@ -5,8 +5,12 @@ This type stub file was generated by pyright. from dspy.datasets.dataset import Dataset all_colors = ... - class Colors(Dataset): - def __init__(self, sort_by_suffix=..., *args, **kwargs) -> None: ... - def sorted_by_suffix(self, colors): # -> list[Any]: + def __init__(self, sort_by_suffix=..., *args, **kwargs) -> None: + ... + + def sorted_by_suffix(self, colors): # -> list[Any]: ... + + + diff --git a/typings/dspy/datasets/dataloader.pyi b/typings/dspy/datasets/dataloader.pyi index 0585a39..b323967 100644 --- a/typings/dspy/datasets/dataloader.pyi +++ b/typings/dspy/datasets/dataloader.pyi @@ -8,40 +8,35 @@ from collections.abc import Mapping from typing import List, Optional, TYPE_CHECKING, Tuple, Union from dspy.datasets.dataset import Dataset -if TYPE_CHECKING: ... - +if TYPE_CHECKING: + ... class DataLoader(Dataset): - def __init__(self) -> None: ... - def from_huggingface( - self, - dataset_name: str, - *args, - input_keys: Tuple[str] = ..., - fields: Optional[Tuple[str]] = ..., - **kwargs, - ) -> Union[Mapping[str, List[dspy.Example]], List[dspy.Example]]: ... - def from_csv( - self, file_path: str, fields: Optional[List[str]] = ..., input_keys: Tuple[str] = ... - ) -> List[dspy.Example]: ... 
- def from_pandas( - self, df: pd.DataFrame, fields: Optional[List[str]] = ..., input_keys: tuple[str] = ... - ) -> list[dspy.Example]: ... - def from_json( - self, file_path: str, fields: Optional[List[str]] = ..., input_keys: Tuple[str] = ... - ) -> List[dspy.Example]: ... - def from_parquet( - self, file_path: str, fields: Optional[List[str]] = ..., input_keys: Tuple[str] = ... - ) -> List[dspy.Example]: ... - def from_rm( - self, num_samples: int, fields: List[str], input_keys: List[str] - ) -> List[dspy.Example]: ... - def sample( - self, dataset: List[dspy.Example], n: int, *args, **kwargs - ) -> List[dspy.Example]: ... - def train_test_split( - self, - dataset: List[dspy.Example], - train_size: Union[int, float] = ..., - test_size: Optional[Union[int, float]] = ..., - random_state: Optional[int] = ..., - ) -> Mapping[str, List[dspy.Example]]: ... + def __init__(self) -> None: + ... + + def from_huggingface(self, dataset_name: str, *args, input_keys: Tuple[str] = ..., fields: Optional[Tuple[str]] = ..., **kwargs) -> Union[Mapping[str, List[dspy.Example]], List[dspy.Example]]: + ... + + def from_csv(self, file_path: str, fields: Optional[List[str]] = ..., input_keys: Tuple[str] = ...) -> List[dspy.Example]: + ... + + def from_pandas(self, df: pd.DataFrame, fields: Optional[List[str]] = ..., input_keys: tuple[str] = ...) -> list[dspy.Example]: + ... + + def from_json(self, file_path: str, fields: Optional[List[str]] = ..., input_keys: Tuple[str] = ...) -> List[dspy.Example]: + ... + + def from_parquet(self, file_path: str, fields: Optional[List[str]] = ..., input_keys: Tuple[str] = ...) -> List[dspy.Example]: + ... + + def from_rm(self, num_samples: int, fields: List[str], input_keys: List[str]) -> List[dspy.Example]: + ... + + def sample(self, dataset: List[dspy.Example], n: int, *args, **kwargs) -> List[dspy.Example]: + ... 
+ + def train_test_split(self, dataset: List[dspy.Example], train_size: Union[int, float] = ..., test_size: Optional[Union[int, float]] = ..., random_state: Optional[int] = ...) -> Mapping[str, List[dspy.Example]]: + ... + + + diff --git a/typings/dspy/datasets/dataset.pyi b/typings/dspy/datasets/dataset.pyi index 6b337e8..6875cf8 100644 --- a/typings/dspy/datasets/dataset.pyi +++ b/typings/dspy/datasets/dataset.pyi @@ -3,36 +3,27 @@ This type stub file was generated by pyright. """ class Dataset: - def __init__( - self, - train_seed=..., - train_size=..., - eval_seed=..., - dev_size=..., - test_size=..., - input_keys=..., - ) -> None: ... - def reset_seeds( - self, train_seed=..., train_size=..., eval_seed=..., dev_size=..., test_size=... - ): # -> None: + def __init__(self, train_seed=..., train_size=..., eval_seed=..., dev_size=..., test_size=..., input_keys=...) -> None: ... + + def reset_seeds(self, train_seed=..., train_size=..., eval_seed=..., dev_size=..., test_size=...): # -> None: + ... + @property - def train(self): # -> list[Any]: + def train(self): # -> list[Any]: ... + @property - def dev(self): # -> list[Any]: + def dev(self): # -> list[Any]: ... + @property - def test(self): # -> list[Any]: + def test(self): # -> list[Any]: ... + @classmethod - def prepare_by_seed( - cls, - train_seeds=..., - train_size=..., - dev_size=..., - divide_eval_per_seed=..., - eval_seed=..., - **kwargs, - ): # -> dotdict: + def prepare_by_seed(cls, train_seeds=..., train_size=..., dev_size=..., divide_eval_per_seed=..., eval_seed=..., **kwargs): # -> dotdict: ... + + + diff --git a/typings/dspy/datasets/hotpotqa.pyi b/typings/dspy/datasets/hotpotqa.pyi index 91daeb3..35d9138 100644 --- a/typings/dspy/datasets/hotpotqa.pyi +++ b/typings/dspy/datasets/hotpotqa.pyi @@ -5,9 +5,10 @@ This type stub file was generated by pyright. 
from dspy.datasets.dataset import Dataset class HotPotQA(Dataset): - def __init__( - self, *args, only_hard_examples=..., keep_details=..., unofficial_dev=..., **kwargs - ) -> None: ... + def __init__(self, *args, only_hard_examples=..., keep_details=..., unofficial_dev=..., **kwargs) -> None: + ... + + if __name__ == "__main__": data_args = ... diff --git a/typings/dspy/datasets/math.pyi b/typings/dspy/datasets/math.pyi index 2a72785..513cf83 100644 --- a/typings/dspy/datasets/math.pyi +++ b/typings/dspy/datasets/math.pyi @@ -3,8 +3,14 @@ This type stub file was generated by pyright. """ class MATH: - def __init__(self, subset) -> None: ... - def metric(self, example, pred, trace=...): ... + def __init__(self, subset) -> None: + ... + + def metric(self, example, pred, trace=...): + ... + -def extract_answer(s): # -> str | None: + +def extract_answer(s): # -> str | None: ... + diff --git a/typings/dspy/dsp/__init__.pyi b/typings/dspy/dsp/__init__.pyi index cea7ef9..006bc27 100644 --- a/typings/dspy/dsp/__init__.pyi +++ b/typings/dspy/dsp/__init__.pyi @@ -1,3 +1,4 @@ """ This type stub file was generated by pyright. """ + diff --git a/typings/dspy/dsp/colbertv2.pyi b/typings/dspy/dsp/colbertv2.pyi index 703d87e..263f522 100644 --- a/typings/dspy/dsp/colbertv2.pyi +++ b/typings/dspy/dsp/colbertv2.pyi @@ -8,31 +8,32 @@ from dspy.dsp.utils import dotdict class ColBERTv2: """Wrapper for the ColBERTv2 Retrieval.""" - def __init__( - self, url: str = ..., port: Optional[Union[str, int]] = ..., post_requests: bool = ... - ) -> None: ... - def __call__( - self, query: str, k: int = ..., simplify: bool = ... - ) -> Union[list[str], list[dotdict]]: ... + def __init__(self, url: str = ..., port: Optional[Union[str, int]] = ..., post_requests: bool = ...) -> None: + ... + + def __call__(self, query: str, k: int = ..., simplify: bool = ...) -> Union[list[str], list[dotdict]]: + ... 
+ + @request_cache() -def colbertv2_get_request_v2(url: str, query: str, k: int): # -> list[dict[Any | str, Any]]: +def colbertv2_get_request_v2(url: str, query: str, k: int): # -> list[dict[Any | str, Any]]: ... + @request_cache() -def colbertv2_get_request_v2_wrapped(*args, **kwargs): # -> list[dict[Any | str, Any]]: +def colbertv2_get_request_v2_wrapped(*args, **kwargs): # -> list[dict[Any | str, Any]]: ... colbertv2_get_request = ... - @request_cache() -def colbertv2_post_request_v2(url: str, query: str, k: int): # -> Any: +def colbertv2_post_request_v2(url: str, query: str, k: int): # -> Any: ... + @request_cache() -def colbertv2_post_request_v2_wrapped(*args, **kwargs): # -> Any: +def colbertv2_post_request_v2_wrapped(*args, **kwargs): # -> Any: ... colbertv2_post_request = ... - class ColBERTv2RetrieverLocal: def __init__(self, passages: List[str], colbert_config=..., load_only: bool = ...) -> None: """Colbertv2 retriever module @@ -43,16 +44,30 @@ class ColBERTv2RetrieverLocal: load_only (bool, optional): whether to load the index or build and then load. Defaults to False. """ ... - - def build_index(self): # -> None: + + def build_index(self): # -> None: ... - def get_index(self): ... - def __call__(self, *args: Any, **kwargs: Any) -> Any: ... - def forward(self, query: str, k: int = ..., **kwargs): # -> list[Any]: + + def get_index(self): ... + + def __call__(self, *args: Any, **kwargs: Any) -> Any: + ... + + def forward(self, query: str, k: int = ..., **kwargs): # -> list[Any]: + ... + + class ColBERTv2RerankerLocal: - def __init__(self, colbert_config=..., checkpoint: str = ...) -> None: ... - def __call__(self, *args: Any, **kwargs: Any) -> Any: ... - def forward(self, query: str, passages: Optional[List[str]] = ...): # -> NDArray[Any]: + def __init__(self, colbert_config=..., checkpoint: str = ...) -> None: + ... + + def __call__(self, *args: Any, **kwargs: Any) -> Any: ... 
+ + def forward(self, query: str, passages: Optional[List[str]] = ...): # -> NDArray[Any]: + ... + + + diff --git a/typings/dspy/dsp/utils/__init__.pyi b/typings/dspy/dsp/utils/__init__.pyi index 25c730d..8e023fc 100644 --- a/typings/dspy/dsp/utils/__init__.pyi +++ b/typings/dspy/dsp/utils/__init__.pyi @@ -3,6 +3,6 @@ This type stub file was generated by pyright. """ from dspy.dsp.utils.dpr import * -from dspy.dsp.utils.metrics import * from dspy.dsp.utils.settings import * from dspy.dsp.utils.utils import * + diff --git a/typings/dspy/dsp/utils/dpr.pyi b/typings/dspy/dsp/utils/dpr.pyi index 3ae7664..b303068 100644 --- a/typings/dspy/dsp/utils/dpr.pyi +++ b/typings/dspy/dsp/utils/dpr.pyi @@ -8,62 +8,60 @@ https://github.com/facebookresearch/DPR/tree/master/dpr Original license: https://github.com/facebookresearch/DPR/blob/main/LICENSE """ logger = ... - class Tokens: """A class to represent a list of tokenized text.""" - TEXT = ... TEXT_WS = ... SPAN = ... POS = ... LEMMA = ... NER = ... - def __init__(self, data, annotators, opts=...) -> None: ... - def __len__(self): # -> int: + def __init__(self, data, annotators, opts=...) -> None: + ... + + def __len__(self): # -> int: """The number of tokens.""" ... - - def slice(self, i=..., j=...): # -> Self: + + def slice(self, i=..., j=...): # -> Self: """Return a view of the list of tokens from [i, j).""" ... - - def untokenize(self): # -> LiteralString: + + def untokenize(self): # -> LiteralString: """Returns the original text (with whitespace reinserted).""" ... - - def words(self, uncased=...): # -> list[Any]: + + def words(self, uncased=...): # -> list[Any]: """Returns a list of the text of each token Args: uncased: lower cases text """ ... - - def offsets(self): # -> list[Any]: + + def offsets(self): # -> list[Any]: """Returns a list of [start, end) character offsets of each token.""" ... 
- - def pos(self): # -> list[Any] | None: + + def pos(self): # -> list[Any] | None: """Returns a list of part-of-speech tags of each token. Returns None if this annotation was not included. """ ... - - def lemmas(self): # -> list[Any] | None: + + def lemmas(self): # -> list[Any] | None: """Returns a list of the lemmatized text of each token. Returns None if this annotation was not included. """ ... - - def entities(self): # -> list[Any] | None: + + def entities(self): # -> list[Any] | None: """Returns a list of named-entity-recognition tags of each token. Returns None if this annotation was not included. """ ... - - def ngrams( - self, n=..., uncased=..., filter_fn=..., as_strings=... - ): # -> list[str] | list[tuple[int, int]]: + + def ngrams(self, n=..., uncased=..., filter_fn=..., as_strings=...): # -> list[str] | list[tuple[int, int]]: """Returns a list of all ngrams from length 1 to n. Args: @@ -74,20 +72,27 @@ class Tokens: as_string: return the ngram as a string vs list """ ... - - def entity_groups(self): # -> list[Any] | None: + + def entity_groups(self): # -> list[Any] | None: """Group consecutive entity tokens with the same NER tag.""" ... + + class Tokenizer: """Base tokenizer class. Tokenizers implement tokenize, which should return a Tokens class. """ - def tokenize(self, text): ... - def shutdown(self): # -> None: + def tokenize(self, text): + ... + + def shutdown(self): # -> None: ... - def __del__(self): # -> None: + + def __del__(self): # -> None: ... + + class SimpleTokenizer(Tokenizer): ALPHA_NUM = ... @@ -98,24 +103,29 @@ class SimpleTokenizer(Tokenizer): annotators: None or empty set (only tokenizes). """ ... - - def tokenize(self, text): # -> Tokens: + + def tokenize(self, text): # -> Tokens: ... + + -def has_answer(tokenized_answers, text): # -> bool: +def has_answer(tokenized_answers, text): # -> bool: ... 
-def locate_answers(tokenized_answers, text): # -> list[Any]: + +def locate_answers(tokenized_answers, text): # -> list[Any]: """ Returns each occurrence of an answer as (offset, endpos) in terms of *characters*. """ ... STokenizer = ... - -def DPR_tokenize(text): # -> Tokens: +def DPR_tokenize(text): # -> Tokens: ... -def DPR_normalize(text): # -> list[Any]: + +def DPR_normalize(text): # -> list[Any]: ... -def strip_accents(text): # -> LiteralString: + +def strip_accents(text): # -> LiteralString: """Strips accents from a piece of text.""" ... + diff --git a/typings/dspy/dsp/utils/metrics.pyi b/typings/dspy/dsp/utils/metrics.pyi deleted file mode 100644 index 2bfeff9..0000000 --- a/typings/dspy/dsp/utils/metrics.pyi +++ /dev/null @@ -1,20 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -def EM(prediction, answers_list): # -> bool: - ... -def F1(prediction, answers_list): # -> float | int: - ... -def HotPotF1(prediction, answers_list): # -> float | int: - ... -def normalize_text(s): # -> str: - ... -def em_score(prediction, ground_truth): # -> bool: - ... -def f1_score(prediction, ground_truth): # -> float | Literal[0]: - ... -def hotpot_f1_score(prediction, ground_truth): # -> float | Literal[0]: - ... -def precision_score(prediction, ground_truth): # -> float | Literal[0]: - ... diff --git a/typings/dspy/dsp/utils/settings.pyi b/typings/dspy/dsp/utils/settings.pyi index 3148244..eb311a0 100644 --- a/typings/dspy/dsp/utils/settings.pyi +++ b/typings/dspy/dsp/utils/settings.pyi @@ -2,19 +2,14 @@ This type stub file was generated by pyright. """ -import threading from contextlib import contextmanager DEFAULT_CONFIG = ... main_thread_config = ... config_owner_thread_id = ... +config_owner_async_task = ... global_lock = ... - -class ThreadLocalOverrides(threading.local): - def __init__(self) -> None: ... - thread_local_overrides = ... - class Settings: """ A singleton class for DSPy configuration settings. 
@@ -30,42 +25,54 @@ class Settings: (TODO: In the future, add warnings: if there are near-in-time user-thread reads followed by .configure calls.) 3. Any thread can use dspy.context. It propagates to child threads created with DSPy primitives: Parallel, asyncify, etc. """ - _instance = ... - def __new__(cls): # -> Self: + def __new__(cls): # -> Self: ... + @property - def lock(self): # -> lock: + def lock(self): # -> lock: ... - def __getattr__(self, name): # -> Any: + + def __getattr__(self, name): ... - def __setattr__(self, name, value): # -> None: + + def __setattr__(self, name, value): # -> None: ... - def __getitem__(self, key): # -> Any: + + def __getitem__(self, key): ... - def __setitem__(self, key, value): # -> None: + + def __setitem__(self, key, value): # -> None: ... - def __contains__(self, key): # -> bool: + + def __contains__(self, key): # -> bool: ... - def get(self, key, default=...): # -> Any | None: + + def get(self, key, default=...): # -> None: ... - def copy(self): # -> dotdict: + + def copy(self): # -> dotdict: ... + @property - def config(self): # -> dotdict: + def config(self): # -> dotdict: ... - def configure(self, **kwargs): # -> None: + + def configure(self, **kwargs): # -> None: ... + @contextmanager - def context(self, **kwargs): # -> Generator[None, Any, None]: + def context(self, **kwargs): # -> Generator[None, Any, None]: """ Context manager for temporary configuration changes at the thread level. Does not affect global configuration. Changes only apply to the current thread. If threads are spawned inside this block using ParallelExecutor, they will inherit these overrides. """ ... - - def __repr__(self): # -> str: + + def __repr__(self): # -> str: ... + + settings = ... diff --git a/typings/dspy/dsp/utils/utils.pyi b/typings/dspy/dsp/utils/utils.pyi index fc4da30..a55daf1 100644 --- a/typings/dspy/dsp/utils/utils.pyi +++ b/typings/dspy/dsp/utils/utils.pyi @@ -2,63 +2,74 @@ This type stub file was generated by pyright. 
""" -def print_message(*s, condition=..., pad=..., sep=...): # -> str: +def print_message(*s, condition=..., pad=..., sep=...): # -> str: ... -def timestamp(daydir=...): # -> str: + +def timestamp(daydir=...): # -> str: ... -def file_tqdm(file): # -> Generator[Any, Any, None]: + +def file_tqdm(file): # -> Generator[Any, Any, None]: ... -def create_directory(path): # -> None: + +def create_directory(path): # -> None: ... + def deduplicate(seq: list[str]) -> list[str]: """ Source: https://stackoverflow.com/a/480227/1493011 """ ... -def batch(group, bsize, provide_offset=...): # -> Generator[tuple[int, Any] | Any, Any, None]: +def batch(group, bsize, provide_offset=...): # -> Generator[tuple[int, Any] | Any, Any, None]: ... class dotdict(dict): - def __getattr__(self, key): ... - def __setattr__(self, key, value): # -> None: + def __getattr__(self, key): ... - def __delattr__(self, key): # -> None: + + def __setattr__(self, key, value): # -> None: ... - def __deepcopy__(self, memo): # -> dotdict: + + def __delattr__(self, key): # -> None: ... + + def __deepcopy__(self, memo): # -> dotdict: + ... + + class dotdict_lax(dict): __getattr__ = ... __setattr__ = ... __delattr__ = ... -def flatten(data_list): # -> list[Any]: + +def flatten(data_list): # -> list[Any]: ... -def zipstar( - data_list, lazy=... -): # -> list[list[Any]] | zip[tuple[Any, ...]] | list[tuple[Any, ...]]: + +def zipstar(data_list, lazy=...): # -> list[list[Any]] | zip[tuple[Any, ...]] | list[tuple[Any, ...]]: """ A much faster A, B, C = zip(*[(a, b, c), (a, b, c), ...]) May return lists or tuples. """ ... -def zip_first(list1, list2): # -> list[tuple[Any, Any]]: +def zip_first(list1, list2): # -> list[tuple[Any, Any]]: ... -def int_or_float(val): # -> float | int: + +def int_or_float(val): # -> float | int: ... -def groupby_first_item(lst): # -> defaultdict[Any, list[Any]]: + +def groupby_first_item(lst): # -> defaultdict[Any, list[Any]]: ... 
-def process_grouped_by_first_item( - lst, -): # -> Generator[tuple[Any | None, list[Any]], Any, defaultdict[Any, list[Any]]]: + +def process_grouped_by_first_item(lst): # -> Generator[tuple[Any | None, list[Any]], Any, defaultdict[Any, list[Any]]]: """ Requires items in list to already be grouped by first item. """ ... -def grouper(iterable, n, fillvalue=...): # -> zip_longest[tuple[Any | None, ...]]: +def grouper(iterable, n, fillvalue=...): # -> zip_longest[tuple[Any | None, ...]]: """ Collect data into fixed-length chunks or blocks Example: grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx" @@ -66,15 +77,21 @@ def grouper(iterable, n, fillvalue=...): # -> zip_longest[tuple[Any | None, ... """ ... -def lengths2offsets(lengths): # -> Generator[tuple[Any | Literal[0], Any], Any, None]: +def lengths2offsets(lengths): # -> Generator[tuple[Any | Literal[0], Any], Any, None]: ... class NullContextManager: - def __init__(self, dummy_resource=...) -> None: ... - def __enter__(self): # -> None: + def __init__(self, dummy_resource=...) -> None: ... - def __exit__(self, *args): # -> None: + + def __enter__(self): # -> None: ... + + def __exit__(self, *args): # -> None: + ... + + -def load_batch_backgrounds(args, qids): # -> list[Any] | None: +def load_batch_backgrounds(args, qids): # -> list[Any] | None: ... + diff --git a/typings/dspy/evaluate/__init__.pyi b/typings/dspy/evaluate/__init__.pyi index e6846ed..ede51ba 100644 --- a/typings/dspy/evaluate/__init__.pyi +++ b/typings/dspy/evaluate/__init__.pyi @@ -2,17 +2,8 @@ This type stub file was generated by pyright. 
""" -from dspy.dsp.utils import EM, normalize_text from dspy.evaluate.auto_evaluation import CompleteAndGrounded, SemanticF1 from dspy.evaluate.evaluate import Evaluate -from dspy.evaluate.metrics import answer_exact_match, answer_passage_match +from dspy.evaluate.metrics import EM, answer_exact_match, answer_passage_match, normalize_text -__all__ = [ - "EM", - "normalize_text", - "answer_exact_match", - "answer_passage_match", - "Evaluate", - "SemanticF1", - "CompleteAndGrounded", -] +__all__ = ["EM", "normalize_text", "answer_exact_match", "answer_passage_match", "Evaluate", "SemanticF1", "CompleteAndGrounded"] diff --git a/typings/dspy/evaluate/auto_evaluation.pyi b/typings/dspy/evaluate/auto_evaluation.pyi index 23e194f..78b5177 100644 --- a/typings/dspy/evaluate/auto_evaluation.pyi +++ b/typings/dspy/evaluate/auto_evaluation.pyi @@ -2,26 +2,26 @@ This type stub file was generated by pyright. """ -import dspy +from dspy.primitives import Module +from dspy.signatures import Signature -class SemanticRecallPrecision(dspy.Signature): +class SemanticRecallPrecision(Signature): """ Compare a system's response to the ground truth to compute its recall and precision. If asked to reason, enumerate key ideas in each response, and whether they are present in the other response. """ - question: str = ... ground_truth: str = ... system_response: str = ... recall: float = ... precision: float = ... -class DecompositionalSemanticRecallPrecision(dspy.Signature): + +class DecompositionalSemanticRecallPrecision(Signature): """ Compare a system's response to the ground truth to compute recall and precision of key ideas. You will first enumerate key ideas in each response, discuss their overlap, and then report recall and precision. """ - question: str = ... ground_truth: str = ... system_response: str = ... @@ -31,20 +31,24 @@ class DecompositionalSemanticRecallPrecision(dspy.Signature): recall: float = ... precision: float = ... 
-def f1_score(precision, recall): # -> float: + +def f1_score(precision, recall): # -> float: ... -class SemanticF1(dspy.Module): - def __init__(self, threshold=..., decompositional=...) -> None: ... - def forward(self, example, pred, trace=...): # -> float | bool: +class SemanticF1(Module): + def __init__(self, threshold=..., decompositional=...) -> None: ... + + def forward(self, example, pred, trace=...): # -> float | bool: + ... + + -class AnswerCompleteness(dspy.Signature): +class AnswerCompleteness(Signature): """ Estimate the completeness of a system's responses, against the ground truth. You will first enumerate key ideas in each response, discuss their overlap, and then report completeness. """ - question: str = ... ground_truth: str = ... system_response: str = ... @@ -53,13 +57,13 @@ class AnswerCompleteness(dspy.Signature): discussion: str = ... completeness: float = ... -class AnswerGroundedness(dspy.Signature): + +class AnswerGroundedness(Signature): """ Estimate the groundedness of a system's responses, against real retrieved documents written by people. You will first enumerate whatever non-trivial or check-worthy claims are made in the system response, and then discuss the extent to which some or all of them can be deduced from the retrieved context and basic commonsense. """ - question: str = ... retrieved_context: str = ... system_response: str = ... @@ -67,7 +71,13 @@ class AnswerGroundedness(dspy.Signature): discussion: str = ... groundedness: float = ... -class CompleteAndGrounded(dspy.Module): - def __init__(self, threshold=...) -> None: ... - def forward(self, example, pred, trace=...): # -> float | bool: + +class CompleteAndGrounded(Module): + def __init__(self, threshold=...) -> None: ... + + def forward(self, example, pred, trace=...): # -> float | bool: + ... 
+ + + diff --git a/typings/dspy/evaluate/evaluate.pyi b/typings/dspy/evaluate/evaluate.pyi index b8ff8e1..e1ea8b2 100644 --- a/typings/dspy/evaluate/evaluate.pyi +++ b/typings/dspy/evaluate/evaluate.pyi @@ -4,11 +4,28 @@ This type stub file was generated by pyright. import pandas as pd import dspy -from typing import Any, Callable, List, Optional, TYPE_CHECKING, Union +from typing import Any, Callable, List, Optional, TYPE_CHECKING, Tuple, Union +from dspy.primitives.prediction import Prediction from dspy.utils.callback import with_callbacks -if TYPE_CHECKING: ... +if TYPE_CHECKING: + ... logger = ... +class EvaluationResult(Prediction): + """ + A class that represents the result of an evaluation. + It is a subclass of `dspy.Prediction` that contains the following fields + + - score: An float value (e.g., 67.30) representing the overall performance + - results: a list of (example, prediction, score) tuples for each example in devset + """ + def __init__(self, score: float, results: list[Tuple[dspy.Example, dspy.Example, Any]]) -> None: + ... + + def __repr__(self): # -> str: + ... + + class Evaluate: """DSPy Evaluate class. @@ -16,21 +33,7 @@ class Evaluate: This class is used to evaluate the performance of a DSPy program. Users need to provide a evaluation dataset and a metric function in order to use this class. This class supports parallel evaluation on the provided dataset. 
""" - def __init__( - self, - *, - devset: List[dspy.Example], - metric: Optional[Callable] = ..., - num_threads: Optional[int] = ..., - display_progress: bool = ..., - display_table: Union[bool, int] = ..., - max_errors: int = ..., - return_all_scores: bool = ..., - return_outputs: bool = ..., - provide_traceback: Optional[bool] = ..., - failure_score: float = ..., - **kwargs, - ) -> None: + def __init__(self, *, devset: List[dspy.Example], metric: Optional[Callable] = ..., num_threads: Optional[int] = ..., display_progress: bool = ..., display_table: Union[bool, int] = ..., max_errors: Optional[int] = ..., provide_traceback: Optional[bool] = ..., failure_score: float = ..., **kwargs) -> None: """ Args: devset (List[dspy.Example]): the evaluation dataset. @@ -39,27 +42,15 @@ class Evaluate: display_progress (bool): Whether to display progress during evaluation. display_table (Union[bool, int]): Whether to display the evaluation results in a table. If a number is passed, the evaluation results will be truncated to that number before displayed. - max_errors (int): The maximum number of errors to allow before stopping evaluation. - return_all_scores (bool): Whether to return scores for every data record in `devset`. - return_outputs (bool): Whether to return the dspy program's outputs for every data in `devset`. + max_errors (Optional[int]): The maximum number of errors to allow before + stopping evaluation. If ``None``, inherits from ``dspy.settings.max_errors``. provide_traceback (Optional[bool]): Whether to provide traceback information during evaluation. failure_score (float): The default score to use if evaluation fails due to an exception. """ ... 
- + @with_callbacks - def __call__( - self, - program: dspy.Module, - metric: Optional[Callable] = ..., - devset: Optional[List[dspy.Example]] = ..., - num_threads: Optional[int] = ..., - display_progress: Optional[bool] = ..., - display_table: Optional[Union[bool, int]] = ..., - return_all_scores: Optional[bool] = ..., - return_outputs: Optional[bool] = ..., - callback_metadata: Optional[dict[str, Any]] = ..., - ): # -> tuple[float, list[tuple[Example, Prediction, float]], list[float]] | tuple[float, list[float]] | tuple[float, list[tuple[Example, Prediction, float]]] | float: + def __call__(self, program: dspy.Module, metric: Optional[Callable] = ..., devset: Optional[List[dspy.Example]] = ..., num_threads: Optional[int] = ..., display_progress: Optional[bool] = ..., display_table: Optional[Union[bool, int]] = ..., callback_metadata: Optional[dict[str, Any]] = ...) -> EvaluationResult: """ Args: program (dspy.Module): The DSPy program to evaluate. @@ -71,34 +62,25 @@ class Evaluate: `self.display_progress`. display_table (Union[bool, int]): Whether to display the evaluation results in a table. if not provided, use `self.display_table`. If a number is passed, the evaluation results will be truncated to that number before displayed. - return_all_scores (bool): Whether to return scores for every data record in `devset`. if not provided, - use `self.return_all_scores`. - return_outputs (bool): Whether to return the dspy program's outputs for every data in `devset`. if not - provided, use `self.return_outputs`. callback_metadata (dict): Metadata to be used for evaluate callback handlers. 
Returns: - The evaluation results are returned in different formats based on the flags: - - - Base return: A float percentage score (e.g., 67.30) representing overall performance - - - With `return_all_scores=True`: - Returns (overall_score, individual_scores) where individual_scores is a list of - float scores for each example in devset - - - With `return_outputs=True`: - Returns (overall_score, result_triples) where result_triples is a list of - (example, prediction, score) tuples for each example in devset - - - With both flags=True: - Returns (overall_score, result_triples, individual_scores) - + The evaluation results are returned as a dspy.EvaluationResult object containing the following attributes: + + - score: A float percentage score (e.g., 67.30) representing overall performance + + - results: a list of (example, prediction, score) tuples for each example in devset """ ... + + -def prediction_is_dictlike(prediction): # -> TypeIs[Callable[..., object]] | Literal[False]: +def prediction_is_dictlike(prediction): # -> TypeIs[Callable[..., object]] | Literal[False]: ... -def merge_dicts(d1, d2) -> dict: ... + +def merge_dicts(d1, d2) -> dict: + ... + def truncate_cell(content) -> str: """Truncate content of a cell to 25 words.""" ... @@ -112,7 +94,7 @@ def stylize_metric_name(df: pd.DataFrame, metric_name: str) -> pd.DataFrame: """ ... -def display_dataframe(df: pd.DataFrame): # -> None: +def display_dataframe(df: pd.DataFrame): # -> None: """ Display the specified Pandas DataFrame in the console. @@ -124,10 +106,11 @@ def configure_dataframe_for_ipython_notebook_display(df: pd.DataFrame) -> pd.Dat """Set various pandas display options for DataFrame in an IPython notebook environment.""" ... -def is_in_ipython_notebook_environment(): # -> bool: +def is_in_ipython_notebook_environment(): # -> bool: """ Check if the current environment is an IPython notebook environment. 
:return: True if the current environment is an IPython notebook environment, False otherwise. """ ... + diff --git a/typings/dspy/evaluate/metrics.pyi b/typings/dspy/evaluate/metrics.pyi index 440314b..75bfe09 100644 --- a/typings/dspy/evaluate/metrics.pyi +++ b/typings/dspy/evaluate/metrics.pyi @@ -2,7 +2,33 @@ This type stub file was generated by pyright. """ -def answer_exact_match(example, pred, trace=..., frac=...): # -> bool: +def EM(prediction, answers_list): # -> bool: ... -def answer_passage_match(example, pred, trace=...): # -> bool: + +def F1(prediction, answers_list): # -> float | int: + ... + +def HotPotF1(prediction, answers_list): # -> float | int: + ... + +def normalize_text(s): # -> str: + ... + +def em_score(prediction, ground_truth): # -> bool: + ... + +def f1_score(prediction, ground_truth): # -> float | Literal[0]: ... + +def hotpot_f1_score(prediction, ground_truth): # -> float | Literal[0]: + ... + +def precision_score(prediction, ground_truth): # -> float | Literal[0]: + ... + +def answer_exact_match(example, pred, trace=..., frac=...): # -> bool: + ... + +def answer_passage_match(example, pred, trace=...): # -> bool: + ... + diff --git a/typings/dspy/experimental/__init__.pyi b/typings/dspy/experimental/__init__.pyi deleted file mode 100644 index e45e824..0000000 --- a/typings/dspy/experimental/__init__.pyi +++ /dev/null @@ -1,7 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from dspy.experimental.module_graph import * -from dspy.experimental.synthesizer import * -from dspy.experimental.synthetic_data import * diff --git a/typings/dspy/experimental/module_graph.pyi b/typings/dspy/experimental/module_graph.pyi deleted file mode 100644 index a6b5487..0000000 --- a/typings/dspy/experimental/module_graph.pyi +++ /dev/null @@ -1,31 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -graphviz_available = ... - -class ModuleGraph: - def __init__(self, module_name, module) -> None: ... 
- def inspect_settings(self, settings): # -> None: - """Check for the existence and configuration of LM and RM and add them to the graph.""" - ... - - def add_module(self, module_name, module): # -> None: - """Add a module to the graph""" - ... - - def generate_module_name(self, base_name, module_type): - """Generate a module name based on the module type""" - ... - - def process_submodules(self, module_name, module): # -> None: - """Process submodules of a module and add them to the graph""" - ... - - def process_submodule(self, sub_module_name, sub_module): # -> None: - """Process a submodule and add it to the graph""" - ... - - def render_graph(self, filename=...): # -> None: - """Render the graph to a file(png)""" - ... diff --git a/typings/dspy/experimental/synthesizer/__init__.pyi b/typings/dspy/experimental/synthesizer/__init__.pyi deleted file mode 100644 index 8e2d062..0000000 --- a/typings/dspy/experimental/synthesizer/__init__.pyi +++ /dev/null @@ -1,8 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from dspy.experimental.synthesizer.config import SynthesizerArguments -from dspy.experimental.synthesizer.synthesizer import Synthesizer - -__all__ = ["Synthesizer", "SynthesizerArguments"] diff --git a/typings/dspy/experimental/synthesizer/config.pyi b/typings/dspy/experimental/synthesizer/config.pyi deleted file mode 100644 index fe3a6c2..0000000 --- a/typings/dspy/experimental/synthesizer/config.pyi +++ /dev/null @@ -1,17 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Any, Optional -from pydantic import BaseModel, model_validator - -class SynthesizerArguments(BaseModel): - feedback_mode: Optional[str] = ... - num_example_for_feedback: Optional[int] = ... - input_lm_model: Optional[Any] = ... - output_lm_model: Optional[Any] = ... - output_teacher_module: Optional[Any] = ... - num_example_for_optim: Optional[int] = ... 
- @model_validator(mode="after") - def validate_feedback_mode(self): # -> Self: - ... diff --git a/typings/dspy/experimental/synthesizer/instruction_suffixes.pyi b/typings/dspy/experimental/synthesizer/instruction_suffixes.pyi deleted file mode 100644 index d89b6aa..0000000 --- a/typings/dspy/experimental/synthesizer/instruction_suffixes.pyi +++ /dev/null @@ -1,6 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -INPUT_GENERATION_TASK_WITH_EXAMPLES_SUFFIX = ... -INPUT_GENERATION_TASK_WITH_FEEDBACK_SUFFIX = ... diff --git a/typings/dspy/experimental/synthesizer/signatures.pyi b/typings/dspy/experimental/synthesizer/signatures.pyi deleted file mode 100644 index d2aa72d..0000000 --- a/typings/dspy/experimental/synthesizer/signatures.pyi +++ /dev/null @@ -1,48 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import dspy - -class UnderstandTask(dspy.Signature): - """I'll be providing you a task description. Your task is to prepare a concise, comprehensible summary that captures the broad essence and purpose of this task description. Your summary should illuminate the general objective and the type of problem being solved, offering a clear picture of what the task entails at a high level. Avoid getting into the nuances or specifics of individual datapoints, models, examples, algorithms, or any intricate technicalities. Your explanation should serve to clarify the task's overall goal and its basic premise without touching on methodologies or solutions.""" - - task_description = ... - explanation = ... - -class ExplainTask(dspy.Signature): - """Analyze the provided set of datapoints carefully, and prepare a concise, comprehensible summary that captures the broad essence and purpose of the task these datapoints aim to address. Your summary should illuminate the general objective and the type of problem being solved, offering a clear picture of what the task entails at a high level. 
Avoid getting into the nuances of individual datapoints, specifics about models, examples, algorithms, or any intricate technicalities. Your explanation should serve to clarify the task's overall goal and its basic premise, without touching on methodologies or solutions.""" - - examples = ... - explanation = ... - -class UpdateTaskDescriptionBasedOnFeedback(dspy.Signature): - """Update the task description based on the feedback provided. Ensure that the revised task description incorporates the feedback to improve its overall clarity and effectiveness. Focus on enhancing the task's goal and basic premise, without delving into specific data points, models, examples, algorithms, or technical intricacies. Your explanation should aim to clarify the task's fundamental objective and purpose.""" - - task_description = ... - feedback = ... - updated_task_description = ... - -class GetFeedbackOnGeneration(dspy.Signature): - """Provide constructive feedback on the synthetic data generated, focusing on its quality, relevance, and diversity. Highlight any areas that require improvement and offer suggestions for enhancement. The feedback should center on the overall effectiveness of the synthetic data in aligning with the task description and knowledge seed. Avoid delving into specific data points, models, examples, algorithms, or technical intricacies. Your feedback should be critical but constructive, aiming to improve the synthetic data and the task description.""" - - synthetic_data = ... - task_description = ... - feedback = ... - -class GenerateFieldDescription(dspy.Signature): - """Generate a concise and informative description for a given field based on the provided name and task description. This description should be no longer than 10 words and should be in simple english.""" - - task_description = ... - field_name = ... - field_description = ... 
- -class GenerateInputFieldsData(dspy.Signature): - """Create synthetic data using the task description and the provided knowledge seed. Your task is to generate diverse and imaginative data that aligns with the given task description and knowledge seed. You are encouraged to be creative and not limit yourself, allowing for a wide range of synthetic data that reflects the characteristics and details provided in the task description. The data should be unique and varied, showcasing originality and creativity while maintaining relevance to the task and knowledge seed. - - A knowledge seed is the index of the knowledge base you have, each index represents a different knowledge base.""" - - knowledge_seed = ... - task_description = ... - -class GenerateOutputFieldsData(dspy.Signature): ... diff --git a/typings/dspy/experimental/synthesizer/synthesizer.pyi b/typings/dspy/experimental/synthesizer/synthesizer.pyi deleted file mode 100644 index a52b4be..0000000 --- a/typings/dspy/experimental/synthesizer/synthesizer.pyi +++ /dev/null @@ -1,21 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import dspy -from typing import List, Optional, Union -from dspy.experimental.synthesizer.config import SynthesizerArguments - -class Synthesizer: - def __init__(self, config: SynthesizerArguments) -> None: ... - def generate( - self, - ground_source: Union[List[dspy.Example], dspy.Signature], - num_data: int, - batch_size: int = ..., - ): # -> list[Any]: - ... - def export( - self, data: List[dspy.Example], path: str, mode: Optional[str] = ..., **kwargs - ): # -> None: - ... diff --git a/typings/dspy/experimental/synthesizer/utils.pyi b/typings/dspy/experimental/synthesizer/utils.pyi deleted file mode 100644 index e719418..0000000 --- a/typings/dspy/experimental/synthesizer/utils.pyi +++ /dev/null @@ -1,8 +0,0 @@ -""" -This type stub file was generated by pyright. 
-""" - -import dspy -from typing import List - -def format_examples(examples: List[dspy.Example]) -> str: ... diff --git a/typings/dspy/experimental/synthetic_data.pyi b/typings/dspy/experimental/synthetic_data.pyi deleted file mode 100644 index 1a7c2e3..0000000 --- a/typings/dspy/experimental/synthetic_data.pyi +++ /dev/null @@ -1,28 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import dspy -from typing import List, Optional -from pydantic import BaseModel - -class DescriptionSignature(dspy.Signature): - field_name = ... - example = ... - description = ... - -class SyntheticDataGenerator: - def __init__( - self, schema_class: Optional[BaseModel] = ..., examples: Optional[List[dspy.Example]] = ... - ) -> None: ... - def generate(self, sample_size: int) -> List[dspy.Example]: - """Generate synthetic examples. - - Args: - sample_size (int): number of examples to generate - Raises: - ValueError: either a schema_class or examples should be provided - Returns: - List[dspy.Example]: list of synthetic examples generated - """ - ... diff --git a/typings/dspy/predict/__init__.pyi b/typings/dspy/predict/__init__.pyi index ef8ec69..85597a7 100644 --- a/typings/dspy/predict/__init__.pyi +++ b/typings/dspy/predict/__init__.pyi @@ -5,7 +5,6 @@ This type stub file was generated by pyright. 
from dspy.predict.aggregation import majority from dspy.predict.best_of_n import BestOfN from dspy.predict.chain_of_thought import ChainOfThought -from dspy.predict.chain_of_thought_with_hint import ChainOfThoughtWithHint from dspy.predict.code_act import CodeAct from dspy.predict.knn import KNN from dspy.predict.multi_chain_comparison import MultiChainComparison @@ -15,18 +14,4 @@ from dspy.predict.program_of_thought import ProgramOfThought from dspy.predict.react import ReAct, Tool from dspy.predict.refine import Refine -__all__ = [ - "majority", - "BestOfN", - "ChainOfThought", - "ChainOfThoughtWithHint", - "CodeAct", - "KNN", - "MultiChainComparison", - "Predict", - "ProgramOfThought", - "ReAct", - "Refine", - "Tool", - "Parallel", -] +__all__ = ["majority", "BestOfN", "ChainOfThought", "CodeAct", "KNN", "MultiChainComparison", "Predict", "ProgramOfThought", "ReAct", "Refine", "Tool", "Parallel"] diff --git a/typings/dspy/predict/aggregation.pyi b/typings/dspy/predict/aggregation.pyi index ce4eb76..16dead2 100644 --- a/typings/dspy/predict/aggregation.pyi +++ b/typings/dspy/predict/aggregation.pyi @@ -2,12 +2,14 @@ This type stub file was generated by pyright. """ -def default_normalize(s): # -> str | None: +def default_normalize(s): # -> str | None: ... -def majority(prediction_or_completions, normalize=..., field=...): # -> Prediction: + +def majority(prediction_or_completions, normalize=..., field=...): # -> Prediction: """ Returns the most common completion for the target field (or the last field) in the signature. When normalize returns None, that completion is ignored. In case of a tie, earlier completion are prioritized. """ ... + diff --git a/typings/dspy/predict/avatar/__init__.pyi b/typings/dspy/predict/avatar/__init__.pyi index f45abe9..8c8eea8 100644 --- a/typings/dspy/predict/avatar/__init__.pyi +++ b/typings/dspy/predict/avatar/__init__.pyi @@ -5,3 +5,4 @@ This type stub file was generated by pyright. 
from dspy.predict.avatar.avatar import * from dspy.predict.avatar.models import * from dspy.predict.avatar.signatures import * + diff --git a/typings/dspy/predict/avatar/avatar.pyi b/typings/dspy/predict/avatar/avatar.pyi index 8c95d49..ab8d6fb 100644 --- a/typings/dspy/predict/avatar/avatar.pyi +++ b/typings/dspy/predict/avatar/avatar.pyi @@ -4,9 +4,15 @@ This type stub file was generated by pyright. import dspy -def get_number_with_suffix(number: int) -> str: ... +def get_number_with_suffix(number: int) -> str: + ... class Avatar(dspy.Module): - def __init__(self, signature, tools, max_iters=..., verbose=...) -> None: ... - def forward(self, **kwargs): # -> Prediction: + def __init__(self, signature, tools, max_iters=..., verbose=...) -> None: ... + + def forward(self, **kwargs): # -> Prediction: + ... + + + diff --git a/typings/dspy/predict/avatar/models.pyi b/typings/dspy/predict/avatar/models.pyi index fbcd0b6..d254758 100644 --- a/typings/dspy/predict/avatar/models.pyi +++ b/typings/dspy/predict/avatar/models.pyi @@ -10,15 +10,23 @@ class Tool(BaseModel): name: str desc: Optional[str] input_type: Optional[str] = ... - def __str__(self) -> str: ... - def __repr__(self) -> str: ... + def __str__(self) -> str: + ... + + def __repr__(self) -> str: + ... + + class Action(BaseModel): tool_name: Any = ... tool_input_query: Any = ... + class ActionOutput(BaseModel): tool_name: str tool_input_query: str tool_output: str ... + + diff --git a/typings/dspy/predict/avatar/signatures.pyi b/typings/dspy/predict/avatar/signatures.pyi index 6986aaa..edf5693 100644 --- a/typings/dspy/predict/avatar/signatures.pyi +++ b/typings/dspy/predict/avatar/signatures.pyi @@ -11,7 +11,8 @@ class Actor(dspy.Signature): You will output action needed to accomplish the `Goal`. `Action` should have a tool to use and the input query to pass to the tool. Note: You can opt to use no tools and provide the final answer directly. 
You can also one tool multiple times with different input queries if applicable.""" - goal: str = ... tools: list[str] = ... action_1: Action = ... + + diff --git a/typings/dspy/predict/best_of_n.pyi b/typings/dspy/predict/best_of_n.pyi index 690e360..1e609e4 100644 --- a/typings/dspy/predict/best_of_n.pyi +++ b/typings/dspy/predict/best_of_n.pyi @@ -6,14 +6,7 @@ from typing import Callable, Optional from dspy.predict.predict import Module, Prediction class BestOfN(Module): - def __init__( - self, - module: Module, - N: int, - reward_fn: Callable[[dict, Prediction], float], - threshold: float, - fail_count: Optional[int] = ..., - ) -> None: + def __init__(self, module: Module, N: int, reward_fn: Callable[[dict, Prediction], float], threshold: float, fail_count: Optional[int] = ...) -> None: """ Runs a module up to `N` times with different temperatures and returns the best prediction out of `N` attempts or the first prediction that passes the `threshold`. @@ -47,6 +40,9 @@ class BestOfN(Module): ``` """ ... - - def forward(self, **kwargs): # -> None: + + def forward(self, **kwargs): # -> object | Any | None: ... + + + diff --git a/typings/dspy/predict/chain_of_thought.pyi b/typings/dspy/predict/chain_of_thought.pyi index 8da7c62..a8d79c8 100644 --- a/typings/dspy/predict/chain_of_thought.pyi +++ b/typings/dspy/predict/chain_of_thought.pyi @@ -2,20 +2,14 @@ This type stub file was generated by pyright. 
""" -from typing import Optional, Type, Union +from typing import Any, Optional, Type, Union from pydantic.fields import FieldInfo -from dspy.primitives.program import Module +from dspy.primitives.module import Module from dspy.signatures.field import OutputField from dspy.signatures.signature import Signature class ChainOfThought(Module): - def __init__( - self, - signature: Type[Signature], - rationale_field: Optional[Union[OutputField, FieldInfo]] = ..., - rationale_field_type: Type = ..., - **config, - ) -> None: + def __init__(self, signature: Union[str, Type[Signature]], rationale_field: Optional[Union[OutputField, FieldInfo]] = ..., rationale_field_type: Type = ..., **config: dict[str, Any]) -> None: """ A module that reasons step by step in order to predict the output of a task. @@ -26,6 +20,12 @@ class ChainOfThought(Module): **config: The configuration for the module. """ ... + + def forward(self, **kwargs): # -> object | Any: + ... + + async def aforward(self, **kwargs): # -> Any: + ... + + - def forward(self, **kwargs): ... - async def aforward(self, **kwargs): ... diff --git a/typings/dspy/predict/chain_of_thought_with_hint.pyi b/typings/dspy/predict/chain_of_thought_with_hint.pyi deleted file mode 100644 index ff83d81..0000000 --- a/typings/dspy/predict/chain_of_thought_with_hint.pyi +++ /dev/null @@ -1,9 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from .predict import Module - -class ChainOfThoughtWithHint(Module): - def __init__(self, signature, rationale_type=..., **config) -> None: ... - def forward(self, **kwargs): ... diff --git a/typings/dspy/predict/code_act.pyi b/typings/dspy/predict/code_act.pyi index 7352d18..63c4847 100644 --- a/typings/dspy/predict/code_act.pyi +++ b/typings/dspy/predict/code_act.pyi @@ -2,20 +2,18 @@ This type stub file was generated by pyright. 
""" -from inspect import Signature -from typing import Callable, Type, Union +from typing import Callable, Optional, Type, Union from dspy.predict.program_of_thought import ProgramOfThought from dspy.predict.react import ReAct +from dspy.primitives.python_interpreter import PythonInterpreter +from dspy.signatures.signature import Signature logger = ... - class CodeAct(ReAct, ProgramOfThought): """ CodeAct is a module that utilizes the Code Interpreter and predefined tools to solve the problem. """ - def __init__( - self, signature: Union[str, Type[Signature]], tools: list[Callable], max_iters: int = ... - ) -> None: + def __init__(self, signature: Union[str, Type[Signature]], tools: list[Callable], max_iters: int = ..., interpreter: Optional[PythonInterpreter] = ...) -> None: """ Initializes the CodeAct class with the specified model, temperature, and max tokens. @@ -23,7 +21,7 @@ class CodeAct(ReAct, ProgramOfThought): signature (Union[str, Type[Signature]]): The signature of the module. tools (list[Callable]): The tool callables to be used. CodeAct only accepts functions and not callable objects. max_iters (int): The maximum number of iterations to generate the answer. - + interpreter: PythonInterpreter instance to use. If None, a new one is instantiated. Example: ```python from dspy.predict import CodeAct @@ -37,6 +35,9 @@ class CodeAct(ReAct, ProgramOfThought): ``` """ ... - - def forward(self, **kwargs): # -> Prediction: + + def forward(self, **kwargs): # -> Prediction: ... + + + diff --git a/typings/dspy/predict/knn.pyi b/typings/dspy/predict/knn.pyi index 1a2fa00..9519ae7 100644 --- a/typings/dspy/predict/knn.pyi +++ b/typings/dspy/predict/knn.pyi @@ -38,5 +38,9 @@ class KNN: ``` """ ... + + def __call__(self, **kwargs) -> list: + ... + + - def __call__(self, **kwargs) -> list: ... 
diff --git a/typings/dspy/predict/multi_chain_comparison.pyi b/typings/dspy/predict/multi_chain_comparison.pyi index 28b10dd..4c22d04 100644 --- a/typings/dspy/predict/multi_chain_comparison.pyi +++ b/typings/dspy/predict/multi_chain_comparison.pyi @@ -2,8 +2,14 @@ This type stub file was generated by pyright. """ -from dspy.primitives.program import Module +from dspy.primitives.module import Module class MultiChainComparison(Module): - def __init__(self, signature, M=..., temperature=..., **config) -> None: ... - def forward(self, completions, **kwargs): ... + def __init__(self, signature, M=..., temperature=..., **config) -> None: + ... + + def forward(self, completions, **kwargs): # -> object | Any: + ... + + + diff --git a/typings/dspy/predict/parallel.pyi b/typings/dspy/predict/parallel.pyi index 85f166a..e7fed9e 100644 --- a/typings/dspy/predict/parallel.pyi +++ b/typings/dspy/predict/parallel.pyi @@ -6,16 +6,14 @@ from typing import Any, List, Optional, Tuple from dspy.primitives.example import Example class Parallel: - def __init__( - self, - num_threads: Optional[int] = ..., - max_errors: int = ..., - access_examples: bool = ..., - return_failed_examples: bool = ..., - provide_traceback: Optional[bool] = ..., - disable_progress_bar: bool = ..., - ) -> None: ... - def forward( - self, exec_pairs: List[Tuple[Any, Example]], num_threads: Optional[int] = ... - ) -> List[Any]: ... - def __call__(self, *args: Any, **kwargs: Any) -> Any: ... + def __init__(self, num_threads: Optional[int] = ..., max_errors: Optional[int] = ..., access_examples: bool = ..., return_failed_examples: bool = ..., provide_traceback: Optional[bool] = ..., disable_progress_bar: bool = ...) -> None: + ... + + def forward(self, exec_pairs: List[Tuple[Any, Example]], num_threads: Optional[int] = ...) -> List[Any]: + ... + + def __call__(self, *args: Any, **kwargs: Any) -> Any: + ... 
+ + + diff --git a/typings/dspy/predict/parameter.pyi b/typings/dspy/predict/parameter.pyi index 7dcd22f..6fb9ba2 100644 --- a/typings/dspy/predict/parameter.pyi +++ b/typings/dspy/predict/parameter.pyi @@ -2,5 +2,7 @@ This type stub file was generated by pyright. """ -class Parameter: ... -class Hyperparameter: ... +class Parameter: + ... + + diff --git a/typings/dspy/predict/predict.pyi b/typings/dspy/predict/predict.pyi index 206b1e7..cfc3c21 100644 --- a/typings/dspy/predict/predict.pyi +++ b/typings/dspy/predict/predict.pyi @@ -2,46 +2,61 @@ This type stub file was generated by pyright. """ +from typing import Optional, Type, Union from dspy.predict.parameter import Parameter -from dspy.primitives.program import Module +from dspy.primitives.module import Module +from dspy.signatures.signature import Signature +from dspy.utils.callback import BaseCallback logger = ... - class Predict(Module, Parameter): - def __init__(self, signature, callbacks=..., **config) -> None: ... - def reset(self): # -> None: + def __init__(self, signature: Union[str, Type[Signature]], callbacks: Optional[list[BaseCallback]] = ..., **config) -> None: + ... + + def reset(self): # -> None: ... - def dump_state(self): # -> dict[str, Any]: + + def dump_state(self): # -> dict[str, Any]: ... - def load_state(self, state): # -> Self: + + def load_state(self, state: dict) -> Predict: """Load the saved state of a `Predict` object. Args: - state (dict): The saved state of a `Predict` object. + state: The saved state of a `Predict` object. Returns: - self: Returns self to allow method chaining + Self to allow method chaining. """ ... - - def __call__(self, *args, **kwargs): ... - async def acall(self, *args, **kwargs): ... - def forward(self, **kwargs): # -> Prediction: + + def __call__(self, *args, **kwargs): # -> object | Any: + ... + + async def acall(self, *args, **kwargs): # -> Any: ... 
- async def aforward(self, **kwargs): # -> Prediction: + + def forward(self, **kwargs): # -> Prediction: ... - def update_config(self, **kwargs): # -> None: + + async def aforward(self, **kwargs): # -> Prediction: ... - def get_config(self): # -> dict[str, Any]: + + def update_config(self, **kwargs): # -> None: ... - def __repr__(self): # -> str: + + def get_config(self): # -> dict[str, Any]: ... + + def __repr__(self): # -> str: + ... + + -def serialize_object( - obj, -): # -> dict[str, Any] | list[dict[str, Any] | list[Any] | tuple[dict[str, Any] | list[Any] | tuple[Any, ...] | dict[Any, Any] | Any, ...] | dict[Any, Any] | Any] | tuple[dict[str, Any] | list[Any] | tuple[Any, ...] | dict[Any, Any] | Any, ...] | dict[Any, Any]: +def serialize_object(obj): # -> dict[str, Any] | list[dict[str, Any] | list[Any] | tuple[dict[str, Any] | list[Any] | tuple[Any, ...] | dict[Any, Any] | Any, ...] | dict[Any, Any] | Any] | tuple[dict[str, Any] | list[Any] | tuple[Any, ...] | dict[Any, Any] | Any, ...] | dict[Any, Any]: """ Recursively serialize a given object into a JSON-compatible format. Supports Pydantic models, lists, dicts, and primitive types. """ ... + diff --git a/typings/dspy/predict/program_of_thought.pyi b/typings/dspy/predict/program_of_thought.pyi index 12017fa..e9617da 100644 --- a/typings/dspy/predict/program_of_thought.pyi +++ b/typings/dspy/predict/program_of_thought.pyi @@ -2,12 +2,12 @@ This type stub file was generated by pyright. """ -from typing import Type, Union -from dspy.primitives.program import Module +from typing import Optional, Type, Union +from dspy.primitives.module import Module +from dspy.primitives.python_interpreter import PythonInterpreter from dspy.signatures.signature import Signature logger = ... - class ProgramOfThought(Module): """ A DSPy module that runs Python programs to solve a problem. 
@@ -23,12 +23,17 @@ class ProgramOfThought(Module): pot(question="what is 1+1?") ``` """ - def __init__(self, signature: Union[str, Type[Signature]], max_iters=...) -> None: + def __init__(self, signature: Union[str, Type[Signature]], max_iters: int = ..., interpreter: Optional[PythonInterpreter] = ...) -> None: """ Args: signature: The signature of the module. max_iters: The maximum number of iterations to retry code generation and execution. + interpreter: PythonInterpreter instance to use. If None, a new one is instantiated. """ ... + + def forward(self, **kwargs): # -> object | Any: + ... + + - def forward(self, **kwargs): ... diff --git a/typings/dspy/predict/react.pyi b/typings/dspy/predict/react.pyi index 74d5376..ee85c75 100644 --- a/typings/dspy/predict/react.pyi +++ b/typings/dspy/predict/react.pyi @@ -2,25 +2,51 @@ This type stub file was generated by pyright. """ -from typing import Callable -from dspy.primitives.program import Module +from typing import Callable, TYPE_CHECKING, Type +from dspy.primitives.module import Module +from dspy.signatures.signature import Signature logger = ... - +if TYPE_CHECKING: + ... class ReAct(Module): - def __init__(self, signature, tools: list[Callable], max_iters=...) -> None: + def __init__(self, signature: Type[Signature], tools: list[Callable], max_iters: int = ...) -> None: """ - `tools` is either a list of functions, callable classes, or `dspy.Tool` instances. + ReAct stands for "Reasoning and Acting," a popular paradigm for building tool-using agents. + In this approach, the language model is iteratively provided with a list of tools and has + to reason about the current situation. The model decides whether to call a tool to gather more + information or to finish the task based on its reasoning process. The DSPy version of ReAct is + generalized to work over any signature, thanks to signature polymorphism. + + Args: + signature: The signature of the module, which defines the input and output of the react module. 
+ tools (list[Callable]): A list of functions, callable objects, or `dspy.Tool` instances. + max_iters (Optional[int]): The maximum number of iterations to run. Defaults to 10. + + Example: + + ```python + def get_weather(city: str) -> str: + return f"The weather in {city} is sunny." + + react = dspy.ReAct(signature="question->answer", tools=[get_weather]) + pred = react(question="What is the weather in Tokyo?") + ``` """ ... - - def forward(self, **input_args): # -> Prediction: + + def forward(self, **input_args): # -> Prediction: ... - async def aforward(self, **input_args): # -> Prediction: + + async def aforward(self, **input_args): # -> Prediction: ... + def truncate_trajectory(self, trajectory): """Truncates the trajectory so that it fits in the context window. Users can override this method to implement their own truncation logic. """ ... + + + diff --git a/typings/dspy/predict/refine.pyi b/typings/dspy/predict/refine.pyi index d34f35d..ceebc23 100644 --- a/typings/dspy/predict/refine.pyi +++ b/typings/dspy/predict/refine.pyi @@ -15,7 +15,6 @@ class OfferFeedback(Signature): The module will not see its own history, so it needs to rely on entirely concrete and actionable advice from you to avoid the same mistake on the same or similar inputs. """ - program_code: str = ... modules_defn: str = ... program_inputs: str = ... @@ -28,15 +27,9 @@ class OfferFeedback(Signature): discussion: str = ... advice: dict[str, str] = ... + class Refine(Module): - def __init__( - self, - module: Module, - N: int, - reward_fn: Callable[[dict, Prediction], float], - threshold: float, - fail_count: Optional[int] = ..., - ) -> None: + def __init__(self, module: Module, N: int, reward_fn: Callable[[dict, Prediction], float], threshold: float, fail_count: Optional[int] = ...) -> None: """ Refines a module by running it up to N times with different temperatures and returns the best prediction. @@ -74,13 +67,15 @@ class Refine(Module): ``` """ ... 
- - def forward(self, **kwargs): # -> None: + + def forward(self, **kwargs): # -> object | Any | None: ... + -def inspect_modules(program): # -> str: + +def inspect_modules(program): # -> str: ... -def recursive_mask( - o, -): # -> dict[Any, Any | dict[Any, Any] | list[Any | dict[Any, Any] | list[Any] | tuple[Any, ...] | str] | tuple[Any, ...] | str] | list[Any | dict[Any, Any | dict[Any, Any] | list[Any] | tuple[Any, ...] | str] | list[Any] | tuple[Any, ...] | str] | tuple[Any, ...] | str: + +def recursive_mask(o): # -> dict[Any, Any | dict[Any, Any] | list[Any | dict[Any, Any] | list[Any] | tuple[Any, ...] | str] | tuple[Any, ...] | str] | list[Any | dict[Any, Any | dict[Any, Any] | list[Any] | tuple[Any, ...] | str] | list[Any] | tuple[Any, ...] | str] | tuple[Any, ...] | str: ... + diff --git a/typings/dspy/predict/retry.pyi b/typings/dspy/predict/retry.pyi index cea7ef9..006bc27 100644 --- a/typings/dspy/predict/retry.pyi +++ b/typings/dspy/predict/retry.pyi @@ -1,3 +1,4 @@ """ This type stub file was generated by pyright. """ + diff --git a/typings/dspy/primitives/__init__.pyi b/typings/dspy/primitives/__init__.pyi index cd1ae6d..081febc 100644 --- a/typings/dspy/primitives/__init__.pyi +++ b/typings/dspy/primitives/__init__.pyi @@ -2,20 +2,10 @@ This type stub file was generated by pyright. 
""" -from dspy.primitives import assertions +from dspy.primitives.base_module import BaseModule from dspy.primitives.example import Example -from dspy.primitives.module import BaseModule +from dspy.primitives.module import Module from dspy.primitives.prediction import Completions, Prediction -from dspy.primitives.program import Module, Program from dspy.primitives.python_interpreter import PythonInterpreter -__all__ = [ - "assertions", - "Example", - "BaseModule", - "Prediction", - "Completions", - "Program", - "Module", - "PythonInterpreter", -] +__all__ = ["Example", "BaseModule", "Prediction", "Completions", "Module", "PythonInterpreter"] diff --git a/typings/dspy/primitives/assertions.pyi b/typings/dspy/primitives/assertions.pyi deleted file mode 100644 index cea7ef9..0000000 --- a/typings/dspy/primitives/assertions.pyi +++ /dev/null @@ -1,3 +0,0 @@ -""" -This type stub file was generated by pyright. -""" diff --git a/typings/dspy/primitives/base_module.pyi b/typings/dspy/primitives/base_module.pyi new file mode 100644 index 0000000..08073b5 --- /dev/null +++ b/typings/dspy/primitives/base_module.pyi @@ -0,0 +1,87 @@ +""" +This type stub file was generated by pyright. +""" + +from collections.abc import Generator + +logger = ... +class BaseModule: + def __init__(self) -> None: + ... + + def named_parameters(self): # -> list[Any]: + """ + Unlike PyTorch, handles (non-recursive) lists of parameters too. + """ + ... + + def named_sub_modules(self, type_=..., skip_compiled=...) -> Generator[tuple[str, BaseModule], None, None]: + """Find all sub-modules in the module, as well as their names. + + Say self.children[4]['key'].sub_module is a sub-module. Then the name will be + 'children[4][key].sub_module'. But if the sub-module is accessible at different + paths, only one of the paths will be returned. + """ + ... + + def parameters(self): # -> list[Any]: + ... + + def deepcopy(self): # -> Self: + """Deep copy the module. 
+ + This is a tweak to the default python deepcopy that only deep copies `self.parameters()`, and for other + attributes, we just do the shallow copy. + """ + ... + + def reset_copy(self): # -> Self: + """Deep copy the module and reset all parameters.""" + ... + + def dump_state(self): # -> dict[Any, Any]: + ... + + def load_state(self, state): # -> None: + ... + + def save(self, path, save_program=..., modules_to_serialize=...): # -> None: + """Save the module. + + Save the module to a directory or a file. There are two modes: + - `save_program=False`: Save only the state of the module to a json or pickle file, based on the value of + the file extension. + - `save_program=True`: Save the whole module to a directory via cloudpickle, which contains both the state and + architecture of the model. + + If `save_program=True` and `modules_to_serialize` are provided, it will register those modules for serialization + with cloudpickle's `register_pickle_by_value`. This causes cloudpickle to serialize the module by value rather + than by reference, ensuring the module is fully preserved along with the saved program. This is useful + when you have custom modules that need to be serialized alongside your program. If None, then no modules + will be registered for serialization. + + We also save the dependency versions, so that the loaded model can check if there is a version mismatch on + critical dependencies or DSPy version. + + Args: + path (str): Path to the saved state file, which should be a .json or .pkl file when `save_program=False`, + and a directory when `save_program=True`. + save_program (bool): If True, save the whole module to a directory via cloudpickle, otherwise only save + the state. + modules_to_serialize (list): A list of modules to serialize with cloudpickle's `register_pickle_by_value`. + If None, then no modules will be registered for serialization. + + """ + ... + + def load(self, path): # -> None: + """Load the saved module. 
You may also want to check out dspy.load, if you want to + load an entire program, not just the state for an existing program. + + Args: + path (str): Path to the saved state file, which should be a .json or a .pkl file + """ + ... + + + diff --git a/typings/dspy/primitives/example.pyi b/typings/dspy/primitives/example.pyi index b3ae28c..eb7bc3d 100644 --- a/typings/dspy/primitives/example.pyi +++ b/typings/dspy/primitives/example.pyi @@ -3,43 +3,74 @@ This type stub file was generated by pyright. """ class Example: - def __init__(self, base=..., **kwargs) -> None: ... - def __getattr__(self, key): ... - def __setattr__(self, key, value): # -> None: + def __init__(self, base=..., **kwargs) -> None: ... - def __getitem__(self, key): ... - def __setitem__(self, key, value): # -> None: + + def __getattr__(self, key): ... - def __delitem__(self, key): # -> None: + + def __setattr__(self, key, value): # -> None: ... - def __contains__(self, key): # -> bool: + + def __getitem__(self, key): ... - def __len__(self): # -> int: + + def __setitem__(self, key, value): # -> None: ... - def __repr__(self): # -> str: + + def __delitem__(self, key): # -> None: ... - def __str__(self) -> str: ... - def __eq__(self, other) -> bool: ... - def __hash__(self) -> int: ... - def keys(self, include_dspy=...): # -> list[Any]: + + def __contains__(self, key): # -> bool: ... - def values(self, include_dspy=...): # -> list[Any]: + + def __len__(self): # -> int: ... - def items(self, include_dspy=...): # -> list[tuple[Any, Any]]: + + def __repr__(self): # -> str: ... - def get(self, key, default=...): # -> None: + + def __str__(self) -> str: ... - def with_inputs(self, *keys): # -> Self: + + def __eq__(self, other) -> bool: ... - def inputs(self): # -> Self: + + def __hash__(self) -> int: ... - def labels(self): # -> Self: + + def keys(self, include_dspy=...): # -> list[Any]: ... - def __iter__(self): # -> Iterator[Any]: + + def values(self, include_dspy=...): # -> list[Any]: ... 
- def copy(self, **kwargs): # -> Self: + + def items(self, include_dspy=...): # -> list[tuple[Any, Any]]: ... - def without(self, *keys): # -> Self: + + def get(self, key, default=...): # -> None: ... - def toDict(self): # -> dict[Any, Any]: + + def with_inputs(self, *keys): # -> Self: ... + + def inputs(self): # -> Self: + ... + + def labels(self): # -> Self: + ... + + def __iter__(self): # -> Iterator[Any]: + ... + + def copy(self, **kwargs): # -> Self: + ... + + def without(self, *keys): # -> Self: + ... + + def toDict(self): # -> dict[Any, Any]: + ... + + + diff --git a/typings/dspy/primitives/module.pyi b/typings/dspy/primitives/module.pyi index 1858011..9111394 100644 --- a/typings/dspy/primitives/module.pyi +++ b/typings/dspy/primitives/module.pyi @@ -2,83 +2,76 @@ This type stub file was generated by pyright. """ -from collections.abc import Generator +from typing import Optional +from dspy.primitives.base_module import BaseModule +from dspy.primitives.example import Example +from dspy.utils.callback import with_callbacks logger = ... - -class BaseModule: - def __init__(self) -> None: ... - def named_parameters(self): # -> list[Any]: - """ - Unlike PyTorch, handles (non-recursive) lists of parameters too. - """ +class ProgramMeta(type): + """Metaclass ensuring every ``dspy.Module`` instance is properly initialised.""" + def __call__(cls, *args, **kwargs): ... + - def named_sub_modules( - self, type_=..., skip_compiled=... - ) -> Generator[tuple[str, BaseModule], None, None]: - """Find all sub-modules in the module, as well as their names. - Say self.children[4]['key'].sub_module is a sub-module. Then the name will be - 'children[4][key].sub_module'. But if the sub-module is accessible at different - paths, only one of the paths will be returned. - """ +class Module(BaseModule, metaclass=ProgramMeta): + def __init__(self, callbacks=...) -> None: ... 
- - def parameters(self): # -> list[Any]: + + @with_callbacks + def __call__(self, *args, **kwargs): # -> object | Any: ... - def deepcopy(self): # -> Self: - """Deep copy the module. - - This is a tweak to the default python deepcopy that only deep copies `self.parameters()`, and for other - attributes, we just do the shallow copy. - """ + + @with_callbacks + async def acall(self, *args, **kwargs): # -> Any: ... - - def reset_copy(self): # -> Self: - """Deep copy the module and reset all parameters.""" + + def named_predictors(self): # -> list[tuple[Any, Predict]]: ... - - def dump_state(self): # -> dict[Any, Any]: + + def predictors(self): # -> list[Predict]: ... - def load_state(self, state): # -> None: + + def set_lm(self, lm): # -> None: ... - def save(self, path, save_program=..., modules_to_serialize=...): # -> None: - """Save the module. - - Save the module to a directory or a file. There are two modes: - - `save_program=False`: Save only the state of the module to a json or pickle file, based on the value of - the file extension. - - `save_program=True`: Save the whole module to a directory via cloudpickle, which contains both the state and - architecture of the model. - - If `save_program=True` and `modules_to_serialize` are provided, it will register those modules for serialization - with cloudpickle's `register_pickle_by_value`. This causes cloudpickle to serialize the module by value rather - than by reference, ensuring the module is fully preserved along with the saved program. This is useful - when you have custom modules that need to be serialized alongside your program. If None, then no modules - will be registered for serialization. - - We also save the dependency versions, so that the loaded model can check if there is a version mismatch on - critical dependencies or DSPy version. + + def get_lm(self): # -> LM | None: + ... + + def __repr__(self): # -> LiteralString: + ... 
+ + def map_named_predictors(self, func): # -> Self: + """Applies a function to all named predictors.""" + ... + + def inspect_history(self, n: int = ...): # -> None: + ... + + def batch(self, examples: list[Example], num_threads: Optional[int] = ..., max_errors: Optional[int] = ..., return_failed_examples: bool = ..., provide_traceback: Optional[bool] = ..., disable_progress_bar: bool = ...): # -> tuple[Any, Any, Any] | List[Any]: + """ + Processes a list of dspy.Example instances in parallel using the Parallel module. Args: - path (str): Path to the saved state file, which should be a .json or .pkl file when `save_program=False`, - and a directory when `save_program=True`. - save_program (bool): If True, save the whole module to a directory via cloudpickle, otherwise only save - the state. - modules_to_serialize (list): A list of modules to serialize with cloudpickle's `register_pickle_by_value`. - If None, then no modules will be registered for serialization. - + examples: List of dspy.Example instances to process. + num_threads: Number of threads to use for parallel processing. + max_errors: Maximum number of errors allowed before stopping execution. + If ``None``, inherits from ``dspy.settings.max_errors``. + return_failed_examples: Whether to return failed examples and exceptions. + provide_traceback: Whether to include traceback information in error logs. + disable_progress_bar: Whether to display the progress bar. + + Returns: + List of results, and optionally failed examples and exceptions. """ ... + + def __getattribute__(self, name): # -> Callable[..., object] | Any: + ... + - def load(self, path): # -> None: - """Load the saved module. You may also want to check out dspy.load, if you want to - load an entire program, not just the state for an existing program. - Args: - path (str): Path to the saved state file, which should be a .json or a .pkl file - """ - ... +def set_attribute_by_name(obj, name, value): # -> None: + ... 
-def postprocess_parameter_name(name, value): ... diff --git a/typings/dspy/primitives/prediction.pyi b/typings/dspy/primitives/prediction.pyi index ccf7bd9..0298384 100644 --- a/typings/dspy/primitives/prediction.pyi +++ b/typings/dspy/primitives/prediction.pyi @@ -5,46 +5,94 @@ This type stub file was generated by pyright. from dspy.primitives.example import Example class Prediction(Example): - def __init__(self, *args, **kwargs) -> None: ... - def get_lm_usage(self): # -> None: + """A prediction object that contains the output of a DSPy module. + + Prediction inherits from Example. + + To allow feedback-augmented scores, Prediction supports comparison operations + (<, >, <=, >=) for Predictions with a `score` field. The comparison operations + compare the 'score' values as floats. For equality comparison, Predictions are equal + if their underlying data stores are equal (inherited from Example). + + Arithmetic operations (+, /, etc.) are also supported for Predictions with a 'score' + field, operating on the score value. + """ + def __init__(self, *args, **kwargs) -> None: ... - def set_lm_usage(self, value): # -> None: + + def get_lm_usage(self): # -> None: ... + + def set_lm_usage(self, value): # -> None: + ... + @classmethod - def from_completions(cls, list_or_dict, signature=...): # -> Self: + def from_completions(cls, list_or_dict, signature=...): # -> Self: + ... + + def __repr__(self): # -> str: + ... + + def __str__(self) -> str: + ... + + def __float__(self): # -> float: + ... + + def __add__(self, other): # -> float: + ... + + def __radd__(self, other): # -> float: ... - def __repr__(self): # -> str: + + def __truediv__(self, other): # -> float: ... - def __str__(self) -> str: ... - def __float__(self): # -> float: + + def __rtruediv__(self, other): # -> float: ... - def __add__(self, other): # -> float: + + def __lt__(self, other) -> bool: ... - def __radd__(self, other): # -> float: + + def __le__(self, other) -> bool: ... 
- def __truediv__(self, other): # -> float: + + def __gt__(self, other) -> bool: ... - def __rtruediv__(self, other): # -> float: + + def __ge__(self, other) -> bool: ... - def __lt__(self, other) -> bool: ... - def __le__(self, other) -> bool: ... - def __gt__(self, other) -> bool: ... - def __ge__(self, other) -> bool: ... + @property - def completions(self): # -> None: + def completions(self): # -> None: ... + + class Completions: - def __init__(self, list_or_dict, signature=...) -> None: ... - def items(self): # -> dict_items[Any, Any]: + def __init__(self, list_or_dict, signature=...) -> None: + ... + + def items(self): # -> dict_items[Any, Any]: + ... + + def __getitem__(self, key): # -> Prediction: ... - def __getitem__(self, key): # -> Prediction: + + def __getattr__(self, name): ... - def __getattr__(self, name): ... - def __len__(self): # -> int: + + def __len__(self): # -> int: ... - def __contains__(self, key): # -> bool: + + def __contains__(self, key): # -> bool: ... - def __repr__(self): # -> str: + + def __repr__(self): # -> str: ... - def __str__(self) -> str: ... + + def __str__(self) -> str: + ... + + + diff --git a/typings/dspy/primitives/program.pyi b/typings/dspy/primitives/program.pyi deleted file mode 100644 index 1c3dd4a..0000000 --- a/typings/dspy/primitives/program.pyi +++ /dev/null @@ -1,63 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Optional -from dspy.primitives.module import BaseModule -from dspy.utils.callback import with_callbacks - -class ProgramMeta(type): - """Metaclass ensuring every ``dspy.Module`` instance is properly initialised.""" - def __call__(cls, *args, **kwargs): ... - -class Module(BaseModule, metaclass=ProgramMeta): - def __init__(self, callbacks=...) -> None: ... - @with_callbacks - def __call__(self, *args, **kwargs): ... - @with_callbacks - async def acall(self, *args, **kwargs): ... - def named_predictors(self): # -> list[tuple[Any, Predict]]: - ... 
- def predictors(self): # -> list[Predict]: - ... - def set_lm(self, lm): # -> None: - ... - def get_lm(self): # -> LM | None: - ... - def __repr__(self): # -> LiteralString: - ... - def map_named_predictors(self, func): # -> Self: - """Applies a function to all named predictors.""" - ... - - def inspect_history(self, n: int = ...): # -> None: - ... - def batch( - self, - examples, - num_threads: Optional[int] = ..., - max_errors: int = ..., - return_failed_examples: bool = ..., - provide_traceback: Optional[bool] = ..., - disable_progress_bar: bool = ..., - ): # -> tuple[Any, Any, Any] | List[Any]: - """ - Processes a list of dspy.Example instances in parallel using the Parallel module. - - Args: - examples: List of dspy.Example instances to process. - num_threads: Number of threads to use for parallel processing. - max_errors: Maximum number of errors allowed before stopping execution. - return_failed_examples: Whether to return failed examples and exceptions. - provide_traceback: Whether to include traceback information in error logs. - disable_progress_bar: Whether to display the progress bar. - - Returns: - List of results, and optionally failed examples and exceptions. - """ - ... - -def set_attribute_by_name(obj, name, value): # -> None: - ... - -Program = Module diff --git a/typings/dspy/primitives/python_interpreter.pyi b/typings/dspy/primitives/python_interpreter.pyi index 23ff130..752ab02 100644 --- a/typings/dspy/primitives/python_interpreter.pyi +++ b/typings/dspy/primitives/python_interpreter.pyi @@ -2,10 +2,13 @@ This type stub file was generated by pyright. """ +from os import PathLike from types import TracebackType -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Union + +class InterpreterError(RuntimeError): + ... -class InterpreterError(RuntimeError): ... 
class PythonInterpreter: r""" @@ -21,16 +24,32 @@ class PythonInterpreter: output = interp(code_string) # If final statement is non-None, prints the numeric result, else prints captured output ``` """ - def __init__(self, deno_command: Optional[List[str]] = ...) -> None: ... - def execute(self, code: str, variables: Optional[Dict[str, Any]] = ...) -> Any: ... - def __enter__(self): # -> Self: + def __init__(self, deno_command: Optional[List[str]] = ..., enable_read_paths: Optional[List[Union[PathLike, str]]] = ..., enable_write_paths: Optional[List[Union[PathLike, str]]] = ..., enable_env_vars: Optional[List[str]] = ..., enable_network_access: Optional[List[str]] = ..., sync_files: bool = ...) -> None: + """ + Args: + deno_command: command list to launch Deno. + enable_read_paths: Files or directories to allow reading from in the sandbox. + enable_write_paths: Files or directories to allow writing to in the sandbox. + enable_env_vars: Environment variable names to allow in the sandbox. + enable_network_access: Domains or IPs to allow network access in the sandbox. + sync_files: If set, syncs changes within the sandbox back to original files after execution. + """ + ... + + def execute(self, code: str, variables: Optional[Dict[str, Any]] = ...) -> Any: + ... + + def __enter__(self): # -> Self: + ... + + def __exit__(self, _exc_type: Optional[type[BaseException]], _exc_val: Optional[BaseException], _exc_tb: Optional[TracebackType]): # -> None: ... - def __exit__( - self, - _exc_type: Optional[type[BaseException]], - _exc_val: Optional[BaseException], - _exc_tb: Optional[TracebackType], - ): # -> None: + + def __call__(self, code: str, variables: Optional[Dict[str, Any]] = ...) -> Any: ... - def __call__(self, code: str, variables: Optional[Dict[str, Any]] = ...) -> Any: ... - def shutdown(self) -> None: ... + + def shutdown(self) -> None: + ... 
+ + + diff --git a/typings/dspy/propose/dataset_summary_generator.pyi b/typings/dspy/propose/dataset_summary_generator.pyi index e983e02..ca5d86b 100644 --- a/typings/dspy/propose/dataset_summary_generator.pyi +++ b/typings/dspy/propose/dataset_summary_generator.pyi @@ -6,34 +6,26 @@ import dspy class ObservationSummarizer(dspy.Signature): """Given a series of observations I have made about my dataset, please summarize them into a brief 2-3 sentence summary which highlights only the most important details.""" - observations = ... summary = ... -class DatasetDescriptor(dspy.Signature): - ( - """Given several examples from a dataset please write observations about trends that hold for most or all of the samples. """ - """Some areas you may consider in your observations: topics, content, syntax, conciceness, etc. """ - """It will be useful to make an educated guess as to the nature of the task this dataset will enable. Don't be afraid to be creative""" - ) +class DatasetDescriptor(dspy.Signature): + """Given several examples from a dataset please write observations about trends that hold for most or all of the samples. """ """Some areas you may consider in your observations: topics, content, syntax, conciceness, etc. """ """It will be useful to make an educated guess as to the nature of the task this dataset will enable. Don't be afraid to be creative""" examples = ... observations = ... -class DatasetDescriptorWithPriorObservations(dspy.Signature): - ( - """Given several examples from a dataset please write observations about trends that hold for most or all of the samples. """ - """I will also provide you with a few observations I have already made. Please add your own observations or if you feel the observations are comprehensive say 'COMPLETE' """ - """Some areas you may consider in your observations: topics, content, syntax, conciceness, etc. """ - """It will be useful to make an educated guess as to the nature of the task this dataset will enable. 
Don't be afraid to be creative""" - ) +class DatasetDescriptorWithPriorObservations(dspy.Signature): + """Given several examples from a dataset please write observations about trends that hold for most or all of the samples. """ """I will also provide you with a few observations I have already made. Please add your own observations or if you feel the observations are comprehensive say 'COMPLETE' """ """Some areas you may consider in your observations: topics, content, syntax, conciceness, etc. """ """It will be useful to make an educated guess as to the nature of the task this dataset will enable. Don't be afraid to be creative""" examples = ... prior_observations = ... observations = ... -def order_input_keys_in_string(unordered_repr): # -> str: + +def order_input_keys_in_string(unordered_repr): # -> str: + ... + +def create_dataset_summary(trainset, view_data_batch_size, prompt_model, log_file=..., verbose=...): ... -def create_dataset_summary( - trainset, view_data_batch_size, prompt_model, log_file=..., verbose=... -): ... + diff --git a/typings/dspy/propose/grounded_proposer.pyi b/typings/dspy/propose/grounded_proposer.pyi index af0b8ef..02f5924 100644 --- a/typings/dspy/propose/grounded_proposer.pyi +++ b/typings/dspy/propose/grounded_proposer.pyi @@ -7,85 +7,49 @@ from dspy.propose.propose_base import Proposer MAX_INSTRUCT_IN_HISTORY = ... TIPS = ... - class DescribeProgram(dspy.Signature): """Below is some pseudo-code for a pipeline that solves tasks with calls to language models. Please describe what type of task this program appears to be designed to solve, and how it appears to work.""" - program_code = ... program_example = ... program_description = ... + class DescribeModule(dspy.Signature): """Below is some pseudo-code for a pipeline that solves tasks with calls to language models. Please describe the purpose of one of the specified module in this pipeline.""" - program_code = ... program_example = ... program_description = ... module = ... 
module_description = ... -def generate_instruction_class( - use_dataset_summary=..., - program_aware=..., - use_task_demos=..., - use_instruct_history=..., - use_tip=..., -): # -> Predict: + +def generate_instruction_class(use_dataset_summary=..., program_aware=..., use_task_demos=..., use_instruct_history=..., use_tip=...): # -> Predict: class GenerateSingleModuleInstruction(dspy.Signature): """Use the information below to learn about a task that we are trying to solve using calls to an LM, then generate a new instruction that will be used to prompt a Language Model to better solve the task.""" - ... + + class GenerateModuleInstruction(dspy.Module): - def __init__( - self, - program_code_string=..., - use_dataset_summary=..., - program_aware=..., - use_task_demos=..., - use_instruct_history=..., - use_tip=..., - verbose=..., - ) -> None: ... - def forward( - self, - demo_candidates, - pred_i, - demo_set_i, - program, - previous_instructions, - data_summary, - num_demos_in_context=..., - tip=..., - ): # -> Prediction: + def __init__(self, program_code_string=..., use_dataset_summary=..., program_aware=..., use_task_demos=..., use_instruct_history=..., use_tip=..., verbose=...) -> None: ... + + def forward(self, demo_candidates, pred_i, demo_set_i, program, previous_instructions, data_summary, num_demos_in_context=..., tip=...): # -> Prediction: + ... + + class GroundedProposer(Proposer): - def __init__( - self, - prompt_model, - program, - trainset, - view_data_batch_size=..., - use_dataset_summary=..., - program_aware=..., - use_task_demos=..., - num_demos_in_context=..., - use_instruct_history=..., - use_tip=..., - set_tip_randomly=..., - set_history_randomly=..., - verbose=..., - rng=..., - ) -> None: ... 
- def propose_instructions_for_program( - self, trainset, program, demo_candidates, trial_logs, N, T - ) -> list[str]: + def __init__(self, prompt_model, program, trainset, view_data_batch_size=..., use_dataset_summary=..., program_aware=..., use_task_demos=..., num_demos_in_context=..., use_instruct_history=..., use_tip=..., set_tip_randomly=..., set_history_randomly=..., verbose=..., rng=...) -> None: + ... + + def propose_instructions_for_program(self, trainset, program, demo_candidates, trial_logs, N, T) -> list[str]: """This method is responsible for returning the full set of new instructions for our program, given the specified criteria.""" ... - - def propose_instruction_for_predictor( - self, program, predictor, pred_i, T, demo_candidates, demo_set_i, trial_logs, tip=... - ) -> str: + + def propose_instruction_for_predictor(self, program, predictor, pred_i, T, demo_candidates, demo_set_i, trial_logs, tip=...) -> str: """This method is responsible for returning a single instruction for a given predictor, using the specified criteria.""" ... + + + diff --git a/typings/dspy/propose/propose_base.pyi b/typings/dspy/propose/propose_base.pyi index 1a92cbe..8085a69 100644 --- a/typings/dspy/propose/propose_base.pyi +++ b/typings/dspy/propose/propose_base.pyi @@ -5,9 +5,15 @@ This type stub file was generated by pyright. from abc import ABC, abstractmethod class Proposer(ABC): - def __init__(self) -> None: ... + def __init__(self) -> None: + ... + @abstractmethod - def propose_instructions_for_program(self): # -> None: + def propose_instructions_for_program(self): # -> None: ... - def propose_instruction_for_predictor(self): # -> None: + + def propose_instruction_for_predictor(self): # -> None: ... + + + diff --git a/typings/dspy/propose/utils.pyi b/typings/dspy/propose/utils.pyi index a90a853..6033ee7 100644 --- a/typings/dspy/propose/utils.pyi +++ b/typings/dspy/propose/utils.pyi @@ -2,19 +2,24 @@ This type stub file was generated by pyright. 
""" -def strip_prefix(text): # -> str: +def strip_prefix(text): # -> str: ... -def create_instruction_set_history_string(base_program, trial_logs, top_n): # -> str: + +def create_instruction_set_history_string(base_program, trial_logs, top_n): # -> str: ... -def parse_list_of_instructions(instruction_string): # -> Any | list[Any]: + +def parse_list_of_instructions(instruction_string): # -> Any | list[Any]: ... -def get_program_instruction_set_string(program): # -> LiteralString: + +def get_program_instruction_set_string(program): # -> LiteralString: ... -def create_predictor_level_history_string( - base_program, predictor_i, trial_logs, top_n -): # -> Literal['']: + +def create_predictor_level_history_string(base_program, predictor_i, trial_logs, top_n): # -> Literal['']: ... -def create_example_string(fields, example): # -> LiteralString: + +def create_example_string(fields, example): # -> LiteralString: ... -def get_dspy_source_code(module): # -> str: + +def get_dspy_source_code(module): # -> str: ... + diff --git a/typings/dspy/retrieve/__init__.pyi b/typings/dspy/retrieve/__init__.pyi deleted file mode 100644 index b68692c..0000000 --- a/typings/dspy/retrieve/__init__.pyi +++ /dev/null @@ -1,7 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from dspy.retrieve.retrieve import Retrieve - -__all__ = ["Retrieve"] diff --git a/typings/dspy/retrieve/azureaisearch_rm.pyi b/typings/dspy/retrieve/azureaisearch_rm.pyi deleted file mode 100644 index 19516fa..0000000 --- a/typings/dspy/retrieve/azureaisearch_rm.pyi +++ /dev/null @@ -1,231 +0,0 @@ -""" -This type stub file was generated by pyright. 
-""" - -import dspy -import openai -from typing import Any, Callable, List, Optional, Union -from azure.search.documents import SearchClient -from azure.search.documents._paging import SearchItemPaged -from azure.search.documents.models import QueryType, VectorFilterMode - -""" -Retriever module for Azure AI Search -Author: Prajapati Harishkumar Kishorkumar (@HARISHKUMAR1112001) -""" - -class AzureAISearchRM(dspy.Retrieve): - """ - A retrieval module that utilizes Azure AI Search to retrieve top passages for a given query. - - Args: - search_service_name (str): The name of the Azure AI Search service. - search_api_key (str): The API key for accessing the Azure AI Search service. - search_index_name (str): The name of the search index in the Azure AI Search service. - field_text (str): The name of the field containing text content in the search index. This field will be mapped to the "content" field in the dsp framework. - field_vector (Optional[str]): The name of the field containing vector content in the search index. Defaults to None. - k (int, optional): The default number of top passages to retrieve. Defaults to 3. - azure_openai_client (Optional[openai.AzureOpenAI]): An instance of the AzureOpenAI client. Either openai_client or embedding_func must be provided. Defaults to None. - openai_embed_model (Optional[str]): The name of the OpenAI embedding model. Defaults to "text-embedding-ada-002". - embedding_func (Optional[Callable]): A function for generating embeddings. Either openai_client or embedding_func must be provided. Defaults to None. - semantic_ranker (bool, optional): Whether to use semantic ranking. Defaults to False. - filter (str, optional): Additional filter query. Defaults to None. - query_language (str, optional): The language of the query. Defaults to "en-Us". - query_speller (str, optional): The speller mode. Defaults to "lexicon". - use_semantic_captions (bool, optional): Whether to use semantic captions. Defaults to False. 
- query_type (Optional[QueryType], optional): The type of query. Defaults to QueryType.FULL. - semantic_configuration_name (str, optional): The name of the semantic configuration. Defaults to None. - is_vector_search (Optional[bool]): Whether to enable vector search. Defaults to False. - is_hybrid_search (Optional[bool]): Whether to enable hybrid search. Defaults to False. - is_fulltext_search (Optional[bool]): Whether to enable fulltext search. Defaults to True. - vector_filter_mode (Optional[VectorFilterMode]): The vector filter mode. Defaults to None. - - Examples: - Below is a code snippet that demonstrates how to instantiate and use the AzureAISearchRM class: - ```python - search_service_name = "your_search_service_name" - search_api_key = "your_search_api_key" - search_index_name = "your_search_index_name" - field_text = "text_content_field" - - azure_search_retriever = AzureAISearchRM(search_service_name, search_api_key, search_index_name, field_text) - ``` - - Attributes: - search_service_name (str): The name of the Azure AI Search service. - search_api_key (str): The API key for accessing the Azure AI Search service. - search_index_name (str): The name of the search index in the Azure AI Search service. - endpoint (str): The endpoint URL for the Azure AI Search service. - field_text (str): The name of the field containing text content in the search index. - field_vector (Optional[str]): The name of the field containing vector content in the search index. - azure_openai_client (Optional[openai.AzureOpenAI]): An instance of the AzureOpenAI client. - openai_embed_model (Optional[str]): The name of the OpenAI embedding model. - embedding_func (Optional[Callable]): A function for generating embeddings. - credential (AzureKeyCredential): The Azure key credential for accessing the service. - client (SearchClient): The Azure AI Search client instance. - semantic_ranker (bool): Whether to use semantic ranking. - filter (str): Additional filter query. 
- query_language (str): The language of the query. - query_speller (str): The speller mode. - use_semantic_captions (bool): Whether to use semantic captions. - query_type (Optional[QueryType]): The type of query. - semantic_configuration_name (str): The name of the semantic configuration. - is_vector_search (Optional[bool]): Whether to enable vector search. - is_hybrid_search (Optional[bool]): Whether to enable hybrid search. - is_fulltext_search (Optional[bool]): Whether to enable fulltext search. - vector_filter_mode (Optional[VectorFilterMode]): The vector filter mode. - - Methods: - forward(query_or_queries: Union[str, List[str]], k: Optional[int]) -> dspy.Prediction: - Search for the top passages corresponding to the given query or queries. - - azure_search_request( - self, - key_content: str, - client: SearchClient, - query: str, - top: int, - semantic_ranker: bool, - filter: str, - query_language: str, - query_speller: str, - use_semantic_captions: bool, - query_type: QueryType, - semantic_configuration_name: str, - is_vector_search: bool, - is_hybrid_search: bool, - is_fulltext_search: bool, - field_vector: str, - vector_filter_mode: VectorFilterMode - ) -> List[dict]: - Perform a search request to the Azure AI Search service. - - process_azure_result( - self, - results:SearchItemPaged, - content_key:str, - content_score: str - ) -> List[dict]: - Process the results received from the Azure AI Search service and map them to the correct format. - - get_embeddings( - self, - query: str, - k_nearest_neighbors: int, - field_vector: str - ) -> List | Any: - Returns embeddings for the given query. - - check_semantic_configuration( - self, - semantic_configuration_name, - query_type - ): - Checks semantic configuration. - - Raises: - ImportError: If the required Azure AI Search libraries are not installed. - - Note: - This class relies on the 'azure-search-documents' library for interacting with the Azure AI Search service. 
- Ensure that you have the necessary permissions and correct configurations set up in Azure before using this class. - """ - def __init__( - self, - search_service_name: str, - search_api_key: str, - search_index_name: str, - field_text: str, - field_vector: Optional[str] = ..., - k: int = ..., - azure_openai_client: Optional[openai.AzureOpenAI] = ..., - openai_embed_model: Optional[str] = ..., - embedding_func: Optional[Callable] = ..., - semantic_ranker: bool = ..., - filter: str = ..., - query_language: str = ..., - query_speller: str = ..., - use_semantic_captions: bool = ..., - query_type: Optional[QueryType] = ..., - semantic_configuration_name: str = ..., - is_vector_search: Optional[bool] = ..., - is_hybrid_search: Optional[bool] = ..., - is_fulltext_search: Optional[bool] = ..., - vector_filter_mode: Optional[VectorFilterMode.PRE_FILTER] = ..., - ) -> None: ... - def azure_search_request( - self, - key_content: str, - client: SearchClient, - query: str, - top: int, - semantic_ranker: bool, - filter: str, - query_language: str, - query_speller: str, - use_semantic_captions: bool, - query_type: QueryType, - semantic_configuration_name: str, - is_vector_search: bool, - is_hybrid_search: bool, - is_fulltext_search: bool, - field_vector: str, - vector_filter_mode: VectorFilterMode, - ): # -> list[Any]: - """ - Search in Azure AI Search Index - """ - ... - - def process_azure_result( - self, results: SearchItemPaged, content_key: str, content_score: str - ): # -> list[Any]: - """ - process received result from Azure AI Search as dictionary array and map content and score to correct format - """ - ... - - def forward(self, query_or_queries: Union[str, List[str]], k: Optional[int]) -> dspy.Prediction: - """ - Search with pinecone for self.k top passages for query - - Args: - query_or_queries (Union[str, List[str]]): The query or queries to search for. - - Returns: - dspy.Prediction: An object containing the retrieved passages. - """ - ... 
- - def get_embeddings(self, query: str, k_nearest_neighbors: int, field_vector: str) -> List | Any: - """ - Returns embeddings for the given query. - - Args: - query (str): The query for which embeddings are to be retrieved. - k_nearest_neighbors (int): The number of nearest neighbors to consider. - field_vector (str): The field vector to use for embeddings. - - Returns: - list: A list containing the vectorized query. - Any: The result of embedding_func if azure_openai_client is not provided. - - Raises: - AssertionError: If neither azure_openai_client nor embedding_func is provided, - or if field_vector is not provided. - """ - ... - - def check_semantic_configuration(self, semantic_configuration_name, query_type): # -> None: - """ - Checks semantic configuration. - - Args: - semantic_configuration_name: The name of the semantic configuration. - query_type: The type of the query. - - Raises: - AssertionError: If semantic_configuration_name is not provided - or if query_type is not QueryType.SEMANTIC. - """ - ... diff --git a/typings/dspy/retrieve/chromadb_rm.pyi b/typings/dspy/retrieve/chromadb_rm.pyi deleted file mode 100644 index b628a41..0000000 --- a/typings/dspy/retrieve/chromadb_rm.pyi +++ /dev/null @@ -1,79 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import chromadb -from typing import List, Optional, Union -from dspy import Prediction, Retrieve -from chromadb.api.types import Embeddable, EmbeddingFunction - -""" -Retriever model for chromadb -""" -ERRORS = ... - -class ChromadbRM(Retrieve): - """ - A retrieval module that uses chromadb to return the top passages for a given query. - - Assumes that the chromadb index has been created and populated with the following metadata: - - documents: The text of the passage - - Args: - collection_name (str): chromadb collection name - persist_directory (str): chromadb persist directory - embedding_function (Optional[EmbeddingFunction[Embeddable]]): Optional function to use to embed documents. 
Defaults to DefaultEmbeddingFunction. - k (int, optional): The number of top passages to retrieve. Defaults to 7. - client(Optional[chromadb.Client]): Optional chromadb client provided by user, default to None - - Returns: - dspy.Prediction: An object containing the retrieved passages. - - Examples: - Below is a code snippet that shows how to use this as the default retriever: - ```python - llm = dspy.OpenAI(model="gpt-3.5-turbo") - # using default chromadb client - retriever_model = ChromadbRM('collection_name', 'db_path') - dspy.settings.configure(lm=llm, rm=retriever_model) - # to test the retriever with "my query" - retriever_model("my query") - ``` - - Use provided chromadb client - ```python - import chromadb - llm = dspy.OpenAI(model="gpt-3.5-turbo") - # say you have a chromadb running on a different port - client = chromadb.HttpClient(host='localhost', port=8889) - retriever_model = ChromadbRM('collection_name', 'db_path', client=client) - dspy.settings.configure(lm=llm, rm=retriever_model) - # to test the retriever with "my query" - retriever_model("my query") - ``` - - Below is a code snippet that shows how to use this in the forward() function of a module - ```python - self.retrieve = ChromadbRM('collection_name', 'db_path', k=num_passages) - ``` - """ - def __init__( - self, - collection_name: str, - persist_directory: str, - embedding_function: Optional[EmbeddingFunction[Embeddable]] = ..., - client: Optional[chromadb.Client] = ..., - k: int = ..., - ) -> None: ... - def forward( - self, query_or_queries: Union[str, List[str]], k: Optional[int] = ..., **kwargs - ) -> Prediction: - """Search with db for self.k top passages for query - - Args: - query_or_queries (Union[str, List[str]]): The query or queries to search for. - - Returns: - dspy.Prediction: An object containing the retrieved passages. - """ - ... 
diff --git a/typings/dspy/retrieve/clarifai_rm.pyi b/typings/dspy/retrieve/clarifai_rm.pyi deleted file mode 100644 index 68f7dec..0000000 --- a/typings/dspy/retrieve/clarifai_rm.pyi +++ /dev/null @@ -1,53 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import dspy -from typing import List, Optional, Union - -"""Clarifai as retriver to retrieve hits""" - -class ClarifaiRM(dspy.Retrieve): - """ - Retrieval module uses clarifai to return the Top K relevant pasages for the given query. - Assuming that you have ingested the source documents into clarifai App, where it is indexed and stored. - - Args: - clarifai_user_id (str): Clarifai unique user_id. - clarfiai_app_id (str): Clarifai App ID, where the documents are stored. - clarifai_pat (str): Clarifai PAT key. - k (int): Top K documents to retrieve. - - Examples: - TODO - """ - def __init__( - self, - clarifai_user_id: str, - clarfiai_app_id: str, - clarifai_pat: Optional[str] = ..., - k: int = ..., - ) -> None: ... - def retrieve_hits(self, hits): # -> str: - ... - def forward( - self, query_or_queries: Union[str, List[str]], k: Optional[int] = ..., **kwargs - ) -> dspy.Prediction: - """Uses clarifai-python SDK search function and retrieves top_k similar passages for given query, - Args: - query_or_queries : single query or list of queries - k : Top K relevant documents to return - - Returns: - passages in format of dotdict - - Examples: - Below is a code snippet that shows how to use Marqo as the default retriver: - ```python - import clarifai - llm = dspy.Clarifai(model=MODEL_URL, api_key="YOUR CLARIFAI_PAT") - retriever_model = ClarifaiRM(clarifai_user_id="USER_ID", clarfiai_app_id="APP_ID", clarifai_pat="YOUR CLARIFAI_PAT") - dspy.settings.configure(lm=llm, rm=retriever_model) - ``` - """ - ... 
diff --git a/typings/dspy/retrieve/deeplake_rm.pyi b/typings/dspy/retrieve/deeplake_rm.pyi deleted file mode 100644 index be840ae..0000000 --- a/typings/dspy/retrieve/deeplake_rm.pyi +++ /dev/null @@ -1,55 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import dspy -from typing import List, Optional, Union - -""" -Retriever model for deeplake -""" -ERRORS = ... - -class DeeplakeRM(dspy.Retrieve): - """ - A retriever module that uses deeplake to return the top passages for a given query. - - Assumes that a Deep Lake Vector Store has been created and populated with the following payload: - - text: The text of the passage - - Args: - deeplake_vectorstore_name (str): The name or path of the Deep Lake Vector Store. - deeplake_client (VectorStore): An instance of the Deep Lake client. - k (int, optional): The default number of top passages to retrieve. Defaults to 3. - - Examples: - Below is a code snippet that shows how to use Deep Lake as the default retriver: - ```python - from deeplake import VectorStore - llm = dspy.OpenAI(model="gpt-3.5-turbo") - deeplake_client = VectorStore - retriever_model = DeeplakeRM("my_vectorstore_path", deeplake_client=deeplake_client) - dspy.settings.configure(lm=llm, rm=retriever_model) - ``` - - Below is a code snippet that shows how to use Deep Lake in the forward() function of a module - ```python - self.retrieve = DeeplakeRM("my_vectorstore_path", deeplake_client=deeplake_client, k=num_passages) - ``` - """ - def __init__(self, deeplake_vectorstore_name: str, deeplake_client, k: int = ...) -> None: ... - def embedding_function(self, texts, model=...): # -> list[List[float]]: - ... - def forward( - self, query_or_queries: Union[str, List[str]], k: Optional[int], **kwargs - ) -> dspy.Prediction: - """Search with DeepLake for self.k top passages for query - - Args: - query_or_queries (Union[str, List[str]]): The query or queries to search for. - k (Optional[int]): The number of top passages to retrieve. 
Defaults to self.k. - - Returns: - dspy.Prediction: An object containing the retrieved passages. - """ - ... diff --git a/typings/dspy/retrieve/epsilla_rm.pyi b/typings/dspy/retrieve/epsilla_rm.pyi deleted file mode 100644 index 7cec062..0000000 --- a/typings/dspy/retrieve/epsilla_rm.pyi +++ /dev/null @@ -1,21 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import dspy -from typing import List, Union -from pyepsilla import vectordb - -class EpsillaRM(dspy.Retrieve): - def __init__( - self, - epsilla_client: vectordb.Client, - db_name: str, - db_path: str, - table_name: str, - k: int = ..., - page_content: str = ..., - ) -> None: ... - def forward( - self, query_or_queries: Union[str, List[str]], k: Union[int, None] = ..., **kwargs - ) -> dspy.Prediction: ... diff --git a/typings/dspy/retrieve/faiss_rm.pyi b/typings/dspy/retrieve/faiss_rm.pyi deleted file mode 100644 index cea7ef9..0000000 --- a/typings/dspy/retrieve/faiss_rm.pyi +++ /dev/null @@ -1,3 +0,0 @@ -""" -This type stub file was generated by pyright. -""" diff --git a/typings/dspy/retrieve/falkordb_rm.pyi b/typings/dspy/retrieve/falkordb_rm.pyi deleted file mode 100644 index b7eecba..0000000 --- a/typings/dspy/retrieve/falkordb_rm.pyi +++ /dev/null @@ -1,87 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import backoff -from typing import List, Optional, Union -from openai import APITimeoutError, InternalServerError, RateLimitError, UnprocessableEntityError -from dspy import Prediction, Retrieve -from dspy.dsp.utils.settings import settings - -def generate_random_string(length: int) -> str: ... - -class Embedder: - def __init__(self, provider: str, model: str) -> None: ... - @backoff.on_exception( - backoff.expo, - (APITimeoutError, InternalServerError, RateLimitError, UnprocessableEntityError), - max_time=settings.backoff_time, - ) - def __call__(self, queries: Union[str, List[str]]) -> List[List[float]]: ... - -DEFAULT_INDEX_QUERY = ... 
- -class FalkordbRM(Retrieve): - """ - Implements a retriever that utilizes FalkorDB for retrieving passages. - This class manages a connection to a FalkorDB database using official FalkorDB Python drivers and requires - the database credentials. That is, if using a local FalkorDB session, host and port else if using a FalkorDB cloud session, - host, port, username, and password to be set as environment variables and optionally the database name. - Additionally, it utilizes an embedding provider (defaulting to OpenAI's services) to compute query embeddings, - which are then used to find the most relevant nodes in the FalkorDB graph based on the specified node property or custom retrieval query. - - Returns a list of passages in the form of `dspy.Prediction` objects - - Args: - Args: - node_label (str): The label of the node in the FalkorDB database to query against - text_node_property (str): The property of the node containing the text. - embedding_node_property (List[float]): The property of the node containing the embeddings. - k (Optional[int]): The default number of top passages to retrieve. Defaults to 5. - retrieval_query (Optional[str]): Custom Cypher query for retrieving passages. - embedding_provider (str): The provider of the embedding service. Defaults to "openai". - embedding_model (str): The model identifier for generating embeddings. Defaults to "text-embedding-ada-002". 
- - Examples: - Below is a code snippet showcasing how to initialize FalkordbRM with environment variables for the database connection and OpenAI as the embedding provider: - - ```python - import os - - import dspy - import openai - - os.environ["FALKORDB_HOST"] = "localhost" - os.environ["FALORDB_PORT"] = "6379" - os.environ["OPENAI_API_KEY"] = "sk-" (Only if using openai as embedding's provider) - - # Uncomment and set the following if you are using FalkorDB cloud - # os.environ["FALKORDB_USERNAME"] = "falkordb" - # os.environ["FALKORDB_PASSWORD"] = "password" - - - falkordb_retriever = FalkordbRM( - node_label="myIndex", - text_node_property="text", - k=10, - embedding_provider="openai", - embedding_model="text-embedding-ada-002", - ) - - dspy.settings.configure(rm=falkordb_retriever) - ``` - - In this example, `FalkordbRM` is configured to retrieve nodes based on the "text" property from an index on a node labeled "myIndex", - using embeddings computed by OpenAI's "text-embedding-ada-002" model. - """ - def __init__( - self, - node_label: str, - text_node_property: str = ..., - embedding_node_property: str = ..., - k: int = ..., - retrieval_query: Optional[str] = ..., - embedding_provider: str = ..., - embedding_model: str = ..., - ) -> None: ... - def forward(self, query_or_queries: Union[str, List[str]], k: Optional[int]) -> Prediction: ... diff --git a/typings/dspy/retrieve/lancedb_rm.pyi b/typings/dspy/retrieve/lancedb_rm.pyi deleted file mode 100644 index 152899c..0000000 --- a/typings/dspy/retrieve/lancedb_rm.pyi +++ /dev/null @@ -1,58 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import backoff -import lancedb -from typing import List, Union -from dspy import Prediction, Retrieve -from dspy.dsp.utils.settings import settings - -""" -Retriever model for LanceDB -Author: Prashant Dixit (@PrashantDixit0) -""" -if lancedb is None: ... -OPENAI_LEGACY = ... -ERRORS = ... 
- -class LancedbRM(Retrieve): - """ - A retrieval module that uses LanceDB to return the top passages for a given query. - - Assumes that the LanceDB table has been created and populated with the following metadata: - - text: The text of the passage - - Args: - table_name (str): The name of the table to query against. - persist_directory (str): directory where database is stored. - k (int, optional): The number of top passages to retrieve. Defaults to 3. - - Returns: - dspy.Prediction: An object containing the retrieved passages. - - Examples: - Below is a code snippet that shows how to use this as the default retriever: - ```python - llm = dspy.OpenAI(model="gpt-3.5-turbo") - retriever_model = LancedbRM() - dspy.settings.configure(lm=llm, rm=retriever_model) - ``` - - Below is a code snippet that shows how to use this in the forward() function of a module - ```python - self.retrieve = LancedbRM(k=num_passages) - ``` - """ - def __init__(self, table_name: str, persist_directory: str, k: int = ...) -> None: ... - @backoff.on_exception(backoff.expo, ERRORS, max_time=settings.backoff_time) - def forward(self, query_or_queries: Union[str, List[str]]) -> Prediction: - """Search with Lancedb for self.k top passages for query - - Args: - query_or_queries (Union[str, List[str]]): The query or queries to search for. - - Returns: - dspy.Prediction: An object containing the retrieved passages. - """ - ... diff --git a/typings/dspy/retrieve/llama_index_rm.pyi b/typings/dspy/retrieve/llama_index_rm.pyi deleted file mode 100644 index af2715e..0000000 --- a/typings/dspy/retrieve/llama_index_rm.pyi +++ /dev/null @@ -1,58 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import dspy -from typing import Optional -from llama_index.core.base.base_retriever import BaseRetriever - -NO_TOP_K_WARNING = ... - -class LlamaIndexRM(dspy.Retrieve): - """Implements a retriever which wraps over a LlamaIndex retriever. 
- - This is done to bridge LlamaIndex and DSPy and allow the various retrieval - abstractions in LlamaIndex to be used in DSPy. - - To-do (maybe): - - Async support (DSPy lacks this entirely it seems, so not a priority until the rest of the repo catches on) - - Text/video retrieval (Available in LI, not sure if this will be a priority in DSPy) - - Args: - retriever (BaseRetriever): A LlamaIndex retriever object - text based only - k (int): Optional; the number of examples to retrieve (similarity_top_k) - - If the underlying LI retriever does not have the property similarity_top_k, k will be ignored. - - Returns: - DSPy RM Object - this is a retriever object that can be used in DSPy - """ - - retriever: BaseRetriever - def __init__(self, retriever: BaseRetriever, k: Optional[int] = ...) -> None: ... - @property - def k(self) -> Optional[int]: - """Get similarity top k of retriever.""" - ... - - @k.setter - def k(self, k: int) -> None: - """Set similarity top k of retriever.""" - ... - - def forward(self, query: str, k: Optional[int] = ...) -> list[dspy.Example]: - """Forward function for the LI retriever. - - This is the function that is called to retrieve the top k examples for a given query. - Top k is set via the setter similarity_top_k or at LI instantiation. - - Args: - query (str): The query to retrieve examples for - k (int): Optional; the number of examples to retrieve (similarity_top_k) - - If the underlying LI retriever does not have the property similarity_top_k, k will be ignored. - - Returns: - List[dspy.Example]: A list of examples retrieved by the retriever - """ - ... diff --git a/typings/dspy/retrieve/marqo_rm.pyi b/typings/dspy/retrieve/marqo_rm.pyi deleted file mode 100644 index 2a3f2ec..0000000 --- a/typings/dspy/retrieve/marqo_rm.pyi +++ /dev/null @@ -1,57 +0,0 @@ -""" -This type stub file was generated by pyright. 
-""" - -import dspy -import marqo -from typing import List, Union - -class MarqoRM(dspy.Retrieve): - """ - A retrieval module that uses Marqo to return the top passages for a given query. - - Assumes that a Marqo index has been created and populated with the following payload: - - document: The text of the passage - - Args: - marqo_index_name (str): The name of the marqo index. - marqo_client (marqo.client.Client): A marqo client instance. - k (int, optional): The number of top passages to retrieve. Defaults to 3. - page_content (str, optional): The name of the field in the marqo index that contains the text of the passage. Defaults to 'document'. - filter_string (str, optional): A filter string to use when searching. Defaults to None. - **kwargs: Additional keyword arguments to pass to the marqo search function. - - Examples: - Below is a code snippet that shows how to use Marqo as the default retriver: - ```python - import marqo - marqo_client = marqo.Client(url="http://0.0.0.0:8882") - - llm = dspy.OpenAI(model="gpt-3.5-turbo") - retriever_model = MarqoRM("my_index_name", marqo_client=marqo_client) - dspy.settings.configure(lm=llm, rm=retriever_model) - ``` - - Below is a code snippet that shows how to use Marqo in the forward() function of a module - ```python - self.retrieve = MarqoRM("my_index_name", marqo_client=marqo_client, k=num_passages) - ``` - """ - def __init__( - self, - marqo_index_name: str, - marqo_client: marqo.client.Client, - k: int = ..., - page_content: str = ..., - filter_string: str = ..., - ) -> None: ... - def forward(self, query_or_queries: Union[str, List[str]], k=..., **kwargs) -> dspy.Prediction: - """Search with Marqo for self.k top passages for query - - Args: - query_or_queries (Union[str, List[str]]): The query or queries to search for. - - Returns: - dspy.Prediction: An object containing the retrieved passages. - """ - ... 
diff --git a/typings/dspy/retrieve/milvus_rm.pyi b/typings/dspy/retrieve/milvus_rm.pyi deleted file mode 100644 index f7fd667..0000000 --- a/typings/dspy/retrieve/milvus_rm.pyi +++ /dev/null @@ -1,64 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import dspy -from typing import Callable, List, Optional, Union - -""" -Retriever model for Milvus or Zilliz Cloud -""" - -def openai_embedding_function(texts: List[str]): # -> list[List[float]]: - ... - -class MilvusRM(dspy.Retrieve): - """ - A retrieval module that uses Milvus to return passages for a given query. - - Assumes that a Milvus collection has been created and populated with the following field: - - text: The text of the passage - - Args: - collection_name (str): The name of the Milvus collection to query against. - uri (str, optional): The Milvus connection uri. Defaults to "http://localhost:19530". - token (str, optional): The Milvus connection token. Defaults to None. - db_name (str, optional): The Milvus database name. Defaults to "default". - embedding_function (callable, optional): The function to convert a list of text to embeddings. - The embedding function should take a list of text strings as input and output a list of embeddings. - Defaults to None. By default, it will get OpenAI client by the environment variable OPENAI_API_KEY - and use OpenAI's embedding model "text-embedding-3-small" with the default dimension. - k (int, optional): The number of top passages to retrieve. Defaults to 3. - - Returns: - dspy.Prediction: An object containing the retrieved passages. 
- - Examples: - Below is a code snippet that shows how to use this as the default retriever: - ```python - llm = dspy.OpenAI(model="gpt-3.5-turbo") - retriever_model = MilvusRM( - collection_name="", - uri="", - token="" - ) - dspy.settings.configure(lm=llm, rm=retriever_model) - ``` - - Below is a code snippet that shows how to use this in the forward() function of a module - ```python - self.retrieve = MilvusRM(k=num_passages) - ``` - """ - def __init__( - self, - collection_name: str, - uri: Optional[str] = ..., - token: Optional[str] = ..., - db_name: Optional[str] = ..., - embedding_function: Optional[Callable] = ..., - k: int = ..., - ) -> None: ... - def forward( - self, query_or_queries: Union[str, List[str]], k: Optional[int] = ... - ) -> dspy.Prediction: ... diff --git a/typings/dspy/retrieve/mongodb_atlas_rm.pyi b/typings/dspy/retrieve/mongodb_atlas_rm.pyi deleted file mode 100644 index e635f72..0000000 --- a/typings/dspy/retrieve/mongodb_atlas_rm.pyi +++ /dev/null @@ -1,34 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import backoff -from typing import Any, List -from openai import APITimeoutError, InternalServerError, RateLimitError, UnprocessableEntityError -from dspy.dsp.utils.settings import settings -from dspy import Prediction, Retrieve - -def build_vector_search_pipeline( - index_name: str, query_vector: List[float], num_candidates: int, limit: int -) -> List[dict[str, Any]]: ... - -class Embedder: - def __init__(self, provider: str, model: str) -> None: ... - @backoff.on_exception( - backoff.expo, - (APITimeoutError, InternalServerError, RateLimitError, UnprocessableEntityError), - max_time=settings.backoff_time, - ) - def __call__(self, queries) -> Any: ... - -class MongoDBAtlasRM(Retrieve): - def __init__( - self, - db_name: str, - collection_name: str, - index_name: str, - k: int = ..., - embedding_provider: str = ..., - embedding_model: str = ..., - ) -> None: ... 
- def forward(self, query_or_queries: str) -> Prediction: ... diff --git a/typings/dspy/retrieve/my_scale_rm.pyi b/typings/dspy/retrieve/my_scale_rm.pyi deleted file mode 100644 index cea7ef9..0000000 --- a/typings/dspy/retrieve/my_scale_rm.pyi +++ /dev/null @@ -1,3 +0,0 @@ -""" -This type stub file was generated by pyright. -""" diff --git a/typings/dspy/retrieve/neo4j_rm.pyi b/typings/dspy/retrieve/neo4j_rm.pyi deleted file mode 100644 index 0cae2c2..0000000 --- a/typings/dspy/retrieve/neo4j_rm.pyi +++ /dev/null @@ -1,78 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import backoff -from typing import Any, Callable, List, Optional, Union -from openai import APITimeoutError, InternalServerError, RateLimitError, UnprocessableEntityError -from dspy import Prediction, Retrieve -from dspy.dsp.utils.settings import settings - -class Embedder: - def __init__(self, provider: str, model: str) -> None: ... - @backoff.on_exception( - backoff.expo, - (APITimeoutError, InternalServerError, RateLimitError, UnprocessableEntityError), - max_time=settings.backoff_time, - ) - def __call__(self, queries) -> Any: ... - -DEFAULT_INDEX_QUERY = ... - -class Neo4jRM(Retrieve): - """ - Implements a retriever that utilizes Neo4j for retrieving passages. - This class manages a connection to a Neo4j database using official Neo4j Python drivers and requires - the database credentials (username, password, URI, and optionally the database name) to be set as environment variables. - Additionally, it utilizes an embedding provider (defaulting to OpenAI's services) to compute query embeddings, - which are then used to find the most relevant nodes in the Neo4j graph based on the specified node property or custom retrieval query. - - Returns a list of passages in the form of `dspy.Prediction` objects. - - Args: - index_name (str): The name of the vector index in the Neo4j database to query against. 
- text_node_property (Optional[str]): The property of the node containing the text. Required if `retrieval_query` is not set. - k (Optional[int]): The default number of top passages to retrieve. Defaults to 5. - retrieval_query (Optional[str]): Custom Cypher query for retrieving passages. Required if `text_node_property` is not set. - embedding_provider (str): The provider of the embedding service. Defaults to "openai". - embedding_model (str): The model identifier for generating embeddings. Defaults to "text-embedding-ada-002". - - Examples: - Below is a code snippet showcasing how to initialize Neo4jRM with environment variables for the database connection and OpenAI as the embedding provider: - - ```python - import os - - import dspy - import openai - - os.environ["NEO4J_URI"] = "bolt://localhost:7687" - os.environ["NEO4J_USERNAME"] = "neo4j" - os.environ["NEO4J_PASSWORD"] = "password" - os.environ["OPENAI_API_KEY"] = "sk-" - - neo4j_retriever = Neo4jRM( - index_name="myIndex", - text_node_property="text", - k=10, - embedding_provider="openai", - embedding_model="text-embedding-ada-002", - ) - - dspy.settings.configure(rm=neo4j_retriever) - ``` - - In this example, `Neo4jRM` is configured to retrieve nodes based on the "text" property from an index named "myIndex", - using embeddings computed by OpenAI's "text-embedding-ada-002" model. - """ - def __init__( - self, - index_name: str, - text_node_property: str = ..., - k: int = ..., - retrieval_query: str = ..., - embedding_provider: str = ..., - embedding_model: str = ..., - embedding_function: Optional[Callable] = ..., - ) -> None: ... - def forward(self, query_or_queries: Union[str, List[str]], k: Optional[int]) -> Prediction: ... diff --git a/typings/dspy/retrieve/pgvector_rm.pyi b/typings/dspy/retrieve/pgvector_rm.pyi deleted file mode 100644 index 82bd141..0000000 --- a/typings/dspy/retrieve/pgvector_rm.pyi +++ /dev/null @@ -1,81 +0,0 @@ -""" -This type stub file was generated by pyright. 
-""" - -import dspy -import openai -from typing import Callable, Optional - -class PgVectorRM(dspy.Retrieve): - """ - Implements a retriever that (as the name suggests) uses pgvector to retrieve passages, - using a raw SQL query and a postgresql connection managed by psycopg2. - - It needs to register the pgvector extension with the psycopg2 connection - - Returns a list of dspy.Example objects - - Args: - db_url (str): A PostgreSQL database URL in psycopg2's DSN format - pg_table_name (Optional[str]): name of the table containing passages - openai_client (openai.OpenAI): OpenAI client to use for computing query embeddings. Either openai_client or embedding_func must be provided. - embedding_func (Callable): A function to use for computing query embeddings. Either openai_client or embedding_func must be provided. - content_field (str = "text"): Field containing the passage text. Defaults to "text" - k (Optional[int]): Default number of top passages to retrieve. Defaults to 20 - embedding_field (str = "embedding"): Field containing passage embeddings. Defaults to "embedding" - fields (List[str] = ['text']): Fields to retrieve from the table. Defaults to "text" - embedding_model (str = "text-embedding-ada-002"): Field containing the OpenAI embedding model to use. 
Defaults to "text-embedding-ada-002" - - Examples: - Below is a code snippet that shows how to use PgVector as the default retriever - - ```python - import dspy - import openai - import psycopg2 - - openai.api_key = os.environ.get("OPENAI_API_KEY", None) - openai_client = openai.OpenAI() - - llm = dspy.OpenAI(model="gpt-3.5-turbo") - - DATABASE_URL should be in the format postgresql://user:password@host/database - db_url=os.getenv("DATABASE_URL") - - retriever_model = PgVectorRM(conn, openai_client=openai_client, "paragraphs", fields=["text", "document_id"], k=20) - dspy.settings.configure(lm=llm, rm=retriever_model) - ``` - - Below is a code snippet that shows how to use PgVector in the forward() function of a module - ```python - self.retrieve = PgVectorRM(db_url, openai_client=openai_client, "paragraphs", fields=["text", "document_id"], k=20) - ``` - """ - def __init__( - self, - db_url: str, - pg_table_name: str, - openai_client: Optional[openai.OpenAI] = ..., - embedding_func: Optional[Callable] = ..., - k: int = ..., - embedding_field: str = ..., - fields: Optional[list[str]] = ..., - content_field: str = ..., - embedding_model: str = ..., - include_similarity: bool = ..., - ) -> None: - """ - k = 20 is the number of paragraphs to retrieve - """ - ... - - def forward(self, query: str, k: int = ...): # -> list[Any]: - """Search with PgVector for k top passages for query using cosine similarity - - Args: - query (str): The query to search for - k (int): The number of top passages to retrieve. Defaults to the value set in the constructor. - Returns: - dspy.Prediction: an object containing the retrieved passages. - """ - ... diff --git a/typings/dspy/retrieve/pinecone_rm.pyi b/typings/dspy/retrieve/pinecone_rm.pyi deleted file mode 100644 index 6e816dc..0000000 --- a/typings/dspy/retrieve/pinecone_rm.pyi +++ /dev/null @@ -1,70 +0,0 @@ -""" -This type stub file was generated by pyright. 
-""" - -import pinecone -from typing import List, Optional, Union -from dspy import Prediction, Retrieve - -""" -Retriever model for Pinecone -Author: Dhar Rawal (@drawal1) -""" -if pinecone is None: ... -OPENAI_LEGACY = ... -ERRORS = ... - -class PineconeRM(Retrieve): - """ - A retrieval module that uses Pinecone to return the top passages for a given query. - - Assumes that the Pinecone index has been created and populated with the following metadata: - - text: The text of the passage - - Args: - pinecone_index_name (str): The name of the Pinecone index to query against. - pinecone_api_key (str, optional): The Pinecone API key. Defaults to None. - pinecone_env (str, optional): The Pinecone environment. Defaults to None. - local_embed_model (str, optional): The local embedding model to use. A popular default is "sentence-transformers/all-mpnet-base-v2". - openai_embed_model (str, optional): The OpenAI embedding model to use. Defaults to "text-embedding-ada-002". - openai_api_key (str, optional): The API key for OpenAI. Defaults to None. - openai_org (str, optional): The organization for OpenAI. Defaults to None. - k (int, optional): The number of top passages to retrieve. Defaults to 3. - - Returns: - dspy.Prediction: An object containing the retrieved passages. 
- - Examples: - Below is a code snippet that shows how to use this as the default retriever: - ```python - llm = dspy.OpenAI(model="gpt-3.5-turbo") - retriever_model = PineconeRM(openai.api_key) - dspy.settings.configure(lm=llm, rm=retriever_model) - ``` - - Below is a code snippet that shows how to use this in the forward() function of a module - ```python - self.retrieve = PineconeRM(k=num_passages) - ``` - """ - def __init__( - self, - pinecone_index_name: str, - pinecone_api_key: Optional[str] = ..., - pinecone_env: Optional[str] = ..., - local_embed_model: Optional[str] = ..., - openai_embed_model: Optional[str] = ..., - openai_api_key: Optional[str] = ..., - openai_org: Optional[str] = ..., - k: int = ..., - ) -> None: ... - def forward(self, query_or_queries: Union[str, List[str]]) -> Prediction: - """Search with pinecone for self.k top passages for query - - Args: - query_or_queries (Union[str, List[str]]): The query or queries to search for. - - Returns: - dspy.Prediction: An object containing the retrieved passages. - """ - ... diff --git a/typings/dspy/retrieve/qdrant_rm.pyi b/typings/dspy/retrieve/qdrant_rm.pyi deleted file mode 100644 index cea7ef9..0000000 --- a/typings/dspy/retrieve/qdrant_rm.pyi +++ /dev/null @@ -1,3 +0,0 @@ -""" -This type stub file was generated by pyright. -""" diff --git a/typings/dspy/retrieve/ragatouille_rm.pyi b/typings/dspy/retrieve/ragatouille_rm.pyi deleted file mode 100644 index 9d0b5a9..0000000 --- a/typings/dspy/retrieve/ragatouille_rm.pyi +++ /dev/null @@ -1,37 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import dspy -from typing import Optional, Union - -class RAGatouilleRM(dspy.Retrieve): - """A retrieval model that uses RAGatouille library to return the top passages for a given query. - - Assumes that you already have an index created with RAGatouille. - Reference: https://github.com/bclavie/RAGatouille - - Args: - index_root (str): Folder path where you index is stored. 
- index_name (str): Name of the index you want to retrieve from. - k (int, optional): The default number of passages to retrieve. Defaults to 3. - - Examples: - Below is a code snippet that shows how to use RAGatouille index as the default retriver: - ```python - llm = dspy.OpenAI(model="gpt-3.5-turbo") - rm = RAGatouilleRM(index_root="ragatouille/colbert/indexes", index_name="my_index") - dspy.settings.configure(lm=llm, rm=rm) - ``` - """ - def __init__(self, index_root: str, index_name: str, k: int = ...) -> None: ... - def forward(self, query_or_queries: Union[str, list[str]], k: Optional[int]) -> dspy.Prediction: - """Search with RAGAtouille based index for self.k top passages for query - - Args: - query_or_queries (Union[str, List[str]]): The query or queries to search for. - k (Optional[int]): The number of top passages to retrieve. Defaults to self.k. - Returns: - dspy.Prediction: An object containing the retrieved passages. - """ - ... diff --git a/typings/dspy/retrieve/retrieve.pyi b/typings/dspy/retrieve/retrieve.pyi deleted file mode 100644 index 7cefe3d..0000000 --- a/typings/dspy/retrieve/retrieve.pyi +++ /dev/null @@ -1,29 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import List, Optional, Union -from dspy.predict.parameter import Parameter -from dspy.primitives.prediction import Prediction -from dspy.utils.callback import with_callbacks - -def single_query_passage(passages): # -> Prediction: - ... - -class Retrieve(Parameter): - name = ... - input_variable = ... - desc = ... - def __init__(self, k=..., callbacks=...) -> None: ... - def reset(self): # -> None: - ... - def dump_state(self): # -> dict[str, Any]: - ... - def load_state(self, state): # -> None: - ... - @with_callbacks - def __call__(self, *args, **kwargs): # -> List[str] | Prediction | List[Prediction]: - ... - def forward( - self, query: str, k: Optional[int] = ..., **kwargs - ) -> Union[List[str], Prediction, List[Prediction]]: ... 
diff --git a/typings/dspy/retrieve/snowflake_rm.pyi b/typings/dspy/retrieve/snowflake_rm.pyi deleted file mode 100644 index c475241..0000000 --- a/typings/dspy/retrieve/snowflake_rm.pyi +++ /dev/null @@ -1,104 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import dspy -from typing import Any, Optional, Type, Union -from pydantic import BaseModel - -class SnowflakeRM(dspy.Retrieve): - """A retrieval module that uses Snowflake's Cortex Search service to return the top relevant passages for a given query. - - Assumes that a Snowflake Cortex Search endpoint has been configured by the use. - - For more information on configuring the Cortex Search service, visit: https://docs.snowflake.com/en/user-guide/snowflake-cortex/cortex-search/cortex-search-overview - - Args: - snowflake_session (object): Snowflake Snowpark session for accessing the service. - cortex_search_service(str): Name of the Cortex Search service to be used. - snowflake_database (str): The name of the Snowflake table containing document embeddings. - snowflake_schema (str): The name of the Snowflake table containing document embeddings. - auto_filter (bool): Auto generate metadata filter based on user query and push it down prior to retrieving Cortex Search results. - k (int, optional): The default number of top passages to retrieve. Defaults to 3. - """ - def __init__( - self, - snowflake_session: object, - cortex_search_service: str, - snowflake_database: str, - snowflake_schema: str, - auto_filter=..., - k: int = ..., - max_retries=..., - ) -> None: ... - def forward( - self, - query_or_queries: Union[str, list[str]], - retrieval_columns: list[str], - filter: Optional[dict] = ..., - k: Optional[int] = ..., - ) -> dspy.Prediction: - """Query Cortex Search endpoint for top k relevant passages. - Args: - query_or_queries (Union[str, List[str]]): The query or queries to search for. - retrieval_columns (List[str]): Columns to include in response. - filter (Optional[json]):Filter query. 
- k (Optional[int]): The number of top passages to retrieve. Defaults to self.k. - Returns: - dspy.Prediction: An object containing the retrieved passages. - """ - ... - -class JSONFilter(BaseModel): - answer: str = ... - @classmethod - def model_validate_json( - cls, json_data: str, *, strict: bool | None = ..., context: dict[str, Any] | None = ... - ): # -> Any: - ... - -class GenerateFilter(dspy.Signature): - """ - Given a query, attributes in the data, and example values of each attribute, generate a filter in valid JSON format. - Ensure the filter only uses valid operators: @eq, @contains,@and,@or,@not - Ensure only the valid JSON is output with no other reasoning. - - --- - Query: What was the sentiment of CEOs between 2021 and 2024? - Attributes: industry,hq,date - Sample Values: {"industry":["biotechnology","healthcare","agriculture"],"HQ":["NY, US","CA,US","FL,US"],"date":["01/01,1999","01/01/2024"]} - Answer: {"@or":[{"@eq":{"year":"2021"}},{"@eq":{"year":"2022"}},{"@eq":{"year":"2023"}},{"@eq":{"year":"2024"}}]} - - Query: What is the sentiment of Biotech CEO's of companies based in New York? - Attributes: industry,hq,date - Sample Values: {"industry":["biotechnology","healthcare","agriculture"],"HQ":["NY, US","CA,US","FL,US"],"date":["01/01,1999","01/01/2024"]} - Answer: {"@and": [ { "@eq": { "industry"": "biotechnology" } }, { "@eq": { "HQ": "NY,US" } }]} - - Query: What is the sentiment of Biotech CEOs outside of California? - Attributes: industry,hq,date - Sample Values: {"industry":["biotechnology","healthcare","agriculture"],"HQ":["NY, US","CA,US","FL,US"],"date":["01/01,1999","01/01/2024"]} - Answer: {"@and":[{ "@eq": { "industry": "biotechnology" } },{"@not":{"@eq":{"HQ":"CA,US"}}}]} - - Query: What is the sentiment of Biotech CEOs outside of California? 
- Attributes: industry,hq,date - Sample Values: {"industry":["biotechnology","healthcare","agriculture"],"HQ":["NY, US","CA,US","FL,US"],"date":["01/01,1999","01/01/2024"]} - Answer: {"@and":[{ "@eq": { "industry": "biotechnology" } },{"@not":{"@eq":{"HQ":"CA,US"}}}]} - - Query: What is sentiment towards ag and biotech companies based outside of the US? - Attributes: industry,hq,date - Sample Values: {"industry"":["biotechnology","healthcare","agriculture"],"COUNTRY":["United States","Ireland","Russia","Georgia","Spain"],"month":["01","02","03","06","11","12""],""year"":["2022","2023","2024"]} - Answer:{"@and": [{ "@or": [{"@eq":{ "industry": "biotechnology" } },{"@eq":{"industry":"agriculture"}}]},{ "@not": {"@eq": { "COUNTRY": "United States" } }}]} - - """ - - query = ... - attributes = ... - sample_values = ... - answer: JSONFilter = ... - -class SmartSearch(dspy.Module): - def __init__(self) -> None: ... - def forward(self, query, attributes, sample_values): ... - -def get_min_length(model: Type[BaseModel]): # -> int: - ... diff --git a/typings/dspy/retrieve/vectara_rm.pyi b/typings/dspy/retrieve/vectara_rm.pyi deleted file mode 100644 index a99ce4c..0000000 --- a/typings/dspy/retrieve/vectara_rm.pyi +++ /dev/null @@ -1,57 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import dspy -from typing import List, Optional, Union - -START_SNIPPET = ... -END_SNIPPET = ... - -def remove_snippet(s: str) -> str: ... - -class VectaraRM(dspy.Retrieve): - """ - A retrieval module that uses Vectara to return the top passages for a given query. - - Assumes that a Vectara corpora have been created and populated with the following payload: - - document: The text of the passage - - Args: - vectara_customer_id (str): Vectara Customer ID. defaults to VECTARA_CUSTOMER_ID environment variable - vectara_corpus_id (str): Vectara Corpus ID. defaults to VECTARA_CORPUS_ID environment variable - vectara_api_key (str): Vectara API Key. 
defaults to VECTARA_API_KEY environment variable - k (int, optional): The default number of top passages to retrieve. Defaults to 3. - - Examples: - Below is a code snippet that shows how to use Vectara as the default retriver: - ```python - from vectara_client import vectaraClient - - llm = dspy.OpenAI(model="gpt-3.5-turbo") - retriever_model = vectaraRM("", "", "") - dspy.settings.configure(lm=llm, rm=retriever_model) - ``` - - Below is a code snippet that shows how to use Vectara in the forward() function of a module - ```python - self.retrieve = vectaraRM("", "", "", k=num_passages) - ``` - """ - def __init__( - self, - vectara_customer_id: Optional[str] = ..., - vectara_corpus_id: Optional[str] = ..., - vectara_api_key: Optional[str] = ..., - k: int = ..., - ) -> None: ... - def forward(self, query_or_queries: Union[str, List[str]], k: Optional[int]) -> dspy.Prediction: - """Search with Vectara for self.k top passages for query - - Args: - query_or_queries (Union[str, List[str]]): The query or queries to search for. - k (Optional[int]): The number of top passages to retrieve. Defaults to self.k. - Returns: - dspy.Prediction: An object containing the retrieved passages. - """ - ... diff --git a/typings/dspy/retrieve/watson_discovery_rm.pyi b/typings/dspy/retrieve/watson_discovery_rm.pyi deleted file mode 100644 index b07a1a5..0000000 --- a/typings/dspy/retrieve/watson_discovery_rm.pyi +++ /dev/null @@ -1,43 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import dspy -from typing import Optional, Union - -class WatsonDiscoveryRM(dspy.Retrieve): - """A retrieval module that uses Watson Discovery to return the top passages for a given query. - - Args: - apikey (str): apikey for authentication purposes, - url (str): endpoint URL that includes the service instance ID - version (str): Release date of the version of the API you want to use. Specify dates in YYYY-MM-DD format. 
- project_id (str): The Universally Unique Identifier (UUID) of the project. - collection_ids (list): An array containing the collections on which the search will be executed. - k (int, optional): The number of top passages to retrieve. Defaults to 5. - - Returns: - dspy.Prediction: An object containing the retrieved passages. - """ - def __init__( - self, - apikey: str, - url: str, - version: str, - project_id: str, - collection_ids: list = ..., - k: int = ..., - ) -> None: ... - def forward( - self, query_or_queries: Union[str, list[str]], k: Optional[int] = ... - ) -> dspy.Prediction: - """Search with Watson Discovery for self.k top passages for query. - - Args: - query_or_queries (Union[str, List[str]]): The query or queries to search for. - k (int, optional): The number of top passages to retrieve. - - Returns: - dspy.Prediction: An object containing the retrieved passages. - """ - ... diff --git a/typings/dspy/retrieve/you_rm.pyi b/typings/dspy/retrieve/you_rm.pyi deleted file mode 100644 index 5b338bc..0000000 --- a/typings/dspy/retrieve/you_rm.pyi +++ /dev/null @@ -1,40 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import dspy -from typing import Literal, Optional, Union - -class YouRM(dspy.Retrieve): - """Retriever for You.com's Search and News API. - - [API reference](https://documentation.you.com/api-reference/) - - Args: - ydc_api_key: you.com API key, if `YDC_API_KEY` is not set in the environment - k: If ``endpoint="search"``, the max snippets to return per search hit. - If ``endpoint="news"``, the max articles to return. 
- endpoint: you.com endpoints - num_web_results: The max number of web results to return, must be under 20 - safesearch: Safesearch settings, one of "off", "moderate", "strict", defaults to moderate - country: Country code, ex: 'US' for United States, see API reference for more info - search_lang: (News API) Language codes, ex: 'en' for English, see API reference for more info - ui_lang: (News API) User interface language for the response, ex: 'en' for English. - See API reference for more info - spellcheck: (News API) Whether to spell check query or not, defaults to True - """ - def __init__( - self, - ydc_api_key: Optional[str] = ..., - k: int = ..., - endpoint: Literal["search", "news"] = ..., - num_web_results: Optional[int] = ..., - safesearch: Optional[Literal["off", "moderate", "strict"]] = ..., - country: Optional[str] = ..., - search_lang: Optional[str] = ..., - ui_lang: Optional[str] = ..., - spellcheck: Optional[bool] = ..., - ) -> None: ... - def forward( - self, query_or_queries: Union[str, list[str]], k: Optional[int] = ... - ) -> dspy.Prediction: ... diff --git a/typings/dspy/retrievers/__init__.pyi b/typings/dspy/retrievers/__init__.pyi index 86f85fb..ce589e3 100644 --- a/typings/dspy/retrievers/__init__.pyi +++ b/typings/dspy/retrievers/__init__.pyi @@ -3,5 +3,6 @@ This type stub file was generated by pyright. 
""" from dspy.retrievers.embeddings import Embeddings +from dspy.retrievers.retrieve import Retrieve -__all__ = ["Embeddings"] +__all__ = ["Embeddings", "Retrieve"] diff --git a/typings/dspy/retrieve/databricks_rm.pyi b/typings/dspy/retrievers/databricks_rm.pyi similarity index 88% rename from typings/dspy/retrieve/databricks_rm.pyi rename to typings/dspy/retrievers/databricks_rm.pyi index f0492ef..97e91bc 100644 --- a/typings/dspy/retrieve/databricks_rm.pyi +++ b/typings/dspy/retrievers/databricks_rm.pyi @@ -8,13 +8,15 @@ from typing import Any, Dict, List, Optional, Union from dspy.primitives.prediction import Prediction _databricks_sdk_installed = ... - @dataclass class Document: page_content: str metadata: Dict[str, Any] type: str - def to_dict(self) -> Dict[str, Any]: ... + def to_dict(self) -> Dict[str, Any]: + ... + + class DatabricksRM(dspy.Retrieve): """ @@ -70,21 +72,7 @@ class DatabricksRM(dspy.Retrieve): retrieved_results = DatabricksRM(query="Example query text")) ``` """ - def __init__( - self, - databricks_index_name: str, - databricks_endpoint: Optional[str] = ..., - databricks_token: Optional[str] = ..., - databricks_client_id: Optional[str] = ..., - databricks_client_secret: Optional[str] = ..., - columns: Optional[List[str]] = ..., - filters_json: Optional[str] = ..., - k: int = ..., - docs_id_column_name: str = ..., - docs_uri_column_name: Optional[str] = ..., - text_column_name: str = ..., - use_with_databricks_agent_framework: bool = ..., - ) -> None: + def __init__(self, databricks_index_name: str, databricks_endpoint: Optional[str] = ..., databricks_token: Optional[str] = ..., databricks_client_id: Optional[str] = ..., databricks_client_secret: Optional[str] = ..., columns: Optional[List[str]] = ..., filters_json: Optional[str] = ..., k: int = ..., docs_id_column_name: str = ..., docs_uri_column_name: Optional[str] = ..., text_column_name: str = ..., use_with_databricks_agent_framework: bool = ...) 
-> None: """ Args: databricks_index_name (str): The name of the Databricks Vector Search Index to query. @@ -118,13 +106,8 @@ class DatabricksRM(dspy.Retrieve): compatible with the Databricks Mosaic Agent Framework. """ ... - - def forward( - self, - query: Union[str, List[float]], - query_type: str = ..., - filters_json: Optional[str] = ..., - ) -> Union[dspy.Prediction, List[Dict[str, Any]]]: + + def forward(self, query: Union[str, List[float]], query_type: str = ..., filters_json: Optional[str] = ...) -> Union[dspy.Prediction, List[Dict[str, Any]]]: """ Retrieve documents from a Databricks Mosaic AI Vector Search Index that are relevant to the specified query. @@ -147,3 +130,6 @@ class DatabricksRM(dspy.Retrieve): ``False``. """ ... + + + diff --git a/typings/dspy/retrievers/embeddings.pyi b/typings/dspy/retrievers/embeddings.pyi index 838f0e2..1c520f3 100644 --- a/typings/dspy/retrievers/embeddings.pyi +++ b/typings/dspy/retrievers/embeddings.pyi @@ -5,17 +5,14 @@ This type stub file was generated by pyright. from typing import Any, List, Optional class Embeddings: - def __init__( - self, - corpus: List[str], - embedder, - k: int = ..., - callbacks: Optional[List[Any]] = ..., - cache: bool = ..., - brute_force_threshold: int = ..., - normalize: bool = ..., - ) -> None: ... - def __call__(self, query: str): # -> Prediction: + def __init__(self, corpus: List[str], embedder, k: int = ..., callbacks: Optional[List[Any]] = ..., cache: bool = ..., brute_force_threshold: int = ..., normalize: bool = ...) -> None: ... - def forward(self, query: str): # -> Prediction: + + def __call__(self, query: str): # -> Prediction: ... + + def forward(self, query: str): # -> Prediction: + ... + + + diff --git a/typings/dspy/retrievers/retrieve.pyi b/typings/dspy/retrievers/retrieve.pyi new file mode 100644 index 0000000..6a1a6c3 --- /dev/null +++ b/typings/dspy/retrievers/retrieve.pyi @@ -0,0 +1,37 @@ +""" +This type stub file was generated by pyright. 
+""" + +from typing import List, Optional, Union +from dspy.predict.parameter import Parameter +from dspy.primitives.prediction import Prediction +from dspy.utils.callback import with_callbacks + +def single_query_passage(passages): # -> Prediction: + ... + +class Retrieve(Parameter): + name = ... + input_variable = ... + desc = ... + def __init__(self, k=..., callbacks=...) -> None: + ... + + def reset(self): # -> None: + ... + + def dump_state(self): # -> dict[str, Any]: + ... + + def load_state(self, state): # -> None: + ... + + @with_callbacks + def __call__(self, *args, **kwargs): # -> List[str] | Prediction | List[Prediction]: + ... + + def forward(self, query: str, k: Optional[int] = ..., **kwargs) -> Union[List[str], Prediction, List[Prediction]]: + ... + + + diff --git a/typings/dspy/retrieve/weaviate_rm.pyi b/typings/dspy/retrievers/weaviate_rm.pyi similarity index 82% rename from typings/dspy/retrieve/weaviate_rm.pyi rename to typings/dspy/retrievers/weaviate_rm.pyi index 0877571..fcc2eba 100644 --- a/typings/dspy/retrieve/weaviate_rm.pyi +++ b/typings/dspy/retrievers/weaviate_rm.pyi @@ -38,17 +38,10 @@ class WeaviateRM(dspy.Retrieve): self.retrieve = WeaviateRM("my_collection_name", weaviate_client=weaviate_client, k=num_passages) ``` """ - def __init__( - self, - weaviate_collection_name: str, - weaviate_client: Union[weaviate.WeaviateClient, weaviate.Client], - weaviate_collection_text_key: Optional[str] = ..., - k: int = ..., - tenant_id: Optional[str] = ..., - ) -> None: ... - def forward( - self, query_or_queries: Union[str, List[str]], k: Optional[int] = ..., **kwargs - ) -> Prediction: + def __init__(self, weaviate_collection_name: str, weaviate_client: Union[weaviate.WeaviateClient, weaviate.Client], weaviate_collection_text_key: Optional[str] = ..., k: int = ..., tenant_id: Optional[str] = ...) -> None: + ... 
+ + def forward(self, query_or_queries: Union[str, List[str]], k: Optional[int] = ..., **kwargs) -> Prediction: """Search with Weaviate for self.k top passages for query or queries. Args: @@ -60,10 +53,13 @@ class WeaviateRM(dspy.Retrieve): dspy.Prediction: An object containing the retrieved passages. """ ... - + def get_objects(self, num_samples: int, fields: List[str]) -> List[dict]: """Get objects from Weaviate using the cursor API.""" ... - - def insert(self, new_object_properties: dict): # -> None: + + def insert(self, new_object_properties: dict): # -> None: ... + + + diff --git a/typings/dspy/signatures/__init__.pyi b/typings/dspy/signatures/__init__.pyi index db3fe87..8501972 100644 --- a/typings/dspy/signatures/__init__.pyi +++ b/typings/dspy/signatures/__init__.pyi @@ -3,23 +3,6 @@ This type stub file was generated by pyright. """ from dspy.signatures.field import InputField, OldField, OldInputField, OldOutputField, OutputField -from dspy.signatures.signature import ( - Signature, - SignatureMeta, - ensure_signature, - infer_prefix, - make_signature, -) +from dspy.signatures.signature import Signature, SignatureMeta, ensure_signature, infer_prefix, make_signature -__all__ = [ - "InputField", - "OutputField", - "OldField", - "OldInputField", - "OldOutputField", - "SignatureMeta", - "Signature", - "infer_prefix", - "ensure_signature", - "make_signature", -] +__all__ = ["InputField", "OutputField", "OldField", "OldInputField", "OldOutputField", "SignatureMeta", "Signature", "infer_prefix", "ensure_signature", "make_signature"] diff --git a/typings/dspy/signatures/field.pyi b/typings/dspy/signatures/field.pyi index a33889e..067b9bc 100644 --- a/typings/dspy/signatures/field.pyi +++ b/typings/dspy/signatures/field.pyi @@ -4,29 +4,44 @@ This type stub file was generated by pyright. DSPY_FIELD_ARG_NAMES = ... PYDANTIC_CONSTRAINT_MAP = ... - -def move_kwargs(**kwargs): # -> dict[Any, Any]: +def move_kwargs(**kwargs): # -> dict[Any, Any]: ... 
-def InputField(**kwargs): # -> Any: + +def InputField(**kwargs): # -> Any: ... -def OutputField(**kwargs): # -> Any: + +def OutputField(**kwargs): # -> Any: ... -def new_to_old_field(field): # -> OldInputField | OldOutputField: + +def new_to_old_field(field): # -> OldInputField | OldOutputField: ... class OldField: """A more ergonomic datatype that infers prefix and desc if omitted.""" - def __init__(self, *, prefix=..., desc=..., input, format=...) -> None: ... - def finalize(self, key, inferred_prefix): # -> None: + def __init__(self, *, prefix=..., desc=..., input, format=...) -> None: + ... + + def finalize(self, key, inferred_prefix): # -> None: """Set the prefix if it's not provided explicitly.""" ... - - def __repr__(self): # -> str: + + def __repr__(self): # -> str: + ... + + def __eq__(self, __value: object) -> bool: ... - def __eq__(self, __value: object) -> bool: ... + + class OldInputField(OldField): - def __init__(self, *, prefix=..., desc=..., format=...) -> None: ... + def __init__(self, *, prefix=..., desc=..., format=...) -> None: + ... + + class OldOutputField(OldField): - def __init__(self, *, prefix=..., desc=..., format=...) -> None: ... + def __init__(self, *, prefix=..., desc=..., format=...) -> None: + ... + + + diff --git a/typings/dspy/signatures/signature.pyi b/typings/dspy/signatures/signature.pyi index 905c988..37165e1 100644 --- a/typings/dspy/signatures/signature.pyi +++ b/typings/dspy/signatures/signature.pyi @@ -2,7 +2,7 @@ This type stub file was generated by pyright. """ -from typing import Dict, Optional, Tuple, Type, Union +from typing import Any, Dict, Optional, Tuple, Type, Union from pydantic import BaseModel from pydantic.fields import FieldInfo @@ -22,28 +22,39 @@ or a signature, you can use the ensure_signature function. For compatibility with the legacy dsp format, you can use the signature_to_template function. 
""" - class SignatureMeta(type(BaseModel)): - def __call__(cls, *args, **kwargs): # -> type[Signature] | Any: + def __call__(cls, *args, **kwargs): # -> type[Signature] | Any: ... - def __new__(mcs, signature_name, bases, namespace, **kwargs): # -> type: + + def __new__(mcs, signature_name, bases, namespace, **kwargs): # -> type: ... + @property - def instructions(cls) -> str: ... + def instructions(cls) -> str: + ... + @instructions.setter - def instructions(cls, instructions: str) -> None: ... + def instructions(cls, instructions: str) -> None: + ... + @property - def input_fields(cls) -> dict[str, FieldInfo]: ... + def input_fields(cls) -> dict[str, FieldInfo]: + ... + @property - def output_fields(cls) -> dict[str, FieldInfo]: ... + def output_fields(cls) -> dict[str, FieldInfo]: + ... + @property - def fields(cls) -> dict[str, FieldInfo]: ... + def fields(cls) -> dict[str, FieldInfo]: + ... + @property def signature(cls) -> str: """The string representation of the signature.""" ... - - def __repr__(cls): # -> str: + + def __repr__(cls): # -> str: """Output a representation of the signature. Uses the form: @@ -54,13 +65,17 @@ class SignatureMeta(type(BaseModel)): ). """ ... + + class Signature(BaseModel, metaclass=SignatureMeta): "" @classmethod - def with_instructions(cls, instructions: str) -> Type[Signature]: ... + def with_instructions(cls, instructions: str) -> Type[Signature]: + ... + @classmethod - def with_updated_fields(cls, name, type_=..., **kwargs) -> Type[Signature]: + def with_updated_fields(cls, name: str, type_: Optional[Type] = ..., **kwargs: dict[str, Any]) -> Type[Signature]: """Create a new Signature class with the updated field information. Returns a new Signature class with the field, name, updated @@ -69,42 +84,48 @@ class Signature(BaseModel, metaclass=SignatureMeta): Args: name: The name of the field to update. type_: The new type of the field. - **kwargs: The new values for the field. + kwargs: The new values for the field. 
Returns: A new Signature class (not an instance) with the updated field information. """ ... - + @classmethod - def prepend(cls, name, field, type_=...) -> Type[Signature]: ... + def prepend(cls, name, field, type_=...) -> Type[Signature]: + ... + @classmethod - def append(cls, name, field, type_=...) -> Type[Signature]: ... + def append(cls, name, field, type_=...) -> Type[Signature]: + ... + @classmethod - def delete(cls, name) -> Type[Signature]: ... + def delete(cls, name) -> Type[Signature]: + ... + @classmethod - def insert( - cls, index: int, name: str, field, type_: Optional[Type] = ... - ) -> Type[Signature]: ... + def insert(cls, index: int, name: str, field, type_: Optional[Type] = ...) -> Type[Signature]: + ... + @classmethod def equals(cls, other) -> bool: """Compare the JSON schema of two Signature classes.""" ... - + @classmethod - def dump_state(cls): # -> dict[str, str | list[Any]]: + def dump_state(cls): # -> dict[str, str | list[Any]]: ... + @classmethod - def load_state(cls, state): # -> Signature: + def load_state(cls, state): # -> Signature: ... + -def ensure_signature(signature: Union[str, Type[Signature]], instructions=...) -> Signature: ... -def make_signature( - signature: Union[str, Dict[str, Tuple[type, FieldInfo]]], - instructions: Optional[str] = ..., - signature_name: str = ..., - custom_types: Optional[Dict[str, Type]] = ..., -) -> Type[Signature]: + +def ensure_signature(signature: Union[str, Type[Signature]], instructions=...) -> Signature: + ... + +def make_signature(signature: Union[str, Dict[str, Tuple[type, FieldInfo]]], instructions: Optional[str] = ..., signature_name: str = ..., custom_types: Optional[Dict[str, Type]] = ...) -> Type[Signature]: """Create a new Signature subclass with the specified fields and instructions. Args: @@ -151,3 +172,4 @@ def infer_prefix(attribute_name: str) -> str: "HTMLParser" -> "HTML Parser" """ ... 
+ diff --git a/typings/dspy/signatures/utils.pyi b/typings/dspy/signatures/utils.pyi index 2cff8ed..993e1c8 100644 --- a/typings/dspy/signatures/utils.pyi +++ b/typings/dspy/signatures/utils.pyi @@ -5,4 +5,6 @@ This type stub file was generated by pyright. from typing import Literal from pydantic.fields import FieldInfo -def get_dspy_field_type(field: FieldInfo) -> Literal["input", "output"]: ... +def get_dspy_field_type(field: FieldInfo) -> Literal["input", "output"]: + ... + diff --git a/typings/dspy/streaming/__init__.pyi b/typings/dspy/streaming/__init__.pyi index b5ac15c..18b0621 100644 --- a/typings/dspy/streaming/__init__.pyi +++ b/typings/dspy/streaming/__init__.pyi @@ -6,12 +6,4 @@ from dspy.streaming.messages import StatusMessage, StatusMessageProvider, Stream from dspy.streaming.streamify import apply_sync_streaming, streamify, streaming_response from dspy.streaming.streaming_listener import StreamListener -__all__ = [ - "StatusMessage", - "StatusMessageProvider", - "streamify", - "StreamListener", - "StreamResponse", - "streaming_response", - "apply_sync_streaming", -] +__all__ = ["StatusMessage", "StatusMessageProvider", "streamify", "StreamListener", "StreamResponse", "streaming_response", "apply_sync_streaming"] diff --git a/typings/dspy/streaming/messages.pyi b/typings/dspy/streaming/messages.pyi index a1ed0f4..4f53a0f 100644 --- a/typings/dspy/streaming/messages.pyi +++ b/typings/dspy/streaming/messages.pyi @@ -13,15 +13,16 @@ class StreamResponse: chunk: str ... + @dataclass class StatusMessage: """Dataclass that wraps a status message for status streaming.""" - message: str ... -def sync_send_to_stream(stream, message): - """Send message to stream in a sync context, regardless of whether the caller is async or not.""" + +def sync_send_to_stream(stream, message): # -> None: + """Send message to stream in a sync context, regardless of event loop state.""" ... 
class StatusMessageProvider: @@ -43,47 +44,53 @@ class StatusMessageProvider: program = dspy.streamify(dspy.Predict("q->a"), status_message_provider=MyStatusMessageProvider()) ``` """ - def tool_start_status_message(self, instance: Any, inputs: Dict[str, Any]): # -> str: + def tool_start_status_message(self, instance: Any, inputs: Dict[str, Any]): # -> str: """Status message before a `dspy.Tool` is called.""" ... - - def tool_end_status_message(self, outputs: Any): # -> LiteralString: + + def tool_end_status_message(self, outputs: Any): # -> LiteralString: """Status message after a `dspy.Tool` is called.""" ... - - def module_start_status_message(self, instance: Any, inputs: Dict[str, Any]): # -> None: + + def module_start_status_message(self, instance: Any, inputs: Dict[str, Any]): # -> None: """Status message before a `dspy.Module` or `dspy.Predict` is called.""" ... - - def module_end_status_message(self, outputs: Any): # -> None: + + def module_end_status_message(self, outputs: Any): # -> None: """Status message after a `dspy.Module` or `dspy.Predict` is called.""" ... - - def lm_start_status_message(self, instance: Any, inputs: Dict[str, Any]): # -> None: + + def lm_start_status_message(self, instance: Any, inputs: Dict[str, Any]): # -> None: """Status message before a `dspy.LM` is called.""" ... - - def lm_end_status_message(self, outputs: Any): # -> None: + + def lm_end_status_message(self, outputs: Any): # -> None: """Status message after a `dspy.LM` is called.""" ... + + class StatusStreamingCallback(BaseCallback): - def __init__(self, status_message_provider: Optional[StatusMessageProvider] = ...) -> None: ... - def on_tool_start(self, call_id: str, instance: Any, inputs: Dict[str, Any]): # -> None: + def __init__(self, status_message_provider: Optional[StatusMessageProvider] = ...) -> None: ... - def on_tool_end( - self, call_id: str, outputs: Optional[Dict[str, Any]], exception: Optional[Exception] = ... 
- ): # -> None: + + def on_tool_start(self, call_id: str, instance: Any, inputs: Dict[str, Any]): # -> None: ... - def on_lm_start(self, call_id: str, instance: Any, inputs: Dict[str, Any]): # -> None: + + def on_tool_end(self, call_id: str, outputs: Optional[Dict[str, Any]], exception: Optional[Exception] = ...): # -> None: ... - def on_lm_end( - self, call_id: str, outputs: Optional[Dict[str, Any]], exception: Optional[Exception] = ... - ): # -> None: + + def on_lm_start(self, call_id: str, instance: Any, inputs: Dict[str, Any]): # -> None: ... - def on_module_start(self, call_id: str, instance: Any, inputs: Dict[str, Any]): # -> None: + + def on_lm_end(self, call_id: str, outputs: Optional[Dict[str, Any]], exception: Optional[Exception] = ...): # -> None: ... - def on_module_end( - self, call_id: str, outputs: Optional[Dict[str, Any]], exception: Optional[Exception] = ... - ): # -> None: + + def on_module_start(self, call_id: str, instance: Any, inputs: Dict[str, Any]): # -> None: ... + + def on_module_end(self, call_id: str, outputs: Optional[Dict[str, Any]], exception: Optional[Exception] = ...): # -> None: + ... + + + diff --git a/typings/dspy/streaming/streamify.pyi b/typings/dspy/streaming/streamify.pyi index 7387226..37aa9f3 100644 --- a/typings/dspy/streaming/streamify.pyi +++ b/typings/dspy/streaming/streamify.pyi @@ -2,31 +2,15 @@ This type stub file was generated by pyright. """ -from typing import ( - Any, - AsyncGenerator, - Awaitable, - Callable, - Generator, - List, - Optional, - TYPE_CHECKING, -) +from typing import Any, AsyncGenerator, Awaitable, Callable, Generator, List, Optional, TYPE_CHECKING from dspy.streaming.messages import StatusMessageProvider from dspy.streaming.streaming_listener import StreamListener -from dspy.primitives.program import Module +from dspy.primitives.module import Module logger = ... -if TYPE_CHECKING: ... 
- -def streamify( - program: Module, - status_message_provider: Optional[StatusMessageProvider] = ..., - stream_listeners: Optional[List[StreamListener]] = ..., - include_final_prediction_in_output_stream: bool = ..., - is_async_program: bool = ..., - async_streaming: bool = ..., -) -> Callable[[Any, Any], Awaitable[Any]]: +if TYPE_CHECKING: + ... +def streamify(program: Module, status_message_provider: Optional[StatusMessageProvider] = ..., stream_listeners: Optional[List[StreamListener]] = ..., include_final_prediction_in_output_stream: bool = ..., is_async_program: bool = ..., async_streaming: bool = ...) -> Callable[[Any, Any], Awaitable[Any]]: """ Wrap a DSPy program so that it streams its outputs incrementally, rather than returning them all at once. It also provides status messages to the user to indicate the progress of the program, and users @@ -162,3 +146,4 @@ async def streaming_response(streamer: AsyncGenerator) -> AsyncGenerator: An async generator that yields OpenAI-compatible streaming response chunks. """ ... + diff --git a/typings/dspy/streaming/streaming_listener.pyi b/typings/dspy/streaming/streaming_listener.pyi index f26e2f7..723a6a2 100644 --- a/typings/dspy/streaming/streaming_listener.pyi +++ b/typings/dspy/streaming/streaming_listener.pyi @@ -4,15 +4,13 @@ This type stub file was generated by pyright. from typing import Any, List, Optional, TYPE_CHECKING from litellm import ModelResponseStream -from dspy.primitives.program import Module - -if TYPE_CHECKING: ... +from dspy.primitives.module import Module +if TYPE_CHECKING: + ... class StreamListener: """Class that listens to the stream to capture the streeaming of a specific output field of a predictor.""" - def __init__( - self, signature_field_name: str, predict: Any = ..., predict_name: Optional[str] = ... - ) -> None: + def __init__(self, signature_field_name: str, predict: Any = ..., predict_name: Optional[str] = ...) 
-> None: """ Args: signature_field_name: The name of the field to listen to. @@ -22,9 +20,10 @@ class StreamListener: automatically look for the predictor that has the `signature_field_name` in its signature. """ ... - - def receive(self, chunk: ModelResponseStream): # -> StreamResponse | None: + + def receive(self, chunk: ModelResponseStream): # -> StreamResponse | None: ... + def flush(self) -> str: """Flush all tokens in the field end queue. @@ -33,10 +32,10 @@ class StreamListener: with the purpose to not yield the end_identifier tokens, e.g., "[[ ## ... ## ]]" for ChatAdapter. """ ... + -def find_predictor_for_stream_listeners( - program: Module, stream_listeners: List[StreamListener] -): # -> defaultdict[Any, list[Any]]: + +def find_predictor_for_stream_listeners(program: Module, stream_listeners: List[StreamListener]): # -> defaultdict[Any, list[Any]]: """Find the predictor for each stream listener. This is a utility function to automatically find the predictor for each stream listener. It is used when some @@ -44,3 +43,4 @@ def find_predictor_for_stream_listeners( unique in the program, this function will raise an error. """ ... 
+ diff --git a/typings/dspy/teleprompt/__init__.pyi b/typings/dspy/teleprompt/__init__.pyi index 1005c41..9f11018 100644 --- a/typings/dspy/teleprompt/__init__.pyi +++ b/typings/dspy/teleprompt/__init__.pyi @@ -17,18 +17,4 @@ from dspy.teleprompt.teleprompt import Teleprompter from dspy.teleprompt.teleprompt_optuna import BootstrapFewShotWithOptuna from dspy.teleprompt.vanilla import LabeledFewShot -__all__ = [ - "AvatarOptimizer", - "BetterTogether", - "BootstrapFewShot", - "BootstrapFinetune", - "COPRO", - "Ensemble", - "KNNFewShot", - "MIPROv2", - "BootstrapFewShotWithRandomSearch", - "BootstrapFewShotWithOptuna", - "LabeledFewShot", - "InferRules", - "SIMBA", -] +__all__ = ["AvatarOptimizer", "BetterTogether", "BootstrapFewShot", "BootstrapFinetune", "COPRO", "Ensemble", "KNNFewShot", "MIPROv2", "BootstrapFewShotWithRandomSearch", "BootstrapFewShotWithOptuna", "LabeledFewShot", "InferRules", "SIMBA"] diff --git a/typings/dspy/teleprompt/avatar_optimizer.pyi b/typings/dspy/teleprompt/avatar_optimizer.pyi index cc57b36..8ba0118 100644 --- a/typings/dspy/teleprompt/avatar_optimizer.pyi +++ b/typings/dspy/teleprompt/avatar_optimizer.pyi @@ -9,56 +9,51 @@ from dspy.predict.avatar import ActionOutput from dspy.teleprompt.teleprompt import Teleprompter DEFAULT_MAX_EXAMPLES = ... - class EvalResult(BaseModel): example: dict score: float actions: Optional[List[ActionOutput]] = ... + class Comparator(dspy.Signature): """After executing the given actions on user inputs using the given instruction, some inputs have yielded good, results, while others have not. I'll provide you the inputs along with their, corresponding evaluation metrics: - Task: - (1) Firstly, identify and contrast the patterns of inputs that have achieved good results with those that have not. - (2) Then, review the computational logic for any inconsistencies in the previous actions. 
- (3) Lastly, specify the modification in tools used that can lead to improved performance on the negative inputs.""" - +Task: +(1) Firstly, identify and contrast the patterns of inputs that have achieved good results with those that have not. +(2) Then, review the computational logic for any inconsistencies in the previous actions. +(3) Lastly, specify the modification in tools used that can lead to improved performance on the negative inputs.""" instruction: str = ... actions: List[str] = ... pos_input_with_metrics: List[EvalResult] = ... neg_input_with_metrics: List[EvalResult] = ... feedback: str = ... + class FeedbackBasedInstruction(dspy.Signature): """There is a task that needs to be completed for which one can use multiple tools to achieve the desired outcome. A group's performance was evaluated on a dataset of inputs, the inputs that did well are positive inputs, and the inputs that did not do well are negative inputs. - You received feedback on how they can better use the tools to improve your performance on the negative inputs. You have been provided with the previous instruction, that they followed to use tools to complete the task, and the feedback on your performance. - - Your task is to incorporate the feedback and generate a detailed instruction for the group to follow to improve their performance on the task. +You received feedback on how they can better use the tools to improve your performance on the negative inputs. You have been provided with the previous instruction, that they followed to use tools to complete the task, and the feedback on your performance. - Make sure that the new instruction talks about how to use the tools effectively and should be no more than 3 paragraphs long. The previous instruction contains general guidelines that you must retain in the new instruction.""" +Your task is to incorporate the feedback and generate a detailed instruction for the group to follow to improve their performance on the task. 
+Make sure that the new instruction talks about how to use the tools effectively and should be no more than 3 paragraphs long. The previous instruction contains general guidelines that you must retain in the new instruction.""" previous_instruction: str = ... feedback: str = ... new_instruction: str = ... + class AvatarOptimizer(Teleprompter): - def __init__( - self, - metric: Callable, - max_iters: int = ..., - lower_bound: int = ..., - upper_bound: int = ..., - max_positive_inputs: Optional[int] = ..., - max_negative_inputs: Optional[int] = ..., - optimize_for: str = ..., - ) -> None: ... - def process_example( - self, actor, example, return_outputs - ): # -> tuple[Any, Any, Any] | tuple[Any, None, Literal[0]] | Literal[0]: + def __init__(self, metric: Callable, max_iters: int = ..., lower_bound: int = ..., upper_bound: int = ..., max_positive_inputs: Optional[int] = ..., max_negative_inputs: Optional[int] = ..., optimize_for: str = ...) -> None: + ... + + def process_example(self, actor, example, return_outputs): # -> tuple[Any, Any, Any] | tuple[Any, None, Literal[0]] | Literal[0]: + ... + + def thread_safe_evaluator(self, devset, actor, return_outputs=..., num_threads=...): # -> tuple[Any | float, list[Any]] | float: ... - def thread_safe_evaluator( - self, devset, actor, return_outputs=..., num_threads=... - ): # -> tuple[Any | float, list[Any]] | float: + + def compile(self, student, *, trainset): ... - def compile(self, student, *, trainset): ... + + + diff --git a/typings/dspy/teleprompt/bettertogether.pyi b/typings/dspy/teleprompt/bettertogether.pyi index c7a964b..4a672b8 100644 --- a/typings/dspy/teleprompt/bettertogether.pyi +++ b/typings/dspy/teleprompt/bettertogether.pyi @@ -4,20 +4,17 @@ This type stub file was generated by pyright. 
from typing import Callable, List, Optional from dspy.primitives.example import Example -from dspy.primitives.program import Program +from dspy.primitives.module import Module from dspy.teleprompt.teleprompt import Teleprompter logger = ... - class BetterTogether(Teleprompter): STRAT_SEP = ... - def __init__( - self, - metric: Callable, - prompt_optimizer: Optional[Teleprompter] = ..., - weight_optimizer: Optional[Teleprompter] = ..., - seed: Optional[int] = ..., - ) -> None: ... - def compile( - self, student: Program, trainset: List[Example], strategy: str = ..., valset_ratio=... - ) -> Program: ... + def __init__(self, metric: Callable, prompt_optimizer: Optional[Teleprompter] = ..., weight_optimizer: Optional[Teleprompter] = ..., seed: Optional[int] = ...) -> None: + ... + + def compile(self, student: Module, trainset: List[Example], strategy: str = ..., valset_ratio=...) -> Module: + ... + + + diff --git a/typings/dspy/teleprompt/bootstrap.pyi b/typings/dspy/teleprompt/bootstrap.pyi index 070ee64..8c09f65 100644 --- a/typings/dspy/teleprompt/bootstrap.pyi +++ b/typings/dspy/teleprompt/bootstrap.pyi @@ -6,18 +6,8 @@ from typing import Dict, Optional from dspy.teleprompt.teleprompt import Teleprompter logger = ... - class BootstrapFewShot(Teleprompter): - def __init__( - self, - metric=..., - metric_threshold=..., - teacher_settings: Optional[Dict] = ..., - max_bootstrapped_demos=..., - max_labeled_demos=..., - max_rounds=..., - max_errors=..., - ) -> None: + def __init__(self, metric=..., metric_threshold=..., teacher_settings: Optional[Dict] = ..., max_bootstrapped_demos=..., max_labeled_demos=..., max_rounds=..., max_errors=...) -> None: """A Teleprompter class that composes a set of demos/examples to go into a predictor's prompt. These demos come from a combination of labeled examples in the training set, and bootstrapped demos. @@ -35,8 +25,13 @@ class BootstrapFewShot(Teleprompter): Defaults to 16. 
max_rounds (int): Number of iterations to attempt generating the required bootstrap examples. If unsuccessful after `max_rounds`, the program ends. Defaults to 1. - max_errors (int): Maximum number of errors until program ends. Defaults to 5. + max_errors (Optional[int]): Maximum number of errors until program ends. + If ``None``, inherits from ``dspy.settings.max_errors``. """ ... + + def compile(self, student, *, teacher=..., trainset): + ... + + - def compile(self, student, *, teacher=..., trainset): ... diff --git a/typings/dspy/teleprompt/bootstrap_finetune.pyi b/typings/dspy/teleprompt/bootstrap_finetune.pyi index 34f3c21..0d98cf1 100644 --- a/typings/dspy/teleprompt/bootstrap_finetune.pyi +++ b/typings/dspy/teleprompt/bootstrap_finetune.pyi @@ -7,69 +7,70 @@ from typing import Any, Callable, Dict, List, Optional, Union from dspy.adapters.base import Adapter from dspy.clients.lm import LM from dspy.primitives.example import Example -from dspy.primitives.program import Program +from dspy.primitives.module import Module from dspy.teleprompt.teleprompt import Teleprompter logger = ... - class FinetuneTeleprompter(Teleprompter): - def __init__( - self, train_kwargs: Optional[Union[Dict[str, Any], Dict[LM, Dict[str, Any]]]] = ... - ) -> None: ... + def __init__(self, train_kwargs: Optional[Union[Dict[str, Any], Dict[LM, Dict[str, Any]]]] = ...) -> None: + ... + @staticmethod - def convert_to_lm_dict(arg) -> Dict[LM, Any]: ... + def convert_to_lm_dict(arg) -> Dict[LM, Any]: + ... + + class BootstrapFinetune(FinetuneTeleprompter): - def __init__( - self, - metric: Optional[Callable] = ..., - multitask: bool = ..., - train_kwargs: Optional[Union[Dict[str, Any], Dict[LM, Dict[str, Any]]]] = ..., - adapter: Optional[Union[Adapter, Dict[LM, Adapter]]] = ..., - exclude_demos: bool = ..., - num_threads: Optional[int] = ..., - ) -> None: ... 
- def compile( - self, - student: Program, - trainset: List[Example], - teacher: Optional[Union[Program, List[Program]]] = ..., - ) -> Program: ... + def __init__(self, metric: Optional[Callable] = ..., multitask: bool = ..., train_kwargs: Optional[Union[Dict[str, Any], Dict[LM, Dict[str, Any]]]] = ..., adapter: Optional[Union[Adapter, Dict[LM, Adapter]]] = ..., exclude_demos: bool = ..., num_threads: Optional[int] = ...) -> None: + ... + + def compile(self, student: Module, trainset: List[Example], teacher: Optional[Union[Module, List[Module]]] = ...) -> Module: + ... + @staticmethod - def finetune_lms(finetune_dict) -> Dict[Any, LM]: ... + def finetune_lms(finetune_dict) -> Dict[Any, LM]: + ... + + + +def build_call_data_from_trace(trace: List[Dict], pred_ind: int, adapter: Adapter, exclude_demos: bool = ...) -> Dict[str, List[Dict[str, Any]]]: + ... -def build_call_data_from_trace( - trace: List[Dict], pred_ind: int, adapter: Adapter, exclude_demos: bool = ... -) -> Dict[str, List[Dict[str, Any]]]: ... @dataclass class FailedPrediction: completion_text: str format_reward: Union[float, None] = ... -def bootstrap_trace_data( - program: Program, - dataset: List[Example], - metric: Optional[Callable] = ..., - num_threads: Optional[int] = ..., - raise_on_error=..., - capture_failed_parses=..., - failure_score: float = ..., - format_failure_score: float = ..., - log_format_failures: bool = ..., -) -> List[Dict[str, Any]]: ... -def all_predictors_have_lms(program: Program) -> bool: + +def bootstrap_trace_data(program: Module, dataset: List[Example], metric: Optional[Callable] = ..., num_threads: Optional[int] = ..., raise_on_error=..., capture_failed_parses=..., failure_score: float = ..., format_failure_score: float = ..., log_format_failures: bool = ...) -> List[Dict[str, Any]]: + ... + +def all_predictors_have_lms(program: Module) -> bool: """Return True if all predictors in the program have an LM set.""" ... 
-def copy_program_with_lms(program: Program) -> Program: ... -def prepare_student(student: Program) -> Program: ... -def prepare_teacher(student: Program, teacher: Optional[Program] = ...) -> Program: ... -def assert_structural_equivalency(program1: object, program2: object): # -> None: +def copy_program_with_lms(program: Module) -> Module: + ... + +def prepare_student(student: Module) -> Module: ... -def assert_no_shared_predictor(program1: Program, program2: Program): # -> None: + +def prepare_teacher(student: Module, teacher: Optional[Module] = ...) -> Module: + ... + +def assert_structural_equivalency(program1: object, program2: object): # -> None: + ... + +def assert_no_shared_predictor(program1: Module, program2: Module): # -> None: ... -def get_unique_lms(program: Program) -> List[LM]: ... -def launch_lms(program: Program): # -> None: + +def get_unique_lms(program: Module) -> List[LM]: ... -def kill_lms(program: Program): # -> None: + +def launch_lms(program: Module): # -> None: ... + +def kill_lms(program: Module): # -> None: + ... + diff --git a/typings/dspy/teleprompt/copro_optimizer.pyi b/typings/dspy/teleprompt/copro_optimizer.pyi index 6e7404f..b1900b9 100644 --- a/typings/dspy/teleprompt/copro_optimizer.pyi +++ b/typings/dspy/teleprompt/copro_optimizer.pyi @@ -7,34 +7,26 @@ from dspy.signatures import Signature from dspy.teleprompt.teleprompt import Teleprompter logger = ... - class BasicGenerateInstruction(Signature): """You are an instruction optimizer for large language models. I will give you a ``signature`` of fields (inputs and outputs) in English. Your task is to propose an instruction that will lead a good language model to perform the task well. Don't be afraid to be creative.""" - basic_instruction = ... proposed_instruction = ... proposed_prefix_for_output_field = ... + class GenerateInstructionGivenAttempts(dspy.Signature): """You are an instruction optimizer for large language models. 
I will give some task instructions I've tried, along with their corresponding validation scores. The instructions are arranged in increasing order based on their scores, where higher scores indicate better quality. Your task is to propose a new instruction that will lead a good language model to perform the task even better. Don't be afraid to be creative.""" - attempted_instructions = ... proposed_instruction = ... proposed_prefix_for_output_field = ... + class COPRO(Teleprompter): - def __init__( - self, - prompt_model=..., - metric=..., - breadth=..., - depth=..., - init_temperature=..., - track_stats=..., - **_kwargs, - ) -> None: ... + def __init__(self, prompt_model=..., metric=..., breadth=..., depth=..., init_temperature=..., track_stats=..., **_kwargs) -> None: + ... + def compile(self, student, *, trainset, eval_kwargs): """ optimizes `signature` of `student` program - note that it may be zero-shot or already pre-optimized (demos already chosen - `demos != []`) @@ -48,3 +40,6 @@ class COPRO(Teleprompter): Returns optimized version of `student`. """ ... + + + diff --git a/typings/dspy/teleprompt/ensemble.pyi b/typings/dspy/teleprompt/ensemble.pyi index 4ebde0d..baba8e9 100644 --- a/typings/dspy/teleprompt/ensemble.pyi +++ b/typings/dspy/teleprompt/ensemble.pyi @@ -8,6 +8,12 @@ class Ensemble(Teleprompter): def __init__(self, *, reduce_fn=..., size=..., deterministic=...) -> None: """A common reduce_fn is dspy.majority.""" ... + + def compile(self, programs): # -> EnsembledProgram: + class EnsembledProgram(dspy.Module): + ... + + + + - def compile(self, programs): # -> EnsembledProgram: - class EnsembledProgram(dspy.Module): ... 
diff --git a/typings/dspy/teleprompt/grpo.pyi b/typings/dspy/teleprompt/grpo.pyi index 9496685..1e53311 100644 --- a/typings/dspy/teleprompt/grpo.pyi +++ b/typings/dspy/teleprompt/grpo.pyi @@ -6,67 +6,36 @@ from typing import Any, Callable, Dict, List, Literal, Optional, Union from dspy.adapters.base import Adapter from dspy.clients.lm import LM from dspy.primitives.example import Example -from dspy.primitives.program import Program +from dspy.primitives.module import Module from dspy.teleprompt.bootstrap_finetune import FinetuneTeleprompter logger = ... - class GRPO(FinetuneTeleprompter): - def __init__( - self, - metric: Optional[Callable] = ..., - multitask: bool = ..., - train_kwargs: Optional[Union[Dict[str, Any], Dict[LM, Dict[str, Any]]]] = ..., - adapter: Optional[Union[Adapter, Dict[LM, Adapter]]] = ..., - exclude_demos: bool = ..., - num_threads: int = ..., - num_train_steps: int = ..., - seed: int = ..., - num_dspy_examples_per_grpo_step: int = ..., - num_rollouts_per_grpo_step: int = ..., - use_train_as_val: bool = ..., - num_steps_for_val: int = ..., - report_train_scores: bool = ..., - failure_score: float = ..., - format_failure_score: float = ..., - variably_invoked_predictor_grouping_mode: Union[ - Literal["truncate"], Literal["fill"], Literal["ragged"] - ] = ..., - variably_invoked_predictor_fill_strategy: Optional[ - Union[Literal["randint"], Literal["max"]] - ] = ..., - ) -> None: ... 
- def validate_trace_data_and_log_issues( - self, - trace_data: List[List[List[Dict[str, Any]]]], - subsample_training_dataset: List[Example], - num_teachers: int, - num_samples_per_input: int, - pred_signature_hash_to_ind: Dict[int, int], - ): # -> None: + def __init__(self, metric: Optional[Callable] = ..., multitask: bool = ..., train_kwargs: Optional[Union[Dict[str, Any], Dict[LM, Dict[str, Any]]]] = ..., adapter: Optional[Union[Adapter, Dict[LM, Adapter]]] = ..., exclude_demos: bool = ..., num_threads: int = ..., num_train_steps: int = ..., seed: int = ..., num_dspy_examples_per_grpo_step: int = ..., num_rollouts_per_grpo_step: int = ..., use_train_as_val: bool = ..., num_steps_for_val: int = ..., report_train_scores: bool = ..., failure_score: float = ..., format_failure_score: float = ..., variably_invoked_predictor_grouping_mode: Union[Literal["truncate"], Literal["fill"], Literal["ragged"]] = ..., variably_invoked_predictor_fill_strategy: Optional[Union[Literal["randint"], Literal["max"]]] = ...) -> None: + ... + + def validate_trace_data_and_log_issues(self, trace_data: List[List[List[Dict[str, Any]]]], subsample_training_dataset: List[Example], num_teachers: int, num_samples_per_input: int, pred_signature_hash_to_ind: Dict[int, int]): # -> None: + ... + + def report_validation_metrics(self, student, trainset, valset, logger, step_idx=...): # -> None: ... - def report_validation_metrics( - self, student, trainset, valset, logger, step_idx=... - ): # -> None: + + def update_shuffled_trainset(self, original_trainset): # -> None: ... - def update_shuffled_trainset(self, original_trainset): # -> None: + + def select_training_sample_and_update_shuffled_trainset(self, original_trainset: List[Example], train_step_idx: int) -> List[Example]: ... - def select_training_sample_and_update_shuffled_trainset( - self, original_trainset: List[Example], train_step_idx: int - ) -> List[Example]: ... 
- def compile( - self, - student: Program, - trainset: List[Example], - teacher: Optional[Union[Program, List[Program]]] = ..., - valset: Optional[List[Example]] = ..., - **kwargs, - ) -> Program: ... + + def compile(self, student: Module, trainset: List[Example], teacher: Optional[Union[Module, List[Module]]] = ..., valset: Optional[List[Example]] = ..., **kwargs) -> Module: + ... + + -def disable_lm_cache(program: Program, lm_cache_dict: dict): # -> None: +def disable_lm_cache(program: Module, lm_cache_dict: dict): # -> None: """Disable the LM cache for all predictors in the program.""" ... -def recover_lm_cache(program: Program, lm_cache_dict: dict): # -> None: +def recover_lm_cache(program: Module, lm_cache_dict: dict): # -> None: """Recover the LM caches for all predictors in the program to their original state.""" ... + diff --git a/typings/dspy/teleprompt/infer_rules.pyi b/typings/dspy/teleprompt/infer_rules.pyi index b63d264..3ea66d8 100644 --- a/typings/dspy/teleprompt/infer_rules.pyi +++ b/typings/dspy/teleprompt/infer_rules.pyi @@ -6,25 +6,39 @@ import dspy from dspy.teleprompt import BootstrapFewShot logger = ... - class InferRules(BootstrapFewShot): - def __init__( - self, num_candidates=..., num_rules=..., num_threads=..., teacher_settings=..., **kwargs - ) -> None: ... - def compile(self, student, *, teacher=..., trainset, valset=...): # -> None: + def __init__(self, num_candidates=..., num_rules=..., num_threads=..., teacher_settings=..., **kwargs) -> None: + ... + + def compile(self, student, *, teacher=..., trainset, valset=...): # -> None: + ... + + def induce_natural_language_rules(self, predictor, trainset): # -> object | Any: ... - def induce_natural_language_rules(self, predictor, trainset): ... - def update_program_instructions(self, predictor, natural_language_rules): # -> None: + + def update_program_instructions(self, predictor, natural_language_rules): # -> None: ... 
- def format_examples(self, demos, signature): # -> str: + + def format_examples(self, demos, signature): # -> str: ... - def get_predictor_demos(self, trainset, predictor): # -> list[dict[Any, Any]]: + + def get_predictor_demos(self, trainset, predictor): # -> list[dict[Any, Any]]: ... - def evaluate_program(self, program, dataset): # -> float: + + def evaluate_program(self, program, dataset): ... + + class RulesInductionProgram(dspy.Module): def __init__(self, num_rules, teacher_settings=...) -> None: - class CustomRulesInduction(dspy.Signature): ... + class CustomRulesInduction(dspy.Signature): + ... + + + + def forward(self, examples_text): # -> Any: + ... + + - def forward(self, examples_text): ... diff --git a/typings/dspy/teleprompt/knn_fewshot.pyi b/typings/dspy/teleprompt/knn_fewshot.pyi index 4ed2d4a..0b5547d 100644 --- a/typings/dspy/teleprompt/knn_fewshot.pyi +++ b/typings/dspy/teleprompt/knn_fewshot.pyi @@ -2,14 +2,13 @@ This type stub file was generated by pyright. """ +from typing import Any from dspy.clients import Embedder from dspy.primitives import Example from dspy.teleprompt.teleprompt import Teleprompter class KNNFewShot(Teleprompter): - def __init__( - self, k: int, trainset: list[Example], vectorizer: Embedder, **few_shot_bootstrap_args - ) -> None: + def __init__(self, k: int, trainset: list[Example], vectorizer: Embedder, **few_shot_bootstrap_args: dict[str, Any]) -> None: """ KNNFewShot is an optimizer that uses an in-memory KNN retriever to find the k nearest neighbors in a trainset at test time. For each input example in a forward call, it identifies the k most @@ -50,5 +49,9 @@ class KNNFewShot(Teleprompter): ``` """ ... + + def compile(self, student, *, teacher=...): + ... + + - def compile(self, student, *, teacher=...): ... 
diff --git a/typings/dspy/teleprompt/mipro_optimizer_v2.pyi b/typings/dspy/teleprompt/mipro_optimizer_v2.pyi index afe9536..36a0799 100644 --- a/typings/dspy/teleprompt/mipro_optimizer_v2.pyi +++ b/typings/dspy/teleprompt/mipro_optimizer_v2.pyi @@ -5,7 +5,8 @@ This type stub file was generated by pyright. from typing import Any, Callable, List, Literal, Optional, TYPE_CHECKING from dspy.teleprompt.teleprompt import Teleprompter -if TYPE_CHECKING: ... +if TYPE_CHECKING: + ... logger = ... BOOTSTRAPPED_FEWSHOT_EXAMPLES_IN_CONTEXT = ... LABELED_FEWSHOT_EXAMPLES_IN_CONTEXT = ... @@ -16,46 +17,12 @@ GREEN = ... BLUE = ... BOLD = ... ENDC = ... - class MIPROv2(Teleprompter): - def __init__( - self, - metric: Callable, - prompt_model: Optional[Any] = ..., - task_model: Optional[Any] = ..., - teacher_settings: Optional[dict] = ..., - max_bootstrapped_demos: int = ..., - max_labeled_demos: int = ..., - auto: Optional[Literal["light", "medium", "heavy"]] = ..., - num_candidates: Optional[int] = ..., - num_threads: Optional[int] = ..., - max_errors: int = ..., - seed: int = ..., - init_temperature: float = ..., - verbose: bool = ..., - track_stats: bool = ..., - log_dir: Optional[str] = ..., - metric_threshold: Optional[float] = ..., - ) -> None: ... - def compile( - self, - student: Any, - *, - trainset: List, - teacher: Any = ..., - valset: Optional[List] = ..., - num_trials: Optional[int] = ..., - max_bootstrapped_demos: Optional[int] = ..., - max_labeled_demos: Optional[int] = ..., - seed: Optional[int] = ..., - minibatch: bool = ..., - minibatch_size: int = ..., - minibatch_full_eval_steps: int = ..., - program_aware_proposer: bool = ..., - data_aware_proposer: bool = ..., - view_data_batch_size: int = ..., - tip_aware_proposer: bool = ..., - fewshot_aware_proposer: bool = ..., - requires_permission_to_run: bool = ..., - provide_traceback: Optional[bool] = ..., - ) -> Any: ... 
+ def __init__(self, metric: Callable, prompt_model: Optional[Any] = ..., task_model: Optional[Any] = ..., teacher_settings: Optional[dict] = ..., max_bootstrapped_demos: int = ..., max_labeled_demos: int = ..., auto: Optional[Literal["light", "medium", "heavy"]] = ..., num_candidates: Optional[int] = ..., num_threads: Optional[int] = ..., max_errors: Optional[int] = ..., seed: int = ..., init_temperature: float = ..., verbose: bool = ..., track_stats: bool = ..., log_dir: Optional[str] = ..., metric_threshold: Optional[float] = ...) -> None: + ... + + def compile(self, student: Any, *, trainset: List, teacher: Any = ..., valset: Optional[List] = ..., num_trials: Optional[int] = ..., max_bootstrapped_demos: Optional[int] = ..., max_labeled_demos: Optional[int] = ..., seed: Optional[int] = ..., minibatch: bool = ..., minibatch_size: int = ..., minibatch_full_eval_steps: int = ..., program_aware_proposer: bool = ..., data_aware_proposer: bool = ..., view_data_batch_size: int = ..., tip_aware_proposer: bool = ..., fewshot_aware_proposer: bool = ..., requires_permission_to_run: bool = ..., provide_traceback: Optional[bool] = ...) -> Any: + ... + + + diff --git a/typings/dspy/teleprompt/random_search.pyi b/typings/dspy/teleprompt/random_search.pyi index b3a7c68..f24104d 100644 --- a/typings/dspy/teleprompt/random_search.pyi +++ b/typings/dspy/teleprompt/random_search.pyi @@ -5,19 +5,11 @@ This type stub file was generated by pyright. from dspy.teleprompt.teleprompt import Teleprompter class BootstrapFewShotWithRandomSearch(Teleprompter): - def __init__( - self, - metric, - teacher_settings=..., - max_bootstrapped_demos=..., - max_labeled_demos=..., - max_rounds=..., - num_candidate_programs=..., - num_threads=..., - max_errors=..., - stop_at_score=..., - metric_threshold=..., - ) -> None: ... - def compile( - self, student, *, teacher=..., trainset, valset=..., restrict=..., labeled_sample=... - ): ... 
+ def __init__(self, metric, teacher_settings=..., max_bootstrapped_demos=..., max_labeled_demos=..., max_rounds=..., num_candidate_programs=..., num_threads=..., max_errors=..., stop_at_score=..., metric_threshold=...) -> None: + ... + + def compile(self, student, *, teacher=..., trainset, valset=..., restrict=..., labeled_sample=...): + ... + + + diff --git a/typings/dspy/teleprompt/signature_opt.pyi b/typings/dspy/teleprompt/signature_opt.pyi index 5f0981f..cd8abb8 100644 --- a/typings/dspy/teleprompt/signature_opt.pyi +++ b/typings/dspy/teleprompt/signature_opt.pyi @@ -5,14 +5,11 @@ This type stub file was generated by pyright. from .copro_optimizer import COPRO class SignatureOptimizer(COPRO): - def __init__( - self, - prompt_model=..., - metric=..., - breadth=..., - depth=..., - init_temperature=..., - verbose=..., - track_stats=..., - ) -> None: ... - def compile(self, student, *, devset, eval_kwargs): ... + def __init__(self, prompt_model=..., metric=..., breadth=..., depth=..., init_temperature=..., verbose=..., track_stats=...) -> None: + ... + + def compile(self, student, *, devset, eval_kwargs): + ... + + + diff --git a/typings/dspy/teleprompt/simba.pyi b/typings/dspy/teleprompt/simba.pyi index 232a26f..304dc05 100644 --- a/typings/dspy/teleprompt/simba.pyi +++ b/typings/dspy/teleprompt/simba.pyi @@ -7,21 +7,8 @@ from typing import Callable from dspy.teleprompt.teleprompt import Teleprompter logger = ... - class SIMBA(Teleprompter): - def __init__( - self, - *, - metric: Callable, - bsize=..., - num_candidates=..., - max_steps=..., - max_demos=..., - demo_input_field_maxlen=..., - num_threads=..., - temperature_for_sampling=..., - temperature_for_candidates=..., - ) -> None: + def __init__(self, *, metric: Callable, bsize=..., num_candidates=..., max_steps=..., max_demos=..., demo_input_field_maxlen=..., num_threads=..., temperature_for_sampling=..., temperature_for_candidates=...) -> None: """ Initializes SIMBA. 
@@ -44,8 +31,9 @@ class SIMBA(Teleprompter): the source program for building new candidates. Defaults to 0.2. """ ... - - def compile( - self, student: dspy.Module, *, trainset: list[dspy.Example], seed: int = ... - ): # -> Module: + + def compile(self, student: dspy.Module, *, trainset: list[dspy.Example], seed: int = ...): # -> Module: ... + + + diff --git a/typings/dspy/teleprompt/simba_utils.pyi b/typings/dspy/teleprompt/simba_utils.pyi index ea1732d..bb67c50 100644 --- a/typings/dspy/teleprompt/simba_utils.pyi +++ b/typings/dspy/teleprompt/simba_utils.pyi @@ -6,16 +6,16 @@ import dspy from typing import Callable logger = ... - -def prepare_models_for_resampling(program: dspy.Module, n: int): # -> list[LM | Any]: +def prepare_models_for_resampling(program: dspy.Module, n: int): # -> list[LM | Any]: ... -def wrap_program( - program: dspy.Module, metric: Callable -): # -> Callable[..., dict[str, Any | float | None]]: + +def wrap_program(program: dspy.Module, metric: Callable): # -> Callable[..., dict[str, object | Any | float | None]]: ... -def append_a_demo(demo_input_field_maxlen): # -> Callable[..., Literal[True]]: + +def append_a_demo(demo_input_field_maxlen): # -> Callable[..., Literal[True]]: ... -def append_a_rule(bucket, system, **kwargs): # -> bool: + +def append_a_rule(bucket, system, **kwargs): # -> bool: ... class OfferFeedback(dspy.Signature): @@ -32,7 +32,6 @@ class OfferFeedback(dspy.Signature): - Rely on contrasting the behavior of the worse trajectory against the better trajectory in making recommendations. - Ensure each unique module name appears exactly once as a key in the advice dictionary. """ - program_code: str = ... modules_defn: str = ... program_inputs: str = ... @@ -47,9 +46,10 @@ class OfferFeedback(dspy.Signature): discussion: str = ... module_advice: dict[str, str] = ... -def inspect_modules(program): # -> str: + +def inspect_modules(program): # -> str: ... 
-def recursive_mask( - o, -): # -> dict[Any, Any | dict[Any, Any] | list[Any | dict[Any, Any] | list[Any] | tuple[Any, ...] | str] | tuple[Any, ...] | str] | list[Any | dict[Any, Any | dict[Any, Any] | list[Any] | tuple[Any, ...] | str] | list[Any] | tuple[Any, ...] | str] | tuple[Any, ...] | str: + +def recursive_mask(o): # -> dict[Any, Any | dict[Any, Any] | list[Any | dict[Any, Any] | list[Any] | tuple[Any, ...] | str] | tuple[Any, ...] | str] | list[Any | dict[Any, Any | dict[Any, Any] | list[Any] | tuple[Any, ...] | str] | list[Any] | tuple[Any, ...] | str] | tuple[Any, ...] | str: ... + diff --git a/typings/dspy/teleprompt/teleprompt.pyi b/typings/dspy/teleprompt/teleprompt.pyi index 53ecc05..f0a1826 100644 --- a/typings/dspy/teleprompt/teleprompt.pyi +++ b/typings/dspy/teleprompt/teleprompt.pyi @@ -6,16 +6,10 @@ from typing import Any, Optional from dspy.primitives import Example, Module class Teleprompter: - def __init__(self) -> None: ... - def compile( - self, - student: Module, - *, - trainset: list[Example], - teacher: Optional[Module] = ..., - valset: Optional[list[Example]] = ..., - **kwargs, - ) -> Module: + def __init__(self) -> None: + ... + + def compile(self, student: Module, *, trainset: list[Example], teacher: Optional[Module] = ..., valset: Optional[list[Example]] = ..., **kwargs) -> Module: """ Optimize the student program. @@ -29,7 +23,7 @@ class Teleprompter: The optimized student program. """ ... - + def get_params(self) -> dict[str, Any]: """ Get the parameters of the teleprompter. @@ -38,3 +32,6 @@ class Teleprompter: The parameters of the teleprompter. """ ... + + + diff --git a/typings/dspy/teleprompt/teleprompt_optuna.pyi b/typings/dspy/teleprompt/teleprompt_optuna.pyi index ae16597..7b7bec0 100644 --- a/typings/dspy/teleprompt/teleprompt_optuna.pyi +++ b/typings/dspy/teleprompt/teleprompt_optuna.pyi @@ -5,19 +5,14 @@ This type stub file was generated by pyright. 
from dspy.teleprompt.teleprompt import Teleprompter class BootstrapFewShotWithOptuna(Teleprompter): - def __init__( - self, - metric, - teacher_settings=..., - max_bootstrapped_demos=..., - max_labeled_demos=..., - max_rounds=..., - num_candidate_programs=..., - num_threads=..., - ) -> None: ... - def objective( - self, trial - ): # -> tuple[float, list[tuple[Example, Prediction, float]], list[float]] | tuple[float, list[float]] | tuple[float, list[tuple[Example, Prediction, float]]] | float: + def __init__(self, metric, teacher_settings=..., max_bootstrapped_demos=..., max_labeled_demos=..., max_rounds=..., num_candidate_programs=..., num_threads=...) -> None: ... - def compile(self, student, *, teacher=..., max_demos, trainset, valset=...): # -> Any: + + def objective(self, trial): ... + + def compile(self, student, *, teacher=..., max_demos, trainset, valset=...): # -> Any: + ... + + + diff --git a/typings/dspy/teleprompt/utils.pyi b/typings/dspy/teleprompt/utils.pyi index daad69a..fca4a56 100644 --- a/typings/dspy/teleprompt/utils.pyi +++ b/typings/dspy/teleprompt/utils.pyi @@ -3,32 +3,23 @@ This type stub file was generated by pyright. """ logger = ... - -def create_minibatch(trainset, batch_size=..., rng=...): # -> list[Any]: +def create_minibatch(trainset, batch_size=..., rng=...): # -> list[Any]: """Create a minibatch from the trainset.""" ... -def eval_candidate_program( - batch_size, trainset, candidate_program, evaluate, rng=..., return_all_scores=... -): # -> tuple[float, list[float]] | float: +def eval_candidate_program(batch_size, trainset, candidate_program, evaluate, rng=...): # -> Prediction: """Evaluate a candidate program on the trainset, using the specified batch size.""" ... -def eval_candidate_program_with_pruning( - trial, trial_logs, trainset, candidate_program, evaluate, trial_num, batch_size=... 
-): # -> tuple[Any, Any, int, Literal[True]] | tuple[Any, Any, int, Literal[False]]: +def eval_candidate_program_with_pruning(trial, trial_logs, trainset, candidate_program, evaluate, trial_num, batch_size=...): # -> tuple[Any, Any, int, Literal[True]] | tuple[Any, Any, int, Literal[False]]: """Evaluation of candidate_program with pruning implemented""" ... -def get_program_with_highest_avg_score( - param_score_dict, fully_evaled_param_combos -): # -> tuple[Any, Any, Any, Any] | tuple[Any, Any | floating[Any], Any, Any]: +def get_program_with_highest_avg_score(param_score_dict, fully_evaled_param_combos): # -> tuple[Any, Any, Any, Any] | tuple[Any, Any | floating[Any], Any, Any]: """Used as a helper function for bayesian + minibatching optimizers. Returns the program with the highest average score from the batches evaluated so far.""" ... -def calculate_last_n_proposed_quality( - base_program, trial_logs, evaluate, trainset, devset, n -): # -> tuple[Any | Literal[0], Any, Any | Literal[0], Any]: +def calculate_last_n_proposed_quality(base_program, trial_logs, evaluate, trainset, devset, n): # -> tuple[Any | Literal[0], Any, Any | Literal[0], Any]: """ Calculate the average and best quality of the last n programs proposed. This is useful for seeing if our proposals are actually 'improving' overtime or not. @@ -39,17 +30,18 @@ def get_task_model_history_for_full_example(candidate_program, task_model, devse """Get a full trace of the task model's history for a given candidate program.""" ... -def print_full_program(program): # -> None: +def print_full_program(program): # -> None: """Print out the program's instructions & prefixes for each module.""" ... -def save_candidate_program(program, log_dir, trial_num, note=...): # -> str | None: +def save_candidate_program(program, log_dir, trial_num, note=...): # -> str | None: """Save the candidate program to the log directory.""" ... 
-def save_file_to_log_dir(source_file_path, log_dir): # -> None: +def save_file_to_log_dir(source_file_path, log_dir): # -> None: ... -def setup_logging(log_dir): # -> None: + +def setup_logging(log_dir): # -> None: """Setup logger, which will log our print statements to a txt file at our log_dir for later viewing""" ... @@ -60,44 +52,32 @@ def get_token_usage(model) -> tuple[int, int]: """ ... -def log_token_usage(trial_logs, trial_num, model_dict): # -> None: +def log_token_usage(trial_logs, trial_num, model_dict): # -> None: """ Extract total input and output tokens used by each model and log to trial_logs[trial_num]["token_usage"]. """ ... -def get_prompt_model(prompt_model): # -> Any: - ... -def get_signature(predictor): ... -def set_signature(predictor, updated_signature): # -> None: - ... -def create_n_fewshot_demo_sets( - student, - num_candidate_sets, - trainset, - max_labeled_demos, - max_bootstrapped_demos, - metric, - teacher_settings, - max_errors=..., - max_rounds=..., - labeled_sample=..., - min_num_samples=..., - metric_threshold=..., - teacher=..., - include_non_bootstrapped=..., - seed=..., - rng=..., -): # -> dict[Any, Any]: +def get_prompt_model(prompt_model): + ... + +def get_signature(predictor): + ... + +def set_signature(predictor, updated_signature): # -> None: + ... + +def create_n_fewshot_demo_sets(student, num_candidate_sets, trainset, max_labeled_demos, max_bootstrapped_demos, metric, teacher_settings, max_errors=..., max_rounds=..., labeled_sample=..., min_num_samples=..., metric_threshold=..., teacher=..., include_non_bootstrapped=..., seed=..., rng=...): # -> dict[Any, Any]: """ This function is copied from random_search.py, and creates fewshot examples in the same way that random search does. This allows us to take advantage of using the same fewshot examples when we use the same random seed in our optimizers. """ ... 
-def old_getfile(object): # -> str | None: +def old_getfile(object): # -> str | None: """Work out which source or compiled file an object was defined in.""" ... -def new_getfile(object): # -> str | None: +def new_getfile(object): # -> str | None: ... + diff --git a/typings/dspy/teleprompt/vanilla.pyi b/typings/dspy/teleprompt/vanilla.pyi index 391fab2..d976c02 100644 --- a/typings/dspy/teleprompt/vanilla.pyi +++ b/typings/dspy/teleprompt/vanilla.pyi @@ -5,5 +5,11 @@ This type stub file was generated by pyright. from dspy.teleprompt.teleprompt import Teleprompter class LabeledFewShot(Teleprompter): - def __init__(self, k=...) -> None: ... - def compile(self, student, *, trainset, sample=...): ... + def __init__(self, k=...) -> None: + ... + + def compile(self, student, *, trainset, sample=...): + ... + + + diff --git a/typings/dspy/utils/__init__.pyi b/typings/dspy/utils/__init__.pyi index d0a1a4d..d13d3fc 100644 --- a/typings/dspy/utils/__init__.pyi +++ b/typings/dspy/utils/__init__.pyi @@ -10,18 +10,7 @@ from dspy.utils.callback import BaseCallback, with_callbacks from dspy.utils.dummies import DummyLM, DummyVectorizer, dummy_rm from dspy.utils.inspect_history import pretty_print_history -def download(url): # -> None: +def download(url): # -> None: ... -__all__ = [ - "download", - "exceptions", - "BaseCallback", - "with_callbacks", - "DummyLM", - "DummyVectorizer", - "dummy_rm", - "StatusMessage", - "StatusMessageProvider", - "pretty_print_history", -] +__all__ = ["download", "exceptions", "BaseCallback", "with_callbacks", "DummyLM", "DummyVectorizer", "dummy_rm", "StatusMessage", "StatusMessageProvider", "pretty_print_history"] diff --git a/typings/dspy/utils/asyncify.pyi b/typings/dspy/utils/asyncify.pyi index 9a55926..50c7212 100644 --- a/typings/dspy/utils/asyncify.pyi +++ b/typings/dspy/utils/asyncify.pyi @@ -3,15 +3,17 @@ This type stub file was generated by pyright. 
""" from typing import Any, Awaitable, Callable, TYPE_CHECKING -from dspy.primitives.program import Module +from dspy.primitives.module import Module -if TYPE_CHECKING: ... +if TYPE_CHECKING: + ... _limiter = ... - -def get_async_max_workers(): # -> Any: +def get_async_max_workers(): ... -def get_limiter(): # -> CapacityLimiter: + +def get_limiter(): # -> CapacityLimiter: ... + def asyncify(program: Module) -> Callable[[Any, Any], Awaitable[Any]]: """ Wraps a DSPy program so that it can be called asynchronously. This is useful for running a @@ -27,3 +29,4 @@ def asyncify(program: Module) -> Callable[[Any, Any], Awaitable[Any]]: The current thread's configuration context is inherited for each call. """ ... + diff --git a/typings/dspy/utils/caching.pyi b/typings/dspy/utils/caching.pyi index 6512b2b..d663b3d 100644 --- a/typings/dspy/utils/caching.pyi +++ b/typings/dspy/utils/caching.pyi @@ -4,7 +4,7 @@ This type stub file was generated by pyright. _DEFAULT_CACHE_DIR = ... DSPY_CACHEDIR = ... - def create_subdir_in_cachedir(subdir: str) -> str: """Create a subdirectory in the DSPy cache directory.""" ... + diff --git a/typings/dspy/utils/callback.pyi b/typings/dspy/utils/callback.pyi index 1483b8b..81cf0ce 100644 --- a/typings/dspy/utils/callback.pyi +++ b/typings/dspy/utils/callback.pyi @@ -6,7 +6,6 @@ from typing import Any, Dict, Optional ACTIVE_CALL_ID = ... logger = ... - class BaseCallback: """A base class for defining callback handlers for DSPy components. @@ -56,7 +55,7 @@ class BaseCallback: # No logging here because only `lm_1` has the callback set. ``` """ - def on_module_start(self, call_id: str, instance: Any, inputs: Dict[str, Any]): # -> None: + def on_module_start(self, call_id: str, instance: Any, inputs: Dict[str, Any]): # -> None: """A handler triggered when forward() method of a module (subclass of dspy.Module) is called. Args: @@ -66,10 +65,8 @@ class BaseCallback: a key-value pair in a dictionary. """ ... 
- - def on_module_end( - self, call_id: str, outputs: Optional[Any], exception: Optional[Exception] = ... - ): # -> None: + + def on_module_end(self, call_id: str, outputs: Optional[Any], exception: Optional[Exception] = ...): # -> None: """A handler triggered after forward() method of a module (subclass of dspy.Module) is executed. Args: @@ -79,8 +76,8 @@ class BaseCallback: exception: If an exception is raised during the execution, it will be stored here. """ ... - - def on_lm_start(self, call_id: str, instance: Any, inputs: Dict[str, Any]): # -> None: + + def on_lm_start(self, call_id: str, instance: Any, inputs: Dict[str, Any]): # -> None: """A handler triggered when __call__ method of dspy.LM instance is called. Args: @@ -90,10 +87,8 @@ class BaseCallback: a key-value pair in a dictionary. """ ... - - def on_lm_end( - self, call_id: str, outputs: Optional[Dict[str, Any]], exception: Optional[Exception] = ... - ): # -> None: + + def on_lm_end(self, call_id: str, outputs: Optional[Dict[str, Any]], exception: Optional[Exception] = ...): # -> None: """A handler triggered after __call__ method of dspy.LM instance is executed. Args: @@ -103,10 +98,8 @@ class BaseCallback: exception: If an exception is raised during the execution, it will be stored here. """ ... - - def on_adapter_format_start( - self, call_id: str, instance: Any, inputs: Dict[str, Any] - ): # -> None: + + def on_adapter_format_start(self, call_id: str, instance: Any, inputs: Dict[str, Any]): # -> None: """A handler triggered when format() method of an adapter (subclass of dspy.Adapter) is called. Args: @@ -116,10 +109,8 @@ class BaseCallback: a key-value pair in a dictionary. """ ... - - def on_adapter_format_end( - self, call_id: str, outputs: Optional[Dict[str, Any]], exception: Optional[Exception] = ... 
- ): # -> None: + + def on_adapter_format_end(self, call_id: str, outputs: Optional[Dict[str, Any]], exception: Optional[Exception] = ...): # -> None: """A handler triggered after format() method of an adapter (subclass of dspy.Adapter) is called.. Args: @@ -129,10 +120,8 @@ class BaseCallback: exception: If an exception is raised during the execution, it will be stored here. """ ... - - def on_adapter_parse_start( - self, call_id: str, instance: Any, inputs: Dict[str, Any] - ): # -> None: + + def on_adapter_parse_start(self, call_id: str, instance: Any, inputs: Dict[str, Any]): # -> None: """A handler triggered when parse() method of an adapter (subclass of dspy.Adapter) is called. Args: @@ -142,10 +131,8 @@ class BaseCallback: a key-value pair in a dictionary. """ ... - - def on_adapter_parse_end( - self, call_id: str, outputs: Optional[Dict[str, Any]], exception: Optional[Exception] = ... - ): # -> None: + + def on_adapter_parse_end(self, call_id: str, outputs: Optional[Dict[str, Any]], exception: Optional[Exception] = ...): # -> None: """A handler triggered after parse() method of an adapter (subclass of dspy.Adapter) is called. Args: @@ -155,8 +142,8 @@ class BaseCallback: exception: If an exception is raised during the execution, it will be stored here. """ ... - - def on_tool_start(self, call_id: str, instance: Any, inputs: Dict[str, Any]): # -> None: + + def on_tool_start(self, call_id: str, instance: Any, inputs: Dict[str, Any]): # -> None: """A handler triggered when a tool is called. Args: @@ -166,10 +153,8 @@ class BaseCallback: a key-value pair in a dictionary. """ ... - - def on_tool_end( - self, call_id: str, outputs: Optional[Dict[str, Any]], exception: Optional[Exception] = ... - ): # -> None: + + def on_tool_end(self, call_id: str, outputs: Optional[Dict[str, Any]], exception: Optional[Exception] = ...): # -> None: """A handler triggered after a tool is executed. 
Args: @@ -179,8 +164,8 @@ class BaseCallback: exception: If an exception is raised during the execution, it will be stored here. """ ... - - def on_evaluate_start(self, call_id: str, instance: Any, inputs: Dict[str, Any]): # -> None: + + def on_evaluate_start(self, call_id: str, instance: Any, inputs: Dict[str, Any]): # -> None: """A handler triggered when evaluation is started. Args: @@ -190,10 +175,8 @@ class BaseCallback: a key-value pair in a dictionary. """ ... - - def on_evaluate_end( - self, call_id: str, outputs: Optional[Any], exception: Optional[Exception] = ... - ): # -> None: + + def on_evaluate_end(self, call_id: str, outputs: Optional[Any], exception: Optional[Exception] = ...): # -> None: """A handler triggered after evaluation is executed. Args: @@ -203,9 +186,10 @@ class BaseCallback: exception: If an exception is raised during the execution, it will be stored here. """ ... + + -def with_callbacks( - fn, -): # -> _Wrapped[..., Any, ..., CoroutineType[Any, Any, Any]] | _Wrapped[..., Any, ..., Any]: +def with_callbacks(fn): # -> _Wrapped[..., Any, ..., CoroutineType[Any, Any, Any]] | _Wrapped[..., Any, ..., Any]: """Decorator to add callback functionality to instance methods.""" ... + diff --git a/typings/dspy/utils/dummies.pyi b/typings/dspy/utils/dummies.pyi index b0c0634..e387384 100644 --- a/typings/dspy/utils/dummies.pyi +++ b/typings/dspy/utils/dummies.pyi @@ -62,23 +62,32 @@ class DummyLM(LM): ``` """ - def __init__( - self, - answers: Union[list[dict[str, str]], dict[str, dict[str, str]]], - follow_examples: bool = ..., - ) -> None: ... + def __init__(self, answers: Union[list[dict[str, str]], dict[str, dict[str, str]]], follow_examples: bool = ...) -> None: + ... + @with_callbacks - def __call__(self, prompt=..., messages=..., **kwargs): # -> list[Any]: + def __call__(self, prompt=..., messages=..., **kwargs): # -> list[Any]: ... 
- async def acall(self, prompt=..., messages=..., **kwargs): # -> list[Any]: + + async def acall(self, prompt=..., messages=..., **kwargs): # -> list[Any]: ... - def get_convo(self, index): # -> tuple[Any, Any]: + + def get_convo(self, index): # -> tuple[Any, Any]: """Get the prompt + answer from the ith message.""" ... + + -def dummy_rm(passages=...) -> callable: ... +def dummy_rm(passages=...) -> callable: + ... class DummyVectorizer: """Simple vectorizer based on n-grams.""" - def __init__(self, max_length=..., n_gram=...) -> None: ... - def __call__(self, texts: list[str]) -> np.ndarray: ... + def __init__(self, max_length=..., n_gram=...) -> None: + ... + + def __call__(self, texts: list[str]) -> np.ndarray: + ... + + + diff --git a/typings/dspy/utils/exceptions.pyi b/typings/dspy/utils/exceptions.pyi index 4ccd2ee..7e75bd2 100644 --- a/typings/dspy/utils/exceptions.pyi +++ b/typings/dspy/utils/exceptions.pyi @@ -7,11 +7,8 @@ from dspy.signatures.signature import Signature class AdapterParseError(Exception): """Exception raised when adapter cannot parse the LM response.""" - def __init__( - self, - adapter_name: str, - signature: Signature, - lm_response: str, - message: Optional[str] = ..., - parsed_result: Optional[str] = ..., - ) -> None: ... + def __init__(self, adapter_name: str, signature: Signature, lm_response: str, message: Optional[str] = ..., parsed_result: Optional[str] = ...) -> None: + ... + + + diff --git a/typings/dspy/utils/inspect_history.pyi b/typings/dspy/utils/inspect_history.pyi index c740d09..e516980 100644 --- a/typings/dspy/utils/inspect_history.pyi +++ b/typings/dspy/utils/inspect_history.pyi @@ -2,6 +2,7 @@ This type stub file was generated by pyright. """ -def pretty_print_history(history, n: int = ...): # -> None: +def pretty_print_history(history, n: int = ...): # -> None: """Prints the last n prompts and their completions.""" ... 
+ diff --git a/typings/dspy/utils/langchain_tool.pyi b/typings/dspy/utils/langchain_tool.pyi index 7bd7b46..dda9168 100644 --- a/typings/dspy/utils/langchain_tool.pyi +++ b/typings/dspy/utils/langchain_tool.pyi @@ -6,11 +6,11 @@ from typing import TYPE_CHECKING from dspy.adapters.types.tool import Tool from langchain.tools import BaseTool -if TYPE_CHECKING: ... - +if TYPE_CHECKING: + ... def convert_langchain_tool(tool: BaseTool) -> Tool: """Build a DSPy tool from a LangChain tool. - + This function converts a LangChain tool (either created with @tool decorator or by subclassing BaseTool) into a DSPy Tool. @@ -21,3 +21,4 @@ def convert_langchain_tool(tool: BaseTool) -> Tool: A DSPy Tool object. """ ... + diff --git a/typings/dspy/utils/logging_utils.pyi b/typings/dspy/utils/logging_utils.pyi index b0b1818..b1d7fc8 100644 --- a/typings/dspy/utils/logging_utils.pyi +++ b/typings/dspy/utils/logging_utils.pyi @@ -4,7 +4,6 @@ This type stub file was generated by pyright. LOGGING_LINE_FORMAT = ... LOGGING_DATETIME_FORMAT = ... - class DSPyLoggingStream: """ A Python stream for use with event logging APIs throughout DSPy (`eprint()`, @@ -12,28 +11,34 @@ class DSPyLoggingStream: `flush()` calls to the stream referred to by `sys.stderr` at the time of the call. It also provides capabilities for disabling the stream to silence event logs. """ - def __init__(self) -> None: ... - def write(self, text): # -> None: + def __init__(self) -> None: + ... + + def write(self, text): # -> None: ... - def flush(self): # -> None: + + def flush(self): # -> None: ... + @property - def enabled(self): # -> bool: + def enabled(self): # -> bool: ... + @enabled.setter - def enabled(self, value): # -> None: + def enabled(self, value): # -> None: ... + -DSPY_LOGGING_STREAM = ... -def disable_logging(): # -> None: +DSPY_LOGGING_STREAM = ... 
+def disable_logging(): # -> None: """ Disables the `DSPyLoggingStream` used by event logging APIs throughout DSPy (`eprint()`, `logger.info()`, etc), silencing all subsequent event logs. """ ... -def enable_logging(): # -> None: +def enable_logging(): # -> None: """ Enables the `DSPyLoggingStream` used by event logging APIs throughout DSPy (`eprint()`, `logger.info()`, etc), emitting all subsequent event logs. This @@ -41,5 +46,6 @@ def enable_logging(): # -> None: """ ... -def configure_dspy_loggers(root_module_name): # -> None: +def configure_dspy_loggers(root_module_name): # -> None: ... + diff --git a/typings/dspy/utils/mcp.pyi b/typings/dspy/utils/mcp.pyi index 6d52fb3..2279dfb 100644 --- a/typings/dspy/utils/mcp.pyi +++ b/typings/dspy/utils/mcp.pyi @@ -6,8 +6,8 @@ import mcp from typing import TYPE_CHECKING from dspy.adapters.types.tool import Tool -if TYPE_CHECKING: ... - +if TYPE_CHECKING: + ... def convert_mcp_tool(session: mcp.client.session.ClientSession, tool: mcp.types.Tool) -> Tool: """Build a DSPy tool from an MCP tool. @@ -19,3 +19,4 @@ def convert_mcp_tool(session: mcp.client.session.ClientSession, tool: mcp.types. A dspy Tool object. """ ... + diff --git a/typings/dspy/utils/parallelizer.pyi b/typings/dspy/utils/parallelizer.pyi index f92a52a..700513d 100644 --- a/typings/dspy/utils/parallelizer.pyi +++ b/typings/dspy/utils/parallelizer.pyi @@ -3,23 +3,16 @@ This type stub file was generated by pyright. """ logger = ... - class ParallelExecutor: - def __init__( - self, - num_threads=..., - max_errors=..., - disable_progress_bar=..., - provide_traceback=..., - compare_results=..., - timeout=..., - straggler_limit=..., - ) -> None: + def __init__(self, num_threads=..., max_errors=..., disable_progress_bar=..., provide_traceback=..., compare_results=..., timeout=..., straggler_limit=...) -> None: """ Offers isolation between the tasks (dspy.settings) irrespective of whether num_threads == 1 or > 1. Handles also straggler timeouts. """ ... 
- - def execute(self, function, data): # -> list[None]: + + def execute(self, function, data): # -> list[None]: ... + + + diff --git a/typings/dspy/utils/saving.pyi b/typings/dspy/utils/saving.pyi index bc6d49c..65beb34 100644 --- a/typings/dspy/utils/saving.pyi +++ b/typings/dspy/utils/saving.pyi @@ -2,11 +2,16 @@ This type stub file was generated by pyright. """ -logger = ... +from typing import TYPE_CHECKING +from dspy.primitives.module import Module -def get_dependency_versions(): # -> dict[str, str]: +if TYPE_CHECKING: + ... +logger = ... +def get_dependency_versions(): # -> dict[str, str]: ... -def load(path): # -> Any: + +def load(path: str) -> Module: """Load saved DSPy model. This method is used to load a saved DSPy model with `save_program=True`, i.e., the model is saved with cloudpickle. @@ -18,3 +23,4 @@ def load(path): # -> Any: The loaded model, a `dspy.Module` instance. """ ... + diff --git a/typings/dspy/utils/unbatchify.pyi b/typings/dspy/utils/unbatchify.pyi index 22c89c4..895cfb8 100644 --- a/typings/dspy/utils/unbatchify.pyi +++ b/typings/dspy/utils/unbatchify.pyi @@ -5,12 +5,7 @@ This type stub file was generated by pyright. from typing import Any, Callable, List class Unbatchify: - def __init__( - self, - batch_fn: Callable[[List[Any]], List[Any]], - max_batch_size: int = ..., - max_wait_time: float = ..., - ) -> None: + def __init__(self, batch_fn: Callable[[List[Any]], List[Any]], max_batch_size: int = ..., max_wait_time: float = ...) -> None: """ Initializes the Unbatchify. @@ -20,7 +15,7 @@ class Unbatchify: max_wait_time: The maximum time (in seconds) to wait for batch to fill before processing. """ ... - + def __call__(self, input_item: Any) -> Any: """ Thread-safe function that accepts a single input and returns the corresponding output. @@ -32,27 +27,30 @@ class Unbatchify: The output corresponding to the input_item after processing through batch_fn. """ ... 
- - def close(self): # -> None: + + def close(self): # -> None: """ Stops the worker thread and cleans up resources. """ ... - - def __enter__(self): # -> Self: + + def __enter__(self): # -> Self: """ Enables use as a context manager. """ ... - - def __exit__(self, exc_type, exc_value, traceback): # -> None: + + def __exit__(self, exc_type, exc_value, traceback): # -> None: """ Ensures resources are cleaned up when exiting context. """ ... - - def __del__(self): # -> None: + + def __del__(self): # -> None: """ Ensures the worker thread is terminated when the object is garbage collected. """ ... + + + diff --git a/typings/dspy/utils/usage_tracker.pyi b/typings/dspy/utils/usage_tracker.pyi index 6d12ded..d6f7279 100644 --- a/typings/dspy/utils/usage_tracker.pyi +++ b/typings/dspy/utils/usage_tracker.pyi @@ -6,19 +6,23 @@ from contextlib import contextmanager from typing import Any """Usage tracking utilities for DSPy.""" - class UsageTracker: """Tracks LM usage data within a context.""" - def __init__(self) -> None: ... - def add_usage(self, lm: str, usage_entry: dict): # -> None: + def __init__(self) -> None: + ... + + def add_usage(self, lm: str, usage_entry: dict): # -> None: """Add a usage entry to the tracker.""" ... - + def get_total_tokens(self) -> dict[str, dict[str, Any]]: """Calculate total tokens from all tracked usage.""" ... + + @contextmanager -def track_usage(): # -> Generator[UsageTracker, Any, None]: +def track_usage(): # -> Generator[UsageTracker, Any, None]: """Context manager for tracking LM usage.""" ... 
+ From 68cc2c201e663ff827fe871f051c95411e6c35cc Mon Sep 17 00:00:00 2001 From: ethan-wickstrom Date: Tue, 24 Jun 2025 04:15:37 -0500 Subject: [PATCH 12/26] docs: add functional programming guidelines and type system best practices for Python 3.13 --- .vscode/settings.json | 2 +- .windsurfrules | 1356 ++++++++++++++++++++++++++++++++++++----- pyproject.toml | 1 + uv.lock | 161 +---- 4 files changed, 1204 insertions(+), 316 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index aa20fa4..2ff2db9 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,5 +1,5 @@ { - "python.languageServer": "Default", + "python.languageServer": "None", "windsurfPyright.analysis.typeCheckingMode": "basedpyright", "editor.defaultFormatter": "Codeium.windsurfPyright", "windsurfPyright.analysis.autoSearchPaths": true, diff --git a/.windsurfrules b/.windsurfrules index 83ca2f8..cfeef3d 100644 --- a/.windsurfrules +++ b/.windsurfrules @@ -1,202 +1,1222 @@ # [!SYSTEM] INSTRUCTIONS +## Use & Prefer Functional Programming in All Tasks (UNLESS OTHERWISE SPECIFIED) + +You must apply functional programming principles to all tasks unless explicitly instructed otherwise. 
+ +### Core Functional Principles + +- **First-class functions**: Functions are values that can be assigned, passed as arguments, and returned from other functions +- **Pure functions**: Functions must be deterministic—given the same inputs, always return the same outputs +- **No side effects**: Functions should not alter external state or perform I/O unless explicitly tracked +- **Immutability**: Never modify existing data structures; always create new ones with required changes +- **Declarative style**: Express what should be accomplished, not how to accomplish it step by step +- **Function composition**: Build complex operations by combining simpler functions +- **Prefer recursion over iteration**: Use recursive patterns, but leverage Python's itertools for efficiency + +### Program Boundaries and Developer Interfaces + +Always create explicit program boundaries. Treat each task as a separate service with a clear interface: + +```python +from typing import Protocol +from returns.result import Result +from returns.io import IO + +class UserService(Protocol): + """Clear service boundary with typed interface""" + def get_user(self, user_id: int) -> IO[Result[User, UserNotFoundError]]: ... + def create_user(self, data: UserData) -> IO[Result[User, ValidationError]]: ... +``` + +### Error Handling as Values + +Use the type system to track errors and context, not only success values. Side effects are first-class citizens. 
+ +```python +from returns.result import Result, Success, Failure +from returns.io import IO, IOResult +from returns.maybe import Maybe + +# Type signature shows all possible outcomes +def parse_config(path: str) -> IOResult[Config, ConfigError]: + """ + Returns: + IOResult[Config, ConfigError]: Success with Config or Failure with error + """ + return IO.from_result( + _read_file(path) + .bind(_parse_json) + .bind(_validate_config) + ) +``` + +### The `returns` Library for Effect Management + +Python 3.13 works excellently with the `returns` library for functional effect management: + +```python +from returns.result import Result, Success, Failure +from returns.io import IO, IOResult +from returns.maybe import Maybe +from returns.pipeline import flow +from returns.pointfree import bind + +# Result type for fallible operations +def divide(a: float, b: float) -> Result[float, str]: + if b == 0: + return Failure("Division by zero") + return Success(a / b) + +# IO type for side effects +def read_file(path: str) -> IOResult[str, Exception]: + @IO + def _inner() -> Result[str, Exception]: + try: + with open(path) as f: + return Success(f.read()) + except Exception as e: + return Failure(e) + return _inner() + +# Compose with flow +result = flow( + user_input, + parse_number, + bind(validate_positive), + bind(calculate_square_root), +) +``` + +## Python 3.13 Specific Features + +### New Type Parameter Syntax (PEP 695) + +Python 3.12 introduced a cleaner syntax for generics (PEP 695); Python 3.13 extends it with type parameter defaults (PEP 696): + +```python +# Classes with type parameters +class Stack[T]: + def __init__(self) -> None: + self._items: list[T] = [] + + def push(self, item: T) -> None: + self._items.append(item) + + def pop(self) -> T: + return self._items.pop() + +# Functions with type parameters +def first[T](items: Sequence[T]) -> Maybe[T]: + return Maybe.from_optional(items[0] if items else None) + +# Type aliases +type Result[T, E] = Success[T] | Failure[E] +type JsonDict = dict[str, Any] + +# With defaults (PEP 696) +class 
Container[T = str]: + value: T +``` + +### Pattern Matching Enhancements + +Use pattern matching for cleaner control flow: + +```python +from dataclasses import dataclass + +@dataclass(frozen=True) +class Point: + x: float + y: float + +def describe_point(point: Point) -> str: + match point: + case Point(x=0, y=0): + return "Origin" + case Point(x=0, y=y): + return f"On Y-axis at {y}" + case Point(x=x, y=0): + return f"On X-axis at {x}" + case Point(x=x, y=y) if x == y: + return f"On diagonal at {x}" + case Point(x=x, y=y): + return f"At ({x}, {y})" +``` + +### Type Narrowing with TypeIs (PEP 742) + +```python +from typing import TypeIs + +def is_non_empty_list[T](val: list[T]) -> TypeIs[NonEmptyList[T]]: + """Type guard for non-empty lists""" + return len(val) > 0 + +def process_items[T](items: list[T]) -> Maybe[T]: + if is_non_empty_list(items): + # items is narrowed to NonEmptyList[T] + return Some(items[0]) + return Nothing +``` + +## Type System Best Practices + +### Modern Import Patterns (Python 3.13) + +```python +# GOOD: Use collections.abc for abstract types +from collections.abc import Sequence, Mapping, Callable, Iterable, Iterator +from types import MappingProxyType # For immutable dict views + +# BAD: Don't use typing module for these +# from typing import Sequence, Mapping # Deprecated approach + +# GOOD: Use returns for Result types +from returns.result import Result +from returns.maybe import Maybe + +# BAD: Don't use Optional for nullable values +# from typing import Optional # Use Maybe instead +``` + +### Python 3.13 Generic Syntax (PEP 695) + +```python +# GOOD: New syntax for generics +class Box[T]: + def __init__(self, value: T) -> None: + self._value = value + + def map[U](self, func: Callable[[T], U]) -> Box[U]: + return Box(func(self._value)) + +# Function with type parameters +def first[T](items: Sequence[T]) -> Maybe[T]: + return Maybe.from_optional(items[0] if items else None) + +# Type aliases with new syntax +type Result[T, E] = 
Success[T] | Failure[E] +type ValidationResult[T] = Result[T, ValidationError] + +# BAD: Old TypeVar syntax (avoid unless needed for variance) +# from typing import TypeVar, Generic +# T = TypeVar('T') +# class Box(Generic[T]): ... +``` + +### Type Annotations Guidelines + +```python +# Use Final for constants +from typing import Final +MAX_RETRIES: Final = 3 + +# Use Literal for specific values +from typing import Literal +type Mode = Literal['read', 'write', 'append'] + +# Use TypedDict for structured data +from typing import TypedDict, Required, NotRequired + +class UserData(TypedDict): + id: Required[int] + name: Required[str] + email: NotRequired[str] + +# Use Protocol for structural subtyping +from typing import Protocol + +class Comparable[T](Protocol): + def __lt__(self, other: T) -> bool: ... + def __eq__(self, other: T) -> bool: ... + +# Use NewType for semantic distinctions +from typing import NewType +UserId = NewType('UserId', int) +``` + +### Pattern Matching for Algebraic Data Types + +```python +from dataclasses import dataclass +from typing import Final + +# Define sum types with dataclasses +@dataclass(frozen=True) +class Success[T]: + value: T + +@dataclass(frozen=True) +class Failure[E]: + error: E + +type Result[T, E] = Success[T] | Failure[E] + +# Pattern match on results +def handle_result[T, E](result: Result[T, E]) -> str: + match result: + case Success(value): + return f"Success: {value}" + case Failure(error): + return f"Error: {error}" +``` + +## CLI Design with Typer + +```python +import typer +from typing import Annotated +from rich.console import Console +from returns.result import Result + +app = typer.Typer() +console = Console() + +@app.command() +def process( + input_file: Annotated[str, typer.Argument(help="Input file path")], + output: Annotated[str, typer.Option("--output", "-o")] = "output.txt", + verbose: Annotated[bool, typer.Option("--verbose", "-v")] = False, +) -> None: + """Process a file with functional 
transformations.""" + result = ( + read_file(input_file) + .bind(parse_data) + .bind(transform_data) + .bind(lambda data: write_file(output, data)) + ) + + match result: + case Success(_): + console.print("[green]✓ Processing complete[/green]") + case Failure(error): + console.print(f"[red]✗ Error: {error}[/red]") + raise typer.Exit(code=1) +``` + +## Functional Data Processing + +### Using Itertools and Generators + +```python +from itertools import ( + chain, compress, groupby, starmap, + tee, zip_longest, islice, cycle +) +from collections.abc import Iterator, Iterable +from typing import TypeVar + +T = TypeVar('T') + +def chunked[T](iterable: Iterable[T], size: int) -> Iterator[tuple[T, ...]]: + """Split iterable into fixed-size chunks.""" + iterator = iter(iterable) + while chunk := tuple(islice(iterator, size)): + yield chunk + +def sliding_window[T](iterable: Iterable[T], n: int) -> Iterator[tuple[T, ...]]: + """Sliding window over iterable.""" + iterators = tee(iterable, n) + for i, it in enumerate(iterators): + for _ in range(i): + next(it, None) + return zip(*iterators) +``` + +### Functional Collection Operations + +```python +from functools import reduce, partial +from operator import add, mul +from returns.curry import curry + +# Curried functions for composition +@curry +def map_over[T, U](func: Callable[[T], U], items: Sequence[T]) -> list[U]: + return [func(item) for item in items] + +@curry +def filter_by[T](predicate: Callable[[T], bool], items: Sequence[T]) -> list[T]: + return [item for item in items if predicate(item)] + +@curry +def reduce_with[T, U]( + func: Callable[[U, T], U], + initial: U, + items: Sequence[T] +) -> U: + return reduce(func, items, initial) + +# Compose operations +from returns.pipeline import flow + +result = flow( + data, + filter_by(lambda x: x > 0), + map_over(lambda x: x ** 2), + reduce_with(add, 0), +) +``` + +## Async Functional Programming + +```python +from returns.future import Future, FutureResult +from 
returns.io import IO +import asyncio + +# Async operations as Future values +async def fetch_user(user_id: int) -> Result[User, Exception]: + try: + # Async operation + user = await async_db.get_user(user_id) + return Success(user) + except Exception as e: + return Failure(e) + +# Compose async operations +def get_user_posts(user_id: int) -> FutureResult[list[Post], Exception]: + return ( + FutureResult.from_future(fetch_user(user_id)) + .bind(lambda user: FutureResult.from_future(fetch_posts(user.id))) + ) + +# Run with asyncio +async def main() -> None: + result = await get_user_posts(123) + match result: + case Success(posts): + print(f"Found {len(posts)} posts") + case Failure(error): + print(f"Error: {error}") +``` + +## Testing Functional Code + +```python +import pytest +from hypothesis import given, strategies as st +from returns.result import Success, Failure + +# Property-based testing +@given(st.integers(), st.integers().filter(lambda x: x != 0)) +def test_divide_properties(a: int, b: int) -> None: + result = divide(a, b) + assert isinstance(result, Success) + assert result.unwrap() == a / b + +@given(st.integers()) +def test_divide_by_zero(a: int) -> None: + result = divide(a, 0) + assert isinstance(result, Failure) + +# Test pure functions with fixtures +@pytest.fixture +def sample_data() -> list[int]: + return [1, 2, 3, 4, 5] + +def test_transformation_pipeline(sample_data: list[int]) -> None: + result = flow( + sample_data, + filter_by(lambda x: x % 2 == 0), + map_over(lambda x: x ** 2), + ) + assert result == [4, 16] +``` + +## Performance Considerations + +```python +from functools import cache, lru_cache +from typing import ParamSpec, TypeVar + +P = ParamSpec('P') +R = TypeVar('R') + +# Use @cache for pure functions (Python 3.9+) +@cache +def fibonacci(n: int) -> int: + if n < 2: + return n + return fibonacci(n - 1) + fibonacci(n - 2) + +# LRU cache with size limit +@lru_cache(maxsize=128) +def expensive_computation(x: float, y: float) -> float: + 
# Complex calculation + return complex_math(x, y) + +# Generator for memory efficiency +def process_large_file(path: str) -> Iterator[Result[ProcessedLine, Error]]: + with open(path) as f: + for line in f: + yield process_line(line) +``` + +## Principles + +Adhere to these principles to ensure robust, maintainable, and clear system design: + +### 1. Apply the IPO Pattern + +Break systems into discrete components that accept **Input**, perform **Processing**, and return **Output**. + +```python +# Model a web server as Input → Processing → Output +def handle_request(request: Request) -> IO[Response]: + return flow( + request, + validate_request, + bind(process_business_logic), + bind(format_response), + ) +``` + +### 2. All Models Are Wrong, But Some Are Useful + +Select models that balance accuracy and simplicity. Validate utility through real-world testing. + +### 3. Integrate People as Part of the System + +Design interfaces to align with user expectations: + +- Mirror established mental models +- Eliminate surprising behaviors +- Prioritize discoverability through consistent patterns + +### 4. Principle of Least Astonishment + +Ensure interfaces behave predictably: + +- Follow Python conventions (e.g., `-h` for help in CLIs) +- Avoid hidden side effects +- Document any deviations from expected behavior + +### 5. Fail Fast with Context + +```python +@dataclass(frozen=True) +class ValidationError: + code: str + field: str + message: str + +def validate_email(email: str) -> Result[str, ValidationError]: + if "@" not in email: + return Failure(ValidationError( + code="INVALID_EMAIL", + field="email", + message="Email must contain @" + )) + return Success(email) +``` + +### 6. YAGNI (You Aren't Gonna Need It) + +- Postpone features until required +- Delete unused code proactively +- Measure complexity-to-value ratio + +### 7. 
Explicit Dependency Declaration + +```python +# Dependencies as typed parameters +def process_order[T]( + order: Order, + payment_service: PaymentService, + inventory: InventoryService, + logger: Logger, +) -> IOResult[Receipt, ProcessError]: + """All dependencies explicitly declared""" +``` + +### 8. Type-Driven Design + +```python +# Encode business rules in types +@dataclass(frozen=True) +class NonEmptyList[T]: + head: T + tail: list[T] + + @classmethod + def create(cls, items: list[T]) -> Maybe[NonEmptyList[T]]: + if not items: + return Nothing + return Some(cls(items[0], items[1:])) +``` + +### 9. Design by Contract + +```python +def transfer_funds( + from_account: Account, + to_account: Account, + amount: Decimal, +) -> Result[Transaction, TransferError]: + """ + Preconditions: + - amount > 0 + - from_account.balance >= amount + - from_account != to_account + + Postconditions: + - from_account.balance decreased by amount + - to_account.balance increased by amount + - Transaction record created + """ +``` + +### 10. Single Responsibility & High Cohesion + +```python +# Each module has one clear purpose +# user_repository.py - Only data access +class UserRepository: + def find_by_id(self, id: UserId) -> IOResult[User, NotFoundError]: ... + def save(self, user: User) -> IOResult[User, SaveError]: ... + +# user_validator.py - Only validation +class UserValidator: + def validate(self, data: UserData) -> Result[ValidatedUser, ValidationError]: ... +``` + +### 11. Observability by Design + +```python +from returns.context import RequiresContext + +type Deps = Logger | Metrics + +def process_with_telemetry[T]( + data: T, +) -> RequiresContext[IOResult[T, Error], Deps]: + """Operations with built-in observability""" +``` + +### 12. Progressive Abstraction + +- Start with concrete implementations +- Introduce abstractions only to eliminate duplication +- Refactor when patterns stabilize + +### 13. 
Self-Documenting Code + +```python +# Precise naming +def calculate_compound_interest( + principal: Decimal, + annual_rate: Decimal, + years: int, + compounds_per_year: int = 12, +) -> Decimal: + """Names explain the computation""" +``` + +### 14. Principle of Least Power + +- Prefer `map()` over manual loops +- Avoid generics until necessary +- Choose simple data structures + +### 15. Idempotency by Default + +```python +def ensure_user_exists(email: str) -> IOResult[User, Error]: + """Safe to call multiple times""" + return ( + find_user_by_email(email) + .alt(lambda _: create_user(email)) + ) +``` + +### 16. Resource Safety + +```python +from contextlib import contextmanager +from returns.context import RequiresContextIOResult + +@contextmanager +def database_transaction(): + tx = start_transaction() + try: + yield tx + tx.commit() + except Exception: + tx.rollback() + raise +``` + +### 17. Version Contracts Semantically + +```python +# API versioning +from typing import Literal + +API_VERSION: Final = "2.0.0" + +type ApiVersion = Literal["1.0", "1.1", "2.0"] + +def get_endpoint(version: ApiVersion) -> str: + return f"/api/v{version}/users" +``` + +### 18. Mechanical Sympathy + +- Profile before optimizing +- Use `__slots__` for memory efficiency +- Prefer cache-friendly data layouts + +### 19. Anti-Corruption Layers + +```python +# Shield domain from external APIs +@dataclass(frozen=True) +class ExternalUser: + user_id: str + full_name: str + +@dataclass(frozen=True) +class DomainUser: + id: UserId + name: Name + +def adapt_external_user(external: ExternalUser) -> Result[DomainUser, AdapterError]: + """Translate foreign data to internal types""" +``` + +### 20. 
Progressive Type Refinement + +```python +# Parse → Validate → Use +def process_age(value: str) -> Result[AdultAge, ValidationError]: + return ( + parse_int(value) + .bind(validate_positive) + .bind(ensure_adult) + ) + +@dataclass(frozen=True) +class AdultAge: + value: int + + def __post_init__(self): + if self.value < 18: + raise ValueError("Must be adult age") +``` + +## TypeIs + +```python +# TypeIs for type narrowing (PEP 742) +from typing import TypeIs + +def is_string_list(val: list[object]) -> TypeIs[list[str]]: + return all(isinstance(item, str) for item in val) + +# Use in type narrowing +def process(items: list[object]) -> str: + if is_string_list(items): + # items is narrowed to list[str] + return ', '.join(items) + return str(items) +``` + +## Code Style + +Adhere to the following programming philosophy for every code artifact you create or edit: + +### Core Principles + +- **Always DRY**: Extract shared logic and avoid duplication; every significant behavior should live in exactly one place +- **Always Optimize for Deletion**: Prefer simple, loosely-coupled structures that can be removed without cascading edits +- **Always Stateless by Default**: Treat state as a liability. 
Functions should carry all required data in parameters and return new data +- **Always Pure Functions Only**: Functions must be deterministic, side-effect-free, and directly testable in isolation +- **Always Swappable Services**: Build services that are identical, swappable, and trivially scalable with no memory between calls +- **Always Place State at the Edge**: Persisted data, caches, and external integrations belong in well-defined boundary layers +- **Always Design for Easy Rewrite**: Assume code might be replaced next week—keep components small, clear, and prediction-free + +### Python 3.13 Specific Guidelines + +- Always write code in a purely functional style when possible +- Always verify all code is referentially transparent +- Never use type assertions (`cast`) or `# type: ignore` +- Never use `Any` type; use proper generics or protocols +- Always define types using algebraic data types (sum and product types) +- Prefer `@dataclass(frozen=True)` over regular classes +- Use pattern matching for control flow over if/elif chains + ## Type System and Imports -- Always use `Sequence` over list; GOOD: always use `Sequence` by importing using `from collections.abc import Sequence` and BAD: never import using `from typing import Sequence` -- Always use `Mapping` over `dict`; GOOD: always use `Mapping` by importing using `from collections.abc import Mapping` and BAD: never import using `from typing import Mapping` -- Always use `Result` over `Optional` or `None`; GOOD: always use `Result` by importing using `from returns.result import Result` and BAD: never import using `from typing import Optional` or `None` -- Prefer `Callable` over `typing.Callable`; GOOD: always use `Callable` by importing using `from collections.abc import Callable` and BAD: never import using `from typing import Callable` -- Use `Iterable` for read-only iteration; GOOD: always use `Iterable` by importing using `from collections.abc import Iterable` and BAD: never import using `from typing 
import Iterable` -- Avoid `Any`; use explicit types or `TypeVar`; GOOD: always use specific types and BAD: never import using `from typing import Any` -- Use `TypeVar` for generic type parameters; GOOD: `T = TypeVar('T')` and BAD: never use `Any` as a generic placeholder -- Always use `Final` for constants; GOOD: `from typing import Final` then `MAX_RETRIES: Final[int] = 3` and BAD: never use mutable constants -- Use `Literal` for specific string/int values; GOOD: `from typing import Literal` then `Mode = Literal['read', 'write']` and BAD: never use plain `str` for enums -- Always prefer `TypeAlias` for complex types; GOOD: `from typing import TypeAlias` then `UserID: TypeAlias = int` and BAD: never repeat complex type annotations -- Use `Protocol` for structural subtyping; GOOD: `from typing import Protocol` for interfaces and BAD: never use ABC classes just for typing -- Always use `TypedDict` for structured dicts; GOOD: `from typing import TypedDict` and BAD: never use `dict[str, Any]` -- Prefer `frozenset` over `set` for immutable collections; GOOD: `frozenset({1, 2, 3})` and BAD: avoid mutable sets in function returns -- Use `tuple` for fixed-length sequences; GOOD: `tuple[int, str, bool]` and BAD: never use `list` for fixed-size data -- Always specify variance in TypeVars; GOOD: `TypeVar('T', covariant=True)` when appropriate and BAD: never leave variance implicit -- Use `NewType` for semantic distinctions; GOOD: `UserId = NewType('UserId', int)` and BAD: never conflate different semantic types +### Collections and Generics + +- **GOOD**: Use `Sequence[T]` from `collections.abc` for read-only lists +- **BAD**: Never use `list[T]` in function parameters +- **GOOD**: Use `Mapping[K, V]` from `collections.abc` for read-only dicts +- **BAD**: Never use `dict[K, V]` in function parameters +- **GOOD**: Use `frozenset[T]` for immutable sets +- **BAD**: Avoid mutable `set[T]` in APIs + +### Modern Generic Syntax (PEP 695) + +```python +# GOOD: Python 3.13+ syntax 
+class Container[T]: + value: T + +def transform[T, U](func: Callable[[T], U], value: T) -> U: + return func(value) + +type Pair[A, B] = tuple[A, B] + +# BAD: Old TypeVar syntax +T = TypeVar('T') # Only use for variance +``` + +### Result Types and Error Handling + +- **GOOD**: Use `Result[T, E]` from returns library +- **BAD**: Never raise exceptions for expected errors +- **GOOD**: Use `Maybe[T]` for nullable values +- **BAD**: Never use `Optional[T]` or raw `None` +- **GOOD**: Use `IO[T]` for side effects +- **BAD**: Never perform I/O in pure functions ## CLI and Command Line Tools -- For CLIs, always use Typer; GOOD: always use `typer` by importing using `import typer` and BAD: never import using `import click` -- When using Annotated, never put the default value in Option(); only use it as the parameter default -- Always use Annotated for CLI parameters; GOOD: `name: Annotated[str, typer.Option("--name")]` and BAD: never use direct assignment -- Group related commands using Typer sub-apps; GOOD: `app.add_typer(users_app, name="users")` and BAD: never have flat command structures -- Always provide help text for commands and options; GOOD: `typer.Option(help="User name")` and BAD: never leave options undocumented -- Use rich for enhanced CLI output; GOOD: `from rich.console import Console` and BAD: never use plain print for complex output -- Implement proper exit codes; GOOD: `raise typer.Exit(code=1)` and BAD: never use `sys.exit()` in CLI commands -- Always validate CLI inputs early; GOOD: validate in the command function and BAD: never defer validation to business logic -- Use typer.echo for output; GOOD: `typer.echo("message")` and BAD: never use print() in CLI commands -- Provide shell completion; GOOD: implement custom completion functions and BAD: never ignore shell integration - -## Error Handling and Result Types - -- Always use Result for fallible operations; GOOD: `Result[Success, Error]` and BAD: never throw exceptions for expected errors -- Chain 
Results with map/bind; GOOD: `result.map(transform).bind(validate)` and BAD: never unwrap Results prematurely -- Use specific error types; GOOD: `Result[User, UserNotFoundError]` and BAD: never use generic `Exception` -- Always handle both Success and Failure cases; GOOD: pattern match or use `result.fold()` and BAD: never ignore error cases -- Create error hierarchies with dataclasses; GOOD: `@dataclass class ValidationError` and BAD: never use string errors -- Use `returns.maybe.Maybe` for nullable values; GOOD: `Maybe[User]` and BAD: never use raw `None` -- Compose error-prone operations; GOOD: `returns.pipeline.pipe` and BAD: never nest try-except blocks -- Log errors at boundaries only; GOOD: log at service edges and BAD: never log deep in business logic -- Always provide error context; GOOD: include relevant ids/values in errors and BAD: never raise context-free errors -- Use `returns.io.IO` for side effects; GOOD: `IO[str]` for file reads and BAD: never perform I/O in pure functions +### Typer Best Practices + +```python +# GOOD: Use Annotated with defaults +def main( + name: Annotated[str, typer.Option()] = "World", + count: Annotated[int, typer.Option()] = 1, +): + pass + +# BAD: Never put defaults in Option() +def main( + name: str = typer.Option("World"), # Wrong! 
+): + pass +``` + +- Always use sub-apps for command groups +- Always provide help text for all commands and options +- Use `rich` for enhanced output, never plain `print()` +- Use `typer.Exit(code=n)` instead of `sys.exit()` +- Always validate inputs early in command functions ## Function Design and Composition -- Keep functions under 20 lines; GOOD: extract helper functions and BAD: never write long procedural functions -- Single responsibility per function; GOOD: one clear purpose and BAD: never mix concerns -- Use descriptive names; GOOD: `calculate_tax_rate()` and BAD: never use `calc()` or `process()` -- Parameters should be immutable; GOOD: accept `Sequence` and BAD: never mutate input parameters -- Return new values; GOOD: `return dataclasses.replace(obj, field=new_value)` and BAD: never modify and return same object -- Limit function parameters to 4; GOOD: use parameter objects and BAD: never have functions with 7+ parameters -- Always type annotate; GOOD: full annotations for params and returns and BAD: never rely on type inference -- Use keyword-only arguments; GOOD: `def fn(*, name: str)` and BAD: never rely on positional args for clarity -- Compose small functions; GOOD: `pipe(data, parse, validate, transform)` and BAD: never write monolithic functions -- Cache pure computations; GOOD: `@functools.cache` for expensive pure functions and BAD: never recompute identical results +### Function Guidelines + +- Keep functions under 20 lines +- Single responsibility per function +- Use descriptive names: `calculate_tax_rate()` not `calc()` +- Parameters should be immutable types +- Return new values, never mutate inputs +- Limit parameters to 4 (use parameter objects if needed) +- Always use full type annotations + +### Keyword-Only Arguments + +```python +# GOOD: Force named arguments for clarity +def create_user(*, name: str, email: str, age: int) -> User: + pass + +# BAD: Positional arguments are ambiguous +def create_user(name: str, email: str, age: int) 
-> User: + pass +``` + +### Function Composition + +```python +from returns.pipeline import flow +from returns.pointfree import bind + +# GOOD: Compose small functions +result = flow( + data, + parse, + bind(validate), + bind(transform), +) + +# BAD: Monolithic functions +def process_everything(data): + # 100 lines of mixed concerns + pass +``` ## Data Structures and Immutability -- Always use frozen dataclasses; GOOD: `@dataclass(frozen=True)` and BAD: never use mutable dataclasses -- Prefer immutable collections; GOOD: `tuple`, `frozenset`, `MappingProxyType` and BAD: avoid `list`, `set`, `dict` in APIs -- Use `copy.deepcopy` sparingly; GOOD: design for immutability and BAD: never rely on deep copying for safety -- Return new instances; GOOD: `dataclasses.replace()` and BAD: never mutate and return -- Use `__slots__` for performance; GOOD: `__slots__ = ('x', 'y')` in classes and BAD: never ignore memory efficiency -- Implement `__eq__` and `__hash__` properly; GOOD: use `@dataclass` or implement both and BAD: never implement just one -- Use enums for fixed choices; GOOD: `class Status(Enum)` and BAD: never use string constants -- Design algebraic data types; GOOD: sum types with Union and BAD: never use inheritance for variants -- Validate at construction; GOOD: `__post_init__` validation and BAD: never allow invalid states -- Use builders for complex objects; GOOD: builder pattern with validation and BAD: never use complex constructors +### Immutable Data Classes + +```python +# GOOD: Frozen dataclass +@dataclass(frozen=True, slots=True) +class Point: + x: float + y: float + + def move(self, dx: float, dy: float) -> Point: + return Point(self.x + dx, self.y + dy) + +# BAD: Mutable class +class Point: + def __init__(self, x: float, y: float): + self.x = x + self.y = y +``` + +### Immutable Collections + +- Use `tuple` instead of `list` for fixed sequences +- Use `frozenset` instead of `set` +- Use `MappingProxyType` for read-only dict views +- Use 
`dataclasses.replace()` to create modified copies +- Implement `__slots__` for memory efficiency + +### Algebraic Data Types + +```python +# Sum types with Union +type Shape = Circle | Rectangle | Triangle + +# Product types with dataclasses +@dataclass(frozen=True) +class Circle: + radius: float + +# Pattern matching +match shape: + case Circle(radius=r): + return pi * r ** 2 + case Rectangle(width=w, height=h): + return w * h +``` ## Testing and Validation -- Write tests first; GOOD: TDD approach and BAD: never write tests after implementation -- One assertion per test; GOOD: focused test cases and BAD: never test multiple behaviors -- Use descriptive test names; GOOD: `test_calculate_tax_returns_zero_for_negative_income` and BAD: never use `test_1` -- Test edge cases; GOOD: empty, null, boundary values and BAD: never test only happy path -- Use property-based testing; GOOD: `hypothesis` for invariants and BAD: never rely only on examples -- Mock at boundaries; GOOD: mock external services and BAD: never mock internal functions -- Use fixtures properly; GOOD: `@pytest.fixture` for reusable setup and BAD: never duplicate test setup -- Test error conditions; GOOD: verify error types and messages and BAD: never ignore failure cases -- Keep tests independent; GOOD: each test runs in isolation and BAD: never depend on test order -- Use test doubles correctly; GOOD: stubs for queries, mocks for commands and BAD: never overuse mocks - -## Code Organization and Architecture - -- One module per concept; GOOD: `user.py`, `payment.py` and BAD: never have `utils.py` or `helpers.py` -- Clear module boundaries; GOOD: explicit public APIs and BAD: never expose internals -- Use `__init__.py` wisely; GOOD: re-export public API and BAD: never leave empty -- Separate concerns; GOOD: business logic, I/O, presentation and BAD: never mix layers -- Dependency injection; GOOD: pass dependencies explicitly and BAD: never use global state -- Use protocols for dependencies; GOOD: depend on 
protocols and BAD: never depend on concrete types -- Group by feature; GOOD: `features/user/`, `features/payment/` and BAD: never group by type -- Keep imports at top; GOOD: standard, third-party, local order and BAD: never import inside functions -- Use relative imports carefully; GOOD: absolute for public API and BAD: never use relative in public modules -- Define clear interfaces; GOOD: protocol or ABC at module boundary and BAD: never leak implementation +### Testing Principles + +- Write tests first (TDD approach) +- One assertion per test +- Descriptive test names: `test_divide_by_zero_returns_failure` +- Test edge cases: empty, None, boundaries +- Use property-based testing with Hypothesis +- Mock only at boundaries (external services) +- Use fixtures for reusable setup +- Test error conditions explicitly +- Keep tests independent +- Use stubs for queries, mocks for commands + +### Property-Based Testing + +```python +from hypothesis import given, strategies as st + +@given(st.lists(st.integers())) +def test_sort_properties(items: list[int]) -> None: + sorted_items = sorted(items) + # Properties that should always hold + assert len(sorted_items) == len(items) + assert all(a <= b for a, b in zip(sorted_items, sorted_items[1:])) + assert set(sorted_items) == set(items) +``` ## Performance and Optimization -- Measure before optimizing; GOOD: use profiling tools and BAD: never optimize prematurely -- Use generators for large datasets; GOOD: `yield` for streaming and BAD: never load everything in memory -- Cache expensive computations; GOOD: `@lru_cache` or `@cache` and BAD: never recompute unchanged results -- Use appropriate data structures; GOOD: `set` for membership, `deque` for queues and BAD: never use wrong structure -- Batch I/O operations; GOOD: bulk reads/writes and BAD: never do I/O in loops -- Use `__slots__` for many instances; GOOD: memory efficiency and BAD: never ignore memory usage -- Prefer `bisect` for sorted data; GOOD: O(log n) operations and 
BAD: never linear search sorted data -- Use `itertools` effectively; GOOD: `chain`, `groupby`, etc. and BAD: never reinvent iterations -- Compile regex once; GOOD: `PATTERN = re.compile()` and BAD: never compile in loops -- Use numpy for numerical work; GOOD: vectorized operations and BAD: never loop over arrays +### Performance Guidelines + +- Measure before optimizing (use `cProfile`, `line_profiler`) +- Use generators for large datasets +- Cache with `@cache` or `@lru_cache` +- Use appropriate data structures: + - `set` for membership tests + - `deque` for queues + - `bisect` for sorted operations +- Batch I/O operations +- Use `__slots__` for many instances +- Compile regex once: `PATTERN = re.compile(...)` +- Use NumPy for numerical work + +### Memory-Efficient Patterns + +```python +# GOOD: Generator for streaming +def read_large_file(path: str) -> Iterator[str]: + with open(path) as f: + yield from f + +# BAD: Loading everything into memory +def read_large_file(path: str) -> list[str]: + with open(path) as f: + return f.readlines() +``` ## Async and Concurrency -- Use async for I/O bound tasks; GOOD: `async def` for I/O and BAD: never block event loop -- Prefer asyncio over threading; GOOD: async/await pattern and BAD: never use threads for I/O -- Use `asyncio.gather` for parallel tasks; GOOD: concurrent execution and BAD: never await sequentially -- Handle async errors properly; GOOD: try/except in async and BAD: never ignore async exceptions -- Use async context managers; GOOD: `async with` for resources and BAD: never leak async resources -- Limit concurrent operations; GOOD: use semaphores and BAD: never overwhelm resources -- Use `asyncio.create_task` wisely; GOOD: for fire-and-forget and BAD: never lose task references -- Test async code properly; GOOD: `pytest-asyncio` and BAD: never test async as sync -- Avoid blocking calls; GOOD: use async libraries and BAD: never call blocking functions in async -- Use queues for task distribution; GOOD: 
`asyncio.Queue` and BAD: never share mutable state +### Async Patterns + +```python +# GOOD: Async for I/O +async def fetch_data(url: str) -> Result[Data, Error]: + async with aiohttp.ClientSession() as session: + try: + async with session.get(url) as response: + data = await response.json() + return Success(Data(**data)) + except Exception as e: + return Failure(Error(str(e))) + +# Use asyncio.gather for parallel operations +results = await asyncio.gather( + fetch_data(url1), + fetch_data(url2), + fetch_data(url3), +) +``` + +- Prefer asyncio over threading for I/O +- Use semaphores to limit concurrent operations +- Use `asyncio.create_task` for fire-and-forget, and keep a reference to each task until it finishes +- Test async code with `pytest-asyncio` +- Never call blocking functions in async code +- Use `asyncio.Queue` for task distribution ## Documentation and Comments -- Write docstrings for public APIs; GOOD: Google/NumPy style and BAD: never leave public functions undocumented -- Document why, not what; GOOD: explain decisions and BAD: never state the obvious -- Keep docs in sync; GOOD: update with code and BAD: never have outdated docs -- Use type hints as documentation; GOOD: self-documenting types and BAD: never rely only on docstrings -- Document exceptions; GOOD: list raised exceptions and BAD: never hide error conditions -- Provide examples; GOOD: doctest examples and BAD: never have abstract docs only -- Link to references; GOOD: cite algorithms/papers and BAD: never leave knowledge implicit -- Document assumptions; GOOD: state preconditions and BAD: never assume context -- Use meaningful variable names; GOOD: self-documenting code and BAD: never use cryptic names -- Keep README updated; GOOD: current setup/usage and BAD: never have stale instructions +### Documentation Standards + +- Write Google/NumPy style docstrings for public APIs +- Document why, not what +- Keep docs in sync with code +- Use type hints as documentation +- Document raised exceptions +- Provide doctest examples +- Link to
references (papers, algorithms) +- Document assumptions and preconditions +- Use meaningful variable names +- Keep README current + +### Example Docstring + +```python +def calculate_discount( + price: Decimal, + discount_percent: Decimal, +) -> Result[Decimal, ValueError]: + """Calculate discounted price. + + Args: + price: Original price (must be positive) + discount_percent: Discount percentage (0-100) + + Returns: + Result containing discounted price or ValueError + + Examples: + >>> calculate_discount(Decimal("100"), Decimal("10")) + Success(Decimal("90")) + + Note: + Uses banker's rounding for currency calculations. + """ +``` ## Security and Safety -- Validate all inputs; GOOD: whitelist validation and BAD: never trust user input -- Use parameterized queries; GOOD: prepared statements and BAD: never concatenate SQL -- Hash passwords properly; GOOD: bcrypt/argon2 and BAD: never store plain text -- Use secrets management; GOOD: environment variables and BAD: never hardcode secrets -- Implement rate limiting; GOOD: protect endpoints and BAD: never allow unlimited requests -- Log security events; GOOD: audit trail and BAD: never ignore suspicious activity -- Use HTTPS everywhere; GOOD: TLS for all connections and BAD: never send data unencrypted -- Validate file uploads; GOOD: check type/size and BAD: never trust file extensions -- Implement CSRF protection; GOOD: tokens for state changes and BAD: never rely on cookies alone -- Keep dependencies updated; GOOD: regular updates and BAD: never ignore security advisories - -## Logging and Monitoring - -- Use structured logging; GOOD: JSON logs with context and BAD: never use print statements -- Log at appropriate levels; GOOD: ERROR for errors, INFO for events and BAD: never log everything as DEBUG -- Include correlation IDs; GOOD: trace requests and BAD: never log without context -- Log at boundaries; GOOD: entry/exit points and BAD: never log in business logic -- Avoid logging sensitive data; GOOD: mask PII and 
BAD: never log passwords/tokens -- Use log aggregation; GOOD: centralized logging and BAD: never rely on local logs -- Set up alerts; GOOD: monitor error rates and BAD: never ignore errors -- Log performance metrics; GOOD: timing, counts and BAD: never fly blind -- Implement health checks; GOOD: `/health` endpoint and BAD: never assume service health -- Use distributed tracing; GOOD: trace across services and BAD: never lose request context +- Validate all inputs with whitelisting +- Use parameterized queries (never concatenate SQL) +- Hash passwords with argon2 or bcrypt +- Use environment variables for secrets +- Implement rate limiting +- Log security events with structured logging +- Use HTTPS everywhere +- Validate file uploads (type, size, content) +- Implement CSRF protection +- Keep dependencies updated ## Database and Persistence -- Use migrations; GOOD: version control schema and BAD: never modify schema manually -- Write idempotent migrations; GOOD: safe to rerun and BAD: never assume migration state -- Use transactions properly; GOOD: ACID guarantees and BAD: never leave data inconsistent -- Implement retry logic; GOOD: handle transient failures and BAD: never fail on first error -- Use connection pooling; GOOD: reuse connections and BAD: never create per-request -- Index foreign keys; GOOD: performance optimization and BAD: never ignore query patterns -- Use EXPLAIN for queries; GOOD: understand performance and BAD: never deploy untested queries -- Implement soft deletes; GOOD: audit trail and BAD: never lose data permanently -- Use read replicas; GOOD: scale read operations and BAD: never overload primary -- Backup regularly; GOOD: automated backups and BAD: never rely on manual process +- Use Alembic for migrations +- Write idempotent migrations +- Use transactions for consistency +- Implement retry logic with backoff +- Use connection pooling +- Index foreign keys and common queries +- Use EXPLAIN ANALYZE for query optimization +- Implement soft 
deletes for audit trails +- Use read replicas for scaling +- Automate backups ## API Design -- Use consistent naming; GOOD: REST conventions and BAD: never mix styles -- Version your APIs; GOOD: `/v1/`, `/v2/` and BAD: never break clients -- Use proper HTTP methods; GOOD: GET for reads, POST for creates and BAD: never use GET for mutations -- Return appropriate status codes; GOOD: 201 for created, 404 for not found and BAD: never return 200 for errors -- Implement pagination; GOOD: limit/offset or cursor and BAD: never return unbounded lists -- Use content negotiation; GOOD: Accept headers and BAD: never hardcode formats -- Implement rate limiting; GOOD: protect resources and BAD: never allow abuse -- Document with OpenAPI; GOOD: machine-readable specs and BAD: never rely on prose -- Use HATEOAS principles; GOOD: discoverable APIs and BAD: never require hardcoded URLs -- Implement idempotency; GOOD: safe retries and BAD: never have side effects on retry - -## Refactoring and Maintenance - -- Refactor in small steps; GOOD: incremental changes and BAD: never big bang refactors -- Keep tests green; GOOD: refactor with confidence and BAD: never break tests -- Use feature flags; GOOD: gradual rollout and BAD: never deploy all at once -- Document refactoring decisions; GOOD: ADRs and BAD: never lose context -- Remove dead code; GOOD: clean as you go and BAD: never leave commented code -- Update dependencies gradually; GOOD: one at a time and BAD: never update all at once -- Use deprecation warnings; GOOD: give users time and BAD: never break without warning -- Measure impact; GOOD: performance metrics and BAD: never assume improvement -- Refactor tests too; GOOD: maintain test quality and BAD: never let tests rot -- Keep refactoring atomic; GOOD: one concept per commit and BAD: never mix refactors with features +- Use strict, consistent REST conventions +- Version APIs: `/api/v1/`, `/api/v2/` +- Use proper HTTP methods and status codes +- Implement pagination with cursors
+- Use content negotiation (Accept headers) +- Implement rate limiting +- Document with OpenAPI/Swagger +- Use HATEOAS principles +- Implement idempotency keys +- Return consistent error formats + +## Tooling and Development Environment + +### Type Checking + +```bash +basedpyright --pythonversion 3.13 +``` + +### Development Workflow + +1. Write type stubs first +2. Implement with TDD +3. Use property-based testing +4. Profile if needed +5. Document public APIs + +## Migration Guide + +### From Imperative to Functional + +```python +# BAD: Imperative style +def process_users(users): + result = [] + for user in users: + if user.active: + user.score = calculate_score(user) + result.append(user) + return result + +# GOOD: Functional style +def process_users(users: Sequence[User]) -> list[User]: + return flow( + users, + filter_by(lambda u: u.active), + map_over(add_score), + ) + +def add_score(user: User) -> User: + return dataclasses.replace( + user, + score=calculate_score(user) + ) +``` + +### From Exceptions to Results + +```python +# BAD: Exception-based +def get_user(user_id: int) -> User: + if not is_valid_id(user_id): + raise ValueError("Invalid ID") + user = db.find_user(user_id) + if not user: + raise NotFoundError("User not found") + return user + +# GOOD: Result-based +def get_user(user_id: int) -> IOResult[User, GetUserError]: + return ( + validate_user_id(user_id) + .bind(lambda vid: find_user_in_db(vid)) + ) +``` + +## Common Patterns + +### Railway-Oriented Programming + +```python +from returns.pipeline import flow +from returns.pointfree import bind + +def process_order(order_data: dict) -> IOResult[Order, ProcessError]: + return flow( + order_data, + validate_order_data, + bind(check_inventory), + bind(calculate_pricing), + bind(reserve_items), + bind(charge_payment), + bind(create_order_record), + ) +``` + +### Dependency Injection with Context + +```python +from returns.context import RequiresContext + +type AppContext = Database | Logger | 
Config + +def get_user_by_email( + email: str +) -> RequiresContext[IOResult[User, Error], AppContext]: + def _inner(ctx: AppContext) -> IOResult[User, Error]: + return ctx.database.find_user(email=email) + return RequiresContext(_inner) +``` + +### Option/Maybe Pattern + +```python +from returns.maybe import Maybe, Some, Nothing + +def find_first[T]( + predicate: Callable[[T], bool], + items: Sequence[T], +) -> Maybe[T]: + for item in items: + if predicate(item): + return Some(item) + return Nothing +``` + +## Best Practices Summary + +1. **Start Pure**: Write pure functions by default, add effects explicitly +2. **Type Everything**: Full type coverage with no `Any` or `cast` +3. **Fail Fast**: Validate at boundaries, return structured errors +4. **Compose Small Functions**: Each function does one thing well +5. **Immutable Data**: Use frozen dataclasses and immutable collections +6. **Track Effects**: Use IO, Result, Maybe to make effects visible +7. **Test Properties**: Use Hypothesis for property-based testing +8. **Document Contracts**: Clear preconditions and postconditions +9. **Version APIs**: Semantic versioning with clear deprecation +10. **Profile When Needed**: Measure before optimizing + +## Recommended Libraries + +- **returns**: Functional programming primitives (Result, Maybe, IO) +- **pyrsistent**: Persistent/immutable data structures +- **toolz**: Functional utilities and function composition +- **more-itertools**: Additional iteration utilities +- **hypothesis**: Property-based testing +- **attrs**: Alternative to dataclasses with validators +- **cattrs**: Serialization/deserialization for structured data +- **rich**: Enhanced terminal output for CLIs +- **typer**: Modern CLI framework +- **polars**: Functional-style DataFrame operations + +## Final Notes + +This guide represents best practices for functional programming in Python 3.13. 
The ecosystem continues to evolve, so: + +- Keep dependencies updated +- Follow PEPs for new type system features +- Contribute to typing discussions +- Share patterns that work well + +Remember: **Make invalid states unrepresentable, make effects explicit, and keep functions pure.** diff --git a/pyproject.toml b/pyproject.toml index b523315..e9e4fe0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ dependencies = [ "mlflow>=3.1.0", "toml>=0.10.2", "returns>=0.25.0", + "dspy>=3.0.0b1", ] [project.urls] diff --git a/uv.lock b/uv.lock index 2d0076e..c982d85 100644 --- a/uv.lock +++ b/uv.lock @@ -422,39 +422,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/51/db/6d34604be92a163309cbf1e3aeb11ea464012cdc33fe11132ea1eff2f072/databricks_sdk-0.57.0-py3-none-any.whl", hash = "sha256:a253bb4c7e00e43654af8b6e29ac79bee72d310e342ec73e148e4e591b75915f", size = 733790 }, ] -[[package]] -name = "datasets" -version = "3.6.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "dill" }, - { name = "filelock" }, - { name = "fsspec", extra = ["http"] }, - { name = "huggingface-hub" }, - { name = "multiprocess" }, - { name = "numpy" }, - { name = "packaging" }, - { name = "pandas" }, - { name = "pyarrow" }, - { name = "pyyaml" }, - { name = "requests" }, - { name = "tqdm" }, - { name = "xxhash" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/1a/89/d3d6fef58a488f8569c82fd293ab7cbd4250244d67f425dcae64c63800ea/datasets-3.6.0.tar.gz", hash = "sha256:1b2bf43b19776e2787e181cfd329cb0ca1a358ea014780c3581e0f276375e041", size = 569336 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/20/34/a08b0ee99715eaba118cbe19a71f7b5e2425c2718ef96007c325944a1152/datasets-3.6.0-py3-none-any.whl", hash = "sha256:25000c4a2c0873a710df127d08a202a06eab7bf42441a6bc278b499c2f72cd1b", size = 491546 }, -] - -[[package]] -name = "dill" -version = "0.3.8" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/17/4d/ac7ffa80c69ea1df30a8aa11b3578692a5118e7cd1aa157e3ef73b092d15/dill-0.3.8.tar.gz", hash = "sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca", size = 184847 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c9/7a/cef76fd8438a42f96db64ddaa85280485a9c395e7df3db8158cfec1eee34/dill-0.3.8-py3-none-any.whl", hash = "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7", size = 116252 }, -] - [[package]] name = "diskcache" version = "5.6.3" @@ -489,15 +456,14 @@ wheels = [ [[package]] name = "dspy" -version = "2.6.27" -source = { registry = "https://pypi.org/simple" } +version = "3.0.0b1" +source = { git = "https://github.com/stanfordnlp/dspy.git#734eff216155207dfea89765a46e935ee1482794" } dependencies = [ { name = "anyio" }, { name = "asyncer" }, { name = "backoff" }, { name = "cachetools" }, { name = "cloudpickle" }, - { name = "datasets" }, { name = "diskcache" }, { name = "joblib" }, { name = "json-repair" }, @@ -506,7 +472,6 @@ dependencies = [ { name = "numpy" }, { name = "openai" }, { name = "optuna" }, - { name = "pandas" }, { name = "pydantic" }, { name = "regex" }, { name = "requests" }, @@ -515,10 +480,6 @@ dependencies = [ { name = "tqdm" }, { name = "ujson" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/38/8a/f7ff1a6d3b5294678f13d17ecfc596f49a59e494b190e4e30f7dea7df1dc/dspy-2.6.27.tar.gz", hash = "sha256:de1c4f6f6d127e0efed894e1915dac40f5d5623e7f1cf3d749c98d790066477a", size = 234604 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/5a/bb/8a75d44bc1b54dea0fa0428eb52b13e7ee533b85841d2c53a53dfc360646/dspy-2.6.27-py3-none-any.whl", hash = "sha256:54e55fd6999b6a46e09b0e49e8c4b71be7dd56a881e66f7a60b8d657650c1a74", size = 297296 }, -] [[package]] name = "dspy-ai" @@ -537,7 +498,7 @@ name = "exceptiongroup" version = "1.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", marker = 
"python_full_version < '3.11'" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/0b/9f/a65090624ecf468cdca03533906e7c69ed7588582240cfe7cc9e770b50eb/exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88", size = 29749 } wheels = [ @@ -735,16 +696,11 @@ wheels = [ [[package]] name = "fsspec" -version = "2025.3.0" +version = "2025.5.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/34/f4/5721faf47b8c499e776bc34c6a8fc17efdf7fdef0b00f398128bc5dcb4ac/fsspec-2025.3.0.tar.gz", hash = "sha256:a935fd1ea872591f2b5148907d103488fc523295e6c64b835cfad8c3eca44972", size = 298491 } +sdist = { url = "https://files.pythonhosted.org/packages/00/f7/27f15d41f0ed38e8fcc488584b57e902b331da7f7c6dcda53721b15838fc/fsspec-2025.5.1.tar.gz", hash = "sha256:2e55e47a540b91843b755e83ded97c6e897fa0942b11490113f09e9c443c2475", size = 303033 } wheels = [ - { url = "https://files.pythonhosted.org/packages/56/53/eb690efa8513166adef3e0669afd31e95ffde69fb3c52ec2ac7223ed6018/fsspec-2025.3.0-py3-none-any.whl", hash = "sha256:efb87af3efa9103f94ca91a7f8cb7a4df91af9f74fc106c9c7ea0efd7277c1b3", size = 193615 }, -] - -[package.optional-dependencies] -http = [ - { name = "aiohttp" }, + { url = "https://files.pythonhosted.org/packages/bb/61/78c7b3851add1481b048b5fdc29067397a1784e2910592bc81bb3f608635/fsspec-2025.5.1-py3-none-any.whl", hash = "sha256:24d3a2e663d5fc735ab256263c4075f374a174c3410c0b25e5bd1970bceaa462", size = 199052 }, ] [[package]] @@ -1089,11 +1045,11 @@ wheels = [ [[package]] name = "json-repair" -version = "0.47.2" +version = "0.47.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/20/b3/4d27ddc50faf1900b5e6230212e802549f19821cea88c934bc6ca9a5d7b9/json_repair-0.47.2.tar.gz", hash = 
"sha256:4c5db08c2af384b96125beba0e59dbe154b13406a2b5eee8089f7e4faec3ebea", size = 33832 } +sdist = { url = "https://files.pythonhosted.org/packages/42/1b/13f80ab76f81552f201fa1e5147d21b2c2c496927665b88ab37e67c185af/json_repair-0.47.3.tar.gz", hash = "sha256:030d036db0e4f7896cfc422dd47e7022c0942ffe14d8d7cd6b1b1abd40f6636a", size = 33863 } wheels = [ - { url = "https://files.pythonhosted.org/packages/c9/b9/d6fb6d0e415282168bc16bc31de3c762c07916f75a9b6ecc346eb6a89e09/json_repair-0.47.2-py3-none-any.whl", hash = "sha256:7387f318f2b982e04d6f3ab70c314324b15d57c8d2bfe17b44215b98fbd71caf", size = 22423 }, + { url = "https://files.pythonhosted.org/packages/0d/20/3beb965d1731ce35e1b1392b4e357fa5ef2ffc14402d28b3c2ef47ff893c/json_repair-0.47.3-py3-none-any.whl", hash = "sha256:a35c909f1b2f4a9bbf5453bc4f60cc173728f394700500ad859feb47914550f5", size = 22449 }, ] [[package]] @@ -1547,24 +1503,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/44/d8/45e8fc9892a7386d074941429e033adb4640e59ff0780d96a8cf46fe788e/multidict-6.5.0-py3-none-any.whl", hash = "sha256:5634b35f225977605385f56153bd95a7133faffc0ffe12ad26e10517537e8dfc", size = 12181 }, ] -[[package]] -name = "multiprocess" -version = "0.70.16" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "dill" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b5/ae/04f39c5d0d0def03247c2893d6f2b83c136bf3320a2154d7b8858f2ba72d/multiprocess-0.70.16.tar.gz", hash = "sha256:161af703d4652a0e1410be6abccecde4a7ddffd19341be0a7011b94aeb171ac1", size = 1772603 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ef/76/6e712a2623d146d314f17598df5de7224c85c0060ef63fd95cc15a25b3fa/multiprocess-0.70.16-pp310-pypy310_pp73-macosx_10_13_x86_64.whl", hash = "sha256:476887be10e2f59ff183c006af746cb6f1fd0eadcfd4ef49e605cbe2659920ee", size = 134980 }, - { url = 
"https://files.pythonhosted.org/packages/0f/ab/1e6e8009e380e22254ff539ebe117861e5bdb3bff1fc977920972237c6c7/multiprocess-0.70.16-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d951bed82c8f73929ac82c61f01a7b5ce8f3e5ef40f5b52553b4f547ce2b08ec", size = 134982 }, - { url = "https://files.pythonhosted.org/packages/bc/f7/7ec7fddc92e50714ea3745631f79bd9c96424cb2702632521028e57d3a36/multiprocess-0.70.16-py310-none-any.whl", hash = "sha256:c4a9944c67bd49f823687463660a2d6daae94c289adff97e0f9d696ba6371d02", size = 134824 }, - { url = "https://files.pythonhosted.org/packages/50/15/b56e50e8debaf439f44befec5b2af11db85f6e0f344c3113ae0be0593a91/multiprocess-0.70.16-py311-none-any.whl", hash = "sha256:af4cabb0dac72abfb1e794fa7855c325fd2b55a10a44628a3c1ad3311c04127a", size = 143519 }, - { url = "https://files.pythonhosted.org/packages/0a/7d/a988f258104dcd2ccf1ed40fdc97e26c4ac351eeaf81d76e266c52d84e2f/multiprocess-0.70.16-py312-none-any.whl", hash = "sha256:fc0544c531920dde3b00c29863377f87e1632601092ea2daca74e4beb40faa2e", size = 146741 }, - { url = "https://files.pythonhosted.org/packages/ea/89/38df130f2c799090c978b366cfdf5b96d08de5b29a4a293df7f7429fa50b/multiprocess-0.70.16-py38-none-any.whl", hash = "sha256:a71d82033454891091a226dfc319d0cfa8019a4e888ef9ca910372a446de4435", size = 132628 }, - { url = "https://files.pythonhosted.org/packages/da/d9/f7f9379981e39b8c2511c9e0326d212accacb82f12fbfdc1aa2ce2a7b2b6/multiprocess-0.70.16-py39-none-any.whl", hash = "sha256:a0bafd3ae1b732eac64be2e72038231c1ba97724b60b09400d68f229fcc2fbf3", size = 133351 }, -] - [[package]] name = "nodejs-wheel-binaries" version = "22.16.0" @@ -2183,11 +2121,11 @@ wheels = [ [[package]] name = "python-dotenv" -version = "1.1.0" +version = "1.1.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/88/2c/7bb1416c5620485aa793f2de31d3df393d3686aa8a8506d11e10e13c5baf/python_dotenv-1.1.0.tar.gz", hash = 
"sha256:41f90bc6f5f177fb41f53e87666db362025010eb28f60a01c9143bfa33a2b2d5", size = 39920 } +sdist = { url = "https://files.pythonhosted.org/packages/f6/b0/4bc07ccd3572a2f9df7e6782f52b0c6c90dcbb803ac4a167702d7d0dfe1e/python_dotenv-1.1.1.tar.gz", hash = "sha256:a8a6399716257f45be6a007360200409fce5cda2661e3dec71d23dc15f6189ab", size = 41978 } wheels = [ - { url = "https://files.pythonhosted.org/packages/1e/18/98a99ad95133c6a6e2005fe89faedf294a748bd5dc803008059409ac9b1e/python_dotenv-1.1.0-py3-none-any.whl", hash = "sha256:d7c01d9e2293916c18baf562d95698754b0dbbb5e74d457c45d4f6561fb9d55d", size = 20256 }, + { url = "https://files.pythonhosted.org/packages/5f/ed/539768cf28c661b5b068d66d96a2f155c4971a5d55684a514c1a0e0dec2f/python_dotenv-1.1.1-py3-none-any.whl", hash = "sha256:31f23644fe2602f88ff55e1f5c79ba497e01224ee7737937930c448e4d0e24dc", size = 20556 }, ] [[package]] @@ -2391,6 +2329,7 @@ name = "robofactor" version = "0.1.1" source = { editable = "." } dependencies = [ + { name = "dspy" }, { name = "dspy-ai" }, { name = "flake8" }, { name = "mlflow" }, @@ -2411,6 +2350,7 @@ dev = [ [package.metadata] requires-dist = [ + { name = "dspy", git = "https://github.com/stanfordnlp/dspy.git" }, { name = "dspy-ai", specifier = "==2.6.19" }, { name = "flake8", specifier = ">=7.2.0" }, { name = "mlflow", specifier = ">=3.1.0" }, @@ -3043,79 +2983,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/52/24/ab44c871b0f07f491e5d2ad12c9bd7358e527510618cb1b803a88e986db1/werkzeug-3.1.3-py3-none-any.whl", hash = "sha256:54b78bf3716d19a65be4fceccc0d1d7b89e608834989dfae50ea87564639213e", size = 224498 }, ] -[[package]] -name = "xxhash" -version = "3.5.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/00/5e/d6e5258d69df8b4ed8c83b6664f2b47d30d2dec551a29ad72a6c69eafd31/xxhash-3.5.0.tar.gz", hash = "sha256:84f2caddf951c9cbf8dc2e22a89d4ccf5d86391ac6418fe81e3c67d0cf60b45f", size = 84241 } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/bb/8a/0e9feca390d512d293afd844d31670e25608c4a901e10202aa98785eab09/xxhash-3.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ece616532c499ee9afbb83078b1b952beffef121d989841f7f4b3dc5ac0fd212", size = 31970 }, - { url = "https://files.pythonhosted.org/packages/16/e6/be5aa49580cd064a18200ab78e29b88b1127e1a8c7955eb8ecf81f2626eb/xxhash-3.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3171f693dbc2cef6477054a665dc255d996646b4023fe56cb4db80e26f4cc520", size = 30801 }, - { url = "https://files.pythonhosted.org/packages/20/ee/b8a99ebbc6d1113b3a3f09e747fa318c3cde5b04bd9c197688fadf0eeae8/xxhash-3.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c5d3e570ef46adaf93fc81b44aca6002b5a4d8ca11bd0580c07eac537f36680", size = 220927 }, - { url = "https://files.pythonhosted.org/packages/58/62/15d10582ef159283a5c2b47f6d799fc3303fe3911d5bb0bcc820e1ef7ff4/xxhash-3.5.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7cb29a034301e2982df8b1fe6328a84f4b676106a13e9135a0d7e0c3e9f806da", size = 200360 }, - { url = "https://files.pythonhosted.org/packages/23/41/61202663ea9b1bd8e53673b8ec9e2619989353dba8cfb68e59a9cbd9ffe3/xxhash-3.5.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5d0d307d27099bb0cbeea7260eb39ed4fdb99c5542e21e94bb6fd29e49c57a23", size = 428528 }, - { url = "https://files.pythonhosted.org/packages/f2/07/d9a3059f702dec5b3b703737afb6dda32f304f6e9da181a229dafd052c29/xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0342aafd421795d740e514bc9858ebddfc705a75a8c5046ac56d85fe97bf196", size = 194149 }, - { url = "https://files.pythonhosted.org/packages/eb/58/27caadf78226ecf1d62dbd0c01d152ed381c14c1ee4ad01f0d460fc40eac/xxhash-3.5.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3dbbd9892c5ebffeca1ed620cf0ade13eb55a0d8c84e0751a6653adc6ac40d0c", size = 
207703 }, - { url = "https://files.pythonhosted.org/packages/b1/08/32d558ce23e1e068453c39aed7b3c1cdc690c177873ec0ca3a90d5808765/xxhash-3.5.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4cc2d67fdb4d057730c75a64c5923abfa17775ae234a71b0200346bfb0a7f482", size = 216255 }, - { url = "https://files.pythonhosted.org/packages/3f/d4/2b971e2d2b0a61045f842b622ef11e94096cf1f12cd448b6fd426e80e0e2/xxhash-3.5.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:ec28adb204b759306a3d64358a5e5c07d7b1dd0ccbce04aa76cb9377b7b70296", size = 202744 }, - { url = "https://files.pythonhosted.org/packages/19/ae/6a6438864a8c4c39915d7b65effd85392ebe22710412902487e51769146d/xxhash-3.5.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:1328f6d8cca2b86acb14104e381225a3d7b42c92c4b86ceae814e5c400dbb415", size = 210115 }, - { url = "https://files.pythonhosted.org/packages/48/7d/b3c27c27d1fc868094d02fe4498ccce8cec9fcc591825c01d6bcb0b4fc49/xxhash-3.5.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:8d47ebd9f5d9607fd039c1fbf4994e3b071ea23eff42f4ecef246ab2b7334198", size = 414247 }, - { url = "https://files.pythonhosted.org/packages/a1/05/918f9e7d2fbbd334b829997045d341d6239b563c44e683b9a7ef8fe50f5d/xxhash-3.5.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b96d559e0fcddd3343c510a0fe2b127fbff16bf346dd76280b82292567523442", size = 191419 }, - { url = "https://files.pythonhosted.org/packages/08/29/dfe393805b2f86bfc47c290b275f0b7c189dc2f4e136fd4754f32eb18a8d/xxhash-3.5.0-cp310-cp310-win32.whl", hash = "sha256:61c722ed8d49ac9bc26c7071eeaa1f6ff24053d553146d5df031802deffd03da", size = 30114 }, - { url = "https://files.pythonhosted.org/packages/7b/d7/aa0b22c4ebb7c3ccb993d4c565132abc641cd11164f8952d89eb6a501909/xxhash-3.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:9bed5144c6923cc902cd14bb8963f2d5e034def4486ab0bbe1f58f03f042f9a9", size = 30003 }, - { url = 
"https://files.pythonhosted.org/packages/69/12/f969b81541ee91b55f1ce469d7ab55079593c80d04fd01691b550e535000/xxhash-3.5.0-cp310-cp310-win_arm64.whl", hash = "sha256:893074d651cf25c1cc14e3bea4fceefd67f2921b1bb8e40fcfeba56820de80c6", size = 26773 }, - { url = "https://files.pythonhosted.org/packages/b8/c7/afed0f131fbda960ff15eee7f304fa0eeb2d58770fade99897984852ef23/xxhash-3.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:02c2e816896dc6f85922ced60097bcf6f008dedfc5073dcba32f9c8dd786f3c1", size = 31969 }, - { url = "https://files.pythonhosted.org/packages/8c/0c/7c3bc6d87e5235672fcc2fb42fd5ad79fe1033925f71bf549ee068c7d1ca/xxhash-3.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6027dcd885e21581e46d3c7f682cfb2b870942feeed58a21c29583512c3f09f8", size = 30800 }, - { url = "https://files.pythonhosted.org/packages/04/9e/01067981d98069eec1c20201f8c145367698e9056f8bc295346e4ea32dd1/xxhash-3.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1308fa542bbdbf2fa85e9e66b1077eea3a88bef38ee8a06270b4298a7a62a166", size = 221566 }, - { url = "https://files.pythonhosted.org/packages/d4/09/d4996de4059c3ce5342b6e1e6a77c9d6c91acce31f6ed979891872dd162b/xxhash-3.5.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c28b2fdcee797e1c1961cd3bcd3d545cab22ad202c846235197935e1df2f8ef7", size = 201214 }, - { url = "https://files.pythonhosted.org/packages/62/f5/6d2dc9f8d55a7ce0f5e7bfef916e67536f01b85d32a9fbf137d4cadbee38/xxhash-3.5.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:924361811732ddad75ff23e90efd9ccfda4f664132feecb90895bade6a1b4623", size = 429433 }, - { url = "https://files.pythonhosted.org/packages/d9/72/9256303f10e41ab004799a4aa74b80b3c5977d6383ae4550548b24bd1971/xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89997aa1c4b6a5b1e5b588979d1da048a3c6f15e55c11d117a56b75c84531f5a", size = 194822 }, - { url = 
"https://files.pythonhosted.org/packages/34/92/1a3a29acd08248a34b0e6a94f4e0ed9b8379a4ff471f1668e4dce7bdbaa8/xxhash-3.5.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:685c4f4e8c59837de103344eb1c8a3851f670309eb5c361f746805c5471b8c88", size = 208538 }, - { url = "https://files.pythonhosted.org/packages/53/ad/7fa1a109663366de42f724a1cdb8e796a260dbac45047bce153bc1e18abf/xxhash-3.5.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:dbd2ecfbfee70bc1a4acb7461fa6af7748ec2ab08ac0fa298f281c51518f982c", size = 216953 }, - { url = "https://files.pythonhosted.org/packages/35/02/137300e24203bf2b2a49b48ce898ecce6fd01789c0fcd9c686c0a002d129/xxhash-3.5.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:25b5a51dc3dfb20a10833c8eee25903fd2e14059e9afcd329c9da20609a307b2", size = 203594 }, - { url = "https://files.pythonhosted.org/packages/23/03/aeceb273933d7eee248c4322b98b8e971f06cc3880e5f7602c94e5578af5/xxhash-3.5.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:a8fb786fb754ef6ff8c120cb96629fb518f8eb5a61a16aac3a979a9dbd40a084", size = 210971 }, - { url = "https://files.pythonhosted.org/packages/e3/64/ed82ec09489474cbb35c716b189ddc1521d8b3de12b1b5ab41ce7f70253c/xxhash-3.5.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:a905ad00ad1e1c34fe4e9d7c1d949ab09c6fa90c919860c1534ff479f40fd12d", size = 415050 }, - { url = "https://files.pythonhosted.org/packages/71/43/6db4c02dcb488ad4e03bc86d70506c3d40a384ee73c9b5c93338eb1f3c23/xxhash-3.5.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:963be41bcd49f53af6d795f65c0da9b4cc518c0dd9c47145c98f61cb464f4839", size = 192216 }, - { url = "https://files.pythonhosted.org/packages/22/6d/db4abec29e7a567455344433d095fdb39c97db6955bb4a2c432e486b4d28/xxhash-3.5.0-cp311-cp311-win32.whl", hash = "sha256:109b436096d0a2dd039c355fa3414160ec4d843dfecc64a14077332a00aeb7da", size = 30120 }, - { url = 
"https://files.pythonhosted.org/packages/52/1c/fa3b61c0cf03e1da4767213672efe186b1dfa4fc901a4a694fb184a513d1/xxhash-3.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:b702f806693201ad6c0a05ddbbe4c8f359626d0b3305f766077d51388a6bac58", size = 30003 }, - { url = "https://files.pythonhosted.org/packages/6b/8e/9e6fc572acf6e1cc7ccb01973c213f895cb8668a9d4c2b58a99350da14b7/xxhash-3.5.0-cp311-cp311-win_arm64.whl", hash = "sha256:c4dcb4120d0cc3cc448624147dba64e9021b278c63e34a38789b688fd0da9bf3", size = 26777 }, - { url = "https://files.pythonhosted.org/packages/07/0e/1bfce2502c57d7e2e787600b31c83535af83746885aa1a5f153d8c8059d6/xxhash-3.5.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:14470ace8bd3b5d51318782cd94e6f94431974f16cb3b8dc15d52f3b69df8e00", size = 31969 }, - { url = "https://files.pythonhosted.org/packages/3f/d6/8ca450d6fe5b71ce521b4e5db69622383d039e2b253e9b2f24f93265b52c/xxhash-3.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:59aa1203de1cb96dbeab595ded0ad0c0056bb2245ae11fac11c0ceea861382b9", size = 30787 }, - { url = "https://files.pythonhosted.org/packages/5b/84/de7c89bc6ef63d750159086a6ada6416cc4349eab23f76ab870407178b93/xxhash-3.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08424f6648526076e28fae6ea2806c0a7d504b9ef05ae61d196d571e5c879c84", size = 220959 }, - { url = "https://files.pythonhosted.org/packages/fe/86/51258d3e8a8545ff26468c977101964c14d56a8a37f5835bc0082426c672/xxhash-3.5.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:61a1ff00674879725b194695e17f23d3248998b843eb5e933007ca743310f793", size = 200006 }, - { url = "https://files.pythonhosted.org/packages/02/0a/96973bd325412feccf23cf3680fd2246aebf4b789122f938d5557c54a6b2/xxhash-3.5.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f2f2c61bee5844d41c3eb015ac652a0229e901074951ae48581d58bfb2ba01be", size = 428326 }, - { url = 
"https://files.pythonhosted.org/packages/11/a7/81dba5010f7e733de88af9555725146fc133be97ce36533867f4c7e75066/xxhash-3.5.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d32a592cac88d18cc09a89172e1c32d7f2a6e516c3dfde1b9adb90ab5df54a6", size = 194380 }, - { url = "https://files.pythonhosted.org/packages/fb/7d/f29006ab398a173f4501c0e4977ba288f1c621d878ec217b4ff516810c04/xxhash-3.5.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:70dabf941dede727cca579e8c205e61121afc9b28516752fd65724be1355cc90", size = 207934 }, - { url = "https://files.pythonhosted.org/packages/8a/6e/6e88b8f24612510e73d4d70d9b0c7dff62a2e78451b9f0d042a5462c8d03/xxhash-3.5.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e5d0ddaca65ecca9c10dcf01730165fd858533d0be84c75c327487c37a906a27", size = 216301 }, - { url = "https://files.pythonhosted.org/packages/af/51/7862f4fa4b75a25c3b4163c8a873f070532fe5f2d3f9b3fc869c8337a398/xxhash-3.5.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:3e5b5e16c5a480fe5f59f56c30abdeba09ffd75da8d13f6b9b6fd224d0b4d0a2", size = 203351 }, - { url = "https://files.pythonhosted.org/packages/22/61/8d6a40f288f791cf79ed5bb113159abf0c81d6efb86e734334f698eb4c59/xxhash-3.5.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:149b7914451eb154b3dfaa721315117ea1dac2cc55a01bfbd4df7c68c5dd683d", size = 210294 }, - { url = "https://files.pythonhosted.org/packages/17/02/215c4698955762d45a8158117190261b2dbefe9ae7e5b906768c09d8bc74/xxhash-3.5.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:eade977f5c96c677035ff39c56ac74d851b1cca7d607ab3d8f23c6b859379cab", size = 414674 }, - { url = "https://files.pythonhosted.org/packages/31/5c/b7a8db8a3237cff3d535261325d95de509f6a8ae439a5a7a4ffcff478189/xxhash-3.5.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fa9f547bd98f5553d03160967866a71056a60960be00356a15ecc44efb40ba8e", size = 192022 }, - { url = 
"https://files.pythonhosted.org/packages/78/e3/dd76659b2811b3fd06892a8beb850e1996b63e9235af5a86ea348f053e9e/xxhash-3.5.0-cp312-cp312-win32.whl", hash = "sha256:f7b58d1fd3551b8c80a971199543379be1cee3d0d409e1f6d8b01c1a2eebf1f8", size = 30170 }, - { url = "https://files.pythonhosted.org/packages/d9/6b/1c443fe6cfeb4ad1dcf231cdec96eb94fb43d6498b4469ed8b51f8b59a37/xxhash-3.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:fa0cafd3a2af231b4e113fba24a65d7922af91aeb23774a8b78228e6cd785e3e", size = 30040 }, - { url = "https://files.pythonhosted.org/packages/0f/eb/04405305f290173acc0350eba6d2f1a794b57925df0398861a20fbafa415/xxhash-3.5.0-cp312-cp312-win_arm64.whl", hash = "sha256:586886c7e89cb9828bcd8a5686b12e161368e0064d040e225e72607b43858ba2", size = 26796 }, - { url = "https://files.pythonhosted.org/packages/c9/b8/e4b3ad92d249be5c83fa72916c9091b0965cb0faeff05d9a0a3870ae6bff/xxhash-3.5.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:37889a0d13b0b7d739cfc128b1c902f04e32de17b33d74b637ad42f1c55101f6", size = 31795 }, - { url = "https://files.pythonhosted.org/packages/fc/d8/b3627a0aebfbfa4c12a41e22af3742cf08c8ea84f5cc3367b5de2d039cce/xxhash-3.5.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:97a662338797c660178e682f3bc180277b9569a59abfb5925e8620fba00b9fc5", size = 30792 }, - { url = "https://files.pythonhosted.org/packages/c3/cc/762312960691da989c7cd0545cb120ba2a4148741c6ba458aa723c00a3f8/xxhash-3.5.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f85e0108d51092bdda90672476c7d909c04ada6923c14ff9d913c4f7dc8a3bc", size = 220950 }, - { url = "https://files.pythonhosted.org/packages/fe/e9/cc266f1042c3c13750e86a535496b58beb12bf8c50a915c336136f6168dc/xxhash-3.5.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cd2fd827b0ba763ac919440042302315c564fdb797294d86e8cdd4578e3bc7f3", size = 199980 }, - { url = 
"https://files.pythonhosted.org/packages/bf/85/a836cd0dc5cc20376de26b346858d0ac9656f8f730998ca4324921a010b9/xxhash-3.5.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:82085c2abec437abebf457c1d12fccb30cc8b3774a0814872511f0f0562c768c", size = 428324 }, - { url = "https://files.pythonhosted.org/packages/b4/0e/15c243775342ce840b9ba34aceace06a1148fa1630cd8ca269e3223987f5/xxhash-3.5.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07fda5de378626e502b42b311b049848c2ef38784d0d67b6f30bb5008642f8eb", size = 194370 }, - { url = "https://files.pythonhosted.org/packages/87/a1/b028bb02636dfdc190da01951d0703b3d904301ed0ef6094d948983bef0e/xxhash-3.5.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c279f0d2b34ef15f922b77966640ade58b4ccdfef1c4d94b20f2a364617a493f", size = 207911 }, - { url = "https://files.pythonhosted.org/packages/80/d5/73c73b03fc0ac73dacf069fdf6036c9abad82de0a47549e9912c955ab449/xxhash-3.5.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:89e66ceed67b213dec5a773e2f7a9e8c58f64daeb38c7859d8815d2c89f39ad7", size = 216352 }, - { url = "https://files.pythonhosted.org/packages/b6/2a/5043dba5ddbe35b4fe6ea0a111280ad9c3d4ba477dd0f2d1fe1129bda9d0/xxhash-3.5.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bcd51708a633410737111e998ceb3b45d3dbc98c0931f743d9bb0a209033a326", size = 203410 }, - { url = "https://files.pythonhosted.org/packages/a2/b2/9a8ded888b7b190aed75b484eb5c853ddd48aa2896e7b59bbfbce442f0a1/xxhash-3.5.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3ff2c0a34eae7df88c868be53a8dd56fbdf592109e21d4bfa092a27b0bf4a7bf", size = 210322 }, - { url = "https://files.pythonhosted.org/packages/98/62/440083fafbc917bf3e4b67c2ade621920dd905517e85631c10aac955c1d2/xxhash-3.5.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:4e28503dccc7d32e0b9817aa0cbfc1f45f563b2c995b7a66c4c8a0d232e840c7", size = 414725 }, - { url = 
"https://files.pythonhosted.org/packages/75/db/009206f7076ad60a517e016bb0058381d96a007ce3f79fa91d3010f49cc2/xxhash-3.5.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a6c50017518329ed65a9e4829154626f008916d36295b6a3ba336e2458824c8c", size = 192070 }, - { url = "https://files.pythonhosted.org/packages/1f/6d/c61e0668943a034abc3a569cdc5aeae37d686d9da7e39cf2ed621d533e36/xxhash-3.5.0-cp313-cp313-win32.whl", hash = "sha256:53a068fe70301ec30d868ece566ac90d873e3bb059cf83c32e76012c889b8637", size = 30172 }, - { url = "https://files.pythonhosted.org/packages/96/14/8416dce965f35e3d24722cdf79361ae154fa23e2ab730e5323aa98d7919e/xxhash-3.5.0-cp313-cp313-win_amd64.whl", hash = "sha256:80babcc30e7a1a484eab952d76a4f4673ff601f54d5142c26826502740e70b43", size = 30041 }, - { url = "https://files.pythonhosted.org/packages/27/ee/518b72faa2073f5aa8e3262408d284892cb79cf2754ba0c3a5870645ef73/xxhash-3.5.0-cp313-cp313-win_arm64.whl", hash = "sha256:4811336f1ce11cac89dcbd18f3a25c527c16311709a89313c3acaf771def2d4b", size = 26801 }, - { url = "https://files.pythonhosted.org/packages/ab/9a/233606bada5bd6f50b2b72c45de3d9868ad551e83893d2ac86dc7bb8553a/xxhash-3.5.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:2014c5b3ff15e64feecb6b713af12093f75b7926049e26a580e94dcad3c73d8c", size = 29732 }, - { url = "https://files.pythonhosted.org/packages/0c/67/f75276ca39e2c6604e3bee6c84e9db8a56a4973fde9bf35989787cf6e8aa/xxhash-3.5.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fab81ef75003eda96239a23eda4e4543cedc22e34c373edcaf744e721a163986", size = 36214 }, - { url = "https://files.pythonhosted.org/packages/0f/f8/f6c61fd794229cc3848d144f73754a0c107854372d7261419dcbbd286299/xxhash-3.5.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e2febf914ace002132aa09169cc572e0d8959d0f305f93d5828c4836f9bc5a6", size = 32020 }, - { url = 
"https://files.pythonhosted.org/packages/79/d3/c029c99801526f859e6b38d34ab87c08993bf3dcea34b11275775001638a/xxhash-3.5.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5d3a10609c51da2a1c0ea0293fc3968ca0a18bd73838455b5bca3069d7f8e32b", size = 40515 }, - { url = "https://files.pythonhosted.org/packages/62/e3/bef7b82c1997579c94de9ac5ea7626d01ae5858aa22bf4fcb38bf220cb3e/xxhash-3.5.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:5a74f23335b9689b66eb6dbe2a931a88fcd7a4c2cc4b1cb0edba8ce381c7a1da", size = 30064 }, -] - [[package]] name = "yarl" version = "1.20.1" From d1a600cfec080255a7b0c00eb13fda238b2069a6 Mon Sep 17 00:00:00 2001 From: ethan-wickstrom Date: Tue, 24 Jun 2025 11:44:46 -0500 Subject: [PATCH 13/26] refactor: remove type-stubs --- typings/dspy/__init__.pyi | 26 --- typings/dspy/__metadata__.pyi | 10 - typings/dspy/adapters/__init__.pyi | 12 -- typings/dspy/adapters/base.pyi | 186 ----------------- typings/dspy/adapters/chat_adapter.pyi | 97 --------- typings/dspy/adapters/json_adapter.pyi | 47 ----- typings/dspy/adapters/two_step_adapter.pyi | 77 ------- typings/dspy/adapters/types/__init__.pyi | 11 - typings/dspy/adapters/types/audio.pyi | 61 ------ typings/dspy/adapters/types/base_type.pyi | 76 ------- typings/dspy/adapters/types/history.pyi | 65 ------ typings/dspy/adapters/types/image.pyi | 65 ------ typings/dspy/adapters/types/tool.pyi | 175 ---------------- typings/dspy/adapters/utils.pyi | 57 ----- typings/dspy/adapters/xml_adapter.pyi | 24 --- typings/dspy/clients/__init__.pyi | 42 ---- typings/dspy/clients/base_lm.pyi | 82 -------- typings/dspy/clients/cache.pyi | 67 ------ typings/dspy/clients/databricks.pyi | 41 ---- typings/dspy/clients/embedding.pyi | 102 --------- typings/dspy/clients/lm.pyi | 76 ------- typings/dspy/clients/lm_local.pyi | 62 ------ typings/dspy/clients/lm_local_arbor.pyi | 103 --------- typings/dspy/clients/openai.pyi | 67 ------ 
typings/dspy/clients/provider.pyi | 80 ------- typings/dspy/clients/utils_finetune.pyi | 67 ------ typings/dspy/datasets/__init__.pyi | 12 -- typings/dspy/datasets/alfworld/__init__.pyi | 6 - typings/dspy/datasets/alfworld/alfworld.pyi | 64 ------ typings/dspy/datasets/colors.pyi | 16 -- typings/dspy/datasets/dataloader.pyi | 42 ---- typings/dspy/datasets/dataset.pyi | 29 --- typings/dspy/datasets/hotpotqa.pyi | 15 -- typings/dspy/datasets/math.pyi | 16 -- typings/dspy/dsp/__init__.pyi | 4 - typings/dspy/dsp/colbertv2.pyi | 73 ------- typings/dspy/dsp/utils/__init__.pyi | 8 - typings/dspy/dsp/utils/dpr.pyi | 131 ------------ typings/dspy/dsp/utils/settings.pyi | 78 ------- typings/dspy/dsp/utils/utils.pyi | 97 --------- typings/dspy/evaluate/__init__.pyi | 9 - typings/dspy/evaluate/auto_evaluation.pyi | 83 -------- typings/dspy/evaluate/evaluate.pyi | 116 ----------- typings/dspy/evaluate/metrics.pyi | 34 --- typings/dspy/predict/__init__.pyi | 17 -- typings/dspy/predict/aggregation.pyi | 15 -- typings/dspy/predict/avatar/__init__.pyi | 8 - typings/dspy/predict/avatar/avatar.pyi | 18 -- typings/dspy/predict/avatar/models.pyi | 32 --- typings/dspy/predict/avatar/signatures.pyi | 18 -- typings/dspy/predict/best_of_n.pyi | 48 ----- typings/dspy/predict/chain_of_thought.pyi | 31 --- typings/dspy/predict/code_act.pyi | 43 ---- typings/dspy/predict/knn.pyi | 46 ----- .../dspy/predict/multi_chain_comparison.pyi | 15 -- typings/dspy/predict/parallel.pyi | 19 -- typings/dspy/predict/parameter.pyi | 8 - typings/dspy/predict/predict.pyi | 62 ------ typings/dspy/predict/program_of_thought.pyi | 39 ---- typings/dspy/predict/react.pyi | 52 ----- typings/dspy/predict/refine.pyi | 81 -------- typings/dspy/predict/retry.pyi | 4 - typings/dspy/primitives/__init__.pyi | 11 - typings/dspy/primitives/base_module.pyi | 87 -------- typings/dspy/primitives/example.pyi | 76 ------- typings/dspy/primitives/module.pyi | 77 ------- typings/dspy/primitives/prediction.pyi | 98 --------- 
.../dspy/primitives/python_interpreter.pyi | 55 ----- typings/dspy/propose/__init__.pyi | 7 - .../propose/dataset_summary_generator.pyi | 31 --- typings/dspy/propose/grounded_proposer.pyi | 55 ----- typings/dspy/propose/propose_base.pyi | 19 -- typings/dspy/propose/utils.pyi | 25 --- typings/dspy/retrievers/__init__.pyi | 8 - typings/dspy/retrievers/databricks_rm.pyi | 135 ------------ typings/dspy/retrievers/embeddings.pyi | 18 -- typings/dspy/retrievers/retrieve.pyi | 37 ---- typings/dspy/retrievers/weaviate_rm.pyi | 65 ------ typings/dspy/signatures/__init__.pyi | 8 - typings/dspy/signatures/field.pyi | 47 ----- typings/dspy/signatures/signature.pyi | 175 ---------------- typings/dspy/signatures/utils.pyi | 10 - typings/dspy/streaming/__init__.pyi | 9 - typings/dspy/streaming/messages.pyi | 96 --------- typings/dspy/streaming/streamify.pyi | 149 ------------- typings/dspy/streaming/streaming_listener.pyi | 46 ----- typings/dspy/teleprompt/__init__.pyi | 20 -- typings/dspy/teleprompt/avatar_optimizer.pyi | 59 ------ typings/dspy/teleprompt/bettertogether.pyi | 20 -- typings/dspy/teleprompt/bootstrap.pyi | 37 ---- .../dspy/teleprompt/bootstrap_finetune.pyi | 76 ------- typings/dspy/teleprompt/copro_optimizer.pyi | 45 ---- typings/dspy/teleprompt/ensemble.pyi | 19 -- typings/dspy/teleprompt/grpo.pyi | 41 ---- typings/dspy/teleprompt/infer_rules.pyi | 44 ---- typings/dspy/teleprompt/knn_fewshot.pyi | 57 ----- .../dspy/teleprompt/mipro_optimizer_v2.pyi | 28 --- typings/dspy/teleprompt/random_search.pyi | 15 -- typings/dspy/teleprompt/signature_opt.pyi | 15 -- typings/dspy/teleprompt/simba.pyi | 39 ---- typings/dspy/teleprompt/simba_utils.pyi | 55 ----- typings/dspy/teleprompt/teleprompt.pyi | 37 ---- typings/dspy/teleprompt/teleprompt_optuna.pyi | 18 -- typings/dspy/teleprompt/utils.pyi | 83 -------- typings/dspy/teleprompt/vanilla.pyi | 15 -- typings/dspy/utils/__init__.pyi | 16 -- typings/dspy/utils/asyncify.pyi | 32 --- typings/dspy/utils/caching.pyi | 10 - 
typings/dspy/utils/callback.pyi | 195 ------------------ typings/dspy/utils/dummies.pyi | 93 --------- typings/dspy/utils/exceptions.pyi | 14 -- typings/dspy/utils/inspect_history.pyi | 8 - typings/dspy/utils/langchain_tool.pyi | 24 --- typings/dspy/utils/logging_utils.pyi | 51 ----- typings/dspy/utils/mcp.pyi | 22 -- typings/dspy/utils/parallelizer.pyi | 18 -- typings/dspy/utils/saving.pyi | 26 --- typings/dspy/utils/unbatchify.pyi | 56 ----- typings/dspy/utils/usage_tracker.pyi | 28 --- 119 files changed, 5827 deletions(-) delete mode 100644 typings/dspy/__init__.pyi delete mode 100644 typings/dspy/__metadata__.pyi delete mode 100644 typings/dspy/adapters/__init__.pyi delete mode 100644 typings/dspy/adapters/base.pyi delete mode 100644 typings/dspy/adapters/chat_adapter.pyi delete mode 100644 typings/dspy/adapters/json_adapter.pyi delete mode 100644 typings/dspy/adapters/two_step_adapter.pyi delete mode 100644 typings/dspy/adapters/types/__init__.pyi delete mode 100644 typings/dspy/adapters/types/audio.pyi delete mode 100644 typings/dspy/adapters/types/base_type.pyi delete mode 100644 typings/dspy/adapters/types/history.pyi delete mode 100644 typings/dspy/adapters/types/image.pyi delete mode 100644 typings/dspy/adapters/types/tool.pyi delete mode 100644 typings/dspy/adapters/utils.pyi delete mode 100644 typings/dspy/adapters/xml_adapter.pyi delete mode 100644 typings/dspy/clients/__init__.pyi delete mode 100644 typings/dspy/clients/base_lm.pyi delete mode 100644 typings/dspy/clients/cache.pyi delete mode 100644 typings/dspy/clients/databricks.pyi delete mode 100644 typings/dspy/clients/embedding.pyi delete mode 100644 typings/dspy/clients/lm.pyi delete mode 100644 typings/dspy/clients/lm_local.pyi delete mode 100644 typings/dspy/clients/lm_local_arbor.pyi delete mode 100644 typings/dspy/clients/openai.pyi delete mode 100644 typings/dspy/clients/provider.pyi delete mode 100644 typings/dspy/clients/utils_finetune.pyi delete mode 100644 
typings/dspy/datasets/__init__.pyi delete mode 100644 typings/dspy/datasets/alfworld/__init__.pyi delete mode 100644 typings/dspy/datasets/alfworld/alfworld.pyi delete mode 100644 typings/dspy/datasets/colors.pyi delete mode 100644 typings/dspy/datasets/dataloader.pyi delete mode 100644 typings/dspy/datasets/dataset.pyi delete mode 100644 typings/dspy/datasets/hotpotqa.pyi delete mode 100644 typings/dspy/datasets/math.pyi delete mode 100644 typings/dspy/dsp/__init__.pyi delete mode 100644 typings/dspy/dsp/colbertv2.pyi delete mode 100644 typings/dspy/dsp/utils/__init__.pyi delete mode 100644 typings/dspy/dsp/utils/dpr.pyi delete mode 100644 typings/dspy/dsp/utils/settings.pyi delete mode 100644 typings/dspy/dsp/utils/utils.pyi delete mode 100644 typings/dspy/evaluate/__init__.pyi delete mode 100644 typings/dspy/evaluate/auto_evaluation.pyi delete mode 100644 typings/dspy/evaluate/evaluate.pyi delete mode 100644 typings/dspy/evaluate/metrics.pyi delete mode 100644 typings/dspy/predict/__init__.pyi delete mode 100644 typings/dspy/predict/aggregation.pyi delete mode 100644 typings/dspy/predict/avatar/__init__.pyi delete mode 100644 typings/dspy/predict/avatar/avatar.pyi delete mode 100644 typings/dspy/predict/avatar/models.pyi delete mode 100644 typings/dspy/predict/avatar/signatures.pyi delete mode 100644 typings/dspy/predict/best_of_n.pyi delete mode 100644 typings/dspy/predict/chain_of_thought.pyi delete mode 100644 typings/dspy/predict/code_act.pyi delete mode 100644 typings/dspy/predict/knn.pyi delete mode 100644 typings/dspy/predict/multi_chain_comparison.pyi delete mode 100644 typings/dspy/predict/parallel.pyi delete mode 100644 typings/dspy/predict/parameter.pyi delete mode 100644 typings/dspy/predict/predict.pyi delete mode 100644 typings/dspy/predict/program_of_thought.pyi delete mode 100644 typings/dspy/predict/react.pyi delete mode 100644 typings/dspy/predict/refine.pyi delete mode 100644 typings/dspy/predict/retry.pyi delete mode 100644 
typings/dspy/primitives/__init__.pyi delete mode 100644 typings/dspy/primitives/base_module.pyi delete mode 100644 typings/dspy/primitives/example.pyi delete mode 100644 typings/dspy/primitives/module.pyi delete mode 100644 typings/dspy/primitives/prediction.pyi delete mode 100644 typings/dspy/primitives/python_interpreter.pyi delete mode 100644 typings/dspy/propose/__init__.pyi delete mode 100644 typings/dspy/propose/dataset_summary_generator.pyi delete mode 100644 typings/dspy/propose/grounded_proposer.pyi delete mode 100644 typings/dspy/propose/propose_base.pyi delete mode 100644 typings/dspy/propose/utils.pyi delete mode 100644 typings/dspy/retrievers/__init__.pyi delete mode 100644 typings/dspy/retrievers/databricks_rm.pyi delete mode 100644 typings/dspy/retrievers/embeddings.pyi delete mode 100644 typings/dspy/retrievers/retrieve.pyi delete mode 100644 typings/dspy/retrievers/weaviate_rm.pyi delete mode 100644 typings/dspy/signatures/__init__.pyi delete mode 100644 typings/dspy/signatures/field.pyi delete mode 100644 typings/dspy/signatures/signature.pyi delete mode 100644 typings/dspy/signatures/utils.pyi delete mode 100644 typings/dspy/streaming/__init__.pyi delete mode 100644 typings/dspy/streaming/messages.pyi delete mode 100644 typings/dspy/streaming/streamify.pyi delete mode 100644 typings/dspy/streaming/streaming_listener.pyi delete mode 100644 typings/dspy/teleprompt/__init__.pyi delete mode 100644 typings/dspy/teleprompt/avatar_optimizer.pyi delete mode 100644 typings/dspy/teleprompt/bettertogether.pyi delete mode 100644 typings/dspy/teleprompt/bootstrap.pyi delete mode 100644 typings/dspy/teleprompt/bootstrap_finetune.pyi delete mode 100644 typings/dspy/teleprompt/copro_optimizer.pyi delete mode 100644 typings/dspy/teleprompt/ensemble.pyi delete mode 100644 typings/dspy/teleprompt/grpo.pyi delete mode 100644 typings/dspy/teleprompt/infer_rules.pyi delete mode 100644 typings/dspy/teleprompt/knn_fewshot.pyi delete mode 100644 
typings/dspy/teleprompt/mipro_optimizer_v2.pyi delete mode 100644 typings/dspy/teleprompt/random_search.pyi delete mode 100644 typings/dspy/teleprompt/signature_opt.pyi delete mode 100644 typings/dspy/teleprompt/simba.pyi delete mode 100644 typings/dspy/teleprompt/simba_utils.pyi delete mode 100644 typings/dspy/teleprompt/teleprompt.pyi delete mode 100644 typings/dspy/teleprompt/teleprompt_optuna.pyi delete mode 100644 typings/dspy/teleprompt/utils.pyi delete mode 100644 typings/dspy/teleprompt/vanilla.pyi delete mode 100644 typings/dspy/utils/__init__.pyi delete mode 100644 typings/dspy/utils/asyncify.pyi delete mode 100644 typings/dspy/utils/caching.pyi delete mode 100644 typings/dspy/utils/callback.pyi delete mode 100644 typings/dspy/utils/dummies.pyi delete mode 100644 typings/dspy/utils/exceptions.pyi delete mode 100644 typings/dspy/utils/inspect_history.pyi delete mode 100644 typings/dspy/utils/langchain_tool.pyi delete mode 100644 typings/dspy/utils/logging_utils.pyi delete mode 100644 typings/dspy/utils/mcp.pyi delete mode 100644 typings/dspy/utils/parallelizer.pyi delete mode 100644 typings/dspy/utils/saving.pyi delete mode 100644 typings/dspy/utils/unbatchify.pyi delete mode 100644 typings/dspy/utils/usage_tracker.pyi diff --git a/typings/dspy/__init__.pyi b/typings/dspy/__init__.pyi deleted file mode 100644 index bd3e92a..0000000 --- a/typings/dspy/__init__.pyi +++ /dev/null @@ -1,26 +0,0 @@ -""" -This type stub file was generated by pyright. 
-""" - -from dspy.predict import * -from dspy.primitives import * -from dspy.retrievers import * -from dspy.signatures import * -from dspy.teleprompt import * -from dspy.evaluate import Evaluate -from dspy.clients import * -from dspy.clients import DSPY_CACHE -from dspy.adapters import Adapter, Audio, BaseType, ChatAdapter, History, Image, JSONAdapter, Tool, ToolCalls, TwoStepAdapter, XMLAdapter -from dspy.utils.logging_utils import configure_dspy_loggers, disable_logging, enable_logging -from dspy.utils.asyncify import asyncify -from dspy.utils.saving import load -from dspy.streaming.streamify import streamify -from dspy.utils.usage_tracker import track_usage -from dspy.dsp.utils.settings import settings -from dspy.dsp.colbertv2 import ColBERTv2 -from dspy.__metadata__ import __author__, __author_email__, __description__, __name__, __url__, __version__ - -configure = ... -context = ... -BootstrapRS = BootstrapFewShotWithRandomSearch -cache = ... diff --git a/typings/dspy/__metadata__.pyi b/typings/dspy/__metadata__.pyi deleted file mode 100644 index 446283b..0000000 --- a/typings/dspy/__metadata__.pyi +++ /dev/null @@ -1,10 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -__name__ = ... -__version__ = ... -__description__ = ... -__url__ = ... -__author__ = ... -__author_email__ = ... diff --git a/typings/dspy/adapters/__init__.pyi b/typings/dspy/adapters/__init__.pyi deleted file mode 100644 index 3188d8f..0000000 --- a/typings/dspy/adapters/__init__.pyi +++ /dev/null @@ -1,12 +0,0 @@ -""" -This type stub file was generated by pyright. 
-""" - -from dspy.adapters.base import Adapter -from dspy.adapters.chat_adapter import ChatAdapter -from dspy.adapters.json_adapter import JSONAdapter -from dspy.adapters.two_step_adapter import TwoStepAdapter -from dspy.adapters.types import Audio, BaseType, History, Image, Tool, ToolCalls -from dspy.adapters.xml_adapter import XMLAdapter - -__all__ = ["Adapter", "ChatAdapter", "BaseType", "History", "Image", "Audio", "JSONAdapter", "XMLAdapter", "TwoStepAdapter", "Tool", "ToolCalls"] diff --git a/typings/dspy/adapters/base.pyi b/typings/dspy/adapters/base.pyi deleted file mode 100644 index abeafe5..0000000 --- a/typings/dspy/adapters/base.pyi +++ /dev/null @@ -1,186 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Any, Optional, TYPE_CHECKING, Type -from dspy.signatures.signature import Signature -from dspy.utils.callback import BaseCallback -from dspy.clients.lm import LM - -logger = ... -if TYPE_CHECKING: - ... -class Adapter: - def __init__(self, callbacks: Optional[list[BaseCallback]] = ...) -> None: - ... - - def __init_subclass__(cls, **kwargs) -> None: - ... - - def __call__(self, lm: LM, lm_kwargs: dict[str, Any], signature: Type[Signature], demos: list[dict[str, Any]], inputs: dict[str, Any]) -> list[dict[str, Any]]: - ... - - async def acall(self, lm: LM, lm_kwargs: dict[str, Any], signature: Type[Signature], demos: list[dict[str, Any]], inputs: dict[str, Any]) -> list[dict[str, Any]]: - ... - - def format(self, signature: Type[Signature], demos: list[dict[str, Any]], inputs: dict[str, Any]) -> list[dict[str, Any]]: - """Format the input messages for the LM call. - - This method converts the DSPy structured input along with few-shot examples and conversation history into - multiturn messages as expected by the LM. For custom adapters, this method can be overridden to customize - the formatting of the input messages. 
- - In general we recommend the messages to have the following structure: - ``` - [ - {"role": "system", "content": system_message}, - # Begin few-shot examples - {"role": "user", "content": few_shot_example_1_input}, - {"role": "assistant", "content": few_shot_example_1_output}, - {"role": "user", "content": few_shot_example_2_input}, - {"role": "assistant", "content": few_shot_example_2_output}, - ... - # End few-shot examples - # Begin conversation history - {"role": "user", "content": conversation_history_1_input}, - {"role": "assistant", "content": conversation_history_1_output}, - {"role": "user", "content": conversation_history_2_input}, - {"role": "assistant", "content": conversation_history_2_output}, - ... - # End conversation history - {"role": "user", "content": current_input}, - ] - - And system message should contain the field description, field structure, and task description. - ``` - - - Args: - signature: The DSPy signature for which to format the input messages. - demos: A list of few-shot examples. - inputs: The input arguments to the DSPy module. - - Returns: - A list of multiturn messages as expected by the LM. - """ - ... - - def format_field_description(self, signature: Type[Signature]) -> str: - """Format the field description for the system message. - - This method formats the field description for the system message. It should return a string that contains - the field description for the input fields and the output fields. - - Args: - signature: The DSPy signature for which to format the field description. - - Returns: - A string that contains the field description for the input fields and the output fields. - """ - ... - - def format_field_structure(self, signature: Type[Signature]) -> str: - """Format the field structure for the system message. - - This method formats the field structure for the system message. 
It should return a string that dictates the - format the input fields should be provided to the LM, and the format the output fields will be in the response. - Refer to the ChatAdapter and JsonAdapter for an example. - - Args: - signature: The DSPy signature for which to format the field structure. - """ - ... - - def format_task_description(self, signature: Type[Signature]) -> str: - """Format the task description for the system message. - - This method formats the task description for the system message. In most cases this is just a thin wrapper - over `signature.instructions`. - - Args: - signature: The DSPy signature of the DSpy module. - - Returns: - A string that describes the task. - """ - ... - - def format_user_message_content(self, signature: Type[Signature], inputs: dict[str, Any], prefix: str = ..., suffix: str = ..., main_request: bool = ...) -> str: - """Format the user message content. - - This method formats the user message content, which can be used in formatting few-shot examples, conversation - history, and the current input. - - Args: - signature: The DSPy signature for which to format the user message content. - inputs: The input arguments to the DSPy module. - prefix: A prefix to the user message content. - suffix: A suffix to the user message content. - - Returns: - A string that contains the user message content. - """ - ... - - def format_assistant_message_content(self, signature: Type[Signature], outputs: dict[str, Any], missing_field_message: Optional[str] = ...) -> str: - """Format the assistant message content. - - This method formats the assistant message content, which can be used in formatting few-shot examples, - conversation history. - - Args: - signature: The DSPy signature for which to format the assistant message content. - outputs: The output fields to be formatted. - missing_field_message: A message to be used when a field is missing. - - Returns: - A string that contains the assistant message content. - """ - ... 
- - def format_demos(self, signature: Type[Signature], demos: list[dict[str, Any]]) -> list[dict[str, Any]]: - """Format the few-shot examples. - - This method formats the few-shot examples as multiturn messages. - - Args: - signature: The DSPy signature for which to format the few-shot examples. - demos: A list of few-shot examples, each element is a dictionary with keys of the input and output fields of - the signature. - - Returns: - A list of multiturn messages. - """ - ... - - def format_conversation_history(self, signature: Type[Signature], history_field_name: str, inputs: dict[str, Any]) -> list[dict[str, Any]]: - """Format the conversation history. - - This method formats the conversation history and the current input as multiturn messages. - - Args: - signature: The DSPy signature for which to format the conversation history. - history_field_name: The name of the history field in the signature. - inputs: The input arguments to the DSPy module. - - Returns: - A list of multiturn messages. - """ - ... - - def parse(self, signature: Type[Signature], completion: str) -> dict[str, Any]: - """Parse the LM output into a dictionary of the output fields. - - This method parses the LM output into a dictionary of the output fields. - - Args: - signature: The DSPy signature for which to parse the LM output. - completion: The LM output to be parsed. - - Returns: - A dictionary of the output fields. - """ - ... - - - diff --git a/typings/dspy/adapters/chat_adapter.pyi b/typings/dspy/adapters/chat_adapter.pyi deleted file mode 100644 index 97ee019..0000000 --- a/typings/dspy/adapters/chat_adapter.pyi +++ /dev/null @@ -1,97 +0,0 @@ -""" -This type stub file was generated by pyright. 
-""" - -from typing import Any, Dict, NamedTuple, Optional, Type -from pydantic.fields import FieldInfo -from dspy.adapters.base import Adapter -from dspy.clients.lm import LM -from dspy.signatures.signature import Signature -from dspy.utils.callback import BaseCallback - -field_header_pattern = ... -class FieldInfoWithName(NamedTuple): - name: str - info: FieldInfo - ... - - -class ChatAdapter(Adapter): - def __init__(self, callbacks: Optional[list[BaseCallback]] = ...) -> None: - ... - - def __call__(self, lm: LM, lm_kwargs: dict[str, Any], signature: Type[Signature], demos: list[dict[str, Any]], inputs: dict[str, Any]) -> list[dict[str, Any]]: - ... - - async def acall(self, lm: LM, lm_kwargs: dict[str, Any], signature: Type[Signature], demos: list[dict[str, Any]], inputs: dict[str, Any]) -> list[dict[str, Any]]: - ... - - def format_field_description(self, signature: Type[Signature]) -> str: - ... - - def format_field_structure(self, signature: Type[Signature]) -> str: - """ - `ChatAdapter` requires input and output fields to be in their own sections, with section header using markers - `[[ ## field_name ## ]]`. An arbitrary field `completed` ([[ ## completed ## ]]) is added to the end of the - output fields section to indicate the end of the output fields. - """ - ... - - def format_task_description(self, signature: Type[Signature]) -> str: - ... - - def format_user_message_content(self, signature: Type[Signature], inputs: dict[str, Any], prefix: str = ..., suffix: str = ..., main_request: bool = ...) -> str: - ... - - def user_message_output_requirements(self, signature: Type[Signature]) -> str: - """Returns a simplified format reminder for the language model. - - In chat-based interactions, language models may lose track of the required output format - as the conversation context grows longer. This method generates a concise reminder of - the expected output structure that can be included in user messages. 
- - Args: - signature (Type[Signature]): The DSPy signature defining the expected input/output fields. - - Returns: - str: A simplified description of the required output format. - - Note: - This is a more lightweight version of `format_field_structure` specifically designed - for inline reminders within chat messages. - """ - ... - - def format_assistant_message_content(self, signature: Type[Signature], outputs: dict[str, Any], missing_field_message=...) -> str: - ... - - def parse(self, signature: Type[Signature], completion: str) -> dict[str, Any]: - ... - - def format_field_with_value(self, fields_with_values: Dict[FieldInfoWithName, Any]) -> str: - """ - Formats the values of the specified fields according to the field's DSPy type (input or output), - annotation (e.g. str, int, etc.), and the type of the value itself. Joins the formatted values - into a single string, which is is a multiline string if there are multiple fields. - - Args: - fields_with_values: A dictionary mapping information about a field to its corresponding - value. - - Returns: - The joined formatted values of the fields, represented as a string - """ - ... - - def format_finetune_data(self, signature: Type[Signature], demos: list[dict[str, Any]], inputs: dict[str, Any], outputs: dict[str, Any]) -> dict[str, list[Any]]: - """ - Format the call data into finetuning data according to the OpenAI API specifications. - - For the chat adapter, this means formatting the data as a list of messages, where each message is a dictionary - with a "role" and "content" key. The role can be "system", "user", or "assistant". Then, the messages are - wrapped in a dictionary with a "messages" key. - """ - ... - - - diff --git a/typings/dspy/adapters/json_adapter.pyi b/typings/dspy/adapters/json_adapter.pyi deleted file mode 100644 index 13775f0..0000000 --- a/typings/dspy/adapters/json_adapter.pyi +++ /dev/null @@ -1,47 +0,0 @@ -""" -This type stub file was generated by pyright. 
-""" - -from typing import Any, Dict, Type -from dspy.adapters.chat_adapter import ChatAdapter, FieldInfoWithName -from dspy.clients.lm import LM -from dspy.signatures.signature import Signature - -logger = ... -class JSONAdapter(ChatAdapter): - def __call__(self, lm: LM, lm_kwargs: dict[str, Any], signature: Type[Signature], demos: list[dict[str, Any]], inputs: dict[str, Any]) -> list[dict[str, Any]]: - ... - - async def acall(self, lm: LM, lm_kwargs: dict[str, Any], signature: Type[Signature], demos: list[dict[str, Any]], inputs: dict[str, Any]) -> list[dict[str, Any]]: - ... - - def format_field_structure(self, signature: Type[Signature]) -> str: - ... - - def user_message_output_requirements(self, signature: Type[Signature]) -> str: - ... - - def format_assistant_message_content(self, signature: Type[Signature], outputs: dict[str, Any], missing_field_message=...) -> str: - ... - - def parse(self, signature: Type[Signature], completion: str) -> dict[str, Any]: - ... - - def format_field_with_value(self, fields_with_values: Dict[FieldInfoWithName, Any], role: str = ...) -> str: - """ - Formats the values of the specified fields according to the field's DSPy type (input or output), - annotation (e.g. str, int, etc.), and the type of the value itself. Joins the formatted values - into a single string, which is a multiline string if there are multiple fields. - - Args: - fields_with_values: A dictionary mapping information about a field to its corresponding value. - Returns: - The joined formatted values of the fields, represented as a string. - """ - ... - - def format_finetune_data(self, signature: Type[Signature], demos: list[dict[str, Any]], inputs: dict[str, Any], outputs: dict[str, Any]) -> dict[str, list[Any]]: - ... 
- - - diff --git a/typings/dspy/adapters/two_step_adapter.pyi b/typings/dspy/adapters/two_step_adapter.pyi deleted file mode 100644 index 2245be2..0000000 --- a/typings/dspy/adapters/two_step_adapter.pyi +++ /dev/null @@ -1,77 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Any, Optional, Type -from dspy.adapters.base import Adapter -from dspy.clients import LM -from dspy.signatures.signature import Signature - -class TwoStepAdapter(Adapter): - """ - A two-stage adapter that: - 1. Uses a simpler, more natural prompt for the main LM - 2. Uses a smaller LM with chat adapter to extract structured data from the response of main LM - This adapter uses a common __call__ logic defined in base Adapter class. - This class is particularly useful when interacting with reasoning models as the main LM since reasoning models - are known to struggle with structured outputs. - - Example: - ``` - import dspy - lm = dspy.LM(model="openai/o3-mini", max_tokens=10000, temperature = 1.0) - adapter = dspy.TwoStepAdapter(dspy.LM("openai/gpt-4o-mini")) - dspy.configure(lm=lm, adapter=adapter) - program = dspy.ChainOfThought("question->answer") - result = program("What is the capital of France?") - print(result) - ``` - """ - def __init__(self, extraction_model: LM) -> None: - ... - - def format(self, signature: Type[Signature], demos: list[dict[str, Any]], inputs: dict[str, Any]) -> list[dict[str, Any]]: - """ - Format a prompt for the first stage with the main LM. - This no specific structure is required for the main LM, we customize the format method - instead of format_field_description or format_field_structure. - - Args: - signature: The signature of the original task - demos: A list of demo examples - inputs: The current input - - Returns: - A list of messages to be passed to the main LM. - """ - ... 
- - def parse(self, signature: Signature, completion: str) -> dict[str, Any]: - """ - Use a smaller LM (extraction_model) with chat adapter to extract structured data - from the raw completion text of the main LM. - - Args: - signature: The signature of the original task - completion: The completion from the main LM - - Returns: - A dictionary containing the extracted structured data. - """ - ... - - async def acall(self, lm: LM, lm_kwargs: dict[str, Any], signature: Type[Signature], demos: list[dict[str, Any]], inputs: dict[str, Any]) -> list[dict[str, Any]]: - ... - - def format_task_description(self, signature: Signature) -> str: - """Create a description of the task based on the signature""" - ... - - def format_user_message_content(self, signature: Type[Signature], inputs: dict[str, Any], prefix: str = ..., suffix: str = ...) -> str: - ... - - def format_assistant_message_content(self, signature: Type[Signature], outputs: dict[str, Any], missing_field_message: Optional[str] = ...) -> str: - ... - - - diff --git a/typings/dspy/adapters/types/__init__.pyi b/typings/dspy/adapters/types/__init__.pyi deleted file mode 100644 index 9e00508..0000000 --- a/typings/dspy/adapters/types/__init__.pyi +++ /dev/null @@ -1,11 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from dspy.adapters.types.audio import Audio -from dspy.adapters.types.base_type import BaseType -from dspy.adapters.types.history import History -from dspy.adapters.types.image import Image -from dspy.adapters.types.tool import Tool, ToolCalls - -__all__ = ["History", "Image", "Audio", "BaseType", "Tool", "ToolCalls"] diff --git a/typings/dspy/adapters/types/audio.pyi b/typings/dspy/adapters/types/audio.pyi deleted file mode 100644 index d21adc1..0000000 --- a/typings/dspy/adapters/types/audio.pyi +++ /dev/null @@ -1,61 +0,0 @@ -""" -This type stub file was generated by pyright. 
-""" - -import pydantic -from typing import Any, Union -from dspy.adapters.types.base_type import BaseType - -SF_AVAILABLE = ... -class Audio(BaseType): - data: str - audio_format: str - model_config = ... - def format(self) -> list[dict[str, Any]]: - ... - - @pydantic.model_validator(mode="before") - @classmethod - def validate_input(cls, values: Any) -> Any: - """ - Validate input for Audio, expecting 'data' and 'audio_format' keys in dictionary. - """ - ... - - @classmethod - def from_url(cls, url: str) -> Audio: - """ - Download an audio file from URL and encode it as base64. - """ - ... - - @classmethod - def from_file(cls, file_path: str) -> Audio: - """ - Read local audio file and encode it as base64. - """ - ... - - @classmethod - def from_array(cls, array: Any, sampling_rate: int, format: str = ...) -> Audio: - """ - Process numpy-like array and encode it as base64. Uses sampling rate and audio format for encoding. - """ - ... - - def __str__(self) -> str: - ... - - def __repr__(self) -> str: - ... - - - -def encode_audio(audio: Union[str, bytes, dict, Audio, Any], sampling_rate: int = ..., format: str = ...) -> dict: - """ - Encode audio to a dict with 'data' and 'audio_format'. - - Accepts: local file path, URL, data URI, dict, Audio instance, numpy array, or bytes (with known format). - """ - ... - diff --git a/typings/dspy/adapters/types/base_type.pyi b/typings/dspy/adapters/types/base_type.pyi deleted file mode 100644 index 19ab9ad..0000000 --- a/typings/dspy/adapters/types/base_type.pyi +++ /dev/null @@ -1,76 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import pydantic -from typing import Any, Union - -CUSTOM_TYPE_START_IDENTIFIER = ... -CUSTOM_TYPE_END_IDENTIFIER = ... -class BaseType(pydantic.BaseModel): - """Base class to support creating custom types for DSPy signatures. - - This is the parent class of DSPy custom types, e.g, dspy.Image. 
Subclasses must implement the `format` method to - return a list of dictionaries (same as the Array of content parts in the OpenAI API user message's content field). - - Example: - - ```python - class Image(BaseType): - url: str - - def format(self) -> list[dict[str, Any]]: - return [{"type": "image_url", "image_url": {"url": self.url}}] - ``` - """ - def format(self) -> Union[list[dict[str, Any]], str]: - ... - - @classmethod - def description(cls) -> str: - """Description of the custom type""" - ... - - @classmethod - def extract_custom_type_from_annotation(cls, annotation): # -> list[type[Self]] | list[Any]: - """Extract all custom types from the annotation. - - This is used to extract all custom types from the annotation of a field, while the annotation can - have arbitrary level of nesting. For example, we detect `Tool` is in `list[dict[str, Tool]]`. - """ - ... - - @pydantic.model_serializer() - def serialize_model(self): # -> str: - ... - - - -def split_message_content_for_custom_types(messages: list[dict[str, Any]]) -> list[dict[str, Any]]: - """Split user message content into a list of content blocks. - - This method splits each user message's content in the `messages` list to be a list of content block, so that - the custom types like `dspy.Image` can be properly formatted for better quality. For example, the split content - may look like below if the user message has a `dspy.Image` object: - - ``` - [ - {"type": "text", "text": "{text_before_image}"}, - {"type": "image_url", "image_url": {"url": "{image_url}"}}, - {"type": "text", "text": "{text_after_image}"}, - ] - ``` - - This is implemented by finding the `<>` and `<>` - in the user message content and splitting the content around them. The `<>` - and `<>` are the reserved identifiers for the custom types as in `dspy.BaseType`. - - Args: - messages: a list of messages sent to the LM. 
The format is the same as [OpenAI API's messages - format](https://platform.openai.com/docs/guides/chat-completions/response-format). - - Returns: - A list of messages with the content split into a list of content blocks around custom types content. - """ - ... - diff --git a/typings/dspy/adapters/types/history.pyi b/typings/dspy/adapters/types/history.pyi deleted file mode 100644 index 86256cd..0000000 --- a/typings/dspy/adapters/types/history.pyi +++ /dev/null @@ -1,65 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import pydantic -from typing import Any - -class History(pydantic.BaseModel): - """Class representing the conversation history. - - The conversation history is a list of messages, each message entity should have keys from the associated signature. - For example, if you have the following signature: - - ``` - class MySignature(dspy.Signature): - question: str = dspy.InputField() - history: dspy.History = dspy.InputField() - answer: str = dspy.OutputField() - ``` - - Then the history should be a list of dictionaries with keys "question" and "answer". 
- - Example: - ``` - import dspy - - dspy.settings.configure(lm=dspy.LM("openai/gpt-4o-mini")) - - class MySignature(dspy.Signature): - question: str = dspy.InputField() - history: dspy.History = dspy.InputField() - answer: str = dspy.OutputField() - - history = dspy.History( - messages=[ - {"question": "What is the capital of France?", "answer": "Paris"}, - {"question": "What is the capital of Germany?", "answer": "Berlin"}, - ] - ) - - predict = dspy.Predict(MySignature) - outputs = predict(question="What is the capital of France?", history=history) - ``` - - Example of capturing the conversation history: - ``` - import dspy - - dspy.settings.configure(lm=dspy.LM("openai/gpt-4o-mini")) - - class MySignature(dspy.Signature): - question: str = dspy.InputField() - history: dspy.History = dspy.InputField() - answer: str = dspy.OutputField() - - predict = dspy.Predict(MySignature) - outputs = predict(question="What is the capital of France?") - history = dspy.History(messages=[{"question": "What is the capital of France?", **outputs}]) - outputs_with_history = predict(question="Are you sure?", history=history) - ``` - """ - messages: list[dict[str, Any]] - model_config = ... - - diff --git a/typings/dspy/adapters/types/image.pyi b/typings/dspy/adapters/types/image.pyi deleted file mode 100644 index 5c4140e..0000000 --- a/typings/dspy/adapters/types/image.pyi +++ /dev/null @@ -1,65 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import pydantic -from typing import Any, Union -from dspy.adapters.types.base_type import BaseType -from PIL import Image as PILImage - -PIL_AVAILABLE = ... -class Image(BaseType): - url: str - model_config = ... - def format(self) -> Union[list[dict[str, Any]], str]: - ... - - @pydantic.model_validator(mode="before") - @classmethod - def validate_input(cls, values): # -> dict[str, str] | dict[Any, Any] | dict[str, Any]: - ... - - @classmethod - def from_url(cls, url: str, download: bool = ...): # -> Self: - ... 
- - @classmethod - def from_file(cls, file_path: str): # -> Self: - ... - - @classmethod - def from_PIL(cls, pil_image): # -> Self: - ... - - def __str__(self) -> str: - ... - - def __repr__(self): # -> str: - ... - - - -def is_url(string: str) -> bool: - """Check if a string is a valid URL.""" - ... - -def encode_image(image: Union[str, bytes, PILImage.Image, dict], download_images: bool = ...) -> str: - """ - Encode an image or file to a base64 data URI. - - Args: - image: The image or file to encode. Can be a PIL Image, file path, URL, or data URI. - download_images: Whether to download images from URLs. - - Returns: - str: The data URI of the file or the URL if download_images is False. - - Raises: - ValueError: If the file type is not supported. - """ - ... - -def is_image(obj) -> bool: - """Check if the object is an image or a valid media file reference.""" - ... - diff --git a/typings/dspy/adapters/types/tool.pyi b/typings/dspy/adapters/types/tool.pyi deleted file mode 100644 index 18ab60c..0000000 --- a/typings/dspy/adapters/types/tool.pyi +++ /dev/null @@ -1,175 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import mcp -from typing import Any, Callable, Optional, TYPE_CHECKING, Tuple, Type -from pydantic import BaseModel -from dspy.adapters.types.base_type import BaseType -from dspy.utils.callback import with_callbacks -from langchain.tools import BaseTool - -if TYPE_CHECKING: - ... -_TYPE_MAPPING = ... -class Tool(BaseType): - """Tool class. - - This class is used to simplify the creation of tools for tool calling (function calling) in LLMs. Only supports - functions for now. - """ - func: Callable - name: Optional[str] = ... - desc: Optional[str] = ... - args: Optional[dict[str, Any]] = ... - arg_types: Optional[dict[str, Any]] = ... - arg_desc: Optional[dict[str, str]] = ... - has_kwargs: bool = ... 
- def __init__(self, func: Callable, name: Optional[str] = ..., desc: Optional[str] = ..., args: Optional[dict[str, Any]] = ..., arg_types: Optional[dict[str, Any]] = ..., arg_desc: Optional[dict[str, str]] = ...) -> None: - """Initialize the Tool class. - - Users can choose to specify the `name`, `desc`, `args`, and `arg_types`, or let the `dspy.Tool` - automatically infer the values from the function. For values that are specified by the user, automatic inference - will not be performed on them. - - Args: - func (Callable): The actual function that is being wrapped by the tool. - name (Optional[str], optional): The name of the tool. Defaults to None. - desc (Optional[str], optional): The description of the tool. Defaults to None. - args (Optional[dict[str, Any]], optional): The args and their schema of the tool, represented as a - dictionary from arg name to arg's json schema. Defaults to None. - arg_types (Optional[dict[str, Any]], optional): The argument types of the tool, represented as a dictionary - from arg name to the type of the argument. Defaults to None. - arg_desc (Optional[dict[str, str]], optional): Descriptions for each arg, represented as a - dictionary from arg name to description string. Defaults to None. - - Example: - - ```python - def foo(x: int, y: str = "hello"): - return str(x) + y - - tool = Tool(foo) - print(tool.args) - # Expected output: {'x': {'type': 'integer'}, 'y': {'type': 'string', 'default': 'hello'}} - ``` - """ - ... - - def format(self): # -> str: - ... - - def format_as_litellm_function_call(self): # -> dict[str, str | dict[str, str | dict[str, str | dict[str, Any] | list[str] | None] | None]]: - ... - - @with_callbacks - def __call__(self, **kwargs): # -> Any: - ... - - @with_callbacks - async def acall(self, **kwargs): # -> Any: - ... - - @classmethod - def from_mcp_tool(cls, session: mcp.client.session.ClientSession, tool: mcp.types.Tool) -> Tool: - """ - Build a DSPy tool from an MCP tool and a ClientSession. 
- - Args: - session: The MCP session to use. - tool: The MCP tool to convert. - - Returns: - A Tool object. - """ - ... - - @classmethod - def from_langchain(cls, tool: BaseTool) -> Tool: - """ - Build a DSPy tool from a LangChain tool. - - Args: - tool: The LangChain tool to convert. - - Returns: - A Tool object. - - Example: - - ```python - import asyncio - import dspy - from langchain.tools import tool as lc_tool - - @lc_tool - def add(x: int, y: int): - "Add two numbers together." - return x + y - - dspy_tool = dspy.Tool.from_langchain(add) - - async def run_tool(): - return await dspy_tool.acall(x=1, y=2) - - print(asyncio.run(run_tool())) - # 3 - ``` - """ - ... - - def __repr__(self): # -> str: - ... - - def __str__(self) -> str: - ... - - - -class ToolCalls(BaseType): - class ToolCall(BaseModel): - name: str - args: dict[str, Any] - ... - - - tool_calls: list[ToolCall] - @classmethod - def from_dict_list(cls, tool_calls_dicts: list[dict[str, Any]]) -> ToolCalls: - """Convert a list of dictionaries to a ToolCalls instance. - - Args: - dict_list: A list of dictionaries, where each dictionary should have 'name' and 'args' keys. - - Returns: - A ToolCalls instance. - - Example: - - ```python - tool_calls_dict = [ - {"name": "search", "args": {"query": "hello"}}, - {"name": "translate", "args": {"text": "world"}} - ] - tool_calls = ToolCalls.from_dict_list(tool_calls_dict) - ``` - """ - ... - - @classmethod - def description(cls) -> str: - ... - - - -def convert_input_schema_to_tool_args(schema: dict[str, Any]) -> Tuple[dict[str, Any], dict[str, Type], dict[str, str]]: - """Convert an input json schema to tool arguments compatible with DSPy Tool. - - Args: - schema: An input json schema describing the tool's input parameters - - Returns: - A tuple of (args, arg_types, arg_desc) for DSPy Tool definition. - """ - ... 
- diff --git a/typings/dspy/adapters/utils.pyi b/typings/dspy/adapters/utils.pyi deleted file mode 100644 index 36b9a99..0000000 --- a/typings/dspy/adapters/utils.pyi +++ /dev/null @@ -1,57 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Any, Union -from pydantic.fields import FieldInfo - -def serialize_for_json(value: Any) -> Any: - """ - Formats the specified value so that it can be serialized as a JSON string. - - Args: - value: The value to format as a JSON string. - Returns: - The formatted value, which is serializable as a JSON string. - """ - ... - -def format_field_value(field_info: FieldInfo, value: Any, assume_text=...) -> Union[str, dict]: - """ - Formats the value of the specified field according to the field's DSPy type (input or output), - annotation (e.g. str, int, etc.), and the type of the value itself. - - Args: - field_info: Information about the field, including its DSPy field type and annotation. - value: The value of the field. - Returns: - The formatted value of the field, represented as a string. - """ - ... - -def translate_field_type(field_name, field_info): # -> str: - ... - -def find_enum_member(enum, identifier): - """ - Finds the enum member corresponding to the specified identifier, which may be the - enum member's name or value. - - Args: - enum: The enum to search for the member. - identifier: If the enum is explicitly-valued, this is the value of the enum member to find. - If the enum is auto-valued, this is the name of the enum member to find. - Returns: - The enum member corresponding to the specified identifier. - """ - ... - -def parse_value(value, annotation): # -> str | EnumMeta: - ... - -def get_annotation_name(annotation): # -> str: - ... - -def get_field_description_string(fields: dict) -> str: - ... 
- diff --git a/typings/dspy/adapters/xml_adapter.pyi b/typings/dspy/adapters/xml_adapter.pyi deleted file mode 100644 index da4fb63..0000000 --- a/typings/dspy/adapters/xml_adapter.pyi +++ /dev/null @@ -1,24 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Any, Dict, Optional, Type -from dspy.adapters.chat_adapter import ChatAdapter, FieldInfoWithName -from dspy.signatures.signature import Signature -from dspy.utils.callback import BaseCallback - -class XMLAdapter(ChatAdapter): - def __init__(self, callbacks: Optional[list[BaseCallback]] = ...) -> None: - ... - - def format_field_with_value(self, fields_with_values: Dict[FieldInfoWithName, Any]) -> str: - ... - - def user_message_output_requirements(self, signature: Type[Signature]) -> str: - ... - - def parse(self, signature: Type[Signature], completion: str) -> dict[str, Any]: - ... - - - diff --git a/typings/dspy/clients/__init__.pyi b/typings/dspy/clients/__init__.pyi deleted file mode 100644 index 9ee8265..0000000 --- a/typings/dspy/clients/__init__.pyi +++ /dev/null @@ -1,42 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import logging -import os -import litellm -from pathlib import Path -from typing import Optional -from litellm.caching.caching import Cache as LitellmCache -from dspy.clients.base_lm import BaseLM, inspect_history -from dspy.clients.cache import Cache -from dspy.clients.embedding import Embedder -from dspy.clients.lm import LM -from dspy.clients.provider import Provider, TrainingJob - -logger = ... -DISK_CACHE_DIR = ... -DISK_CACHE_LIMIT = ... -def configure_cache(enable_disk_cache: Optional[bool] = ..., enable_memory_cache: Optional[bool] = ..., disk_cache_dir: Optional[str] = ..., disk_size_limit_bytes: Optional[int] = ..., memory_max_entries: Optional[int] = ..., enable_litellm_cache: bool = ...): # -> None: - """Configure the cache for DSPy. - - Args: - enable_disk_cache: Whether to enable on-disk cache. 
- enable_memory_cache: Whether to enable in-memory cache. - disk_cache_dir: The directory to store the on-disk cache. - disk_size_limit_bytes: The size limit of the on-disk cache. - memory_max_entries: The maximum number of entries in the in-memory cache. - enable_litellm_cache: Whether to enable LiteLLM cache. - """ - ... - -DSPY_CACHE = ... -if "LITELLM_LOCAL_MODEL_COST_MAP" not in os.environ: - ... -def enable_litellm_logging(): # -> None: - ... - -def disable_litellm_logging(): # -> None: - ... - -__all__ = ["BaseLM", "LM", "Provider", "TrainingJob", "inspect_history", "Embedder", "enable_litellm_logging", "disable_litellm_logging", "configure_cache"] diff --git a/typings/dspy/clients/base_lm.pyi b/typings/dspy/clients/base_lm.pyi deleted file mode 100644 index 6cfa32a..0000000 --- a/typings/dspy/clients/base_lm.pyi +++ /dev/null @@ -1,82 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from dspy.utils.callback import with_callbacks - -MAX_HISTORY_SIZE = ... -GLOBAL_HISTORY = ... -class BaseLM: - """Base class for handling LLM calls. - - Most users can directly use the `dspy.LM` class, which is a subclass of `BaseLM`. Users can also implement their - own subclasses of `BaseLM` to support custom LLM providers and inject custom logic. To do so, simply override the - `forward` method and make sure the return format is identical to the - [OpenAI response format](https://platform.openai.com/docs/api-reference/responses/object). 
- - Example: - - ```python - from openai import OpenAI - - import dspy - - - class MyLM(dspy.BaseLM): - def forward(self, prompt, messages=None, **kwargs): - client = OpenAI() - return client.chat.completions.create( - model=self.model, - messages=messages or [{"role": "user", "content": prompt}], - **self.kwargs, - ) - - - lm = MyLM(model="gpt-4o-mini") - dspy.configure(lm=lm) - print(dspy.Predict("q->a")(q="Why did the chicken cross the kitchen?")) - ``` - """ - def __init__(self, model, model_type=..., temperature=..., max_tokens=..., cache=..., **kwargs) -> None: - ... - - @with_callbacks - def __call__(self, prompt=..., messages=..., **kwargs): # -> list[Any]: - ... - - @with_callbacks - async def acall(self, prompt=..., messages=..., **kwargs): # -> list[Any]: - ... - - def forward(self, prompt=..., messages=..., **kwargs): - """Forward pass for the language model. - - Subclasses must implement this method, and the response should be identical to - [OpenAI response format](https://platform.openai.com/docs/api-reference/responses/object). - """ - ... - - async def aforward(self, prompt=..., messages=..., **kwargs): - """Async forward pass for the language model. - - Subclasses that support async should implement this method, and the response should be identical to - [OpenAI response format](https://platform.openai.com/docs/api-reference/responses/object). - """ - ... - - def copy(self, **kwargs): # -> Self: - """Returns a copy of the language model with possibly updated parameters.""" - ... - - def inspect_history(self, n: int = ...): # -> None: - ... - - def update_global_history(self, entry): # -> None: - ... - - - -def inspect_history(n: int = ...): # -> None: - """The global history shared across all LMs.""" - ... - diff --git a/typings/dspy/clients/cache.pyi b/typings/dspy/clients/cache.pyi deleted file mode 100644 index a6e4853..0000000 --- a/typings/dspy/clients/cache.pyi +++ /dev/null @@ -1,67 +0,0 @@ -""" -This type stub file was generated by pyright. 
-""" - -from typing import Any, Dict, Optional - -logger = ... -class Cache: - """DSPy Cache - - `Cache` provides 2 levels of caching (in the given order): - 1. In-memory cache - implemented with cachetools.LRUCache - 2. On-disk cache - implemented with diskcache.FanoutCache - """ - def __init__(self, enable_disk_cache: bool, enable_memory_cache: bool, disk_cache_dir: str, disk_size_limit_bytes: Optional[int] = ..., memory_max_entries: Optional[int] = ...) -> None: - """ - Args: - enable_disk_cache: Whether to enable on-disk cache. - enable_memory_cache: Whether to enable in-memory cache. - disk_cache_dir: The directory where the disk cache is stored. - disk_size_limit_bytes: The maximum size of the disk cache (in bytes). - memory_max_entries: The maximum size of the in-memory cache (in number of items). - """ - ... - - def __contains__(self, key: str) -> bool: - """Check if a key is in the cache.""" - ... - - def cache_key(self, request: Dict[str, Any], ignored_args_for_cache_key: Optional[list[str]] = ...) -> str: - """ - Obtain a unique cache key for the given request dictionary by hashing its JSON - representation. For request fields having types that are known to be JSON-incompatible, - convert them to a JSON-serializable format before hashing. - """ - ... - - def get(self, request: Dict[str, Any], ignored_args_for_cache_key: Optional[list[str]] = ...) -> Any: - ... - - def put(self, request: Dict[str, Any], value: Any, ignored_args_for_cache_key: Optional[list[str]] = ..., enable_memory_cache: bool = ...) -> None: - ... - - def reset_memory_cache(self) -> None: - ... - - def save_memory_cache(self, filepath: str) -> None: - ... - - def load_memory_cache(self, filepath: str) -> None: - ... 
- - - -def request_cache(cache_arg_name: Optional[str] = ..., ignored_args_for_cache_key: Optional[list[str]] = ..., enable_memory_cache: bool = ..., *, maxsize: Optional[int] = ...): # -> Callable[..., _Wrapped[..., Any, ..., CoroutineType[Any, Any, Any]] | _Wrapped[..., Any, ..., Any]]: - """ - Decorator for applying caching to a function based on the request argument. - - Args: - cache_arg_name: The name of the argument that contains the request. If not provided, the entire kwargs is used - as the request. - ignored_args_for_cache_key: A list of arguments to ignore when computing the cache key from the request. - enable_memory_cache: Whether to enable in-memory cache at call time. If False, the memory cache will not be - written to on new data. - """ - ... - diff --git a/typings/dspy/clients/databricks.pyi b/typings/dspy/clients/databricks.pyi deleted file mode 100644 index 264de31..0000000 --- a/typings/dspy/clients/databricks.pyi +++ /dev/null @@ -1,41 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Any, Dict, List, Optional, TYPE_CHECKING, Union -from dspy.clients.provider import Provider, TrainingJob -from dspy.clients.utils_finetune import TrainDataFormat - -if TYPE_CHECKING: - ... -logger = ... -class TrainingJobDatabricks(TrainingJob): - def __init__(self, finetuning_run=..., *args, **kwargs) -> None: - ... - - def status(self): # -> None: - ... - - - -class DatabricksProvider(Provider): - finetunable = ... - TrainingJob = TrainingJobDatabricks - @staticmethod - def is_provider_model(model: str) -> bool: - ... - - @staticmethod - def deploy_finetuned_model(model: str, data_format: Optional[TrainDataFormat] = ..., databricks_host: Optional[str] = ..., databricks_token: Optional[str] = ..., deploy_timeout: int = ...): # -> None: - ... 
- - @staticmethod - def finetune(job: TrainingJobDatabricks, model: str, train_data: List[Dict[str, Any]], train_data_format: Optional[Union[TrainDataFormat, str]] = ..., train_kwargs: Optional[Dict[str, Any]] = ...) -> str: - ... - - @staticmethod - def upload_data(train_data: List[Dict[str, Any]], databricks_unity_catalog_path: str, data_format: TrainDataFormat): # -> str: - ... - - - diff --git a/typings/dspy/clients/embedding.pyi b/typings/dspy/clients/embedding.pyi deleted file mode 100644 index 29162a0..0000000 --- a/typings/dspy/clients/embedding.pyi +++ /dev/null @@ -1,102 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import numpy as np -from typing import Any, Callable, Optional, Union - -class Embedder: - """DSPy embedding class. - - The class for computing embeddings for text inputs. This class provides a unified interface for both: - - 1. Hosted embedding models (e.g. OpenAI's text-embedding-3-small) via litellm integration - 2. Custom embedding functions that you provide - - For hosted models, simply pass the model name as a string (e.g., "openai/text-embedding-3-small"). The class will use - litellm to handle the API calls and caching. - - For custom embedding models, pass a callable function that: - - Takes a list of strings as input. - - Returns embeddings as either: - - A 2D numpy array of float32 values - - A 2D list of float32 values - - Each row should represent one embedding vector - - Args: - model: The embedding model to use. This can be either a string (representing the name of the hosted embedding - model, must be an embedding model supported by litellm) or a callable that represents a custom embedding - model. - batch_size (int, optional): The default batch size for processing inputs in batches. Defaults to 200. - caching (bool, optional): Whether to cache the embedding response when using a hosted model. Defaults to True. - **kwargs: Additional default keyword arguments to pass to the embedding model. 
- - Examples: - Example 1: Using a hosted model. - - ```python - import dspy - - embedder = dspy.Embedder("openai/text-embedding-3-small", batch_size=100) - embeddings = embedder(["hello", "world"]) - - assert embeddings.shape == (2, 1536) - ``` - - Example 2: Using any local embedding model, e.g. from https://huggingface.co/models?library=sentence-transformers. - - ```python - # pip install sentence_transformers - import dspy - from sentence_transformers import SentenceTransformer - - # Load an extremely efficient local model for retrieval - model = SentenceTransformer("sentence-transformers/static-retrieval-mrl-en-v1", device="cpu") - - embedder = dspy.Embedder(model.encode) - embeddings = embedder(["hello", "world"], batch_size=1) - - assert embeddings.shape == (2, 1024) - ``` - - Example 3: Using a custom function. - - ```python - import dspy - import numpy as np - - def my_embedder(texts): - return np.random.rand(len(texts), 10) - - embedder = dspy.Embedder(my_embedder) - embeddings = embedder(["hello", "world"], batch_size=1) - - assert embeddings.shape == (2, 10) - ``` - """ - def __init__(self, model: Union[str, Callable], batch_size: int = ..., caching: bool = ..., **kwargs: dict[str, Any]) -> None: - ... - - def __call__(self, inputs: Union[str, list[str]], batch_size: Optional[int] = ..., caching: Optional[bool] = ..., **kwargs: dict[str, Any]) -> np.ndarray: - """Compute embeddings for the given inputs. - - Args: - inputs: The inputs to compute embeddings for, can be a single string or a list of strings. - batch_size (int, optional): The batch size for processing inputs. If None, defaults to the batch_size set - during initialization. - caching (bool, optional): Whether to cache the embedding response when using a hosted model. If None, - defaults to the caching setting from initialization. - kwargs: Additional keyword arguments to pass to the embedding model. These will override the default - kwargs provided during initialization. 
- - Returns: - numpy.ndarray: If the input is a single string, returns a 1D numpy array representing the embedding. - If the input is a list of strings, returns a 2D numpy array of embeddings, one embedding per row. - """ - ... - - async def acall(self, inputs, batch_size=..., caching=..., **kwargs): # -> Any | NDArray[floating[_32Bit]]: - ... - - - diff --git a/typings/dspy/clients/lm.pyi b/typings/dspy/clients/lm.pyi deleted file mode 100644 index ff26701..0000000 --- a/typings/dspy/clients/lm.pyi +++ /dev/null @@ -1,76 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Any, Dict, List, Literal, Optional -from dspy.clients.provider import Provider, ReinforceJob, TrainingJob -from dspy.clients.utils_finetune import TrainDataFormat -from dspy.utils.callback import BaseCallback -from .base_lm import BaseLM - -logger = ... -class LM(BaseLM): - """ - A language model supporting chat or text completion requests for use with DSPy modules. - """ - def __init__(self, model: str, model_type: Literal["chat", "text"] = ..., temperature: float = ..., max_tokens: int = ..., cache: bool = ..., cache_in_memory: bool = ..., callbacks: Optional[List[BaseCallback]] = ..., num_retries: int = ..., provider: Optional[Provider] = ..., finetuning_model: Optional[str] = ..., launch_kwargs: Optional[dict[str, Any]] = ..., train_kwargs: Optional[dict[str, Any]] = ..., **kwargs) -> None: - """ - Create a new language model instance for use with DSPy modules and programs. - - Args: - model: The model to use. This should be a string of the form ``"llm_provider/llm_name"`` - supported by LiteLLM. For example, ``"openai/gpt-4o"``. - model_type: The type of the model, either ``"chat"`` or ``"text"``. - temperature: The sampling temperature to use when generating responses. - max_tokens: The maximum number of tokens to generate per response. - cache: Whether to cache the model responses for reuse to improve performance - and reduce costs. 
- cache_in_memory (deprecated): To enable additional caching with LRU in memory. - callbacks: A list of callback functions to run before and after each request. - num_retries: The number of times to retry a request if it fails transiently due to - network error, rate limiting, etc. Requests are retried with exponential - backoff. - provider: The provider to use. If not specified, the provider will be inferred from the model. - finetuning_model: The model to finetune. In some providers, the models available for finetuning is different - from the models available for inference. - """ - ... - - def forward(self, prompt=..., messages=..., **kwargs): # -> Any | CoroutineType[Any, Any, Any]: - ... - - async def aforward(self, prompt=..., messages=..., **kwargs): # -> Any: - ... - - def launch(self, launch_kwargs: Optional[Dict[str, Any]] = ...): # -> None: - ... - - def kill(self, launch_kwargs: Optional[Dict[str, Any]] = ...): # -> None: - ... - - def finetune(self, train_data: List[Dict[str, Any]], train_data_format: Optional[TrainDataFormat], train_kwargs: Optional[Dict[str, Any]] = ...) -> TrainingJob: - ... - - def reinforce(self, train_kwargs) -> ReinforceJob: - ... - - def infer_provider(self) -> Provider: - ... - - def dump_state(self): # -> dict[str, Any]: - ... - - - -def litellm_completion(request: Dict[str, Any], num_retries: int, cache: Optional[Dict[str, Any]] = ...): # -> ModelResponse | CustomStreamWrapper | TextCompletionResponse | CoroutineType[Any, Any, ModelResponse | TextCompletionResponse | None] | None: - ... - -def litellm_text_completion(request: Dict[str, Any], num_retries: int, cache: Optional[Dict[str, Any]] = ...): # -> TextCompletionResponse | ModelResponse | CustomStreamWrapper | TextCompletionStreamWrapper | | : - ... - -async def alitellm_completion(request: Dict[str, Any], num_retries: int, cache: Optional[Dict[str, Any]] = ...): # -> ModelResponse | CustomStreamWrapper | TextCompletionResponse | None: - ... 
- -async def alitellm_text_completion(request: Dict[str, Any], num_retries: int, cache: Optional[Dict[str, Any]] = ...): # -> TextCompletionResponse | TextCompletionStreamWrapper: - ... - diff --git a/typings/dspy/clients/lm_local.pyi b/typings/dspy/clients/lm_local.pyi deleted file mode 100644 index 31f3cee..0000000 --- a/typings/dspy/clients/lm_local.pyi +++ /dev/null @@ -1,62 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Any, Dict, List, Optional, TYPE_CHECKING -from dspy.clients.provider import Provider, TrainingJob -from dspy.clients.utils_finetune import TrainDataFormat -from dspy.clients.lm import LM - -if TYPE_CHECKING: - ... -logger = ... -class LocalProvider(Provider): - def __init__(self) -> None: - ... - - @staticmethod - def launch(lm: LM, launch_kwargs: Optional[Dict[str, Any]] = ...): # -> None: - ... - - @staticmethod - def kill(lm: LM, launch_kwargs: Optional[Dict[str, Any]] = ...): # -> None: - ... - - @staticmethod - def finetune(job: TrainingJob, model: str, train_data: List[Dict[str, Any]], train_data_format: Optional[TrainDataFormat], train_kwargs: Optional[Dict[str, Any]] = ...) -> str: - ... - - - -def create_output_dir(model_name, data_path): - ... - -def train_sft_locally(model_name, train_data, train_kwargs): - ... - -def get_free_port() -> int: - """ - Return a free TCP port on localhost. - """ - ... - -def wait_for_server(base_url: str, timeout: Optional[int] = ...) -> None: - """ - Wait for the server to be ready by polling the /v1/models endpoint. - - Args: - base_url: The base URL of the server (e.g. http://localhost:1234) - timeout: Maximum time to wait in seconds. None means wait forever. - """ - ... - -def encode_sft_example(example, tokenizer, max_seq_length): # -> dict[str, Any]: - """ - This function encodes a single example into a format that can be used for sft training. - Here, we assume each example has a 'messages' field. 
Each message in it is a dict with 'role' and 'content' fields. - We use the `apply_chat_template` function from the tokenizer to tokenize the messages and prepare the input and label tensors. - - Code obtained from the allenai/open-instruct repository: https://github.com/allenai/open-instruct/blob/4365dea3d1a6111e8b2712af06b22a4512a0df88/open_instruct/finetune.py - """ - ... - diff --git a/typings/dspy/clients/lm_local_arbor.pyi b/typings/dspy/clients/lm_local_arbor.pyi deleted file mode 100644 index 2cd53e0..0000000 --- a/typings/dspy/clients/lm_local_arbor.pyi +++ /dev/null @@ -1,103 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Any, Dict, List, Optional, TYPE_CHECKING, TypedDict, Union -from dspy.clients.provider import Provider, ReinforceJob, TrainingJob -from dspy.clients.utils_finetune import GRPOGroup, TrainDataFormat, TrainingStatus -from dspy.clients.lm import LM - -if TYPE_CHECKING: - ... -class GRPOTrainKwargs(TypedDict): - num_generations: int - ... - - -class ArborTrainingJob(TrainingJob): - def __init__(self, *args, **kwargs) -> None: - ... - - def cancel(self): # -> None: - ... - - def status(self) -> TrainingStatus: - ... - - - -class ArborReinforceJob(ReinforceJob): - DEFAULT_TRAIN_KWARGS = ... - def __init__(self, lm: LM, train_kwargs: GRPOTrainKwargs) -> None: - ... - - def initialize(self): # -> None: - ... - - def step(self, train_data: List[GRPOGroup], train_data_format: Optional[Union[TrainDataFormat, str]]): # -> None: - ... - - def save_checkpoint(self, checkpoint_name: str, score: Optional[float] = ...): # -> None: - ... - - def terminate(self): # -> None: - ... - - def cancel(self): # -> None: - ... - - def status(self) -> TrainingStatus: - ... - - - -class ArborProvider(Provider): - def __init__(self) -> None: - ... - - @staticmethod - def launch(lm: LM, launch_kwargs: Optional[Dict[str, Any]] = ...): # -> None: - ... 
- - @staticmethod - def kill(lm: LM, launch_kwargs: Optional[Dict[str, Any]] = ...): # -> None: - ... - - @staticmethod - def finetune(job: ArborTrainingJob, model: str, train_data: List[Dict[str, Any]], train_data_format: Optional[TrainDataFormat], train_kwargs: Optional[Dict[str, Any]] = ...) -> str: - ... - - @staticmethod - def does_job_exist(job_id: str, training_kwargs: Dict[str, Any]) -> bool: - ... - - @staticmethod - def does_file_exist(file_id: str, training_kwargs: Dict[str, Any]) -> bool: - ... - - @staticmethod - def is_terminal_training_status(status: TrainingStatus) -> bool: - ... - - @staticmethod - def get_training_status(job_id: str, training_kwargs: Dict[str, Any]) -> TrainingStatus: - ... - - @staticmethod - def validate_data_format(data_format: TrainDataFormat): # -> None: - ... - - @staticmethod - def upload_data(data_path: str, training_kwargs: Dict[str, Any]) -> str: - ... - - @staticmethod - def wait_for_job(job: TrainingJob, training_kwargs: Dict[str, Any], poll_frequency: int = ...): # -> None: - ... - - @staticmethod - def get_trained_model(job, training_kwargs: Dict[str, Any]): # -> str | None: - ... - - - diff --git a/typings/dspy/clients/openai.pyi b/typings/dspy/clients/openai.pyi deleted file mode 100644 index d4a7b5d..0000000 --- a/typings/dspy/clients/openai.pyi +++ /dev/null @@ -1,67 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Any, Dict, List, Optional -from dspy.clients.provider import Provider, TrainingJob -from dspy.clients.utils_finetune import TrainDataFormat, TrainingStatus - -_OPENAI_MODELS = ... -class TrainingJobOpenAI(TrainingJob): - def __init__(self, *args, **kwargs) -> None: - ... - - def cancel(self): # -> None: - ... - - def status(self) -> TrainingStatus: - ... - - - -class OpenAIProvider(Provider): - def __init__(self) -> None: - ... - - @staticmethod - def is_provider_model(model: str) -> bool: - ... 
- - @staticmethod - def finetune(job: TrainingJobOpenAI, model: str, train_data: List[Dict[str, Any]], train_data_format: Optional[TrainDataFormat], train_kwargs: Optional[Dict[str, Any]] = ...) -> str: - ... - - @staticmethod - def does_job_exist(job_id: str) -> bool: - ... - - @staticmethod - def does_file_exist(file_id: str) -> bool: - ... - - @staticmethod - def is_terminal_training_status(status: TrainingStatus) -> bool: - ... - - @staticmethod - def get_training_status(job_id: str) -> TrainingStatus: - ... - - @staticmethod - def validate_data_format(data_format: TrainDataFormat): # -> None: - ... - - @staticmethod - def upload_data(data_path: str) -> str: - ... - - @staticmethod - def wait_for_job(job: TrainingJobOpenAI, poll_frequency: int = ...): # -> None: - ... - - @staticmethod - def get_trained_model(job): # -> str | None: - ... - - - diff --git a/typings/dspy/clients/provider.pyi b/typings/dspy/clients/provider.pyi deleted file mode 100644 index 01818de..0000000 --- a/typings/dspy/clients/provider.pyi +++ /dev/null @@ -1,80 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from abc import abstractmethod -from concurrent.futures import Future -from threading import Thread -from typing import Any, Dict, List, Optional, TYPE_CHECKING, Union -from dspy.clients.utils_finetune import TrainDataFormat -from dspy.clients.lm import LM - -if TYPE_CHECKING: - ... -class TrainingJob(Future): - def __init__(self, thread: Optional[Thread] = ..., model: Optional[str] = ..., train_data: Optional[List[Dict[str, Any]]] = ..., train_data_format: Optional[TrainDataFormat] = ..., train_kwargs: Optional[Dict[str, Any]] = ...) -> None: - ... - - def cancel(self): # -> None: - ... - - @abstractmethod - def status(self): - ... - - - -class ReinforceJob: - def __init__(self, lm: LM, train_kwargs: Optional[Dict[str, Any]] = ...) -> None: - ... - - @abstractmethod - def initialize(self): - ... 
- - @abstractmethod - def step(self, train_data: List[Dict[str, Any]], train_data_format: Optional[Union[TrainDataFormat, str]] = ...): - ... - - @abstractmethod - def terminate(self): - ... - - @abstractmethod - def update_model(self): - ... - - @abstractmethod - def save_checkpoint(self, checkpoint_name: str): - ... - - def cancel(self): - ... - - def status(self): - ... - - - -class Provider: - def __init__(self) -> None: - ... - - @staticmethod - def is_provider_model(model: str) -> bool: - ... - - @staticmethod - def launch(lm: LM, launch_kwargs: Optional[Dict[str, Any]] = ...): # -> None: - ... - - @staticmethod - def kill(lm: LM, launch_kwargs: Optional[Dict[str, Any]] = ...): # -> None: - ... - - @staticmethod - def finetune(job: TrainingJob, model: str, train_data: List[Dict[str, Any]], train_data_format: Optional[Union[TrainDataFormat, str]], train_kwargs: Optional[Dict[str, Any]] = ...) -> str: - ... - - - diff --git a/typings/dspy/clients/utils_finetune.pyi b/typings/dspy/clients/utils_finetune.pyi deleted file mode 100644 index 1cfcccd..0000000 --- a/typings/dspy/clients/utils_finetune.pyi +++ /dev/null @@ -1,67 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from enum import Enum -from typing import Any, Dict, List, Literal, Optional, TypedDict, Union -from dspy.adapters.base import Adapter - -class TrainingStatus(str, Enum): - not_started = ... - pending = ... - running = ... - succeeded = ... - failed = ... - cancelled = ... - - -class TrainDataFormat(str, Enum): - CHAT = ... - COMPLETION = ... - GRPO_CHAT = ... - - -class Message(TypedDict): - role: Union[Literal["user"], Literal["assistant"], Literal["system"]] - content: str - ... - - -class MessageAssistant(TypedDict): - role: Literal["assistant"] - content: str - ... - - -class GRPOChatData(TypedDict): - messages: List[Message] - completion: MessageAssistant - reward: float - ... - - -GRPOGroup = List[GRPOChatData] -def infer_data_format(adapter: Adapter) -> str: - ... 
- -def get_finetune_directory() -> str: - ... - -def write_lines(file_path, data): # -> None: - ... - -def save_data(data: List[Dict[str, Any]]) -> str: - ... - -def validate_data_format(data: List[Dict[str, Any]], data_format: TrainDataFormat): # -> None: - ... - -def find_data_errors_completion(data_dict: Dict[str, str]) -> Optional[str]: - ... - -def find_data_error_chat(messages: Dict[str, Any]) -> Optional[str]: - ... - -def find_data_error_chat_message(message: Dict[str, Any]) -> Optional[str]: - ... - diff --git a/typings/dspy/datasets/__init__.pyi b/typings/dspy/datasets/__init__.pyi deleted file mode 100644 index 22ac983..0000000 --- a/typings/dspy/datasets/__init__.pyi +++ /dev/null @@ -1,12 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from dspy.datasets.alfworld import AlfWorld -from dspy.datasets.colors import Colors -from dspy.datasets.dataloader import DataLoader -from dspy.datasets.dataset import Dataset -from dspy.datasets.hotpotqa import HotPotQA -from dspy.datasets.math import MATH - -__all__ = ["Colors", "DataLoader", "Dataset", "HotPotQA", "MATH"] diff --git a/typings/dspy/datasets/alfworld/__init__.pyi b/typings/dspy/datasets/alfworld/__init__.pyi deleted file mode 100644 index 406446d..0000000 --- a/typings/dspy/datasets/alfworld/__init__.pyi +++ /dev/null @@ -1,6 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from dspy.datasets.alfworld.alfworld import AlfWorld - diff --git a/typings/dspy/datasets/alfworld/alfworld.pyi b/typings/dspy/datasets/alfworld/alfworld.pyi deleted file mode 100644 index 31b429f..0000000 --- a/typings/dspy/datasets/alfworld/alfworld.pyi +++ /dev/null @@ -1,64 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -def env_worker(inq, outq): # -> None: - """ - Worker process: creates a single AlfredTWEnv instance, - handles 'init' (with task idx) and 'step' (with action). - """ - ... - -class EnvPool: - """ - Pool of processes, each with a unique env_worker. 
- Acquire a worker using a context manager for safe usage: - with pool.session() as sess: - sess.init(5) # init with idx=5 - obs, rew, done, info = sess.step("go north") - ... - """ - def __init__(self, size=...) -> None: - ... - - def close_all(self): # -> None: - """Close all processes in the pool.""" - ... - - def session(self): # -> _EnvSession: - """Context manager that acquires/releases a single worker.""" - ... - - - -class _EnvSession: - """ - A context manager that acquires a worker from the pool, - provides .init(idx) and .step(action), then releases the worker. - """ - def __init__(self, pool: EnvPool) -> None: - ... - - def __enter__(self): # -> Self: - ... - - def __exit__(self, exc_type, exc_val, exc_tb): # -> None: - ... - - def init(self, idx): - ... - - def step(self, action): - ... - - - -class AlfWorld: - def __init__(self, max_threads=...) -> None: - ... - - def __del__(self): # -> None: - ... - - - diff --git a/typings/dspy/datasets/colors.pyi b/typings/dspy/datasets/colors.pyi deleted file mode 100644 index 7fa2279..0000000 --- a/typings/dspy/datasets/colors.pyi +++ /dev/null @@ -1,16 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from dspy.datasets.dataset import Dataset - -all_colors = ... -class Colors(Dataset): - def __init__(self, sort_by_suffix=..., *args, **kwargs) -> None: - ... - - def sorted_by_suffix(self, colors): # -> list[Any]: - ... - - - diff --git a/typings/dspy/datasets/dataloader.pyi b/typings/dspy/datasets/dataloader.pyi deleted file mode 100644 index b323967..0000000 --- a/typings/dspy/datasets/dataloader.pyi +++ /dev/null @@ -1,42 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import dspy -import pandas as pd -from collections.abc import Mapping -from typing import List, Optional, TYPE_CHECKING, Tuple, Union -from dspy.datasets.dataset import Dataset - -if TYPE_CHECKING: - ... -class DataLoader(Dataset): - def __init__(self) -> None: - ... 
- - def from_huggingface(self, dataset_name: str, *args, input_keys: Tuple[str] = ..., fields: Optional[Tuple[str]] = ..., **kwargs) -> Union[Mapping[str, List[dspy.Example]], List[dspy.Example]]: - ... - - def from_csv(self, file_path: str, fields: Optional[List[str]] = ..., input_keys: Tuple[str] = ...) -> List[dspy.Example]: - ... - - def from_pandas(self, df: pd.DataFrame, fields: Optional[List[str]] = ..., input_keys: tuple[str] = ...) -> list[dspy.Example]: - ... - - def from_json(self, file_path: str, fields: Optional[List[str]] = ..., input_keys: Tuple[str] = ...) -> List[dspy.Example]: - ... - - def from_parquet(self, file_path: str, fields: Optional[List[str]] = ..., input_keys: Tuple[str] = ...) -> List[dspy.Example]: - ... - - def from_rm(self, num_samples: int, fields: List[str], input_keys: List[str]) -> List[dspy.Example]: - ... - - def sample(self, dataset: List[dspy.Example], n: int, *args, **kwargs) -> List[dspy.Example]: - ... - - def train_test_split(self, dataset: List[dspy.Example], train_size: Union[int, float] = ..., test_size: Optional[Union[int, float]] = ..., random_state: Optional[int] = ...) -> Mapping[str, List[dspy.Example]]: - ... - - - diff --git a/typings/dspy/datasets/dataset.pyi b/typings/dspy/datasets/dataset.pyi deleted file mode 100644 index 6875cf8..0000000 --- a/typings/dspy/datasets/dataset.pyi +++ /dev/null @@ -1,29 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -class Dataset: - def __init__(self, train_seed=..., train_size=..., eval_seed=..., dev_size=..., test_size=..., input_keys=...) -> None: - ... - - def reset_seeds(self, train_seed=..., train_size=..., eval_seed=..., dev_size=..., test_size=...): # -> None: - ... - - @property - def train(self): # -> list[Any]: - ... - - @property - def dev(self): # -> list[Any]: - ... - - @property - def test(self): # -> list[Any]: - ... 
- - @classmethod - def prepare_by_seed(cls, train_seeds=..., train_size=..., dev_size=..., divide_eval_per_seed=..., eval_seed=..., **kwargs): # -> dotdict: - ... - - - diff --git a/typings/dspy/datasets/hotpotqa.pyi b/typings/dspy/datasets/hotpotqa.pyi deleted file mode 100644 index 35d9138..0000000 --- a/typings/dspy/datasets/hotpotqa.pyi +++ /dev/null @@ -1,15 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from dspy.datasets.dataset import Dataset - -class HotPotQA(Dataset): - def __init__(self, *args, only_hard_examples=..., keep_details=..., unofficial_dev=..., **kwargs) -> None: - ... - - - -if __name__ == "__main__": - data_args = ... - dataset = ... diff --git a/typings/dspy/datasets/math.pyi b/typings/dspy/datasets/math.pyi deleted file mode 100644 index 513cf83..0000000 --- a/typings/dspy/datasets/math.pyi +++ /dev/null @@ -1,16 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -class MATH: - def __init__(self, subset) -> None: - ... - - def metric(self, example, pred, trace=...): - ... - - - -def extract_answer(s): # -> str | None: - ... - diff --git a/typings/dspy/dsp/__init__.pyi b/typings/dspy/dsp/__init__.pyi deleted file mode 100644 index 006bc27..0000000 --- a/typings/dspy/dsp/__init__.pyi +++ /dev/null @@ -1,4 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - diff --git a/typings/dspy/dsp/colbertv2.pyi b/typings/dspy/dsp/colbertv2.pyi deleted file mode 100644 index 263f522..0000000 --- a/typings/dspy/dsp/colbertv2.pyi +++ /dev/null @@ -1,73 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Any, List, Optional, Union -from dspy.clients.cache import request_cache -from dspy.dsp.utils import dotdict - -class ColBERTv2: - """Wrapper for the ColBERTv2 Retrieval.""" - def __init__(self, url: str = ..., port: Optional[Union[str, int]] = ..., post_requests: bool = ...) -> None: - ... - - def __call__(self, query: str, k: int = ..., simplify: bool = ...) 
-> Union[list[str], list[dotdict]]: - ... - - - -@request_cache() -def colbertv2_get_request_v2(url: str, query: str, k: int): # -> list[dict[Any | str, Any]]: - ... - -@request_cache() -def colbertv2_get_request_v2_wrapped(*args, **kwargs): # -> list[dict[Any | str, Any]]: - ... - -colbertv2_get_request = ... -@request_cache() -def colbertv2_post_request_v2(url: str, query: str, k: int): # -> Any: - ... - -@request_cache() -def colbertv2_post_request_v2_wrapped(*args, **kwargs): # -> Any: - ... - -colbertv2_post_request = ... -class ColBERTv2RetrieverLocal: - def __init__(self, passages: List[str], colbert_config=..., load_only: bool = ...) -> None: - """Colbertv2 retriever module - - Args: - passages (List[str]): list of passages - colbert_config (ColBERTConfig, optional): colbert config for building and searching. Defaults to None. - load_only (bool, optional): whether to load the index or build and then load. Defaults to False. - """ - ... - - def build_index(self): # -> None: - ... - - def get_index(self): - ... - - def __call__(self, *args: Any, **kwargs: Any) -> Any: - ... - - def forward(self, query: str, k: int = ..., **kwargs): # -> list[Any]: - ... - - - -class ColBERTv2RerankerLocal: - def __init__(self, colbert_config=..., checkpoint: str = ...) -> None: - ... - - def __call__(self, *args: Any, **kwargs: Any) -> Any: - ... - - def forward(self, query: str, passages: Optional[List[str]] = ...): # -> NDArray[Any]: - ... - - - diff --git a/typings/dspy/dsp/utils/__init__.pyi b/typings/dspy/dsp/utils/__init__.pyi deleted file mode 100644 index 8e023fc..0000000 --- a/typings/dspy/dsp/utils/__init__.pyi +++ /dev/null @@ -1,8 +0,0 @@ -""" -This type stub file was generated by pyright. 
-""" - -from dspy.dsp.utils.dpr import * -from dspy.dsp.utils.settings import * -from dspy.dsp.utils.utils import * - diff --git a/typings/dspy/dsp/utils/dpr.pyi b/typings/dspy/dsp/utils/dpr.pyi deleted file mode 100644 index b303068..0000000 --- a/typings/dspy/dsp/utils/dpr.pyi +++ /dev/null @@ -1,131 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -""" -Source: DPR Implementation from Facebook Research -https://github.com/facebookresearch/DPR/tree/master/dpr -Original license: https://github.com/facebookresearch/DPR/blob/main/LICENSE -""" -logger = ... -class Tokens: - """A class to represent a list of tokenized text.""" - TEXT = ... - TEXT_WS = ... - SPAN = ... - POS = ... - LEMMA = ... - NER = ... - def __init__(self, data, annotators, opts=...) -> None: - ... - - def __len__(self): # -> int: - """The number of tokens.""" - ... - - def slice(self, i=..., j=...): # -> Self: - """Return a view of the list of tokens from [i, j).""" - ... - - def untokenize(self): # -> LiteralString: - """Returns the original text (with whitespace reinserted).""" - ... - - def words(self, uncased=...): # -> list[Any]: - """Returns a list of the text of each token - - Args: - uncased: lower cases text - """ - ... - - def offsets(self): # -> list[Any]: - """Returns a list of [start, end) character offsets of each token.""" - ... - - def pos(self): # -> list[Any] | None: - """Returns a list of part-of-speech tags of each token. - Returns None if this annotation was not included. - """ - ... - - def lemmas(self): # -> list[Any] | None: - """Returns a list of the lemmatized text of each token. - Returns None if this annotation was not included. - """ - ... - - def entities(self): # -> list[Any] | None: - """Returns a list of named-entity-recognition tags of each token. - Returns None if this annotation was not included. - """ - ... 
- - def ngrams(self, n=..., uncased=..., filter_fn=..., as_strings=...): # -> list[str] | list[tuple[int, int]]: - """Returns a list of all ngrams from length 1 to n. - - Args: - n: upper limit of ngram length - uncased: lower cases text - filter_fn: user function that takes in an ngram list and returns - True or False to keep or not keep the ngram - as_string: return the ngram as a string vs list - """ - ... - - def entity_groups(self): # -> list[Any] | None: - """Group consecutive entity tokens with the same NER tag.""" - ... - - - -class Tokenizer: - """Base tokenizer class. - Tokenizers implement tokenize, which should return a Tokens class. - """ - def tokenize(self, text): - ... - - def shutdown(self): # -> None: - ... - - def __del__(self): # -> None: - ... - - - -class SimpleTokenizer(Tokenizer): - ALPHA_NUM = ... - NON_WS = ... - def __init__(self, **kwargs) -> None: - """ - Args: - annotators: None or empty set (only tokenizes). - """ - ... - - def tokenize(self, text): # -> Tokens: - ... - - - -def has_answer(tokenized_answers, text): # -> bool: - ... - -def locate_answers(tokenized_answers, text): # -> list[Any]: - """ - Returns each occurrence of an answer as (offset, endpos) in terms of *characters*. - """ - ... - -STokenizer = ... -def DPR_tokenize(text): # -> Tokens: - ... - -def DPR_normalize(text): # -> list[Any]: - ... - -def strip_accents(text): # -> LiteralString: - """Strips accents from a piece of text.""" - ... - diff --git a/typings/dspy/dsp/utils/settings.pyi b/typings/dspy/dsp/utils/settings.pyi deleted file mode 100644 index eb311a0..0000000 --- a/typings/dspy/dsp/utils/settings.pyi +++ /dev/null @@ -1,78 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from contextlib import contextmanager - -DEFAULT_CONFIG = ... -main_thread_config = ... -config_owner_thread_id = ... -config_owner_async_task = ... -global_lock = ... -thread_local_overrides = ... 
-class Settings: - """ - A singleton class for DSPy configuration settings. - Thread-safe global configuration. - - 'configure' can be called by only one 'owner' thread (the first thread that calls it). - - Other threads see the configured global values from 'main_thread_config'. - - 'context' sets thread-local overrides. These overrides propagate to threads spawned - inside that context block, when (and only when!) using a ParallelExecutor that copies overrides. - - 1. Only one unique thread (which can be any thread!) can call dspy.configure. - 2. It affects a global state, visible to all. As a result, user threads work, but they shouldn't be - mixed with concurrent changes to dspy.configure from the "main" thread. - (TODO: In the future, add warnings: if there are near-in-time user-thread reads followed by .configure calls.) - 3. Any thread can use dspy.context. It propagates to child threads created with DSPy primitives: Parallel, asyncify, etc. - """ - _instance = ... - def __new__(cls): # -> Self: - ... - - @property - def lock(self): # -> lock: - ... - - def __getattr__(self, name): - ... - - def __setattr__(self, name, value): # -> None: - ... - - def __getitem__(self, key): - ... - - def __setitem__(self, key, value): # -> None: - ... - - def __contains__(self, key): # -> bool: - ... - - def get(self, key, default=...): # -> None: - ... - - def copy(self): # -> dotdict: - ... - - @property - def config(self): # -> dotdict: - ... - - def configure(self, **kwargs): # -> None: - ... - - @contextmanager - def context(self, **kwargs): # -> Generator[None, Any, None]: - """ - Context manager for temporary configuration changes at the thread level. - Does not affect global configuration. Changes only apply to the current thread. - If threads are spawned inside this block using ParallelExecutor, they will inherit these overrides. - """ - ... - - def __repr__(self): # -> str: - ... - - - -settings = ... 
diff --git a/typings/dspy/dsp/utils/utils.pyi b/typings/dspy/dsp/utils/utils.pyi deleted file mode 100644 index a55daf1..0000000 --- a/typings/dspy/dsp/utils/utils.pyi +++ /dev/null @@ -1,97 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -def print_message(*s, condition=..., pad=..., sep=...): # -> str: - ... - -def timestamp(daydir=...): # -> str: - ... - -def file_tqdm(file): # -> Generator[Any, Any, None]: - ... - -def create_directory(path): # -> None: - ... - -def deduplicate(seq: list[str]) -> list[str]: - """ - Source: https://stackoverflow.com/a/480227/1493011 - """ - ... - -def batch(group, bsize, provide_offset=...): # -> Generator[tuple[int, Any] | Any, Any, None]: - ... - -class dotdict(dict): - def __getattr__(self, key): - ... - - def __setattr__(self, key, value): # -> None: - ... - - def __delattr__(self, key): # -> None: - ... - - def __deepcopy__(self, memo): # -> dotdict: - ... - - - -class dotdict_lax(dict): - __getattr__ = ... - __setattr__ = ... - __delattr__ = ... - - -def flatten(data_list): # -> list[Any]: - ... - -def zipstar(data_list, lazy=...): # -> list[list[Any]] | zip[tuple[Any, ...]] | list[tuple[Any, ...]]: - """ - A much faster A, B, C = zip(*[(a, b, c), (a, b, c), ...]) - May return lists or tuples. - """ - ... - -def zip_first(list1, list2): # -> list[tuple[Any, Any]]: - ... - -def int_or_float(val): # -> float | int: - ... - -def groupby_first_item(lst): # -> defaultdict[Any, list[Any]]: - ... - -def process_grouped_by_first_item(lst): # -> Generator[tuple[Any | None, list[Any]], Any, defaultdict[Any, list[Any]]]: - """ - Requires items in list to already be grouped by first item. - """ - ... - -def grouper(iterable, n, fillvalue=...): # -> zip_longest[tuple[Any | None, ...]]: - """ - Collect data into fixed-length chunks or blocks - Example: grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx" - Source: https://docs.python.org/3/library/itertools.html#itertools-recipes - """ - ... 
- -def lengths2offsets(lengths): # -> Generator[tuple[Any | Literal[0], Any], Any, None]: - ... - -class NullContextManager: - def __init__(self, dummy_resource=...) -> None: - ... - - def __enter__(self): # -> None: - ... - - def __exit__(self, *args): # -> None: - ... - - - -def load_batch_backgrounds(args, qids): # -> list[Any] | None: - ... - diff --git a/typings/dspy/evaluate/__init__.pyi b/typings/dspy/evaluate/__init__.pyi deleted file mode 100644 index ede51ba..0000000 --- a/typings/dspy/evaluate/__init__.pyi +++ /dev/null @@ -1,9 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from dspy.evaluate.auto_evaluation import CompleteAndGrounded, SemanticF1 -from dspy.evaluate.evaluate import Evaluate -from dspy.evaluate.metrics import EM, answer_exact_match, answer_passage_match, normalize_text - -__all__ = ["EM", "normalize_text", "answer_exact_match", "answer_passage_match", "Evaluate", "SemanticF1", "CompleteAndGrounded"] diff --git a/typings/dspy/evaluate/auto_evaluation.pyi b/typings/dspy/evaluate/auto_evaluation.pyi deleted file mode 100644 index 78b5177..0000000 --- a/typings/dspy/evaluate/auto_evaluation.pyi +++ /dev/null @@ -1,83 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from dspy.primitives import Module -from dspy.signatures import Signature - -class SemanticRecallPrecision(Signature): - """ - Compare a system's response to the ground truth to compute its recall and precision. - If asked to reason, enumerate key ideas in each response, and whether they are present in the other response. - """ - question: str = ... - ground_truth: str = ... - system_response: str = ... - recall: float = ... - precision: float = ... - - -class DecompositionalSemanticRecallPrecision(Signature): - """ - Compare a system's response to the ground truth to compute recall and precision of key ideas. - You will first enumerate key ideas in each response, discuss their overlap, and then report recall and precision. 
- """ - question: str = ... - ground_truth: str = ... - system_response: str = ... - ground_truth_key_ideas: str = ... - system_response_key_ideas: str = ... - discussion: str = ... - recall: float = ... - precision: float = ... - - -def f1_score(precision, recall): # -> float: - ... - -class SemanticF1(Module): - def __init__(self, threshold=..., decompositional=...) -> None: - ... - - def forward(self, example, pred, trace=...): # -> float | bool: - ... - - - -class AnswerCompleteness(Signature): - """ - Estimate the completeness of a system's responses, against the ground truth. - You will first enumerate key ideas in each response, discuss their overlap, and then report completeness. - """ - question: str = ... - ground_truth: str = ... - system_response: str = ... - ground_truth_key_ideas: str = ... - system_response_key_ideas: str = ... - discussion: str = ... - completeness: float = ... - - -class AnswerGroundedness(Signature): - """ - Estimate the groundedness of a system's responses, against real retrieved documents written by people. - You will first enumerate whatever non-trivial or check-worthy claims are made in the system response, and then - discuss the extent to which some or all of them can be deduced from the retrieved context and basic commonsense. - """ - question: str = ... - retrieved_context: str = ... - system_response: str = ... - system_response_claims: str = ... - discussion: str = ... - groundedness: float = ... - - -class CompleteAndGrounded(Module): - def __init__(self, threshold=...) -> None: - ... - - def forward(self, example, pred, trace=...): # -> float | bool: - ... - - - diff --git a/typings/dspy/evaluate/evaluate.pyi b/typings/dspy/evaluate/evaluate.pyi deleted file mode 100644 index e1ea8b2..0000000 --- a/typings/dspy/evaluate/evaluate.pyi +++ /dev/null @@ -1,116 +0,0 @@ -""" -This type stub file was generated by pyright. 
-""" - -import pandas as pd -import dspy -from typing import Any, Callable, List, Optional, TYPE_CHECKING, Tuple, Union -from dspy.primitives.prediction import Prediction -from dspy.utils.callback import with_callbacks - -if TYPE_CHECKING: - ... -logger = ... -class EvaluationResult(Prediction): - """ - A class that represents the result of an evaluation. - It is a subclass of `dspy.Prediction` that contains the following fields - - - score: An float value (e.g., 67.30) representing the overall performance - - results: a list of (example, prediction, score) tuples for each example in devset - """ - def __init__(self, score: float, results: list[Tuple[dspy.Example, dspy.Example, Any]]) -> None: - ... - - def __repr__(self): # -> str: - ... - - - -class Evaluate: - """DSPy Evaluate class. - - This class is used to evaluate the performance of a DSPy program. Users need to provide a evaluation dataset and - a metric function in order to use this class. This class supports parallel evaluation on the provided dataset. - """ - def __init__(self, *, devset: List[dspy.Example], metric: Optional[Callable] = ..., num_threads: Optional[int] = ..., display_progress: bool = ..., display_table: Union[bool, int] = ..., max_errors: Optional[int] = ..., provide_traceback: Optional[bool] = ..., failure_score: float = ..., **kwargs) -> None: - """ - Args: - devset (List[dspy.Example]): the evaluation dataset. - metric (Callable): The metric function to use for evaluation. - num_threads (Optional[int]): The number of threads to use for parallel evaluation. - display_progress (bool): Whether to display progress during evaluation. - display_table (Union[bool, int]): Whether to display the evaluation results in a table. - If a number is passed, the evaluation results will be truncated to that number before displayed. - max_errors (Optional[int]): The maximum number of errors to allow before - stopping evaluation. If ``None``, inherits from ``dspy.settings.max_errors``. 
- provide_traceback (Optional[bool]): Whether to provide traceback information during evaluation. - failure_score (float): The default score to use if evaluation fails due to an exception. - """ - ... - - @with_callbacks - def __call__(self, program: dspy.Module, metric: Optional[Callable] = ..., devset: Optional[List[dspy.Example]] = ..., num_threads: Optional[int] = ..., display_progress: Optional[bool] = ..., display_table: Optional[Union[bool, int]] = ..., callback_metadata: Optional[dict[str, Any]] = ...) -> EvaluationResult: - """ - Args: - program (dspy.Module): The DSPy program to evaluate. - metric (Callable): The metric function to use for evaluation. if not provided, use `self.metric`. - devset (List[dspy.Example]): the evaluation dataset. if not provided, use `self.devset`. - num_threads (Optional[int]): The number of threads to use for parallel evaluation. if not provided, use - `self.num_threads`. - display_progress (bool): Whether to display progress during evaluation. if not provided, use - `self.display_progress`. - display_table (Union[bool, int]): Whether to display the evaluation results in a table. if not provided, use - `self.display_table`. If a number is passed, the evaluation results will be truncated to that number before displayed. - callback_metadata (dict): Metadata to be used for evaluate callback handlers. - - Returns: - The evaluation results are returned as a dspy.EvaluationResult object containing the following attributes: - - - score: A float percentage score (e.g., 67.30) representing overall performance - - - results: a list of (example, prediction, score) tuples for each example in devset - """ - ... - - - -def prediction_is_dictlike(prediction): # -> TypeIs[Callable[..., object]] | Literal[False]: - ... - -def merge_dicts(d1, d2) -> dict: - ... - -def truncate_cell(content) -> str: - """Truncate content of a cell to 25 words.""" - ... 
- -def stylize_metric_name(df: pd.DataFrame, metric_name: str) -> pd.DataFrame: - """ - Stylize the cell contents of a pandas DataFrame corresponding to the specified metric name. - - :param df: The pandas DataFrame for which to stylize cell contents. - :param metric_name: The name of the metric for which to stylize DataFrame cell contents. - """ - ... - -def display_dataframe(df: pd.DataFrame): # -> None: - """ - Display the specified Pandas DataFrame in the console. - - :param df: The Pandas DataFrame to display. - """ - ... - -def configure_dataframe_for_ipython_notebook_display(df: pd.DataFrame) -> pd.DataFrame: - """Set various pandas display options for DataFrame in an IPython notebook environment.""" - ... - -def is_in_ipython_notebook_environment(): # -> bool: - """ - Check if the current environment is an IPython notebook environment. - - :return: True if the current environment is an IPython notebook environment, False otherwise. - """ - ... - diff --git a/typings/dspy/evaluate/metrics.pyi b/typings/dspy/evaluate/metrics.pyi deleted file mode 100644 index 75bfe09..0000000 --- a/typings/dspy/evaluate/metrics.pyi +++ /dev/null @@ -1,34 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -def EM(prediction, answers_list): # -> bool: - ... - -def F1(prediction, answers_list): # -> float | int: - ... - -def HotPotF1(prediction, answers_list): # -> float | int: - ... - -def normalize_text(s): # -> str: - ... - -def em_score(prediction, ground_truth): # -> bool: - ... - -def f1_score(prediction, ground_truth): # -> float | Literal[0]: - ... - -def hotpot_f1_score(prediction, ground_truth): # -> float | Literal[0]: - ... - -def precision_score(prediction, ground_truth): # -> float | Literal[0]: - ... - -def answer_exact_match(example, pred, trace=..., frac=...): # -> bool: - ... - -def answer_passage_match(example, pred, trace=...): # -> bool: - ... 
- diff --git a/typings/dspy/predict/__init__.pyi b/typings/dspy/predict/__init__.pyi deleted file mode 100644 index 85597a7..0000000 --- a/typings/dspy/predict/__init__.pyi +++ /dev/null @@ -1,17 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from dspy.predict.aggregation import majority -from dspy.predict.best_of_n import BestOfN -from dspy.predict.chain_of_thought import ChainOfThought -from dspy.predict.code_act import CodeAct -from dspy.predict.knn import KNN -from dspy.predict.multi_chain_comparison import MultiChainComparison -from dspy.predict.parallel import Parallel -from dspy.predict.predict import Predict -from dspy.predict.program_of_thought import ProgramOfThought -from dspy.predict.react import ReAct, Tool -from dspy.predict.refine import Refine - -__all__ = ["majority", "BestOfN", "ChainOfThought", "CodeAct", "KNN", "MultiChainComparison", "Predict", "ProgramOfThought", "ReAct", "Refine", "Tool", "Parallel"] diff --git a/typings/dspy/predict/aggregation.pyi b/typings/dspy/predict/aggregation.pyi deleted file mode 100644 index 16dead2..0000000 --- a/typings/dspy/predict/aggregation.pyi +++ /dev/null @@ -1,15 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -def default_normalize(s): # -> str | None: - ... - -def majority(prediction_or_completions, normalize=..., field=...): # -> Prediction: - """ - Returns the most common completion for the target field (or the last field) in the signature. - When normalize returns None, that completion is ignored. - In case of a tie, earlier completion are prioritized. - """ - ... - diff --git a/typings/dspy/predict/avatar/__init__.pyi b/typings/dspy/predict/avatar/__init__.pyi deleted file mode 100644 index 8c8eea8..0000000 --- a/typings/dspy/predict/avatar/__init__.pyi +++ /dev/null @@ -1,8 +0,0 @@ -""" -This type stub file was generated by pyright. 
-""" - -from dspy.predict.avatar.avatar import * -from dspy.predict.avatar.models import * -from dspy.predict.avatar.signatures import * - diff --git a/typings/dspy/predict/avatar/avatar.pyi b/typings/dspy/predict/avatar/avatar.pyi deleted file mode 100644 index ab8d6fb..0000000 --- a/typings/dspy/predict/avatar/avatar.pyi +++ /dev/null @@ -1,18 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import dspy - -def get_number_with_suffix(number: int) -> str: - ... - -class Avatar(dspy.Module): - def __init__(self, signature, tools, max_iters=..., verbose=...) -> None: - ... - - def forward(self, **kwargs): # -> Prediction: - ... - - - diff --git a/typings/dspy/predict/avatar/models.pyi b/typings/dspy/predict/avatar/models.pyi deleted file mode 100644 index d254758..0000000 --- a/typings/dspy/predict/avatar/models.pyi +++ /dev/null @@ -1,32 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Any, Optional -from pydantic import BaseModel - -class Tool(BaseModel): - tool: Any - name: str - desc: Optional[str] - input_type: Optional[str] = ... - def __str__(self) -> str: - ... - - def __repr__(self) -> str: - ... - - - -class Action(BaseModel): - tool_name: Any = ... - tool_input_query: Any = ... - - -class ActionOutput(BaseModel): - tool_name: str - tool_input_query: str - tool_output: str - ... - - diff --git a/typings/dspy/predict/avatar/signatures.pyi b/typings/dspy/predict/avatar/signatures.pyi deleted file mode 100644 index edf5693..0000000 --- a/typings/dspy/predict/avatar/signatures.pyi +++ /dev/null @@ -1,18 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import dspy -from dspy.predict.avatar.models import Action - -class Actor(dspy.Signature): - """You will be given `Tools` which will be a list of tools to use to accomplish the `Goal`. Given the user query, your task is to decide which tool to use and what input values to provide. - - You will output action needed to accomplish the `Goal`. 
`Action` should have a tool to use and the input query to pass to the tool. - - Note: You can opt to use no tools and provide the final answer directly. You can also one tool multiple times with different input queries if applicable.""" - goal: str = ... - tools: list[str] = ... - action_1: Action = ... - - diff --git a/typings/dspy/predict/best_of_n.pyi b/typings/dspy/predict/best_of_n.pyi deleted file mode 100644 index 1e609e4..0000000 --- a/typings/dspy/predict/best_of_n.pyi +++ /dev/null @@ -1,48 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Callable, Optional -from dspy.predict.predict import Module, Prediction - -class BestOfN(Module): - def __init__(self, module: Module, N: int, reward_fn: Callable[[dict, Prediction], float], threshold: float, fail_count: Optional[int] = ...) -> None: - """ - Runs a module up to `N` times with different temperatures and returns the best prediction - out of `N` attempts or the first prediction that passes the `threshold`. - - Args: - module (Module): The module to run. - N (int): The number of times to run the module. - reward_fn (Callable[[dict, Prediction], float]): The reward function which takes in the args passed to the module, the resulting prediction, and returns a scalar reward. - threshold (float): The threshold for the reward function. - fail_count (Optional[int], optional): The number of times the module can fail before raising an error. Defaults to N if not provided. 
- - Example: - ```python - import dspy - - dspy.settings.configure(lm=dspy.LM("openai/gpt-4o-mini")) - - # Define a QA module with chain of thought - qa = dspy.ChainOfThought("question -> answer") - - # Define a reward function that checks for one-word answers - def one_word_answer(args, pred): - return 1.0 if len(pred.answer.split()) == 1 else 0.0 - - # Create a refined module that tries up to 3 times - best_of_3 = dspy.BestOfN(module=qa, N=3, reward_fn=one_word_answer, threshold=1.0) - - # Use the refined module - result = best_of_3(question="What is the capital of Belgium?").answer - # Returns: Brussels - ``` - """ - ... - - def forward(self, **kwargs): # -> object | Any | None: - ... - - - diff --git a/typings/dspy/predict/chain_of_thought.pyi b/typings/dspy/predict/chain_of_thought.pyi deleted file mode 100644 index a8d79c8..0000000 --- a/typings/dspy/predict/chain_of_thought.pyi +++ /dev/null @@ -1,31 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Any, Optional, Type, Union -from pydantic.fields import FieldInfo -from dspy.primitives.module import Module -from dspy.signatures.field import OutputField -from dspy.signatures.signature import Signature - -class ChainOfThought(Module): - def __init__(self, signature: Union[str, Type[Signature]], rationale_field: Optional[Union[OutputField, FieldInfo]] = ..., rationale_field_type: Type = ..., **config: dict[str, Any]) -> None: - """ - A module that reasons step by step in order to predict the output of a task. - - Args: - signature (Type[dspy.Signature]): The signature of the module. - rationale_field (Optional[Union[dspy.OutputField, pydantic.fields.FieldInfo]]): The field that will contain the reasoning. - rationale_field_type (Type): The type of the rationale field. - **config: The configuration for the module. - """ - ... - - def forward(self, **kwargs): # -> object | Any: - ... - - async def aforward(self, **kwargs): # -> Any: - ... 
- - - diff --git a/typings/dspy/predict/code_act.pyi b/typings/dspy/predict/code_act.pyi deleted file mode 100644 index 63c4847..0000000 --- a/typings/dspy/predict/code_act.pyi +++ /dev/null @@ -1,43 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Callable, Optional, Type, Union -from dspy.predict.program_of_thought import ProgramOfThought -from dspy.predict.react import ReAct -from dspy.primitives.python_interpreter import PythonInterpreter -from dspy.signatures.signature import Signature - -logger = ... -class CodeAct(ReAct, ProgramOfThought): - """ - CodeAct is a module that utilizes the Code Interpreter and predefined tools to solve the problem. - """ - def __init__(self, signature: Union[str, Type[Signature]], tools: list[Callable], max_iters: int = ..., interpreter: Optional[PythonInterpreter] = ...) -> None: - """ - Initializes the CodeAct class with the specified model, temperature, and max tokens. - - Args: - signature (Union[str, Type[Signature]]): The signature of the module. - tools (list[Callable]): The tool callables to be used. CodeAct only accepts functions and not callable objects. - max_iters (int): The maximum number of iterations to generate the answer. - interpreter: PythonInterpreter instance to use. If None, a new one is instantiated. - Example: - ```python - from dspy.predict import CodeAct - def factorial(n): - if n == 1: - return 1 - return n * factorial(n-1) - - act = CodeAct("n->factorial", tools=[factorial]) - act(n=5) # 120 - ``` - """ - ... - - def forward(self, **kwargs): # -> Prediction: - ... - - - diff --git a/typings/dspy/predict/knn.pyi b/typings/dspy/predict/knn.pyi deleted file mode 100644 index 9519ae7..0000000 --- a/typings/dspy/predict/knn.pyi +++ /dev/null @@ -1,46 +0,0 @@ -""" -This type stub file was generated by pyright. 
-""" - -from dspy.clients import Embedder -from dspy.primitives import Example - -class KNN: - def __init__(self, k: int, trainset: list[Example], vectorizer: Embedder) -> None: - """ - A k-nearest neighbors retriever that finds similar examples from a training set. - - Args: - k: Number of nearest neighbors to retrieve - trainset: List of training examples to search through - vectorizer: The `Embedder` to use for vectorization - - Example: - ```python - import dspy - from sentence_transformers import SentenceTransformer - - # Create a training dataset with examples - trainset = [ - dspy.Example(input="hello", output="world"), - # ... more examples ... - ] - - # Initialize KNN with a sentence transformer model - knn = KNN( - k=3, - trainset=trainset, - vectorizer=dspy.Embedder(SentenceTransformer("all-MiniLM-L6-v2").encode) - ) - - # Find similar examples - similar_examples = knn(input="hello") - ``` - """ - ... - - def __call__(self, **kwargs) -> list: - ... - - - diff --git a/typings/dspy/predict/multi_chain_comparison.pyi b/typings/dspy/predict/multi_chain_comparison.pyi deleted file mode 100644 index 4c22d04..0000000 --- a/typings/dspy/predict/multi_chain_comparison.pyi +++ /dev/null @@ -1,15 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from dspy.primitives.module import Module - -class MultiChainComparison(Module): - def __init__(self, signature, M=..., temperature=..., **config) -> None: - ... - - def forward(self, completions, **kwargs): # -> object | Any: - ... - - - diff --git a/typings/dspy/predict/parallel.pyi b/typings/dspy/predict/parallel.pyi deleted file mode 100644 index e7fed9e..0000000 --- a/typings/dspy/predict/parallel.pyi +++ /dev/null @@ -1,19 +0,0 @@ -""" -This type stub file was generated by pyright. 
-""" - -from typing import Any, List, Optional, Tuple -from dspy.primitives.example import Example - -class Parallel: - def __init__(self, num_threads: Optional[int] = ..., max_errors: Optional[int] = ..., access_examples: bool = ..., return_failed_examples: bool = ..., provide_traceback: Optional[bool] = ..., disable_progress_bar: bool = ...) -> None: - ... - - def forward(self, exec_pairs: List[Tuple[Any, Example]], num_threads: Optional[int] = ...) -> List[Any]: - ... - - def __call__(self, *args: Any, **kwargs: Any) -> Any: - ... - - - diff --git a/typings/dspy/predict/parameter.pyi b/typings/dspy/predict/parameter.pyi deleted file mode 100644 index 6fb9ba2..0000000 --- a/typings/dspy/predict/parameter.pyi +++ /dev/null @@ -1,8 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -class Parameter: - ... - - diff --git a/typings/dspy/predict/predict.pyi b/typings/dspy/predict/predict.pyi deleted file mode 100644 index cfc3c21..0000000 --- a/typings/dspy/predict/predict.pyi +++ /dev/null @@ -1,62 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Optional, Type, Union -from dspy.predict.parameter import Parameter -from dspy.primitives.module import Module -from dspy.signatures.signature import Signature -from dspy.utils.callback import BaseCallback - -logger = ... -class Predict(Module, Parameter): - def __init__(self, signature: Union[str, Type[Signature]], callbacks: Optional[list[BaseCallback]] = ..., **config) -> None: - ... - - def reset(self): # -> None: - ... - - def dump_state(self): # -> dict[str, Any]: - ... - - def load_state(self, state: dict) -> Predict: - """Load the saved state of a `Predict` object. - - Args: - state: The saved state of a `Predict` object. - - Returns: - Self to allow method chaining. - """ - ... - - def __call__(self, *args, **kwargs): # -> object | Any: - ... - - async def acall(self, *args, **kwargs): # -> Any: - ... - - def forward(self, **kwargs): # -> Prediction: - ... 
- - async def aforward(self, **kwargs): # -> Prediction: - ... - - def update_config(self, **kwargs): # -> None: - ... - - def get_config(self): # -> dict[str, Any]: - ... - - def __repr__(self): # -> str: - ... - - - -def serialize_object(obj): # -> dict[str, Any] | list[dict[str, Any] | list[Any] | tuple[dict[str, Any] | list[Any] | tuple[Any, ...] | dict[Any, Any] | Any, ...] | dict[Any, Any] | Any] | tuple[dict[str, Any] | list[Any] | tuple[Any, ...] | dict[Any, Any] | Any, ...] | dict[Any, Any]: - """ - Recursively serialize a given object into a JSON-compatible format. - Supports Pydantic models, lists, dicts, and primitive types. - """ - ... - diff --git a/typings/dspy/predict/program_of_thought.pyi b/typings/dspy/predict/program_of_thought.pyi deleted file mode 100644 index e9617da..0000000 --- a/typings/dspy/predict/program_of_thought.pyi +++ /dev/null @@ -1,39 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Optional, Type, Union -from dspy.primitives.module import Module -from dspy.primitives.python_interpreter import PythonInterpreter -from dspy.signatures.signature import Signature - -logger = ... -class ProgramOfThought(Module): - """ - A DSPy module that runs Python programs to solve a problem. - This module reuires deno to be installed. Please install deno following https://docs.deno.com/runtime/getting_started/installation/ - - Example: - ``` - import dspy - - lm = dspy.LM('openai/gpt-4o-mini') - dspy.configure(lm=lm) - pot = dspy.ProgramOfThought("question -> answer") - pot(question="what is 1+1?") - ``` - """ - def __init__(self, signature: Union[str, Type[Signature]], max_iters: int = ..., interpreter: Optional[PythonInterpreter] = ...) -> None: - """ - Args: - signature: The signature of the module. - max_iters: The maximum number of iterations to retry code generation and execution. - interpreter: PythonInterpreter instance to use. If None, a new one is instantiated. - """ - ... 
- - def forward(self, **kwargs): # -> object | Any: - ... - - - diff --git a/typings/dspy/predict/react.pyi b/typings/dspy/predict/react.pyi deleted file mode 100644 index ee85c75..0000000 --- a/typings/dspy/predict/react.pyi +++ /dev/null @@ -1,52 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Callable, TYPE_CHECKING, Type -from dspy.primitives.module import Module -from dspy.signatures.signature import Signature - -logger = ... -if TYPE_CHECKING: - ... -class ReAct(Module): - def __init__(self, signature: Type[Signature], tools: list[Callable], max_iters: int = ...) -> None: - """ - ReAct stands for "Reasoning and Acting," a popular paradigm for building tool-using agents. - In this approach, the language model is iteratively provided with a list of tools and has - to reason about the current situation. The model decides whether to call a tool to gather more - information or to finish the task based on its reasoning process. The DSPy version of ReAct is - generalized to work over any signature, thanks to signature polymorphism. - - Args: - signature: The signature of the module, which defines the input and output of the react module. - tools (list[Callable]): A list of functions, callable objects, or `dspy.Tool` instances. - max_iters (Optional[int]): The maximum number of iterations to run. Defaults to 10. - - Example: - - ```python - def get_weather(city: str) -> str: - return f"The weather in {city} is sunny." - - react = dspy.ReAct(signature="question->answer", tools=[get_weather]) - pred = react(question="What is the weather in Tokyo?") - ``` - """ - ... - - def forward(self, **input_args): # -> Prediction: - ... - - async def aforward(self, **input_args): # -> Prediction: - ... - - def truncate_trajectory(self, trajectory): - """Truncates the trajectory so that it fits in the context window. - - Users can override this method to implement their own truncation logic. - """ - ... 
- - - diff --git a/typings/dspy/predict/refine.pyi b/typings/dspy/predict/refine.pyi deleted file mode 100644 index ceebc23..0000000 --- a/typings/dspy/predict/refine.pyi +++ /dev/null @@ -1,81 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Callable, Optional -from dspy.predict.predict import Prediction -from dspy.signatures import Signature -from .predict import Module - -class OfferFeedback(Signature): - """ - In the discussion, assign blame to each module that contributed to the final reward being below the threshold, if - any. Then, prescribe concrete advice of how the module should act on its future input when we retry the process, if - it were to receive the same or similar inputs. If a module is not to blame, the advice should be N/A. - The module will not see its own history, so it needs to rely on entirely concrete and actionable advice from you - to avoid the same mistake on the same or similar inputs. - """ - program_code: str = ... - modules_defn: str = ... - program_inputs: str = ... - program_trajectory: str = ... - program_outputs: str = ... - reward_code: str = ... - target_threshold: float = ... - reward_value: float = ... - module_names: list[str] = ... - discussion: str = ... - advice: dict[str, str] = ... - - -class Refine(Module): - def __init__(self, module: Module, N: int, reward_fn: Callable[[dict, Prediction], float], threshold: float, fail_count: Optional[int] = ...) -> None: - """ - Refines a module by running it up to N times with different temperatures and returns the best prediction. - - This module runs the provided module multiple times with varying temperature settings and selects - either the first prediction that exceeds the specified threshold or the one with the highest reward. - If no prediction meets the threshold, it automatically generates feedback to improve future predictions. - - - Args: - module (Module): The module to refine. - N (int): The number of times to run the module. 
must - reward_fn (Callable): The reward function. - threshold (float): The threshold for the reward function. - fail_count (Optional[int], optional): The number of times the module can fail before raising an error - - Example: - ```python - import dspy - - dspy.settings.configure(lm=dspy.LM("openai/gpt-4o-mini")) - - # Define a QA module with chain of thought - qa = dspy.ChainOfThought("question -> answer") - - # Define a reward function that checks for one-word answers - def one_word_answer(args, pred): - return 1.0 if len(pred.answer.split()) == 1 else 0.0 - - # Create a refined module that tries up to 3 times - best_of_3 = dspy.Refine(module=qa, N=3, reward_fn=one_word_answer, threshold=1.0) - - # Use the refined module - result = best_of_3(question="What is the capital of Belgium?").answer - # Returns: Brussels - ``` - """ - ... - - def forward(self, **kwargs): # -> object | Any | None: - ... - - - -def inspect_modules(program): # -> str: - ... - -def recursive_mask(o): # -> dict[Any, Any | dict[Any, Any] | list[Any | dict[Any, Any] | list[Any] | tuple[Any, ...] | str] | tuple[Any, ...] | str] | list[Any | dict[Any, Any | dict[Any, Any] | list[Any] | tuple[Any, ...] | str] | list[Any] | tuple[Any, ...] | str] | tuple[Any, ...] | str: - ... - diff --git a/typings/dspy/predict/retry.pyi b/typings/dspy/predict/retry.pyi deleted file mode 100644 index 006bc27..0000000 --- a/typings/dspy/predict/retry.pyi +++ /dev/null @@ -1,4 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - diff --git a/typings/dspy/primitives/__init__.pyi b/typings/dspy/primitives/__init__.pyi deleted file mode 100644 index 081febc..0000000 --- a/typings/dspy/primitives/__init__.pyi +++ /dev/null @@ -1,11 +0,0 @@ -""" -This type stub file was generated by pyright. 
-""" - -from dspy.primitives.base_module import BaseModule -from dspy.primitives.example import Example -from dspy.primitives.module import Module -from dspy.primitives.prediction import Completions, Prediction -from dspy.primitives.python_interpreter import PythonInterpreter - -__all__ = ["Example", "BaseModule", "Prediction", "Completions", "Module", "PythonInterpreter"] diff --git a/typings/dspy/primitives/base_module.pyi b/typings/dspy/primitives/base_module.pyi deleted file mode 100644 index 08073b5..0000000 --- a/typings/dspy/primitives/base_module.pyi +++ /dev/null @@ -1,87 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from collections.abc import Generator - -logger = ... -class BaseModule: - def __init__(self) -> None: - ... - - def named_parameters(self): # -> list[Any]: - """ - Unlike PyTorch, handles (non-recursive) lists of parameters too. - """ - ... - - def named_sub_modules(self, type_=..., skip_compiled=...) -> Generator[tuple[str, BaseModule], None, None]: - """Find all sub-modules in the module, as well as their names. - - Say self.children[4]['key'].sub_module is a sub-module. Then the name will be - 'children[4][key].sub_module'. But if the sub-module is accessible at different - paths, only one of the paths will be returned. - """ - ... - - def parameters(self): # -> list[Any]: - ... - - def deepcopy(self): # -> Self: - """Deep copy the module. - - This is a tweak to the default python deepcopy that only deep copies `self.parameters()`, and for other - attributes, we just do the shallow copy. - """ - ... - - def reset_copy(self): # -> Self: - """Deep copy the module and reset all parameters.""" - ... - - def dump_state(self): # -> dict[Any, Any]: - ... - - def load_state(self, state): # -> None: - ... - - def save(self, path, save_program=..., modules_to_serialize=...): # -> None: - """Save the module. - - Save the module to a directory or a file. 
There are two modes: - - `save_program=False`: Save only the state of the module to a json or pickle file, based on the value of - the file extension. - - `save_program=True`: Save the whole module to a directory via cloudpickle, which contains both the state and - architecture of the model. - - If `save_program=True` and `modules_to_serialize` are provided, it will register those modules for serialization - with cloudpickle's `register_pickle_by_value`. This causes cloudpickle to serialize the module by value rather - than by reference, ensuring the module is fully preserved along with the saved program. This is useful - when you have custom modules that need to be serialized alongside your program. If None, then no modules - will be registered for serialization. - - We also save the dependency versions, so that the loaded model can check if there is a version mismatch on - critical dependencies or DSPy version. - - Args: - path (str): Path to the saved state file, which should be a .json or .pkl file when `save_program=False`, - and a directory when `save_program=True`. - save_program (bool): If True, save the whole module to a directory via cloudpickle, otherwise only save - the state. - modules_to_serialize (list): A list of modules to serialize with cloudpickle's `register_pickle_by_value`. - If None, then no modules will be registered for serialization. - - """ - ... - - def load(self, path): # -> None: - """Load the saved module. You may also want to check out dspy.load, if you want to - load an entire program, not just the state for an existing program. - - Args: - path (str): Path to the saved state file, which should be a .json or a .pkl file - """ - ... - - - diff --git a/typings/dspy/primitives/example.pyi b/typings/dspy/primitives/example.pyi deleted file mode 100644 index eb7bc3d..0000000 --- a/typings/dspy/primitives/example.pyi +++ /dev/null @@ -1,76 +0,0 @@ -""" -This type stub file was generated by pyright. 
-""" - -class Example: - def __init__(self, base=..., **kwargs) -> None: - ... - - def __getattr__(self, key): - ... - - def __setattr__(self, key, value): # -> None: - ... - - def __getitem__(self, key): - ... - - def __setitem__(self, key, value): # -> None: - ... - - def __delitem__(self, key): # -> None: - ... - - def __contains__(self, key): # -> bool: - ... - - def __len__(self): # -> int: - ... - - def __repr__(self): # -> str: - ... - - def __str__(self) -> str: - ... - - def __eq__(self, other) -> bool: - ... - - def __hash__(self) -> int: - ... - - def keys(self, include_dspy=...): # -> list[Any]: - ... - - def values(self, include_dspy=...): # -> list[Any]: - ... - - def items(self, include_dspy=...): # -> list[tuple[Any, Any]]: - ... - - def get(self, key, default=...): # -> None: - ... - - def with_inputs(self, *keys): # -> Self: - ... - - def inputs(self): # -> Self: - ... - - def labels(self): # -> Self: - ... - - def __iter__(self): # -> Iterator[Any]: - ... - - def copy(self, **kwargs): # -> Self: - ... - - def without(self, *keys): # -> Self: - ... - - def toDict(self): # -> dict[Any, Any]: - ... - - - diff --git a/typings/dspy/primitives/module.pyi b/typings/dspy/primitives/module.pyi deleted file mode 100644 index 9111394..0000000 --- a/typings/dspy/primitives/module.pyi +++ /dev/null @@ -1,77 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Optional -from dspy.primitives.base_module import BaseModule -from dspy.primitives.example import Example -from dspy.utils.callback import with_callbacks - -logger = ... -class ProgramMeta(type): - """Metaclass ensuring every ``dspy.Module`` instance is properly initialised.""" - def __call__(cls, *args, **kwargs): - ... - - - -class Module(BaseModule, metaclass=ProgramMeta): - def __init__(self, callbacks=...) -> None: - ... - - @with_callbacks - def __call__(self, *args, **kwargs): # -> object | Any: - ... 
- - @with_callbacks - async def acall(self, *args, **kwargs): # -> Any: - ... - - def named_predictors(self): # -> list[tuple[Any, Predict]]: - ... - - def predictors(self): # -> list[Predict]: - ... - - def set_lm(self, lm): # -> None: - ... - - def get_lm(self): # -> LM | None: - ... - - def __repr__(self): # -> LiteralString: - ... - - def map_named_predictors(self, func): # -> Self: - """Applies a function to all named predictors.""" - ... - - def inspect_history(self, n: int = ...): # -> None: - ... - - def batch(self, examples: list[Example], num_threads: Optional[int] = ..., max_errors: Optional[int] = ..., return_failed_examples: bool = ..., provide_traceback: Optional[bool] = ..., disable_progress_bar: bool = ...): # -> tuple[Any, Any, Any] | List[Any]: - """ - Processes a list of dspy.Example instances in parallel using the Parallel module. - - Args: - examples: List of dspy.Example instances to process. - num_threads: Number of threads to use for parallel processing. - max_errors: Maximum number of errors allowed before stopping execution. - If ``None``, inherits from ``dspy.settings.max_errors``. - return_failed_examples: Whether to return failed examples and exceptions. - provide_traceback: Whether to include traceback information in error logs. - disable_progress_bar: Whether to display the progress bar. - - Returns: - List of results, and optionally failed examples and exceptions. - """ - ... - - def __getattribute__(self, name): # -> Callable[..., object] | Any: - ... - - - -def set_attribute_by_name(obj, name, value): # -> None: - ... - diff --git a/typings/dspy/primitives/prediction.pyi b/typings/dspy/primitives/prediction.pyi deleted file mode 100644 index 0298384..0000000 --- a/typings/dspy/primitives/prediction.pyi +++ /dev/null @@ -1,98 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from dspy.primitives.example import Example - -class Prediction(Example): - """A prediction object that contains the output of a DSPy module. 
- - Prediction inherits from Example. - - To allow feedback-augmented scores, Prediction supports comparison operations - (<, >, <=, >=) for Predictions with a `score` field. The comparison operations - compare the 'score' values as floats. For equality comparison, Predictions are equal - if their underlying data stores are equal (inherited from Example). - - Arithmetic operations (+, /, etc.) are also supported for Predictions with a 'score' - field, operating on the score value. - """ - def __init__(self, *args, **kwargs) -> None: - ... - - def get_lm_usage(self): # -> None: - ... - - def set_lm_usage(self, value): # -> None: - ... - - @classmethod - def from_completions(cls, list_or_dict, signature=...): # -> Self: - ... - - def __repr__(self): # -> str: - ... - - def __str__(self) -> str: - ... - - def __float__(self): # -> float: - ... - - def __add__(self, other): # -> float: - ... - - def __radd__(self, other): # -> float: - ... - - def __truediv__(self, other): # -> float: - ... - - def __rtruediv__(self, other): # -> float: - ... - - def __lt__(self, other) -> bool: - ... - - def __le__(self, other) -> bool: - ... - - def __gt__(self, other) -> bool: - ... - - def __ge__(self, other) -> bool: - ... - - @property - def completions(self): # -> None: - ... - - - -class Completions: - def __init__(self, list_or_dict, signature=...) -> None: - ... - - def items(self): # -> dict_items[Any, Any]: - ... - - def __getitem__(self, key): # -> Prediction: - ... - - def __getattr__(self, name): - ... - - def __len__(self): # -> int: - ... - - def __contains__(self, key): # -> bool: - ... - - def __repr__(self): # -> str: - ... - - def __str__(self) -> str: - ... - - - diff --git a/typings/dspy/primitives/python_interpreter.pyi b/typings/dspy/primitives/python_interpreter.pyi deleted file mode 100644 index 752ab02..0000000 --- a/typings/dspy/primitives/python_interpreter.pyi +++ /dev/null @@ -1,55 +0,0 @@ -""" -This type stub file was generated by pyright. 
-""" - -from os import PathLike -from types import TracebackType -from typing import Any, Dict, List, Optional, Union - -class InterpreterError(RuntimeError): - ... - - -class PythonInterpreter: - r""" - PythonInterpreter that runs code in a sandboxed environment using Deno and Pyodide. - - Prerequisites: - - Deno (https://docs.deno.com/runtime/getting_started/installation/). - - Example Usage: - ```python - code_string = "print('Hello'); 1 + 2" - with PythonInterpreter() as interp: - output = interp(code_string) # If final statement is non-None, prints the numeric result, else prints captured output - ``` - """ - def __init__(self, deno_command: Optional[List[str]] = ..., enable_read_paths: Optional[List[Union[PathLike, str]]] = ..., enable_write_paths: Optional[List[Union[PathLike, str]]] = ..., enable_env_vars: Optional[List[str]] = ..., enable_network_access: Optional[List[str]] = ..., sync_files: bool = ...) -> None: - """ - Args: - deno_command: command list to launch Deno. - enable_read_paths: Files or directories to allow reading from in the sandbox. - enable_write_paths: Files or directories to allow writing to in the sandbox. - enable_env_vars: Environment variable names to allow in the sandbox. - enable_network_access: Domains or IPs to allow network access in the sandbox. - sync_files: If set, syncs changes within the sandbox back to original files after execution. - """ - ... - - def execute(self, code: str, variables: Optional[Dict[str, Any]] = ...) -> Any: - ... - - def __enter__(self): # -> Self: - ... - - def __exit__(self, _exc_type: Optional[type[BaseException]], _exc_val: Optional[BaseException], _exc_tb: Optional[TracebackType]): # -> None: - ... - - def __call__(self, code: str, variables: Optional[Dict[str, Any]] = ...) -> Any: - ... - - def shutdown(self) -> None: - ... 
- - - diff --git a/typings/dspy/propose/__init__.pyi b/typings/dspy/propose/__init__.pyi deleted file mode 100644 index 45e8d87..0000000 --- a/typings/dspy/propose/__init__.pyi +++ /dev/null @@ -1,7 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from dspy.propose.grounded_proposer import GroundedProposer - -__all__ = ["GroundedProposer"] diff --git a/typings/dspy/propose/dataset_summary_generator.pyi b/typings/dspy/propose/dataset_summary_generator.pyi deleted file mode 100644 index ca5d86b..0000000 --- a/typings/dspy/propose/dataset_summary_generator.pyi +++ /dev/null @@ -1,31 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import dspy - -class ObservationSummarizer(dspy.Signature): - """Given a series of observations I have made about my dataset, please summarize them into a brief 2-3 sentence summary which highlights only the most important details.""" - observations = ... - summary = ... - - -class DatasetDescriptor(dspy.Signature): - """Given several examples from a dataset please write observations about trends that hold for most or all of the samples. """ """Some areas you may consider in your observations: topics, content, syntax, conciceness, etc. """ """It will be useful to make an educated guess as to the nature of the task this dataset will enable. Don't be afraid to be creative""" - examples = ... - observations = ... - - -class DatasetDescriptorWithPriorObservations(dspy.Signature): - """Given several examples from a dataset please write observations about trends that hold for most or all of the samples. """ """I will also provide you with a few observations I have already made. Please add your own observations or if you feel the observations are comprehensive say 'COMPLETE' """ """Some areas you may consider in your observations: topics, content, syntax, conciceness, etc. """ """It will be useful to make an educated guess as to the nature of the task this dataset will enable. 
Don't be afraid to be creative""" - examples = ... - prior_observations = ... - observations = ... - - -def order_input_keys_in_string(unordered_repr): # -> str: - ... - -def create_dataset_summary(trainset, view_data_batch_size, prompt_model, log_file=..., verbose=...): - ... - diff --git a/typings/dspy/propose/grounded_proposer.pyi b/typings/dspy/propose/grounded_proposer.pyi deleted file mode 100644 index 02f5924..0000000 --- a/typings/dspy/propose/grounded_proposer.pyi +++ /dev/null @@ -1,55 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import dspy -from dspy.propose.propose_base import Proposer - -MAX_INSTRUCT_IN_HISTORY = ... -TIPS = ... -class DescribeProgram(dspy.Signature): - """Below is some pseudo-code for a pipeline that solves tasks with calls to language models. Please describe what type of task this program appears to be designed to solve, and how it appears to work.""" - program_code = ... - program_example = ... - program_description = ... - - -class DescribeModule(dspy.Signature): - """Below is some pseudo-code for a pipeline that solves tasks with calls to language models. Please describe the purpose of one of the specified module in this pipeline.""" - program_code = ... - program_example = ... - program_description = ... - module = ... - module_description = ... - - -def generate_instruction_class(use_dataset_summary=..., program_aware=..., use_task_demos=..., use_instruct_history=..., use_tip=...): # -> Predict: - class GenerateSingleModuleInstruction(dspy.Signature): - """Use the information below to learn about a task that we are trying to solve using calls to an LM, then generate a new instruction that will be used to prompt a Language Model to better solve the task.""" - ... - - - -class GenerateModuleInstruction(dspy.Module): - def __init__(self, program_code_string=..., use_dataset_summary=..., program_aware=..., use_task_demos=..., use_instruct_history=..., use_tip=..., verbose=...) -> None: - ... 
- - def forward(self, demo_candidates, pred_i, demo_set_i, program, previous_instructions, data_summary, num_demos_in_context=..., tip=...): # -> Prediction: - ... - - - -class GroundedProposer(Proposer): - def __init__(self, prompt_model, program, trainset, view_data_batch_size=..., use_dataset_summary=..., program_aware=..., use_task_demos=..., num_demos_in_context=..., use_instruct_history=..., use_tip=..., set_tip_randomly=..., set_history_randomly=..., verbose=..., rng=...) -> None: - ... - - def propose_instructions_for_program(self, trainset, program, demo_candidates, trial_logs, N, T) -> list[str]: - """This method is responsible for returning the full set of new instructions for our program, given the specified criteria.""" - ... - - def propose_instruction_for_predictor(self, program, predictor, pred_i, T, demo_candidates, demo_set_i, trial_logs, tip=...) -> str: - """This method is responsible for returning a single instruction for a given predictor, using the specified criteria.""" - ... - - - diff --git a/typings/dspy/propose/propose_base.pyi b/typings/dspy/propose/propose_base.pyi deleted file mode 100644 index 8085a69..0000000 --- a/typings/dspy/propose/propose_base.pyi +++ /dev/null @@ -1,19 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from abc import ABC, abstractmethod - -class Proposer(ABC): - def __init__(self) -> None: - ... - - @abstractmethod - def propose_instructions_for_program(self): # -> None: - ... - - def propose_instruction_for_predictor(self): # -> None: - ... - - - diff --git a/typings/dspy/propose/utils.pyi b/typings/dspy/propose/utils.pyi deleted file mode 100644 index 6033ee7..0000000 --- a/typings/dspy/propose/utils.pyi +++ /dev/null @@ -1,25 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -def strip_prefix(text): # -> str: - ... - -def create_instruction_set_history_string(base_program, trial_logs, top_n): # -> str: - ... 
- -def parse_list_of_instructions(instruction_string): # -> Any | list[Any]: - ... - -def get_program_instruction_set_string(program): # -> LiteralString: - ... - -def create_predictor_level_history_string(base_program, predictor_i, trial_logs, top_n): # -> Literal['']: - ... - -def create_example_string(fields, example): # -> LiteralString: - ... - -def get_dspy_source_code(module): # -> str: - ... - diff --git a/typings/dspy/retrievers/__init__.pyi b/typings/dspy/retrievers/__init__.pyi deleted file mode 100644 index ce589e3..0000000 --- a/typings/dspy/retrievers/__init__.pyi +++ /dev/null @@ -1,8 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from dspy.retrievers.embeddings import Embeddings -from dspy.retrievers.retrieve import Retrieve - -__all__ = ["Embeddings", "Retrieve"] diff --git a/typings/dspy/retrievers/databricks_rm.pyi b/typings/dspy/retrievers/databricks_rm.pyi deleted file mode 100644 index 97e91bc..0000000 --- a/typings/dspy/retrievers/databricks_rm.pyi +++ /dev/null @@ -1,135 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import dspy -from dataclasses import dataclass -from typing import Any, Dict, List, Optional, Union -from dspy.primitives.prediction import Prediction - -_databricks_sdk_installed = ... -@dataclass -class Document: - page_content: str - metadata: Dict[str, Any] - type: str - def to_dict(self) -> Dict[str, Any]: - ... - - - -class DatabricksRM(dspy.Retrieve): - """ - A retriever module that uses a Databricks Mosaic AI Vector Search Index to return the top-k - embeddings for a given query. - - Examples: - Below is a code snippet that shows how to set up a Databricks Vector Search Index - and configure a DatabricksRM DSPy retriever module to query the index. 
- - (example adapted from "Databricks: How to create and query a Vector Search Index: - https://docs.databricks.com/en/generative-ai/create-query-vector-search.html#create-a-vector-search-index) - - ```python - from databricks.vector_search.client import VectorSearchClient - - # Create a Databricks Vector Search Endpoint - client = VectorSearchClient() - client.create_endpoint( - name="your_vector_search_endpoint_name", - endpoint_type="STANDARD" - ) - - # Create a Databricks Direct Access Vector Search Index - index = client.create_direct_access_index( - endpoint_name="your_vector_search_endpoint_name", - index_name="your_index_name", - primary_key="id", - embedding_dimension=1024, - embedding_vector_column="text_vector", - schema={ - "id": "int", - "field2": "str", - "field3": "float", - "text_vector": "array" - } - ) - - # Create a DatabricksRM retriever module to query the Databricks Direct Access Vector - # Search Index - retriever = DatabricksRM( - databricks_index_name = "your_index_name", - docs_id_column_name="id", - text_column_name="field2", - k=3 - ) - ``` - - Below is a code snippet that shows how to query the Databricks Direct Access Vector - Search Index using the DatabricksRM retriever module: - - ```python - retrieved_results = DatabricksRM(query="Example query text")) - ``` - """ - def __init__(self, databricks_index_name: str, databricks_endpoint: Optional[str] = ..., databricks_token: Optional[str] = ..., databricks_client_id: Optional[str] = ..., databricks_client_secret: Optional[str] = ..., columns: Optional[List[str]] = ..., filters_json: Optional[str] = ..., k: int = ..., docs_id_column_name: str = ..., docs_uri_column_name: Optional[str] = ..., text_column_name: str = ..., use_with_databricks_agent_framework: bool = ...) -> None: - """ - Args: - databricks_index_name (str): The name of the Databricks Vector Search Index to query. 
- databricks_endpoint (Optional[str]): The URL of the Databricks Workspace containing - the Vector Search Index. Defaults to the value of the ``DATABRICKS_HOST`` - environment variable. If unspecified, the Databricks SDK is used to identify the - endpoint based on the current environment. - databricks_token (Optional[str]): The Databricks Workspace authentication token to use - when querying the Vector Search Index. Defaults to the value of the - ``DATABRICKS_TOKEN`` environment variable. If unspecified, the Databricks SDK is - used to identify the token based on the current environment. - databricks_client_id (str): Databricks service principal id. If not specified, - the token is resolved from the current environment (DATABRICKS_CLIENT_ID). - databricks_client_secret (str): Databricks service principal secret. If not specified, - the endpoint is resolved from the current environment (DATABRICKS_CLIENT_SECRET). - columns (Optional[List[str]]): Extra column names to include in response, - in addition to the document id and text columns specified by - ``docs_id_column_name`` and ``text_column_name``. - filters_json (Optional[str]): A JSON string specifying additional query filters. - Example filters: ``{"id <": 5}`` selects records that have an ``id`` column value - less than 5, and ``{"id >=": 5, "id <": 10}`` selects records that have an ``id`` - column value greater than or equal to 5 and less than 10. - k (int): The number of documents to retrieve. - docs_id_column_name (str): The name of the column in the Databricks Vector Search Index - containing document IDs. - docs_uri_column_name (Optional[str]): The name of the column in the Databricks Vector Search Index - containing document URI. - text_column_name (str): The name of the column in the Databricks Vector Search Index - containing document text to retrieve. 
- use_with_databricks_agent_framework (bool): Whether to use the `DatabricksRM` in a way that is - compatible with the Databricks Mosaic Agent Framework. - """ - ... - - def forward(self, query: Union[str, List[float]], query_type: str = ..., filters_json: Optional[str] = ...) -> Union[dspy.Prediction, List[Dict[str, Any]]]: - """ - Retrieve documents from a Databricks Mosaic AI Vector Search Index that are relevant to the - specified query. - - Args: - query (Union[str, List[float]]): The query text or numeric query vector for which to - retrieve relevant documents. - query_type (str): The type of search query to perform against the Databricks Vector - Search Index. Must be either 'ANN' (approximate nearest neighbor) or 'HYBRID' - (hybrid search). - filters_json (Optional[str]): A JSON string specifying additional query filters. - Example filters: ``{"id <": 5}`` selects records that have an ``id`` column value - less than 5, and ``{"id >=": 5, "id <": 10}`` selects records that have an ``id`` - column value greater than or equal to 5 and less than 10. If specified, this - parameter overrides the `filters_json` parameter passed to the constructor. - - Returns: - A list of dictionaries when ``use_with_databricks_agent_framework`` is ``True``, - or a ``dspy.Prediction`` object when ``use_with_databricks_agent_framework`` is - ``False``. - """ - ... - - - diff --git a/typings/dspy/retrievers/embeddings.pyi b/typings/dspy/retrievers/embeddings.pyi deleted file mode 100644 index 1c520f3..0000000 --- a/typings/dspy/retrievers/embeddings.pyi +++ /dev/null @@ -1,18 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Any, List, Optional - -class Embeddings: - def __init__(self, corpus: List[str], embedder, k: int = ..., callbacks: Optional[List[Any]] = ..., cache: bool = ..., brute_force_threshold: int = ..., normalize: bool = ...) -> None: - ... - - def __call__(self, query: str): # -> Prediction: - ... 
- - def forward(self, query: str): # -> Prediction: - ... - - - diff --git a/typings/dspy/retrievers/retrieve.pyi b/typings/dspy/retrievers/retrieve.pyi deleted file mode 100644 index 6a1a6c3..0000000 --- a/typings/dspy/retrievers/retrieve.pyi +++ /dev/null @@ -1,37 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import List, Optional, Union -from dspy.predict.parameter import Parameter -from dspy.primitives.prediction import Prediction -from dspy.utils.callback import with_callbacks - -def single_query_passage(passages): # -> Prediction: - ... - -class Retrieve(Parameter): - name = ... - input_variable = ... - desc = ... - def __init__(self, k=..., callbacks=...) -> None: - ... - - def reset(self): # -> None: - ... - - def dump_state(self): # -> dict[str, Any]: - ... - - def load_state(self, state): # -> None: - ... - - @with_callbacks - def __call__(self, *args, **kwargs): # -> List[str] | Prediction | List[Prediction]: - ... - - def forward(self, query: str, k: Optional[int] = ..., **kwargs) -> Union[List[str], Prediction, List[Prediction]]: - ... - - - diff --git a/typings/dspy/retrievers/weaviate_rm.pyi b/typings/dspy/retrievers/weaviate_rm.pyi deleted file mode 100644 index fcc2eba..0000000 --- a/typings/dspy/retrievers/weaviate_rm.pyi +++ /dev/null @@ -1,65 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import dspy -import weaviate -from typing import List, Optional, Union -from dspy.primitives.prediction import Prediction - -class WeaviateRM(dspy.Retrieve): - """A retrieval module that uses Weaviate to return the top passages for a given query. - - Assumes that a Weaviate collection has been created and populated with the following payload: - - content: The text of the passage - - Args: - weaviate_collection_name (str): The name of the Weaviate collection. - weaviate_client (WeaviateClient): An instance of the Weaviate client. - k (int, optional): The default number of top passages to retrieve. 
Default to 3. - tenant_id (str, optional): The tenant to retrieve objects from. - - Examples: - Below is a code snippet that shows how to use Weaviate as the default retriever: - ```python - import weaviate - - llm = dspy.Cohere(model="command-r-plus", api_key=api_key) - weaviate_client = weaviate.connect_to_[local, wcs, custom, embedded]("your-path-here") - retriever_model = WeaviateRM("my_collection_name", weaviate_client=weaviate_client) - dspy.settings.configure(lm=llm, rm=retriever_model) - - retrieve = dspy.Retrieve(k=1) - topK_passages = retrieve("what are the stages in planning, sanctioning and execution of public works").passages - ``` - - Below is a code snippet that shows how to use Weaviate in the forward() function of a module - ```python - self.retrieve = WeaviateRM("my_collection_name", weaviate_client=weaviate_client, k=num_passages) - ``` - """ - def __init__(self, weaviate_collection_name: str, weaviate_client: Union[weaviate.WeaviateClient, weaviate.Client], weaviate_collection_text_key: Optional[str] = ..., k: int = ..., tenant_id: Optional[str] = ...) -> None: - ... - - def forward(self, query_or_queries: Union[str, List[str]], k: Optional[int] = ..., **kwargs) -> Prediction: - """Search with Weaviate for self.k top passages for query or queries. - - Args: - query_or_queries (Union[str, List[str]]): The query or queries to search for. - k (Optional[int]): The number of top passages to retrieve. Defaults to self.k. - kwargs : - - Returns: - dspy.Prediction: An object containing the retrieved passages. - """ - ... - - def get_objects(self, num_samples: int, fields: List[str]) -> List[dict]: - """Get objects from Weaviate using the cursor API.""" - ... - - def insert(self, new_object_properties: dict): # -> None: - ... 
- - - diff --git a/typings/dspy/signatures/__init__.pyi b/typings/dspy/signatures/__init__.pyi deleted file mode 100644 index 8501972..0000000 --- a/typings/dspy/signatures/__init__.pyi +++ /dev/null @@ -1,8 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from dspy.signatures.field import InputField, OldField, OldInputField, OldOutputField, OutputField -from dspy.signatures.signature import Signature, SignatureMeta, ensure_signature, infer_prefix, make_signature - -__all__ = ["InputField", "OutputField", "OldField", "OldInputField", "OldOutputField", "SignatureMeta", "Signature", "infer_prefix", "ensure_signature", "make_signature"] diff --git a/typings/dspy/signatures/field.pyi b/typings/dspy/signatures/field.pyi deleted file mode 100644 index 067b9bc..0000000 --- a/typings/dspy/signatures/field.pyi +++ /dev/null @@ -1,47 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -DSPY_FIELD_ARG_NAMES = ... -PYDANTIC_CONSTRAINT_MAP = ... -def move_kwargs(**kwargs): # -> dict[Any, Any]: - ... - -def InputField(**kwargs): # -> Any: - ... - -def OutputField(**kwargs): # -> Any: - ... - -def new_to_old_field(field): # -> OldInputField | OldOutputField: - ... - -class OldField: - """A more ergonomic datatype that infers prefix and desc if omitted.""" - def __init__(self, *, prefix=..., desc=..., input, format=...) -> None: - ... - - def finalize(self, key, inferred_prefix): # -> None: - """Set the prefix if it's not provided explicitly.""" - ... - - def __repr__(self): # -> str: - ... - - def __eq__(self, __value: object) -> bool: - ... - - - -class OldInputField(OldField): - def __init__(self, *, prefix=..., desc=..., format=...) -> None: - ... - - - -class OldOutputField(OldField): - def __init__(self, *, prefix=..., desc=..., format=...) -> None: - ... 
- - - diff --git a/typings/dspy/signatures/signature.pyi b/typings/dspy/signatures/signature.pyi deleted file mode 100644 index 37165e1..0000000 --- a/typings/dspy/signatures/signature.pyi +++ /dev/null @@ -1,175 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Any, Dict, Optional, Tuple, Type, Union -from pydantic import BaseModel -from pydantic.fields import FieldInfo - -"""Signature class for DSPy. - -You typically subclass the Signature class, like this: - class MySignature(dspy.Signature): - input: str = InputField(desc="...") - output: int = OutputField(desc="...") - -You can call Signature("input1, input2 -> output1, output2") to create a new signature type. -You can also include instructions, Signature("input -> output", "This is a test"). -But it's generally better to use the make_signature function. - -If you are not sure if your input is a string representation, (like "input1, input2 -> output1, output2"), -or a signature, you can use the ensure_signature function. - -For compatibility with the legacy dsp format, you can use the signature_to_template function. -""" -class SignatureMeta(type(BaseModel)): - def __call__(cls, *args, **kwargs): # -> type[Signature] | Any: - ... - - def __new__(mcs, signature_name, bases, namespace, **kwargs): # -> type: - ... - - @property - def instructions(cls) -> str: - ... - - @instructions.setter - def instructions(cls, instructions: str) -> None: - ... - - @property - def input_fields(cls) -> dict[str, FieldInfo]: - ... - - @property - def output_fields(cls) -> dict[str, FieldInfo]: - ... - - @property - def fields(cls) -> dict[str, FieldInfo]: - ... - - @property - def signature(cls) -> str: - """The string representation of the signature.""" - ... - - def __repr__(cls): # -> str: - """Output a representation of the signature. 
- - Uses the form: - Signature(question, context -> answer - question: str = InputField(desc="..."), - context: List[str] = InputField(desc="..."), - answer: int = OutputField(desc="..."), - ). - """ - ... - - - -class Signature(BaseModel, metaclass=SignatureMeta): - "" - @classmethod - def with_instructions(cls, instructions: str) -> Type[Signature]: - ... - - @classmethod - def with_updated_fields(cls, name: str, type_: Optional[Type] = ..., **kwargs: dict[str, Any]) -> Type[Signature]: - """Create a new Signature class with the updated field information. - - Returns a new Signature class with the field, name, updated - with fields[name].json_schema_extra[key] = value. - - Args: - name: The name of the field to update. - type_: The new type of the field. - kwargs: The new values for the field. - - Returns: - A new Signature class (not an instance) with the updated field information. - """ - ... - - @classmethod - def prepend(cls, name, field, type_=...) -> Type[Signature]: - ... - - @classmethod - def append(cls, name, field, type_=...) -> Type[Signature]: - ... - - @classmethod - def delete(cls, name) -> Type[Signature]: - ... - - @classmethod - def insert(cls, index: int, name: str, field, type_: Optional[Type] = ...) -> Type[Signature]: - ... - - @classmethod - def equals(cls, other) -> bool: - """Compare the JSON schema of two Signature classes.""" - ... - - @classmethod - def dump_state(cls): # -> dict[str, str | list[Any]]: - ... - - @classmethod - def load_state(cls, state): # -> Signature: - ... - - - -def ensure_signature(signature: Union[str, Type[Signature]], instructions=...) -> Signature: - ... - -def make_signature(signature: Union[str, Dict[str, Tuple[type, FieldInfo]]], instructions: Optional[str] = ..., signature_name: str = ..., custom_types: Optional[Dict[str, Type]] = ...) -> Type[Signature]: - """Create a new Signature subclass with the specified fields and instructions. 
- - Args: - signature: Either a string in the format "input1, input2 -> output1, output2" - or a dictionary mapping field names to tuples of (type, FieldInfo). - instructions: Optional string containing instructions/prompt for the signature. - If not provided, defaults to a basic description of inputs and outputs. - signature_name: Optional string to name the generated Signature subclass. - Defaults to "StringSignature". - custom_types: Optional dictionary mapping type names to their actual type objects. - Useful for resolving custom types that aren't built-ins or in the typing module. - - Returns: - A new signature class with the specified fields and instructions. - - Examples: - - ``` - # Using string format - sig1 = make_signature("question, context -> answer") - - # Using dictionary format - sig2 = make_signature({ - "question": (str, InputField()), - "answer": (str, OutputField()) - }) - - # Using custom types - class MyType: - pass - - sig3 = make_signature("input: MyType -> output", custom_types={"MyType": MyType}) - ``` - """ - ... - -def infer_prefix(attribute_name: str) -> str: - """Infer a prefix from an attribute name by converting it to a human-readable format. - - Examples: - "camelCaseText" -> "Camel Case Text" - "snake_case_text" -> "Snake Case Text" - "text2number" -> "Text 2 Number" - "HTMLParser" -> "HTML Parser" - """ - ... - diff --git a/typings/dspy/signatures/utils.pyi b/typings/dspy/signatures/utils.pyi deleted file mode 100644 index 993e1c8..0000000 --- a/typings/dspy/signatures/utils.pyi +++ /dev/null @@ -1,10 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Literal -from pydantic.fields import FieldInfo - -def get_dspy_field_type(field: FieldInfo) -> Literal["input", "output"]: - ... 
- diff --git a/typings/dspy/streaming/__init__.pyi b/typings/dspy/streaming/__init__.pyi deleted file mode 100644 index 18b0621..0000000 --- a/typings/dspy/streaming/__init__.pyi +++ /dev/null @@ -1,9 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from dspy.streaming.messages import StatusMessage, StatusMessageProvider, StreamResponse -from dspy.streaming.streamify import apply_sync_streaming, streamify, streaming_response -from dspy.streaming.streaming_listener import StreamListener - -__all__ = ["StatusMessage", "StatusMessageProvider", "streamify", "StreamListener", "StreamResponse", "streaming_response", "apply_sync_streaming"] diff --git a/typings/dspy/streaming/messages.pyi b/typings/dspy/streaming/messages.pyi deleted file mode 100644 index 4f53a0f..0000000 --- a/typings/dspy/streaming/messages.pyi +++ /dev/null @@ -1,96 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from dataclasses import dataclass -from typing import Any, Dict, Optional -from dspy.utils.callback import BaseCallback - -@dataclass -class StreamResponse: - predict_name: str - signature_field_name: str - chunk: str - ... - - -@dataclass -class StatusMessage: - """Dataclass that wraps a status message for status streaming.""" - message: str - ... - - -def sync_send_to_stream(stream, message): # -> None: - """Send message to stream in a sync context, regardless of event loop state.""" - ... - -class StatusMessageProvider: - """Provides customizable status message streaming for DSPy programs. - - This class serves as a base for creating custom status message providers. Users can subclass - and override its methods to define specific status messages for different stages of program execution, - each method must return a string. - - Example: - ```python - class MyStatusMessageProvider(StatusMessageProvider): - def lm_start_status_message(self, instance, inputs): - return f"Calling LM with inputs {inputs}..." 
- - def module_end_status_message(self, outputs): - return f"Module finished with output: {outputs}!" - - program = dspy.streamify(dspy.Predict("q->a"), status_message_provider=MyStatusMessageProvider()) - ``` - """ - def tool_start_status_message(self, instance: Any, inputs: Dict[str, Any]): # -> str: - """Status message before a `dspy.Tool` is called.""" - ... - - def tool_end_status_message(self, outputs: Any): # -> LiteralString: - """Status message after a `dspy.Tool` is called.""" - ... - - def module_start_status_message(self, instance: Any, inputs: Dict[str, Any]): # -> None: - """Status message before a `dspy.Module` or `dspy.Predict` is called.""" - ... - - def module_end_status_message(self, outputs: Any): # -> None: - """Status message after a `dspy.Module` or `dspy.Predict` is called.""" - ... - - def lm_start_status_message(self, instance: Any, inputs: Dict[str, Any]): # -> None: - """Status message before a `dspy.LM` is called.""" - ... - - def lm_end_status_message(self, outputs: Any): # -> None: - """Status message after a `dspy.LM` is called.""" - ... - - - -class StatusStreamingCallback(BaseCallback): - def __init__(self, status_message_provider: Optional[StatusMessageProvider] = ...) -> None: - ... - - def on_tool_start(self, call_id: str, instance: Any, inputs: Dict[str, Any]): # -> None: - ... - - def on_tool_end(self, call_id: str, outputs: Optional[Dict[str, Any]], exception: Optional[Exception] = ...): # -> None: - ... - - def on_lm_start(self, call_id: str, instance: Any, inputs: Dict[str, Any]): # -> None: - ... - - def on_lm_end(self, call_id: str, outputs: Optional[Dict[str, Any]], exception: Optional[Exception] = ...): # -> None: - ... - - def on_module_start(self, call_id: str, instance: Any, inputs: Dict[str, Any]): # -> None: - ... - - def on_module_end(self, call_id: str, outputs: Optional[Dict[str, Any]], exception: Optional[Exception] = ...): # -> None: - ... 
- - - diff --git a/typings/dspy/streaming/streamify.pyi b/typings/dspy/streaming/streamify.pyi deleted file mode 100644 index 37aa9f3..0000000 --- a/typings/dspy/streaming/streamify.pyi +++ /dev/null @@ -1,149 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Any, AsyncGenerator, Awaitable, Callable, Generator, List, Optional, TYPE_CHECKING -from dspy.streaming.messages import StatusMessageProvider -from dspy.streaming.streaming_listener import StreamListener -from dspy.primitives.module import Module - -logger = ... -if TYPE_CHECKING: - ... -def streamify(program: Module, status_message_provider: Optional[StatusMessageProvider] = ..., stream_listeners: Optional[List[StreamListener]] = ..., include_final_prediction_in_output_stream: bool = ..., is_async_program: bool = ..., async_streaming: bool = ...) -> Callable[[Any, Any], Awaitable[Any]]: - """ - Wrap a DSPy program so that it streams its outputs incrementally, rather than returning them - all at once. It also provides status messages to the user to indicate the progress of the program, and users - can implement their own status message provider to customize the status messages and what module to generate - status messages for. - - Args: - program: The DSPy program to wrap with streaming functionality. - status_message_provider: A custom status message generator to use instead of the default one. Users can - implement their own status message generator to customize the status messages and what module to generate - status messages for. - stream_listeners: A list of stream listeners to capture the streaming output of specific fields of sub predicts - in the program. When provided, only the target fields in the target predict will be streamed to the user. - include_final_prediction_in_output_stream: Whether to include the final prediction in the output stream, only - useful when `stream_listeners` is provided. 
If `False`, the final prediction will not be included in the - output stream. When the program hit cache, or no listeners captured anything, the final prediction will - still be included in the output stream even if this is `False`. - is_async_program: Whether the program is async. If `False`, the program will be wrapped with `asyncify`, - otherwise the program will be called with `acall`. - async_streaming: Whether to return an async generator or a sync generator. If `False`, the streaming will be - converted to a sync generator. - - Returns: - A function that takes the same arguments as the original program, but returns an async - generator that yields the program's outputs incrementally. - - Example: - - ```python - import asyncio - import dspy - - dspy.settings.configure(lm=dspy.LM("openai/gpt-4o-mini")) - # Create the program and wrap it with streaming functionality - program = dspy.streamify(dspy.Predict("q->a")) - - # Use the program with streaming output - async def use_streaming(): - output = program(q="Why did a chicken cross the kitchen?") - return_value = None - async for value in output: - if isinstance(value, dspy.Prediction): - return_value = value - else: - print(value) - return return_value - - output = asyncio.run(use_streaming()) - print(output) - ``` - - Example with custom status message provider: - ```python - import asyncio - import dspy - - dspy.settings.configure(lm=dspy.LM("openai/gpt-4o-mini")) - - class MyStatusMessageProvider(StatusMessageProvider): - def module_start_status_message(self, instance, inputs): - return f"Predicting..." - - def tool_end_status_message(self, outputs): - return f"Tool calling finished with output: {outputs}!" 
- - # Create the program and wrap it with streaming functionality - program = dspy.streamify(dspy.Predict("q->a"), status_message_provider=MyStatusMessageProvider()) - - # Use the program with streaming output - async def use_streaming(): - output = program(q="Why did a chicken cross the kitchen?") - return_value = None - async for value in output: - if isinstance(value, dspy.Prediction): - return_value = value - else: - print(value) - return return_value - - output = asyncio.run(use_streaming()) - print(output) - ``` - - Example with stream listeners: - - ```python - import asyncio - import dspy - - dspy.settings.configure(lm=dspy.LM("openai/gpt-4o-mini", cache=False)) - - # Create the program and wrap it with streaming functionality - predict = dspy.Predict("question->answer, reasoning") - stream_listeners = [ - dspy.streaming.StreamListener(signature_field_name="answer"), - dspy.streaming.StreamListener(signature_field_name="reasoning"), - ] - stream_predict = dspy.streamify(predict, stream_listeners=stream_listeners) - - async def use_streaming(): - output = stream_predict( - question="why did a chicken cross the kitchen?", - include_final_prediction_in_output_stream=False, - ) - return_value = None - async for value in output: - if isinstance(value, dspy.Prediction): - return_value = value - else: - print(value) - return return_value - - output = asyncio.run(use_streaming()) - print(output) - ``` - - You should see the streaming chunks (in the format of `dspy.streaming.StreamResponse`) in the console output. - """ - ... - -def apply_sync_streaming(async_generator: AsyncGenerator) -> Generator: - """Convert the async streaming generator to a sync generator.""" - ... - -async def streaming_response(streamer: AsyncGenerator) -> AsyncGenerator: - """ - Convert a DSPy program output stream to an OpenAI-compatible output stream that can be - used by a service as an API response to a streaming request. 
- - Args: - streamer: An async generator that yields values from a DSPy program output stream. - Returns: - An async generator that yields OpenAI-compatible streaming response chunks. - """ - ... - diff --git a/typings/dspy/streaming/streaming_listener.pyi b/typings/dspy/streaming/streaming_listener.pyi deleted file mode 100644 index 723a6a2..0000000 --- a/typings/dspy/streaming/streaming_listener.pyi +++ /dev/null @@ -1,46 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Any, List, Optional, TYPE_CHECKING -from litellm import ModelResponseStream -from dspy.primitives.module import Module - -if TYPE_CHECKING: - ... -class StreamListener: - """Class that listens to the stream to capture the streeaming of a specific output field of a predictor.""" - def __init__(self, signature_field_name: str, predict: Any = ..., predict_name: Optional[str] = ...) -> None: - """ - Args: - signature_field_name: The name of the field to listen to. - predict: The predictor to listen to. If None, when calling `streamify()` it will automatically look for - the predictor that has the `signature_field_name` in its signature. - predict_name: The name of the predictor to listen to. If None, when calling `streamify()` it will - automatically look for the predictor that has the `signature_field_name` in its signature. - """ - ... - - def receive(self, chunk: ModelResponseStream): # -> StreamResponse | None: - ... - - def flush(self) -> str: - """Flush all tokens in the field end queue. - - This method is called to flush out the last a few tokens when the stream is ended. These tokens - are in the buffer because we don't directly yield the tokens received by the stream listener - with the purpose to not yield the end_identifier tokens, e.g., "[[ ## ... ## ]]" for ChatAdapter. - """ - ... 
- - - -def find_predictor_for_stream_listeners(program: Module, stream_listeners: List[StreamListener]): # -> defaultdict[Any, list[Any]]: - """Find the predictor for each stream listener. - - This is a utility function to automatically find the predictor for each stream listener. It is used when some - listeners don't specify the predictor they want to listen to. If a listener's `signature_field_name` is not - unique in the program, this function will raise an error. - """ - ... - diff --git a/typings/dspy/teleprompt/__init__.pyi b/typings/dspy/teleprompt/__init__.pyi deleted file mode 100644 index 9f11018..0000000 --- a/typings/dspy/teleprompt/__init__.pyi +++ /dev/null @@ -1,20 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from dspy.teleprompt.avatar_optimizer import AvatarOptimizer -from dspy.teleprompt.bettertogether import BetterTogether -from dspy.teleprompt.bootstrap import BootstrapFewShot -from dspy.teleprompt.bootstrap_finetune import BootstrapFinetune -from dspy.teleprompt.copro_optimizer import COPRO -from dspy.teleprompt.ensemble import Ensemble -from dspy.teleprompt.infer_rules import InferRules -from dspy.teleprompt.knn_fewshot import KNNFewShot -from dspy.teleprompt.mipro_optimizer_v2 import MIPROv2 -from dspy.teleprompt.random_search import BootstrapFewShotWithRandomSearch -from dspy.teleprompt.simba import SIMBA -from dspy.teleprompt.teleprompt import Teleprompter -from dspy.teleprompt.teleprompt_optuna import BootstrapFewShotWithOptuna -from dspy.teleprompt.vanilla import LabeledFewShot - -__all__ = ["AvatarOptimizer", "BetterTogether", "BootstrapFewShot", "BootstrapFinetune", "COPRO", "Ensemble", "KNNFewShot", "MIPROv2", "BootstrapFewShotWithRandomSearch", "BootstrapFewShotWithOptuna", "LabeledFewShot", "InferRules", "SIMBA"] diff --git a/typings/dspy/teleprompt/avatar_optimizer.pyi b/typings/dspy/teleprompt/avatar_optimizer.pyi deleted file mode 100644 index 8ba0118..0000000 --- 
a/typings/dspy/teleprompt/avatar_optimizer.pyi +++ /dev/null @@ -1,59 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import dspy -from typing import Callable, List, Optional -from pydantic import BaseModel -from dspy.predict.avatar import ActionOutput -from dspy.teleprompt.teleprompt import Teleprompter - -DEFAULT_MAX_EXAMPLES = ... -class EvalResult(BaseModel): - example: dict - score: float - actions: Optional[List[ActionOutput]] = ... - - -class Comparator(dspy.Signature): - """After executing the given actions on user inputs using the given instruction, some inputs have yielded good, results, while others have not. I'll provide you the inputs along with their, corresponding evaluation metrics: - -Task: -(1) Firstly, identify and contrast the patterns of inputs that have achieved good results with those that have not. -(2) Then, review the computational logic for any inconsistencies in the previous actions. -(3) Lastly, specify the modification in tools used that can lead to improved performance on the negative inputs.""" - instruction: str = ... - actions: List[str] = ... - pos_input_with_metrics: List[EvalResult] = ... - neg_input_with_metrics: List[EvalResult] = ... - feedback: str = ... - - -class FeedbackBasedInstruction(dspy.Signature): - """There is a task that needs to be completed for which one can use multiple tools to achieve the desired outcome. A group's performance was evaluated on a dataset of inputs, the inputs that did well are positive inputs, and the inputs that did not do well are negative inputs. - -You received feedback on how they can better use the tools to improve your performance on the negative inputs. You have been provided with the previous instruction, that they followed to use tools to complete the task, and the feedback on your performance. - -Your task is to incorporate the feedback and generate a detailed instruction for the group to follow to improve their performance on the task. 
- -Make sure that the new instruction talks about how to use the tools effectively and should be no more than 3 paragraphs long. The previous instruction contains general guidelines that you must retain in the new instruction.""" - previous_instruction: str = ... - feedback: str = ... - new_instruction: str = ... - - -class AvatarOptimizer(Teleprompter): - def __init__(self, metric: Callable, max_iters: int = ..., lower_bound: int = ..., upper_bound: int = ..., max_positive_inputs: Optional[int] = ..., max_negative_inputs: Optional[int] = ..., optimize_for: str = ...) -> None: - ... - - def process_example(self, actor, example, return_outputs): # -> tuple[Any, Any, Any] | tuple[Any, None, Literal[0]] | Literal[0]: - ... - - def thread_safe_evaluator(self, devset, actor, return_outputs=..., num_threads=...): # -> tuple[Any | float, list[Any]] | float: - ... - - def compile(self, student, *, trainset): - ... - - - diff --git a/typings/dspy/teleprompt/bettertogether.pyi b/typings/dspy/teleprompt/bettertogether.pyi deleted file mode 100644 index 4a672b8..0000000 --- a/typings/dspy/teleprompt/bettertogether.pyi +++ /dev/null @@ -1,20 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Callable, List, Optional -from dspy.primitives.example import Example -from dspy.primitives.module import Module -from dspy.teleprompt.teleprompt import Teleprompter - -logger = ... -class BetterTogether(Teleprompter): - STRAT_SEP = ... - def __init__(self, metric: Callable, prompt_optimizer: Optional[Teleprompter] = ..., weight_optimizer: Optional[Teleprompter] = ..., seed: Optional[int] = ...) -> None: - ... - - def compile(self, student: Module, trainset: List[Example], strategy: str = ..., valset_ratio=...) -> Module: - ... 
- - - diff --git a/typings/dspy/teleprompt/bootstrap.pyi b/typings/dspy/teleprompt/bootstrap.pyi deleted file mode 100644 index 8c09f65..0000000 --- a/typings/dspy/teleprompt/bootstrap.pyi +++ /dev/null @@ -1,37 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Dict, Optional -from dspy.teleprompt.teleprompt import Teleprompter - -logger = ... -class BootstrapFewShot(Teleprompter): - def __init__(self, metric=..., metric_threshold=..., teacher_settings: Optional[Dict] = ..., max_bootstrapped_demos=..., max_labeled_demos=..., max_rounds=..., max_errors=...) -> None: - """A Teleprompter class that composes a set of demos/examples to go into a predictor's prompt. - These demos come from a combination of labeled examples in the training set, and bootstrapped demos. - - Args: - metric (Callable): A function that compares an expected value and predicted value, - outputting the result of that comparison. - metric_threshold (float, optional): If the metric yields a numerical value, then check it - against this threshold when deciding whether or not to accept a bootstrap example. - Defaults to None. - teacher_settings (dict, optional): Settings for the `teacher` model. - Defaults to None. - max_bootstrapped_demos (int): Maximum number of bootstrapped demonstrations to include. - Defaults to 4. - max_labeled_demos (int): Maximum number of labeled demonstrations to include. - Defaults to 16. - max_rounds (int): Number of iterations to attempt generating the required bootstrap - examples. If unsuccessful after `max_rounds`, the program ends. Defaults to 1. - max_errors (Optional[int]): Maximum number of errors until program ends. - If ``None``, inherits from ``dspy.settings.max_errors``. - """ - ... - - def compile(self, student, *, teacher=..., trainset): - ... 
- - - diff --git a/typings/dspy/teleprompt/bootstrap_finetune.pyi b/typings/dspy/teleprompt/bootstrap_finetune.pyi deleted file mode 100644 index 0d98cf1..0000000 --- a/typings/dspy/teleprompt/bootstrap_finetune.pyi +++ /dev/null @@ -1,76 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from dataclasses import dataclass -from typing import Any, Callable, Dict, List, Optional, Union -from dspy.adapters.base import Adapter -from dspy.clients.lm import LM -from dspy.primitives.example import Example -from dspy.primitives.module import Module -from dspy.teleprompt.teleprompt import Teleprompter - -logger = ... -class FinetuneTeleprompter(Teleprompter): - def __init__(self, train_kwargs: Optional[Union[Dict[str, Any], Dict[LM, Dict[str, Any]]]] = ...) -> None: - ... - - @staticmethod - def convert_to_lm_dict(arg) -> Dict[LM, Any]: - ... - - - -class BootstrapFinetune(FinetuneTeleprompter): - def __init__(self, metric: Optional[Callable] = ..., multitask: bool = ..., train_kwargs: Optional[Union[Dict[str, Any], Dict[LM, Dict[str, Any]]]] = ..., adapter: Optional[Union[Adapter, Dict[LM, Adapter]]] = ..., exclude_demos: bool = ..., num_threads: Optional[int] = ...) -> None: - ... - - def compile(self, student: Module, trainset: List[Example], teacher: Optional[Union[Module, List[Module]]] = ...) -> Module: - ... - - @staticmethod - def finetune_lms(finetune_dict) -> Dict[Any, LM]: - ... - - - -def build_call_data_from_trace(trace: List[Dict], pred_ind: int, adapter: Adapter, exclude_demos: bool = ...) -> Dict[str, List[Dict[str, Any]]]: - ... - -@dataclass -class FailedPrediction: - completion_text: str - format_reward: Union[float, None] = ... - - -def bootstrap_trace_data(program: Module, dataset: List[Example], metric: Optional[Callable] = ..., num_threads: Optional[int] = ..., raise_on_error=..., capture_failed_parses=..., failure_score: float = ..., format_failure_score: float = ..., log_format_failures: bool = ...) -> List[Dict[str, Any]]: - ... 
- -def all_predictors_have_lms(program: Module) -> bool: - """Return True if all predictors in the program have an LM set.""" - ... - -def copy_program_with_lms(program: Module) -> Module: - ... - -def prepare_student(student: Module) -> Module: - ... - -def prepare_teacher(student: Module, teacher: Optional[Module] = ...) -> Module: - ... - -def assert_structural_equivalency(program1: object, program2: object): # -> None: - ... - -def assert_no_shared_predictor(program1: Module, program2: Module): # -> None: - ... - -def get_unique_lms(program: Module) -> List[LM]: - ... - -def launch_lms(program: Module): # -> None: - ... - -def kill_lms(program: Module): # -> None: - ... - diff --git a/typings/dspy/teleprompt/copro_optimizer.pyi b/typings/dspy/teleprompt/copro_optimizer.pyi deleted file mode 100644 index b1900b9..0000000 --- a/typings/dspy/teleprompt/copro_optimizer.pyi +++ /dev/null @@ -1,45 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import dspy -from dspy.signatures import Signature -from dspy.teleprompt.teleprompt import Teleprompter - -logger = ... -class BasicGenerateInstruction(Signature): - """You are an instruction optimizer for large language models. I will give you a ``signature`` of fields (inputs and outputs) in English. Your task is to propose an instruction that will lead a good language model to perform the task well. Don't be afraid to be creative.""" - basic_instruction = ... - proposed_instruction = ... - proposed_prefix_for_output_field = ... - - -class GenerateInstructionGivenAttempts(dspy.Signature): - """You are an instruction optimizer for large language models. I will give some task instructions I've tried, along with their corresponding validation scores. The instructions are arranged in increasing order based on their scores, where higher scores indicate better quality. - - Your task is to propose a new instruction that will lead a good language model to perform the task even better. 
Don't be afraid to be creative.""" - attempted_instructions = ... - proposed_instruction = ... - proposed_prefix_for_output_field = ... - - -class COPRO(Teleprompter): - def __init__(self, prompt_model=..., metric=..., breadth=..., depth=..., init_temperature=..., track_stats=..., **_kwargs) -> None: - ... - - def compile(self, student, *, trainset, eval_kwargs): - """ - optimizes `signature` of `student` program - note that it may be zero-shot or already pre-optimized (demos already chosen - `demos != []`) - - parameters: - student: program to optimize and left modified. - trainset: iterable of `Example`s - eval_kwargs: optional, dict - Additional keywords to go into `Evaluate` for the metric. - - Returns optimized version of `student`. - """ - ... - - - diff --git a/typings/dspy/teleprompt/ensemble.pyi b/typings/dspy/teleprompt/ensemble.pyi deleted file mode 100644 index baba8e9..0000000 --- a/typings/dspy/teleprompt/ensemble.pyi +++ /dev/null @@ -1,19 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from dspy.teleprompt.teleprompt import Teleprompter - -class Ensemble(Teleprompter): - def __init__(self, *, reduce_fn=..., size=..., deterministic=...) -> None: - """A common reduce_fn is dspy.majority.""" - ... - - def compile(self, programs): # -> EnsembledProgram: - class EnsembledProgram(dspy.Module): - ... - - - - - diff --git a/typings/dspy/teleprompt/grpo.pyi b/typings/dspy/teleprompt/grpo.pyi deleted file mode 100644 index 1e53311..0000000 --- a/typings/dspy/teleprompt/grpo.pyi +++ /dev/null @@ -1,41 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Any, Callable, Dict, List, Literal, Optional, Union -from dspy.adapters.base import Adapter -from dspy.clients.lm import LM -from dspy.primitives.example import Example -from dspy.primitives.module import Module -from dspy.teleprompt.bootstrap_finetune import FinetuneTeleprompter - -logger = ... 
-class GRPO(FinetuneTeleprompter): - def __init__(self, metric: Optional[Callable] = ..., multitask: bool = ..., train_kwargs: Optional[Union[Dict[str, Any], Dict[LM, Dict[str, Any]]]] = ..., adapter: Optional[Union[Adapter, Dict[LM, Adapter]]] = ..., exclude_demos: bool = ..., num_threads: int = ..., num_train_steps: int = ..., seed: int = ..., num_dspy_examples_per_grpo_step: int = ..., num_rollouts_per_grpo_step: int = ..., use_train_as_val: bool = ..., num_steps_for_val: int = ..., report_train_scores: bool = ..., failure_score: float = ..., format_failure_score: float = ..., variably_invoked_predictor_grouping_mode: Union[Literal["truncate"], Literal["fill"], Literal["ragged"]] = ..., variably_invoked_predictor_fill_strategy: Optional[Union[Literal["randint"], Literal["max"]]] = ...) -> None: - ... - - def validate_trace_data_and_log_issues(self, trace_data: List[List[List[Dict[str, Any]]]], subsample_training_dataset: List[Example], num_teachers: int, num_samples_per_input: int, pred_signature_hash_to_ind: Dict[int, int]): # -> None: - ... - - def report_validation_metrics(self, student, trainset, valset, logger, step_idx=...): # -> None: - ... - - def update_shuffled_trainset(self, original_trainset): # -> None: - ... - - def select_training_sample_and_update_shuffled_trainset(self, original_trainset: List[Example], train_step_idx: int) -> List[Example]: - ... - - def compile(self, student: Module, trainset: List[Example], teacher: Optional[Union[Module, List[Module]]] = ..., valset: Optional[List[Example]] = ..., **kwargs) -> Module: - ... - - - -def disable_lm_cache(program: Module, lm_cache_dict: dict): # -> None: - """Disable the LM cache for all predictors in the program.""" - ... - -def recover_lm_cache(program: Module, lm_cache_dict: dict): # -> None: - """Recover the LM caches for all predictors in the program to their original state.""" - ... 
- diff --git a/typings/dspy/teleprompt/infer_rules.pyi b/typings/dspy/teleprompt/infer_rules.pyi deleted file mode 100644 index 3ea66d8..0000000 --- a/typings/dspy/teleprompt/infer_rules.pyi +++ /dev/null @@ -1,44 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import dspy -from dspy.teleprompt import BootstrapFewShot - -logger = ... -class InferRules(BootstrapFewShot): - def __init__(self, num_candidates=..., num_rules=..., num_threads=..., teacher_settings=..., **kwargs) -> None: - ... - - def compile(self, student, *, teacher=..., trainset, valset=...): # -> None: - ... - - def induce_natural_language_rules(self, predictor, trainset): # -> object | Any: - ... - - def update_program_instructions(self, predictor, natural_language_rules): # -> None: - ... - - def format_examples(self, demos, signature): # -> str: - ... - - def get_predictor_demos(self, trainset, predictor): # -> list[dict[Any, Any]]: - ... - - def evaluate_program(self, program, dataset): - ... - - - -class RulesInductionProgram(dspy.Module): - def __init__(self, num_rules, teacher_settings=...) -> None: - class CustomRulesInduction(dspy.Signature): - ... - - - - def forward(self, examples_text): # -> Any: - ... - - - diff --git a/typings/dspy/teleprompt/knn_fewshot.pyi b/typings/dspy/teleprompt/knn_fewshot.pyi deleted file mode 100644 index 0b5547d..0000000 --- a/typings/dspy/teleprompt/knn_fewshot.pyi +++ /dev/null @@ -1,57 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Any -from dspy.clients import Embedder -from dspy.primitives import Example -from dspy.teleprompt.teleprompt import Teleprompter - -class KNNFewShot(Teleprompter): - def __init__(self, k: int, trainset: list[Example], vectorizer: Embedder, **few_shot_bootstrap_args: dict[str, Any]) -> None: - """ - KNNFewShot is an optimizer that uses an in-memory KNN retriever to find the k nearest neighbors - in a trainset at test time. 
For each input example in a forward call, it identifies the k most - similar examples from the trainset and attaches them as demonstrations to the student module. - - Args: - k: The number of nearest neighbors to attach to the student model. - trainset: The training set to use for few-shot prompting. - vectorizer: The `Embedder` to use for vectorization - **few_shot_bootstrap_args: Additional arguments for the `BootstrapFewShot` optimizer. - - Example: - ```python - import dspy - from sentence_transformers import SentenceTransformer - - # Define a QA module with chain of thought - qa = dspy.ChainOfThought("question -> answer") - - # Create a training dataset with examples - trainset = [ - dspy.Example(question="What is the capital of France?", answer="Paris").with_inputs("question"), - # ... more examples ... - ] - - # Initialize KNNFewShot with a sentence transformer model - knn_few_shot = KNNFewShot( - k=3, - trainset=trainset, - vectorizer=dspy.Embedder(SentenceTransformer("all-MiniLM-L6-v2").encode) - ) - - # Compile the QA module with few-shot learning - compiled_qa = knn_few_shot.compile(qa) - - # Use the compiled module - result = compiled_qa("What is the capital of Belgium?") - ``` - """ - ... - - def compile(self, student, *, teacher=...): - ... - - - diff --git a/typings/dspy/teleprompt/mipro_optimizer_v2.pyi b/typings/dspy/teleprompt/mipro_optimizer_v2.pyi deleted file mode 100644 index 36a0799..0000000 --- a/typings/dspy/teleprompt/mipro_optimizer_v2.pyi +++ /dev/null @@ -1,28 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Any, Callable, List, Literal, Optional, TYPE_CHECKING -from dspy.teleprompt.teleprompt import Teleprompter - -if TYPE_CHECKING: - ... -logger = ... -BOOTSTRAPPED_FEWSHOT_EXAMPLES_IN_CONTEXT = ... -LABELED_FEWSHOT_EXAMPLES_IN_CONTEXT = ... -MIN_MINIBATCH_SIZE = ... -AUTO_RUN_SETTINGS = ... -YELLOW = ... -GREEN = ... -BLUE = ... -BOLD = ... -ENDC = ... 
-class MIPROv2(Teleprompter): - def __init__(self, metric: Callable, prompt_model: Optional[Any] = ..., task_model: Optional[Any] = ..., teacher_settings: Optional[dict] = ..., max_bootstrapped_demos: int = ..., max_labeled_demos: int = ..., auto: Optional[Literal["light", "medium", "heavy"]] = ..., num_candidates: Optional[int] = ..., num_threads: Optional[int] = ..., max_errors: Optional[int] = ..., seed: int = ..., init_temperature: float = ..., verbose: bool = ..., track_stats: bool = ..., log_dir: Optional[str] = ..., metric_threshold: Optional[float] = ...) -> None: - ... - - def compile(self, student: Any, *, trainset: List, teacher: Any = ..., valset: Optional[List] = ..., num_trials: Optional[int] = ..., max_bootstrapped_demos: Optional[int] = ..., max_labeled_demos: Optional[int] = ..., seed: Optional[int] = ..., minibatch: bool = ..., minibatch_size: int = ..., minibatch_full_eval_steps: int = ..., program_aware_proposer: bool = ..., data_aware_proposer: bool = ..., view_data_batch_size: int = ..., tip_aware_proposer: bool = ..., fewshot_aware_proposer: bool = ..., requires_permission_to_run: bool = ..., provide_traceback: Optional[bool] = ...) -> Any: - ... - - - diff --git a/typings/dspy/teleprompt/random_search.pyi b/typings/dspy/teleprompt/random_search.pyi deleted file mode 100644 index f24104d..0000000 --- a/typings/dspy/teleprompt/random_search.pyi +++ /dev/null @@ -1,15 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from dspy.teleprompt.teleprompt import Teleprompter - -class BootstrapFewShotWithRandomSearch(Teleprompter): - def __init__(self, metric, teacher_settings=..., max_bootstrapped_demos=..., max_labeled_demos=..., max_rounds=..., num_candidate_programs=..., num_threads=..., max_errors=..., stop_at_score=..., metric_threshold=...) -> None: - ... - - def compile(self, student, *, teacher=..., trainset, valset=..., restrict=..., labeled_sample=...): - ... 
- - - diff --git a/typings/dspy/teleprompt/signature_opt.pyi b/typings/dspy/teleprompt/signature_opt.pyi deleted file mode 100644 index cd8abb8..0000000 --- a/typings/dspy/teleprompt/signature_opt.pyi +++ /dev/null @@ -1,15 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from .copro_optimizer import COPRO - -class SignatureOptimizer(COPRO): - def __init__(self, prompt_model=..., metric=..., breadth=..., depth=..., init_temperature=..., verbose=..., track_stats=...) -> None: - ... - - def compile(self, student, *, devset, eval_kwargs): - ... - - - diff --git a/typings/dspy/teleprompt/simba.pyi b/typings/dspy/teleprompt/simba.pyi deleted file mode 100644 index 304dc05..0000000 --- a/typings/dspy/teleprompt/simba.pyi +++ /dev/null @@ -1,39 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import dspy -from typing import Callable -from dspy.teleprompt.teleprompt import Teleprompter - -logger = ... -class SIMBA(Teleprompter): - def __init__(self, *, metric: Callable, bsize=..., num_candidates=..., max_steps=..., max_demos=..., demo_input_field_maxlen=..., num_threads=..., temperature_for_sampling=..., temperature_for_candidates=...) -> None: - """ - Initializes SIMBA. - - Args: - metric (Callable): A function that takes an Example and a prediction_dict - as input and returns a float. - bsize (int, optional): Mini-batch size. Defaults to 32. - num_candidates (int, optional): Number of new candidate programs to produce - per iteration. Defaults to 6. - max_steps (int, optional): Number of optimization steps to run. Defaults to 8. - max_demos (int, optional): Maximum number of demos a predictor can hold - before dropping some. Defaults to 4. - demo_input_field_maxlen (int, optional): Maximum number of characters to keep - in an input field when building a new demo. Defaults to 100,000. - num_threads (int, optional): Number of threads for parallel execution. - Defaults to None. 
- temperature_for_sampling (float, optional): Temperature used for picking - programs during the trajectory-sampling step. Defaults to 0.2. - temperature_for_candidates (float, optional): Temperature used for picking - the source program for building new candidates. Defaults to 0.2. - """ - ... - - def compile(self, student: dspy.Module, *, trainset: list[dspy.Example], seed: int = ...): # -> Module: - ... - - - diff --git a/typings/dspy/teleprompt/simba_utils.pyi b/typings/dspy/teleprompt/simba_utils.pyi deleted file mode 100644 index bb67c50..0000000 --- a/typings/dspy/teleprompt/simba_utils.pyi +++ /dev/null @@ -1,55 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import dspy -from typing import Callable - -logger = ... -def prepare_models_for_resampling(program: dspy.Module, n: int): # -> list[LM | Any]: - ... - -def wrap_program(program: dspy.Module, metric: Callable): # -> Callable[..., dict[str, object | Any | float | None]]: - ... - -def append_a_demo(demo_input_field_maxlen): # -> Callable[..., Literal[True]]: - ... - -def append_a_rule(bucket, system, **kwargs): # -> bool: - ... - -class OfferFeedback(dspy.Signature): - """ - You will be given two trajectories of an LLM-driven program's execution. Your goal is to help the program's modules - build up experience on how to maximize the reward value assigned to the program's outputs if it were to receive - similar inputs in the future. - - The module won't see its own history. It will rely on your advice balancing being concrete and being generalizable. - - In your advice: - - Avoid boilerplate. Offer advice that would change the module's behavior for the better in the future. - - Ensure that advice offered to a module M is specific to that M's specific sub-task, not the overall program. - - Rely on contrasting the behavior of the worse trajectory against the better trajectory in making recommendations. 
- - Ensure each unique module name appears exactly once as a key in the advice dictionary. - """ - program_code: str = ... - modules_defn: str = ... - program_inputs: str = ... - oracle_metadata: str = ... - worse_program_trajectory: str = ... - worse_program_outputs: str = ... - worse_reward_value: float = ... - better_program_trajectory: str = ... - better_program_outputs: str = ... - better_reward_value: float = ... - module_names: list[str] = ... - discussion: str = ... - module_advice: dict[str, str] = ... - - -def inspect_modules(program): # -> str: - ... - -def recursive_mask(o): # -> dict[Any, Any | dict[Any, Any] | list[Any | dict[Any, Any] | list[Any] | tuple[Any, ...] | str] | tuple[Any, ...] | str] | list[Any | dict[Any, Any | dict[Any, Any] | list[Any] | tuple[Any, ...] | str] | list[Any] | tuple[Any, ...] | str] | tuple[Any, ...] | str: - ... - diff --git a/typings/dspy/teleprompt/teleprompt.pyi b/typings/dspy/teleprompt/teleprompt.pyi deleted file mode 100644 index f0a1826..0000000 --- a/typings/dspy/teleprompt/teleprompt.pyi +++ /dev/null @@ -1,37 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Any, Optional -from dspy.primitives import Example, Module - -class Teleprompter: - def __init__(self) -> None: - ... - - def compile(self, student: Module, *, trainset: list[Example], teacher: Optional[Module] = ..., valset: Optional[list[Example]] = ..., **kwargs) -> Module: - """ - Optimize the student program. - - Args: - student: The student program to optimize. - trainset: The training set to use for optimization. - teacher: The teacher program to use for optimization. - valset: The validation set to use for optimization. - - Returns: - The optimized student program. - """ - ... - - def get_params(self) -> dict[str, Any]: - """ - Get the parameters of the teleprompter. - - Returns: - The parameters of the teleprompter. - """ - ... 
- - - diff --git a/typings/dspy/teleprompt/teleprompt_optuna.pyi b/typings/dspy/teleprompt/teleprompt_optuna.pyi deleted file mode 100644 index 7b7bec0..0000000 --- a/typings/dspy/teleprompt/teleprompt_optuna.pyi +++ /dev/null @@ -1,18 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from dspy.teleprompt.teleprompt import Teleprompter - -class BootstrapFewShotWithOptuna(Teleprompter): - def __init__(self, metric, teacher_settings=..., max_bootstrapped_demos=..., max_labeled_demos=..., max_rounds=..., num_candidate_programs=..., num_threads=...) -> None: - ... - - def objective(self, trial): - ... - - def compile(self, student, *, teacher=..., max_demos, trainset, valset=...): # -> Any: - ... - - - diff --git a/typings/dspy/teleprompt/utils.pyi b/typings/dspy/teleprompt/utils.pyi deleted file mode 100644 index fca4a56..0000000 --- a/typings/dspy/teleprompt/utils.pyi +++ /dev/null @@ -1,83 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -logger = ... -def create_minibatch(trainset, batch_size=..., rng=...): # -> list[Any]: - """Create a minibatch from the trainset.""" - ... - -def eval_candidate_program(batch_size, trainset, candidate_program, evaluate, rng=...): # -> Prediction: - """Evaluate a candidate program on the trainset, using the specified batch size.""" - ... - -def eval_candidate_program_with_pruning(trial, trial_logs, trainset, candidate_program, evaluate, trial_num, batch_size=...): # -> tuple[Any, Any, int, Literal[True]] | tuple[Any, Any, int, Literal[False]]: - """Evaluation of candidate_program with pruning implemented""" - ... - -def get_program_with_highest_avg_score(param_score_dict, fully_evaled_param_combos): # -> tuple[Any, Any, Any, Any] | tuple[Any, Any | floating[Any], Any, Any]: - """Used as a helper function for bayesian + minibatching optimizers. Returns the program with the highest average score from the batches evaluated so far.""" - ... 
- -def calculate_last_n_proposed_quality(base_program, trial_logs, evaluate, trainset, devset, n): # -> tuple[Any | Literal[0], Any, Any | Literal[0], Any]: - """ - Calculate the average and best quality of the last n programs proposed. This is useful for seeing if our proposals - are actually 'improving' overtime or not. - """ - ... - -def get_task_model_history_for_full_example(candidate_program, task_model, devset, evaluate): - """Get a full trace of the task model's history for a given candidate program.""" - ... - -def print_full_program(program): # -> None: - """Print out the program's instructions & prefixes for each module.""" - ... - -def save_candidate_program(program, log_dir, trial_num, note=...): # -> str | None: - """Save the candidate program to the log directory.""" - ... - -def save_file_to_log_dir(source_file_path, log_dir): # -> None: - ... - -def setup_logging(log_dir): # -> None: - """Setup logger, which will log our print statements to a txt file at our log_dir for later viewing""" - ... - -def get_token_usage(model) -> tuple[int, int]: - """ - Extract total input tokens and output tokens from a model's interaction history. - Returns (total_input_tokens, total_output_tokens). - """ - ... - -def log_token_usage(trial_logs, trial_num, model_dict): # -> None: - """ - Extract total input and output tokens used by each model and log to trial_logs[trial_num]["token_usage"]. - """ - ... - -def get_prompt_model(prompt_model): - ... - -def get_signature(predictor): - ... - -def set_signature(predictor, updated_signature): # -> None: - ... 
- -def create_n_fewshot_demo_sets(student, num_candidate_sets, trainset, max_labeled_demos, max_bootstrapped_demos, metric, teacher_settings, max_errors=..., max_rounds=..., labeled_sample=..., min_num_samples=..., metric_threshold=..., teacher=..., include_non_bootstrapped=..., seed=..., rng=...): # -> dict[Any, Any]: - """ - This function is copied from random_search.py, and creates fewshot examples in the same way that random search does. - This allows us to take advantage of using the same fewshot examples when we use the same random seed in our optimizers. - """ - ... - -def old_getfile(object): # -> str | None: - """Work out which source or compiled file an object was defined in.""" - ... - -def new_getfile(object): # -> str | None: - ... - diff --git a/typings/dspy/teleprompt/vanilla.pyi b/typings/dspy/teleprompt/vanilla.pyi deleted file mode 100644 index d976c02..0000000 --- a/typings/dspy/teleprompt/vanilla.pyi +++ /dev/null @@ -1,15 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from dspy.teleprompt.teleprompt import Teleprompter - -class LabeledFewShot(Teleprompter): - def __init__(self, k=...) -> None: - ... - - def compile(self, student, *, trainset, sample=...): - ... - - - diff --git a/typings/dspy/utils/__init__.pyi b/typings/dspy/utils/__init__.pyi deleted file mode 100644 index d13d3fc..0000000 --- a/typings/dspy/utils/__init__.pyi +++ /dev/null @@ -1,16 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import os -import requests -from dspy.streaming.messages import StatusMessage, StatusMessageProvider -from dspy.utils import exceptions -from dspy.utils.callback import BaseCallback, with_callbacks -from dspy.utils.dummies import DummyLM, DummyVectorizer, dummy_rm -from dspy.utils.inspect_history import pretty_print_history - -def download(url): # -> None: - ... 
- -__all__ = ["download", "exceptions", "BaseCallback", "with_callbacks", "DummyLM", "DummyVectorizer", "dummy_rm", "StatusMessage", "StatusMessageProvider", "pretty_print_history"] diff --git a/typings/dspy/utils/asyncify.pyi b/typings/dspy/utils/asyncify.pyi deleted file mode 100644 index 50c7212..0000000 --- a/typings/dspy/utils/asyncify.pyi +++ /dev/null @@ -1,32 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Any, Awaitable, Callable, TYPE_CHECKING -from dspy.primitives.module import Module - -if TYPE_CHECKING: - ... -_limiter = ... -def get_async_max_workers(): - ... - -def get_limiter(): # -> CapacityLimiter: - ... - -def asyncify(program: Module) -> Callable[[Any, Any], Awaitable[Any]]: - """ - Wraps a DSPy program so that it can be called asynchronously. This is useful for running a - program in parallel with another task (e.g., another DSPy program). - - This implementation propagates the current thread's configuration context to the worker thread. - - Args: - program: The DSPy program to be wrapped for asynchronous execution. - - Returns: - An async function: An async function that, when awaited, runs the program in a worker thread. - The current thread's configuration context is inherited for each call. - """ - ... - diff --git a/typings/dspy/utils/caching.pyi b/typings/dspy/utils/caching.pyi deleted file mode 100644 index d663b3d..0000000 --- a/typings/dspy/utils/caching.pyi +++ /dev/null @@ -1,10 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -_DEFAULT_CACHE_DIR = ... -DSPY_CACHEDIR = ... -def create_subdir_in_cachedir(subdir: str) -> str: - """Create a subdirectory in the DSPy cache directory.""" - ... - diff --git a/typings/dspy/utils/callback.pyi b/typings/dspy/utils/callback.pyi deleted file mode 100644 index 81cf0ce..0000000 --- a/typings/dspy/utils/callback.pyi +++ /dev/null @@ -1,195 +0,0 @@ -""" -This type stub file was generated by pyright. 
-""" - -from typing import Any, Dict, Optional - -ACTIVE_CALL_ID = ... -logger = ... -class BaseCallback: - """A base class for defining callback handlers for DSPy components. - - To use a callback, subclass this class and implement the desired handlers. Each handler - will be called at the appropriate time before/after the execution of the corresponding component. For example, if - you want to print a message before and after an LM is called, implement `the on_llm_start` and `on_lm_end` handler. - Users can set the callback globally using `dspy.settings.configure` or locally by passing it to the component - constructor. - - - Example 1: Set a global callback using `dspy.settings.configure`. - - ``` - import dspy - from dspy.utils.callback import BaseCallback - - class LoggingCallback(BaseCallback): - - def on_lm_start(self, call_id, instance, inputs): - print(f"LM is called with inputs: {inputs}") - - def on_lm_end(self, call_id, outputs, exception): - print(f"LM is finished with outputs: {outputs}") - - dspy.settings.configure( - callbacks=[LoggingCallback()] - ) - - cot = dspy.ChainOfThought("question -> answer") - cot(question="What is the meaning of life?") - - # > LM is called with inputs: {'question': 'What is the meaning of life?'} - # > LM is finished with outputs: {'answer': '42'} - ``` - - Example 2: Set a local callback by passing it to the component constructor. - - ``` - lm_1 = dspy.LM("gpt-3.5-turbo", callbacks=[LoggingCallback()]) - lm_1(question="What is the meaning of life?") - - # > LM is called with inputs: {'question': 'What is the meaning of life?'} - # > LM is finished with outputs: {'answer': '42'} - - lm_2 = dspy.LM("gpt-3.5-turbo") - lm_2(question="What is the meaning of life?") - # No logging here because only `lm_1` has the callback set. - ``` - """ - def on_module_start(self, call_id: str, instance: Any, inputs: Dict[str, Any]): # -> None: - """A handler triggered when forward() method of a module (subclass of dspy.Module) is called. 
- - Args: - call_id: A unique identifier for the call. Can be used to connect start/end handlers. - instance: The Module instance. - inputs: The inputs to the module's forward() method. Each arguments is stored as - a key-value pair in a dictionary. - """ - ... - - def on_module_end(self, call_id: str, outputs: Optional[Any], exception: Optional[Exception] = ...): # -> None: - """A handler triggered after forward() method of a module (subclass of dspy.Module) is executed. - - Args: - call_id: A unique identifier for the call. Can be used to connect start/end handlers. - outputs: The outputs of the module's forward() method. If the method is interrupted by - an exception, this will be None. - exception: If an exception is raised during the execution, it will be stored here. - """ - ... - - def on_lm_start(self, call_id: str, instance: Any, inputs: Dict[str, Any]): # -> None: - """A handler triggered when __call__ method of dspy.LM instance is called. - - Args: - call_id: A unique identifier for the call. Can be used to connect start/end handlers. - instance: The LM instance. - inputs: The inputs to the LM's __call__ method. Each arguments is stored as - a key-value pair in a dictionary. - """ - ... - - def on_lm_end(self, call_id: str, outputs: Optional[Dict[str, Any]], exception: Optional[Exception] = ...): # -> None: - """A handler triggered after __call__ method of dspy.LM instance is executed. - - Args: - call_id: A unique identifier for the call. Can be used to connect start/end handlers. - outputs: The outputs of the LM's __call__ method. If the method is interrupted by - an exception, this will be None. - exception: If an exception is raised during the execution, it will be stored here. - """ - ... - - def on_adapter_format_start(self, call_id: str, instance: Any, inputs: Dict[str, Any]): # -> None: - """A handler triggered when format() method of an adapter (subclass of dspy.Adapter) is called. - - Args: - call_id: A unique identifier for the call. 
Can be used to connect start/end handlers. - instance: The Adapter instance. - inputs: The inputs to the Adapter's format() method. Each arguments is stored as - a key-value pair in a dictionary. - """ - ... - - def on_adapter_format_end(self, call_id: str, outputs: Optional[Dict[str, Any]], exception: Optional[Exception] = ...): # -> None: - """A handler triggered after format() method of an adapter (subclass of dspy.Adapter) is called.. - - Args: - call_id: A unique identifier for the call. Can be used to connect start/end handlers. - outputs: The outputs of the Adapter's format() method. If the method is interrupted - by an exception, this will be None. - exception: If an exception is raised during the execution, it will be stored here. - """ - ... - - def on_adapter_parse_start(self, call_id: str, instance: Any, inputs: Dict[str, Any]): # -> None: - """A handler triggered when parse() method of an adapter (subclass of dspy.Adapter) is called. - - Args: - call_id: A unique identifier for the call. Can be used to connect start/end handlers. - instance: The Adapter instance. - inputs: The inputs to the Adapter's parse() method. Each arguments is stored as - a key-value pair in a dictionary. - """ - ... - - def on_adapter_parse_end(self, call_id: str, outputs: Optional[Dict[str, Any]], exception: Optional[Exception] = ...): # -> None: - """A handler triggered after parse() method of an adapter (subclass of dspy.Adapter) is called. - - Args: - call_id: A unique identifier for the call. Can be used to connect start/end handlers. - outputs: The outputs of the Adapter's parse() method. If the method is interrupted - by an exception, this will be None. - exception: If an exception is raised during the execution, it will be stored here. - """ - ... - - def on_tool_start(self, call_id: str, instance: Any, inputs: Dict[str, Any]): # -> None: - """A handler triggered when a tool is called. - - Args: - call_id: A unique identifier for the call. 
Can be used to connect start/end handlers. - instance: The Tool instance. - inputs: The inputs to the Tool's __call__ method. Each arguments is stored as - a key-value pair in a dictionary. - """ - ... - - def on_tool_end(self, call_id: str, outputs: Optional[Dict[str, Any]], exception: Optional[Exception] = ...): # -> None: - """A handler triggered after a tool is executed. - - Args: - call_id: A unique identifier for the call. Can be used to connect start/end handlers. - outputs: The outputs of the Tool's __call__ method. If the method is interrupted by - an exception, this will be None. - exception: If an exception is raised during the execution, it will be stored here. - """ - ... - - def on_evaluate_start(self, call_id: str, instance: Any, inputs: Dict[str, Any]): # -> None: - """A handler triggered when evaluation is started. - - Args: - call_id: A unique identifier for the call. Can be used to connect start/end handlers. - instance: The Evaluate instance. - inputs: The inputs to the Evaluate's __call__ method. Each arguments is stored as - a key-value pair in a dictionary. - """ - ... - - def on_evaluate_end(self, call_id: str, outputs: Optional[Any], exception: Optional[Exception] = ...): # -> None: - """A handler triggered after evaluation is executed. - - Args: - call_id: A unique identifier for the call. Can be used to connect start/end handlers. - outputs: The outputs of the Evaluate's __call__ method. If the method is interrupted by - an exception, this will be None. - exception: If an exception is raised during the execution, it will be stored here. - """ - ... - - - -def with_callbacks(fn): # -> _Wrapped[..., Any, ..., CoroutineType[Any, Any, Any]] | _Wrapped[..., Any, ..., Any]: - """Decorator to add callback functionality to instance methods.""" - ... 
- diff --git a/typings/dspy/utils/dummies.pyi b/typings/dspy/utils/dummies.pyi deleted file mode 100644 index e387384..0000000 --- a/typings/dspy/utils/dummies.pyi +++ /dev/null @@ -1,93 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import numpy as np -from typing import Union -from dspy.clients.lm import LM -from dspy.utils.callback import with_callbacks - -class DummyLM(LM): - """Dummy language model for unit testing purposes. - - Three modes of operation: - - Mode 1: List of dictionaries - - If a list of dictionaries is provided, the dummy model will return the next dictionary - in the list for each request, formatted according to the `format_field_with_value` function. - - Example: - - ``` - lm = DummyLM([{"answer": "red"}, {"answer": "blue"}]) - dspy.settings.configure(lm=lm) - predictor("What color is the sky?") - # Output: - # [[## answer ##]] - # red - predictor("What color is the sky?") - # Output: - # [[## answer ##]] - # blue - ``` - - Mode 2: Dictionary of dictionaries - - If a dictionary of dictionaries is provided, the dummy model will return the value - corresponding to the key which is contained with the final message of the prompt, - formatted according to the `format_field_with_value` function from the chat adapter. - - ``` - lm = DummyLM({"What color is the sky?": {"answer": "blue"}}) - dspy.settings.configure(lm=lm) - predictor("What color is the sky?") - # Output: - # [[## answer ##]] - # blue - ``` - - Mode 3: Follow examples - - If `follow_examples` is set to True, and the prompt contains an example input exactly equal to the prompt, - the dummy model will return the output from that example. 
- - ``` - lm = DummyLM([{"answer": "red"}], follow_examples=True) - dspy.settings.configure(lm=lm) - predictor("What color is the sky?, demos=dspy.Example(input="What color is the sky?", output="blue")) - # Output: - # [[## answer ##]] - # blue - ``` - - """ - def __init__(self, answers: Union[list[dict[str, str]], dict[str, dict[str, str]]], follow_examples: bool = ...) -> None: - ... - - @with_callbacks - def __call__(self, prompt=..., messages=..., **kwargs): # -> list[Any]: - ... - - async def acall(self, prompt=..., messages=..., **kwargs): # -> list[Any]: - ... - - def get_convo(self, index): # -> tuple[Any, Any]: - """Get the prompt + answer from the ith message.""" - ... - - - -def dummy_rm(passages=...) -> callable: - ... - -class DummyVectorizer: - """Simple vectorizer based on n-grams.""" - def __init__(self, max_length=..., n_gram=...) -> None: - ... - - def __call__(self, texts: list[str]) -> np.ndarray: - ... - - - diff --git a/typings/dspy/utils/exceptions.pyi b/typings/dspy/utils/exceptions.pyi deleted file mode 100644 index 7e75bd2..0000000 --- a/typings/dspy/utils/exceptions.pyi +++ /dev/null @@ -1,14 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Optional -from dspy.signatures.signature import Signature - -class AdapterParseError(Exception): - """Exception raised when adapter cannot parse the LM response.""" - def __init__(self, adapter_name: str, signature: Signature, lm_response: str, message: Optional[str] = ..., parsed_result: Optional[str] = ...) -> None: - ... - - - diff --git a/typings/dspy/utils/inspect_history.pyi b/typings/dspy/utils/inspect_history.pyi deleted file mode 100644 index e516980..0000000 --- a/typings/dspy/utils/inspect_history.pyi +++ /dev/null @@ -1,8 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -def pretty_print_history(history, n: int = ...): # -> None: - """Prints the last n prompts and their completions.""" - ... 
- diff --git a/typings/dspy/utils/langchain_tool.pyi b/typings/dspy/utils/langchain_tool.pyi deleted file mode 100644 index dda9168..0000000 --- a/typings/dspy/utils/langchain_tool.pyi +++ /dev/null @@ -1,24 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import TYPE_CHECKING -from dspy.adapters.types.tool import Tool -from langchain.tools import BaseTool - -if TYPE_CHECKING: - ... -def convert_langchain_tool(tool: BaseTool) -> Tool: - """Build a DSPy tool from a LangChain tool. - - This function converts a LangChain tool (either created with @tool decorator - or by subclassing BaseTool) into a DSPy Tool. - - Args: - tool: The LangChain tool to convert. - - Returns: - A DSPy Tool object. - """ - ... - diff --git a/typings/dspy/utils/logging_utils.pyi b/typings/dspy/utils/logging_utils.pyi deleted file mode 100644 index b1d7fc8..0000000 --- a/typings/dspy/utils/logging_utils.pyi +++ /dev/null @@ -1,51 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -LOGGING_LINE_FORMAT = ... -LOGGING_DATETIME_FORMAT = ... -class DSPyLoggingStream: - """ - A Python stream for use with event logging APIs throughout DSPy (`eprint()`, - `logger.info()`, etc.). This stream wraps `sys.stderr`, forwarding `write()` and - `flush()` calls to the stream referred to by `sys.stderr` at the time of the call. - It also provides capabilities for disabling the stream to silence event logs. - """ - def __init__(self) -> None: - ... - - def write(self, text): # -> None: - ... - - def flush(self): # -> None: - ... - - @property - def enabled(self): # -> bool: - ... - - @enabled.setter - def enabled(self, value): # -> None: - ... - - - -DSPY_LOGGING_STREAM = ... -def disable_logging(): # -> None: - """ - Disables the `DSPyLoggingStream` used by event logging APIs throughout DSPy - (`eprint()`, `logger.info()`, etc), silencing all subsequent event logs. - """ - ... 
- -def enable_logging(): # -> None: - """ - Enables the `DSPyLoggingStream` used by event logging APIs throughout DSPy - (`eprint()`, `logger.info()`, etc), emitting all subsequent event logs. This - reverses the effects of `disable_logging()`. - """ - ... - -def configure_dspy_loggers(root_module_name): # -> None: - ... - diff --git a/typings/dspy/utils/mcp.pyi b/typings/dspy/utils/mcp.pyi deleted file mode 100644 index 2279dfb..0000000 --- a/typings/dspy/utils/mcp.pyi +++ /dev/null @@ -1,22 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -import mcp -from typing import TYPE_CHECKING -from dspy.adapters.types.tool import Tool - -if TYPE_CHECKING: - ... -def convert_mcp_tool(session: mcp.client.session.ClientSession, tool: mcp.types.Tool) -> Tool: - """Build a DSPy tool from an MCP tool. - - Args: - session: The MCP session to use. - tool: The MCP tool to convert. - - Returns: - A dspy Tool object. - """ - ... - diff --git a/typings/dspy/utils/parallelizer.pyi b/typings/dspy/utils/parallelizer.pyi deleted file mode 100644 index 700513d..0000000 --- a/typings/dspy/utils/parallelizer.pyi +++ /dev/null @@ -1,18 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -logger = ... -class ParallelExecutor: - def __init__(self, num_threads=..., max_errors=..., disable_progress_bar=..., provide_traceback=..., compare_results=..., timeout=..., straggler_limit=...) -> None: - """ - Offers isolation between the tasks (dspy.settings) irrespective of whether num_threads == 1 or > 1. - Handles also straggler timeouts. - """ - ... - - def execute(self, function, data): # -> list[None]: - ... - - - diff --git a/typings/dspy/utils/saving.pyi b/typings/dspy/utils/saving.pyi deleted file mode 100644 index 65beb34..0000000 --- a/typings/dspy/utils/saving.pyi +++ /dev/null @@ -1,26 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import TYPE_CHECKING -from dspy.primitives.module import Module - -if TYPE_CHECKING: - ... 
-logger = ... -def get_dependency_versions(): # -> dict[str, str]: - ... - -def load(path: str) -> Module: - """Load saved DSPy model. - - This method is used to load a saved DSPy model with `save_program=True`, i.e., the model is saved with cloudpickle. - - Args: - path (str): Path to the saved model. - - Returns: - The loaded model, a `dspy.Module` instance. - """ - ... - diff --git a/typings/dspy/utils/unbatchify.pyi b/typings/dspy/utils/unbatchify.pyi deleted file mode 100644 index 895cfb8..0000000 --- a/typings/dspy/utils/unbatchify.pyi +++ /dev/null @@ -1,56 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from typing import Any, Callable, List - -class Unbatchify: - def __init__(self, batch_fn: Callable[[List[Any]], List[Any]], max_batch_size: int = ..., max_wait_time: float = ...) -> None: - """ - Initializes the Unbatchify. - - Args: - batch_fn: The batch-processing function that accepts a list of inputs and returns a list of outputs. - max_batch_size: The maximum number of items to include in a batch. - max_wait_time: The maximum time (in seconds) to wait for batch to fill before processing. - """ - ... - - def __call__(self, input_item: Any) -> Any: - """ - Thread-safe function that accepts a single input and returns the corresponding output. - - Args: - input_item: The single input item to process. - - Returns: - The output corresponding to the input_item after processing through batch_fn. - """ - ... - - def close(self): # -> None: - """ - Stops the worker thread and cleans up resources. - """ - ... - - def __enter__(self): # -> Self: - """ - Enables use as a context manager. - """ - ... - - def __exit__(self, exc_type, exc_value, traceback): # -> None: - """ - Ensures resources are cleaned up when exiting context. - """ - ... - - def __del__(self): # -> None: - """ - Ensures the worker thread is terminated when the object is garbage collected. - """ - ... 
- - - diff --git a/typings/dspy/utils/usage_tracker.pyi b/typings/dspy/utils/usage_tracker.pyi deleted file mode 100644 index d6f7279..0000000 --- a/typings/dspy/utils/usage_tracker.pyi +++ /dev/null @@ -1,28 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from contextlib import contextmanager -from typing import Any - -"""Usage tracking utilities for DSPy.""" -class UsageTracker: - """Tracks LM usage data within a context.""" - def __init__(self) -> None: - ... - - def add_usage(self, lm: str, usage_entry: dict): # -> None: - """Add a usage entry to the tracker.""" - ... - - def get_total_tokens(self) -> dict[str, dict[str, Any]]: - """Calculate total tokens from all tracked usage.""" - ... - - - -@contextmanager -def track_usage(): # -> Generator[UsageTracker, Any, None]: - """Context manager for tracking LM usage.""" - ... - From 68e725d499e4bcf386089bba582b3ad2a704e176 Mon Sep 17 00:00:00 2001 From: ethan-wickstrom Date: Wed, 25 Jun 2025 02:23:49 -0500 Subject: [PATCH 14/26] chore(config): update project configuration files --- .vscode/settings.json | 6 +- .windsurfrules | 1400 +++++++++-------------------------------- pyproject.toml | 4 +- uv.lock | 136 +++- 4 files changed, 450 insertions(+), 1096 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 2ff2db9..b8b1cdb 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -3,11 +3,13 @@ "windsurfPyright.analysis.typeCheckingMode": "basedpyright", "editor.defaultFormatter": "Codeium.windsurfPyright", "windsurfPyright.analysis.autoSearchPaths": true, - "windsurfPyright.analysis.stubPath": "typings", "windsurfPyright.analysis.useLibraryCodeForTypes": true, "python.terminal.shellIntegration.enabled": true, "windsurfPyright.analysis.inlayHints.callArgumentNames": false, "windsurfPyright.analysis.inlayHints.functionReturnTypes": false, "windsurfPyright.analysis.inlayHints.genericTypes": false, - "windsurfPyright.analysis.inlayHints.variableTypes": false + 
"windsurfPyright.analysis.inlayHints.variableTypes": false, + "[python]": { + "editor.defaultFormatter": "charliermarsh.ruff" + } } diff --git a/.windsurfrules b/.windsurfrules index cfeef3d..6deae6c 100644 --- a/.windsurfrules +++ b/.windsurfrules @@ -1,1222 +1,438 @@ -# [!SYSTEM] INSTRUCTIONS +# Robofactor Workspace Rules -## Use & Prefer Functional Programming in All Tasks (UNLESS OTHERWISE SPECIFIED) +Adopt a purely functional, stateless approach: -You must apply functional programming principles to all tasks unless explicitly instructed otherwise. +- Write pure, deterministic functions with no side effects; outputs depend only on inputs. +- Favor immutability—never mutate data; always create new values. +- Represent all effects and failures explicitly using containers like IO, Result, or Maybe. +- Compose small, single-purpose functions using flow from returns.pipeline. +- Declare all dependencies explicitly; never use global state or hidden context. +- Validate inputs at boundaries and return structured error values instead of raising exceptions. +- Structure logic as input-process-output pipelines. +- Ensure all operations are idempotent and safely repeatable. 
-### Core Functional Principles +### Type System & Data Modeling -- **First-class functions**: Functions are values that can be assigned, passed as arguments, and returned from other functions -- **Pure functions**: Functions must be deterministic—given the same inputs, always return the same outputs -- **No side effects**: Functions should not alter external state or perform I/O unless explicitly tracked -- **Immutability**: Never modify existing data structures; always create new ones with required changes -- **Declarative style**: Express what should be accomplished, not how to accomplish it step by step -- **Function composition**: Build complex operations by combining simpler functions -- **Prefer recursion over iteration**: Use recursive patterns, but leverage Python's itertools for efficiency +Use the type system to enforce correctness and make invalid states unrepresentable. -### Program Boundaries and Developer Interfaces +- **Use Modern Generic Syntax (PEP 695):** Define generic functions, classes, and type aliases using the `[T]` syntax. Avoid the legacy `TypeVar` and `Generic` from the `typing` module unless required for variance control. -Always create explicit program boundaries. Treat each task as a separate service with a clear interface: + ```python + # Generic class and function + class Stack[T]: + ... + def first[T](items: Sequence[T]) -> Maybe[T]: + ... -```python -from typing import Protocol -from returns.result import Result -from returns.io import IO - -class UserService(Protocol): - """Clear service boundary with typed interface""" - def get_user(self, user_id: int) -> IO[Result[User, UserNotFoundError]]: ... - def create_user(self, data: UserData) -> IO[Result[User, ValidationError]]: ... 
-``` + # Generic type alias + type Result[T, E] = Success[T] | Failure[E] + ``` -### Error Handling as Values +- **Define Algebraic Data Types (ADTs):** + - **Product Types:** Use `@dataclass(frozen=True, slots=True)` to create immutable structured data records. + - **Sum Types:** Use `|` (union types) to define a type that can be one of several distinct variants. +- **Use Abstract Collection Types in Signatures:** For function parameters, use read-only abstract types from `collections.abc` (e.g., `Sequence`, `Mapping`, `Iterable`). Never use mutable concrete types like `list` or `dict` in function signatures. +- **Use Precise Type Annotations:** + - `Final`: For constants that must not be reassigned. + - `Literal`: For variables that must hold one of a few specific values. + - `TypedDict`: For dictionary-like data with a defined structure. + - `Protocol`: For structural subtyping (duck typing). + - `TypeIs`: Create type guard functions that narrow types within a conditional block (PEP 742). +- **Create Semantic Type Aliases:** Use the `type` keyword to create distinct names for primitive types to improve clarity (e.g., `type UserId = int`). -Use the type system to track errors and context, not only success values. Side effects are first-class citizens. +### Function Design & Composition -```python -from returns.result import Result, Success, Failure -from returns.io import IO, IOResult -from returns.maybe import Maybe +Design functions for clarity, testability, and composability. -# Type signature shows all possible outcomes -def parse_config(path: str) -> IOResult[Config, ConfigError]: - """ - Returns: - IOResult[Config, ConfigError]: Success with Config or Failure with error - """ - return IO.from_result( - _read_file(path) - .bind(_parse_json) - .bind(_validate_config) - ) -``` +- **Enforce Keyword-Only Arguments:** Use the `*` separator in function signatures to force callers to use keyword arguments, enhancing clarity. 
-### The `returns` Library for Effect Management + ```python + def create_user(*, name: str, email: str) -> User: ... -Python 3.13 works excellently with the `returns` library for functional effect management: + ``` -```python -from returns.result import Result, Success, Failure -from returns.io import IO, IOResult -from returns.maybe import Maybe -from returns.pipeline import flow -from returns.pointfree import bind +- **Limit Function Complexity:** Keep functions short (under 20 lines) and focused on a single responsibility. Limit parameters to a maximum of four; use a parameter object (a `dataclass`) for more complex inputs. +- **Use **`operator`** for Simple Access:** Employ `attrgetter`, `itemgetter`, and `methodcaller` for direct, high-performance attribute/item access or method calls in higher-order functions. +- **Use **`lambda`** for Complex Logic:** Reserve `lambda` for anonymous functions that require computations, conditional logic, or custom error handling not supported by the `operator` module. +- **Compose with **`returns.pipeline.flow`**:** Construct processing pipelines by passing data through a sequence of functions. Use `returns.pointfree.bind` to chain operations that return containers (`Result`, `Maybe`). 
-# Result type for fallible operations -def divide(a: float, b: float) -> Result[float, str]: - if b == 0: - return Failure("Division by zero") - return Success(a / b) - -# IO type for side effects -def read_file(path: str) -> IOResult[str, Exception]: - @IO - def _inner() -> Result[str, Exception]: - try: - with open(path) as f: - return Success(f.read()) - except Exception as e: - return Failure(e) - return _inner() - -# Compose with flow -result = flow( - user_input, - parse_number, - bind(validate_positive), - bind(calculate_square_root), -) -``` + ```python + from returns.pipeline import flow + from returns.pointfree import bind -## Python 3.13 Specific Features + result = flow( + initial_data, + validate_input, + bind(fetch_record), + bind(transform_record), + ) -### New Type Parameter Syntax (PEP 695) + ``` -Python 3.13 introduces cleaner syntax for generics: +### Effect & Error Handling with`returns` -```python -# Classes with type parameters -class Stack[T]: - def __init__(self) -> None: - self._items: list[T] = [] +Make all effects and potential failures explicit in function signatures using the `returns` library. - def push(self, item: T) -> None: - self._items.append(item) +- **For Fallible Operations, Use **`Result`**:** Return `Success(value)` or `Failure(error)` for any operation that can fail. Never raise exceptions for predictable errors. +- **For Optionality, Use **`Maybe`**:** Return `Some(value)` or `Nothing` for values that may be absent. Never use `Optional[T]` or return raw `None`. +- **For Synchronous Side Effects, Use **`IO`**:** Wrap any function that performs I/O (e.g., file access, network requests, database calls) in an `IO` or `IOResult` container. This marks the function as impure and defers its execution. +- **For Asynchronous Side Effects, Use **`FutureResult`**:** Use `FutureResult` to compose asynchronous operations that may fail, ensuring that exceptions do not break the `asyncio` event loop. 
- def pop(self) -> T: - return self._items.pop() +### Application Boundaries: CLI & API Design -# Functions with type parameters -def first[T](items: Sequence[T]) -> Maybe[T]: - return Maybe.from_optional(items[0] if items else None) +Structure applications with clear, type-safe interfaces. -# Type aliases -type Result[T, E] = Success[T] | Failure[E] -type JsonDict = dict[str, Any] +- **Design CLIs with **`Typer`**:** + - Build CLIs based on Python type hints. + - Use `Annotated` for defining options and arguments with help text. + - Place default values in the function signature, not within `typer.Option()`. + - Use `rich.console` for all terminal output; never use `print()`. + - Exit with `typer.Exit(code=n)` instead of `sys.exit()`. +- **Design REST APIs:** + - Adhere to strict and consistent REST conventions. + - Version APIs explicitly in the URL path (e.g., `/api/v1/`). + - Use correct HTTP methods and status codes. + - Return consistent, structured error formats. + - Implement idempotency keys for unsafe methods. +- **Isolate External Systems:** Use an Anti-Corruption Layer—a dedicated module for translating data between external APIs and your internal domain models—to protect your core logic from outside influence. -# With defaults (PEP 696) -class Container[T = str]: - value: T -``` +### Data Processing & Persistence -### Pattern Matching Enhancements +Handle data in a functional and safe manner. -Use pattern matching for cleaner control flow: +- **Use **`itertools`** and Generators:** Leverage `itertools` for efficient, memory-safe iteration. Create custom iterators using generator functions (`yield`) for lazy data processing. +- **Use Immutable Collections:** Use `tuple` for fixed sequences, `frozenset` for immutable sets, and `types.MappingProxyType` for read-only dictionary views. +- **Ensure Database Safety:** + - Use transactions for all write operations to maintain consistency. + - Manage database migrations with a tool like Alembic. 
+ - Write idempotent migration scripts. + - Use connection pooling for performance and resource management. + - Validate and sanitize all inputs to prevent SQL injection. -```python -from dataclasses import dataclass +### Testing & Validation -@dataclass(frozen=True) -class Point: - x: float - y: float - -def describe_point(point: Point) -> str: - match point: - case Point(x=0, y=0): - return "Origin" - case Point(x=0, y=y): - return f"On Y-axis at {y}" - case Point(x=x, y=0): - return f"On X-axis at {x}" - case Point(x=x, y=y) if x == y: - return f"On diagonal at {x}" - case Point(x=x, y=y): - return f"At ({x}, {y})" -``` +Verify correctness through rigorous, automated testing. -### Type Narrowing with TypeIs (PEP 742) +- **Write Tests First:** Follow a Test-Driven Development (TDD) approach. +- **Test Properties, Not Just Examples:** Use `hypothesis` for property-based testing. Define properties and invariants that your code must satisfy, and let the library generate hundreds of diverse examples to find edge cases. -```python -from typing import TypeIs + ```python + from hypothesis import given, strategies as st -def is_non_empty_list[T](val: list[T]) -> TypeIs[NonEmptyList[T]]: - """Type guard for non-empty lists""" - return len(val) > 0 + @given(st.lists(st.integers())) + def test_sort_is_idempotent(items: list[int]) -> None: + assert sorted(sorted(items)) == sorted(items) -def process_items[T](items: list[T]) -> Maybe[T]: - if is_non_empty_list(items): - # items is narrowed to NonEmptyList[T] - return Some(items[0]) - return Nothing -``` + ``` -## Type System Best Practices +- **Isolate and Test Pure Functions:** Pure functions can be tested in complete isolation with a given input and an expected output, requiring no mocks or complex setup. +- **Mock Only at External Boundaries:** Restrict mocking to interfaces that communicate with external systems (e.g., APIs, databases). Do not mock internal application logic. 
-### Modern Import Patterns (Python 3.13) +### Documentation & Tooling -```python -# GOOD: Use collections.abc for abstract types -from collections.abc import Sequence, Mapping, Callable, Iterable, Iterator -from types import MappingProxyType # For immutable dict views +Maintain a clear and efficient development environment. -# BAD: Don't use typing module for these -# from typing import Sequence, Mapping # Deprecated approach +- **Write Self-Documenting Code:** Use precise, descriptive names for functions and variables. +- **Document the "Why," Not the "What":** Use Google-style docstrings for public APIs to explain intent, preconditions, and postconditions. +- **Enforce Static Type Checking:** Use a strict type checker like `basedpyright` in your development workflow. -# GOOD: Use returns for Result types -from returns.result import Result -from returns.maybe import Maybe + ```bash + basedpyright --pythonversion 3.13 + ``` -# BAD: Don't use Optional for nullable values -# from typing import Optional # Use Maybe instead -``` +- **Never Suppress Type Errors:** Do not use `cast` or `# type: ignore`. Address all type errors directly. Do not use `Any`. -### Python 3.13 Generic Syntax (PEP 695) +## Development Philosophy -```python -# GOOD: New syntax for generics -class Box[T]: - def __init__(self, value: T) -> None: - self._value = value - - def map[U](self, func: Callable[[T], U]) -> Box[U]: - return Box(func(self._value)) - -# Function with type parameters -def first[T](items: Sequence[T]) -> Maybe[T]: - return Maybe.from_optional(items[0] if items else None) - -# Type aliases with new syntax -type Result[T, E] = Success[T] | Failure[E] -type ValidationResult[T] = Result[T, ValidationError] - -# BAD: Old TypeVar syntax (avoid unless needed for variance) -# from typing import TypeVar, Generic -# T = TypeVar('T') -# class Box(Generic[T]): ... -``` +Apply functional programming principles consistently throughout the codebase. 
+Write pure, composable functions with explicit error handling and immutable data structures. -### Type Annotations Guidelines +## Package Management & Environment -```python -# Use Final for constants -from typing import Final -MAX_RETRIES: Final = 3 +- **ALWAYS use `uv`** - Never use `pip`, `pipx`, `conda`, or other package managers +- **ALWAYS use `uv run`** instead of `python` directly +- Use `pyproject.toml` for all project configuration +- Pin exact versions in `.python-version` -# Use Literal for specific values -from typing import Literal -type Mode = Literal['read', 'write', 'append'] +## Type System Requirements -# Use TypedDict for structured data -from typing import TypedDict, Required, NotRequired +### Mandatory Type Safety -class UserData(TypedDict): - id: Required[int] - name: Required[str] - email: NotRequired[str] +- **NEVER use `Any`** - Use proper generics, protocols, or union types +- **NEVER use `cast()`** or `# type: ignore` +- **NEVER use `Optional[T]`** - Use `Maybe[T]` from `returns` library +- **ALWAYS use PEP 695 syntax** for generics: `class Box[T]:` not `class Box(Generic[T]):` +- **ALWAYS use `type` aliases**: `type UserId = int` +- **ALWAYS use explicit type narrowing functions** to refine types at runtime. + - Type narrowing functions must return `bool` and be annotated with a type predicate (e.g., `def is_str(x: object) -> TypeIs[str]: ...`). +- **ALWAYS use `TypeIs` for type narrowing** when both positive and negative branches require precise type inference. + - The narrowed type in the `if` branch is the intersection of the argument’s type and the `TypeIs` return type. + - The narrowed type in the `else` branch is the intersection of the argument’s type and the complement of the `TypeIs` return type. +- **ALWAYS use `TypeGuard` for user-defined type guards** when only the positive branch requires narrowing. + - Type narrowing applies only in the `if` branch; the `else` branch is not narrowed. 
+- **Type narrowing functions MUST accept at least one positional argument.** + - Type narrowing is applied only to the first argument. +- **The return type of a type narrowing function MUST be assignable to the input type.** + - It is a type error to narrow to a type that is not assignable to the input. +- **Type narrowing functions MUST be pure and deterministic.** + - No side effects or mutation are allowed. +- **NEVER use `Any` or `cast()` for type narrowing.** + - Always use type predicates and explicit type guards. +- **ALWAYS use pattern matching or explicit type checks** to implement type narrowing logic. +- **ALWAYS document the expected narrowed types** in both positive and negative branches using type assertions or comments. +- **NEVER rely on implicit narrowing or runtime exceptions** for type safety. -# Use Protocol for structural subtyping -from typing import Protocol - -class Comparable[T](Protocol): - def __lt__(self, other: T) -> bool: ... - def __eq__(self, other: T) -> bool: ... 
- -# Use NewType for semantic distinctions -from typing import NewType -UserId = NewType('UserId', int) -``` - -### Pattern Matching for Algebraic Data Types - -```python -from dataclasses import dataclass -from typing import Final - -# Define sum types with dataclasses -@dataclass(frozen=True) -class Success[T]: - value: T - -@dataclass(frozen=True) -class Failure[E]: - error: E - -type Result[T, E] = Success[T] | Failure[E] - -# Pattern match on results -def handle_result[T, E](result: Result[T, E]) -> str: - match result: - case Success(value): - return f"Success: {value}" - case Failure(error): - return f"Error: {error}" -``` - -## CLI Design with Typer - -```python -import typer -from typing import Annotated -from rich.console import Console -from returns.result import Result - -app = typer.Typer() -console = Console() - -@app.command() -def process( - input_file: Annotated[str, typer.Argument(help="Input file path")], - output: Annotated[str, typer.Option("--output", "-o")] = "output.txt", - verbose: Annotated[bool, typer.Option("--verbose", "-v")] = False, -) -> None: - """Process a file with functional transformations.""" - result = ( - read_file(input_file) - .bind(parse_data) - .bind(transform_data) - .bind(lambda data: write_file(output, data)) - ) - - match result: - case Success(_): - console.print("[green]✓ Processing complete[/green]") - case Failure(error): - console.print(f"[red]✗ Error: {error}[/red]") - raise typer.Exit(code=1) -``` - -## Functional Data Processing - -### Using Itertools and Generators +### Import Standards ```python -from itertools import ( - chain, compress, groupby, starmap, - tee, zip_longest, islice, cycle -) -from collections.abc import Iterator, Iterable -from typing import TypeVar - -T = TypeVar('T') - -def chunked[T](iterable: Iterable[T], size: int) -> Iterator[tuple[T, ...]]: - """Split iterable into fixed-size chunks.""" - iterator = iter(iterable) - while chunk := tuple(islice(iterator, size)): - yield chunk - -def 
sliding_window[T](iterable: Iterable[T], n: int) -> Iterator[tuple[T, ...]]: - """Sliding window over iterable.""" - iterators = tee(iterable, n) - for i, it in enumerate(iterators): - for _ in range(i): - next(it, None) - return zip(*iterators) -``` - -### Functional Collection Operations +# REQUIRED imports for collections +from collections.abc import Sequence, Mapping, Callable, Iterable, Iterator -```python -from functools import reduce, partial -from operator import add, mul -from returns.curry import curry - -# Curried functions for composition -@curry -def map_over[T, U](func: Callable[[T], U], items: Sequence[T]) -> list[U]: - return [func(item) for item in items] - -@curry -def filter_by[T](predicate: Callable[[T], bool], items: Sequence[T]) -> list[T]: - return [item for item in items if predicate(item)] - -@curry -def reduce_with[T, U]( - func: Callable[[U, T], U], - initial: U, - items: Sequence[T] -) -> U: - return reduce(func, items, initial) - -# Compose operations +# REQUIRED for functional programming +from returns.result import Result, Success, Failure +from returns.maybe import Maybe, Some, Nothing +from returns.io import IO, IOResult from returns.pipeline import flow +from returns.pointfree import bind -result = flow( - data, - filter_by(lambda x: x > 0), - map_over(lambda x: x ** 2), - reduce_with(add, 0), -) -``` - -## Async Functional Programming - -```python -from returns.future import Future, FutureResult -from returns.io import IO -import asyncio - -# Async operations as Future values -async def fetch_user(user_id: int) -> Result[User, Exception]: - try: - # Async operation - user = await async_db.get_user(user_id) - return Success(user) - except Exception as e: - return Failure(e) - -# Compose async operations -def get_user_posts(user_id: int) -> FutureResult[list[Post], Exception]: - return ( - FutureResult.from_future(fetch_user(user_id)) - .bind(lambda user: FutureResult.from_future(fetch_posts(user.id))) - ) - -# Run with asyncio 
-async def main() -> None: - result = await get_user_posts(123) - match result: - case Success(posts): - print(f"Found {len(posts)} posts") - case Failure(error): - print(f"Error: {error}") -``` - -## Testing Functional Code - -```python -import pytest -from hypothesis import given, strategies as st -from returns.result import Success, Failure - -# Property-based testing -@given(st.integers(), st.integers().filter(lambda x: x != 0)) -def test_divide_properties(a: int, b: int) -> None: - result = divide(a, b) - assert isinstance(result, Success) - assert result.unwrap() == a / b - -@given(st.integers()) -def test_divide_by_zero(a: int) -> None: - result = divide(a, 0) - assert isinstance(result, Failure) - -# Test pure functions with fixtures -@pytest.fixture -def sample_data() -> list[int]: - return [1, 2, 3, 4, 5] - -def test_transformation_pipeline(sample_data: list[int]) -> None: - result = flow( - sample_data, - filter_by(lambda x: x % 2 == 0), - map_over(lambda x: x ** 2), - ) - assert result == [4, 16] -``` - -## Performance Considerations - -```python -from functools import cache, lru_cache -from typing import ParamSpec, TypeVar - -P = ParamSpec('P') -R = TypeVar('R') - -# Use @cache for pure functions (Python 3.9+) -@cache -def fibonacci(n: int) -> int: - if n < 2: - return n - return fibonacci(n - 1) + fibonacci(n - 2) - -# LRU cache with size limit -@lru_cache(maxsize=128) -def expensive_computation(x: float, y: float) -> float: - # Complex calculation - return complex_math(x, y) - -# Generator for memory efficiency -def process_large_file(path: str) -> Iterator[Result[ProcessedLine, Error]]: - with open(path) as f: - for line in f: - yield process_line(line) -``` - -## Principles - -Adhere to these principles to ensure robust, maintainable, and clear system design: - -### 1. Apply the IPO Pattern - -Break systems into discrete components that accept **Input**, perform **Processing**, and return **Output**. 
- -```python -# Model a web server as Input → Processing → Output -def handle_request(request: Request) -> IO[Response]: - return flow( - request, - validate_request, - bind(process_business_logic), - bind(format_response), - ) -``` - -### 2. All Models Are Wrong, But Some Are Useful - -Select models that balance accuracy and simplicity. Validate utility through real-world testing. - -### 3. Integrate People as Part of the System - -Design interfaces to align with user expectations: - -- Mirror established mental models -- Eliminate surprising behaviors -- Prioritize discoverability through consistent patterns - -### 4. Principle of Least Astonishment - -Ensure interfaces behave predictably: - -- Follow Python conventions (e.g., `-h` for help in CLIs) -- Avoid hidden side effects -- Document any deviations from expected behavior - -### 5. Fail Fast with Context - -```python -@dataclass(frozen=True) -class ValidationError: - code: str - field: str - message: str - -def validate_email(email: str) -> Result[str, ValidationError]: - if "@" not in email: - return Failure(ValidationError( - code="INVALID_EMAIL", - field="email", - message="Email must contain @" - )) - return Success(email) -``` - -### 6. YAGNI (You Aren't Gonna Need It) - -- Postpone features until required -- Delete unused code proactively -- Measure complexity-to-value ratio - -### 7. Explicit Dependency Declaration - -```python -# Dependencies as typed parameters -def process_order[T]( - order: Order, - payment_service: PaymentService, - inventory: InventoryService, - logger: Logger, -) -> IOResult[Receipt, ProcessError]: - """All dependencies explicitly declared""" +# FORBIDDEN - Use collections.abc instead +# from typing import Sequence, Mapping, List, Dict ``` -### 8. 
Type-Driven Design +### Function Parameters -```python -# Encode business rules in types -@dataclass(frozen=True) -class NonEmptyList[T]: - head: T - tail: list[T] - - @classmethod - def create(cls, items: list[T]) -> Maybe[NonEmptyList[T]]: - if not items: - return Nothing - return Some(cls(items[0], items[1:])) -``` +- **NEVER use `list[T]` in parameters** - Use `Sequence[T]` +- **NEVER use `dict[K, V]` in parameters** - Use `Mapping[K, V]` +- **ALWAYS use immutable parameter types** -### 9. Design by Contract +## Data Structures -```python -def transfer_funds( - from_account: Account, - to_account: Account, - amount: Decimal, -) -> Result[Transaction, TransferError]: - """ - Preconditions: - - amount > 0 - - from_account.balance >= amount - - from_account != to_account - - Postconditions: - - from_account.balance decreased by amount - - to_account.balance increased by amount - - Transaction record created - """ -``` +### Mandatory Patterns -### 10. Single Responsibility & High Cohesion +- **ALWAYS use `@dataclass(frozen=True, slots=True)`** for data classes +- **ALWAYS use `tuple` instead of `list`** for fixed sequences +- **ALWAYS use `frozenset` instead of `set`** for APIs +- **NEVER mutate objects in place** - Return new instances -```python -# Each module has one clear purpose -# user_repository.py - Only data access -class UserRepository: - def find_by_id(self, id: UserId) -> IOResult[User, NotFoundError]: ... - def save(self, user: User) -> IOResult[User, SaveError]: ... - -# user_validator.py - Only validation -class UserValidator: - def validate(self, data: UserData) -> Result[ValidatedUser, ValidationError]: ... -``` +### Error Handling -### 11. 
Observability by Design +- **ALWAYS use `Result[T, E]`** for operations that can fail +- **NEVER raise exceptions** for expected errors +- **ALWAYS use `Maybe[T]`** for nullable values +- **ALWAYS use pattern matching** on Result and Maybe types -```python -from returns.context import RequiresContext +## Function Design -type Deps = Logger | Metrics +### Mandatory Requirements -def process_with_telemetry[T]( - data: T, -) -> RequiresContext[IOResult[T, Error], Deps]: - """Operations with built-in observability""" -``` +- **Functions MUST be pure** - Same input always produces same output +- **Functions MUST NOT have side effects** - Use `IO[T]` for effects +- **Maximum 20 lines per function** +- **Maximum 4 parameters** - Use parameter objects if needed +- **ALWAYS use keyword-only arguments** for functions with 3+ parameters -### 12. Progressive Abstraction - -- Start with concrete implementations -- Introduce abstractions only to eliminate duplication -- Refactor when patterns stabilize - -### 13. Self-Documenting Code +### Composition ```python -# Precise naming -def calculate_compound_interest( - principal: Decimal, - annual_rate: Decimal, - years: int, - compounds_per_year: int = 12, -) -> Decimal: - """Names explain the computation""" +# REQUIRED pattern for operation chaining +result = flow( + input_data, + validate, + bind(transform), + bind(save), +) ``` -### 14. Principle of Least Power +## Testing Framework -- Prefer `map()` over manual loops -- Avoid generics until necessary -- Choose simple data structures +- **Primary: pytest** with type-checked fixtures +- **Property testing: hypothesis** for all pure functions +- **ALWAYS test error paths** explicitly +- **One assertion per test** +- **Descriptive test names**: `test_divide_by_zero_returns_failure` -### 15. 
Idempotency by Default - -```python -def ensure_user_exists(email: str) -> IOResult[User, Error]: - """Safe to call multiple times""" - return ( - find_user_by_email(email) - .alt(lambda _: create_user(email)) - ) -``` +## CLI Development -### 16. Resource Safety +### Typer Standards ```python -from contextlib import contextmanager -from returns.context import RequiresContextIOResult - -@contextmanager -def database_transaction(): - tx = start_transaction() - try: - yield tx - tx.commit() - except Exception: - tx.rollback() - raise +# REQUIRED pattern - no defaults in Option() +def command( + file: Annotated[Path, typer.Argument(help="Input file")], + output: Annotated[str, typer.Option("--output", "-o")] = "out.txt", +) -> None: ``` -### 17. Version Contracts Semantically - -```python -# API versioning -from typing import Literal - -API_VERSION: Final = "2.0.0" - -type ApiVersion = Literal["1.0", "1.1", "2.0"] - -def get_endpoint(version: ApiVersion) -> str: - return f"/api/v{version}/users" -``` +- **ALWAYS use `rich` for output** - Never plain `print()` +- **ALWAYS use `typer.Exit(code=n)`** instead of `sys.exit()` +- **ALWAYS validate inputs early** -### 18. Mechanical Sympathy +## Code Organization -- Profile before optimizing -- Use `__slots__` for memory efficiency -- Prefer cache-friendly data layouts +### File Structure -### 19. 
Anti-Corruption Layers +- **Pure functions**: No external dependencies in function bodies +- **Boundary functions**: Handle I/O and external integrations +- **Domain models**: Immutable dataclasses with business logic +- **Services**: Protocol-based interfaces for external systems -```python -# Shield domain from external APIs -@dataclass(frozen=True) -class ExternalUser: - user_id: str - full_name: str +### Module Dependencies -@dataclass(frozen=True) -class DomainUser: - id: UserId - name: Name +- **Domain modules MUST NOT import infrastructure** +- **Use dependency injection** for external services +- **ALWAYS define clear service boundaries** -def adapt_external_user(external: ExternalUser) -> Result[DomainUser, AdapterError]: - """Translate foreign data to internal types""" -``` +## Error Handling Patterns -### 20. Progressive Type Refinement +### Required Patterns ```python -# Parse → Validate → Use -def process_age(value: str) -> Result[AdultAge, ValidationError]: +# Configuration parsing +def load_config(path: Path) -> Result[Config, ConfigError]: return ( - parse_int(value) - .bind(validate_positive) - .bind(ensure_adult) + read_file(path) + .bind(parse_json) + .bind(validate_config) ) -@dataclass(frozen=True) -class AdultAge: - value: int - - def __post_init__(self): - if self.value < 18: - raise ValueError("Must be adult age") -``` - -## TypeIs - -```python -# TypeIs for type narrowing (PEP 742) -from typing import TypeIs - -def is_string_list(val: list[object]) -> TypeIs[list[str]]: - return all(isinstance(item, str) for item in val) - -# Use in type narrowing -def process(items: list[object]) -> str: - if is_string_list(items): - # items is narrowed to list[str] - return ', '.join(items) - return str(items) -``` - -## Code Style - -Adhere to the following programming philosophy for every code artifact you create or edit: - -### Core Principles - -- **Always DRY**: Extract shared logic and avoid duplication; every significant behavior should live in 
exactly one place -- **Always Optimize for Deletion**: Prefer simple, loosely-coupled structures that can be removed without cascading edits -- **Always Stateless by Default**: Treat state as a liability. Functions should carry all required data in parameters and return new data -- **Always Pure Functions Only**: Functions must be deterministic, side-effect-free, and directly testable in isolation -- **Always Swappable Services**: Build services that are identical, swappable, and trivially scalable with no memory between calls -- **Always Place State at the Edge**: Persisted data, caches, and external integrations belong in well-defined boundary layers -- **Always Design for Easy Rewrite**: Assume code might be replaced next week—keep components small, clear, and prediction-free - -### Python 3.13 Specific Guidelines - -- Always write code in a purely functional style when possible -- Always verify all code is referentially transparent -- Never use type assertions (`cast`) or `# type: ignore` -- Never use `Any` type; use proper generics or protocols -- Always define types using algebraic data types (sum and product types) -- Prefer `@dataclass(frozen=True)` over regular classes -- Use pattern matching for control flow over if/elif chains - -## Type System and Imports - -### Collections and Generics - -- **GOOD**: Use `Sequence[T]` from `collections.abc` for read-only lists -- **BAD**: Never use `list[T]` in function parameters -- **GOOD**: Use `Mapping[K, V]` from `collections.abc` for read-only dicts -- **BAD**: Never use `dict[K, V]` in function parameters -- **GOOD**: Use `frozenset[T]` for immutable sets -- **BAD**: Avoid mutable `set[T]` in APIs - -### Modern Generic Syntax (PEP 695) - -```python -# GOOD: Python 3.13+ syntax -class Container[T]: - value: T - -def transform[T, U](func: Callable[[T], U], value: T) -> U: - return func(value) - -type Pair[A, B] = tuple[A, B] - -# BAD: Old TypeVar syntax -T = TypeVar('T') # Only use for variance +# Pattern matching 
on results +match result: + case Success(value): + process(value) + case Failure(error): + handle_error(error) + case _: # ALWAYS include exhaustive case + pass ``` -### Result Types and Error Handling - -- **GOOD**: Use `Result[T, E]` from returns library -- **BAD**: Never raise exceptions for expected errors -- **GOOD**: Use `Maybe[T]` for nullable values -- **BAD**: Never use `Optional[T]` or raw `None` -- **GOOD**: Use `IO[T]` for side effects -- **BAD**: Never perform I/O in pure functions - -## CLI and Command Line Tools - -### Typer Best Practices +## Performance & Quality -```python -# GOOD: Use Annotated with defaults -def main( - name: Annotated[str, typer.Option()] = "World", - count: Annotated[int, typer.Option()] = 1, -): - pass - -# BAD: Never put defaults in Option() -def main( - name: str = typer.Option("World"), # Wrong! -): - pass -``` - -- Always use sub-apps for command groups -- Always provide help text for all commands and options -- Use `rich` for enhanced output, never plain `print()` -- Use `typer.Exit(code=n)` instead of `sys.exit()` -- Always validate inputs early in command functions +### Type Checking -## Function Design and Composition +- **ALWAYS use `basedpyright --pythonversion 3.13`** +- **Zero type errors tolerance** +- **Full type coverage required** -### Function Guidelines +### Code Quality -- Keep functions under 20 lines -- Single responsibility per function -- Use descriptive names: `calculate_tax_rate()` not `calc()` -- Parameters should be immutable types -- Return new values, never mutate inputs -- Limit parameters to 4 (use parameter objects if needed) -- Always use full type annotations +- **ALWAYS use `ruff` for linting** +- **ALWAYS use `ruff format` for formatting** +- **No magic numbers** - Use named constants +- **Meaningful variable names** - No abbreviations -### Keyword-Only Arguments +## Security Requirements -```python -# GOOD: Force named arguments for clarity -def create_user(*, name: str, email: str, age: 
int) -> User: - pass +- **NEVER commit secrets** to repository +- **ALWAYS validate inputs** at boundaries +- **Use environment variables** for configuration +- **ALWAYS use parameterized queries** for databases -# BAD: Positional arguments are ambiguous -def create_user(name: str, email: str, age: int) -> User: - pass -``` +## Forbidden Patterns -### Function Composition +### Type System ```python -from returns.pipeline import flow -from returns.pointfree import bind - -# GOOD: Compose small functions -result = flow( - data, - parse, - bind(validate), - bind(transform), -) - -# BAD: Monolithic functions -def process_everything(data): - # 100 lines of mixed concerns - pass -``` - -## Data Structures and Immutability +# FORBIDDEN +from typing import Optional, List, Dict, Any +def func(data: Any) -> Optional[List[str]]: + return cast(List[str], data) -### Immutable Data Classes - -```python -# GOOD: Frozen dataclass -@dataclass(frozen=True, slots=True) -class Point: - x: float - y: float - - def move(self, dx: float, dy: float) -> Point: - return Point(self.x + dx, self.y + dy) - -# BAD: Mutable class -class Point: - def __init__(self, x: float, y: float): - self.x = x - self.y = y +# REQUIRED +from collections.abc import Sequence +from returns.maybe import Maybe +def func[T](data: T) -> Maybe[Sequence[str]]: + # type-safe implementation ``` -### Immutable Collections - -- Use `tuple` instead of `list` for fixed sequences -- Use `frozenset` instead of `set` -- Use `MappingProxyType` for read-only dict views -- Use `dataclasses.replace()` to create modified copies -- Implement `__slots__` for memory efficiency - -### Algebraic Data Types +### State Management ```python -# Sum types with Union -type Shape = Circle | Rectangle | Triangle +# FORBIDDEN - mutable state +class Counter: + def __init__(self): + self.count = 0 + def increment(self): + self.count += 1 -# Product types with dataclasses +# REQUIRED - immutable state @dataclass(frozen=True) -class Circle: - 
radius: float - -# Pattern matching -match shape: - case Circle(radius=r): - return pi * r ** 2 - case Rectangle(width=w, height=h): - return w * h -``` - -## Testing and Validation - -### Testing Principles - -- Write tests first (TDD approach) -- One assertion per test -- Descriptive test names: `test_divide_by_zero_returns_failure` -- Test edge cases: empty, None, boundaries -- Use property-based testing with Hypothesis -- Mock only at boundaries (external services) -- Use fixtures for reusable setup -- Test error conditions explicitly -- Keep tests independent -- Use stubs for queries, mocks for commands - -### Property-Based Testing - -```python -from hypothesis import given, strategies as st - -@given(st.lists(st.integers())) -def test_sort_properties(items: list[int]) -> None: - sorted_items = sorted(items) - # Properties that should always hold - assert len(sorted_items) == len(items) - assert all(a <= b for a, b in zip(sorted_items, sorted_items[1:])) - assert set(sorted_items) == set(items) -``` - -## Performance and Optimization - -### Performance Guidelines - -- Measure before optimizing (use `cProfile`, `line_profiler`) -- Use generators for large datasets -- Cache with `@cache` or `@lru_cache` -- Use appropriate data structures: - - `set` for membership tests - - `deque` for queues - - `bisect` for sorted operations -- Batch I/O operations -- Use `__slots__` for many instances -- Compile regex once: `PATTERN = re.compile(...)` -- Use NumPy for numerical work - -### Memory-Efficient Patterns - -```python -# GOOD: Generator for streaming -def read_large_file(path: str) -> Iterator[str]: - with open(path) as f: - yield from f - -# BAD: Loading everything into memory -def read_large_file(path: str) -> list[str]: - with open(path) as f: - return f.readlines() +class Counter: + count: int = 0 + def increment(self) -> Counter: + return Counter(self.count + 1) ``` -## Async and Concurrency - -### Async Patterns +### Error Handling ```python -# GOOD: Async for 
I/O -async def fetch_data(url: str) -> Result[Data, Error]: - async with aiohttp.ClientSession() as session: - try: - async with session.get(url) as response: - data = await response.json() - return Success(Data(**data)) - except Exception as e: - return Failure(Error(str(e))) - -# Use asyncio.gather for parallel operations -results = await asyncio.gather( - fetch_data(url1), - fetch_data(url2), - fetch_data(url3), -) -``` - -- Prefer asyncio over threading for I/O -- Use semaphores to limit concurrent operations -- Use `asyncio.create_task` for fire-and-forget -- Test async code with `pytest-asyncio` -- Never call blocking functions in async code -- Use `asyncio.Queue` for task distribution - -## Documentation and Comments - -### Documentation Standards - -- Write Google/NumPy style docstrings for public APIs -- Document why, not what -- Keep docs in sync with code -- Use type hints as documentation -- Document raised exceptions -- Provide doctest examples -- Link to references (papers, algorithms) -- Document assumptions and preconditions -- Use meaningful variable names -- Keep README current - -### Example Docstring - -```python -def calculate_discount( - price: Decimal, - discount_percent: Decimal, -) -> Result[Decimal, ValueError]: - """Calculate discounted price. - - Args: - price: Original price (must be positive) - discount_percent: Discount percentage (0-100) - - Returns: - Result containing discounted price or ValueError - - Examples: - >>> calculate_discount(Decimal("100"), Decimal("10")) - Success(Decimal("90")) - - Note: - Uses banker's rounding for currency calculations. 
- """ -``` +# FORBIDDEN +def parse_int(value: str) -> int: + return int(value) # Can raise ValueError -## Security and Safety - -- Validate all inputs with whitelisting -- Use parameterized queries (never concatenate SQL) -- Hash passwords with argon2 or bcrypt -- Use environment variables for secrets -- Implement rate limiting -- Log security events with structured logging -- Use HTTPS everywhere -- Validate file uploads (type, size, content) -- Implement CSRF protection -- Keep dependencies updated - -## Database and Persistence - -- Use Alembic for migrations -- Write idempotent migrations -- Use transactions for consistency -- Implement retry logic with backoff -- Use connection pooling -- Index foreign keys and common queries -- Use EXPLAIN ANALYZE for query optimization -- Implement soft deletes for audit trails -- Use read replicas for scaling -- Automate backups - -## API Design - -- Use strict consistent REST conventions -- Version APIs: `/api/v1/`, `/api/v2/` -- Use proper HTTP methods and status codes -- Implement pagination with cursors -- Use content negotiation (Accept headers) -- Implement rate limiting -- Document with OpenAPI/Swagger -- Use HATEOAS principles -- Implement idempotency keys -- Return consistent error formats - -## Tooling and Development Environment - -### Type Checking - -```bash -basedpyright --pythonversion 3.13 +# REQUIRED +def parse_int(value: str) -> Result[int, ValueError]: + try: + return Success(int(value)) + except ValueError as e: + return Failure(e) ``` -### Development Workflow - -1. Write type stubs first -2. Implement with TDD -3. Use property-based testing -4. Profile if needed -5. 
Document public APIs +## Library Preferences -## Migration Guide +### Required Libraries -### From Imperative to Functional - -```python -# BAD: Imperative style -def process_users(users): - result = [] - for user in users: - if user.active: - user.score = calculate_score(user) - result.append(user) - return result - -# GOOD: Functional style -def process_users(users: Sequence[User]) -> list[User]: - return flow( - users, - filter_by(lambda u: u.active), - map_over(add_score), - ) - -def add_score(user: User) -> User: - return dataclasses.replace( - user, - score=calculate_score(user) - ) -``` - -### From Exceptions to Results - -```python -# BAD: Exception-based -def get_user(user_id: int) -> User: - if not is_valid_id(user_id): - raise ValueError("Invalid ID") - user = db.find_user(user_id) - if not user: - raise NotFoundError("User not found") - return user - -# GOOD: Result-based -def get_user(user_id: int) -> IOResult[User, GetUserError]: - return ( - validate_user_id(user_id) - .bind(lambda vid: find_user_in_db(vid)) - ) -``` +- **returns**: Functional programming primitives +- **typer**: CLI development +- **rich**: Terminal output +- **pytest**: Testing framework +- **hypothesis**: Property-based testing -## Common Patterns +### Forbidden Libraries -### Railway-Oriented Programming +- **Click**: Use Typer instead +- **argparse**: Use Typer instead +- **requests**: Use httpx instead (async by default) -```python -from returns.pipeline import flow -from returns.pointfree import bind +## Git & Version Control -def process_order(order_data: dict) -> IOResult[Order, ProcessError]: - return flow( - order_data, - validate_order_data, - bind(check_inventory), - bind(calculate_pricing), - bind(reserve_items), - bind(charge_payment), - bind(create_order_record), - ) -``` +- **Use conventional commits**: `feat:`, `fix:`, `refactor:`, etc. 
+- **Never commit directly to main** - Use feature branches +- **ALWAYS rebase** instead of merge commits +- **Squash commits** before merging to main -### Dependency Injection with Context +## Documentation -```python -from returns.context import RequiresContext +- **Google-style docstrings** for public APIs +- **Type hints ARE documentation** - Use them comprehensively +- **Document business logic rationale** - Not implementation details +- **Keep README.md updated** with setup and usage -type AppContext = Database | Logger | Config +## File Naming & Organization -def get_user_by_email( - email: str -) -> RequiresContext[IOResult[User, Error], AppContext]: - def _inner(ctx: AppContext) -> IOResult[User, Error]: - return ctx.database.find_user(email=email) - return RequiresContext(_inner) -``` +- **Snake_case for files**: `user_service.py` +- **Clear module purposes**: One responsibility per module +- **Group related functionality**: Keep cohesive functions together +- **Separate concerns**: Domain, infrastructure, presentation layers -### Option/Maybe Pattern +## Development Workflow -```python -from returns.maybe import Maybe, Some, Nothing +1. **Write types first** - Define interfaces before implementation +2. **Write tests next** - TDD approach required +3. **Implement with pure functions** - No side effects in business logic +4. **Compose at boundaries** - Assemble pure functions into workflows +5. **Type check continuously** - Fix type errors immediately -def find_first[T]( - predicate: Callable[[T], bool], - items: Sequence[T], -) -> Maybe[T]: - for item in items: - if predicate(item): - return Some(item) - return Nothing -``` +This workspace emphasizes functional programming principles, type safety, and clear separation of concerns. Every code change should advance these goals while maintaining zero technical debt. -## Best Practices Summary - -1. **Start Pure**: Write pure functions by default, add effects explicitly -2. 
**Type Everything**: Full type coverage with no `Any` or `cast` -3. **Fail Fast**: Validate at boundaries, return structured errors -4. **Compose Small Functions**: Each function does one thing well -5. **Immutable Data**: Use frozen dataclasses and immutable collections -6. **Track Effects**: Use IO, Result, Maybe to make effects visible -7. **Test Properties**: Use Hypothesis for property-based testing -8. **Document Contracts**: Clear preconditions and postconditions -9. **Version APIs**: Semantic versioning with clear deprecation -10. **Profile When Needed**: Measure before optimizing - -## Recommended Libraries - -- **returns**: Functional programming primitives (Result, Maybe, IO) -- **pyrsistent**: Persistent/immutable data structures -- **toolz**: Functional utilities and function composition -- **more-itertools**: Additional iteration utilities -- **hypothesis**: Property-based testing -- **attrs**: Alternative to dataclasses with validators -- **cattrs**: Serialization/deserialization for structured data -- **rich**: Enhanced terminal output for CLIs -- **typer**: Modern CLI framework -- **polars**: Functional-style DataFrame operations +## [!REMINDER]: Use Perplexity in all tasks -## Final Notes +Always begin each response with at least 1 call to perplexity. -This guide represents best practices for functional programming in Python 3.13. The ecosystem continues to evolve, so: +Ask perplexity several questions PRIOR TO BEGINNING each task the user gives you. -- Keep dependencies updated -- Follow PEPs for new type system features -- Contribute to typing discussions -- Share patterns that work well +> Note: Send as many questions as needed to perplexity. Make sure each question is precise and is IN ITS OWN TOOL CALL. Do not ask multiple questions in the same tool call. Make sure to provide perplexity all the context it needs. 
+> Note: If you come across any other concerns or problems while working on the user's code, just ask perplexity more questions as needed! Frequently consult with perplexity to get the best possible information. +> Note: Send a batch of countless calls to perplexity to get many answers in parallel to all of your questions. +> Note: Send questions about different frameworks separately from questions about other frameworks. Questions must be about the individual frameworks rather than the bridging between the frameworks (For example: Elysia has gained some popularity. However, no one really uses Effect with Elysia yet. Be epistemically humble and ask all questions.) -Remember: **Make invalid states unrepresentable, make effects explicit, and keep functions pure.** +While you must begin each response with at least 1 call to perplexity, you may skip calls to perplexity if you have already asked all the questions you need to or if your previous interaction with perplexity was recent enough. diff --git a/pyproject.toml b/pyproject.toml index e9e4fe0..fc49422 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,8 @@ dependencies = [ "toml>=0.10.2", "returns>=0.25.0", "dspy>=3.0.0b1", + "networkx>=3.4.2", + "grep-ast>=0.9.0", ] [project.urls] @@ -73,10 +75,10 @@ exclude = [ "src/experimental", "src/typestubs", ] -stubPath = "typings" reportUnknownMemberType = false reportUnknownVariableType = false reportUnknownArgumentType = false +reportMissingTypeStubs = false [tool.basedpyright.defineConstant] DEBUG = true diff --git a/uv.lock b/uv.lock index c982d85..8abd214 100644 --- a/uv.lock +++ b/uv.lock @@ -498,7 +498,7 @@ name = "exceptiongroup" version = "1.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/0b/9f/a65090624ecf468cdca03533906e7c69ed7588582240cfe7cc9e770b50eb/exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88", size = 29749 } wheels = [ @@ -828,6 +828,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5c/4f/aab73ecaa6b3086a4c89863d94cf26fa84cbff63f52ce9bc4342b3087a06/greenlet-3.2.3-cp314-cp314-win_amd64.whl", hash = "sha256:8c47aae8fbbfcf82cc13327ae802ba13c9c36753b67e760023fd116bc124a62a", size = 301236 }, ] +[[package]] +name = "grep-ast" +version = "0.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pathspec" }, + { name = "tree-sitter-language-pack" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/67/82/a87079945a7c15d242cb586ae22e17952132439eaa9c878ec5fbdc61c54d/grep_ast-0.9.0.tar.gz", hash = "sha256:620a242a4493e6721338d1c9a6c234ae651f8774f4924a6dcf90f6865d4b2ee3", size = 14125 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/40/79/29f1373b2ce1eec37c03aefbc17194c2470d8b61ede288e5043231825999/grep_ast-0.9.0-py3-none-any.whl", hash = "sha256:a3973dca99f1abc026a01bbbc70e00a63860c8ff94a56182ff18b089836826d7", size = 13918 }, +] + [[package]] name = "gunicorn" version = "23.0.0" @@ -1503,6 +1516,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/44/d8/45e8fc9892a7386d074941429e033adb4640e59ff0780d96a8cf46fe788e/multidict-6.5.0-py3-none-any.whl", hash = "sha256:5634b35f225977605385f56153bd95a7133faffc0ffe12ad26e10517537e8dfc", size = 12181 }, ] +[[package]] +name = "networkx" +version = "3.4.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fd/1d/06475e1cd5264c0b870ea2cc6fdb3e37177c1e565c43f56ff17a10e3937f/networkx-3.4.2.tar.gz", hash = "sha256:307c3669428c5362aab27c8a1260aa8f47c4e91d3891f48be0141738d8d053e1", size = 2151368 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f", size = 1723263 }, +] + [[package]] name = "nodejs-wheel-binaries" version = "22.16.0" @@ -1715,6 +1737,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/39/c2/646d2e93e0af70f4e5359d870a63584dacbc324b54d73e6b3267920ff117/pandas-2.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:bb3be958022198531eb7ec2008cfc78c5b1eed51af8600c6c5d9160d89d8d249", size = 13231847 }, ] +[[package]] +name = "pathspec" +version = "0.12.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191 }, +] + [[package]] name = "pillow" version = "11.2.1" @@ -2332,7 +2363,9 @@ dependencies = [ { name = "dspy" }, { name = "dspy-ai" }, { name = "flake8" }, + { name = "grep-ast" }, { name = "mlflow" }, + { name = "networkx" }, { name = "pyflakes" }, { name = "returns" }, { name = "rich" }, @@ -2353,7 +2386,9 @@ requires-dist = [ { name = "dspy", git = "https://github.com/stanfordnlp/dspy.git" }, { name = "dspy-ai", specifier = "==2.6.19" }, { name = "flake8", specifier = ">=7.2.0" }, + { name = "grep-ast", specifier = ">=0.9.0" }, { name = "mlflow", specifier = ">=3.1.0" }, + { name = "networkx", specifier = ">=3.4.2" }, { name = "pyflakes", specifier = ">=3.3.2" }, { name = "returns", specifier = ">=0.25.0" }, { name = "rich", specifier = ">=13.7.1" }, @@ 
-2840,6 +2875,105 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540 }, ] +[[package]] +name = "tree-sitter" +version = "0.24.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a7/a2/698b9d31d08ad5558f8bfbfe3a0781bd4b1f284e89bde3ad18e05101a892/tree-sitter-0.24.0.tar.gz", hash = "sha256:abd95af65ca2f4f7eca356343391ed669e764f37748b5352946f00f7fc78e734", size = 168304 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/08/9a/bd627a02e41671af73222316e1fcf87772c7804dc2fba99405275eb1f3eb/tree_sitter-0.24.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f3f00feff1fc47a8e4863561b8da8f5e023d382dd31ed3e43cd11d4cae445445", size = 140890 }, + { url = "https://files.pythonhosted.org/packages/5b/9b/b1ccfb187f8be78e2116176a091a2f2abfd043a06d78f80c97c97f315b37/tree_sitter-0.24.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f9691be48d98c49ef8f498460278884c666b44129222ed6217477dffad5d4831", size = 134413 }, + { url = "https://files.pythonhosted.org/packages/01/39/e25b0042a049eb27e991133a7aa7c49bb8e49a8a7b44ca34e7e6353ba7ac/tree_sitter-0.24.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:098a81df9f89cf254d92c1cd0660a838593f85d7505b28249216661d87adde4a", size = 560427 }, + { url = "https://files.pythonhosted.org/packages/1c/59/4d132f1388da5242151b90acf32cc56af779bfba063923699ab28b276b62/tree_sitter-0.24.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b26bf9e958da6eb7e74a081aab9d9c7d05f9baeaa830dbb67481898fd16f1f5", size = 574327 }, + { url = "https://files.pythonhosted.org/packages/ec/97/3914e45ab9e0ff0f157e493caa91791372508488b97ff0961a0640a37d25/tree_sitter-0.24.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = 
"sha256:2a84ff87a2f2a008867a1064aba510ab3bd608e3e0cd6e8fef0379efee266c73", size = 577171 }, + { url = "https://files.pythonhosted.org/packages/c5/b0/266a529c3eef171137b73cde8ad7aa282734354609a8b2f5564428e8f12d/tree_sitter-0.24.0-cp310-cp310-win_amd64.whl", hash = "sha256:c012e4c345c57a95d92ab5a890c637aaa51ab3b7ff25ed7069834b1087361c95", size = 120260 }, + { url = "https://files.pythonhosted.org/packages/c1/c3/07bfaa345e0037ff75d98b7a643cf940146e4092a1fd54eed0359836be03/tree_sitter-0.24.0-cp310-cp310-win_arm64.whl", hash = "sha256:033506c1bc2ba7bd559b23a6bdbeaf1127cee3c68a094b82396718596dfe98bc", size = 108416 }, + { url = "https://files.pythonhosted.org/packages/66/08/82aaf7cbea7286ee2a0b43e9b75cb93ac6ac132991b7d3c26ebe5e5235a3/tree_sitter-0.24.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:de0fb7c18c6068cacff46250c0a0473e8fc74d673e3e86555f131c2c1346fb13", size = 140733 }, + { url = "https://files.pythonhosted.org/packages/8c/bd/1a84574911c40734d80327495e6e218e8f17ef318dd62bb66b55c1e969f5/tree_sitter-0.24.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a7c9c89666dea2ce2b2bf98e75f429d2876c569fab966afefdcd71974c6d8538", size = 134243 }, + { url = "https://files.pythonhosted.org/packages/46/c1/c2037af2c44996d7bde84eb1c9e42308cc84b547dd6da7f8a8bea33007e1/tree_sitter-0.24.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ddb113e6b8b3e3b199695b1492a47d87d06c538e63050823d90ef13cac585fd", size = 562030 }, + { url = "https://files.pythonhosted.org/packages/4c/aa/2fb4d81886df958e6ec7e370895f7106d46d0bbdcc531768326124dc8972/tree_sitter-0.24.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:01ea01a7003b88b92f7f875da6ba9d5d741e0c84bb1bd92c503c0eecd0ee6409", size = 575585 }, + { url = "https://files.pythonhosted.org/packages/e3/3c/5f997ce34c0d1b744e0f0c0757113bdfc173a2e3dadda92c751685cfcbd1/tree_sitter-0.24.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:464fa5b2cac63608915a9de8a6efd67a4da1929e603ea86abaeae2cb1fe89921", size = 578203 }, + { url = "https://files.pythonhosted.org/packages/d5/1f/f2bc7fa7c3081653ea4f2639e06ff0af4616c47105dbcc0746137da7620d/tree_sitter-0.24.0-cp311-cp311-win_amd64.whl", hash = "sha256:3b1f3cbd9700e1fba0be2e7d801527e37c49fc02dc140714669144ef6ab58dce", size = 120147 }, + { url = "https://files.pythonhosted.org/packages/c0/4c/9add771772c4d72a328e656367ca948e389432548696a3819b69cdd6f41e/tree_sitter-0.24.0-cp311-cp311-win_arm64.whl", hash = "sha256:f3f08a2ca9f600b3758792ba2406971665ffbad810847398d180c48cee174ee2", size = 108302 }, + { url = "https://files.pythonhosted.org/packages/e9/57/3a590f287b5aa60c07d5545953912be3d252481bf5e178f750db75572bff/tree_sitter-0.24.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:14beeff5f11e223c37be7d5d119819880601a80d0399abe8c738ae2288804afc", size = 140788 }, + { url = "https://files.pythonhosted.org/packages/61/0b/fc289e0cba7dbe77c6655a4dd949cd23c663fd62a8b4d8f02f97e28d7fe5/tree_sitter-0.24.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:26a5b130f70d5925d67b47db314da209063664585a2fd36fa69e0717738efaf4", size = 133945 }, + { url = "https://files.pythonhosted.org/packages/86/d7/80767238308a137e0b5b5c947aa243e3c1e3e430e6d0d5ae94b9a9ffd1a2/tree_sitter-0.24.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5fc5c3c26d83c9d0ecb4fc4304fba35f034b7761d35286b936c1db1217558b4e", size = 564819 }, + { url = "https://files.pythonhosted.org/packages/bf/b3/6c5574f4b937b836601f5fb556b24804b0a6341f2eb42f40c0e6464339f4/tree_sitter-0.24.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:772e1bd8c0931c866b848d0369b32218ac97c24b04790ec4b0e409901945dd8e", size = 579303 }, + { url = "https://files.pythonhosted.org/packages/0a/f4/bd0ddf9abe242ea67cca18a64810f8af230fc1ea74b28bb702e838ccd874/tree_sitter-0.24.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:24a8dd03b0d6b8812425f3b84d2f4763322684e38baf74e5bb766128b5633dc7", size = 581054 }, + { url = "https://files.pythonhosted.org/packages/8c/1c/ff23fa4931b6ef1bbeac461b904ca7e49eaec7e7e5398584e3eef836ec96/tree_sitter-0.24.0-cp312-cp312-win_amd64.whl", hash = "sha256:f9e8b1605ab60ed43803100f067eed71b0b0e6c1fb9860a262727dbfbbb74751", size = 120221 }, + { url = "https://files.pythonhosted.org/packages/b2/2a/9979c626f303177b7612a802237d0533155bf1e425ff6f73cc40f25453e2/tree_sitter-0.24.0-cp312-cp312-win_arm64.whl", hash = "sha256:f733a83d8355fc95561582b66bbea92ffd365c5d7a665bc9ebd25e049c2b2abb", size = 108234 }, + { url = "https://files.pythonhosted.org/packages/61/cd/2348339c85803330ce38cee1c6cbbfa78a656b34ff58606ebaf5c9e83bd0/tree_sitter-0.24.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0d4a6416ed421c4210f0ca405a4834d5ccfbb8ad6692d4d74f7773ef68f92071", size = 140781 }, + { url = "https://files.pythonhosted.org/packages/8b/a3/1ea9d8b64e8dcfcc0051028a9c84a630301290995cd6e947bf88267ef7b1/tree_sitter-0.24.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e0992d483677e71d5c5d37f30dfb2e3afec2f932a9c53eec4fca13869b788c6c", size = 133928 }, + { url = "https://files.pythonhosted.org/packages/fe/ae/55c1055609c9428a4aedf4b164400ab9adb0b1bf1538b51f4b3748a6c983/tree_sitter-0.24.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:57277a12fbcefb1c8b206186068d456c600dbfbc3fd6c76968ee22614c5cd5ad", size = 564497 }, + { url = "https://files.pythonhosted.org/packages/ce/d0/f2ffcd04882c5aa28d205a787353130cbf84b2b8a977fd211bdc3b399ae3/tree_sitter-0.24.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d25fa22766d63f73716c6fec1a31ee5cf904aa429484256bd5fdf5259051ed74", size = 578917 }, + { url = "https://files.pythonhosted.org/packages/af/82/aebe78ea23a2b3a79324993d4915f3093ad1af43d7c2208ee90be9273273/tree_sitter-0.24.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:7d5d9537507e1c8c5fa9935b34f320bfec4114d675e028f3ad94f11cf9db37b9", size = 581148 }, + { url = "https://files.pythonhosted.org/packages/a1/b4/6b0291a590c2b0417cfdb64ccb8ea242f270a46ed429c641fbc2bfab77e0/tree_sitter-0.24.0-cp313-cp313-win_amd64.whl", hash = "sha256:f58bb4956917715ec4d5a28681829a8dad5c342cafd4aea269f9132a83ca9b34", size = 120207 }, + { url = "https://files.pythonhosted.org/packages/a8/18/542fd844b75272630229c9939b03f7db232c71a9d82aadc59c596319ea6a/tree_sitter-0.24.0-cp313-cp313-win_arm64.whl", hash = "sha256:23641bd25dcd4bb0b6fa91b8fb3f46cc9f1c9f475efe4d536d3f1f688d1b84c8", size = 108232 }, +] + +[[package]] +name = "tree-sitter-c-sharp" +version = "0.23.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/22/85/a61c782afbb706a47d990eaee6977e7c2bd013771c5bf5c81c617684f286/tree_sitter_c_sharp-0.23.1.tar.gz", hash = "sha256:322e2cfd3a547a840375276b2aea3335fa6458aeac082f6c60fec3f745c967eb", size = 1317728 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/58/04/f6c2df4c53a588ccd88d50851155945cff8cd887bd70c175e00aaade7edf/tree_sitter_c_sharp-0.23.1-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:2b612a6e5bd17bb7fa2aab4bb6fc1fba45c94f09cb034ab332e45603b86e32fd", size = 372235 }, + { url = "https://files.pythonhosted.org/packages/99/10/1aa9486f1e28fc22810fa92cbdc54e1051e7f5536a5e5b5e9695f609b31e/tree_sitter_c_sharp-0.23.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:1a8b98f62bc53efcd4d971151950c9b9cd5cbe3bacdb0cd69fdccac63350d83e", size = 419046 }, + { url = "https://files.pythonhosted.org/packages/0f/21/13df29f8fcb9ba9f209b7b413a4764b673dfd58989a0dd67e9c7e19e9c2e/tree_sitter_c_sharp-0.23.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:986e93d845a438ec3c4416401aa98e6a6f6631d644bbbc2e43fcb915c51d255d", size = 415999 }, + { url = 
"https://files.pythonhosted.org/packages/ca/72/fc6846795bcdae2f8aa94cc8b1d1af33d634e08be63e294ff0d6794b1efc/tree_sitter_c_sharp-0.23.1-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8024e466b2f5611c6dc90321f232d8584893c7fb88b75e4a831992f877616d2", size = 402830 }, + { url = "https://files.pythonhosted.org/packages/fe/3a/b6028c5890ce6653807d5fa88c72232c027c6ceb480dbeb3b186d60e5971/tree_sitter_c_sharp-0.23.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:7f9bf876866835492281d336b9e1f9626ab668737f74e914c31d285261507da7", size = 397880 }, + { url = "https://files.pythonhosted.org/packages/47/d2/4facaa34b40f8104d8751746d0e1cd2ddf0beb9f1404b736b97f372bd1f3/tree_sitter_c_sharp-0.23.1-cp39-abi3-win_amd64.whl", hash = "sha256:ae9a9e859e8f44e2b07578d44f9a220d3fa25b688966708af6aa55d42abeebb3", size = 377562 }, + { url = "https://files.pythonhosted.org/packages/d8/88/3cf6bd9959d94d1fec1e6a9c530c5f08ff4115a474f62aedb5fedb0f7241/tree_sitter_c_sharp-0.23.1-cp39-abi3-win_arm64.whl", hash = "sha256:c81548347a93347be4f48cb63ec7d60ef4b0efa91313330e69641e49aa5a08c5", size = 375157 }, +] + +[[package]] +name = "tree-sitter-embedded-template" +version = "0.23.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/28/d6/5a58ea2f0480f5ed188b733114a8c275532a2fd1568b3898793b13d28af5/tree_sitter_embedded_template-0.23.2.tar.gz", hash = "sha256:7b24dcf2e92497f54323e617564d36866230a8bfb719dbb7b45b461510dcddaa", size = 8471 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ef/c1/be0c48ed9609b720e74ade86f24ea086e353fe9c7405ee9630c3d52d09a2/tree_sitter_embedded_template-0.23.2-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:a505c2d2494464029d79db541cab52f6da5fb326bf3d355e69bf98b84eb89ae0", size = 9554 }, + { url = 
"https://files.pythonhosted.org/packages/6d/a5/7c12f5d302525ee36d1eafc28a68e4454da5bad208436d547326bee4ed76/tree_sitter_embedded_template-0.23.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:28028b93b42cc3753261ae7ce066675d407f59de512417524f9c3ab7792b1d37", size = 10051 }, + { url = "https://files.pythonhosted.org/packages/cd/87/95aaba8b64b849200bd7d4ae510cc394ecaef46a031499cbff301766970d/tree_sitter_embedded_template-0.23.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec399d59ce93ffb60759a2d96053eed529f3c3f6a27128f261710d0d0de60e10", size = 17532 }, + { url = "https://files.pythonhosted.org/packages/13/f8/8c837b898f00b35f9f3f76a4abc525e80866a69343083c9ff329e17ecb03/tree_sitter_embedded_template-0.23.2-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bcfa01f62b88d50dbcb736cc23baec8ddbfe08daacfdc613eee8c04ab65efd09", size = 17394 }, + { url = "https://files.pythonhosted.org/packages/89/9b/893adf9e465d2d7f14870871bf2f3b30045e5ac417cb596f667a72eda493/tree_sitter_embedded_template-0.23.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6debd24791466f887109a433c31aa4a5deeba2b217817521c745a4e748a944ed", size = 16439 }, + { url = "https://files.pythonhosted.org/packages/40/96/e79934572723673db9f867000500c6eea61a37705e02c7aee9ee031bbb6f/tree_sitter_embedded_template-0.23.2-cp39-abi3-win_amd64.whl", hash = "sha256:158fecb38be5b15db0190ef7238e5248f24bf32ae3cab93bc1197e293a5641eb", size = 12572 }, + { url = "https://files.pythonhosted.org/packages/63/06/27f678b9874e4e2e39ddc6f5cce3374c8c60e6046ea8588a491ab6fc9fcb/tree_sitter_embedded_template-0.23.2-cp39-abi3-win_arm64.whl", hash = "sha256:9f1f3b79fe273f3d15a5b64c85fc6ebfb48decfbe8542accd05f5b7694860df0", size = 11232 }, +] + +[[package]] +name = "tree-sitter-language-pack" +version = "0.8.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "tree-sitter" }, + { name = "tree-sitter-c-sharp" }, + { 
name = "tree-sitter-embedded-template" }, + { name = "tree-sitter-yaml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/93/b7/1272925d5cccd0c7a79df85fdc1a728a9cd9536adca10c473a86ea6a1022/tree_sitter_language_pack-0.8.0.tar.gz", hash = "sha256:49aafe322eb59ef4d4457577210fb20c18c5535b1a42b8e753aa699ed3bf9eed", size = 43693098 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2e/44/f7d3c4c5e075de1b3ad9e7d006f2057d65d39d5a573d6ee72b1a7f3f6cd1/tree_sitter_language_pack-0.8.0-cp39-abi3-macosx_10_13_universal2.whl", hash = "sha256:7ab5dd0e4383bd0c845c153f65da62df035591fc79759a5f6efd5b27aaa551c5", size = 28609869 }, + { url = "https://files.pythonhosted.org/packages/bf/24/86f32fae7eaaf829cfd0013f8173fb0f3e75f6e0a8bc58bd165c821e17de/tree_sitter_language_pack-0.8.0-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:1757c04af8350ffdfd5509951fb7874dc1947604d6d9f16a2f88a0cd4fcc54cb", size = 17871704 }, + { url = "https://files.pythonhosted.org/packages/00/7d/9356ecb8d5fcc16e39154821226d0dc3662393b9f46326f539e3e71dc384/tree_sitter_language_pack-0.8.0-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:81aac45ddde6c7e9ac222d0157af03648b1382d4de3af321d1b913af96b796f0", size = 17729371 }, + { url = "https://files.pythonhosted.org/packages/19/49/cfe141b0be9e08aeb9e20f3a182e58b7af12a28f46949403005e5483afc6/tree_sitter_language_pack-0.8.0-cp39-abi3-win_amd64.whl", hash = "sha256:e870a3cc067352b249393e887710dae4918c6454f7fd41e43108f3621a5f41f8", size = 14552212 }, +] + +[[package]] +name = "tree-sitter-yaml" +version = "0.7.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0b/d0/97899f366e3d982ad92dd83faa2b1dd0060e5db99990e0d7f660902493f8/tree_sitter_yaml-0.7.1.tar.gz", hash = "sha256:2cea5f8d4ca4d10439bd7d9e458c61b330cb33cf7a92e4ef1d428e10e1ab7e2c", size = 91533 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/3f/7e/83a40de4315b8f9975d3fd562071bda8fa1dfc088b3359d048003f174fd0/tree_sitter_yaml-0.7.1-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:0256632914d6eb21819f21a85bab649505496ac01fac940eb08a410669346822", size = 43788 }, + { url = "https://files.pythonhosted.org/packages/ca/05/760b38e31f9ca1e8667cf82a07119956dcb865728f7d777a22f5ddf296c6/tree_sitter_yaml-0.7.1-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:bf9dd2649392e1f28a20f920f49acd9398cfb872876e338aa84562f8f868dc4d", size = 45001 }, + { url = "https://files.pythonhosted.org/packages/88/e9/6d8d502eeb96fb363c1ac926ac456afc55019836fc675263fd23754dfdc6/tree_sitter_yaml-0.7.1-cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94eb8fcb1ac8e43f7da47e63880b6f283524460153f08420a167c1721e42b08a", size = 93852 }, + { url = "https://files.pythonhosted.org/packages/85/ef/b84bc6aaaa08022b4cc1d36212e837ce051306d50dd62993ffc21c9bf4ab/tree_sitter_yaml-0.7.1-cp310-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30410089828ebdece9abf3aa16b2e172b84cf2fd90a2b7d8022f6ed8cde90ecb", size = 92125 }, + { url = "https://files.pythonhosted.org/packages/16/0c/5caa26da012c93da1eadf66c6babb1b1e2e8dd4434668c7232739df87e46/tree_sitter_yaml-0.7.1-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:219af34f4b35b5c16f25426cc3f90cf725fbba17c9592f78504086e67787be09", size = 90443 }, + { url = "https://files.pythonhosted.org/packages/92/25/a14297ea2a575bc3c19fcf58a5983a926ad732c32af23a346d7fa0563d8d/tree_sitter_yaml-0.7.1-cp310-abi3-win_amd64.whl", hash = "sha256:550645223d68b7d6b4cfedf4972754724e64d369ec321fa33f57d3ca54cafc7c", size = 45517 }, + { url = "https://files.pythonhosted.org/packages/62/fa/b25e688df5b4e024bc3627bc3f951524ef9c8b0756f0646411efa5063a10/tree_sitter_yaml-0.7.1-cp310-abi3-win_arm64.whl", hash = "sha256:298ade69ad61f76bb3e50ced809650ec30521a51aa2708166b176419ccb0a6ba", size = 43801 }, +] + 
[[package]] name = "typer" version = "0.16.0" From d96fc5f04cc7a10d7a3082889a8eae2bf738df37 Mon Sep 17 00:00:00 2001 From: ethan-wickstrom Date: Wed, 25 Jun 2025 02:24:14 -0500 Subject: [PATCH 15/26] docs: update README and add modern Python patterns document --- README.md | 38 +- docs/modern_python_patterns.md | 653 +++++++++++++++++++++++++++++++++ 2 files changed, 673 insertions(+), 18 deletions(-) create mode 100644 docs/modern_python_patterns.md diff --git a/README.md b/README.md index e30f1b4..dedfbf6 100644 --- a/README.md +++ b/README.md @@ -27,33 +27,35 @@ Follow these steps to get Robofactor set up on your local machine. - Python 3.12 or higher - [uv](https://github.com/astral-sh/uv) package manager. If you don't have it, you can install it via pip: + ```bash pip install uv ``` ### Step-by-Step Guide -1. **Clone the repository:** +1. **Clone the repository:** + + ```bash + git clone https://github.com/ethan-wickstrom/robofactor.git + cd robofactor + ``` - ```bash - git clone https://github.com/ethan-wickstrom/robofactor.git - cd robofactor - ``` +2. **Install dependencies:** -2. **Install dependencies:** + This project uses `uv` to manage dependencies and virtual environments. - This project uses `uv` to manage dependencies and virtual environments. + - For a **standard installation** (to use the tool): - - For a **standard installation** (to use the tool): + ```bash + uv sync --no-dev + ``` - ```bash - uv sync --no-dev - ``` + - For a **development installation** (includes testing and linting tools): - - For a **development installation** (includes testing and linting tools): - ```bash - uv sync --all-groups - ``` + ```bash + uv sync --all-groups + ``` ## 🚀 Usage @@ -129,9 +131,9 @@ The AI's suggested refactoring is never trusted blindly. Before any changes are - **Module:** `src/robofactor/evaluation.py` - **Process:** The evaluation consists of several automated checks: - 1. 
**Syntax Check**: The refactored code is parsed again to ensure it is valid Python syntax. - 2. **Code Quality Analysis**: The code is linted using `flake8` to check for style guide violations, logical errors, and code smells. - 3. **Functional Correctness**: The original function's test cases are executed against the refactored code in a sandboxed environment. This critical step verifies that the refactoring did not alter the function's behavior or introduce regressions. + 1. **Syntax Check**: The refactored code is parsed again to ensure it is valid Python syntax. + 2. **Code Quality Analysis**: The code is linted using `flake8` to check for style guide violations, logical errors, and code smells. + 3. **Functional Correctness**: The original function's test cases are executed against the refactored code in a sandboxed environment. This critical step verifies that the refactoring did not alter the function's behavior or introduce regressions. Only if the refactored code passes all three checks is the process considered a success. diff --git a/docs/modern_python_patterns.md b/docs/modern_python_patterns.md new file mode 100644 index 0000000..9917f99 --- /dev/null +++ b/docs/modern_python_patterns.md @@ -0,0 +1,653 @@ +# Using Modern Python 3.13 Type System Patterns + +This document presents three fundamental patterns for leveraging Python 3.13's enhanced type system: type aliases using PEP 695 syntax, the Maybe monad pattern for nullable values, and TypeGuard functions for safe type narrowing. These patterns enable developers to write more expressive, type-safe code while avoiding unsafe practices such as type casting and runtime type assertions. + +--- + +## 1. Type Aliases with PEP 695 Syntax + +### 1.1 Functional Description + +Type aliases in Python 3.13 provide a mechanism for creating semantic type synonyms that enhance code readability and maintainability. 
The new PEP 695 syntax (available since Python 3.12) introduces the `type` statement, which creates type aliases with improved scoping rules and cleaner syntax compared to traditional assignment-based aliases (`Alias = ...` or `Alias: TypeAlias = ...`). + +**Key Properties:** + +- **Semantic clarity**: Type aliases communicate domain-specific meaning (they remain interchangeable with the aliased type; use `NewType` when distinct types are required) +- **Composability**: Complex types can be built from simpler components +- **Negligible runtime overhead**: An alias is a lightweight `TypeAliasType` object whose value is evaluated lazily +- **Generic support**: Type parameters can be directly specified + +### 1.2 Instructions for Implementation + +1. **Basic Type Alias Definition** + + ```python + type UserId = int + type Email = str + type Timestamp = float + ``` + +2. **Complex Type Aliases** + + ```python + type JsonValue = dict[str, Any] | list[Any] | str | int | float | bool | None + type HttpHeaders = dict[str, str] + type QueryParams = dict[str, list[str]] + ``` + +3. **Generic Type Aliases** + + ```python + type Result[T, E] = Success[T] | Failure[E] + type Predicate[T] = Callable[[T], bool] + type Transform[A, B] = Callable[[A], B] + ``` + +4. **Nested Type Aliases** + + ```python + type Point2D = tuple[float, float] + type Line = tuple[Point2D, Point2D] + type Polygon = list[Point2D] + ``` + +### 1.3 Practical Examples + +#### Example 1: Domain Modeling with Type Aliases + +```python +from dataclasses import dataclass +from decimal import Decimal +from typing import Literal + +# Define domain-specific type aliases +type CustomerId = int +type OrderId = int +type ProductCode = str +type Quantity = int +type Price = Decimal +type Currency = Literal["USD", "EUR", "GBP"] +type OrderStatus = Literal["pending", "confirmed", "shipped", "delivered", "cancelled"] + +# Use aliases in data structures +@dataclass(frozen=True) +class OrderItem: + product: ProductCode + quantity: Quantity + unit_price: Price + +@dataclass(frozen=True) +class Order: + id: OrderId + customer: CustomerId + items: tuple[OrderItem, ...]
+ currency: Currency + status: OrderStatus + + @property + def total_price(self) -> Price: + return sum((item.unit_price * item.quantity for item in self.items), Decimal(0)) +``` + +#### Example 2: Graph Algorithm Type Aliases + +```python +from collections.abc import Mapping, Set + +# Define graph-related type aliases +type NodeId = str +type Weight = float +type Edge = tuple[NodeId, NodeId, Weight] +type AdjacencyList = Mapping[NodeId, Set[NodeId]] +type WeightedAdjacencyList = Mapping[NodeId, Mapping[NodeId, Weight]] +type Path = list[NodeId] +type Distance = float  # float('inf') marks an unreachable node + +def dijkstra( + graph: WeightedAdjacencyList, + start: NodeId, + end: NodeId +) -> tuple[Distance, Path]: + """Find shortest path between nodes using Dijkstra's algorithm.""" + # Implementation details omitted for brevity + pass +``` + +#### Example 3: Parser Combinator Type Aliases + +```python +from typing import Callable +from collections.abc import Sequence + +# Generic type aliases for parser combinators +type ParseResult[T] = tuple[T, str] | None +type Parser[T] = Callable[[str], ParseResult[T]] +type Combinator[A, B] = Callable[[Parser[A]], Parser[B]] + +# Specific parser type aliases +type TokenParser = Parser[str] +type NumberParser = Parser[float] +type IdentifierParser = Parser[str] + +def sequence[T](parsers: Sequence[Parser[T]]) -> Parser[list[T]]: + """Combine multiple parsers in sequence.""" + def parse(input_str: str) -> ParseResult[list[T]]: + results = [] + remaining = input_str + for parser in parsers: + result = parser(remaining) + if result is None: + return None + value, remaining = result + results.append(value) + return results, remaining + return parse +``` + +--- + +## 2. The Maybe Pattern for Nullable Values + +### 2.1 Functional Description + +The Maybe pattern, borrowed from functional programming languages like Haskell, provides a type-safe alternative to nullable references.
It explicitly models the presence or absence of a value, forcing developers to handle both cases and eliminating null pointer exceptions at the type level. + +**Mathematical Foundation:** +The Maybe type forms a monad with the following operations: + +- `return`: Wraps a value in Some +- `bind`: Chains computations that may produce Nothing +- The monad laws (left identity, right identity, and associativity) hold + +**Key Benefits:** + +- **Explicit null handling**: Absence is a first-class concept +- **Composability**: Chain operations without null checks +- **Type safety**: Prevents null pointer exceptions +- **Functional purity**: No hidden nulls or exceptions + +### 2.2 Instructions for Implementation + +1. **Import Required Types** + + ```python + from returns.maybe import Maybe, Some, Nothing + from returns.pointfree import bind + from returns.pipeline import flow + ``` + +2. **Creating Maybe Values** + + ```python + # From a value + maybe_value = Some(42) + + # Representing absence + no_value = Nothing + + # From optional + maybe_from_optional = Maybe.from_optional(some_optional_value) + ``` + +3. **Pattern Matching on Maybe** + + ```python + match maybe_value: + case Some(value): + # Handle the present value + process(value) + case Maybe.empty: + # Handle absence (a bare `case Nothing:` would be a catch-all capture pattern) + handle_missing() + ``` + +4.
**Chaining Operations** + + ```python + result = flow( + initial_value, + parse_input, + bind(validate), + bind(transform), + bind(save_to_database) + ) + ``` + +### 2.3 Practical Examples + +#### Example 1: Safe Dictionary Access + +```python +from returns.maybe import Maybe, Some, Nothing +from collections.abc import Mapping + +# PEP 695 type parameters are declared inline on the function, +# so no module-level TypeVar definitions are needed. + +def safe_get[K, V](mapping: Mapping[K, V], key: K) -> Maybe[V]: + """Safely retrieve a value from a mapping.""" + try: + return Some(mapping[key]) + except KeyError: + return Nothing + +def get_nested_value(data: dict[str, dict[str, int]], + outer_key: str, + inner_key: str) -> Maybe[int]: + """Safely navigate nested dictionaries.""" + return ( + safe_get(data, outer_key) + .bind(lambda inner_dict: safe_get(inner_dict, inner_key)) + ) + +# Usage example +data = { + "users": {"alice": 42, "bob": 17}, + "admins": {"charlie": 99} +} + +# This returns Some(42) +alice_value = get_nested_value(data, "users", "alice") + +# This returns Nothing (no "eve" in users) +eve_value = get_nested_value(data, "users", "eve") + +# This returns Nothing (no "guests" key) +guest_value = get_nested_value(data, "guests", "anyone") +``` + +#### Example 2: Configuration Parsing + +```python +from returns.maybe import Maybe, Some, Nothing +from dataclasses import dataclass +from pathlib import Path +import json + +@dataclass(frozen=True) +class DatabaseConfig: + host: str + port: int + username: str + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "Maybe[DatabaseConfig]": + """Parse database configuration from dictionary.""" + try: + return Some(cls( + host=data["host"], + port=int(data["port"]), + username=data["username"] + )) + except (KeyError, ValueError, TypeError): + return Nothing + +def load_config(path: Path) -> Maybe[DatabaseConfig]: + """Load configuration from JSON file.""" + def read_file(p: Path) -> Maybe[str]: + try: + return Some(p.read_text()) + except OSError: + return Nothing + + def
parse_json(content: str) -> Maybe[dict[str, Any]]: + try: + return Some(json.loads(content)) + except json.JSONDecodeError: + return Nothing + + return ( + read_file(path) + .bind(parse_json) + .bind(DatabaseConfig.from_dict) + ) + +# Usage +config_path = Path("config.json") +match load_config(config_path): + case Some(config): + print(f"Connecting to {config.host}:{config.port}") + case Maybe.empty: + print("Failed to load configuration, using defaults") +``` + +#### Example 3: User Authentication Chain + +```python +from returns.maybe import Maybe, Some, Nothing +from returns.pipeline import flow +from returns.pointfree import bind +import hashlib +from dataclasses import dataclass + +@dataclass(frozen=True) +class User: + id: int + username: str + password_hash: str + is_active: bool + +type UserId = int +type Username = str +type SessionToken = str + +def find_user_by_username(username: Username) -> Maybe[User]: + """Lookup user in database by username.""" + # Simulated database lookup + users = { + "alice": User(1, "alice", hashlib.sha256(b"secret123").hexdigest(), True), + "bob": User(2, "bob", hashlib.sha256(b"password").hexdigest(), False), + } + return Maybe.from_optional(users.get(username)) + +def verify_password(password: str, user: User) -> Maybe[User]: + """Verify password matches stored hash (demo only: production code needs a salted KDF and `hmac.compare_digest`).""" + password_hash = hashlib.sha256(password.encode()).hexdigest() + return Some(user) if password_hash == user.password_hash else Nothing + +def check_active(user: User) -> Maybe[User]: + """Ensure user account is active.""" + return Some(user) if user.is_active else Nothing + +def generate_session(user: User) -> Maybe[SessionToken]: + """Generate session token for authenticated user.""" + # Simplified token generation + return Some(f"session_{user.id}_{user.username}") + +def authenticate(username: Username, password: str) -> Maybe[SessionToken]: + """Complete authentication pipeline.""" + return flow( + find_user_by_username(username), + bind(lambda user:
verify_password(password, user)), + bind(check_active), + bind(generate_session) + ) + +# Usage examples +match authenticate("alice", "secret123"): + case Some(token): + print(f"Authentication successful: {token}") + case Maybe.empty: + print("Authentication failed") + +match authenticate("bob", "password"): + case Some(token): + print(f"Authentication successful: {token}") + case Maybe.empty: + print("Authentication failed") # This will print (Bob is inactive) +``` + +--- + +## 3. TypeGuard Pattern for Safe Type Narrowing + +### 3.1 Functional Description + +TypeGuard functions provide a type-safe mechanism for narrowing types within conditional branches without resorting to unsafe type casting. Introduced in PEP 647, TypeGuards establish a contract between runtime checks and static type analysis, enabling type checkers to understand type refinements based on boolean predicates. + +**Formal Properties:** + +- **Trusted narrowing**: If a TypeGuard returns True, the type checker treats the argument as the guarded type; the author is responsible for making the runtime check correct +- **Composability**: TypeGuards can be combined using logical operators +- **No extra runtime machinery**: TypeGuards are regular functions; only their return annotation is special +- **Static verification**: Type checkers validate TypeGuard usage during static analysis + +### 3.2 Instructions for Implementation + +1. **Basic TypeGuard Structure** + + ```python + from typing import TypeGuard + + def is_type_name(value: broader_type) -> TypeGuard[narrower_type]: + """Check if value is of narrower_type.""" + return isinstance(value, narrower_type) # or other validation + ``` + +2. **TypeGuard Requirements** + + - Must return a boolean value + - The guarded type should normally be narrower than the input type (PEP 647 does not enforce this; `TypeIs` from PEP 742 does) + - The function body must actually perform the check (type checkers do not verify it) + - Should be pure (no side effects) + +3.
**Using TypeGuards** + + ```python + if is_type_name(value): + # value is now narrowed to narrower_type + use_narrowed_value(value) + else: + # value remains as broader_type + handle_other_case(value) + ``` + +### 3.3 Practical Examples + +#### Example 1: Literal Type Narrowing + +```python +from typing import TypeGuard, Literal, get_args + +# Define literal types (get_args needs the alias's `__value__`; on the alias itself it returns ()) +type HttpMethod = Literal["GET", "POST", "PUT", "DELETE", "PATCH"] +type SafeMethod = Literal["GET", "HEAD", "OPTIONS"] +type DatabaseOperation = Literal["SELECT", "INSERT", "UPDATE", "DELETE"] + +def is_http_method(value: str) -> TypeGuard[HttpMethod]: + """Check if a string is a valid HTTP method.""" + valid_methods = get_args(HttpMethod.__value__) + return value in valid_methods + +def is_safe_method(method: HttpMethod) -> TypeGuard[SafeMethod]: + """Check if an HTTP method is safe (no side effects).""" + safe_methods = get_args(SafeMethod.__value__) + return method in safe_methods + +def is_database_operation(value: str) -> TypeGuard[DatabaseOperation]: + """Check if a string is a valid database operation.""" + valid_ops = get_args(DatabaseOperation.__value__) + return value in valid_ops + +# Usage example +def process_request(method_str: str, path: str) -> str: + """Process HTTP request with proper type narrowing.""" + if not is_http_method(method_str): + return f"Invalid HTTP method: {method_str}" + + # method_str is now narrowed to HttpMethod + if is_safe_method(method_str): + # method_str is now narrowed to SafeMethod + return f"Safe request: {method_str} {path}" + else: + # method_str is still HttpMethod (TypeGuard does not narrow the negative branch) + return f"Unsafe request: {method_str} {path} (requires authentication)" + +# Examples +print(process_request("GET", "/users")) # Safe request: GET /users +print(process_request("POST", "/users")) # Unsafe request: POST /users (requires authentication) +print(process_request("INVALID", "/users")) # Invalid HTTP method: INVALID +``` + +#### Example 2: Structural Type Validation + +```python +from typing
import TypeGuard, Protocol, Any +from dataclasses import dataclass + +# Define protocols for structural typing +class Comparable(Protocol): + def __lt__(self, other: Any) -> bool: ... + def __eq__(self, other: Any) -> bool: ... + +class Sized(Protocol): + def __len__(self) -> int: ... + +class Container[T](Protocol): + def __contains__(self, item: T) -> bool: ... + def __len__(self) -> int: ... + +# TypeGuard functions for protocol checking +def is_comparable(obj: object) -> TypeGuard[Comparable]: + """Check if object implements Comparable protocol.""" + return ( + hasattr(obj, '__lt__') and + callable(getattr(obj, '__lt__')) and + hasattr(obj, '__eq__') and + callable(getattr(obj, '__eq__')) + ) + +def is_sized(obj: object) -> TypeGuard[Sized]: + """Check if object implements Sized protocol.""" + return hasattr(obj, '__len__') and callable(getattr(obj, '__len__')) + +def is_container(obj: object) -> TypeGuard[Container[Any]]: + """Check if object implements Container protocol.""" + return ( + is_sized(obj) and + hasattr(obj, '__contains__') and + callable(getattr(obj, '__contains__')) + ) + +# Usage with type narrowing +def find_min[T](items: object) -> T | None: + """Find minimum value in a comparable container.""" + if not is_container(items): + return None + + # items is now Container[Any] + if len(items) == 0: + return None + + result = None + for item in items: # Assuming Container is iterable + if result is None: + result = item + elif is_comparable(item) and is_comparable(result): + if item < result: + result = item + + return result + +# Examples +print(find_min([3, 1, 4, 1, 5])) # 1 +print(find_min("hello")) # 'e' +print(find_min(42)) # None (not a container) +``` + +#### Example 3: Complex Data Validation + +```python +from typing import TypeGuard, Any, TypedDict +from datetime import datetime + +# Define typed dictionaries for API responses +class UserData(TypedDict): + id: int + username: str + email: str + created_at: str + is_active: bool + +class 
AdminData(UserData): + permissions: list[str] + admin_level: int + +class PartialUserData(TypedDict, total=False): + id: int + username: str + email: str + +# TypeGuard functions for validation +def is_valid_email(email: str) -> bool: + """Check if string is a valid email format.""" + return '@' in email and '.' in email.split('@')[1] + +def is_user_data(data: dict[str, Any]) -> TypeGuard[UserData]: + """Validate that dictionary conforms to UserData structure.""" + return ( + isinstance(data.get('id'), int) and + isinstance(data.get('username'), str) and + isinstance(data.get('email'), str) and + is_valid_email(data.get('email', '')) and + isinstance(data.get('created_at'), str) and + isinstance(data.get('is_active'), bool) + ) + +def is_admin_data(data: dict[str, Any]) -> TypeGuard[AdminData]: + """Validate that dictionary conforms to AdminData structure.""" + return ( + is_user_data(data) and + isinstance(data.get('permissions'), list) and + all(isinstance(p, str) for p in data.get('permissions', [])) and + isinstance(data.get('admin_level'), int) and + data.get('admin_level', 0) > 0 + ) + +def is_partial_user_data(data: dict[str, Any]) -> TypeGuard[PartialUserData]: + """Validate partial user data for updates.""" + allowed_keys = {'id', 'username', 'email'} + return ( + all(key in allowed_keys for key in data.keys()) and + all( + isinstance(data.get('id'), int) if 'id' in data else True, + isinstance(data.get('username'), str) if 'username' in data else True, + isinstance(data.get('email'), str) and is_valid_email(data['email']) if 'email' in data else True, + ) + ) + +# Usage in API endpoint +def process_user_update(user_id: int, update_data: dict[str, Any]) -> str: + """Process user update with proper validation.""" + if not is_partial_user_data(update_data): + return "Invalid update data format" + + # update_data is now PartialUserData + if 'email' in update_data: + print(f"Updating email to: {update_data['email']}") + + if 'username' in update_data: + 
print(f"Updating username to: {update_data['username']}") + + return "Update successful" + +# Example usage +api_response: dict[str, Any] = { + "id": 123, + "username": "alice", + "email": "alice@example.com", + "created_at": "2024-01-01T00:00:00Z", + "is_active": True, + "permissions": ["read", "write"], + "admin_level": 2 +} + +if is_admin_data(api_response): + # api_response is narrowed to AdminData + print(f"Admin user {api_response['username']} has permissions: {api_response['permissions']}") +elif is_user_data(api_response): + # api_response is narrowed to UserData + print(f"Regular user {api_response['username']}") +else: + print("Invalid user data") +``` + +--- + +## Conclusion + +These three patterns—type aliases with PEP 695, the Maybe pattern, and TypeGuard functions—form a powerful trio for writing type-safe, expressive Python code. By adopting these patterns, developers can: + +1. **Communicate intent** through semantic type aliases +2. **Eliminate null pointer exceptions** with explicit Maybe handling +3. **Avoid unsafe casting** through principled type narrowing + +Together, they enable a more functional, type-driven development style that catches errors at compile time while maintaining Python's expressiveness and readability. + +## References + +1. PEP 695 – Type Parameter Syntax. Python Enhancement Proposals. +2. PEP 647 – User-Defined Type Guards. Python Enhancement Proposals. +3. Lipovača, M. (2011). Learn You a Haskell for Great Good! No Starch Press. +4. Petricek, T., & Skeet, J. (2009). Real World Functional Programming. Manning Publications. +5. Returns Documentation. (2024). 
From 630327f304f44348898f36bc5d3e4e547325a504 Mon Sep 17 00:00:00 2001 From: ethan-wickstrom Date: Wed, 25 Jun 2025 16:56:28 -0500 Subject: [PATCH 16/26] docs: improve windsurfrules prompting --- .windsurfrules | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/.windsurfrules b/.windsurfrules index 6deae6c..237aa89 100644 --- a/.windsurfrules +++ b/.windsurfrules @@ -11,6 +11,8 @@ Adopt a purely functional, stateless approach: - Structure logic as input-process-output pipelines. - Ensure all operations are idempotent and safely repeatable. +## RULES + ### Type System & Data Modeling Use the type system to enforce correctness and make invalid states unrepresentable. @@ -423,16 +425,3 @@ def parse_int(value: str) -> Result[int, ValueError]: 5. **Type check continuously** - Fix type errors immediately This workspace emphasizes functional programming principles, type safety, and clear separation of concerns. Every code change should advance these goals while maintaining zero technical debt. - -## [!REMINDER]: Use Perplexity in all tasks - -Always begin each response with atleast 1 call to perplexity. - -Ask perplexity several questions to PRIOR TO BEGINNING each task the user gives you. - -> Note: Send as many questions as needed to perplexity. Make sure each questions is precise and is IN ITS OWN TOOL CALL. Do not ask multiple questions in the same tool call. Make sure to provide perplexity all the context it needs. -> Note: If you come across any other concerns or problems while working on the user's code, just ask perplexity more questions as needed! Frequently consult with perplexity to get the best possible information. -> Note: Send a batch of countless calls to perplexity to get many answers in parallel to all of your questions. -> Note: Send questions about different frameworks separately from questions about other frameworks. 
Questions must be about the individual frameworks rather than the bridging between the frameworks (For example: Elysia has gained some popularity. However, no one really uses Effect with Elysia yet. Be epistemically humble and ask all questions.) - -While you must begin each response with atleast 1 call to perplexity, you may skip calls to perplexity if you have already asked all the questions you need to or if your previous interaction with perplexity was recent enough. From d896326e909a0223cd60ecc088e21243bd956e3c Mon Sep 17 00:00:00 2001 From: ethan-wickstrom Date: Mon, 14 Jul 2025 13:49:40 -0500 Subject: [PATCH 17/26] docs: condense type system docs and add quick fix guide for Typer issue --- docs/issues-in-typer.md | 213 +---------- docs/modern_python_patterns.md | 653 ++------------------------------- 2 files changed, 29 insertions(+), 837 deletions(-) diff --git a/docs/issues-in-typer.md b/docs/issues-in-typer.md index 2e03002..28f268c 100644 --- a/docs/issues-in-typer.md +++ b/docs/issues-in-typer.md @@ -1,211 +1,14 @@ -# Typer 0.16.0 "Name defined twice" Issue with Annotated and Option +# Typer 0.16.0 Quick Fix -## Executive Summary - -In Typer 0.16.0 with Python 3.13, using `Annotated` with `Option()` can trigger a `TypeError: Name 'X' defined twice` error when the default value is specified as the first argument to `Option()`. This occurs due to how Typer and Click process parameter declarations internally. - -## The Problem - -### Failing Code +**Problem:** `TypeError: Name 'X' defined twice` when using `Annotated` with `typer.Option`. +**Fix:** Never put the default inside `Option()` when you also give the parameter a default. 
```python -@app.command() -def main( - mode: Annotated[ - DiffMode, - typer.Option( - DiffMode.both, # ❌ Default value as first argument - "-m", - "--mode", - help="Which mode to use", - ), - ] = DiffMode.both, -): - pass -``` - -**Error:** `TypeError: Name 'mode' defined twice` +# ❌ Breaks +mode: Annotated[DiffMode, typer.Option(DiffMode.both, "-m", "--mode")] = DiffMode.both -### Working Code - -```python -@app.command() -def main( - mode: Annotated[ - DiffMode, - typer.Option( - "-m", - "--mode", - help="Which mode to use", - ), - ] = DiffMode.both, # ✅ Default only as parameter default -): - pass +# ✅ Works +mode: Annotated[DiffMode, typer.Option("-m", "--mode")] = DiffMode.both ``` -## Root Cause Analysis - -### 1. Option() Function Signature - -The `Option()` function in `typer/params.py` has this signature: - -```python -def Option( - default: Optional[Any] = ..., - *param_decls: str, - # ... other parameters -) -> OptionInfo -``` - -When you call: - -- `Option(DiffMode.both, "-m", "--mode")` → `default=DiffMode.both`, `param_decls=('-m', '--mode')` -- `Option("-m", "--mode")` → `default='-m'`, `param_decls=('--mode',)` - -### 2. Typer's Parameter Processing - -In `typer/main.py` at line 895, when processing parameters: - -```python -param_decls = [param.name] # Adds the parameter name first -if parameter_info.param_decls: - param_decls.extend(parameter_info.param_decls) -``` - -For a parameter named `mode`: - -- With `Option(DiffMode.both, "-m", "--mode")`: `param_decls = ['mode', '-m', '--mode']` -- With `Option("-m", "--mode")`: `param_decls = ['mode', '--mode']` - -### 3. Click's \_parse_decls Method - -Click's `_parse_decls` method in `click/core.py` (line 2683) processes these declarations: - -```python -def _parse_decls(self, decls, expose_value): - name = None - for decl in decls: - if decl.isidentifier(): - if name is not None: - raise TypeError(f"Name '{name}' defined twice") - name = decl -``` - -The method: - -1. 
Iterates through each declaration -2. If it's an identifier (passes `.isidentifier()`), it sets it as the parameter name -3. If a name was already set, it raises the "defined twice" error - -### 4. The Conflict - -When `param_decls = ['mode', '-m', '--mode']`: - -1. `'mode'` is processed → `name = 'mode'` (it's an identifier) -2. `'-m'` is processed → treated as option flag (not an identifier) -3. `'--mode'` is processed → Click extracts `'mode'` from it internally -4. Since `'mode'` was already set as the name, the error is raised - -## Full Traceability - -### Call Stack Flow - -1. **User code**: Defines function with `Annotated[Type, Option(...)]` -2. **typer/main.py:341**: `Typer.__call__()` is invoked -3. **typer/main.py:377**: `get_command()` processes the command -4. **typer/main.py:586**: `get_command_from_info()` extracts command info -5. **typer/main.py:562**: `get_params_convertors_ctx_param_name_from_function()` processes parameters -6. **typer/main.py:901**: `get_click_param()` creates Click parameters - - Line 895: Prepends parameter name to `param_decls` -7. **typer/core.py:444**: `TyperOption.__init__()` is called -8. **click/core.py:2558**: `click.Option.__init__()` is called -9. **click/core.py:2098**: `click.Parameter.__init__()` is called -10. 
**click/core.py:2694**: `_parse_decls()` raises the error - -### Environment Details - -- **Python**: 3.13 -- **Typer**: 0.16.0 -- **Click**: (bundled with Typer) -- **Platform**: darwin (macOS) - -## Solution - -### Best Practice - -When using `Annotated` with `Option()`, never specify the default value as the first argument to `Option()`: - -```python -# ❌ WRONG - Causes "Name defined twice" error -mode: Annotated[Type, typer.Option(default_value, "-m", "--mode")] = default_value - -# ✅ CORRECT - Default only as parameter default -mode: Annotated[Type, typer.Option("-m", "--mode")] = default_value -``` - -### Why This Works - -- Without a default in `Option()`, the first argument becomes a param_decl -- This prevents the parameter name from appearing twice in the declarations -- The default value is properly handled through Python's parameter default mechanism - -## Alternative Patterns - -### 1. Direct Option Usage (No Annotated) - -```python -def main( - mode: DiffMode = typer.Option( - DiffMode.both, # Can specify default here - "-m", - "--mode", - help="Which mode to use", - ), -): - pass -``` - -### 2. Argument Instead of Option - -```python -def main( - mode: Annotated[ - DiffMode, - typer.Argument(help="Which mode to use"), - ] = DiffMode.both, -): - pass -``` - -## Impact and Considerations - -### When This Issue Occurs - -- Using Typer 0.16.0 -- Using `Annotated` type hints -- Specifying default value as first argument to `Option()` -- The parameter name matches (after transformation) an option name - -### When This Issue Does NOT Occur - -- Using direct assignment pattern (no `Annotated`) -- Not specifying default in `Option()` constructor -- Using `Argument()` instead of `Option()` - -## Recommendations - -1. **For New Code**: Always omit the default value from `Option()` when using `Annotated` -2. **For Migration**: Remove default values from `Option()` calls in `Annotated` contexts -3. 
**For Teams**: Establish coding standards that enforce this pattern -4. **For Tooling**: Consider linters or pre-commit hooks to catch this pattern - -## Related Issues - -This issue is specific to the interaction between: - -- Typer's parameter processing -- Click's declaration parsing -- Python's `Annotated` type hints -- The overlapping namespace between parameter names and option names - -The error message "Name 'X' defined twice" is misleading as it doesn't clearly indicate the source of the duplication. +That’s it—remove the first positional argument from `Option()` and rely on the parameter default. diff --git a/docs/modern_python_patterns.md b/docs/modern_python_patterns.md index 9917f99..9449a67 100644 --- a/docs/modern_python_patterns.md +++ b/docs/modern_python_patterns.md @@ -1,653 +1,42 @@ -# Using Modern Python 3.13 Type System Patterns +# Modern Python 3.13 Cheat-Sheet -This document presents three fundamental patterns for leveraging Python 3.13's enhanced type system: type aliases using PEP 695 syntax, the Maybe monad pattern for nullable values, and TypeGuard functions for safe type narrowing. These patterns enable developers to write more expressive, type-safe code while avoiding unsafe practices such as type casting and runtime type assertions. - ---- - -## 1. Type Aliases with PEP 695 Syntax - -### 1.1 Functional Description - -Type aliases in Python 3.13 provide a mechanism for creating semantic type synonyms that enhance code readability and maintainability. The new PEP 695 syntax introduces the `type` statement, which creates type aliases with improved scoping rules and cleaner syntax compared to traditional type variable assignments.
- -**Key Properties:** - -- **Semantic clarity**: Type aliases communicate domain-specific meaning -- **Composability**: Complex types can be built from simpler components -- **Zero runtime overhead**: Aliases exist only at type-checking time -- **Generic support**: Type parameters can be directly specified - -### 1.2 Instructions for Implementation - -1. **Basic Type Alias Definition** - - ```python - type UserId = int - type Email = str - type Timestamp = float - ``` - -2. **Complex Type Aliases** - - ```python - type JsonValue = dict[str, Any] | list[Any] | str | int | float | bool | None - type HttpHeaders = dict[str, str] - type QueryParams = dict[str, list[str]] - ``` - -3. **Generic Type Aliases** - - ```python - type Result[T, E] = Success[T] | Failure[E] - type Predicate[T] = Callable[[T], bool] - type Transform[A, B] = Callable[[A], B] - ``` - -4. **Nested Type Aliases** - - ```python - type Point2D = tuple[float, float] - type Line = tuple[Point2D, Point2D] - type Polygon = list[Point2D] - ``` - -### 1.3 Practical Examples - -#### Example 1: Domain Modeling with Type Aliases - -```python -from dataclasses import dataclass -from decimal import Decimal -from typing import Literal - -# Define domain-specific type aliases -type CustomerId = int -type OrderId = int -type ProductCode = str -type Quantity = int -type Price = Decimal -type Currency = Literal["USD", "EUR", "GBP"] -type OrderStatus = Literal["pending", "confirmed", "shipped", "delivered", "cancelled"] - -# Use aliases in data structures -@dataclass(frozen=True) -class OrderItem: - product: ProductCode - quantity: Quantity - unit_price: Price - -@dataclass(frozen=True) -class Order: - id: OrderId - customer: CustomerId - items: tuple[OrderItem, ...] 
- currency: Currency - status: OrderStatus - - @property - def total_price(self) -> Price: - return sum(item.unit_price * item.quantity for item in self.items) -``` - -#### Example 2: Graph Algorithm Type Aliases - -```python -from collections.abc import Mapping, Set - -# Define graph-related type aliases -type NodeId = str -type Weight = float -type Edge = tuple[NodeId, NodeId, Weight] -type AdjacencyList = Mapping[NodeId, Set[NodeId]] -type WeightedAdjacencyList = Mapping[NodeId, Mapping[NodeId, Weight]] -type Path = list[NodeId] -type Distance = float | float('inf') - -def dijkstra( - graph: WeightedAdjacencyList, - start: NodeId, - end: NodeId -) -> tuple[Distance, Path]: - """Find shortest path between nodes using Dijkstra's algorithm.""" - # Implementation details omitted for brevity - pass -``` - -#### Example 3: Parser Combinator Type Aliases +## 1. Type Aliases (PEP 695) ```python -from typing import TypeVar, Callable -from collections.abc import Sequence - -# Generic type aliases for parser combinators -type ParseResult[T] = tuple[T, str] | None -type Parser[T] = Callable[[str], ParseResult[T]] -type Combinator[A, B] = Callable[[Parser[A]], Parser[B]] - -# Specific parser type aliases -type TokenParser = Parser[str] -type NumberParser = Parser[float] -type IdentifierParser = Parser[str] - -def sequence[T](parsers: Sequence[Parser[T]]) -> Parser[list[T]]: - """Combine multiple parsers in sequence.""" - def parse(input_str: str) -> ParseResult[list[T]]: - results = [] - remaining = input_str - for parser in parsers: - result = parser(remaining) - if result is None: - return None - value, remaining = result - results.append(value) - return results, remaining - return parse -``` - ---- - -## 2. The Maybe Pattern for Nullable Values - -### 2.1 Functional Description - -The Maybe pattern, borrowed from functional programming languages like Haskell, provides a type-safe alternative to nullable references. 
It explicitly models the presence or absence of a value, forcing developers to handle both cases and eliminating null pointer exceptions at the type level. - -**Mathematical Foundation:** -The Maybe type forms a monad with the following operations: - -- `return`: Wraps a value in Some -- `bind`: Chains computations that may produce Nothing -- Identity laws and associativity hold - -**Key Benefits:** - -- **Explicit null handling**: Absence is a first-class concept -- **Composability**: Chain operations without null checks -- **Type safety**: Prevents null pointer exceptions -- **Functional purity**: No hidden nulls or exceptions - -### 2.2 Instructions for Implementation - -1. **Import Required Types** - - ```python - from returns.maybe import Maybe, Some, Nothing - from returns.pointfree import bind - from returns.pipeline import flow - ``` - -2. **Creating Maybe Values** - - ```python - # From a value - maybe_value = Some(42) - - # Representing absence - no_value = Nothing - - # From optional - maybe_from_optional = Maybe.from_optional(some_optional_value) - ``` - -3. **Pattern Matching on Maybe** - - ```python - match maybe_value: - case Some(value): - # Handle the present value - process(value) - case Nothing: - # Handle absence - handle_missing() - ``` - -4. 
**Chaining Operations** - - ```python - result = flow( - initial_value, - parse_input, - bind(validate), - bind(transform), - bind(save_to_database) - ) - ``` - -### 2.3 Practical Examples - -#### Example 1: Safe Dictionary Access - -```python -from returns.maybe import Maybe, Some, Nothing -from typing import TypeVar, Mapping - -K = TypeVar('K') -V = TypeVar('V') - -def safe_get[K, V](mapping: Mapping[K, V], key: K) -> Maybe[V]: - """Safely retrieve a value from a mapping.""" - try: - return Some(mapping[key]) - except KeyError: - return Nothing - -def get_nested_value(data: dict[str, dict[str, int]], - outer_key: str, - inner_key: str) -> Maybe[int]: - """Safely navigate nested dictionaries.""" - return ( - safe_get(data, outer_key) - .bind(lambda inner_dict: safe_get(inner_dict, inner_key)) - ) - -# Usage example -data = { - "users": {"alice": 42, "bob": 17}, - "admins": {"charlie": 99} -} - -# This returns Some(42) -alice_value = get_nested_value(data, "users", "alice") - -# This returns Nothing (no "eve" in users) -eve_value = get_nested_value(data, "users", "eve") - -# This returns Nothing (no "guests" key) -guest_value = get_nested_value(data, "guests", "anyone") -``` - -#### Example 2: Configuration Parsing - -```python -from returns.maybe import Maybe, Some, Nothing -from returns.pointfree import bind -from pathlib import Path -import json - -@dataclass(frozen=True) -class DatabaseConfig: - host: str - port: int - username: str - - @classmethod - def from_dict(cls, data: dict[str, Any]) -> Maybe[DatabaseConfig]: - """Parse database configuration from dictionary.""" - try: - return Some(cls( - host=data["host"], - port=int(data["port"]), - username=data["username"] - )) - except (KeyError, ValueError, TypeError): - return Nothing - -def load_config(path: Path) -> Maybe[DatabaseConfig]: - """Load configuration from JSON file.""" - def read_file(p: Path) -> Maybe[str]: - try: - return Some(p.read_text()) - except (IOError, OSError): - return Nothing - - def 
parse_json(content: str) -> Maybe[dict[str, Any]]: - try: - return Some(json.loads(content)) - except json.JSONDecodeError: - return Nothing - - return ( - read_file(path) - .bind(parse_json) - .bind(DatabaseConfig.from_dict) - ) - -# Usage -config_path = Path("config.json") -match load_config(config_path): - case Some(config): - print(f"Connecting to {config.host}:{config.port}") - case Nothing: - print("Failed to load configuration, using defaults") -``` - -#### Example 3: User Authentication Chain - -```python -from returns.maybe import Maybe, Some, Nothing -from returns.pipeline import flow -from returns.pointfree import bind -import hashlib -from dataclasses import dataclass - -@dataclass(frozen=True) -class User: - id: int - username: str - password_hash: str - is_active: bool - type UserId = int -type Username = str -type SessionToken = str - -def find_user_by_username(username: Username) -> Maybe[User]: - """Lookup user in database by username.""" - # Simulated database lookup - users = { - "alice": User(1, "alice", hashlib.sha256(b"secret123").hexdigest(), True), - "bob": User(2, "bob", hashlib.sha256(b"password").hexdigest(), False), - } - return Maybe.from_optional(users.get(username)) - -def verify_password(password: str, user: User) -> Maybe[User]: - """Verify password matches stored hash.""" - password_hash = hashlib.sha256(password.encode()).hexdigest() - return Some(user) if password_hash == user.password_hash else Nothing - -def check_active(user: User) -> Maybe[User]: - """Ensure user account is active.""" - return Some(user) if user.is_active else Nothing - -def generate_session(user: User) -> Maybe[SessionToken]: - """Generate session token for authenticated user.""" - # Simplified token generation - return Some(f"session_{user.id}_{user.username}") - -def authenticate(username: Username, password: str) -> Maybe[SessionToken]: - """Complete authentication pipeline.""" - return flow( - find_user_by_username(username), - bind(lambda user: 
verify_password(password, user)), - bind(check_active), - bind(generate_session) - ) - -# Usage examples -match authenticate("alice", "secret123"): - case Some(token): - print(f"Authentication successful: {token}") - case Nothing: - print("Authentication failed") - -match authenticate("bob", "password"): - case Some(token): - print(f"Authentication successful: {token}") - case Nothing: - print("Authentication failed") # This will print (Bob is inactive) +type Result[T, E] = Success[T] | Failure[E] ``` ---- - -## 3. TypeGuard Pattern for Safe Type Narrowing - -### 3.1 Functional Description - -TypeGuard functions provide a type-safe mechanism for narrowing types within conditional branches without resorting to unsafe type casting. Introduced in PEP 647, TypeGuards establish a contract between runtime checks and static type analysis, enabling type checkers to understand type refinements based on boolean predicates. - -**Formal Properties:** - -- **Soundness**: If a TypeGuard returns True, the type narrowing is guaranteed to be valid -- **Composability**: TypeGuards can be combined using logical operators -- **No runtime overhead**: TypeGuards are regular functions with special type annotations -- **Static verification**: Type checkers validate TypeGuard usage at compile time - -### 3.2 Instructions for Implementation - -1. **Basic TypeGuard Structure** +Use them anywhere you repeat a complex type. - ```python - from typing import TypeGuard +## 2. Safe Null Handling (Maybe) - def is_type_name(value: broader_type) -> TypeGuard[narrower_type]: - """Check if value is of narrower_type.""" - return isinstance(value, narrower_type) # or other validation - ``` - -2. **TypeGuard Requirements** - - - Must return a boolean value - - The guarded type must be a subtype of the input type - - The function body must actually perform the check - - Should be pure (no side effects) - -3. 
**Using TypeGuards** - - ```python - if is_type_name(value): - # value is now narrowed to narrower_type - use_narrowed_value(value) - else: - # value remains as broader_type - handle_other_case(value) - ``` - -### 3.3 Practical Examples - -#### Example 1: Literal Type Narrowing +Install: `pip install returns` ```python -from typing import TypeGuard, Literal, get_args - -# Define a complex literal type -type HttpMethod = Literal["GET", "POST", "PUT", "DELETE", "PATCH"] -type SafeMethod = Literal["GET", "HEAD", "OPTIONS"] -type DatabaseOperation = Literal["SELECT", "INSERT", "UPDATE", "DELETE"] - -def is_http_method(value: str) -> TypeGuard[HttpMethod]: - """Check if a string is a valid HTTP method.""" - valid_methods = get_args(HttpMethod) - return value in valid_methods - -def is_safe_method(method: HttpMethod) -> TypeGuard[SafeMethod]: - """Check if an HTTP method is safe (no side effects).""" - safe_methods = get_args(SafeMethod) - return method in safe_methods - -def is_database_operation(value: str) -> TypeGuard[DatabaseOperation]: - """Check if a string is a valid database operation.""" - valid_ops = get_args(DatabaseOperation) - return value in valid_ops - -# Usage example -def process_request(method_str: str, path: str) -> str: - """Process HTTP request with proper type narrowing.""" - if not is_http_method(method_str): - return f"Invalid HTTP method: {method_str}" +from returns.maybe import Maybe, Some, Nothing - # method_str is now narrowed to HttpMethod - if is_safe_method(method_str): - # method_str is now narrowed to SafeMethod - return f"Safe request: {method_str} {path}" - else: - # method_str is HttpMethod but not SafeMethod - return f"Unsafe request: {method_str} {path} (requires authentication)" +def safe_get(d, k) -> Maybe[V]: + return Some(d[k]) if k in d else Nothing -# Examples -print(process_request("GET", "/users")) # Safe request: GET /users -print(process_request("POST", "/users")) # Unsafe request: POST /users (requires authentication) 
-print(process_request("INVALID", "/users")) # Invalid HTTP method: INVALID +match safe_get(data, "key"): + case Some(v): use(v) + case Nothing: handle_missing() ``` -#### Example 2: Structural Type Validation - -```python -from typing import TypeGuard, Protocol, Any -from dataclasses import dataclass - -# Define protocols for structural typing -class Comparable(Protocol): - def __lt__(self, other: Any) -> bool: ... - def __eq__(self, other: Any) -> bool: ... - -class Sized(Protocol): - def __len__(self) -> int: ... - -class Container[T](Protocol): - def __contains__(self, item: T) -> bool: ... - def __len__(self) -> int: ... - -# TypeGuard functions for protocol checking -def is_comparable(obj: object) -> TypeGuard[Comparable]: - """Check if object implements Comparable protocol.""" - return ( - hasattr(obj, '__lt__') and - callable(getattr(obj, '__lt__')) and - hasattr(obj, '__eq__') and - callable(getattr(obj, '__eq__')) - ) - -def is_sized(obj: object) -> TypeGuard[Sized]: - """Check if object implements Sized protocol.""" - return hasattr(obj, '__len__') and callable(getattr(obj, '__len__')) - -def is_container(obj: object) -> TypeGuard[Container[Any]]: - """Check if object implements Container protocol.""" - return ( - is_sized(obj) and - hasattr(obj, '__contains__') and - callable(getattr(obj, '__contains__')) - ) - -# Usage with type narrowing -def find_min[T](items: object) -> T | None: - """Find minimum value in a comparable container.""" - if not is_container(items): - return None - - # items is now Container[Any] - if len(items) == 0: - return None - - result = None - for item in items: # Assuming Container is iterable - if result is None: - result = item - elif is_comparable(item) and is_comparable(result): - if item < result: - result = item - - return result - -# Examples -print(find_min([3, 1, 4, 1, 5])) # 1 -print(find_min("hello")) # 'e' -print(find_min(42)) # None (not a container) -``` +Chain safely with `.bind()` instead of `if x is not None`. 
-#### Example 3: Complex Data Validation +## 3. Type Guards (PEP 647) ```python -from typing import TypeGuard, Any, TypedDict -from datetime import datetime +from typing import TypeGuard -# Define typed dictionaries for API responses -class UserData(TypedDict): - id: int - username: str - email: str - created_at: str - is_active: bool +def is_email(s: str) -> TypeGuard[Email]: + return "@" in s and "." in s.split("@")[1] -class AdminData(UserData): - permissions: list[str] - admin_level: int - -class PartialUserData(TypedDict, total=False): - id: int - username: str - email: str - -# TypeGuard functions for validation -def is_valid_email(email: str) -> bool: - """Check if string is a valid email format.""" - return '@' in email and '.' in email.split('@')[1] - -def is_user_data(data: dict[str, Any]) -> TypeGuard[UserData]: - """Validate that dictionary conforms to UserData structure.""" - return ( - isinstance(data.get('id'), int) and - isinstance(data.get('username'), str) and - isinstance(data.get('email'), str) and - is_valid_email(data.get('email', '')) and - isinstance(data.get('created_at'), str) and - isinstance(data.get('is_active'), bool) - ) - -def is_admin_data(data: dict[str, Any]) -> TypeGuard[AdminData]: - """Validate that dictionary conforms to AdminData structure.""" - return ( - is_user_data(data) and - isinstance(data.get('permissions'), list) and - all(isinstance(p, str) for p in data.get('permissions', [])) and - isinstance(data.get('admin_level'), int) and - data.get('admin_level', 0) > 0 - ) - -def is_partial_user_data(data: dict[str, Any]) -> TypeGuard[PartialUserData]: - """Validate partial user data for updates.""" - allowed_keys = {'id', 'username', 'email'} - return ( - all(key in allowed_keys for key in data.keys()) and - all( - isinstance(data.get('id'), int) if 'id' in data else True, - isinstance(data.get('username'), str) if 'username' in data else True, - isinstance(data.get('email'), str) and is_valid_email(data['email']) if 
'email' in data else True, - ) - ) - -# Usage in API endpoint -def process_user_update(user_id: int, update_data: dict[str, Any]) -> str: - """Process user update with proper validation.""" - if not is_partial_user_data(update_data): - return "Invalid update data format" - - # update_data is now PartialUserData - if 'email' in update_data: - print(f"Updating email to: {update_data['email']}") - - if 'username' in update_data: - print(f"Updating username to: {update_data['username']}") - - return "Update successful" - -# Example usage -api_response: dict[str, Any] = { - "id": 123, - "username": "alice", - "email": "alice@example.com", - "created_at": "2024-01-01T00:00:00Z", - "is_active": True, - "permissions": ["read", "write"], - "admin_level": 2 -} - -if is_admin_data(api_response): - # api_response is narrowed to AdminData - print(f"Admin user {api_response['username']} has permissions: {api_response['permissions']}") -elif is_user_data(api_response): - # api_response is narrowed to UserData - print(f"Regular user {api_response['username']}") -else: - print("Invalid user data") +if is_email(raw): + # raw is now Email + send_mail(raw) ``` ---- - -## Conclusion - -These three patterns—type aliases with PEP 695, the Maybe pattern, and TypeGuard functions—form a powerful trio for writing type-safe, expressive Python code. By adopting these patterns, developers can: - -1. **Communicate intent** through semantic type aliases -2. **Eliminate null pointer exceptions** with explicit Maybe handling -3. **Avoid unsafe casting** through principled type narrowing - -Together, they enable a more functional, type-driven development style that catches errors at compile time while maintaining Python's expressiveness and readability. - -## References - -1. PEP 695 – Type Parameter Syntax. Python Enhancement Proposals. -2. PEP 647 – User-Defined Type Guards. Python Enhancement Proposals. -3. Lipovača, M. (2011). Learn You a Haskell for Great Good! No Starch Press. -4. 
Petricek, T., & Skeet, J. (2009). Real World Functional Programming. Manning Publications. -5. Returns Documentation. (2024). +No casts, no runtime surprises. From b5f3f605306fa7ce9ff44147601294ff1295f5fc Mon Sep 17 00:00:00 2001 From: ethan-wickstrom Date: Mon, 14 Jul 2025 13:54:50 -0500 Subject: [PATCH 18/26] docs: condense workspace rules into quick-start guide with core principles --- .windsurfrules | 441 ++++++------------------------------------------- 1 file changed, 53 insertions(+), 388 deletions(-) diff --git a/.windsurfrules b/.windsurfrules index 237aa89..490806e 100644 --- a/.windsurfrules +++ b/.windsurfrules @@ -1,427 +1,92 @@ -# Robofactor Workspace Rules +# Robofactor Quick-Start -Adopt a purely functional, stateless approach: +## 1. Write Pure Functions Whenever Possible -- Write pure, deterministic functions with no side effects; outputs depend only on inputs. -- Favor immutability—never mutate data; always create new values. -- Represent all effects and failures explicitly using containers like IO, Result, or Maybe. -- Compose small, single-purpose functions using flow from returns.pipeline. -- Declare all dependencies explicitly; never use global state or hidden context. -- Validate inputs at boundaries and return structured error values instead of raising exceptions. -- Structure logic as input-process-output pipelines. -- Ensure all operations are idempotent and safely repeatable. +- **No side effects** → wrap I/O in `IO[T]` or `IOResult[T, E]` +- **No mutation** → return new values +- **No exceptions** → return `Result[T, E]` or `Maybe[T]` -## RULES +Push all side effects to CLI entrypoint. -### Type System & Data Modeling +## 2. Type System (PEP 695) -Use the type system to enforce correctness and make invalid states unrepresentable. - -- **Use Modern Generic Syntax (PEP 695):** Define generic functions, classes, and type aliases using the `[T]` syntax. 
Avoid the legacy `TypeVar` and `Generic` from the `typing` module unless required for variance control. - - ```python - # Generic class and function - class Stack[T]: - ... - def first[T](items: Sequence[T]) -> Maybe[T]: - ... - - # Generic type alias - type Result[T, E] = Success[T] | Failure[E] - ``` - -- **Define Algebraic Data Types (ADTs):** - - **Product Types:** Use `@dataclass(frozen=True, slots=True)` to create immutable structured data records. - - **Sum Types:** Use `|` (union types) to define a type that can be one of several distinct variants. -- **Use Abstract Collection Types in Signatures:** For function parameters, use read-only abstract types from `collections.abc` (e.g., `Sequence`, `Mapping`, `Iterable`). Never use mutable concrete types like `list` or `dict` in function signatures. -- **Use Precise Type Annotations:** - - `Final`: For constants that must not be reassigned. - - `Literal`: For variables that must hold one of a few specific values. - - `TypedDict`: For dictionary-like data with a defined structure. - - `Protocol`: For structural subtyping (duck typing). - - `TypeIs`: Create type guard functions that narrow types within a conditional block (PEP 742). -- **Create Semantic Type Aliases:** Use the `type` keyword to create distinct names for primitive types to improve clarity (e.g., `type UserId = int`). - -### Function Design & Composition - -Design functions for clarity, testability, and composability. - -- **Enforce Keyword-Only Arguments:** Use the `*` separator in function signatures to force callers to use keyword arguments, enhancing clarity. - - ```python - def create_user(*, name: str, email: str) -> User: ... - - ``` - -- **Limit Function Complexity:** Keep functions short (under 20 lines) and focused on a single responsibility. Limit parameters to a maximum of four; use a parameter object (a `dataclass`) for more complex inputs. 
-- **Use **`operator`** for Simple Access:** Employ `attrgetter`, `itemgetter`, and `methodcaller` for direct, high-performance attribute/item access or method calls in higher-order functions. -- **Use **`lambda`** for Complex Logic:** Reserve `lambda` for anonymous functions that require computations, conditional logic, or custom error handling not supported by the `operator` module. -- **Compose with **`returns.pipeline.flow`**:** Construct processing pipelines by passing data through a sequence of functions. Use `returns.pointfree.bind` to chain operations that return containers (`Result`, `Maybe`). - - ```python - from returns.pipeline import flow - from returns.pointfree import bind - - result = flow( - initial_data, - validate_input, - bind(fetch_record), - bind(transform_record), - ) - - ``` - -### Effect & Error Handling with`returns` - -Make all effects and potential failures explicit in function signatures using the `returns` library. - -- **For Fallible Operations, Use **`Result`**:** Return `Success(value)` or `Failure(error)` for any operation that can fail. Never raise exceptions for predictable errors. -- **For Optionality, Use **`Maybe`**:** Return `Some(value)` or `Nothing` for values that may be absent. Never use `Optional[T]` or return raw `None`. -- **For Synchronous Side Effects, Use **`IO`**:** Wrap any function that performs I/O (e.g., file access, network requests, database calls) in an `IO` or `IOResult` container. This marks the function as impure and defers its execution. -- **For Asynchronous Side Effects, Use **`FutureResult`**:** Use `FutureResult` to compose asynchronous operations that may fail, ensuring that exceptions do not break the `asyncio` event loop. - -### Application Boundaries: CLI & API Design - -Structure applications with clear, type-safe interfaces. - -- **Design CLIs with **`Typer`**:** - - Build CLIs based on Python type hints. - - Use `Annotated` for defining options and arguments with help text. 
- - Place default values in the function signature, not within `typer.Option()`. - - Use `rich.console` for all terminal output; never use `print()`. - - Exit with `typer.Exit(code=n)` instead of `sys.exit()`. -- **Design REST APIs:** - - Adhere to strict and consistent REST conventions. - - Version APIs explicitly in the URL path (e.g., `/api/v1/`). - - Use correct HTTP methods and status codes. - - Return consistent, structured error formats. - - Implement idempotency keys for unsafe methods. -- **Isolate External Systems:** Use an Anti-Corruption Layer—a dedicated module for translating data between external APIs and your internal domain models—to protect your core logic from outside influence. - -### Data Processing & Persistence - -Handle data in a functional and safe manner. - -- **Use **`itertools`** and Generators:** Leverage `itertools` for efficient, memory-safe iteration. Create custom iterators using generator functions (`yield`) for lazy data processing. -- **Use Immutable Collections:** Use `tuple` for fixed sequences, `frozenset` for immutable sets, and `types.MappingProxyType` for read-only dictionary views. -- **Ensure Database Safety:** - - Use transactions for all write operations to maintain consistency. - - Manage database migrations with a tool like Alembic. - - Write idempotent migration scripts. - - Use connection pooling for performance and resource management. - - Validate and sanitize all inputs to prevent SQL injection. - -### Testing & Validation - -Verify correctness through rigorous, automated testing. - -- **Write Tests First:** Follow a Test-Driven Development (TDD) approach. -- **Test Properties, Not Just Examples:** Use `hypothesis` for property-based testing. Define properties and invariants that your code must satisfy, and let the library generate hundreds of diverse examples to find edge cases. 
- - ```python - from hypothesis import given, strategies as st - - @given(st.lists(st.integers())) - def test_sort_is_idempotent(items: list[int]) -> None: - assert sorted(sorted(items)) == sorted(items) - - ``` - -- **Isolate and Test Pure Functions:** Pure functions can be tested in complete isolation with a given input and an expected output, requiring no mocks or complex setup. -- **Mock Only at External Boundaries:** Restrict mocking to interfaces that communicate with external systems (e.g., APIs, databases). Do not mock internal application logic. - -### Documentation & Tooling - -Maintain a clear and efficient development environment. - -- **Write Self-Documenting Code:** Use precise, descriptive names for functions and variables. -- **Document the "Why," Not the "What":** Use Google-style docstrings for public APIs to explain intent, preconditions, and postconditions. -- **Enforce Static Type Checking:** Use a strict type checker like `basedpyright` in your development workflow. - - ```bash - basedpyright --pythonversion 3.13 - ``` - -- **Never Suppress Type Errors:** Do not use `cast` or `# type: ignore`. Address all type errors directly. Do not use `Any`. - -## Development Philosophy - -Apply functional programming principles consistently throughout the codebase. -Write pure, composable functions with explicit error handling and immutable data structures. - -## Package Management & Environment - -- **ALWAYS use `uv`** - Never use `pip`, `pipx`, `conda`, or other package managers -- **ALWAYS use `uv run`** instead of `python` directly -- Use `pyproject.toml` for all project configuration -- Pin exact versions in `.python-version` - -## Type System Requirements +```python +type UserId = int +type Result[T, E] = Success[T] | Failure[E] -### Mandatory Type Safety +@dataclass(frozen=True, slots=True) +class User: ... 
+``` -- **NEVER use `Any`** - Use proper generics, protocols, or union types -- **NEVER use `cast()`** or `# type: ignore` -- **NEVER use `Optional[T]`** - Use `Maybe[T]` from `returns` library -- **ALWAYS use PEP 695 syntax** for generics: `class Box[T]:` not `class Box(Generic[T]):` -- **ALWAYS use `type` aliases**: `type UserId = int` -- **ALWAYS use explicit type narrowing functions** to refine types at runtime. - - Type narrowing functions must return `bool` and be annotated with a type predicate (e.g., `def is_str(x: object) -> TypeIs[str]: ...`). -- **ALWAYS use `TypeIs` for type narrowing** when both positive and negative branches require precise type inference. - - The narrowed type in the `if` branch is the intersection of the argument’s type and the `TypeIs` return type. - - The narrowed type in the `else` branch is the intersection of the argument’s type and the complement of the `TypeIs` return type. -- **ALWAYS use `TypeGuard` for user-defined type guards** when only the positive branch requires narrowing. - - Type narrowing applies only in the `if` branch; the `else` branch is not narrowed. -- **Type narrowing functions MUST accept at least one positional argument.** - - Type narrowing is applied only to the first argument. -- **The return type of a type narrowing function MUST be assignable to the input type.** - - It is a type error to narrow to a type that is not assignable to the input. -- **Type narrowing functions MUST be pure and deterministic.** - - No side effects or mutation are allowed. -- **NEVER use `Any` or `cast()` for type narrowing.** - - Always use type predicates and explicit type guards. -- **ALWAYS use pattern matching or explicit type checks** to implement type narrowing logic. -- **ALWAYS document the expected narrowed types** in both positive and negative branches using type assertions or comments. -- **NEVER rely on implicit narrowing or runtime exceptions** for type safety. +## 3. 
Function Rules -### Import Standards +- ≤ 20 lines, ≤ 4 params +- Keyword-only after `*` +- Use `flow` to compose: ```python -# REQUIRED imports for collections -from collections.abc import Sequence, Mapping, Callable, Iterable, Iterator - -# REQUIRED for functional programming -from returns.result import Result, Success, Failure -from returns.maybe import Maybe, Some, Nothing -from returns.io import IO, IOResult from returns.pipeline import flow from returns.pointfree import bind -# FORBIDDEN - Use collections.abc instead -# from typing import Sequence, Mapping, List, Dict -``` - -### Function Parameters - -- **NEVER use `list[T]` in parameters** - Use `Sequence[T]` -- **NEVER use `dict[K, V]` in parameters** - Use `Mapping[K, V]` -- **ALWAYS use immutable parameter types** - -## Data Structures - -### Mandatory Patterns - -- **ALWAYS use `@dataclass(frozen=True, slots=True)`** for data classes -- **ALWAYS use `tuple` instead of `list`** for fixed sequences -- **ALWAYS use `frozenset` instead of `set`** for APIs -- **NEVER mutate objects in place** - Return new instances - -### Error Handling - -- **ALWAYS use `Result[T, E]`** for operations that can fail -- **NEVER raise exceptions** for expected errors -- **ALWAYS use `Maybe[T]`** for nullable values -- **ALWAYS use pattern matching** on Result and Maybe types - -## Function Design - -### Mandatory Requirements - -- **Functions MUST be pure** - Same input always produces same output -- **Functions MUST NOT have side effects** - Use `IO[T]` for effects -- **Maximum 20 lines per function** -- **Maximum 4 parameters** - Use parameter objects if needed -- **ALWAYS use keyword-only arguments** for functions with 3+ parameters - -### Composition - -```python -# REQUIRED pattern for operation chaining result = flow( - input_data, + data, validate, - bind(transform), + bind(process), bind(save), ) ``` -## Testing Framework - -- **Primary: pytest** with type-checked fixtures -- **Property testing: hypothesis** for all 
pure functions -- **ALWAYS test error paths** explicitly -- **One assertion per test** -- **Descriptive test names**: `test_divide_by_zero_returns_failure` - -## CLI Development - -### Typer Standards +## 4. CLI (Typer) ```python -# REQUIRED pattern - no defaults in Option() -def command( - file: Annotated[Path, typer.Argument(help="Input file")], - output: Annotated[str, typer.Option("--output", "-o")] = "out.txt", +def cmd( + file: Annotated[Path, typer.Argument()], + out: Annotated[str, typer.Option("-o")] = "out.txt", ) -> None: + ... ``` -- **ALWAYS use `rich` for output** - Never plain `print()` -- **ALWAYS use `typer.Exit(code=n)`** instead of `sys.exit()` -- **ALWAYS validate inputs early** - -## Code Organization - -### File Structure - -- **Pure functions**: No external dependencies in function bodies -- **Boundary functions**: Handle I/O and external integrations -- **Domain models**: Immutable dataclasses with business logic -- **Services**: Protocol-based interfaces for external systems +- Defaults live in the signature, **not** in `typer.Option`. +- Use `rich` for output, `typer.Exit(code)` to quit. -### Module Dependencies - -- **Domain modules MUST NOT import infrastructure** -- **Use dependency injection** for external services -- **ALWAYS define clear service boundaries** - -## Error Handling Patterns - -### Required Patterns +## 5. 
Error Handling ```python -# Configuration parsing -def load_config(path: Path) -> Result[Config, ConfigError]: - return ( - read_file(path) - .bind(parse_json) - .bind(validate_config) - ) - -# Pattern matching on results -match result: - case Success(value): - process(value) - case Failure(error): - handle_error(error) - case _: # ALWAYS include exhaustive case - pass -``` - -## Performance & Quality - -### Type Checking - -- **ALWAYS use `basedpyright --pythonversion 3.13`** -- **Zero type errors tolerance** -- **Full type coverage required** - -### Code Quality - -- **ALWAYS use `ruff` for linting** -- **ALWAYS use `ruff format` for formatting** -- **No magic numbers** - Use named constants -- **Meaningful variable names** - No abbreviations - -## Security Requirements - -- **NEVER commit secrets** to repository -- **ALWAYS validate inputs** at boundaries -- **Use environment variables** for configuration -- **ALWAYS use parameterized queries** for databases - -## Forbidden Patterns - -### Type System - -```python -# FORBIDDEN -from typing import Optional, List, Dict, Any -def func(data: Any) -> Optional[List[str]]: - return cast(List[str], data) - -# REQUIRED -from collections.abc import Sequence -from returns.maybe import Maybe -def func[T](data: T) -> Maybe[Sequence[str]]: - # type-safe implementation -``` - -### State Management - -```python -# FORBIDDEN - mutable state -class Counter: - def __init__(self): - self.count = 0 - def increment(self): - self.count += 1 - -# REQUIRED - immutable state -@dataclass(frozen=True) -class Counter: - count: int = 0 - def increment(self) -> Counter: - return Counter(self.count + 1) -``` - -### Error Handling - -```python -# FORBIDDEN -def parse_int(value: str) -> int: - return int(value) # Can raise ValueError - -# REQUIRED -def parse_int(value: str) -> Result[int, ValueError]: +def parse_int(s: str) -> Result[int, ValueError]: try: - return Success(int(value)) + return Success(int(s)) except ValueError as e: return 
Failure(e) -``` - -## Library Preferences - -### Required Libraries -- **returns**: Functional programming primitives -- **typer**: CLI development -- **rich**: Terminal output -- **pytest**: Testing framework -- **hypothesis**: Property-based testing - -### Forbidden Libraries - -- **Click**: Use Typer instead -- **argparse**: Use Typer instead -- **requests**: Use httpx instead (async by default) - -## Git & Version Control +match parse_int("42"): + case Success(n): ... + case Failure(e): ... +``` -- **Use conventional commits**: `feat:`, `fix:`, `refactor:`, etc. -- **Never commit directly to main** - Use feature branches -- **ALWAYS rebase** instead of merge commits -- **Squash commits** before merging to main +## 6. Testing -## Documentation +- **pytest + hypothesis** +- One assertion per test +- Property tests for pure functions -- **Google-style docstrings** for public APIs -- **Type hints ARE documentation** - Use them comprehensively -- **Document business logic rationale** - Not implementation details -- **Keep README.md updated** with setup and usage +## 7. Tooling -## File Naming & Organization +```bash +uv run basedpyright --pythonversion 3.13 +uv run ruff check +uv run ruff format +``` -- **Snake_case for files**: `user_service.py` -- **Clear module purposes**: One responsibility per module -- **Group related functionality**: Keep cohesive functions together -- **Separate concerns**: Domain, infrastructure, presentation layers +## 8. Forbidden -## Development Workflow +- `Any`, `cast`, `# type: ignore`, `Optional`, `print`, `sys.exit`, mutable globals, `list`/`dict` in signatures. -1. **Write types first** - Define interfaces before implementation -2. **Write tests next** - TDD approach required -3. **Implement with pure functions** - No side effects in business logic -4. **Compose at boundaries** - Assemble pure functions into workflows -5. **Type check continuously** - Fix type errors immediately +## 9. 
Project Skeleton -This workspace emphasizes functional programming principles, type safety, and clear separation of concerns. Every code change should advance these goals while maintaining zero technical debt. +```bash +src/ + robofactor/ # project source +tests/ + test_*.py # property tests +pyproject.toml # uv + ruff + basedpyright +``` From 87d726ae5fe6b20c8affb38f721ae2d8b23875ae Mon Sep 17 00:00:00 2001 From: ethan-wickstrom Date: Mon, 14 Jul 2025 13:55:09 -0500 Subject: [PATCH 19/26] refactor: remove deprecated list-to-sequence transformation script --- scripts/replace_list_with_sequence.py | 387 -------------------------- 1 file changed, 387 deletions(-) delete mode 100644 scripts/replace_list_with_sequence.py diff --git a/scripts/replace_list_with_sequence.py b/scripts/replace_list_with_sequence.py deleted file mode 100644 index b62a025..0000000 --- a/scripts/replace_list_with_sequence.py +++ /dev/null @@ -1,387 +0,0 @@ -""" -Functional code transformation script for replacing list/List with Sequence. - -Implements functional programming principles with clear service boundaries, -error handling as values, and immutable data transformations. 
-""" - -from __future__ import annotations - -import re -import shutil -from collections.abc import Sequence as SeqType -from dataclasses import dataclass, replace -from pathlib import Path -from typing import Protocol - -from returns.result import Result, Success, Failure, safe - - -# ============================================================================ -# Domain Models -# ============================================================================ - -@dataclass(frozen=True) -class FileContent: - """Immutable representation of file content.""" - path: Path - content: str - encoding: str = "utf-8" - - -@dataclass(frozen=True) -class TransformationRule: - """Immutable rule for text transformation.""" - pattern: re.Pattern[str] - replacement: str - description: str - - -@dataclass(frozen=True) -class ImportStatement: - """Immutable representation of an import statement.""" - module: str - imports: tuple[str, ...] - line: str - - -@dataclass(frozen=True) -class TransformationResult: - """Result of applying transformations to file content.""" - original: FileContent - transformed: FileContent - rules_applied: tuple[TransformationRule, ...] - import_added: ImportStatement | None = None - - -# ============================================================================ -# Service Interfaces -# ============================================================================ - -class FileOperations(Protocol): - """Interface for file I/O operations.""" - - def read_file(self, path: Path) -> Result[FileContent, Exception]: - """Read file content safely.""" - ... - - def write_file(self, content: FileContent) -> Result[Path, Exception]: - """Write content to file safely.""" - ... - - def create_backup(self, path: Path) -> Result[Path, Exception]: - """Create backup of file.""" - ... 
- - -class TextTransformer(Protocol): - """Interface for text transformation operations.""" - - def apply_transformations( - self, - content: FileContent, - rules: SeqType[TransformationRule] - ) -> Result[TransformationResult, str]: - """Apply transformation rules to content.""" - ... - - def ensure_import( - self, - content: FileContent, - import_statement: ImportStatement - ) -> Result[FileContent, str]: - """Ensure import statement exists in content.""" - ... - - -class DirectoryProcessor(Protocol): - """Interface for directory traversal operations.""" - - def find_python_files(self, directory: Path) -> Result[tuple[Path, ...], str]: - """Find all Python files in directory recursively.""" - ... - - -# ============================================================================ -# Implementation Services -# ============================================================================ - -class SafeFileOperations: - """Safe file operations implementation using functional patterns.""" - - @safe - def read_file(self, path: Path) -> FileContent: - """Read file content with automatic error handling.""" - content = path.read_text(encoding="utf-8") - return FileContent(path=path, content=content) - - @safe - def write_file(self, content: FileContent) -> Path: - """Write content to file with automatic error handling.""" - _ = content.path.write_text(content.content, encoding=content.encoding) - return content.path - - @safe - def create_backup(self, path: Path) -> Path: - """Create backup file with automatic error handling.""" - backup_path = path.with_suffix(path.suffix + ".bak") - shutil.copy2(path, backup_path) - return backup_path - - -class FunctionalTextTransformer: - """Functional text transformation implementation.""" - - def apply_transformations( - self, - content: FileContent, - rules: SeqType[TransformationRule] - ) -> Result[TransformationResult, str]: - """Apply transformation rules functionally.""" - def _apply_rule(text: str, rule: TransformationRule) -> 
str: - return rule.pattern.sub(rule.replacement, text) - - try: - # Apply transformations immutably - transformed_content = content.content - applied_rules: list[TransformationRule] = [] - - for rule in rules: - original_content = transformed_content - transformed_content = _apply_rule(transformed_content, rule) - - # Track which rules were actually applied - if original_content != transformed_content: - applied_rules.append(rule) - - transformed_file = replace(content, content=transformed_content) - - return Success(TransformationResult( - original=content, - transformed=transformed_file, - rules_applied=tuple(applied_rules) - )) - - except Exception as e: - return Failure(f"Transformation failed: {e}") - - def ensure_import( - self, - content: FileContent, - import_statement: ImportStatement - ) -> Result[FileContent, str]: - """Ensure import statement exists, adding if necessary.""" - try: - lines = content.content.splitlines() - - # Check if import already exists - has_import = any( - import_statement.module in line and - all(imp in line for imp in import_statement.imports) - for line in lines - ) - - if has_import: - return Success(content) - - # Add import at the top after any existing imports - import_line = import_statement.line - - # Find insertion point (after last import or at beginning) - insert_index = 0 - for i, line in enumerate(lines): - if line.strip().startswith(('import ', 'from ')): - insert_index = i + 1 - elif line.strip() and not line.startswith('#'): - break - - new_lines = lines[:insert_index] + [import_line] + lines[insert_index:] - new_content = '\n'.join(new_lines) - - return Success(replace(content, content=new_content)) - - except Exception as e: - return Failure(f"Import addition failed: {e}") - - -class RecursiveDirectoryProcessor: - """Directory processing implementation.""" - - def find_python_files(self, directory: Path) -> Result[tuple[Path, ...], str]: - """Find Python files recursively with error handling.""" - try: - if not 
directory.exists(): - return Failure(f"Directory does not exist: {directory}") - - if not directory.is_dir(): - return Failure(f"Path is not a directory: {directory}") - - python_files = tuple( - path for path in directory.rglob("*.py") - if path.is_file() - ) - - return Success(python_files) - - except Exception as e: - return Failure(f"Directory traversal failed: {e}") - - -# ============================================================================ -# Configuration and Rules -# ============================================================================ - -# Transformation rules for list -> Sequence replacement -LIST_TO_SEQUENCE_RULES: tuple[TransformationRule, ...] = ( - TransformationRule( - pattern=re.compile(r'\blist\b'), - replacement="Sequence", - description="Replace 'list' with 'Sequence'" - ), - TransformationRule( - pattern=re.compile(r'\bList\b'), - replacement="Sequence", - description="Replace 'List' with 'Sequence'" - ), -) - -# Import statement to add -SEQUENCE_IMPORT = ImportStatement( - module="collections.abc", - imports=("Sequence",), - line="from collections.abc import Sequence" -) - - -# ============================================================================ -# Application Service -# ============================================================================ - -@dataclass(frozen=True) -class CodeTransformationService: - """Main application service with dependency injection.""" - - file_ops: FileOperations - text_transformer: TextTransformer - directory_processor: DirectoryProcessor - - def transform_file(self, file_path: Path) -> Result[TransformationResult, str]: - """Transform a single file with full error handling.""" - def _process_content(content: FileContent) -> Result[TransformationResult, str]: - # Apply transformations - transform_result = self.text_transformer.apply_transformations( - content, LIST_TO_SEQUENCE_RULES - ) - - match transform_result: - case Success(result): - # Ensure import if transformations were applied - 
if result.rules_applied: - import_result = self.text_transformer.ensure_import( - result.transformed, SEQUENCE_IMPORT - ) - match import_result: - case Success(updated_content): - # Create backup and write - backup_result = self.file_ops.create_backup(content.path) - match backup_result: - case Success(_): - write_result = self.file_ops.write_file(updated_content) - match write_result: - case Success(_): - return Success(replace(result, transformed=updated_content)) - case Failure(error): - return Failure(f"Write failed: {error}") - case _: - return Failure("Unknown write error") - case Failure(error): - return Failure(f"Backup failed: {error}") - case _: - return Failure("Unknown backup error") - case Failure(error): - return Failure(f"Import failed: {error}") - case _: - return Failure("Unknown import error") - else: - # No changes needed, return original - return Success(result) - case Failure(error): - return Failure(error) - case _: - return Failure("Unknown transformation error") - - # Read file and process - read_result = self.file_ops.read_file(file_path) - match read_result: - case Success(content): - return _process_content(content) - case Failure(error): - return Failure(f"Read failed: {error}") - case _: - return Failure("Unknown read error") - - def transform_directory(self, directory: Path) -> Result[tuple[TransformationResult, ...], str]: - """Transform all Python files in directory.""" - def _transform_files(files: tuple[Path, ...]) -> Result[tuple[TransformationResult, ...], str]: - results: list[TransformationResult] = [] - errors: list[str] = [] - - for file_path in files: - result = self.transform_file(file_path) - match result: - case Success(transformation_result): - results.append(transformation_result) - case Failure(error): - errors.append(f"Failed to transform {file_path}: {error}") - case _: - errors.append(f"Unknown error for {file_path}") - - if errors: - return Failure(f"Errors occurred: {'; '.join(errors)}") - - return 
Success(tuple(results)) - - files_result = self.directory_processor.find_python_files(directory) - match files_result: - case Success(files): - return _transform_files(files) - case Failure(error): - return Failure(error) - case _: - return Failure("Unknown directory processing error") - - -# ============================================================================ -# Application Entry Point -# ============================================================================ - -def create_application() -> CodeTransformationService: - """Factory function for creating the application with dependencies.""" - return CodeTransformationService( - file_ops=SafeFileOperations(), - text_transformer=FunctionalTextTransformer(), - directory_processor=RecursiveDirectoryProcessor() - ) - - -def main() -> None: - """Main application entry point.""" - app = create_application() - target_directory = Path("src/robofactor") - - match app.transform_directory(target_directory): - case Success(results): - print(f"✅ Successfully transformed {len(results)} files in {target_directory}") - for result in results: - if result.rules_applied: - print(f" 📝 Transformed: {result.transformed.path}") - case Failure(error): - print(f"❌ Transformation failed: {error}") - _ = exit(1) # Explicitly ignore return value - case _: - print("❌ Unknown error occurred") - _ = exit(1) - - -if __name__ == "__main__": - main() From d4ad4ec34fc3dafac295b28542c8ef9db42d5e1e Mon Sep 17 00:00:00 2001 From: ethan-wickstrom Date: Mon, 14 Jul 2025 14:24:06 -0500 Subject: [PATCH 20/26] refactor: implement immutable configuration and type-safe training data loading Restructures the configuration system to enforce immutability and single source of truth (SSoT) principles. Replaces manual JSON parsing in the training loader with Pydantic models for compile-time type safety. 
Configuration changes: - All config values now use `Final` type annotations - Added explicit `__all__` exports to prevent external mutation - Centralized file paths using ROOT_DIR base path - Enhanced module docstring to clarify SSoT requirements Training data changes: - Created Pydantic models (TrainingEntry, TrainingSetAdapter) for JSON schema - Eliminated runtime type guards in favor of Pydantic validation - Simplified error handling with structured ValidationError - Pre-compiled TypeAdapter for performance optimization Also includes minor formatting improvements across evaluation modules to maintain consistency with updated import paths. --- src/robofactor/app/config.py | 58 +++++++++++++------- src/robofactor/app/main.py | 26 +++------ src/robofactor/evaluation/checkers.py | 28 +++------- src/robofactor/evaluation/pipeline.py | 10 +--- src/robofactor/training/models.py | 36 +++++++++++++ src/robofactor/training/training_loader.py | 63 ++++++++++------------ 6 files changed, 122 insertions(+), 99 deletions(-) create mode 100644 src/robofactor/training/models.py diff --git a/src/robofactor/app/config.py b/src/robofactor/app/config.py index e4bb2c1..758533d 100644 --- a/src/robofactor/app/config.py +++ b/src/robofactor/app/config.py @@ -1,34 +1,54 @@ """ -Centralized configuration for the refactoring tool. - -This module consolidates all constants, magic numbers, and default settings -to simplify management and modification. +Configuration for the Robofactor tool. 
""" from pathlib import Path +from typing import Final # --- File Paths --- -OPTIMIZER_FILENAME: Path = Path("optimized/") -TRAINING_DATA_FILE = Path("training/training_data.json") +ROOT_DIR: Final[Path] = Path(__file__).parent.parent +OPTIMIZER_FILENAME: Final[Path] = ROOT_DIR / "optimized" / "program.pkl" +OPTIMIZER_METADATA: Final[Path] = ROOT_DIR / "optimized" / "metadata.json" +TRAINING_DATA_FILE: Final[Path] = ROOT_DIR / "training" / "training_data.json" # --- DSPy Model Configuration --- -DEFAULT_TASK_LLM: str = "gemini/gemini-2.5-flash-lite-preview-06-17" -DEFAULT_PROMPT_LLM: str = "gemini/gemini-2.5-pro" -TASK_LLM_MAX_TOKENS: int = 64000 -PROMPT_LLM_MAX_TOKENS: int = 64000 +DEFAULT_TASK_LLM: Final[str] = "gemini/gemini-2.5-flash-lite-preview-06-17" +DEFAULT_PROMPT_LLM: Final[str] = "gemini/gemini-2.5-pro" +TASK_LLM_MAX_TOKENS: Final[int] = 64000 +PROMPT_LLM_MAX_TOKENS: Final[int] = 64000 # --- Refinement Configuration --- -REFINEMENT_THRESHOLD: float = 0.9 -REFINEMENT_COUNT: int = 3 +REFINEMENT_THRESHOLD: Final[float] = 0.9 +REFINEMENT_COUNT: Final[int] = 3 -# --- Analysis Configuration --- -FLAKE8_COMPLEXITY_CODE: str = "C901" -FLAKE8_MAX_COMPLEXITY: int = 10 -LINTING_PENALTY_PER_ISSUE: float = 0.1 +# --- Analysis & Linting Configuration --- +FLAKE8_COMPLEXITY_CODE: Final[str] = "C901" +FLAKE8_MAX_COMPLEXITY: Final[int] = 10 +LINTING_PENALTY_PER_ISSUE: Final[float] = 0.1 # --- UI Configuration --- -RICH_SYNTAX_THEME: str = "monokai" +RICH_SYNTAX_THEME: Final[str] = "monokai" # --- MLflow Configuration --- -DEFAULT_MLFLOW_TRACKING_URI: str = "http://127.0.0.1:5000" -DEFAULT_MLFLOW_EXPERIMENT_NAME: str = "robofactor" +DEFAULT_MLFLOW_TRACKING_URI: Final[str] = "http://127.0.0.1:5000" +DEFAULT_MLFLOW_EXPERIMENT_NAME: Final[str] = "robofactor" + +# --- SSoT Enforcement --- +__all__ = [ + "DEFAULT_MLFLOW_EXPERIMENT_NAME", + "DEFAULT_MLFLOW_TRACKING_URI", + "DEFAULT_PROMPT_LLM", + "DEFAULT_TASK_LLM", + "FLAKE8_COMPLEXITY_CODE", + "FLAKE8_MAX_COMPLEXITY", + 
"LINTING_PENALTY_PER_ISSUE", + "OPTIMIZER_FILENAME", + "OPTIMIZER_METADATA", + "PROMPT_LLM_MAX_TOKENS", + "REFINEMENT_COUNT", + "REFINEMENT_THRESHOLD", + "RICH_SYNTAX_THEME", + "ROOT_DIR", + "TASK_LLM_MAX_TOKENS", + "TRAINING_DATA_FILE", +] diff --git a/src/robofactor/app/main.py b/src/robofactor/app/main.py index 5d15b87..e074df4 100644 --- a/src/robofactor/app/main.py +++ b/src/robofactor/app/main.py @@ -26,16 +26,12 @@ app = typer.Typer() -def _setup_environment( - tracing: bool, mlflow_uri: str, mlflow_experiment: str -) -> Console: +def _setup_environment(tracing: bool, mlflow_uri: str, mlflow_experiment: str) -> Console: """Configures warnings, MLflow, and returns a rich Console.""" utils.suppress_pydantic_warnings() console = Console() if tracing: - console.print( - f"[bold yellow]MLflow tracing enabled. URI: {mlflow_uri}[/bold yellow]" - ) + console.print(f"[bold yellow]MLflow tracing enabled. URI: {mlflow_uri}[/bold yellow]") mlflow.set_tracking_uri(mlflow_uri) _ = mlflow.set_experiment(mlflow_experiment) _ = mlflow.autolog() @@ -93,17 +89,15 @@ def _run_refactoring_on_file( console.print( Panel( - Syntax( - source_code, "python", theme=config.RICH_SYNTAX_THEME, line_numbers=True - ), + Syntax(source_code, "python", theme=config.RICH_SYNTAX_THEME, line_numbers=True), title=f"[bold]Original Code: {script_path.name}[/bold]", border_style="blue", ) ) - refactor_example = dspy.Example( - code_snippet=source_code, test_cases=[] - ).with_inputs("code_snippet") + refactor_example = dspy.Example(code_snippet=source_code, test_cases=[]).with_inputs( + "code_snippet" + ) prediction = refactorer(**refactor_example.inputs()) ui.display_refactoring_process(console, prediction) @@ -121,9 +115,7 @@ def _run_refactoring_on_file( f"[yellow]Writing refactored code back to {script_path.name}...[/yellow]" ) _ = script_path.write_text(refactored_code, encoding="utf-8") - console.print( - f"[green]Refactoring of {script_path.name} complete.[/green]" - ) + 
console.print(f"[green]Refactoring of {script_path.name} complete.[/green]") case Failure(error_message): console.print( Panel( @@ -169,9 +161,7 @@ def main( "--prompt-llm", help="Model for generating prompts during optimization.", ), - tracing: bool = typer.Option( - True, "--tracing/--no-tracing", help="Enable MLflow tracing." - ), + tracing: bool = typer.Option(True, "--tracing/--no-tracing", help="Enable MLflow tracing."), mlflow_uri: str = typer.Option( config.DEFAULT_MLFLOW_TRACKING_URI, "--mlflow-uri", diff --git a/src/robofactor/evaluation/checkers.py b/src/robofactor/evaluation/checkers.py index ff6bd71..ecc098f 100644 --- a/src/robofactor/evaluation/checkers.py +++ b/src/robofactor/evaluation/checkers.py @@ -17,8 +17,8 @@ import dspy -from ..app import config -from ..parsing.models import CodeQualityScores, TestCase +from robofactor.app import config +from robofactor.parsing.models import CodeQualityScores, TestCase def check_syntax(code: str) -> tuple[bool, str | None, str | None]: @@ -50,15 +50,11 @@ def _get_ast_based_scores(tree: ast.AST, func_name: str | None) -> tuple[float, if not all_funcs: return 0.0, 0.0 - target_funcs = ( - [f for f in all_funcs if f.name == func_name] if func_name else all_funcs - ) + target_funcs = [f for f in all_funcs if f.name == func_name] if func_name else all_funcs if not target_funcs: return 0.0, 0.0 - docstring_score = sum(1.0 for f in target_funcs if ast.get_docstring(f)) / len( - target_funcs - ) + docstring_score = sum(1.0 for f in target_funcs if ast.get_docstring(f)) / len(target_funcs) typed_elements, typeable_elements = 0, 0 for func_node in target_funcs: @@ -90,9 +86,7 @@ def check_code_quality(code: str, func_name: str | None = None) -> CodeQualitySc subprocess.CalledProcessError: If the flake8 command fails. SyntaxError: If the code cannot be parsed into an AST. 
""" - with tempfile.NamedTemporaryFile( - "w", suffix=".py", delete=False, encoding="utf-8" - ) as tmp: + with tempfile.NamedTemporaryFile("w", suffix=".py", delete=False, encoding="utf-8") as tmp: _ = tmp.write(code) tmp_path = Path(tmp.name) @@ -117,9 +111,7 @@ def check_code_quality(code: str, func_name: str | None = None) -> CodeQualitySc ] complexity_score = 1.0 if not complexity_warnings else 0.0 - linting_score = max( - 0.0, 1.0 - (config.LINTING_PENALTY_PER_ISSUE * len(linting_issues)) - ) + linting_score = max(0.0, 1.0 - (config.LINTING_PENALTY_PER_ISSUE * len(linting_issues))) tree = ast.parse(code) docstring_score, typing_score = _get_ast_based_scores(tree, func_name) @@ -155,9 +147,7 @@ def _build_execution_script(func_name: str, test_case: TestCase) -> str: ) -def check_functional_correctness( - code: str, func_name: str, test_cases: Sequence[TestCase] -) -> int: +def check_functional_correctness(code: str, func_name: str, test_cases: Sequence[TestCase]) -> int: """ Executes test cases against code in a sandboxed Python interpreter. @@ -183,9 +173,7 @@ def check_functional_correctness( exec_script = _build_execution_script(func_name, test) actual_output_json = interp.execute(exec_script) actual_output = json.loads(actual_output_json) - normalized_expected_output = json.loads( - json.dumps(test.expected_output) - ) + normalized_expected_output = json.loads(json.dumps(test.expected_output)) if actual_output == normalized_expected_output: passed_count += 1 except Exception: diff --git a/src/robofactor/evaluation/pipeline.py b/src/robofactor/evaluation/pipeline.py index b535e8b..69132fd 100644 --- a/src/robofactor/evaluation/pipeline.py +++ b/src/robofactor/evaluation/pipeline.py @@ -5,12 +5,8 @@ from returns.result import Failure, Result, Success, safe -from . import checkers from ..parsing.models import CodeQualityScores, TestCase - - - - +from . 
import checkers class FunctionalCheckResult(NamedTuple): @@ -70,9 +66,7 @@ def _check_functional_correctness( return FunctionalCheckResult(passed_tests=passed_tests, total_tests=len(tests)) -def evaluate_refactored_code( - code: str, tests: Sequence[TestCase] -) -> Result[EvaluationResult, str]: +def evaluate_refactored_code(code: str, tests: Sequence[TestCase]) -> Result[EvaluationResult, str]: """ Performs a full evaluation of the refactored code. diff --git a/src/robofactor/training/models.py b/src/robofactor/training/models.py new file mode 100644 index 0000000..6814bda --- /dev/null +++ b/src/robofactor/training/models.py @@ -0,0 +1,36 @@ +"""Pydantic models for the training data JSON format. + +These models ensure type-safe deserialization of the training dataset and +eliminate the reliance on ``json.loads`` returning ``Any``. They also provide +single-source-of-truth schemas that other parts of the codebase can reference, +keeping the system DRY and explicit. +""" + +from collections.abc import Sequence + +from pydantic import BaseModel, Field, TypeAdapter + +from ..parsing.models import TestCase + + +class TrainingEntry(BaseModel): + """A single training data record. + + Attributes + ---------- + code_snippet + The Python source code which DSPy will analyse and refactor. + test_cases + A possibly empty sequence of :class:`robofactor.parsing.models.TestCase` + instances that validate the behaviour of ``code_snippet``. + """ + + code_snippet: str = Field(..., min_length=1) + test_cases: Sequence[TestCase] = Field(default_factory=tuple) + + +#: A pre-configured adapter that can validate a *top-level* JSON array of +#: :class:`TrainingEntry` objects. We declare the adapter at module import +#: so that the heavy schema compilation happens once and can be reused by +#: every call to :pyfunc:`robofactor.training.training_loader.load_training_data`. 
+TrainingSetAdapter: TypeAdapter[list[TrainingEntry]] = TypeAdapter(list[TrainingEntry]) diff --git a/src/robofactor/training/training_loader.py b/src/robofactor/training/training_loader.py index 59afbdd..163c45b 100644 --- a/src/robofactor/training/training_loader.py +++ b/src/robofactor/training/training_loader.py @@ -1,18 +1,19 @@ -import json +from __future__ import annotations + +import logging from collections.abc import Sequence -from logging import getLogger -from pathlib import Path -from typing import TypeGuard, cast +from typing import TypeGuard import dspy +from pydantic import ValidationError + +from robofactor.app.config import TRAINING_DATA_FILE +from robofactor.json.types import JSON, JSONObject -from ..app.config import TRAINING_DATA_FILE -from ..parsing.models import TestCase -from ..json.is_json_list import is_json_list -from ..json.types import JSON, JSONObject +from .models import TrainingEntry, TrainingSetAdapter FAILURE_SCORE = 0.0 -logger = getLogger(__name__) +logger = logging.getLogger(__name__) def is_training_item(x: JSON) -> TypeGuard[JSONObject]: @@ -30,35 +31,29 @@ def is_training_item(x: JSON) -> TypeGuard[JSONObject]: ) +def is_json_object(x: JSON) -> TypeGuard[JSONObject]: + return isinstance(x, dict) + + def load_training_data() -> list[dspy.Example]: - data_path = Path(__file__).parent / TRAINING_DATA_FILE + """Return the validated training set as a list of DSPy ``Example`` objects.""" + try: - # CAST the untyped json.loads → JSON - raw = cast(JSON, json.loads(data_path.read_text(encoding="utf-8"))) + raw_text: str = TRAINING_DATA_FILE.read_text(encoding="utf-8") except FileNotFoundError: - logger.error(f"Training data file not found: {data_path}") - return [] - except json.JSONDecodeError as e: - logger.error(f"Invalid JSON in training data: {e}") + logger.error("Training data file not found: %s", TRAINING_DATA_FILE) return [] - # NARROW to actual Sequence[JSON] - if not is_json_list(raw): - logger.error(f"Expected top-level 
array, got {type(raw).__name__}") + try: + entries: list[TrainingEntry] = TrainingSetAdapter.validate_json(raw_text) + except ValidationError as exc: + logger.error("Invalid JSON in training data: %s", exc) return [] - items: list[dspy.Example] = [] - for idx, entry in enumerate(raw): - if not is_training_item(entry): - logger.error(f"Invalid training entry at index {idx}: {entry!r}") - continue - code = entry["code_snippet"] - raw_tcs = entry.get("test_cases", []) - tcs = cast(Sequence[JSONObject], raw_tcs) - items.append( - dspy.Example( - code_snippet=code, - test_cases=[TestCase(**tc) for tc in tcs], - ).with_inputs("code_snippet") - ) - return items + return [ + dspy.Example( + code_snippet=entry.code_snippet, + test_cases=list(entry.test_cases), # Already validated TestCase models + ).with_inputs("code_snippet") + for entry in entries + ] From 05855bbf0a82ef2e45c4f35c13ea3eaf49450213 Mon Sep 17 00:00:00 2001 From: ethan-wickstrom Date: Mon, 14 Jul 2025 14:24:14 -0500 Subject: [PATCH 21/26] docs: strengthen type safety guidelines by prohibiting Any/cast/ignore --- docs/modern_python_patterns.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/modern_python_patterns.md b/docs/modern_python_patterns.md index 9449a67..4dc3c95 100644 --- a/docs/modern_python_patterns.md +++ b/docs/modern_python_patterns.md @@ -39,4 +39,4 @@ if is_email(raw): send_mail(raw) ``` -No casts, no runtime surprises. +No casts, no runtime surprises. Never use Any, cast, or # type: ignore for type fixes. 
From 9f388f62a948179ceaea15e02a8320f7813ed744 Mon Sep 17 00:00:00 2001 From: ethan-wickstrom Date: Mon, 14 Jul 2025 14:48:41 -0500 Subject: [PATCH 22/26] refactor: improve type safety and simplify CLI argument handling --- src/robofactor/app/main.py | 51 ++++++++++++---------------- src/robofactor/app/ui.py | 3 +- src/robofactor/refactoring/module.py | 2 +- 3 files changed, 23 insertions(+), 33 deletions(-) diff --git a/src/robofactor/app/main.py b/src/robofactor/app/main.py index e074df4..89f8a6c 100644 --- a/src/robofactor/app/main.py +++ b/src/robofactor/app/main.py @@ -2,8 +2,9 @@ Main entry point for the command-line interface (CLI) of the refactoring tool. """ +from collections.abc import Callable from pathlib import Path -from typing import Annotated +from typing import Annotated, cast import dspy import mlflow @@ -17,6 +18,7 @@ from robofactor import utils from robofactor.app import config, ui from robofactor.evaluation import evaluate_refactored_code +from robofactor.json.types import JSONObject from robofactor.parsing.analysis import extract_python_code from robofactor.parsing.models import TestCase from robofactor.refactoring.evaluator import RefactoringEvaluator @@ -49,7 +51,9 @@ def _load_or_compile_model( refactorer = CodeRefactor() self_correcting_refactorer = dspy.Refine( module=refactorer, - reward_fn=RefactoringEvaluator(), + reward_fn=cast( + Callable[[JSONObject, dspy.Prediction], float], RefactoringEvaluator().forward + ), threshold=config.REFINEMENT_THRESHOLD, N=config.REFINEMENT_COUNT, ) @@ -98,7 +102,7 @@ def _run_refactoring_on_file( refactor_example = dspy.Example(code_snippet=source_code, test_cases=[]).with_inputs( "code_snippet" ) - prediction = refactorer(**refactor_example.inputs()) + prediction = cast(dspy.Prediction, refactorer(**refactor_example.inputs())) ui.display_refactoring_process(console, prediction) refactored_code = extract_python_code(prediction.refactored_code) @@ -107,7 +111,7 @@ def _run_refactoring_on_file( evaluation 
= evaluate_refactored_code(refactored_code, tests) - match evaluation: # type: ignore[reportMatchNotExhaustive] + match evaluation: case Success(eval_data): ui.display_evaluation_results(console, eval_data) if write: @@ -127,34 +131,25 @@ def _run_refactoring_on_file( console.print( "[bold yellow]Skipping write-back due to evaluation failure.[/bold yellow]" ) + case _: + # Fallback case for static type checkers. + pass @app.command() def main( path: Annotated[ - Path | None, - typer.Argument( - help="Path to the Python file to refactor.", - exists=True, - file_okay=True, - dir_okay=False, - readable=True, - resolve_path=True, - ), + Path | None, typer.Argument(help="Path to Python file to refactor.", exists=True) ] = None, self_refactor: bool = typer.Option( False, "--dog-food", help="Self-refactor the script you are running." ), - write: bool = typer.Option( - False, "--write", help="Write the refactored code back to the file." - ), + write: bool = typer.Option(False, "--write", help="Write refactored code back to the file."), optimize: bool = typer.Option( False, "--optimize", help="Force re-optimization of the DSPy model." ), task_llm_model: str = typer.Option( - config.DEFAULT_TASK_LLM, - "--task-llm", - help="Model for the main refactoring task.", + config.DEFAULT_TASK_LLM, "--task-llm", help="Model for the main refactoring task." ), prompt_llm_model: str = typer.Option( config.DEFAULT_PROMPT_LLM, @@ -163,14 +158,10 @@ def main( ), tracing: bool = typer.Option(True, "--tracing/--no-tracing", help="Enable MLflow tracing."), mlflow_uri: str = typer.Option( - config.DEFAULT_MLFLOW_TRACKING_URI, - "--mlflow-uri", - help="MLflow tracking server URI.", + config.DEFAULT_MLFLOW_TRACKING_URI, "--mlflow-uri", help="MLflow tracking server URI." 
), mlflow_experiment: str = typer.Option( - config.DEFAULT_MLFLOW_EXPERIMENT_NAME, - "--mlflow-experiment", - help="MLflow experiment name.", + config.DEFAULT_MLFLOW_EXPERIMENT_NAME, "--mlflow-experiment", help="MLflow experiment name." ), ): """A DSPy-powered tool to analyze, plan, and refactor Python code.""" @@ -184,15 +175,15 @@ def main( config.OPTIMIZER_FILENAME, optimize, console, prompt_llm, task_llm ) - target_path: Path | None = None + file_path: Path | None = None if self_refactor: - target_path = Path(__file__) + file_path = Path(__file__) console.print(Rule("[bold magenta]Self-Refactoring Mode[/bold magenta]")) elif path: - target_path = path + file_path = path - if target_path: - _run_refactoring_on_file(console, refactorer, target_path, write) + if file_path: + _run_refactoring_on_file(console, refactorer, file_path, write) else: console.print( "[bold red]Error:[/bold red] Please provide a path to a file or use --dog-food." diff --git a/src/robofactor/app/ui.py b/src/robofactor/app/ui.py index 6e3bb05..824f95e 100644 --- a/src/robofactor/app/ui.py +++ b/src/robofactor/app/ui.py @@ -1,4 +1,3 @@ - """ Presentation logic for displaying results in the console. @@ -14,9 +13,9 @@ from rich.table import Table from rich.text import Text +from ..evaluation import EvaluationResult from ..parsing import analysis from . 
import config -from ..evaluation import EvaluationResult def display_refactoring_process(console: Console, prediction: dspy.Prediction) -> None: diff --git a/src/robofactor/refactoring/module.py b/src/robofactor/refactoring/module.py index fce5aa0..f1cba3f 100644 --- a/src/robofactor/refactoring/module.py +++ b/src/robofactor/refactoring/module.py @@ -4,7 +4,7 @@ import dspy -from .signatures import CodeAnalysis, RefactoringPlan, RefactoredCode +from .signatures import CodeAnalysis, RefactoredCode, RefactoringPlan class CodeRefactor(dspy.Module): From f7cfd8aaaf3af8993294a41f09c492f1e08d52c3 Mon Sep 17 00:00:00 2001 From: ethan-wickstrom Date: Mon, 14 Jul 2025 15:00:24 -0500 Subject: [PATCH 23/26] refactor: consolidate shared utilities into common package Extracts duplicated functionality across modules into a centralized common package to enforce DRY principles and establish single sources of truth. Moves core utilities to common package: - AST manipulation (ast_node_to_source) from parsing.ast_parser - Python code extraction from parsing.analysis - JSON validation functions from json package and training loader Updates all import paths to reference the new common package location. Removes redundant implementations and the now-empty json.is_json_list module. This structural reorganization improves maintainability by eliminating code duplication while preserving all existing functionality and APIs. 
--- src/robofactor/common/__init__.py | 16 +++++++++ src/robofactor/common/ast_utils.py | 23 +++++++++++++ src/robofactor/common/code_extraction.py | 25 ++++++++++++++ src/robofactor/common/json_validation.py | 38 ++++++++++++++++++++++ src/robofactor/evaluation/__init__.py | 5 +-- src/robofactor/json/is_json_list.py | 8 ----- src/robofactor/parsing/__init__.py | 3 ++ src/robofactor/parsing/analysis.py | 19 ++--------- src/robofactor/parsing/ast_parser.py | 20 ++---------- src/robofactor/refactoring/__init__.py | 2 ++ src/robofactor/refactoring/signatures.py | 2 +- src/robofactor/training/training_loader.py | 22 ------------- 12 files changed, 115 insertions(+), 68 deletions(-) create mode 100644 src/robofactor/common/__init__.py create mode 100644 src/robofactor/common/ast_utils.py create mode 100644 src/robofactor/common/code_extraction.py create mode 100644 src/robofactor/common/json_validation.py delete mode 100644 src/robofactor/json/is_json_list.py diff --git a/src/robofactor/common/__init__.py b/src/robofactor/common/__init__.py new file mode 100644 index 0000000..7422c1d --- /dev/null +++ b/src/robofactor/common/__init__.py @@ -0,0 +1,16 @@ +""" +Common utilities and shared functionality for Robofactor. +This package provides single sources of truth for cross-cutting concerns. +""" + +from .ast_utils import ast_node_to_source +from .code_extraction import extract_python_code +from .json_validation import is_json_list, is_json_object, is_training_item + +__all__ = [ + "ast_node_to_source", + "extract_python_code", + "is_json_list", + "is_json_object", + "is_training_item", +] diff --git a/src/robofactor/common/ast_utils.py b/src/robofactor/common/ast_utils.py new file mode 100644 index 0000000..e49396a --- /dev/null +++ b/src/robofactor/common/ast_utils.py @@ -0,0 +1,23 @@ +""" +Single source of truth for AST-related utilities. 
+""" + +import ast + + +def ast_node_to_source(node: ast.AST) -> str: + """ + Convert an AST node back to its source code representation. + + Args: + node: The AST node to convert. + + Returns: + The source code string for the node, or a repr for fallback. + """ + try: + return ast.unparse(node) + except Exception: + # Fallback for nodes that ast.unparse might not handle gracefully. + # This ensures that even complex or unusual AST structures can be represented. + return repr(node) diff --git a/src/robofactor/common/code_extraction.py b/src/robofactor/common/code_extraction.py new file mode 100644 index 0000000..5ed5db0 --- /dev/null +++ b/src/robofactor/common/code_extraction.py @@ -0,0 +1,25 @@ +""" +Single source of truth for extracting Python code from text/markdown. +""" + +import re +from typing import Final + +_CODE_BLOCK_PATTERN: Final[re.Pattern[str]] = re.compile(r"```python\n(.*?)\n```", re.DOTALL) + + +def extract_python_code(text: str) -> str: + """ + Extracts Python code from a markdown block. + + If a python markdown block (```python...```) is found, its content is + returned. Otherwise, the original text is returned. + + Args: + text: The string to search for a Python code block. + + Returns: + The extracted Python code, or the original text if no block is found. + """ + match = _CODE_BLOCK_PATTERN.search(text) + return match.group(1).strip() if match else text diff --git a/src/robofactor/common/json_validation.py b/src/robofactor/common/json_validation.py new file mode 100644 index 0000000..a1e4ead --- /dev/null +++ b/src/robofactor/common/json_validation.py @@ -0,0 +1,38 @@ +""" +Single source of truth for JSON validation utilities. 
+""" + +from collections.abc import Sequence +from typing import TypeGuard + +from ..json.types import JSON, JSONObject + + +def is_json_object(x: JSON) -> TypeGuard[JSONObject]: + """Type guard for JSON object validation.""" + return isinstance(x, dict) + + +def is_json_list(x: JSON) -> TypeGuard[Sequence[JSON]]: + """Type guard for JSON list validation.""" + return isinstance(x, Sequence) and not isinstance(x, str) + + +def is_training_item(x: JSON) -> TypeGuard[JSONObject]: + """ + Type guard for training data validation. + + Validates that the input is a JSON object with required training data structure. + """ + return ( + is_json_object(x) + and "code_snippet" in x + and isinstance(x["code_snippet"], str) + and ( + "test_cases" not in x + or ( + isinstance(x["test_cases"], Sequence) + and all(is_json_object(tc) for tc in x["test_cases"]) + ) + ) + ) diff --git a/src/robofactor/evaluation/__init__.py b/src/robofactor/evaluation/__init__.py index 7abcb98..a3f7793 100644 --- a/src/robofactor/evaluation/__init__.py +++ b/src/robofactor/evaluation/__init__.py @@ -4,7 +4,8 @@ quality, and functional checks to ensure that AI-generated code is safe, reliable, and adheres to best practices. """ + from . 
import checkers, pipeline -from .pipeline import evaluate_refactored_code, EvaluationResult +from .pipeline import EvaluationResult, evaluate_refactored_code -__all__ = ["evaluate_refactored_code", "EvaluationResult", "checkers", "pipeline"] +__all__ = ["EvaluationResult", "checkers", "evaluate_refactored_code", "pipeline"] diff --git a/src/robofactor/json/is_json_list.py b/src/robofactor/json/is_json_list.py deleted file mode 100644 index affa90b..0000000 --- a/src/robofactor/json/is_json_list.py +++ /dev/null @@ -1,8 +0,0 @@ -from collections.abc import Sequence -from typing import TypeGuard - -from .types import JSON - - -def is_json_list(x: JSON) -> TypeGuard[Sequence[JSON]]: - return isinstance(x, Sequence) diff --git a/src/robofactor/parsing/__init__.py b/src/robofactor/parsing/__init__.py index 0b5dd23..5dfa8df 100644 --- a/src/robofactor/parsing/__init__.py +++ b/src/robofactor/parsing/__init__.py @@ -4,4 +4,7 @@ code into a structured format, making it easier for other parts of the application to understand and manipulate. """ + from . import analysis, ast_parser, models + +__all__ = ["analysis", "ast_parser", "models"] diff --git a/src/robofactor/parsing/analysis.py b/src/robofactor/parsing/analysis.py index 8ffe58e..bf39abf 100644 --- a/src/robofactor/parsing/analysis.py +++ b/src/robofactor/parsing/analysis.py @@ -2,21 +2,6 @@ Provides utility functions for parsing and extracting code from text. """ -import re +from robofactor.common.code_extraction import extract_python_code - -def extract_python_code(text: str) -> str: - """ - Extracts Python code from a markdown block. - - If a python markdown block (```python...```) is found, its content is - returned. Otherwise, the original text is returned. - - Args: - text: The string to search for a Python code block. - - Returns: - The extracted Python code, or the original text if no block is found. 
- """ - match = re.search(r"```python\n(.*?)\n```", text, re.DOTALL) - return match.group(1).strip() if match else text +__all__ = ["extract_python_code"] diff --git a/src/robofactor/parsing/ast_parser.py b/src/robofactor/parsing/ast_parser.py index 840d713..8ac6e06 100644 --- a/src/robofactor/parsing/ast_parser.py +++ b/src/robofactor/parsing/ast_parser.py @@ -6,6 +6,8 @@ from returns.io import impure_safe from returns.result import safe +from robofactor.common.ast_utils import ast_node_to_source + from .models import ( ClassContext, Decorator, @@ -86,24 +88,6 @@ def from_ast_node( ) -def ast_node_to_source(node: ast.AST) -> str: - """ - Convert an AST node back to its source code representation. - - Args: - node: The AST node to convert. - - Returns: - The source code string for the node, or a repr for fallback. - """ - try: - return ast.unparse(node) - except Exception: - # Fallback for nodes that ast.unparse might not handle gracefully. - # This ensures that even complex or unusual AST structures can be represented. - return repr(node) - - def extract_decorators(decorators: Sequence[ast.expr]) -> tuple[Decorator, ...]: """ Extract decorator information from an AST decorator Sequence. diff --git a/src/robofactor/refactoring/__init__.py b/src/robofactor/refactoring/__init__.py index 2ca476c..4b0d8ef 100644 --- a/src/robofactor/refactoring/__init__.py +++ b/src/robofactor/refactoring/__init__.py @@ -5,3 +5,5 @@ """ from . 
import evaluator, module, signatures + +__all__ = ["evaluator", "module", "signatures"] diff --git a/src/robofactor/refactoring/signatures.py b/src/robofactor/refactoring/signatures.py index 1dc1d04..1cedaab 100644 --- a/src/robofactor/refactoring/signatures.py +++ b/src/robofactor/refactoring/signatures.py @@ -11,7 +11,7 @@ import dspy from pydantic import BaseModel, Field, field_validator, model_validator -from ..parsing.analysis import extract_python_code +from robofactor.common.code_extraction import extract_python_code # --- Pydantic Models --- diff --git a/src/robofactor/training/training_loader.py b/src/robofactor/training/training_loader.py index 163c45b..3dc8207 100644 --- a/src/robofactor/training/training_loader.py +++ b/src/robofactor/training/training_loader.py @@ -1,14 +1,11 @@ from __future__ import annotations import logging -from collections.abc import Sequence -from typing import TypeGuard import dspy from pydantic import ValidationError from robofactor.app.config import TRAINING_DATA_FILE -from robofactor.json.types import JSON, JSONObject from .models import TrainingEntry, TrainingSetAdapter @@ -16,25 +13,6 @@ logger = logging.getLogger(__name__) -def is_training_item(x: JSON) -> TypeGuard[JSONObject]: - return ( - isinstance(x, dict) - and "code_snippet" in x - and isinstance(x["code_snippet"], str) - and ( - "test_cases" not in x - or ( - isinstance(x["test_cases"], Sequence) - and all(isinstance(tc, dict) for tc in x["test_cases"]) - ) - ) - ) - - -def is_json_object(x: JSON) -> TypeGuard[JSONObject]: - return isinstance(x, dict) - - def load_training_data() -> list[dspy.Example]: """Return the validated training set as a list of DSPy ``Example`` objects.""" From 062bb552af21533edc3069aa6eabf3032ea976f7 Mon Sep 17 00:00:00 2001 From: ethan-wickstrom Date: Mon, 14 Jul 2025 15:00:51 -0500 Subject: [PATCH 24/26] docs(project): add comprehensive Python development guidelines Establishes project-wide Python coding standards and best practices to 
ensure consistency across the codebase. Documents conventions for project structure, code style, type hints, documentation, workflow, and dependency management. Provides clear guidance for: - Source layout and directory organization - Black/isort formatting and PEP 8 compliance - Type annotation requirements and patterns - Google-style docstring conventions - Virtual environment and CI/CD workflows - Dependency pinning and security practices --- .windsurf/rules/python-guidelines.md | 59 ++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 .windsurf/rules/python-guidelines.md diff --git a/.windsurf/rules/python-guidelines.md b/.windsurf/rules/python-guidelines.md new file mode 100644 index 0000000..94d6f78 --- /dev/null +++ b/.windsurf/rules/python-guidelines.md @@ -0,0 +1,59 @@ +--- +trigger: glob +globs: **/*.py, src/**/*.py, tests/**/*.py +--- + +# Python Best Practices + +## Project Structure + +- Use src-layout with `src/your_package_name/` +- Place tests in `tests/` directory parallel to `src/` +- Keep configuration in `config/` +- Store requirements in `pyproject.toml` +- Place static files in `static/` directory +- Use `templates/` for Jinja2 templates + +## Code Style + +- Follow Black code formatting +- Use isort for import sorting +- Follow PEP 8 naming conventions: + - snake_case for functions and variables + - PascalCase for classes + - UPPER_CASE for constants +- Maximum line length of 88 characters (Black default) +- Use absolute imports over relative imports + +## Type Hints + +- Use type hints for all function parameters and returns +- Import types from `typing` module +- Use `Optional[Type]` instead of `Type | None` +- Define custom types in `types.py` +- Use `Protocol` for duck typing + +## Documentation + +- Use Google-style docstrings +- Document all public APIs +- Use proper inline comments +- Generate API documentation +- Document environment setup + +## Development Workflow + +- Use virtual environments (venv) +- 
Implement pre-commit hooks +- Use proper Git workflow +- Follow semantic versioning +- Use proper CI/CD practices +- Implement proper logging + +## Dependencies + +- Pin dependency versions +- Separate dev dependencies +- Use proper package versions +- Regularly update dependencies +- Check for security vulnerabilities From 12f55a09d8fde324dba78e1465e36c443e5f6de2 Mon Sep 17 00:00:00 2001 From: ethan-wickstrom Date: Mon, 14 Jul 2025 15:07:52 -0500 Subject: [PATCH 25/26] build: modernize makefile with basedpyright and consolidated tooling Replaces mypy with basedpyright for type checking with explicit Python 3.13 version specification. Consolidates code formatting under ruff, removing separate isort command. Simplifies test execution with graceful handling for empty test directories. Removes unused targets and enhances the clean target for more comprehensive artifact removal. --- Makefile | 48 ++++++++++++++++++++++++------------------------ pyproject.toml | 8 +------- 2 files changed, 25 insertions(+), 31 deletions(-) diff --git a/Makefile b/Makefile index 7930d6e..bc253b6 100644 --- a/Makefile +++ b/Makefile @@ -1,18 +1,16 @@ -.PHONY: help install install-dev clean test test-unit test-integration lint format type-check check build docs serve-docs readme +.PHONY: help install install-dev clean test lint format type-check check readme # Default target help: @echo "Available commands:" @echo " install Install the package in production mode" - @echo " install-dev Install the package in development mode" + @echo " install-dev Install the package in development mode with all groups" @echo " clean Remove build artifacts and caches" - @echo " test Run all tests" - @echo " test-unit Run unit tests only" - @echo " test-integration Run integration tests only" - @echo " lint Run linting checks" - @echo " format Format code with black and isort" - @echo " type-check Run mypy type checking" - @echo " check Run all checks (lint, type-check, test)" + @echo " test Run all tests 
with pytest" + @echo " lint Run ruff linting checks with auto-fix" + @echo " format Format code with ruff" + @echo " type-check Run basedpyright type checking" + @echo " check Run all checks (type-check, lint, test)" @echo " readme Generate README.md using DSPy" # Installation targets @@ -22,19 +20,22 @@ install: install-dev: uv sync --all-groups +# Cleaning +clean: + rm -rf build dist .eggs *.egg-info + rm -rf .pytest_cache .ruff_cache .mypy_cache + rm -rf htmlcov .coverage coverage.xml + find . -type d -name __pycache__ -exec rm -rf {} + + find . -type f -name "*.pyc" -delete + find . -type f -name "*.pyo" -delete + # Testing test: - uv run pytest - -test-unit: - uv run pytest tests/unit - -test-integration: - uv run pytest tests/integration - -test-coverage: - uv run pytest --cov-report=html - @echo "Coverage report generated in htmlcov/index.html" + @if [ -n "$$(find tests -name '*.py' 2>/dev/null)" ]; then \ + uv run pytest; \ + else \ + echo "No tests found in tests directory"; \ + fi # Code quality lint: @@ -42,14 +43,13 @@ lint: format: uv run ruff format src tests - uv run isort src tests type-check: - uv run mypy src + uv run basedpyright --pythonversion 3.13 src # Combined checks -check: lint type-check test +check: type-check lint test # Documentation readme: - uv run scripts/generate_readme.py + uv run python scripts/generate_readme.py diff --git a/pyproject.toml b/pyproject.toml index fc49422..9345f5c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,7 @@ build-backend = "hatchling.build" packages = ["src/robofactor"] [dependency-groups] -dev = ["basedpyright>=1.29.4", "isort>=6.0.1", "ruff>=0.11.13", "toml>=0.10.2"] +dev = ["basedpyright>=1.29.4", "ruff>=0.11.13", "toml>=0.10.2"] [tool.ruff] line-length = 100 @@ -112,11 +112,5 @@ exclude_lines = [ "if __name__ == .__main__.:", ] -[tool.isort] -profile = "black" -line_length = 100 -multi_line_output = 3 -include_trailing_comma = true - [tool.uv.sources] dspy = { git = 
"https://github.com/stanfordnlp/dspy.git" } From 77b7db093531f64e5c4b554becba8c767dbb489d Mon Sep 17 00:00:00 2001 From: ethan-wickstrom Date: Mon, 14 Jul 2025 15:27:37 -0500 Subject: [PATCH 26/26] chore: remove isort dependency in favor of ruff formatter --- uv.lock | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/uv.lock b/uv.lock index 8abd214..d1d267d 100644 --- a/uv.lock +++ b/uv.lock @@ -945,15 +945,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd", size = 27656 }, ] -[[package]] -name = "isort" -version = "6.0.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b8/21/1e2a441f74a653a144224d7d21afe8f4169e6c7c20bb13aec3a2dc3815e0/isort-6.0.1.tar.gz", hash = "sha256:1cb5df28dfbc742e490c5e41bad6da41b805b0a8be7bc93cd0fb2a8a890ac450", size = 821955 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c1/11/114d0a5f4dabbdcedc1125dee0888514c3c3b16d3e9facad87ed96fad97c/isort-6.0.1-py3-none-any.whl", hash = "sha256:2dc5d7f65c9678d94c88dfc29161a320eec67328bc97aad576874cb4be1e9615", size = 94186 }, -] - [[package]] name = "itsdangerous" version = "2.2.0" @@ -2376,7 +2367,6 @@ dependencies = [ [package.dev-dependencies] dev = [ { name = "basedpyright" }, - { name = "isort" }, { name = "ruff" }, { name = "toml" }, ] @@ -2399,7 +2389,6 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ { name = "basedpyright", specifier = ">=1.29.4" }, - { name = "isort", specifier = ">=6.0.1" }, { name = "ruff", specifier = ">=0.11.13" }, { name = "toml", specifier = ">=0.10.2" }, ]