Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
360 changes: 359 additions & 1 deletion poetry.lock

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "coreason_optimizer"
version = "0.1.0"
version = "0.2.0"
description = "coreason-optimizer"
authors = ["Gowtham A Rao <gowtham.rao@coreason.ai>"]
license = "Prosperity-3.0"
Expand All @@ -16,10 +16,11 @@ openai = "^1.50"
click = "^8.1.7"
numpy = "^2.1"
scikit-learn = "^1.5.2"
anyio = "*"
anyio = "^4.12.1"
httpx = "*"
aiofiles = "*"
types-aiofiles = "*"
coreason-identity = "^0.4.1"

[tool.poetry.scripts]
coreason-opt = "coreason_optimizer.main:cli"
Expand Down
2 changes: 1 addition & 1 deletion src/coreason_optimizer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from coreason_optimizer.core.config import OptimizerConfig
from coreason_optimizer.core.interfaces import PromptOptimizer

__version__ = "0.1.0"
__version__ = "0.2.0"
__author__ = "Gowtham A Rao"
__email__ = "gowtham.rao@coreason.ai"

Expand Down
105 changes: 105 additions & 0 deletions src/coreason_optimizer/core/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,12 @@
"""

import os
import uuid
from typing import Any, Optional

import anyio
import httpx
from coreason_identity.models import UserContext
from openai import AsyncOpenAI

from coreason_optimizer.core.budget import BudgetManager
Expand Down Expand Up @@ -462,3 +464,106 @@ def embed(self, texts: list[str], model: str | None = None) -> EmbeddingResponse
response = self.provider.embed(texts, model)
self.budget_manager.consume(response.usage)
return response


class OptimizationClient:
    """
    In-memory manager for optimization studies that is identity-aware.

    Every public operation requires a ``UserContext`` and emits a log record
    carrying the caller's ``user_id``. Studies live only in a dictionary on
    this instance (a stand-in for a real backend); no permission check beyond
    the presence of a context is performed here.
    """

    def __init__(self) -> None:
        # study_id -> {"name": ..., "owner": ..., "trials": [...]}
        self._studies: dict[str, dict[str, Any]] = {}

    @staticmethod
    def _require_context(context: Optional[UserContext]) -> None:
        """Raise ``ValueError`` when no user context was supplied."""
        if context is None:
            raise ValueError("UserContext is required.")

    def register_study(self, study_name: str, *, context: UserContext) -> str:
        """
        Create a new study record and return its generated identifier.

        Args:
            study_name: Human-readable name of the study.
            context: The user context on whose behalf the study is created.

        Returns:
            The new study ID, formatted as ``study_`` followed by the first
            eight hex characters of a random UUID.

        Raises:
            ValueError: If context is None.
        """
        self._require_context(context)

        owner = context.user_id
        token = uuid.uuid4().hex[:8]
        study_id = f"study_{token}"

        record: dict[str, Any] = {
            "name": study_name,
            "owner": owner,
            "trials": [],
        }
        self._studies[study_id] = record

        # Audit trail. Per the original author's note, user_id is a plain
        # string in coreason-identity 0.4.x (not a SecretStr), so it is
        # logged directly as the authenticated user identifier.
        logger.info(
            "Registering optimization study",
            user_id=owner,
            study_name=study_name,
            study_id=study_id,
        )
        return study_id

    def get_suggestion(self, study_id: str, *, context: UserContext) -> dict[str, Any]:
        """
        Return the next parameter suggestion for a study.

        The suggestion is currently a fixed placeholder; the study's trial
        history is not consulted and the study ID is not validated.

        Args:
            study_id: The ID of the study.
            context: The user context on whose behalf the request is made.

        Returns:
            A dictionary of suggested parameters.

        Raises:
            ValueError: If context is None.
        """
        self._require_context(context)

        # No access verification happens yet; a real system would check
        # RLS/permissions at this point.
        logger.debug(
            "Requesting parameter suggestion",
            user_id=context.user_id,
            study_id=str(study_id),
        )

        # Placeholder (simulated) suggestion.
        suggestion: dict[str, Any] = {"param_a": 0.1, "param_b": "strategy_v1"}
        return suggestion

    def report_metric(self, study_id: str, metric: float, *, context: UserContext) -> None:
        """
        Record a trial metric against a study.

        When ``study_id`` is not a registered study the metric is not stored,
        but the debug log record is still emitted.

        Args:
            study_id: The ID of the study.
            metric: The metric value.
            context: The user context on whose behalf the metric is reported.

        Raises:
            ValueError: If context is None.
        """
        self._require_context(context)

        study = self._studies.get(study_id)
        if study is not None:
            trial = {"metric": metric, "user": context.user_id}
            study["trials"].append(trial)

        logger.debug(
            "Reporting metric",
            user_id=context.user_id,
            study_id=study_id,
            metric=metric,
        )
36 changes: 34 additions & 2 deletions src/coreason_optimizer/data/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@
from collections.abc import Iterator
from pathlib import Path

from coreason_identity.models import UserContext

from coreason_optimizer.core.models import TrainingExample
from coreason_optimizer.utils.logger import logger


class Dataset:
Expand All @@ -46,25 +49,43 @@ def __iter__(self) -> Iterator[TrainingExample]:
return iter(self.examples)

@classmethod
def from_csv(cls, filepath: str | Path, input_cols: list[str], reference_col: str) -> "Dataset":
def from_csv(
cls,
filepath: str | Path,
input_cols: list[str],
reference_col: str,
*,
context: UserContext,
) -> "Dataset":
"""
Load a dataset from a CSV file.

Args:
filepath: Path to the CSV file.
input_cols: List of column names to treat as inputs.
reference_col: Column name to treat as the reference output.
context: The user context authorizing this operation.

Returns:
A Dataset instance.

Raises:
FileNotFoundError: If the file does not exist.
ValueError: If context is missing.
"""
if context is None:
raise ValueError("UserContext is required.")

path = Path(filepath)
if not path.exists():
raise FileNotFoundError(f"File not found: {path}")

logger.info(
"Loading dataset from CSV",
user_id=context.user_id,
filepath=str(path),
)

examples = []
with path.open("r", encoding="utf-8") as f:
reader = csv.DictReader(f)
Expand All @@ -88,7 +109,7 @@ def from_csv(cls, filepath: str | Path, input_cols: list[str], reference_col: st
return cls(examples)

@classmethod
def from_jsonl(cls, filepath: str | Path) -> "Dataset":
def from_jsonl(cls, filepath: str | Path, *, context: UserContext) -> "Dataset":
"""
Load a dataset from a JSONL file.

Expand All @@ -99,17 +120,28 @@ def from_jsonl(cls, filepath: str | Path) -> "Dataset":

Args:
filepath: Path to the JSONL file.
context: The user context authorizing this operation.

Returns:
A Dataset instance.

Raises:
FileNotFoundError: If the file does not exist.
ValueError: If context is missing.
"""
if context is None:
raise ValueError("UserContext is required.")

path = Path(filepath)
if not path.exists():
raise FileNotFoundError(f"File not found: {path}")

logger.info(
"Loading dataset from JSONL",
user_id=context.user_id,
filepath=str(path),
)

examples = []
with path.open("r", encoding="utf-8") as f:
for line in f:
Expand Down
50 changes: 44 additions & 6 deletions src/coreason_optimizer/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@
from pathlib import Path

import click
from coreason_identity.models import UserContext

from coreason_optimizer.core.client import OpenAIClient, OpenAIEmbeddingClient
from coreason_optimizer.core.client import OpenAIClient, OpenAIEmbeddingClient, OptimizationClient
from coreason_optimizer.core.config import OptimizerConfig
from coreason_optimizer.core.formatter import format_prompt
from coreason_optimizer.core.interfaces import PromptOptimizer
Expand All @@ -29,6 +30,7 @@
from coreason_optimizer.data.loader import Dataset
from coreason_optimizer.strategies.bootstrap import BootstrapFewShot
from coreason_optimizer.strategies.mipro import MiproOptimizer
from coreason_optimizer.strategies.selector import StrategySelector
from coreason_optimizer.utils.import_utils import load_agent_from_path
from coreason_optimizer.utils.logger import logger

Expand Down Expand Up @@ -88,7 +90,28 @@ def tune(
strategy: Optimization strategy to use ('mipro' or 'bootstrap').
selector: Few-shot example selection strategy ('random' or 'semantic').
"""
logger.info(f"Starting optimization for agent: {agent}")
# Create System Context
system_context = UserContext(
user_id="cli-user",
email="cli-user@coreason.ai",
groups=["system"],
claims={"source": "cli"},
)

logger.info(
f"Starting optimization for agent: {agent}",
user_id=system_context.user_id,
)

# Initialize Optimization Client (Audit)
opt_client = OptimizationClient()
# The returned study_id is not used yet in this version of the optimizer;
# the study is registered solely for audit compliance.
_ = opt_client.register_study(f"opt-{Path(agent).stem}", context=system_context)

# Validate Strategy
strat_selector = StrategySelector()
strategy = strat_selector.select_strategy(strategy, context=system_context)

# Load Agent
try:
Expand All @@ -101,11 +124,13 @@ def tune(
try:
ds_path = Path(dataset)
if ds_path.suffix.lower() == ".jsonl":
full_ds = Dataset.from_jsonl(ds_path)
full_ds = Dataset.from_jsonl(ds_path, context=system_context)
elif ds_path.suffix.lower() == ".csv":
# Assume reference col is 'reference' and inputs are from construct
input_cols = construct.inputs
full_ds = Dataset.from_csv(ds_path, input_cols=input_cols, reference_col="reference")
full_ds = Dataset.from_csv(
ds_path, input_cols=input_cols, reference_col="reference", context=system_context
)
else:
raise click.ClickException("Unsupported file format. Use .csv or .jsonl")
except Exception as e:
Expand Down Expand Up @@ -202,16 +227,29 @@ def evaluate(manifest: str, dataset: str, metric: str) -> None:
except Exception as e:
raise click.ClickException(f"Failed to load manifest: {e}") from e

# Create System Context for Evaluation
system_context = UserContext(
user_id="cli-evaluator",
email="evaluator@coreason.ai",
groups=["system"],
claims={"source": "cli-eval"},
)

# Load Dataset
try:
ds_path = Path(dataset)
if ds_path.suffix.lower() == ".jsonl":
eval_ds = Dataset.from_jsonl(ds_path)
eval_ds = Dataset.from_jsonl(ds_path, context=system_context)
else:
# Fallback for CSV: try to use keys from first few-shot example if available
if manifest_obj.few_shot_examples:
input_cols = list(manifest_obj.few_shot_examples[0].inputs.keys())
eval_ds = Dataset.from_csv(ds_path, input_cols=input_cols, reference_col="reference")
eval_ds = Dataset.from_csv(
ds_path,
input_cols=input_cols,
reference_col="reference",
context=system_context,
)
else:
raise click.ClickException(
"Cannot infer CSV schema for evaluation without few-shot examples in manifest. Use JSONL."
Expand Down
31 changes: 31 additions & 0 deletions src/coreason_optimizer/strategies/selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,13 @@
from abc import ABC, abstractmethod

import numpy as np
from coreason_identity.models import UserContext
from sklearn.cluster import KMeans

from coreason_optimizer.core.interfaces import EmbeddingProvider
from coreason_optimizer.core.models import TrainingExample
from coreason_optimizer.data.loader import Dataset
from coreason_optimizer.utils.logger import logger


class BaseSelector(ABC):
Expand Down Expand Up @@ -165,3 +167,32 @@ def select(self, trainset: Dataset, k: int = 4) -> list[TrainingExample]:
selected_indices.sort()

return [trainset[idx] for idx in selected_indices]


class StrategySelector:
    """Resolve the optimization strategy to run for a given user context."""

    def select_strategy(self, strategy: str, context: UserContext) -> str:
        """
        Log the strategy request and return the requested strategy name.

        The requested name is currently returned unchanged; the
        ``strategies`` claim is only read for logging and is not enforced.

        Args:
            strategy: The requested strategy name.
            context: The user context making the request.

        Returns:
            The strategy name, exactly as requested.

        Raises:
            ValueError: If context is None.
        """
        if context is None:
            raise ValueError("UserContext is required.")

        requester = context.user_id
        # Falls back to "all" when the context carries no "strategies" claim.
        permitted = context.claims.get("strategies", "all")

        logger.info(
            "Selecting optimization strategy",
            user_id=requester,
            authorized_strategies=permitted,
        )

        return strategy
Loading
Loading