Changes from all commits
52 commits
a3cf34a
Staging branch for LLM module
eb8680 Oct 9, 2025
b57a18d
Move LLM interface code from `robotl` (#358)
jfeser Oct 9, 2025
8170a64
Implement basic tool calling (#366)
jfeser Oct 10, 2025
bcbf7bb
Merge branch 'master' into staging-llm
eb8680 Oct 10, 2025
66a5eb4
Merge branch 'master' into staging-llm
jfeser Oct 20, 2025
ab9e2fe
enable strict mode for tool calling (#375)
jfeser Oct 20, 2025
661cab8
add structured generation and remove unused `decode` operation (#376)
jfeser Oct 20, 2025
02c4378
implemented support for class methods in `Template.define` (#377)
kiranandcode Oct 24, 2025
d9d1782
Revert "implemented support for class methods in `Template.define` (#…
kiranandcode Oct 24, 2025
1053fdd
Add support for methods in `Template.define` (#377) (#378)
kiranandcode Oct 26, 2025
54efb77
Adding a lower-level event and a logger example (#382)
datvo06 Oct 28, 2025
657924e
Add support for tools returning images (#385)
kiranandcode Oct 29, 2025
68af295
Implement Caching Handler for LLM (#392)
datvo06 Nov 12, 2025
b9207b4
implement first to k-ahead sampler (#412)
kiranandcode Nov 24, 2025
41b52b4
Add inheritable class for stateful templates (#416)
jfeser Nov 26, 2025
248ff6e
Support multiple providers (via `litellm`) (#418)
kiranandcode Dec 1, 2025
e4c0d99
store source of generated functions in `__src__` attribute (#403)
kiranandcode Dec 2, 2025
5cb8e89
Adds type-based encoding and support for legacy APIs (#411)
kiranandcode Dec 2, 2025
1f50599
Add LLM Integration tests to the workflows. (#420)
kiranandcode Dec 3, 2025
8118a8f
Merge master into llm-staging (#423)
jfeser Dec 4, 2025
62e45a4
Fix `staging-llm` diff against `master` (#426)
eb8680 Dec 5, 2025
1c37637
Implement a RetryHandler for LLM module (#428)
datvo06 Dec 9, 2025
bb5bded
Merge `master` into `staging-llm` again (#443)
eb8680 Dec 12, 2025
44d7d12
Implements a unified `encode`ing/`decode`ing pipeline for `llm` (#442)
kiranandcode Dec 15, 2025
931d507
Initial version of Lexical Context Collection - Collecting Tools and …
datvo06 Dec 15, 2025
50ce47c
EncodableSynthesizedFunction
datvo06 Dec 22, 2025
8530fd0
Update `staging-llm` from `master` (#457)
eb8680 Dec 22, 2025
45083c6
Merge branch 'staging-llm' of https://github.com/BasisResearch/effect…
datvo06 Dec 23, 2025
b0f28e7
Passing test
datvo06 Dec 23, 2025
7abea68
Linting
datvo06 Dec 23, 2025
19fbfa4
Trim decode changes
datvo06 Dec 23, 2025
ada8512
Linting tests
datvo06 Dec 23, 2025
e6e21bb
Minor
datvo06 Dec 23, 2025
bae8d02
Convert `Template` into an operation (#424)
jfeser Dec 29, 2025
3311d1b
Fail when encoding terms or operations (#474)
jfeser Dec 29, 2025
23f95ef
Implemented record and replay fixtures for LLM calls (#467)
kiranandcode Dec 31, 2025
2094f22
Remove program synthesis code (#475)
jfeser Dec 31, 2025
05b28ef
Disables direct recursion on templates by default (#466)
kiranandcode Dec 31, 2025
d91d4c9
drop k-ahead sampler (#479)
jfeser Dec 31, 2025
e3e8c7e
Document `Template` and `Tool` (#478)
jfeser Jan 1, 2026
13f41ee
Merge staging-llm
datvo06 Jan 2, 2026
74589b4
Minor fix and merge
datvo06 Jan 4, 2026
34a8b17
Minor
datvo06 Jan 4, 2026
8adab92
Linting
datvo06 Jan 5, 2026
0229ce9
Remove `agent.py` from handler and add docs on how to implement Agent…
kiranandcode Jan 5, 2026
cabeead
Fix RetryHandler not to use Template.replace() (#483)
datvo06 Jan 14, 2026
b65992e
Rename `handlers.llm.providers` to `completions` (#485)
eb8680 Jan 15, 2026
53356ad
Merge branch 'staging-llm' of https://github.com/basisresearch/effect…
datvo06 Jan 21, 2026
91c4fe0
Update test to include synthesis again
datvo06 Jan 21, 2026
dc9b013
Rebase to master
datvo06 Jan 21, 2026
a58f051
Revert minor change
datvo06 Jan 21, 2026
6e797f4
Remove uv lock
datvo06 Jan 21, 2026
169 changes: 166 additions & 3 deletions effectful/handlers/llm/synthesis.py
@@ -1,4 +1,30 @@
from effectful.ops.syntax import ObjectInterpretation
import collections
import collections.abc
import inspect
import linecache
import textwrap
import typing
from collections import ChainMap
from collections.abc import Callable
from typing import Any

import pydantic
from pydantic import Field

from effectful.handlers.llm import Template
from effectful.handlers.llm.completions import (
InstructionHandler,
OpenAIMessageContentListBlock,
)
from effectful.handlers.llm.encoding import EncodableAs, type_to_encodable_type
from effectful.ops.semantics import NotHandled, fwd, handler
from effectful.ops.syntax import ObjectInterpretation, defop, implements


@defop
def get_synthesis_context() -> ChainMap[str, Any] | None:
"""Get the current synthesis context for decoding synthesized code."""
raise NotHandled


class SynthesisError(Exception):
@@ -9,6 +35,143 @@ def __init__(self, message, code=None):
self.code = code


class SynthesizedFunction(pydantic.BaseModel):
"""Structured output for function synthesis.

Pydantic model representing synthesized code with function name and module code.
"""

function_name: str = Field(
...,
description="The name of the main function that satisfies the specification",
)
module_code: str = Field(
...,
description="Complete Python module code (no imports needed)",
)


@type_to_encodable_type.register(collections.abc.Callable)
class EncodableSynthesizedFunction(
EncodableAs[Callable, SynthesizedFunction],
):
"""Encodes Callable to SynthesizedFunction and vice versa."""

t = SynthesizedFunction

@classmethod
def encode(
cls, vl: Callable, context: ChainMap[str, Any] | None = None
) -> SynthesizedFunction:
"""Encode a Callable to a SynthesizedFunction.

Extracts the function name and source code.
"""
func_name = vl.__name__
try:
source = inspect.getsource(vl)
except (OSError, TypeError):
# If we can't get source, create a minimal representation
try:
sig = inspect.signature(vl)
source = f"def {func_name}{sig}:\n pass # Source unavailable"
except (ValueError, TypeError):
source = f"def {func_name}(...):\n pass # Source unavailable"

return SynthesizedFunction(
function_name=func_name, module_code=textwrap.dedent(source).strip()
)

# Counter for unique filenames
_decode_counter: typing.ClassVar[int] = 0

@classmethod
def decode(cls, vl: SynthesizedFunction) -> Callable:
"""Decode a SynthesizedFunction to a Callable.

Executes the module code and returns the named function.
"""
context: ChainMap[str, Any] | None = get_synthesis_context()
func_name = vl.function_name
module_code = textwrap.dedent(vl.module_code).strip()

cls._decode_counter += 1
filename = f"<synthesized:{func_name}:{cls._decode_counter}>"
lines = module_code.splitlines(keepends=True)
# Ensure last line has newline for linecache
if lines and not lines[-1].endswith("\n"):
lines[-1] += "\n"
linecache.cache[filename] = (
len(module_code),
None,
lines,
filename,
)

# Start with provided context or empty dict
exec_globals: dict[str, typing.Any] = {}
if context is not None:
exec_globals.update(context)

try:
code_obj = compile(module_code, filename, "exec")
exec(code_obj, exec_globals)
except SyntaxError as exc:
raise SynthesisError(
f"Syntax error in generated code: {exc}", module_code
) from exc
except Exception as exc:
raise SynthesisError(f"Evaluation failed: {exc!r}", module_code) from exc

if func_name not in exec_globals:
raise SynthesisError(
f"Function '{func_name}' not found after execution. "
f"Available names: {[k for k in exec_globals.keys() if not k.startswith('_')]}",
module_code,
)

func = exec_globals[func_name]
# Also attach source code directly for convenience
func.__source__ = module_code
func.__synthesized__ = vl
return func

@classmethod
def serialize(cls, vl: SynthesizedFunction) -> list[OpenAIMessageContentListBlock]:
return [{"type": "text", "text": vl.model_dump_json()}]


class ProgramSynthesis(ObjectInterpretation):
def __init__(self, *args, **kwargs):
raise NotImplementedError
"""Provides a `template` handler to instruct the LLM to generate code of the
right form and with the right type.

"""

@implements(Template.__apply__)
def _call(self, template, *args, **kwargs) -> None:
ret_type = template.__signature__.return_annotation
origin = typing.get_origin(ret_type)
ret_type = ret_type if origin is None else origin

if not (issubclass(ret_type, collections.abc.Callable)): # type: ignore[arg-type]
return fwd()

prompt_ext = textwrap.dedent(f"""
Given the specification above, generate a Python function satisfying the following specification and type signature.

<signature>{str(ret_type)}</signature>

<instructions>
1. Produce one block of Python code.
2. Do not include usage examples.
3. Return your response in <code> tags.
4. Do not return your response in markdown blocks.
5. Your output function def must be the final statement in the code block.
</instructions>
""").strip()

with (
handler(InstructionHandler(prompt_ext)),
handler({get_synthesis_context: lambda: template.__context__}),
):
return fwd()
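
A minimal, illustrative sketch of the encode/decode round trip implemented in this file. It assumes the names above are importable from `effectful.handlers.llm.synthesis` (the path of this diff) and supplies the synthesis context with an explicit handler, the same pattern `ProgramSynthesis._call` uses with `template.__context__`; it is not part of the diff.

```python
from collections import ChainMap

from effectful.handlers.llm.synthesis import (  # import path assumed from the file above
    EncodableSynthesizedFunction,
    SynthesizedFunction,
    get_synthesis_context,
)
from effectful.ops.semantics import handler


def count_occurrences(s):
    return s.count("a")


# Encoding direction: a plain callable becomes structured output carrying its source.
encoded = EncodableSynthesizedFunction.encode(count_occurrences)
assert encoded.function_name == "count_occurrences"
assert encoded.module_code.startswith("def count_occurrences")

# Decoding direction: the module code is executed and the named function returned.
# The synthesis context is supplied explicitly here (an empty ChainMap); inside
# ProgramSynthesis it comes from `template.__context__`.
with handler({get_synthesis_context: lambda: ChainMap({})}):
    fn = EncodableSynthesizedFunction.decode(encoded)

assert fn("banana") == 3                      # decoded into an ordinary callable
assert fn.__source__ == encoded.module_code   # source attached by decode
assert fn.__synthesized__ is encoded          # original structured output kept
```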
45 changes: 26 additions & 19 deletions tests/test_handlers_llm.py
@@ -1,11 +1,16 @@
import json
from collections.abc import Callable
from typing import Annotated

import pytest
from litellm import Choices, Message
from litellm.types.utils import ModelResponse

from effectful.handlers.llm import Template
from effectful.handlers.llm.completions import (
LiteLLMProvider,
RetryLLMHandler,
completion,
compute_response,
format_model_input,
)
@@ -44,22 +49,24 @@ def _call[**P](
return response


class SingleResponseLLMProvider[T](ObjectInterpretation):
"""Simplified mock provider that returns a single response for any prompt."""
class SingleResponseLLMProvider[T](LiteLLMProvider):
"""Mock provider that reuses LiteLLMProvider and overrides completion."""

def __init__(self, response: T):
"""Initialize with a single response string.

Args:
response: The response to return for any template call
"""
"""Initialize with a response value."""
super().__init__(model_name="mock")
self.response = response

@implements(Template.__apply__)
def _call[**P](
self, template: Template[P, T], *args: P.args, **kwargs: P.kwargs
) -> T:
return self.response
@implements(completion)
def _completion(self, *args, **kwargs) -> ModelResponse:
result = (
self.response
if isinstance(self.response, str)
else json.dumps({"value": self.response})
)
message = Message(role="assistant", content=result)
choice = Choices(index=0, message=message, finish_reason="stop")
return ModelResponse(model="mock", choices=[choice])


# Test templates from the notebook examples
@@ -124,18 +131,18 @@ def test_primes_decode_int():
assert isinstance(result, int)


@pytest.mark.xfail(reason="Synthesis handler not yet implemented")
def test_count_char_with_program_synthesis():
"""Test the count_char template with program synthesis."""
mock_code = """<code>
def count_occurrences(s):
return s.count('a')
</code>"""
mock_provider = SingleResponseLLMProvider(mock_code)
mock_provider = SingleResponseLLMProvider(
{
"function_name": "count_occurrences",
"module_code": "def count_occurrences(s):\n return s.count('a')",
}
)

with handler(mock_provider), handler(ProgramSynthesis()):
count_a = count_char("a")
assert callable(count_a)
assert callable(count_a), f"count_a is not callable: {count_a}"
assert count_a("banana") == 3
assert count_a("cherry") == 0

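
For reference, a sketch of the response shape the reworked mock provider produces and how the structured payload maps back onto `SynthesizedFunction`. The direct parsing at the end is purely illustrative of the data flow (the real pipeline goes through the `completion` and encoding handlers), and the import path for `SynthesizedFunction` is assumed from the diff above.

```python
import json

from litellm import Choices, Message
from litellm.types.utils import ModelResponse

from effectful.handlers.llm.synthesis import SynthesizedFunction  # path assumed

# A non-string response is wrapped as {"value": ...}, exactly as
# SingleResponseLLMProvider._completion does above.
structured = {
    "function_name": "count_occurrences",
    "module_code": "def count_occurrences(s):\n    return s.count('a')",
}
message = Message(role="assistant", content=json.dumps({"value": structured}))
response = ModelResponse(
    model="mock",
    choices=[Choices(index=0, message=message, finish_reason="stop")],
)

# Read the assistant content back and validate it against the pydantic model
# (illustration only; the handlers normally perform this step).
payload = json.loads(response.choices[0].message.content)["value"]
parsed = SynthesizedFunction.model_validate(payload)
assert parsed.function_name == "count_occurrences"
```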