Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,15 @@ class Settings(BaseSettings):
cache_ttl: str = "2d" # HTTP Cache-Control max-age (e.g., "2d", "48h", "172800s")
cache_ttl_seconds: int = 172800 # Computed from cache_ttl for Cache-Control header
log_level: str = "INFO" # Logging level (DEBUG, INFO, WARNING, ERROR)
# Wall-clock budget for a single Claude API call. Kept comfortably below the
# API Gateway HTTP API integration timeout (a hard 30s ceiling that cannot be
# raised, unlike the Lambda timeout) so we return a clean, handled error
# instead of the gateway severing the connection with an opaque 503.
anthropic_timeout_seconds: float = 27.0
# SDK auto-retries. Retries share the wall-clock budget above, so a fast
# transient failure (e.g. an overloaded 529) can still be retried while the
# total time can never exceed anthropic_timeout_seconds.
anthropic_max_retries: int = 2
model_config = SettingsConfigDict(env_file=".env")

@field_validator("cache_ttl_seconds", mode="before")
Expand Down
67 changes: 64 additions & 3 deletions app/explain.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import asyncio
import logging

from anthropic import AsyncAnthropic
from anthropic import APIConnectionError, APIStatusError, APITimeoutError, AsyncAnthropic

from app.cache import CacheProvider, cache_response, get_cached_response
from app.explain_api import CostBreakdown, ExplainRequest, ExplainResponse, TokenUsage
Expand All @@ -16,13 +17,19 @@
MAX_CODE_LENGTH = 10000 # 10K chars should be enough for most source files
MAX_ASM_LENGTH = 20000 # 20K chars for assembly output

# Default wall-clock budget for the Claude call. Overridden per request from
# settings.anthropic_timeout_seconds; the default keeps direct callers (tests,
# local server) bounded below the API Gateway 30s integration ceiling too.
DEFAULT_ANTHROPIC_DEADLINE_SECONDS = 27.0


async def process_request(
body: ExplainRequest,
client: AsyncAnthropic,
prompt: Prompt,
metrics_provider: MetricsProvider,
cache_provider: CacheProvider | None = None,
deadline_seconds: float = DEFAULT_ANTHROPIC_DEADLINE_SECONDS,
) -> ExplainResponse:
"""Process a request and return the response.

Expand All @@ -35,6 +42,7 @@ async def process_request(
prompt: Prompt instance for generating messages
metrics_provider: metrics provider for tracking stats
cache_provider: cache provider for storing/retrieving responses
deadline_seconds: wall-clock budget for the Claude call before giving up

Returns:
An ExplainResponse Pydantic model
Expand All @@ -57,7 +65,7 @@ async def process_request(
return cached_response

# Cache miss or no cache - proceed with Anthropic API call
response = await _call_anthropic_api(body, client, prompt, metrics_provider)
response = await _call_anthropic_api(body, client, prompt, metrics_provider, deadline_seconds)

# Cache the response (if cache provider is available). Don't cache
# error responses — they consume real tokens but produce no useful
Expand All @@ -69,11 +77,49 @@ async def process_request(
return response


def _transient_error_response(
    body: ExplainRequest,
    model: str,
    metrics_provider: MetricsProvider,
    error: Exception,
) -> ExplainResponse:
    """Convert a timed-out or transiently failed Claude call into an error response.

    The failure is returned to the caller as an ``ExplainResponse`` with
    ``status="error"`` rather than raised, so the client receives a clear,
    retryable message. Token usage is reported as zero across the board
    because the call did not complete.

    Args:
        body: the request being processed; its language, compiler and
            instruction set are recorded as metric properties.
        model: model name to echo back in the response.
        metrics_provider: sink for the request properties and failure counters.
        error: the exception that ended the call. ``TimeoutError`` and
            ``APITimeoutError`` are reported as timeouts; anything else is
            reported as a generic transient failure.

    Returns:
        An error ``ExplainResponse`` carrying a user-facing message.
    """
    # Classify once; both the user-facing text and the metric name depend on it.
    timed_out = isinstance(error, (TimeoutError, APITimeoutError))

    if timed_out:
        message_text = (
            "Claude Explain took too long to respond — the input may be very large "
            "or the model is under heavy load. Please try again in a moment."
        )
    else:
        message_text = "Claude Explain is temporarily unavailable. Please try again in a moment."

    LOGGER.warning("Anthropic call failed (%s): %s", type(error).__name__, error)

    # Record the same request properties as a normal (uncached) request.
    metrics_provider.set_property("language", body.language)
    metrics_provider.set_property("compiler", body.compiler)
    metrics_provider.set_property("instructionSet", body.instructionSet or "unknown")
    metrics_provider.set_property("cached", "false")

    # Count the request itself, then the specific failure category.
    metrics_provider.put_metric("ClaudeExplainRequest", 1)
    failure_metric = "ClaudeExplainTimeout" if timed_out else "ClaudeExplainTransientError"
    metrics_provider.put_metric(failure_metric, 1)

    zero_usage = TokenUsage(inputTokens=0, outputTokens=0, totalTokens=0)
    return ExplainResponse(
        status="error",
        message=message_text,
        model=model,
        usage=zero_usage,
    )


async def _call_anthropic_api(
body: ExplainRequest,
client: AsyncAnthropic,
prompt: Prompt,
metrics_provider: MetricsProvider,
deadline_seconds: float = DEFAULT_ANTHROPIC_DEADLINE_SECONDS,
) -> ExplainResponse:
"""Make the actual call to Anthropic API and create response.

Expand All @@ -98,7 +144,22 @@ async def _call_anthropic_api(
prompt_data["model"],
bool(prompt_data.get("thinking")),
)
message = await client.messages.create(**prompt_data)
# Bound the call to a wall-clock budget below the API Gateway HTTP API
# integration timeout (a hard 30s ceiling). Without this, a slow generation
# runs to completion inside the Lambda — billing tokens we never deliver —
# while the gateway has already returned an opaque 503 to the user. Failing
# within the budget lets us surface a clear, retryable message instead.
try:
async with asyncio.timeout(deadline_seconds):
message = await client.messages.create(**prompt_data)
except (TimeoutError, APITimeoutError, APIConnectionError) as e:
return _transient_error_response(body, prompt_data["model"], metrics_provider, e)
except APIStatusError as e:
# Surface only transient upstream failures gracefully; let genuine
# client errors (e.g. a malformed 400) propagate as a real failure.
if e.status_code in (408, 409, 429, 500, 502, 503, 504, 529):
return _transient_error_response(body, prompt_data["model"], metrics_provider, e)
raise

# Extract usage information
input_tokens = message.usage.input_tokens
Expand Down
7 changes: 6 additions & 1 deletion app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,11 @@ async def lifespan(app: FastAPI):

# Store shared resources in app.state
app.state.settings = settings
app.state.anthropic_client = AsyncAnthropic(api_key=settings.anthropic_api_key)
app.state.anthropic_client = AsyncAnthropic(
api_key=settings.anthropic_api_key,
timeout=settings.anthropic_timeout_seconds,
max_retries=settings.anthropic_max_retries,
)

# Load the prompt configuration
prompt_config_path = Path(__file__).parent / "prompt.yaml"
Expand Down Expand Up @@ -133,4 +137,5 @@ async def explain(explain_request: ExplainRequest, request: Request) -> ExplainR
request.app.state.prompt,
metrics_provider,
cache_provider,
deadline_seconds=request.app.state.settings.anthropic_timeout_seconds,
)
22 changes: 22 additions & 0 deletions app/test_explain.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,28 @@ async def test_returns_error_when_no_text_block(self, sample_request, noop_metri
assert response.usage.inputTokens == 100
assert response.usage.outputTokens == 50

@pytest.mark.asyncio
async def test_returns_error_when_call_exceeds_deadline(self, sample_request, noop_metrics):
    """An overrunning Claude call yields a structured error instead of hanging.

    The mocked ``messages.create`` sleeps for 1.0s while the request is given
    a 0.01s deadline, so the wall-clock budget must expire first. The result
    should be the "too long" error response with zeroed token usage, returned
    well inside the API Gateway 30s window rather than left to the gateway to
    sever with an opaque 503.
    """
    import asyncio

    async def stalled_create(**_ignored):
        # Far longer than the 0.01s deadline passed below.
        await asyncio.sleep(1.0)

    client = MagicMock()
    client.messages.create = AsyncMock(side_effect=stalled_create)

    prompt = Prompt(Path("app/prompt.yaml"))
    result = await process_request(
        sample_request,
        client,
        prompt,
        noop_metrics,
        deadline_seconds=0.01,
    )

    assert result.status == "error"
    assert result.explanation is None
    assert "too long" in result.message
    assert result.usage is not None
    assert result.usage.totalTokens == 0


class TestPromptValidation:
"""Validation rules enforced at Prompt construction."""
Expand Down
Loading