6 changes: 6 additions & 0 deletions .env.example
@@ -22,6 +22,12 @@ TELEMETRY_LLM_PREVIEW_CHARS=800
# Comma-separated list of allowed origins for the API (set in production).
CORS_ALLOWED_ORIGINS=http://localhost:8000

# ─────────────────────────────────────────────────
# Data
# ─────────────────────────────────────────────────
# Limit number of epics loaded (useful for fast test runs).
# EPICS_LIMIT=1

# ─────────────────────────────────────────────────
# Generation Settings
# ─────────────────────────────────────────────────
1 change: 1 addition & 0 deletions .github/workflows/deploy-deno.yml
@@ -40,6 +40,7 @@ jobs:
deployctl deploy \
--project="${{ vars.DENO_DEPLOY_PROJECT }}" \
--prod \
--include=src/ui/dist \
--exclude=node_modules \
--exclude=.git \
--exclude=.github \
1 change: 1 addition & 0 deletions .github/workflows/deploy-staging.yml
@@ -40,6 +40,7 @@ jobs:
deployctl deploy \
--project="${{ vars.DENO_DEPLOY_PROJECT_STAGING }}" \
--prod \
--include=src/ui/dist \
--exclude=node_modules \
--exclude=.git \
--exclude=.github \
1 change: 1 addition & 0 deletions deno.json
@@ -2,6 +2,7 @@
"imports": {
"@std/assert": "jsr:@std/assert@^1.0.16",
"@std/expect": "jsr:@std/expect@^1.0.17",
"@std/dotenv": "jsr:@std/dotenv@0.225.6",
"@std/dotenv/load": "jsr:@std/dotenv@0.225.6/load",
"@std/http/file-server": "jsr:@std/http@^1.0.23/file-server",
"@std/path": "jsr:@std/path@^1.1.4",
43 changes: 39 additions & 4 deletions playwright.config.ts
@@ -1,5 +1,34 @@
import { defineConfig, devices } from "@playwright/test";
import { parse } from "@std/dotenv";

const readEnvFile = (): Record<string, string> => {
try {
const raw = Deno.readTextFileSync(".env");
return parse(raw);
} catch (e) {
if (e instanceof Deno.errors.NotFound) {
return {};
}
throw e;
}
};

const envFromFile = readEnvFile();
const getEnv = (key: string, fallback?: string) =>
Deno.env.get(key) ?? envFromFile[key] ?? fallback ?? "";

const llmBaseUrl = getEnv("LLM_BASE_URL", "https://openrouter.ai/api/v1");
const llmApiKey = getEnv("LLM_API_KEY");
const llmModel = getEnv("LLM_MODEL", "openai/gpt-4o-mini");

Comment on lines +20 to +23

P2: Avoid defaulting e2e LLM to OpenRouter without a key

The Playwright webServer now defaults LLM_BASE_URL to OpenRouter while leaving LLM_API_KEY empty when no .env/env vars are set. In a fresh checkout this makes the server pick an external base URL with no credentials, so any LLM call in the optimization flow will fail with 401s and the new e2e test will hang/fail. Consider falling back to the previous local LM Studio defaults unless LLM_* is explicitly provided, or require a non-empty key before using OpenRouter.
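For reference, the suggested fallback could look roughly like the sketch below. It is illustrative only: it reuses the `getEnv` helper defined above and hard-codes the old LM Studio values that the webServer env used to set; none of this is code from the PR.

```ts
// Sketch of the reviewer's suggestion: keep local LM Studio as the default
// unless LLM_* is explicitly provided, and refuse a remote URL without a key.
const explicitBaseUrl = getEnv("LLM_BASE_URL");
const llmBaseUrl = explicitBaseUrl || "http://127.0.0.1:1234/v1"; // old LMSTUDIO_BASE_URL
const llmApiKey = getEnv("LLM_API_KEY", explicitBaseUrl ? "" : "lm-studio");
const llmModel = getEnv("LLM_MODEL", "gpt-oss-120b"); // old LMSTUDIO_MODEL

const isLocal = /^https?:\/\/(localhost|127\.0\.0\.1)(:\d+)?/.test(llmBaseUrl);
if (!isLocal && !llmApiKey) {
  throw new Error(
    `LLM_BASE_URL (${llmBaseUrl}) is remote but LLM_API_KEY is empty; aborting e2e setup.`,
  );
}
```

The PR instead keeps the OpenRouter default and adds the config-time warning shown below.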

// Warn when targeting a remote LLM provider without an API key
const isLocalhost = /^https?:\/\/(localhost|127\.0\.0\.1)(:\d+)?/.test(llmBaseUrl);
if (!llmApiKey && !isLocalhost) {
console.warn(
`[playwright] LLM_API_KEY is empty while LLM_BASE_URL points to a remote provider (${llmBaseUrl}). ` +
"LLM calls will likely fail with 401. Set LLM_API_KEY in .env or environment.",
);
}
export default defineConfig({
testDir: "./tests/e2e",
timeout: 60_000,
@@ -18,10 +47,16 @@ export default defineConfig({
reuseExistingServer: !Deno.env.get("CI"),
timeout: 120_000,
env: {
LMSTUDIO_BASE_URL: "http://127.0.0.1:1234/v1",
LMSTUDIO_API_KEY: "lm-studio",
LMSTUDIO_MODEL: "gpt-oss-120b",
LMSTUDIO_JUDGE_MODEL: "gpt-oss-120b",
...Deno.env.toObject(),
LLM_BASE_URL: llmBaseUrl,
LLM_API_KEY: llmApiKey,
LLM_MODEL: llmModel,
POLL_ENABLED: "false",
EVAL_REPLICATES: "1",
OPT_CONCURRENCY: "3",
OPT_ITERATIONS: "1",
OPT_PATCH_CANDIDATES: "1",
EPICS_LIMIT: "1",
},
},
projects: [
9 changes: 5 additions & 4 deletions prompts/champion.base.md
@@ -8,13 +8,14 @@ Rules:
3. Each story MUST include:
- title (short, action-oriented)
- asA / iWant / soThat
- acceptanceCriteria: >= 2 items, objectively testable
- acceptanceCriteria: >= 1 item, objectively testable
4. Prefer acceptance criteria in Given/When/Then style.
5. Do NOT invent requirements. If something is unclear, put it in assumptions or
5. Keep output compact; omit optional fields unless needed.
6. Do NOT invent requirements. If something is unclear, put it in assumptions or
followUps.
6. Reflect constraints/nonFunctional/outOfScope from the Epic.
7. Reflect constraints/nonFunctional/outOfScope from the Epic.

Azure DevOps mapping:
Azure DevOps mapping (optional if requested):

- System.Title: story title
- System.Description: include As a / I want / So that in readable Markdown
9 changes: 5 additions & 4 deletions prompts/champion.md
@@ -8,13 +8,14 @@ Rules:
3. Each story MUST include:
- title (short, action-oriented)
- asA / iWant / soThat
- acceptanceCriteria: >= 2 items, objectively testable
- acceptanceCriteria: >= 1 item, objectively testable
4. Prefer acceptance criteria in Given/When/Then style.
5. Do NOT invent requirements. If something is unclear, put it in assumptions or
5. Keep output compact; omit optional fields unless needed.
6. Do NOT invent requirements. If something is unclear, put it in assumptions or
followUps.
6. Reflect constraints/nonFunctional/outOfScope from the Epic.
7. Reflect constraints/nonFunctional/outOfScope from the Epic.

Azure DevOps mapping:
Azure DevOps mapping (optional if requested):

- System.Title: story title
- System.Description: include As a / I want / So that in readable Markdown
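Both champion prompts now accept a single acceptance criterion and make the ADO mapping opt-in. A minimal story that satisfies the relaxed rules might look like the sketch below; the field names come from the prompt itself, while the surrounding object shape is only illustrative, not the repo's storyPackSchema.

```ts
// Illustrative only: a compact story that passes the relaxed prompt rules
// (>= 1 objectively testable criterion, GWT style, no optional ADO fields).
const exampleStory = {
  title: "Filter epics by status",
  asA: "product owner",
  iWant: "to filter the epic list by status",
  soThat: "I can focus on epics that still need stories",
  acceptanceCriteria: [
    "Given the epic list, When I select the 'Active' filter, Then only active epics are shown",
  ],
  // ado.fields omitted — optional fields are only included when needed (rule 5)
};
```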
6 changes: 6 additions & 0 deletions src/config.ts
@@ -102,6 +102,12 @@ const EnvSchema = z.object({
// ─────────────────────────────────────────────────
CORS_ALLOWED_ORIGINS: z.string().default(""),

// ─────────────────────────────────────────────────
// Data
// ─────────────────────────────────────────────────
/** Limit number of epics loaded (useful for fast test runs) */
EPICS_LIMIT: z.coerce.number().int().min(1).max(100).optional(),

// ─────────────────────────────────────────────────
// Generation Settings
// ─────────────────────────────────────────────────
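EPICS_LIMIT is only declared in the schema here; the diff does not show where it is consumed. A hypothetical consumer could apply it as below — the `limitEpics` helper, its call site, and the `./config.ts` import path are assumptions, not code from this PR.

```ts
import { env } from "./config.ts";

/** Hypothetical helper: cap the epics used per run so e2e/optimization runs stay fast. */
export function limitEpics<T>(epics: T[], limit = env.EPICS_LIMIT): T[] {
  return limit ? epics.slice(0, limit) : epics;
}
```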
37 changes: 17 additions & 20 deletions src/fpf/poll.ts
@@ -2,7 +2,7 @@
* PoLL - Panel of LLM Evaluators
*
* Implements FPF B.3 Trust & Assurance Calculus with:
* - 3 diverse judges (different temperatures for diversity)
* - N diverse judges (env-configured count and temperatures)
* - Per-criterion evaluation (decomposed INVEST + GWT)
* - WLNK aggregation: R_eff = max(0, min(R_i) - Φ(CL_min))
* - Full SCR audit trail
@@ -38,26 +38,23 @@ import {
// CONFIGURATION
// ═══════════════════════════════════════════════════════════════

const DEFAULT_JUDGES: JudgeConfig[] = [
{
id: "judge-1",
model: "gpt-4o-mini",
temperature: 0.3,
provider: "lmstudio",
},
{
id: "judge-2",
model: "gpt-4o-mini",
temperature: 0.5,
provider: "lmstudio",
},
{
id: "judge-3",
model: "gpt-4o-mini",
temperature: 0.7,
const clampTemp = (value: number) => Math.max(0, Math.min(2, value));

const buildDefaultJudges = (): JudgeConfig[] => {
const model = env.LMSTUDIO_JUDGE_MODEL ?? env.LMSTUDIO_MODEL;
const baseTemp = env.POLL_TEMP_BASE;
const spread = env.POLL_TEMP_SPREAD;
const count = env.POLL_NUM_JUDGES;

return Array.from({ length: count }, (_, index) => ({
id: `judge-${index + 1}`,
model,
temperature: clampTemp(baseTemp + index * spread),
provider: "lmstudio",
},
];
}));
};

const DEFAULT_JUDGES = buildDefaultJudges();

const CRITERIA_WEIGHTS: Record<EvaluationCriterion, number> = {
[EvaluationCriterion.CORRECTNESS]: 0.2,
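The panel is now sized and tempered from the environment instead of a hard-coded trio. Assuming defaults of POLL_NUM_JUDGES=3, POLL_TEMP_BASE=0.3, and POLL_TEMP_SPREAD=0.2 (the actual env defaults are not shown in this diff), buildDefaultJudges reproduces the old 0.3 / 0.5 / 0.7 spread:

```ts
// Standalone worked example with the assumed env values above.
const count = 3;       // POLL_NUM_JUDGES
const baseTemp = 0.3;  // POLL_TEMP_BASE
const spread = 0.2;    // POLL_TEMP_SPREAD
const clampTemp = (value: number) => Math.max(0, Math.min(2, value));

const temperatures = Array.from(
  { length: count },
  (_, index) => clampTemp(baseTemp + index * spread),
);
console.log(temperatures); // [0.3, 0.5, 0.7] — same spread as the removed hard-coded judges
```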
17 changes: 14 additions & 3 deletions src/generator.ts
@@ -9,7 +9,8 @@ import { parseAcceptanceCriteria } from "./utils/acceptanceCriteria.ts";
export const baseStoryAgent = new Agent({
id: "story-generator",
name: "Story Generator",
instructions: "You generate Azure DevOps user stories from epics.",
instructions:
"You generate Azure DevOps user stories from epics. Always return JSON that matches the provided schema and keep the response compact.",
model: makeGeneratorModel(),
});

@@ -37,6 +38,14 @@ type ValidationFailure = {
issues: string[];
};

const SCHEMA_GUARDRAILS = [
"Output MUST match the provided JSON schema.",
"Keep the response compact; omit optional fields unless needed.",
"Include `acceptanceCriteria` with at least 1 item per story.",
"If you include `ado.fields`, keep each value brief.",
"Do not omit or rename required schema fields.",
].join("\n");

/**
* Helper to build provider-specific options including seed.
* LM Studio accepts `seed` in the OpenAI-compatible API.
@@ -324,6 +333,8 @@ export async function generateStoryPack(
},
];

const guardedInstructions = `${candidatePrompt}\n\nSchema guardrails:\n${SCHEMA_GUARDRAILS}`;

let storyPack: StoryPack | null = null;
let rawText = "";
let error: string | undefined;
@@ -339,7 +350,7 @@
{ name: "story-generator", model: env.LMSTUDIO_MODEL },
() =>
baseStoryAgent.generate(messages, {
instructions: candidatePrompt,
instructions: guardedInstructions,
structuredOutput: {
schema: storyPackSchema,
jsonPromptInjection: true,
@@ -441,7 +452,7 @@ export async function generateStoryPack(
return {
storyPack,
rawText,
instructions: candidatePrompt,
instructions: guardedInstructions,
trace,
gammaTime: startedAt,
seed,
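The guardrails above are prompt-side only; this diff does not add a corresponding post-generation check. If one were wanted, a minimal sketch could enforce the >= 1 acceptance-criterion rule after parsing. The story shape below is assumed from the prompts, not taken from storyPackSchema.

```ts
// Hypothetical post-parse check mirroring the schema guardrails.
type SketchStory = { title: string; acceptanceCriteria: string[] };

export function checkGuardrails(stories: SketchStory[]): string[] {
  const issues: string[] = [];
  for (const story of stories) {
    if (story.acceptanceCriteria.length < 1) {
      issues.push(`Story "${story.title}" has no acceptance criteria`);
    }
  }
  return issues; // empty means every story met the guardrail
}
```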