diff --git a/src/domain/customer-insight/CustomerInsightService.ts b/src/domain/customer-insight/CustomerInsightService.ts index 7fc2d94d..45bdd886 100644 --- a/src/domain/customer-insight/CustomerInsightService.ts +++ b/src/domain/customer-insight/CustomerInsightService.ts @@ -15,6 +15,7 @@ import Anthropic from "@anthropic-ai/sdk" import { z } from "zod" import { Result } from "@/domain/shared/types/Result" +import { extractJsonObject } from "@/lib/ai/extract-json-object" // ----------------------------------------------------------- // Types @@ -91,23 +92,6 @@ const customerInsightSchema = z.object({ .transform((v) => Math.max(0, Math.min(1, v))), }) -// ----------------------------------------------------------- -// Helpers -// ----------------------------------------------------------- - -// Tolerant JSON extraction: finds the outermost JSON object in the response, -// even when the model wraps it in markdown code blocks or surrounding prose. -// Reason: Claude Sonnet 4.6 sometimes adds explanatory text despite system-prompt -// instructions. See salvage-vision/CLAUDE.md for the same learning. -function extractJsonObject(text: string): string { - const start = text.indexOf("{") - const end = text.lastIndexOf("}") - if (start === -1 || end === -1 || end <= start) { - return text.trim() - } - return text.slice(start, end + 1) -} - // ----------------------------------------------------------- // System prompt // ----------------------------------------------------------- diff --git a/src/domain/voice-log/VoiceInterpretationService.test.ts b/src/domain/voice-log/VoiceInterpretationService.test.ts index f87eef2a..581a75e1 100644 --- a/src/domain/voice-log/VoiceInterpretationService.test.ts +++ b/src/domain/voice-log/VoiceInterpretationService.test.ts @@ -182,6 +182,83 @@ describe("VoiceInterpretationService", () => { expect(result.value.customerName).toBe("Anna") }) + // Regression: Sonnet 4.6 sometimes adds prose around JSON. See salvage-vision/CLAUDE.md. 
+ it("handles prose before JSON object", async () => { + const jsonContent = JSON.stringify({ + bookingId: "booking-1", + customerName: "Anna Johansson", + horseName: "Stella", + markAsCompleted: true, + workPerformed: "Klart", + horseObservation: null, + horseNoteCategory: "farrier", + nextVisitWeeks: 8, + confidence: 0.9, + }) + + mockCreate.mockResolvedValue({ + content: [{ type: "text", text: "Här är tolkningen:\n" + jsonContent }], + }) + + const result = await service.interpret("klar med Anna", SAMPLE_BOOKINGS) + expect(result.isSuccess).toBe(true) + expect(result.value.bookingId).toBe("booking-1") + }) + + it("handles prose after JSON object", async () => { + const jsonContent = JSON.stringify({ + bookingId: "booking-1", + customerName: "Anna", + horseName: "Stella", + markAsCompleted: true, + workPerformed: "Klart", + horseObservation: null, + horseNoteCategory: "farrier", + nextVisitWeeks: 8, + confidence: 0.9, + }) + + mockCreate.mockResolvedValue({ + content: [ + { type: "text", text: jsonContent + "\n\nHoppas detta hjälper!" 
}, + ], + }) + + const result = await service.interpret("klar med Anna", SAMPLE_BOOKINGS) + expect(result.isSuccess).toBe(true) + expect(result.value.bookingId).toBe("booking-1") + }) + + it("handles markdown code block wrapped in prose", async () => { + const jsonContent = JSON.stringify({ + bookingId: "booking-1", + customerName: "Anna", + horseName: "Stella", + markAsCompleted: true, + workPerformed: "Klart", + horseObservation: null, + horseNoteCategory: "farrier", + nextVisitWeeks: 8, + confidence: 0.9, + }) + + mockCreate.mockResolvedValue({ + content: [ + { + type: "text", + text: + "Visst, här kommer tolkningen:\n```json\n" + + jsonContent + + "\n```\nHör av dig om något.", + }, + ], + }) + + const result = await service.interpret("klar med Anna", SAMPLE_BOOKINGS) + expect(result.isSuccess).toBe(true) + expect(result.value.bookingId).toBe("booking-1") + }) + it("handles invalid JSON from LLM", async () => { mockCreate.mockResolvedValue({ content: [{ type: "text", text: "This is not JSON" }], @@ -605,6 +682,85 @@ describe("VoiceInterpretationService", () => { expect(result.value.cleanedText).toBe("Allt bra.") }) + // Regression: LLMs sometimes add prose around JSON. See salvage-vision/CLAUDE.md. 
+ it("handles prose before JSON object", async () => { + const json = JSON.stringify({ + cleanedText: "Allt bra.", + isHealthRelated: false, + horseNoteCategory: null, + suggestedNextWeeks: null, + }) + + mockCreate.mockResolvedValue({ + content: [ + { type: "text", text: "Här är den uppstädade texten:\n" + json }, + ], + }) + + const result = await service.interpretQuickNote("allt bra", { + customerName: "Erik", + horseName: "Blansen", + serviceType: "Hovvård", + }) + + expect(result.isSuccess).toBe(true) + expect(result.value.cleanedText).toBe("Allt bra.") + }) + + it("handles prose after JSON object", async () => { + const json = JSON.stringify({ + cleanedText: "Allt bra.", + isHealthRelated: false, + horseNoteCategory: null, + suggestedNextWeeks: null, + }) + + mockCreate.mockResolvedValue({ + content: [ + { type: "text", text: json + "\n\nHör av dig om du behöver något." }, + ], + }) + + const result = await service.interpretQuickNote("allt bra", { + customerName: "Erik", + horseName: "Blansen", + serviceType: "Hovvård", + }) + + expect(result.isSuccess).toBe(true) + expect(result.value.cleanedText).toBe("Allt bra.") + }) + + it("handles markdown code block wrapped in prose", async () => { + const json = JSON.stringify({ + cleanedText: "Allt bra.", + isHealthRelated: false, + horseNoteCategory: null, + suggestedNextWeeks: null, + }) + + mockCreate.mockResolvedValue({ + content: [ + { + type: "text", + text: + "Visst, här kommer resultatet:\n```json\n" + + json + + "\n```\nHör av dig om något.", + }, + ], + }) + + const result = await service.interpretQuickNote("allt bra", { + customerName: "Erik", + horseName: "Blansen", + serviceType: "Hovvård", + }) + + expect(result.isSuccess).toBe(true) + expect(result.value.cleanedText).toBe("Allt bra.") + }) + it("handles API error gracefully", async () => { mockCreate.mockRejectedValue(new Error("API quota exceeded")) diff --git a/src/domain/voice-log/VoiceInterpretationService.ts 
b/src/domain/voice-log/VoiceInterpretationService.ts index a6a2b3e3..edb4d58c 100644 --- a/src/domain/voice-log/VoiceInterpretationService.ts +++ b/src/domain/voice-log/VoiceInterpretationService.ts @@ -17,6 +17,7 @@ import Anthropic from "@anthropic-ai/sdk" import { z } from "zod" import { Result } from "@/domain/shared" +import { extractJsonObject } from "@/lib/ai/extract-json-object" // ----------------------------------------------------------- // Types @@ -124,22 +125,6 @@ function buildSystemPrompt(vocabularyPrompt?: string): string { return BASE_SYSTEM_PROMPT + "\n" + vocabularyPrompt } -/** - * Strip markdown code block wrappers (```json ... ```) that LLMs sometimes add - * despite being told to return raw JSON. - */ -function stripMarkdownCodeBlock(text: string): string { - const trimmed = text.trim() - if (trimmed.startsWith("```")) { - // Remove opening ``` (with optional language tag) and closing ``` - return trimmed - .replace(/^```(?:json)?\s*\n?/, "") - .replace(/\n?```\s*$/, "") - .trim() - } - return trimmed -} - export class VoiceInterpretationService { private apiKey: string | undefined @@ -209,7 +194,7 @@ Transkribering: }) } - const cleanedText = stripMarkdownCodeBlock(content.text) + const cleanedText = extractJsonObject(content.text) const rawParsed = JSON.parse(cleanedText) const validated = interpretedVoiceLogSchema.safeParse(rawParsed) @@ -282,7 +267,7 @@ Transkribering: }) } - const cleanedText = stripMarkdownCodeBlock(content.text) + const cleanedText = extractJsonObject(content.text) const rawParsed = JSON.parse(cleanedText) const validated = quickNoteSchema.safeParse(rawParsed) diff --git a/src/lib/ai/extract-json-object.test.ts b/src/lib/ai/extract-json-object.test.ts new file mode 100644 index 00000000..e2e7f173 --- /dev/null +++ b/src/lib/ai/extract-json-object.test.ts @@ -0,0 +1,84 @@ +import { describe, it, expect } from "vitest" +import { extractJsonObject } from "./extract-json-object" + +describe("extractJsonObject", () => { + 
it("returns plain JSON as-is", () => { + const json = '{"foo":"bar","n":1}' + expect(extractJsonObject(json)).toBe(json) + }) + + it("trims surrounding whitespace on plain JSON", () => { + const json = '{"foo":"bar"}' + expect(extractJsonObject(` \n${json}\n `)).toBe(json) + }) + + it("strips markdown code fences (```json)", () => { + const json = '{"foo":"bar"}' + expect(extractJsonObject(`\`\`\`json\n${json}\n\`\`\``)).toBe(json) + }) + + it("strips markdown code fences without language tag (```)", () => { + const json = '{"foo":"bar"}' + expect(extractJsonObject(`\`\`\`\n${json}\n\`\`\``)).toBe(json) + }) + + it("strips prose before the JSON object", () => { + const json = '{"foo":"bar"}' + expect(extractJsonObject(`Här är analysen:\n${json}`)).toBe(json) + }) + + it("strips prose after the JSON object", () => { + const json = '{"foo":"bar"}' + expect(extractJsonObject(`${json}\n\nHoppas detta hjälper!`)).toBe(json) + }) + + it("strips prose both before and after JSON", () => { + const json = '{"foo":"bar"}' + expect( + extractJsonObject(`Visst, här:\n${json}\n\nHör av dig om något.`) + ).toBe(json) + }) + + it("handles markdown code block wrapped in prose", () => { + const json = '{"foo":"bar"}' + expect( + extractJsonObject( + `Visst, här kommer analysen:\n\`\`\`json\n${json}\n\`\`\`\nHör av dig.` + ) + ).toBe(json) + }) + + it("preserves nested objects (returns full outer object via lastIndexOf)", () => { + const json = '{"outer":{"inner":1},"x":2}' + expect(extractJsonObject(`Result:\n${json}\nDone`)).toBe(json) + }) + + it("preserves braces inside string values", () => { + const json = '{"text":"this has {braces} inside","n":1}' + expect(extractJsonObject(json)).toBe(json) + }) + + it("returns trimmed input when no opening brace exists", () => { + expect(extractJsonObject("This is not JSON at all")).toBe( + "This is not JSON at all" + ) + }) + + it("returns trimmed input when no closing brace exists", () => { + expect(extractJsonObject("Plain text { missing 
/**
 * Tolerant JSON-object extractor for LLM responses.
 *
 * Finds the JSON object in a text response, even when the model wraps it in
 * markdown code fences or surrounding prose.
 *
 * Reason: Claude (and other LLMs) sometimes add explanatory prose around the
 * JSON despite system-prompt instructions like "answer with JSON only". This
 * mirrors the learning captured in `salvage-vision/CLAUDE.md`.
 *
 * Strategy, in order:
 * 1. No `{...}` pair at all: return the trimmed input, letting downstream
 *    `JSON.parse` surface the underlying issue.
 * 2. The slice from the first `{` to the last `}` parses: return it. This is
 *    the common case (plain JSON, code fences, prose without braces).
 * 3. Otherwise the prose itself contains braces (e.g. `Use {x}: {"a":1}` or
 *    `{"a":1} ... closing } matters`). Walk the balanced `{...}` candidates
 *    from left to right and return the first one that parses.
 * 4. Nothing parses: return the first-to-last slice so the caller's
 *    `JSON.parse` fails on the most informative text.
 *
 * @param text Raw text content of the model response.
 * @returns The extracted JSON object text, or the trimmed input when no
 *   `{...}` pair exists. The result is not guaranteed to be valid JSON.
 */
export function extractJsonObject(text: string): string {
  const start = text.indexOf("{")
  const end = text.lastIndexOf("}")
  if (start === -1 || end === -1 || end <= start) {
    return text.trim()
  }

  const outermost = text.slice(start, end + 1)
  if (isParsableJson(outermost)) {
    return outermost
  }

  // Braces in the surrounding prose broke the simple slice. Try each balanced
  // candidate in document order.
  let open = start
  while (open !== -1) {
    const close = findMatchingBrace(text, open)
    if (close === -1) {
      // Unbalanced opener (e.g. a stray "{" in prose): try the next one.
      open = text.indexOf("{", open + 1)
      continue
    }

    const candidate = text.slice(open, close + 1)
    if (isParsableJson(candidate)) {
      return candidate
    }

    // Skip past the whole balanced region rather than descending into it, so
    // a nested fragment of a broken outer object is never returned as if it
    // were the answer.
    open = text.indexOf("{", close + 1)
  }

  return outermost
}

/** Returns true when `candidate` is accepted by `JSON.parse`. */
function isParsableJson(candidate: string): boolean {
  try {
    JSON.parse(candidate)
    return true
  } catch {
    return false
  }
}

/**
 * Returns the index of the `}` that balances the `{` at `open`, or -1 when
 * the text ends first. Braces inside double-quoted strings (with backslash
 * escapes) are ignored, matching JSON string syntax.
 */
function findMatchingBrace(text: string, open: number): number {
  let depth = 0
  let inString = false
  let escaped = false

  for (let i = open; i < text.length; i++) {
    const ch = text.charAt(i)

    if (inString) {
      if (escaped) {
        escaped = false
      } else if (ch === "\\") {
        escaped = true
      } else if (ch === '"') {
        inString = false
      }
      continue
    }

    if (ch === '"') {
      inString = true
    } else if (ch === "{") {
      depth++
    } else if (ch === "}") {
      depth--
      if (depth === 0) {
        return i
      }
    }
  }

  return -1
}