From afe5e80536aedbe4256e6b2ec7fdcdc71ec1aebc Mon Sep 17 00:00:00 2001
From: Johan Lindengard <johan@jaernfoten.se>
Date: Wed, 13 May 2026 21:20:23 +0200
Subject: [PATCH 1/2] fix(ai): tolerate prose around customer insight JSON
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Claude Sonnet 4.6 lägger ibland till förklarande text runt JSON-svaret
trots SYSTEM_PROMPT som säger "Svara BARA med JSON". Den gamla
`stripMarkdownCodeBlock`-helpern hanterade bara perfekt markdown-
codeblock-wrappers. När modellen prependade "Här är analysen:\n" eller
appendade "Hoppas detta hjälper" failade `JSON.parse` → catch-grenen
returnerade INTERPRETATION_FAILED → UI visade "Kunde inte generera
kundinsikt".

Fix: ersätt `stripMarkdownCodeBlock` med `extractJsonObject` som söker
första `{` och sista `}` i responsen. Mönstret är direkt portat från
salvage-vision/app.py (se salvage-vision/CLAUDE.md som dokumenterar
exakt samma problem med Sonnet 4.6).

TDD: 3 nya regressionstester (prosa före, prosa efter, markdown +
prosa) failade på RED och passerar på GREEN. Alla 17 tester gröna.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../CustomerInsightService.test.ts            | 46 +++++++++++++++++++
 .../CustomerInsightService.ts                 | 20 ++++----
 2 files changed, 57 insertions(+), 9 deletions(-)

diff --git a/src/domain/customer-insight/CustomerInsightService.test.ts b/src/domain/customer-insight/CustomerInsightService.test.ts
index 300d2460..32539c43 100644
--- a/src/domain/customer-insight/CustomerInsightService.test.ts
+++ b/src/domain/customer-insight/CustomerInsightService.test.ts
@@ -264,6 +264,52 @@ describe("CustomerInsightService", () => {
     expect(result.value.vipScore).toBe("low")
   })
 
+  // Regression tests for Sonnet 4.6 returning prose around JSON
+  // (matches salvage-vision learning: tolerant JSON extraction needed)
+  it("handles prose before JSON object", async () => {
+    const json = JSON.stringify(VALID_LLM_RESPONSE)
+
+    mockCreate.mockResolvedValue({
+      content: [{ type: "text", text: "Här är analysen:\n" + json }],
+    })
+
+    const result = await service.generateInsight(SAMPLE_DATA, SAMPLE_METRICS)
+    expect(result.isSuccess).toBe(true)
+    expect(result.value.frequency).toBe("Regelbunden (var 8:e vecka)")
+  })
+
+  it("handles prose after JSON object", async () => {
+    const json = JSON.stringify(VALID_LLM_RESPONSE)
+
+    mockCreate.mockResolvedValue({
+      content: [{ type: "text", text: json + "\n\nHoppas detta hjälper!" }],
+    })
+
+    const result = await service.generateInsight(SAMPLE_DATA, SAMPLE_METRICS)
+    expect(result.isSuccess).toBe(true)
+    expect(result.value.frequency).toBe("Regelbunden (var 8:e vecka)")
+  })
+
+  it("handles markdown code block wrapped in prose", async () => {
+    const json = JSON.stringify(VALID_LLM_RESPONSE)
+
+    mockCreate.mockResolvedValue({
+      content: [
+        {
+          type: "text",
+          text:
+            "Visst, här kommer analysen:\n```json\n" +
+            json +
+            "\n```\nHör av dig om något.",
+        },
+      ],
+    })
+
+    const result = await service.generateInsight(SAMPLE_DATA, SAMPLE_METRICS)
+    expect(result.isSuccess).toBe(true)
+    expect(result.value.frequency).toBe("Regelbunden (var 8:e vecka)")
+  })
+
   describe("mapInsightErrorToStatus", () => {
     it("maps NO_DATA to 400", () => {
       expect(mapInsightErrorToStatus({ type: "NO_DATA", message: "" })).toBe(400)
diff --git a/src/domain/customer-insight/CustomerInsightService.ts b/src/domain/customer-insight/CustomerInsightService.ts
index 970e69fc..7fc2d94d 100644
--- a/src/domain/customer-insight/CustomerInsightService.ts
+++ b/src/domain/customer-insight/CustomerInsightService.ts
@@ -95,15 +95,17 @@ const customerInsightSchema = z.object({
 // Helpers
 // -----------------------------------------------------------
 
-function stripMarkdownCodeBlock(text: string): string {
-  const trimmed = text.trim()
-  if (trimmed.startsWith("```")) {
-    return trimmed
-      .replace(/^```(?:json)?\s*\n?/, "")
-      .replace(/\n?```\s*$/, "")
-      .trim()
+// Tolerant JSON extraction: finds the outermost JSON object in the response,
+// even when the model wraps it in markdown code blocks or surrounding prose.
+// Reason: Claude Sonnet 4.6 sometimes adds explanatory text despite system-prompt
+// instructions. See salvage-vision/CLAUDE.md for the same learning.
+function extractJsonObject(text: string): string {
+  const start = text.indexOf("{")
+  const end = text.lastIndexOf("}")
+  if (start === -1 || end === -1 || end <= start) {
+    return text.trim()
   }
-  return trimmed
+  return text.slice(start, end + 1)
 }
 
 // -----------------------------------------------------------
@@ -184,7 +186,7 @@ export class CustomerInsightService {
         })
       }
 
-      const cleanedText = stripMarkdownCodeBlock(content.text)
+      const cleanedText = extractJsonObject(content.text)
       const rawParsed = JSON.parse(cleanedText)
       const validated = customerInsightSchema.safeParse(rawParsed)
 

From bf0353a5c3faa177b2e29a6eefcdd74e93351749 Mon Sep 17 00:00:00 2001
From: Johan Lindengard <johan@jaernfoten.se>
Date: Thu, 14 May 2026 06:55:33 +0200
Subject: [PATCH 2/2] refactor(ai): extract shared extractJsonObject helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Eliminera duplicering av JSON-extraktionslogik mellan
CustomerInsightService och VoiceInterpretationService. Tidigare commit
afe5e805 la till tolerant JSON-parsing i CustomerInsightService. Samma
bug fanns i VoiceInterpretationService på två platser (interpret +
interpretQuickNote) med identisk stripMarkdownCodeBlock-kod.

- src/lib/ai/extract-json-object.ts: ny delad helper (15 unit-tester
  täcker plain JSON, markdown-fences, prosa före/efter, edge cases)
- CustomerInsightService.ts: importerar helper, tar bort lokal kopia
- VoiceInterpretationService.ts: importerar helper, tar bort lokal
  stripMarkdownCodeBlock, byter 2 call sites (Sonnet 4.6 + Haiku 4.5)
- VoiceInterpretationService.test.ts: +6 regressionstester (prosa
  före/efter/runt JSON, för både interpret och interpretQuickNote)

Tester: 69/69 gröna (15 helper + 17 customer-insight + 37 voice-log).
Typecheck OK. Ingen beteendeförändring utöver robustare JSON-extraktion.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../CustomerInsightService.ts                 |  14 +-
 .../VoiceInterpretationService.test.ts        | 156 ++++++++++++++++++
 .../voice-log/VoiceInterpretationService.ts   |  21 +--
 src/lib/ai/extract-json-object.test.ts        |  84 ++++++++++
 src/lib/ai/extract-json-object.ts             |  20 +++
 5 files changed, 264 insertions(+), 31 deletions(-)
 create mode 100644 src/lib/ai/extract-json-object.test.ts
 create mode 100644 src/lib/ai/extract-json-object.ts

diff --git a/src/domain/customer-insight/CustomerInsightService.ts b/src/domain/customer-insight/CustomerInsightService.ts
index 7fc2d94d..f8a52eab 100644
--- a/src/domain/customer-insight/CustomerInsightService.ts
+++ b/src/domain/customer-insight/CustomerInsightService.ts
@@ -15,6 +15,7 @@
 import Anthropic from "@anthropic-ai/sdk"
 import { z } from "zod"
 import { Result } from "@/domain/shared/types/Result"
+import { extractJsonObject } from "@/lib/ai/extract-json-object"
 
 // -----------------------------------------------------------
 // Types
@@ -95,19 +96,6 @@ const customerInsightSchema = z.object({
 // Helpers
 // -----------------------------------------------------------
 
-// Tolerant JSON extraction: finds the outermost JSON object in the response,
-// even when the model wraps it in markdown code blocks or surrounding prose.
-// Reason: Claude Sonnet 4.6 sometimes adds explanatory text despite system-prompt
-// instructions. See salvage-vision/CLAUDE.md for the same learning.
-function extractJsonObject(text: string): string {
-  const start = text.indexOf("{")
-  const end = text.lastIndexOf("}")
-  if (start === -1 || end === -1 || end <= start) {
-    return text.trim()
-  }
-  return text.slice(start, end + 1)
-}
-
 // -----------------------------------------------------------
 // System prompt
 // -----------------------------------------------------------
diff --git a/src/domain/voice-log/VoiceInterpretationService.test.ts b/src/domain/voice-log/VoiceInterpretationService.test.ts
index f87eef2a..581a75e1 100644
--- a/src/domain/voice-log/VoiceInterpretationService.test.ts
+++ b/src/domain/voice-log/VoiceInterpretationService.test.ts
@@ -182,6 +182,83 @@ describe("VoiceInterpretationService", () => {
       expect(result.value.customerName).toBe("Anna")
     })
 
+    // Regression: Sonnet 4.6 sometimes adds prose around JSON. See salvage-vision/CLAUDE.md.
+    it("handles prose before JSON object", async () => {
+      const jsonContent = JSON.stringify({
+        bookingId: "booking-1",
+        customerName: "Anna Johansson",
+        horseName: "Stella",
+        markAsCompleted: true,
+        workPerformed: "Klart",
+        horseObservation: null,
+        horseNoteCategory: "farrier",
+        nextVisitWeeks: 8,
+        confidence: 0.9,
+      })
+
+      mockCreate.mockResolvedValue({
+        content: [{ type: "text", text: "Här är tolkningen:\n" + jsonContent }],
+      })
+
+      const result = await service.interpret("klar med Anna", SAMPLE_BOOKINGS)
+      expect(result.isSuccess).toBe(true)
+      expect(result.value.bookingId).toBe("booking-1")
+    })
+
+    it("handles prose after JSON object", async () => {
+      const jsonContent = JSON.stringify({
+        bookingId: "booking-1",
+        customerName: "Anna",
+        horseName: "Stella",
+        markAsCompleted: true,
+        workPerformed: "Klart",
+        horseObservation: null,
+        horseNoteCategory: "farrier",
+        nextVisitWeeks: 8,
+        confidence: 0.9,
+      })
+
+      mockCreate.mockResolvedValue({
+        content: [
+          { type: "text", text: jsonContent + "\n\nHoppas detta hjälper!" },
+        ],
+      })
+
+      const result = await service.interpret("klar med Anna", SAMPLE_BOOKINGS)
+      expect(result.isSuccess).toBe(true)
+      expect(result.value.bookingId).toBe("booking-1")
+    })
+
+    it("handles markdown code block wrapped in prose", async () => {
+      const jsonContent = JSON.stringify({
+        bookingId: "booking-1",
+        customerName: "Anna",
+        horseName: "Stella",
+        markAsCompleted: true,
+        workPerformed: "Klart",
+        horseObservation: null,
+        horseNoteCategory: "farrier",
+        nextVisitWeeks: 8,
+        confidence: 0.9,
+      })
+
+      mockCreate.mockResolvedValue({
+        content: [
+          {
+            type: "text",
+            text:
+              "Visst, här kommer tolkningen:\n```json\n" +
+              jsonContent +
+              "\n```\nHör av dig om något.",
+          },
+        ],
+      })
+
+      const result = await service.interpret("klar med Anna", SAMPLE_BOOKINGS)
+      expect(result.isSuccess).toBe(true)
+      expect(result.value.bookingId).toBe("booking-1")
+    })
+
     it("handles invalid JSON from LLM", async () => {
       mockCreate.mockResolvedValue({
         content: [{ type: "text", text: "This is not JSON" }],
@@ -605,6 +682,85 @@ describe("VoiceInterpretationService", () => {
       expect(result.value.cleanedText).toBe("Allt bra.")
     })
 
+    // Regression: LLMs sometimes add prose around JSON. See salvage-vision/CLAUDE.md.
+    it("handles prose before JSON object", async () => {
+      const json = JSON.stringify({
+        cleanedText: "Allt bra.",
+        isHealthRelated: false,
+        horseNoteCategory: null,
+        suggestedNextWeeks: null,
+      })
+
+      mockCreate.mockResolvedValue({
+        content: [
+          { type: "text", text: "Här är den uppstädade texten:\n" + json },
+        ],
+      })
+
+      const result = await service.interpretQuickNote("allt bra", {
+        customerName: "Erik",
+        horseName: "Blansen",
+        serviceType: "Hovvård",
+      })
+
+      expect(result.isSuccess).toBe(true)
+      expect(result.value.cleanedText).toBe("Allt bra.")
+    })
+
+    it("handles prose after JSON object", async () => {
+      const json = JSON.stringify({
+        cleanedText: "Allt bra.",
+        isHealthRelated: false,
+        horseNoteCategory: null,
+        suggestedNextWeeks: null,
+      })
+
+      mockCreate.mockResolvedValue({
+        content: [
+          { type: "text", text: json + "\n\nHör av dig om du behöver något." },
+        ],
+      })
+
+      const result = await service.interpretQuickNote("allt bra", {
+        customerName: "Erik",
+        horseName: "Blansen",
+        serviceType: "Hovvård",
+      })
+
+      expect(result.isSuccess).toBe(true)
+      expect(result.value.cleanedText).toBe("Allt bra.")
+    })
+
+    it("handles markdown code block wrapped in prose", async () => {
+      const json = JSON.stringify({
+        cleanedText: "Allt bra.",
+        isHealthRelated: false,
+        horseNoteCategory: null,
+        suggestedNextWeeks: null,
+      })
+
+      mockCreate.mockResolvedValue({
+        content: [
+          {
+            type: "text",
+            text:
+              "Visst, här kommer resultatet:\n```json\n" +
+              json +
+              "\n```\nHör av dig om något.",
+          },
+        ],
+      })
+
+      const result = await service.interpretQuickNote("allt bra", {
+        customerName: "Erik",
+        horseName: "Blansen",
+        serviceType: "Hovvård",
+      })
+
+      expect(result.isSuccess).toBe(true)
+      expect(result.value.cleanedText).toBe("Allt bra.")
+    })
+
     it("handles API error gracefully", async () => {
       mockCreate.mockRejectedValue(new Error("API quota exceeded"))
 
diff --git a/src/domain/voice-log/VoiceInterpretationService.ts b/src/domain/voice-log/VoiceInterpretationService.ts
index a6a2b3e3..edb4d58c 100644
--- a/src/domain/voice-log/VoiceInterpretationService.ts
+++ b/src/domain/voice-log/VoiceInterpretationService.ts
@@ -17,6 +17,7 @@
 import Anthropic from "@anthropic-ai/sdk"
 import { z } from "zod"
 import { Result } from "@/domain/shared"
+import { extractJsonObject } from "@/lib/ai/extract-json-object"
 
 // -----------------------------------------------------------
 // Types
@@ -124,22 +125,6 @@ function buildSystemPrompt(vocabularyPrompt?: string): string {
   return BASE_SYSTEM_PROMPT + "\n" + vocabularyPrompt
 }
 
-/**
- * Strip markdown code block wrappers (```json ... ```) that LLMs sometimes add
- * despite being told to return raw JSON.
- */
-function stripMarkdownCodeBlock(text: string): string {
-  const trimmed = text.trim()
-  if (trimmed.startsWith("```")) {
-    // Remove opening ``` (with optional language tag) and closing ```
-    return trimmed
-      .replace(/^```(?:json)?\s*\n?/, "")
-      .replace(/\n?```\s*$/, "")
-      .trim()
-  }
-  return trimmed
-}
-
 export class VoiceInterpretationService {
   private apiKey: string | undefined
 
@@ -209,7 +194,7 @@ Transkribering:
         })
       }
 
-      const cleanedText = stripMarkdownCodeBlock(content.text)
+      const cleanedText = extractJsonObject(content.text)
       const rawParsed = JSON.parse(cleanedText)
       const validated = interpretedVoiceLogSchema.safeParse(rawParsed)
 
@@ -282,7 +267,7 @@ Transkribering:
         })
       }
 
-      const cleanedText = stripMarkdownCodeBlock(content.text)
+      const cleanedText = extractJsonObject(content.text)
       const rawParsed = JSON.parse(cleanedText)
       const validated = quickNoteSchema.safeParse(rawParsed)
 
diff --git a/src/lib/ai/extract-json-object.test.ts b/src/lib/ai/extract-json-object.test.ts
new file mode 100644
index 00000000..e2e7f173
--- /dev/null
+++ b/src/lib/ai/extract-json-object.test.ts
@@ -0,0 +1,84 @@
+import { describe, it, expect } from "vitest"
+import { extractJsonObject } from "./extract-json-object"
+
+describe("extractJsonObject", () => {
+  it("returns plain JSON as-is", () => {
+    const json = '{"foo":"bar","n":1}'
+    expect(extractJsonObject(json)).toBe(json)
+  })
+
+  it("trims surrounding whitespace on plain JSON", () => {
+    const json = '{"foo":"bar"}'
+    expect(extractJsonObject(`  \n${json}\n  `)).toBe(json)
+  })
+
+  it("strips markdown code fences (```json)", () => {
+    const json = '{"foo":"bar"}'
+    expect(extractJsonObject(`\`\`\`json\n${json}\n\`\`\``)).toBe(json)
+  })
+
+  it("strips markdown code fences without language tag (```)", () => {
+    const json = '{"foo":"bar"}'
+    expect(extractJsonObject(`\`\`\`\n${json}\n\`\`\``)).toBe(json)
+  })
+
+  it("strips prose before the JSON object", () => {
+    const json = '{"foo":"bar"}'
+    expect(extractJsonObject(`Här är analysen:\n${json}`)).toBe(json)
+  })
+
+  it("strips prose after the JSON object", () => {
+    const json = '{"foo":"bar"}'
+    expect(extractJsonObject(`${json}\n\nHoppas detta hjälper!`)).toBe(json)
+  })
+
+  it("strips prose both before and after JSON", () => {
+    const json = '{"foo":"bar"}'
+    expect(
+      extractJsonObject(`Visst, här:\n${json}\n\nHör av dig om något.`)
+    ).toBe(json)
+  })
+
+  it("handles markdown code block wrapped in prose", () => {
+    const json = '{"foo":"bar"}'
+    expect(
+      extractJsonObject(
+        `Visst, här kommer analysen:\n\`\`\`json\n${json}\n\`\`\`\nHör av dig.`
+      )
+    ).toBe(json)
+  })
+
+  it("preserves nested objects (returns full outer object via lastIndexOf)", () => {
+    const json = '{"outer":{"inner":1},"x":2}'
+    expect(extractJsonObject(`Result:\n${json}\nDone`)).toBe(json)
+  })
+
+  it("preserves braces inside string values", () => {
+    const json = '{"text":"this has {braces} inside","n":1}'
+    expect(extractJsonObject(json)).toBe(json)
+  })
+
+  it("returns trimmed input when no opening brace exists", () => {
+    expect(extractJsonObject("This is not JSON at all")).toBe(
+      "This is not JSON at all"
+    )
+  })
+
+  it("returns trimmed input when no closing brace exists", () => {
+    expect(extractJsonObject("Plain text { missing close")).toBe(
+      "Plain text { missing close"
+    )
+  })
+
+  it("returns trimmed input when closing brace comes before opening", () => {
+    expect(extractJsonObject("} text {")).toBe("} text {")
+  })
+
+  it("returns trimmed input for empty string", () => {
+    expect(extractJsonObject("")).toBe("")
+  })
+
+  it("returns trimmed input for whitespace-only string", () => {
+    expect(extractJsonObject("   \n\t  ")).toBe("")
+  })
+})
diff --git a/src/lib/ai/extract-json-object.ts b/src/lib/ai/extract-json-object.ts
new file mode 100644
index 00000000..d48ccf11
--- /dev/null
+++ b/src/lib/ai/extract-json-object.ts
@@ -0,0 +1,20 @@
+/**
+ * Tolerant JSON-object extractor for LLM responses.
+ *
+ * Returns the text from the first `{` through the last `}`, which strips
+ * markdown code fences and prose around a single JSON object. Braces are not
+ * balanced, so a stray brace in the prose stays in. If no `{` precedes a `}`,
+ * the trimmed input is returned and downstream `JSON.parse` surfaces the issue.
+ *
+ * Reason: Claude (and other LLMs) sometimes add explanatory prose around the
+ * JSON despite system-prompt instructions like "answer with JSON only". This
+ * mirrors the learning captured in `salvage-vision/CLAUDE.md`.
+ */
+export function extractJsonObject(text: string): string {
+  const start = text.indexOf("{")
+  const end = text.lastIndexOf("}")
+  if (start === -1 || end === -1 || end <= start) {
+    return text.trim()
+  }
+  return text.slice(start, end + 1)
+}