Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 1 addition & 17 deletions src/domain/customer-insight/CustomerInsightService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import Anthropic from "@anthropic-ai/sdk"
import { z } from "zod"
import { Result } from "@/domain/shared/types/Result"
import { extractJsonObject } from "@/lib/ai/extract-json-object"

// -----------------------------------------------------------
// Types
Expand Down Expand Up @@ -91,23 +92,6 @@ const customerInsightSchema = z.object({
.transform((v) => Math.max(0, Math.min(1, v))),
})

// -----------------------------------------------------------
// Helpers
// -----------------------------------------------------------

/**
 * Tolerant JSON extraction for LLM responses.
 *
 * Returns the first balanced `{...}` span in `text` that `JSON.parse` accepts,
 * so the payload is recovered even when the model wraps it in markdown code
 * blocks or surrounding prose, including prose that itself contains braces.
 *
 * Fallbacks (unchanged from the previous first-"{" / last-"}" behaviour):
 * - no span parses: the slice from the first "{" to the last "}" is returned;
 * - no usable brace pair exists: the trimmed input is returned.
 * In both cases the caller's `JSON.parse` surfaces the underlying problem.
 *
 * Reason: Claude Sonnet 4.6 sometimes adds explanatory text despite
 * system-prompt instructions. See salvage-vision/CLAUDE.md for the same learning.
 *
 * @param text Raw text returned by the model.
 * @returns The extracted JSON object text, or a fallback as described above.
 */
function extractJsonObject(text: string): string {
  const firstOpen = text.indexOf("{")
  let open = firstOpen

  while (open !== -1) {
    // Locate the brace that balances `open`. Braces inside JSON string
    // literals are ignored so values like "a {b} c" do not end the object early.
    let depth = 0
    let inString = false
    let close = -1
    for (let i = open; i < text.length; i++) {
      const ch = text[i]
      if (inString) {
        if (ch === "\\") {
          i++ // skip the escaped character (e.g. \" or \\)
        } else if (ch === '"') {
          inString = false
        }
      } else if (ch === '"') {
        inString = true
      } else if (ch === "{") {
        depth++
      } else if (ch === "}") {
        depth--
        if (depth === 0) {
          close = i
          break
        }
      }
    }

    if (close === -1) {
      // Never balanced from this brace; a later "{" may still open a valid
      // object. Worst case this rescans per unbalanced brace, which is fine
      // for response-sized inputs.
      open = text.indexOf("{", open + 1)
      continue
    }

    const candidate = text.slice(open, close + 1)
    try {
      JSON.parse(candidate)
      return candidate
    } catch {
      // Balanced but not JSON (e.g. "{see below}" in prose): skip past it
      // rather than descending into it.
      open = text.indexOf("{", close + 1)
    }
  }

  // Legacy fallback: outermost brace pair, or the trimmed text when none exists.
  const lastClose = text.lastIndexOf("}")
  if (firstOpen === -1 || lastClose <= firstOpen) {
    return text.trim()
  }
  return text.slice(firstOpen, lastClose + 1)
}

// -----------------------------------------------------------
// System prompt
// -----------------------------------------------------------
Expand Down
156 changes: 156 additions & 0 deletions src/domain/voice-log/VoiceInterpretationService.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,83 @@ describe("VoiceInterpretationService", () => {
expect(result.value.customerName).toBe("Anna")
})

// Regression: Sonnet 4.6 sometimes adds prose around JSON. See salvage-vision/CLAUDE.md.
it("handles prose before JSON object", async () => {
  // Arrange: a valid interpretation payload preceded by a lead-in sentence.
  const interpretation = {
    bookingId: "booking-1",
    customerName: "Anna Johansson",
    horseName: "Stella",
    markAsCompleted: true,
    workPerformed: "Klart",
    horseObservation: null,
    horseNoteCategory: "farrier",
    nextVisitWeeks: 8,
    confidence: 0.9,
  }
  const llmText = "Här är tolkningen:\n" + JSON.stringify(interpretation)
  mockCreate.mockResolvedValue({ content: [{ type: "text", text: llmText }] })

  // Act
  const outcome = await service.interpret("klar med Anna", SAMPLE_BOOKINGS)

  // Assert: the leading prose must not prevent parsing.
  expect(outcome.isSuccess).toBe(true)
  expect(outcome.value.bookingId).toBe("booking-1")
})

it("handles prose after JSON object", async () => {
  // Arrange: a valid interpretation payload followed by a closing remark.
  const interpretation = {
    bookingId: "booking-1",
    customerName: "Anna",
    horseName: "Stella",
    markAsCompleted: true,
    workPerformed: "Klart",
    horseObservation: null,
    horseNoteCategory: "farrier",
    nextVisitWeeks: 8,
    confidence: 0.9,
  }
  const llmText = JSON.stringify(interpretation) + "\n\nHoppas detta hjälper!"
  mockCreate.mockResolvedValue({ content: [{ type: "text", text: llmText }] })

  // Act
  const outcome = await service.interpret("klar med Anna", SAMPLE_BOOKINGS)

  // Assert: the trailing prose must not prevent parsing.
  expect(outcome.isSuccess).toBe(true)
  expect(outcome.value.bookingId).toBe("booking-1")
})

it("handles markdown code block wrapped in prose", async () => {
  // Arrange: payload inside a ```json fence, with prose on both sides of the fence.
  const interpretation = {
    bookingId: "booking-1",
    customerName: "Anna",
    horseName: "Stella",
    markAsCompleted: true,
    workPerformed: "Klart",
    horseObservation: null,
    horseNoteCategory: "farrier",
    nextVisitWeeks: 8,
    confidence: 0.9,
  }
  const llmText = [
    "Visst, här kommer tolkningen:\n```json\n",
    JSON.stringify(interpretation),
    "\n```\nHör av dig om något.",
  ].join("")
  mockCreate.mockResolvedValue({ content: [{ type: "text", text: llmText }] })

  // Act
  const outcome = await service.interpret("klar med Anna", SAMPLE_BOOKINGS)

  // Assert: neither the fence nor the prose may prevent parsing.
  expect(outcome.isSuccess).toBe(true)
  expect(outcome.value.bookingId).toBe("booking-1")
})

it("handles invalid JSON from LLM", async () => {
mockCreate.mockResolvedValue({
content: [{ type: "text", text: "This is not JSON" }],
Expand Down Expand Up @@ -605,6 +682,85 @@ describe("VoiceInterpretationService", () => {
expect(result.value.cleanedText).toBe("Allt bra.")
})

// Regression: LLMs sometimes add prose around JSON. See salvage-vision/CLAUDE.md.
it("handles prose before JSON object", async () => {
  // Arrange: a valid quick-note payload preceded by a lead-in sentence.
  const quickNote = {
    cleanedText: "Allt bra.",
    isHealthRelated: false,
    horseNoteCategory: null,
    suggestedNextWeeks: null,
  }
  const llmText = "Här är den uppstädade texten:\n" + JSON.stringify(quickNote)
  mockCreate.mockResolvedValue({ content: [{ type: "text", text: llmText }] })

  // Act
  const noteContext = {
    customerName: "Erik",
    horseName: "Blansen",
    serviceType: "Hovvård",
  }
  const outcome = await service.interpretQuickNote("allt bra", noteContext)

  // Assert: the leading prose must not prevent parsing.
  expect(outcome.isSuccess).toBe(true)
  expect(outcome.value.cleanedText).toBe("Allt bra.")
})

it("handles prose after JSON object", async () => {
  // Arrange: a valid quick-note payload followed by a closing remark.
  const quickNote = {
    cleanedText: "Allt bra.",
    isHealthRelated: false,
    horseNoteCategory: null,
    suggestedNextWeeks: null,
  }
  const llmText = JSON.stringify(quickNote) + "\n\nHör av dig om du behöver något."
  mockCreate.mockResolvedValue({ content: [{ type: "text", text: llmText }] })

  // Act
  const noteContext = {
    customerName: "Erik",
    horseName: "Blansen",
    serviceType: "Hovvård",
  }
  const outcome = await service.interpretQuickNote("allt bra", noteContext)

  // Assert: the trailing prose must not prevent parsing.
  expect(outcome.isSuccess).toBe(true)
  expect(outcome.value.cleanedText).toBe("Allt bra.")
})

it("handles markdown code block wrapped in prose", async () => {
  // Arrange: payload inside a ```json fence, with prose on both sides of the fence.
  const quickNote = {
    cleanedText: "Allt bra.",
    isHealthRelated: false,
    horseNoteCategory: null,
    suggestedNextWeeks: null,
  }
  const llmText = [
    "Visst, här kommer resultatet:\n```json\n",
    JSON.stringify(quickNote),
    "\n```\nHör av dig om något.",
  ].join("")
  mockCreate.mockResolvedValue({ content: [{ type: "text", text: llmText }] })

  // Act
  const noteContext = {
    customerName: "Erik",
    horseName: "Blansen",
    serviceType: "Hovvård",
  }
  const outcome = await service.interpretQuickNote("allt bra", noteContext)

  // Assert: neither the fence nor the prose may prevent parsing.
  expect(outcome.isSuccess).toBe(true)
  expect(outcome.value.cleanedText).toBe("Allt bra.")
})

it("handles API error gracefully", async () => {
mockCreate.mockRejectedValue(new Error("API quota exceeded"))

Expand Down
21 changes: 3 additions & 18 deletions src/domain/voice-log/VoiceInterpretationService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import Anthropic from "@anthropic-ai/sdk"
import { z } from "zod"
import { Result } from "@/domain/shared"
import { extractJsonObject } from "@/lib/ai/extract-json-object"

// -----------------------------------------------------------
// Types
Expand Down Expand Up @@ -124,22 +125,6 @@ function buildSystemPrompt(vocabularyPrompt?: string): string {
return BASE_SYSTEM_PROMPT + "\n" + vocabularyPrompt
}

/**
 * Strip a markdown code-fence wrapper (```json ... ```) that LLMs sometimes add
 * despite being told to return raw JSON.
 *
 * Only acts when the trimmed text starts with a fence. The opening fence is
 * removed together with its language tag: any tag that ends the fence line
 * (e.g. `json`, `JSON`, `typescript`), plus a bare `json` glued directly to the
 * content. A closing fence at the very end is removed as well.
 *
 * @param text Raw text returned by the model.
 * @returns The fenced content, trimmed; or the trimmed input when it does not
 *          start with a fence.
 */
function stripMarkdownCodeBlock(text: string): string {
  const trimmed = text.trim()
  if (!trimmed.startsWith("```")) {
    return trimmed
  }
  return (
    trimmed
      // Opening fence. The first alternative accepts any language tag, but only
      // when the tag is the last thing on the fence line, so content that
      // starts right after the backticks is never mistaken for a tag. The
      // second alternative keeps the previous handling of "```json{...".
      .replace(/^```(?:[\w+-]+(?=[ \t]*\r?\n)|json)?\s*\n?/, "")
      // Closing fence at the end of the text.
      .replace(/\n?```\s*$/, "")
      .trim()
  )
}

export class VoiceInterpretationService {
private apiKey: string | undefined

Expand Down Expand Up @@ -209,7 +194,7 @@ Transkribering:
})
}

const cleanedText = stripMarkdownCodeBlock(content.text)
const cleanedText = extractJsonObject(content.text)
const rawParsed = JSON.parse(cleanedText)
const validated = interpretedVoiceLogSchema.safeParse(rawParsed)

Expand Down Expand Up @@ -282,7 +267,7 @@ Transkribering:
})
}

const cleanedText = stripMarkdownCodeBlock(content.text)
const cleanedText = extractJsonObject(content.text)
const rawParsed = JSON.parse(cleanedText)
const validated = quickNoteSchema.safeParse(rawParsed)

Expand Down
84 changes: 84 additions & 0 deletions src/lib/ai/extract-json-object.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import { describe, it, expect } from "vitest"
import { extractJsonObject } from "./extract-json-object"

describe("extractJsonObject", () => {
  // Minimal object that most cases expect to get back from the extractor.
  const simpleObject = '{"foo":"bar"}'

  // Shared assertion: extracting from `input` must yield exactly `expected`.
  const expectExtracted = (input: string, expected: string): void => {
    expect(extractJsonObject(input)).toBe(expected)
  }

  // --- Input that is already JSON ---

  it("returns plain JSON as-is", () => {
    const payload = '{"foo":"bar","n":1}'
    expectExtracted(payload, payload)
  })

  it("trims surrounding whitespace on plain JSON", () => {
    expectExtracted(` \n${simpleObject}\n `, simpleObject)
  })

  // --- Markdown fences ---

  it("strips markdown code fences (```json)", () => {
    expectExtracted(`\`\`\`json\n${simpleObject}\n\`\`\``, simpleObject)
  })

  it("strips markdown code fences without language tag (```)", () => {
    expectExtracted(`\`\`\`\n${simpleObject}\n\`\`\``, simpleObject)
  })

  // --- Surrounding prose ---

  it("strips prose before the JSON object", () => {
    expectExtracted(`Här är analysen:\n${simpleObject}`, simpleObject)
  })

  it("strips prose after the JSON object", () => {
    expectExtracted(`${simpleObject}\n\nHoppas detta hjälper!`, simpleObject)
  })

  it("strips prose both before and after JSON", () => {
    const input = `Visst, här:\n${simpleObject}\n\nHör av dig om något.`
    expectExtracted(input, simpleObject)
  })

  it("handles markdown code block wrapped in prose", () => {
    const input = `Visst, här kommer analysen:\n\`\`\`json\n${simpleObject}\n\`\`\`\nHör av dig.`
    expectExtracted(input, simpleObject)
  })

  // --- Braces inside the payload ---

  it("preserves nested objects (returns full outer object via lastIndexOf)", () => {
    const nested = '{"outer":{"inner":1},"x":2}'
    expectExtracted(`Result:\n${nested}\nDone`, nested)
  })

  it("preserves braces inside string values", () => {
    const withBraces = '{"text":"this has {braces} inside","n":1}'
    expectExtracted(withBraces, withBraces)
  })

  // --- No usable brace pair: the trimmed input comes back ---

  it("returns trimmed input when no opening brace exists", () => {
    expectExtracted("This is not JSON at all", "This is not JSON at all")
  })

  it("returns trimmed input when no closing brace exists", () => {
    expectExtracted("Plain text { missing close", "Plain text { missing close")
  })

  it("returns trimmed input when closing brace comes before opening", () => {
    expectExtracted("} text {", "} text {")
  })

  it("returns trimmed input for empty string", () => {
    expectExtracted("", "")
  })

  it("returns trimmed input for whitespace-only string", () => {
    expectExtracted(" \n\t ", "")
  })
})
20 changes: 20 additions & 0 deletions src/lib/ai/extract-json-object.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/**
 * Tolerant JSON-object extractor for LLM responses.
 *
 * Returns the first balanced `{...}` span in `text` that `JSON.parse` accepts.
 * This recovers the payload when the model wraps it in markdown code fences or
 * surrounding prose, including prose that itself contains braces (for example
 * `Note {see below}: {...}` or a trailing `:-}`), which a plain
 * first-"{" / last-"}" slice would turn into unparseable text.
 *
 * Fallbacks, identical to the previous behaviour:
 * - if no span parses, the slice from the first "{" to the last "}" is returned;
 * - if no usable `{...}` pair exists, the original trimmed text is returned.
 * In both cases downstream `JSON.parse` surfaces the underlying issue.
 *
 * Note: when the text contains several parseable objects, the first one wins.
 *
 * Reason: Claude (and other LLMs) sometimes add explanatory prose around the
 * JSON despite system-prompt instructions like "answer with JSON only". This
 * mirrors the learning captured in `salvage-vision/CLAUDE.md`.
 *
 * @param text Raw text returned by the model.
 * @returns The extracted JSON object text, or a fallback as described above.
 */
export function extractJsonObject(text: string): string {
  const firstOpen = text.indexOf("{")
  let open = firstOpen

  while (open !== -1) {
    // Locate the brace that balances `open`. Braces inside JSON string
    // literals are ignored so values like "a {b} c" do not end the object early.
    let depth = 0
    let inString = false
    let close = -1
    for (let i = open; i < text.length; i++) {
      const ch = text[i]
      if (inString) {
        if (ch === "\\") {
          i++ // skip the escaped character (e.g. \" or \\)
        } else if (ch === '"') {
          inString = false
        }
      } else if (ch === '"') {
        inString = true
      } else if (ch === "{") {
        depth++
      } else if (ch === "}") {
        depth--
        if (depth === 0) {
          close = i
          break
        }
      }
    }

    if (close === -1) {
      // Never balanced from this brace; a later "{" may still open a valid
      // object. Worst case this rescans per unbalanced brace, which is fine
      // for response-sized inputs.
      open = text.indexOf("{", open + 1)
      continue
    }

    const candidate = text.slice(open, close + 1)
    try {
      JSON.parse(candidate)
      return candidate
    } catch {
      // Balanced but not JSON (e.g. "{see below}" in prose): skip past it
      // rather than descending into it.
      open = text.indexOf("{", close + 1)
    }
  }

  // Legacy fallback: outermost brace pair, or the trimmed text when none exists.
  const lastClose = text.lastIndexOf("}")
  if (firstOpen === -1 || lastClose <= firstOpen) {
    return text.trim()
  }
  return text.slice(firstOpen, lastClose + 1)
}