diff --git a/pkg/attachment/attachment.go b/pkg/attachment/attachment.go new file mode 100644 index 000000000..28125ffb4 --- /dev/null +++ b/pkg/attachment/attachment.go @@ -0,0 +1,61 @@ +// Package attachment provides MIME-aware routing for document attachments. +// +// It defines how a chat.Document should be sent to a model: either dropped +// (unsupported), wrapped in a plain-text envelope (StrategyTXT), or encoded +// as inline base64 data (StrategyB64). +package attachment + +import ( + "fmt" + + "github.com/docker/docker-agent/pkg/attachment/modelcaps" + "github.com/docker/docker-agent/pkg/chat" +) + +// Strategy describes how an attachment should be handled before sending to the +// provider. +type Strategy int + +const ( + // StrategyDrop means the attachment is not supported by the model or has no + // inline content, and should be silently skipped (with a log warning). + StrategyDrop Strategy = iota + + // StrategyTXT means the attachment should be wrapped in a TXTEnvelope and + // sent as plain text. Used for text/* MIME types whose content is already + // in Source.InlineText. + StrategyTXT + + // StrategyB64 means the attachment content (Source.InlineData) should be + // base64-encoded and sent as a native provider image/document block. + StrategyB64 +) + +// Decide returns the routing Strategy for a document given the current model's +// capabilities. +// +// Algorithm: +// 1. If the model does not support the document's MIME type → (Drop, reason). +// 2. If Source.InlineData is non-empty → (B64, ""). +// 3. If Source.InlineText is non-empty → (TXT, ""). +// 4. Otherwise → (Drop, "no inline content"). 
+func Decide(doc chat.Document, mc modelcaps.ModelCapabilities) (Strategy, string) { + if !mc.Supports(doc.MimeType) { + return StrategyDrop, fmt.Sprintf("model does not support MIME type %q", doc.MimeType) + } + if len(doc.Source.InlineData) > 0 { + return StrategyB64, "" + } + if doc.Source.InlineText != "" { + return StrategyTXT, "" + } + return StrategyDrop, "no inline content" +} + +// TXTEnvelope wraps a text document body in an XML-like tag that models can +// parse as a named attachment. +// +// …body… +func TXTEnvelope(name, mimeType, body string) string { + return fmt.Sprintf("%s", name, mimeType, body) +} diff --git a/pkg/attachment/decide_test.go b/pkg/attachment/decide_test.go new file mode 100644 index 000000000..6eee38153 --- /dev/null +++ b/pkg/attachment/decide_test.go @@ -0,0 +1,140 @@ +package attachment_test + +import ( + "strings" + "testing" + + "github.com/docker/docker-agent/pkg/attachment" + "github.com/docker/docker-agent/pkg/attachment/modelcaps" + "github.com/docker/docker-agent/pkg/chat" +) + +// testCaps is a small helper that builds a ModelCapabilities directly. 
+func visionCaps() modelcaps.ModelCapabilities { + return modelcaps.CapsWith(true, true) +} + +func textOnlyCaps() modelcaps.ModelCapabilities { + return modelcaps.CapsWith(false, false) +} + +func imageNoPDFCaps() modelcaps.ModelCapabilities { + return modelcaps.CapsWith(true, false) +} + +func TestDecide(t *testing.T) { + tests := []struct { + name string + doc chat.Document + caps modelcaps.ModelCapabilities + wantStrategy attachment.Strategy + wantReasonHas string // non-empty: reason must contain this substring + }{ + { + name: "b64 image supported", + doc: chat.Document{ + Name: "photo.jpg", + MimeType: "image/jpeg", + Source: chat.DocumentSource{InlineData: []byte{0xFF, 0xD8}}, + }, + caps: visionCaps(), + wantStrategy: attachment.StrategyB64, + }, + { + name: "txt text plain", + doc: chat.Document{ + Name: "notes.txt", + MimeType: "text/plain", + Source: chat.DocumentSource{InlineText: "hello world"}, + }, + caps: textOnlyCaps(), + wantStrategy: attachment.StrategyTXT, + }, + { + name: "drop image when model has no vision", + doc: chat.Document{ + Name: "photo.jpg", + MimeType: "image/jpeg", + Source: chat.DocumentSource{InlineData: []byte{0xFF, 0xD8}}, + }, + caps: textOnlyCaps(), + wantStrategy: attachment.StrategyDrop, + wantReasonHas: "does not support MIME type", + }, + { + name: "drop pdf when model has no pdf support", + doc: chat.Document{ + Name: "doc.pdf", + MimeType: "application/pdf", + Source: chat.DocumentSource{InlineData: []byte{0x25, 0x50, 0x44, 0x46}}, + }, + caps: imageNoPDFCaps(), + wantStrategy: attachment.StrategyDrop, + wantReasonHas: "does not support MIME type", + }, + { + name: "drop no inline content", + doc: chat.Document{ + Name: "empty.md", + MimeType: "text/markdown", + Source: chat.DocumentSource{}, + }, + caps: textOnlyCaps(), + wantStrategy: attachment.StrategyDrop, + wantReasonHas: "no inline content", + }, + { + name: "b64 pdf when pdf supported", + doc: chat.Document{ + Name: "spec.pdf", + MimeType: "application/pdf", + 
Source: chat.DocumentSource{InlineData: []byte{0x25, 0x50, 0x44, 0x46}}, + }, + caps: visionCaps(), + wantStrategy: attachment.StrategyB64, + }, + { + name: "drop office doc (DOCX is binary, not supported without models.dev office modality)", + doc: chat.Document{ + Name: "report.docx", + MimeType: "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + Source: chat.DocumentSource{InlineData: []byte{0x50, 0x4B}}, // ZIP magic bytes + }, + caps: visionCaps(), // even full caps can't send DOCX — no modality + wantStrategy: attachment.StrategyDrop, + wantReasonHas: "does not support MIME type", + }, + { + name: "b64 wins over txt when both inline sources present", + doc: chat.Document{ + Name: "data.txt", + MimeType: "text/plain", + Source: chat.DocumentSource{InlineData: []byte("hello"), InlineText: "hello"}, + }, + caps: textOnlyCaps(), + wantStrategy: attachment.StrategyB64, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + gotStrategy, gotReason := attachment.Decide(tc.doc, tc.caps) + if gotStrategy != tc.wantStrategy { + t.Errorf("strategy: got %d, want %d", gotStrategy, tc.wantStrategy) + } + if tc.wantReasonHas != "" { + if !strings.Contains(gotReason, tc.wantReasonHas) { + t.Errorf("reason %q does not contain %q", gotReason, tc.wantReasonHas) + } + } + }) + } +} + +func TestTXTEnvelope(t *testing.T) { + got := attachment.TXTEnvelope("readme.md", "text/markdown", "# Hello") + want := `# Hello` + if got != want { + t.Errorf("TXTEnvelope:\ngot %q\nwant %q", got, want) + } +} diff --git a/pkg/attachment/modelcaps/modelcaps.go b/pkg/attachment/modelcaps/modelcaps.go new file mode 100644 index 000000000..bbc1680d5 --- /dev/null +++ b/pkg/attachment/modelcaps/modelcaps.go @@ -0,0 +1,157 @@ +// Package modelcaps provides model capability queries for the attachment system. +// It translates models.dev modality information into MIME-type support decisions +// used by the attachment routing logic. 
// ModelCapabilities describes what MIME types a given model can accept as
// document attachments.
type ModelCapabilities struct {
	// supportsImage is true when the model accepts image/* MIME types.
	supportsImage bool
	// supportsPDF is true when the model accepts application/pdf.
	supportsPDF bool
	// modelFound is false when models.dev has no record for this model,
	// which causes conservative fallback behaviour (text-only).
	modelFound bool
}

// isOfficeMIME returns true for Office document binary formats
// (OOXML, legacy Office, RTF). These are ZIP-based or binary formats
// that cannot be naively TXT-enveloped and require explicit model support.
//
// mt is compared verbatim, so callers must pass an already lower-cased value.
func isOfficeMIME(mt string) bool {
	switch mt {
	case "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
		"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
		"application/vnd.openxmlformats-officedocument.presentationml.presentation",
		"application/vnd.ms-excel",
		"application/vnd.ms-powerpoint",
		"application/msword",
		"application/rtf",
		"text/rtf":
		return true
	}
	return false
}

// Supports returns true when the model can accept an attachment with the given
// MIME type. The comparison is case-insensitive.
//
// Resolution rules (in order):
//  1. Office/binary document MIMEs (DOCX, XLSX, PPTX, RTF, …) → false.
//     models.dev currently has no "document" or "office" modality field, so
//     these are rejected for all models until the schema is extended.
//  2. image/* → requires supportsImage (models.dev "image" modality)
//  3. application/pdf → requires supportsPDF (models.dev "pdf" modality)
//  4. text/* → always supported (plain text; TXT envelope is universally safe)
//  5. Everything else (audio/*, video/*, unknown binary) → false
//
// The Office check runs before the text/* rule on purpose: isOfficeMIME lists
// "text/rtf", and testing the "text/" prefix first would let RTF through as
// plain text.
func (mc ModelCapabilities) Supports(mimeType string) bool {
	mt := strings.ToLower(mimeType)
	switch {
	case isOfficeMIME(mt):
		return false
	case strings.HasPrefix(mt, "image/"):
		return mc.supportsImage
	case mt == "application/pdf":
		return mc.supportsPDF
	case strings.HasPrefix(mt, "text/"):
		// text/plain, text/markdown, text/html, text/csv, … are actual text.
		return true
	default:
		// audio/*, video/*, and all other unknown binary types.
		return false
	}
}

// loadTimeout is the maximum time allowed for a models.dev capability lookup.
// If the fetch takes longer, Load falls back to conservative text-only caps.
const loadTimeout = 10 * time.Second

// Load fetches (or returns from cache) the capability record for the given
// model ID. The model ID should be in "provider/model" format as used by
// models.dev (e.g. "anthropic/claude-3-5-sonnet-20241022").
//
// When the model is not found in the models.dev database, Load returns a
// conservative capability set that only allows text MIME types. The returned
// error is always nil; capability detection failures are silent and safe.
+func Load(modelID string) (ModelCapabilities, error) { + ctx, cancel := context.WithTimeout(context.Background(), loadTimeout) + defer cancel() + + store, err := modelsdev.NewStore() + if err != nil { + slog.Warn("modelcaps: failed to load models.dev store, using conservative caps", + "error", err, "model", modelID) + return ModelCapabilities{modelFound: false}, nil + } + + model, err := store.GetModel(ctx, modelID) + if err != nil { + if ctx.Err() != nil { + slog.Warn("modelcaps: models.dev lookup timed out, using conservative caps", + "model", modelID, "timeout", loadTimeout) + } + // Model not found or context cancelled — conservative: text-only. + return ModelCapabilities{modelFound: false}, nil + } + + mc := ModelCapabilities{modelFound: true} + for _, input := range model.Modalities.Input { + switch strings.ToLower(input) { + case "image": + mc.supportsImage = true + case "pdf": + mc.supportsPDF = true + } + } + return mc, nil +} + +// CapsWith constructs a ModelCapabilities value directly from booleans. This is +// intended for use in tests and provider implementations that need to create a +// capabilities value without hitting the network. +func CapsWith(supportsImage, supportsPDF bool) ModelCapabilities { + return ModelCapabilities{ + supportsImage: supportsImage, + supportsPDF: supportsPDF, + modelFound: true, + } +} + +// LoadFromStore is like Load but accepts an explicit *modelsdev.Store, making +// it convenient for tests that inject a pre-populated in-memory store. 
+func LoadFromStore(store *modelsdev.Store, modelID string) ModelCapabilities { + ctx, cancel := context.WithTimeout(context.Background(), loadTimeout) + defer cancel() + + model, err := store.GetModel(ctx, modelID) + if err != nil { + return ModelCapabilities{modelFound: false} + } + + mc := ModelCapabilities{modelFound: true} + for _, input := range model.Modalities.Input { + switch strings.ToLower(input) { + case "image": + mc.supportsImage = true + case "pdf": + mc.supportsPDF = true + } + } + return mc +} diff --git a/pkg/attachment/modelcaps/modelcaps_test.go b/pkg/attachment/modelcaps/modelcaps_test.go new file mode 100644 index 000000000..a473887a6 --- /dev/null +++ b/pkg/attachment/modelcaps/modelcaps_test.go @@ -0,0 +1,167 @@ +package modelcaps_test + +import ( + "testing" + + "github.com/docker/docker-agent/pkg/attachment/modelcaps" + "github.com/docker/docker-agent/pkg/modelsdev" +) + +// buildStore creates an in-memory Store with the given models for testing. +func buildStore(providers map[string]modelsdev.Provider) *modelsdev.Store { + db := &modelsdev.Database{Providers: providers} + return modelsdev.NewDatabaseStore(db) +} + +func TestLoadFromStore_VisionModel(t *testing.T) { + store := buildStore(map[string]modelsdev.Provider{ + "anthropic": { + Models: map[string]modelsdev.Model{ + "claude-3-5-sonnet": { + Name: "Claude 3.5 Sonnet", + Modalities: modelsdev.Modalities{ + Input: []string{"text", "image", "pdf"}, + Output: []string{"text"}, + }, + }, + }, + }, + }) + + mc := modelcaps.LoadFromStore(store, "anthropic/claude-3-5-sonnet") + + if !mc.Supports("image/jpeg") { + t.Error("expected image/jpeg to be supported for vision model") + } + if !mc.Supports("image/png") { + t.Error("expected image/png to be supported for vision model") + } + if !mc.Supports("application/pdf") { + t.Error("expected application/pdf to be supported for pdf model") + } + if !mc.Supports("text/plain") { + t.Error("expected text/plain to always be supported") + } +} + 
+func TestLoadFromStore_TextOnlyModel(t *testing.T) { + store := buildStore(map[string]modelsdev.Provider{ + "openai": { + Models: map[string]modelsdev.Model{ + "gpt-3.5-turbo": { + Name: "GPT-3.5 Turbo", + Modalities: modelsdev.Modalities{ + Input: []string{"text"}, + Output: []string{"text"}, + }, + }, + }, + }, + }) + + mc := modelcaps.LoadFromStore(store, "openai/gpt-3.5-turbo") + + if mc.Supports("image/jpeg") { + t.Error("expected image/jpeg NOT to be supported for text-only model") + } + if mc.Supports("application/pdf") { + t.Error("expected application/pdf NOT to be supported for text-only model") + } + // Text MIMEs are always allowed + if !mc.Supports("text/plain") { + t.Error("expected text/plain to always be supported") + } + if !mc.Supports("text/markdown") { + t.Error("expected text/markdown to always be supported") + } +} + +func TestLoadFromStore_ModelNotFound(t *testing.T) { + store := buildStore(map[string]modelsdev.Provider{}) + + mc := modelcaps.LoadFromStore(store, "unknown/nonexistent-model") + + // Conservative fallback: only text is allowed + if mc.Supports("image/jpeg") { + t.Error("expected image/jpeg NOT to be supported for unknown model") + } + if mc.Supports("application/pdf") { + t.Error("expected application/pdf NOT to be supported for unknown model") + } + if !mc.Supports("text/plain") { + t.Error("expected text/plain to always be supported even for unknown model") + } +} + +func TestLoadFromStore_OfficeDocsNotAllowed(t *testing.T) { + // Office document MIMEs (DOCX, XLSX, etc.) are ZIP-based binaries and + // cannot be naively TXT-enveloped. models.dev has no "office" or + // "document" modality, so they must return false for all models. 
+ store := buildStore(map[string]modelsdev.Provider{ + "openai": { + Models: map[string]modelsdev.Model{ + "gpt-4o": { + Name: "GPT-4o", + Modalities: modelsdev.Modalities{ + Input: []string{"text", "image", "pdf"}, + Output: []string{"text"}, + }, + }, + }, + }, + }) + + mc := modelcaps.LoadFromStore(store, "openai/gpt-4o") + + for _, officeMIME := range []string{ + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "application/vnd.openxmlformats-officedocument.presentationml.presentation", + "application/msword", + "application/vnd.ms-excel", + "application/rtf", + } { + if mc.Supports(officeMIME) { + t.Errorf("expected Office MIME %q NOT to be supported (models.dev has no document modality)", officeMIME) + } + } +} + +func TestCapsWith(t *testing.T) { + mc := modelcaps.CapsWith(true, false) + if !mc.Supports("image/jpeg") { + t.Error("expected image/jpeg to be supported") + } + if mc.Supports("application/pdf") { + t.Error("expected pdf NOT to be supported") + } + + mc2 := modelcaps.CapsWith(false, false) + if mc2.Supports("image/png") { + t.Error("expected image/png NOT to be supported") + } +} + +// TestSupports_AudioVideoRejected verifies that audio/video MIMEs and Office +// document binaries are NOT allowed — they require explicit model support +// declarations which Phase 1 does not implement (models.dev has no such modality). +func TestSupports_AudioVideoRejected(t *testing.T) { + // Even a vision+pdf capable model should reject audio/video/office. 
+ mc := modelcaps.CapsWith(true, true) + + for _, mime := range []string{ + "audio/mp3", + "audio/wav", + "audio/ogg", + "video/mp4", + "video/webm", + "application/octet-stream", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "application/msword", + } { + if mc.Supports(mime) { + t.Errorf("expected %q to NOT be supported (not in Phase 1 allowlist)", mime) + } + } +} diff --git a/pkg/chat/chat.go b/pkg/chat/chat.go index 9e4a2ced0..519df89f0 100644 --- a/pkg/chat/chat.go +++ b/pkg/chat/chat.go @@ -29,9 +29,11 @@ const ( type MessagePartType string const ( - MessagePartTypeText MessagePartType = "text" + MessagePartTypeText MessagePartType = "text" + // MessagePartTypeImageURL is superseded by MessagePartTypeDocument. Will be removed in a future release. MessagePartTypeImageURL MessagePartType = "image_url" - MessagePartTypeFile MessagePartType = "file" + // MessagePartTypeFile is superseded by MessagePartTypeDocument. Will be removed in a future release. + MessagePartTypeFile MessagePartType = "file" ) type ImageURLDetail string @@ -106,10 +108,14 @@ type MessageFile struct { } type MessagePart struct { - Type MessagePartType `json:"type,omitempty"` - Text string `json:"text,omitempty"` + Type MessagePartType `json:"type,omitempty"` + Text string `json:"text,omitempty"` + // Note: superseded by Document+MessagePartTypeDocument. Will be removed in a future release. ImageURL *MessageImageURL `json:"image_url,omitempty"` - File *MessageFile `json:"file,omitempty"` + // Note: superseded by Document+MessagePartTypeDocument. Will be removed in a future release. + File *MessageFile `json:"file,omitempty"` + // Document is set when Type is MessagePartTypeDocument. 
+ Document *Document `json:"document,omitempty"` } // FinishReason represents the reason why the model finished generating a response diff --git a/pkg/chat/document.go b/pkg/chat/document.go new file mode 100644 index 000000000..b24a85f9c --- /dev/null +++ b/pkg/chat/document.go @@ -0,0 +1,46 @@ +package chat + +// MessagePartTypeDocument is the part type for a structured document attachment. +// Use this type when attaching files (images, PDFs, text, Office docs, etc.) to +// a message. The Document field must be set when this type is used. +// +// This supersedes MessagePartTypeFile and MessagePartTypeImageURL, which are +// deprecated but remain supported for backward compatibility. +const MessagePartTypeDocument MessagePartType = "document" + +// DocumentSource holds the actual content of a document. Exactly one of the +// fields should be set. +type DocumentSource struct { + // InlineText holds the raw text for text/* MIME types (TXT, MD, HTML, CSV, …). + // Used for StrategyTXT attachments. + InlineText string `json:"inline_text,omitempty"` + + // InlineData holds binary content (images, PDFs, Office docs, …) that is + // base64-encoded when sent to the provider. Used for StrategyB64 attachments. + InlineData []byte `json:"inline_data,omitempty"` +} + +// Document represents a file attachment in a message part. It carries +// the file name, post-processing MIME type, and the actual content via Source. +// +// The MimeType field always reflects the final MIME that the attachment system +// will use when sending to the provider (e.g. "image/jpeg" after image +// normalisation, never the original "image/bmp"). +type Document struct { + // Name is the display name of the document (e.g. "report.pdf"). + Name string `json:"name"` + + // MimeType is the post-processing MIME type of the document. For images + // this is always "image/jpeg" or "image/png" regardless of the original + // format. For text files it is the exact MIME (e.g. 
"text/plain", + // "text/markdown", "text/html"). For binary documents it is the original + // MIME (e.g. "application/pdf"). + MimeType string `json:"mime_type"` + + // Size is the byte length of the document content (InlineData or InlineText). + // Optional; zero means unknown. + Size int64 `json:"size,omitempty"` + + // Source holds the actual document content. + Source DocumentSource `json:"source"` +} diff --git a/pkg/model/provider/anthropic/attachments.go b/pkg/model/provider/anthropic/attachments.go new file mode 100644 index 000000000..0ee4d73e4 --- /dev/null +++ b/pkg/model/provider/anthropic/attachments.go @@ -0,0 +1,91 @@ +package anthropic + +import ( + "context" + "encoding/base64" + "fmt" + "log/slog" + "strings" + + "github.com/anthropics/anthropic-sdk-go" + + "github.com/docker/docker-agent/pkg/attachment" + "github.com/docker/docker-agent/pkg/attachment/modelcaps" + "github.com/docker/docker-agent/pkg/chat" +) + +// convertDocument converts a chat.Document to standard Anthropic SDK content blocks +// (not the Beta API). +// +// Routing: +// - image/* with InlineData → ImageBlockParam (base64 source) +// - application/pdf with InlineData → DocumentBlockParam (base64) +// - text with InlineText → TextBlockParam with TXTEnvelope +// - unsupported / no content → nil (logged as warning) +func convertDocument(ctx context.Context, doc chat.Document, modelID string) ([]anthropic.ContentBlockParamUnion, error) { + mc, _ := modelcaps.Load(modelID) + return convertDocumentWithCaps(ctx, doc, mc) +} + +// convertDocumentWithCaps is the caps-injectable variant used by tests. 
+func convertDocumentWithCaps(ctx context.Context, doc chat.Document, mc modelcaps.ModelCapabilities) ([]anthropic.ContentBlockParamUnion, error) { + strategy, reason := attachment.Decide(doc, mc) + + switch strategy { + case attachment.StrategyDrop: + slog.WarnContext(ctx, "attachment dropped", "reason", reason, "doc", doc.Name) + return nil, nil + + case attachment.StrategyB64: + mime := strings.ToLower(doc.MimeType) + b64Data := base64.StdEncoding.EncodeToString(doc.Source.InlineData) + + if IsImageMime(mime) { + return []anthropic.ContentBlockParamUnion{ + { + OfImage: &anthropic.ImageBlockParam{ + Source: anthropic.ImageBlockParamSourceUnion{ + OfBase64: &anthropic.Base64ImageSourceParam{ + Data: b64Data, + MediaType: anthropic.Base64ImageSourceMediaType(mime), + }, + }, + }, + }, + }, nil + } + + if IsAnthropicDocumentMime(mime) { + // application/pdf → native document block + return []anthropic.ContentBlockParamUnion{ + { + OfDocument: &anthropic.DocumentBlockParam{ + Source: anthropic.DocumentBlockParamSourceUnion{ + OfBase64: &anthropic.Base64PDFSourceParam{ + Data: b64Data, + MediaType: "application/pdf", + }, + }, + }, + }, + }, nil + } + + // Other binary: fall back to TXT envelope. 
+ slog.DebugContext(ctx, "anthropic: no native block for MIME, falling back to TXT envelope", + "mime", doc.MimeType, "doc", doc.Name) + envelope := attachment.TXTEnvelope(doc.Name, doc.MimeType, b64Data) + return []anthropic.ContentBlockParamUnion{ + {OfText: &anthropic.TextBlockParam{Text: envelope}}, + }, nil + + case attachment.StrategyTXT: + envelope := attachment.TXTEnvelope(doc.Name, doc.MimeType, doc.Source.InlineText) + return []anthropic.ContentBlockParamUnion{ + {OfText: &anthropic.TextBlockParam{Text: envelope}}, + }, nil + + default: + return nil, fmt.Errorf("unknown attachment strategy %d", strategy) + } +} diff --git a/pkg/model/provider/anthropic/attachments_test.go b/pkg/model/provider/anthropic/attachments_test.go new file mode 100644 index 000000000..f5ccd2117 --- /dev/null +++ b/pkg/model/provider/anthropic/attachments_test.go @@ -0,0 +1,109 @@ +package anthropic + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/docker/docker-agent/pkg/attachment/modelcaps" + "github.com/docker/docker-agent/pkg/chat" +) + +// minJPEG is a minimal JPEG magic-byte header for use in tests. +var minJPEG = []byte{0xFF, 0xD8, 0xFF, 0xE0} + +// minPDF is a minimal PDF magic-byte header for use in tests. +var minPDF = []byte{0x25, 0x50, 0x44, 0x46, 0x2D} // %PDF- + +// TestConvertDocumentAnthropic_StrategyB64_Image verifies that an image document +// with InlineData and a vision-capable model produces a native ImageBlockParam. 
+func TestConvertDocumentAnthropic_StrategyB64_Image(t *testing.T) { + doc := chat.Document{ + Name: "photo.jpg", + MimeType: "image/jpeg", + Source: chat.DocumentSource{InlineData: minJPEG}, + } + + visionCaps := modelcaps.CapsWith(true, true) + blocks, err := convertDocumentWithCaps(t.Context(), doc, visionCaps) + require.NoError(t, err) + require.Len(t, blocks, 1, "expected exactly one block") + require.NotNil(t, blocks[0].OfImage, "expected image block") + assert.Nil(t, blocks[0].OfText, "expected no text block for image") +} + +// TestConvertDocumentAnthropic_StrategyB64_PDF verifies that a PDF document +// produces a native BetaRequestDocumentBlock when the model supports PDFs. +func TestConvertDocumentAnthropic_StrategyB64_PDF(t *testing.T) { + doc := chat.Document{ + Name: "spec.pdf", + MimeType: "application/pdf", + Source: chat.DocumentSource{InlineData: minPDF}, + } + + pdfCaps := modelcaps.CapsWith(true, true) + blocks, err := convertDocumentWithCaps(t.Context(), doc, pdfCaps) + require.NoError(t, err) + require.Len(t, blocks, 1, "expected exactly one block") + require.NotNil(t, blocks[0].OfDocument, "expected document block for PDF") + assert.Nil(t, blocks[0].OfText, "expected no text block for PDF") +} + +func TestConvertDocumentAnthropic_StrategyTXT(t *testing.T) { + doc := chat.Document{ + Name: "spec.md", + MimeType: "text/markdown", + Source: chat.DocumentSource{InlineText: "## Specification"}, + } + + blocks, err := convertDocument(t.Context(), doc, "") + require.NoError(t, err) + require.Len(t, blocks, 1) + require.NotNil(t, blocks[0].OfText) + assert.Contains(t, blocks[0].OfText.Text, "spec.md") + assert.Contains(t, blocks[0].OfText.Text, "text/markdown") + assert.Contains(t, blocks[0].OfText.Text, "## Specification") +} + +func TestConvertDocumentAnthropic_StrategyTXT_Envelope(t *testing.T) { + doc := chat.Document{ + Name: "notes.txt", + MimeType: "text/plain", + Source: chat.DocumentSource{InlineText: "some notes"}, + } + + blocks, err := 
convertDocument(t.Context(), doc, "") + require.NoError(t, err) + require.Len(t, blocks, 1) + require.NotNil(t, blocks[0].OfText) + text := blocks[0].OfText.Text + assert.True(t, strings.HasPrefix(text, "= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') || r == '-' { + sb.WriteRune(r) + } else { + sb.WriteRune('-') + } + } + result := sb.String() + if result == "" { + return "document" + } + return result +} diff --git a/pkg/model/provider/bedrock/attachments_test.go b/pkg/model/provider/bedrock/attachments_test.go new file mode 100644 index 000000000..fd2bc2516 --- /dev/null +++ b/pkg/model/provider/bedrock/attachments_test.go @@ -0,0 +1,118 @@ +package bedrock + +import ( + "strings" + "testing" + + "github.com/aws/aws-sdk-go-v2/service/bedrockruntime/types" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/docker/docker-agent/pkg/attachment/modelcaps" + "github.com/docker/docker-agent/pkg/chat" +) + +// minJPEG is a minimal JPEG magic-byte header for use in tests. +var minJPEG = []byte{0xFF, 0xD8, 0xFF, 0xE0} + +// minPDF is a minimal PDF magic-byte header for use in tests. +var minPDF = []byte{0x25, 0x50, 0x44, 0x46, 0x2D} // %PDF- + +// TestConvertDocumentBedrock_StrategyB64_Image verifies that an image document +// with InlineData and a vision-capable model produces a ContentBlockMemberImage. 
+func TestConvertDocumentBedrock_StrategyB64_Image(t *testing.T) { + doc := chat.Document{ + Name: "photo.jpg", + MimeType: "image/jpeg", + Source: chat.DocumentSource{InlineData: minJPEG}, + } + + visionCaps := modelcaps.CapsWith(true, true) + blocks, err := convertDocumentWithCaps(t.Context(), doc, visionCaps) + require.NoError(t, err) + require.Len(t, blocks, 1, "expected exactly one block") + imageBlock, ok := blocks[0].(*types.ContentBlockMemberImage) + require.True(t, ok, "expected ContentBlockMemberImage, got %T", blocks[0]) + assert.Equal(t, types.ImageFormatJpeg, imageBlock.Value.Format) + srcBytes, ok := imageBlock.Value.Source.(*types.ImageSourceMemberBytes) + require.True(t, ok, "expected ImageSourceMemberBytes") + assert.Equal(t, minJPEG, srcBytes.Value) +} + +// TestConvertDocumentBedrock_StrategyB64_PDF verifies that a PDF document +// produces a ContentBlockMemberDocument when the model supports PDFs. +func TestConvertDocumentBedrock_StrategyB64_PDF(t *testing.T) { + doc := chat.Document{ + Name: "spec.pdf", + MimeType: "application/pdf", + Source: chat.DocumentSource{InlineData: minPDF}, + } + + pdfCaps := modelcaps.CapsWith(true, true) + blocks, err := convertDocumentWithCaps(t.Context(), doc, pdfCaps) + require.NoError(t, err) + require.Len(t, blocks, 1, "expected exactly one block") + docBlock, ok := blocks[0].(*types.ContentBlockMemberDocument) + require.True(t, ok, "expected ContentBlockMemberDocument, got %T", blocks[0]) + assert.Equal(t, types.DocumentFormatPdf, docBlock.Value.Format) +} + +// TestConvertDocumentBedrock_StrategyB64_ImageDropped verifies that an image +// is dropped when the model does not support vision. 
+func TestConvertDocumentBedrock_StrategyB64_ImageDropped(t *testing.T) { + doc := chat.Document{ + Name: "photo.jpg", + MimeType: "image/jpeg", + Source: chat.DocumentSource{InlineData: minJPEG}, + } + + textOnlyCaps := modelcaps.CapsWith(false, false) + blocks, err := convertDocumentWithCaps(t.Context(), doc, textOnlyCaps) + require.NoError(t, err) + assert.Nil(t, blocks, "image should be dropped for text-only model") +} + +func TestConvertDocumentBedrock_StrategyTXT(t *testing.T) { + doc := chat.Document{ + Name: "notes.md", + MimeType: "text/markdown", + Source: chat.DocumentSource{InlineText: "## Notes"}, + } + + blocks, err := convertDocument(t.Context(), doc, "") + require.NoError(t, err) + require.Len(t, blocks, 1) + textBlock, ok := blocks[0].(*types.ContentBlockMemberText) + require.True(t, ok, "expected text block for TXT strategy") + assert.Contains(t, textBlock.Value, "notes.md") + assert.Contains(t, textBlock.Value, "text/markdown") + assert.Contains(t, textBlock.Value, "## Notes") +} + +func TestConvertDocumentBedrock_StrategyTXT_Envelope(t *testing.T) { + doc := chat.Document{ + Name: "data.csv", + MimeType: "text/csv", + Source: chat.DocumentSource{InlineText: "a,b"}, + } + + blocks, err := convertDocument(t.Context(), doc, "") + require.NoError(t, err) + require.Len(t, blocks, 1) + textBlock, ok := blocks[0].(*types.ContentBlockMemberText) + require.True(t, ok, "expected text block") + assert.True(t, strings.HasPrefix(textBlock.Value, " 0 { bedrockMessages = append(bedrockMessages, types.Message{ Role: types.ConversationRoleUser, @@ -119,7 +120,7 @@ func applyCachePointsToMessages(messages []types.Message) { } } -func convertUserContent(msg *chat.Message) []types.ContentBlock { +func convertUserContent(ctx context.Context, msg *chat.Message, modelID string) []types.ContentBlock { var blocks []types.ContentBlock if len(msg.MultiContent) > 0 { @@ -130,11 +131,21 @@ func convertUserContent(msg *chat.Message) []types.ContentBlock { Value: part.Text, 
}) case chat.MessagePartTypeImageURL: + // Note: superseded by MessagePartTypeDocument. if part.ImageURL != nil { if imageBlock := convertImageURL(part.ImageURL); imageBlock != nil { blocks = append(blocks, imageBlock) } } + case chat.MessagePartTypeDocument: + if part.Document != nil { + docBlocks, err := convertDocument(ctx, *part.Document, modelID) + if err != nil { + slog.WarnContext(ctx, "failed to convert document attachment", "error", err, "doc", part.Document.Name) + continue + } + blocks = append(blocks, docBlocks...) + } } } } else { diff --git a/pkg/model/provider/dmr/client.go b/pkg/model/provider/dmr/client.go index bfbe52988..d3693a54b 100644 --- a/pkg/model/provider/dmr/client.go +++ b/pkg/model/provider/dmr/client.go @@ -147,8 +147,8 @@ func NewClient(ctx context.Context, cfg *latest.ModelConfig, opts ...options.Opt // convertMessages converts chat messages to OpenAI format and merges consecutive // system/user messages, which is needed by some local models run by DMR. -func convertMessages(messages []chat.Message) []openai.ChatCompletionMessageParamUnion { - openaiMessages := oaistream.ConvertMessages(messages) +func (c *Client) convertMessages(ctx context.Context, messages []chat.Message) []openai.ChatCompletionMessageParamUnion { + openaiMessages := oaistream.ConvertMessages(ctx, messages, c.ModelConfig.Model) return oaistream.MergeConsecutiveMessages(openaiMessages) } @@ -171,7 +171,7 @@ func (c *Client) CreateChatCompletionStream(ctx context.Context, messages []chat params := openai.ChatCompletionNewParams{ Model: c.ModelConfig.Model, - Messages: convertMessages(messages), + Messages: c.convertMessages(ctx, messages), StreamOptions: openai.ChatCompletionStreamOptionsParam{ IncludeUsage: openai.Bool(trackUsage), }, diff --git a/pkg/model/provider/gemini/attachments.go b/pkg/model/provider/gemini/attachments.go new file mode 100644 index 000000000..9d5b53d00 --- /dev/null +++ b/pkg/model/provider/gemini/attachments.go @@ -0,0 +1,46 @@ +package 
gemini + +import ( + "context" + "fmt" + "log/slog" + + "google.golang.org/genai" + + "github.com/docker/docker-agent/pkg/attachment" + "github.com/docker/docker-agent/pkg/attachment/modelcaps" + "github.com/docker/docker-agent/pkg/chat" +) + +// convertDocument converts a chat.Document to a Gemini genai.Part. +// +// Routing: +// - image/* or binary with InlineData → genai.Blob part +// - text MIMEs with InlineText → genai.Text part with TXTEnvelope +// - unsupported / no content → nil (logged as warning) +func convertDocument(ctx context.Context, doc chat.Document, modelID string) (*genai.Part, error) { + mc, _ := modelcaps.Load(modelID) + return convertDocumentWithCaps(ctx, doc, mc) +} + +// convertDocumentWithCaps is the caps-injectable variant used by tests. +func convertDocumentWithCaps(ctx context.Context, doc chat.Document, mc modelcaps.ModelCapabilities) (*genai.Part, error) { + strategy, reason := attachment.Decide(doc, mc) + + switch strategy { + case attachment.StrategyDrop: + slog.WarnContext(ctx, "attachment dropped", "reason", reason, "doc", doc.Name) + return nil, nil + + case attachment.StrategyB64: + // Gemini's genai.NewPartFromBytes wraps binary data as an inline blob. 
+ return genai.NewPartFromBytes(doc.Source.InlineData, doc.MimeType), nil + + case attachment.StrategyTXT: + envelope := attachment.TXTEnvelope(doc.Name, doc.MimeType, doc.Source.InlineText) + return genai.NewPartFromText(envelope), nil + + default: + return nil, fmt.Errorf("unknown attachment strategy %d", strategy) + } +} diff --git a/pkg/model/provider/gemini/attachments_test.go b/pkg/model/provider/gemini/attachments_test.go new file mode 100644 index 000000000..ea85f5514 --- /dev/null +++ b/pkg/model/provider/gemini/attachments_test.go @@ -0,0 +1,90 @@ +package gemini + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/docker/docker-agent/pkg/attachment/modelcaps" + "github.com/docker/docker-agent/pkg/chat" +) + +// minJPEG is a minimal JPEG magic-byte header for use in tests. +var minJPEG = []byte{0xFF, 0xD8, 0xFF, 0xE0} + +// TestConvertDocumentGemini_StrategyB64_Image verifies that an image document +// with InlineData and a vision-capable model produces a Blob part (not a text part). +func TestConvertDocumentGemini_StrategyB64_Image(t *testing.T) { + doc := chat.Document{ + Name: "photo.jpg", + MimeType: "image/jpeg", + Source: chat.DocumentSource{InlineData: minJPEG}, + } + + visionCaps := modelcaps.CapsWith(true, true) + part, err := convertDocumentWithCaps(t.Context(), doc, visionCaps) + require.NoError(t, err) + require.NotNil(t, part, "expected a non-nil part for B64 image") + // For a blob part the Text field is empty; the inline blob carries the data. + assert.Empty(t, part.Text, "expected blob part, not text part") + assert.Equal(t, minJPEG, part.InlineData.Data, "inline data should match input bytes") + assert.Equal(t, "image/jpeg", part.InlineData.MIMEType) +} + +// TestConvertDocumentGemini_StrategyB64_ImageDropped verifies that an image is +// dropped when the model does not support vision. 
+func TestConvertDocumentGemini_StrategyB64_ImageDropped(t *testing.T) { + doc := chat.Document{ + Name: "photo.jpg", + MimeType: "image/jpeg", + Source: chat.DocumentSource{InlineData: minJPEG}, + } + + textOnlyCaps := modelcaps.CapsWith(false, false) + part, err := convertDocumentWithCaps(t.Context(), doc, textOnlyCaps) + require.NoError(t, err) + assert.Nil(t, part, "image should be dropped for text-only model") +} + +func TestConvertDocumentGemini_StrategyTXT(t *testing.T) { + doc := chat.Document{ + Name: "readme.md", + MimeType: "text/markdown", + Source: chat.DocumentSource{InlineText: "# Read Me"}, + } + + part, err := convertDocument(t.Context(), doc, "") + require.NoError(t, err) + require.NotNil(t, part) + assert.Contains(t, part.Text, "readme.md") + assert.Contains(t, part.Text, "text/markdown") + assert.Contains(t, part.Text, "# Read Me") +} + +func TestConvertDocumentGemini_StrategyTXT_Envelope(t *testing.T) { + doc := chat.Document{ + Name: "data.csv", + MimeType: "text/csv", + Source: chat.DocumentSource{InlineText: "col1,col2"}, + } + + part, err := convertDocument(t.Context(), doc, "") + require.NoError(t, err) + require.NotNil(t, part) + assert.True(t, strings.HasPrefix(part.Text, " 0 { - parts := convertMultiContent(msg.MultiContent, msg.ThoughtSignature) + parts := convertMultiContent(ctx, msg.MultiContent, msg.ThoughtSignature, modelID) if len(parts) > 0 { contents = append(contents, genai.NewContentFromParts(parts, role)) } @@ -287,16 +287,28 @@ func newTextPartWithSignature(text string, signature []byte) *genai.Part { } // convertMultiContent converts multi-part content to Gemini parts -func convertMultiContent(multiContent []chat.MessagePart, thoughtSignature []byte) []*genai.Part { +func convertMultiContent(ctx context.Context, multiContent []chat.MessagePart, thoughtSignature []byte, modelID string) []*genai.Part { parts := make([]*genai.Part, 0, len(multiContent)) for _, part := range multiContent { switch part.Type { case 
chat.MessagePartTypeText: parts = append(parts, newTextPartWithSignature(part.Text, thoughtSignature)) case chat.MessagePartTypeImageURL: + // Note: superseded by MessagePartTypeDocument. if imgPart := convertImageURLToPart(part.ImageURL); imgPart != nil { parts = append(parts, imgPart) } + case chat.MessagePartTypeDocument: + if part.Document != nil { + docPart, err := convertDocument(ctx, *part.Document, modelID) + if err != nil { + slog.WarnContext(ctx, "failed to convert document attachment", "error", err, "doc", part.Document.Name) + continue + } + if docPart != nil { + parts = append(parts, docPart) + } + } } } return parts @@ -589,7 +601,7 @@ func (c *Client) CreateChatCompletionStream( } } - contents := convertMessagesToGemini(messages) + contents := convertMessagesToGemini(ctx, messages, c.ModelConfig.Model) // Debug: Log the messages we're sending slog.Debug("Gemini messages", "count", len(contents)) diff --git a/pkg/model/provider/gemini/client_test.go b/pkg/model/provider/gemini/client_test.go index 2f81688bf..8aeb7b205 100644 --- a/pkg/model/provider/gemini/client_test.go +++ b/pkg/model/provider/gemini/client_test.go @@ -362,10 +362,10 @@ func TestConvertMessagesToGemini_ThoughtSignature(t *testing.T) { t.Run(tt.name, func(t *testing.T) { t.Parallel() - contents := convertMessagesToGemini([]chat.Message{ + contents := convertMessagesToGemini(t.Context(), []chat.Message{ {Role: chat.MessageRoleUser, Content: "go"}, tt.message, - }) + }, "") require.Len(t, contents, 2) assistant := contents[1] diff --git a/pkg/model/provider/oaistream/attachments.go b/pkg/model/provider/oaistream/attachments.go new file mode 100644 index 000000000..738120642 --- /dev/null +++ b/pkg/model/provider/oaistream/attachments.go @@ -0,0 +1,73 @@ +package oaistream + +import ( + "context" + "encoding/base64" + "fmt" + "log/slog" + "strings" + + "github.com/openai/openai-go/v3" + + "github.com/docker/docker-agent/pkg/attachment" + 
"github.com/docker/docker-agent/pkg/attachment/modelcaps" + "github.com/docker/docker-agent/pkg/chat" +) + +// convertDocument converts a chat.Document to zero or more +// ChatCompletionContentPartUnionParam values using the OpenAI Chat Completions +// format. It is also used by all oaistream-based providers (Mistral, xAI, +// Ollama, Nebius, MiniMax, GitHub Copilot, Azure, Requesty). +// +// Routing: +// - image/* with InlineData → data-URI image part +// - other binary MIMEs with InlineData → text part with TXTEnvelope fallback +// - text MIMEs with InlineText → text part with TXTEnvelope +// - unsupported / no content → nil (logged as warning) +func convertDocument(ctx context.Context, doc chat.Document, modelID string) ([]openai.ChatCompletionContentPartUnionParam, error) { + mc, _ := modelcaps.Load(modelID) + return convertDocumentWithCaps(ctx, doc, mc) +} + +// convertDocumentWithCaps is the caps-injectable variant used by tests. +func convertDocumentWithCaps(ctx context.Context, doc chat.Document, mc modelcaps.ModelCapabilities) ([]openai.ChatCompletionContentPartUnionParam, error) { + strategy, reason := attachment.Decide(doc, mc) + + switch strategy { + case attachment.StrategyDrop: + slog.WarnContext(ctx, "attachment dropped", "reason", reason, "doc", doc.Name) + return nil, nil + + case attachment.StrategyB64: + mime := strings.ToLower(doc.MimeType) + if strings.HasPrefix(mime, "image/") { + // Build an OpenAI image part with a data URI. + dataURI := fmt.Sprintf("data:%s;base64,%s", + doc.MimeType, + base64.StdEncoding.EncodeToString(doc.Source.InlineData)) + return []openai.ChatCompletionContentPartUnionParam{ + openai.ImageContentPart(openai.ChatCompletionContentPartImageImageURLParam{ + URL: dataURI, + }), + }, nil + } + // Non-image binary (PDF, Office docs…): OpenAI Chat Completions has no + // native document block, so fall back to a TXT envelope. 
+ slog.DebugContext(ctx, "oaistream: no native block for MIME, falling back to TXT envelope", + "mime", doc.MimeType, "doc", doc.Name) + envelope := attachment.TXTEnvelope(doc.Name, doc.MimeType, + base64.StdEncoding.EncodeToString(doc.Source.InlineData)) + return []openai.ChatCompletionContentPartUnionParam{ + openai.TextContentPart(envelope), + }, nil + + case attachment.StrategyTXT: + envelope := attachment.TXTEnvelope(doc.Name, doc.MimeType, doc.Source.InlineText) + return []openai.ChatCompletionContentPartUnionParam{ + openai.TextContentPart(envelope), + }, nil + + default: + return nil, fmt.Errorf("unknown attachment strategy %d", strategy) + } +} diff --git a/pkg/model/provider/oaistream/attachments_test.go b/pkg/model/provider/oaistream/attachments_test.go new file mode 100644 index 000000000..ea82661e4 --- /dev/null +++ b/pkg/model/provider/oaistream/attachments_test.go @@ -0,0 +1,99 @@ +package oaistream + +import ( + "encoding/base64" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/docker/docker-agent/pkg/attachment/modelcaps" + "github.com/docker/docker-agent/pkg/chat" +) + +// minJPEG is a minimal JPEG magic-byte header for use in tests. +var minJPEG = []byte{0xFF, 0xD8, 0xFF, 0xE0} + +// TestConvertDocument_StrategyB64_Image verifies that an image document with +// InlineData and a vision-capable model produces an image content part with +// a data-URI, not a text part. 
+func TestConvertDocument_StrategyB64_Image(t *testing.T) { + doc := chat.Document{ + Name: "photo.jpg", + MimeType: "image/jpeg", + Source: chat.DocumentSource{InlineData: minJPEG}, + } + + visionCaps := modelcaps.CapsWith(true, true) + parts, err := convertDocumentWithCaps(t.Context(), doc, visionCaps) + require.NoError(t, err) + require.Len(t, parts, 1, "expected exactly one image part") + require.NotNil(t, parts[0].OfImageURL, "expected image part, got non-image") + assert.Nil(t, parts[0].OfText, "expected no text part for B64 image") + + // Data URI must embed the base64-encoded payload. + wantB64 := base64.StdEncoding.EncodeToString(minJPEG) + assert.Contains(t, parts[0].OfImageURL.ImageURL.URL, "data:image/jpeg;base64,") + assert.Contains(t, parts[0].OfImageURL.ImageURL.URL, wantB64) +} + +// TestConvertDocument_StrategyB64_ImageDropped verifies that an image is +// dropped when the model does not support vision. +func TestConvertDocument_StrategyB64_ImageDropped(t *testing.T) { + doc := chat.Document{ + Name: "photo.jpg", + MimeType: "image/jpeg", + Source: chat.DocumentSource{InlineData: minJPEG}, + } + + textOnlyCaps := modelcaps.CapsWith(false, false) + parts, err := convertDocumentWithCaps(t.Context(), doc, textOnlyCaps) + require.NoError(t, err) + assert.Nil(t, parts, "image should be dropped for text-only model") +} + +func TestConvertDocument_StrategyTXT(t *testing.T) { + doc := chat.Document{ + Name: "readme.md", + MimeType: "text/markdown", + Source: chat.DocumentSource{InlineText: "# Hello World"}, + } + + parts, err := convertDocument(t.Context(), doc, "") + require.NoError(t, err) + require.Len(t, parts, 1) + require.NotNil(t, parts[0].OfText) + assert.Contains(t, parts[0].OfText.Text, "readme.md") + assert.Contains(t, parts[0].OfText.Text, "text/markdown") + assert.Contains(t, parts[0].OfText.Text, "# Hello World") +} + +func TestConvertDocument_StrategyTXT_Envelope(t *testing.T) { + doc := chat.Document{ + Name: "data.csv", + MimeType: 
"text/csv", + Source: chat.DocumentSource{InlineText: "a,b,c\n1,2,3"}, + } + + parts, err := convertDocument(t.Context(), doc, "") + require.NoError(t, err) + require.Len(t, parts, 1) + require.NotNil(t, parts[0].OfText) + text := parts[0].OfText.Text + assert.True(t, strings.HasPrefix(text, "