@@ -12,20 +12,64 @@ type ContentMessage = {
1212 content : string ;
1313} ;
1414
/**
 * Message format used by the OpenAI and Anthropic APIs for media.
 *
 * Instead of a plain string, `content` is an array of parts, each tagged with
 * a string `type`. Any additional keys are allowed on both the message and
 * its parts.
 */
type ContentArrayMessage = {
  [key: string]: unknown;
  content: {
    [key: string]: unknown;
    type: string;
  }[];
};
25+
/**
 * Inline media content source, with a potentially very large base64
 * blob or `data:` URI.
 *
 * Arbitrary extra keys are allowed; in addition the object must match at
 * least one of the payload shapes listed below.
 */
type ContentMedia = Record<string, unknown> &
  (
    | {
        media_type: string;
        data: string;
      }
    | {
        image_url: `data:${string}`;
      }
    | {
        type: 'blob' | 'base64';
        content: string;
      }
    | {
        b64_json: string;
      }
    | {
        uri: `data:${string}`;
      }
  );
50+
/**
 * Message format used by Google GenAI API.
 * Parts can be strings, objects with a text property, or media objects
 * carrying a `type` and inline `content`.
 */
type PartsMessage = {
  [key: string]: unknown;
  parts: Array<TextPart | MediaPart>;
};
2359
/**
 * A part in a Google GenAI message that contains text: either the bare
 * string itself or an object exposing it on `text`.
 */
type TextPart = string | { text: string };
2864
/**
 * A part in a Google GenAI message that contains media.
 *
 * `type` tags the kind of payload and `content` holds the payload as a string.
 */
type MediaPart = {
  type: string;
  content: string;
};
72+
2973/**
3074 * Calculate the UTF-8 byte length of a string.
3175 */
@@ -79,11 +123,12 @@ function truncateTextByBytes(text: string, maxBytes: number): string {
79123 *
80124 * @returns The text content
81125 */
function getPartText(part: TextPart | MediaPart): string {
  // A bare string part is its own text.
  if (typeof part === 'string') {
    return part;
  }
  // Object text parts expose their text on `text`; media parts have no
  // `text` property and therefore contribute an empty string.
  return 'text' in part ? part.text : '';
}
88133
89134/**
@@ -93,7 +138,7 @@ function getPartText(part: TextPart): string {
93138 * @param text - New text content
94139 * @returns New part with updated text
95140 */
96- function withPartText ( part : TextPart , text : string ) : TextPart {
141+ function withPartText ( part : TextPart | MediaPart , text : string ) : TextPart {
97142 if ( typeof part === 'string' ) {
98143 return text ;
99144 }
@@ -112,6 +157,33 @@ function isContentMessage(message: unknown): message is ContentMessage {
112157 ) ;
113158}
114159
/**
 * Check if a message has the OpenAI/Anthropic content array format.
 *
 * Only the container shape is verified: the value must be a non-null object
 * whose `content` property is an array. Individual array entries are not
 * inspected.
 *
 * @param message - Value to test
 * @returns `true` when `message` is an object with an array `content`
 */
function isContentArrayMessage(message: unknown): message is ContentArrayMessage {
  return message !== null && typeof message === 'object' && 'content' in message && Array.isArray(message.content);
}
166+
/**
 * Check if a content part is an OpenAI/Anthropic media source.
 *
 * A part counts as media when it is a non-null object and at least one of
 * these holds:
 * - it has a string `type` and a `source` that is itself media,
 * - it has a string `media_type` together with a `data` key,
 * - `image_url` or `uri` is a string starting with `data:`,
 * - `type` is `'blob'` or `'base64'`,
 * - it has a `b64_json` key,
 * - `type` is `'image_generation'` and a `result` key is present.
 *
 * @param part - Value to test
 * @returns `true` when `part` matches one of the shapes above
 */
function isContentMedia(part: unknown): part is ContentMedia {
  if (!part || typeof part !== 'object') return false;

  return (
    isContentMediaSource(part) ||
    ('media_type' in part && typeof part.media_type === 'string' && 'data' in part) ||
    ('image_url' in part && typeof part.image_url === 'string' && part.image_url.startsWith('data:')) ||
    ('type' in part && (part.type === 'blob' || part.type === 'base64')) ||
    'b64_json' in part ||
    ('type' in part && 'result' in part && part.type === 'image_generation') ||
    ('uri' in part && typeof part.uri === 'string' && part.uri.startsWith('data:'))
  );
}

/**
 * Check if a part wraps media in a nested `source` object: it must have a
 * string `type` and a `source` property that itself passes `isContentMedia`.
 */
function isContentMediaSource(part: NonNullable<unknown>): boolean {
  return 'type' in part && typeof part.type === 'string' && 'source' in part && isContentMedia(part.source);
}
186+
115187/**
116188 * Check if a message has the Google GenAI parts format.
117189 */
@@ -167,7 +239,7 @@ function truncatePartsMessage(message: PartsMessage, maxBytes: number): unknown[
167239 }
168240
169241 // Include parts until we run out of space
170- const includedParts : TextPart [ ] = [ ] ;
242+ const includedParts : ( TextPart | MediaPart ) [ ] = [ ] ;
171243
172244 for ( const part of parts ) {
173245 const text = getPartText ( part ) ;
@@ -190,7 +262,10 @@ function truncatePartsMessage(message: PartsMessage, maxBytes: number): unknown[
190262 }
191263 }
192264
265+ /* c8 ignore start
266+ * for type safety only, algorithm guarantees SOME text included */
193267 return includedParts . length > 0 ? [ { ...message , parts : includedParts } ] : [ ] ;
268+ /* c8 ignore stop */
194269}
195270
196271/**
@@ -205,9 +280,11 @@ function truncatePartsMessage(message: PartsMessage, maxBytes: number): unknown[
205280 * @returns Array containing the truncated message, or empty array if truncation fails
206281 */
207282function truncateSingleMessage ( message : unknown , maxBytes : number ) : unknown [ ] {
283+ /* c8 ignore start - unreachable */
208284 if ( ! message || typeof message !== 'object' ) {
209285 return [ ] ;
210286 }
287+ /* c8 ignore stop */
211288
212289 if ( isContentMessage ( message ) ) {
213290 return truncateContentMessage ( message , maxBytes ) ;
@@ -221,6 +298,59 @@ function truncateSingleMessage(message: unknown, maxBytes: number): unknown[] {
221298 return [ ] ;
222299}
223300
/** Placeholder written in place of a stripped inline media payload. */
const REMOVED_STRING = '<removed>';

/** Keys whose values are replaced by the placeholder when stripping a media part. */
const MEDIA_FIELDS = ['image_url', 'data', 'content', 'b64_json', 'result', 'uri'] as const;

/**
 * Return a shallow copy of a media part with its inline payload removed.
 *
 * A nested `source` that is itself media is stripped recursively, then every
 * key listed in `MEDIA_FIELDS` that holds a truthy value is overwritten with
 * `REMOVED_STRING`. The input object is left untouched.
 *
 * @param part - Media part to strip
 * @returns A copy of `part` with payload fields replaced by the placeholder
 */
function stripInlineMediaFromSingleMessage(part: ContentMedia): ContentMedia {
  const strip = { ...part };
  if (isContentMedia(strip.source)) {
    strip.source = stripInlineMediaFromSingleMessage(strip.source);
  }
  for (const field of MEDIA_FIELDS) {
    // Absent or empty fields are skipped so no new keys are introduced.
    if (strip[field]) strip[field] = REMOVED_STRING;
  }
  return strip;
}
315+
/**
 * Strip the inline media from message arrays.
 *
 * This returns stripped copies. We do NOT want to mutate the data in place,
 * because of course we still want the actual API/client to handle the media.
 *
 * Each entry is handled as follows:
 * - non-object entries (strings, null, ...) are returned unchanged;
 * - an entry with an array `content` has that array stripped recursively,
 *   otherwise a `content` that is itself media is stripped directly;
 * - an entry in the parts format has its `parts` stripped recursively;
 * - finally, an entry that is itself media has its payload fields stripped.
 *
 * @param messages - Messages (or message parts) to process
 * @returns A new array with inline media replaced by placeholders
 */
export function stripInlineMediaFromMessages(messages: unknown[]): unknown[] {
  return messages.map(message => {
    if (!message || typeof message !== 'object') {
      return message;
    }

    // Accumulate into a local instead of reassigning the callback parameter.
    let stripped: unknown = message;

    if (isContentArrayMessage(message)) {
      stripped = {
        ...message,
        content: stripInlineMediaFromMessages(message.content),
      };
    } else if ('content' in message && isContentMedia(message.content)) {
      stripped = {
        ...message,
        content: stripInlineMediaFromSingleMessage(message.content),
      };
    }

    // The remaining checks run on the possibly already-copied value so the
    // individual stripping steps compose.
    if (isPartsMessage(stripped)) {
      stripped = {
        ...stripped,
        parts: stripInlineMediaFromMessages(stripped.parts),
      };
    }
    if (isContentMedia(stripped)) {
      stripped = stripInlineMediaFromSingleMessage(stripped);
    }

    return stripped;
  });
}
353+
224354/**
225355 * Truncate an array of messages to fit within a byte limit.
226356 *
@@ -246,6 +376,11 @@ export function truncateMessagesByBytes(messages: unknown[], maxBytes: number):
246376 return messages ;
247377 }
248378
379+ // strip inline media first. This will often get us below the threshold,
380+ // while preserving human-readable information about messages sent.
381+ // eslint-disable-next-line no-param-reassign
382+ messages = stripInlineMediaFromMessages ( messages ) ;
383+
249384 // Fast path: if all messages fit, return as-is
250385 const totalBytes = jsonBytes ( messages ) ;
251386 if ( totalBytes <= maxBytes ) {
0 commit comments