@@ -268,19 +268,149 @@ function normalizeGraphSpec(rawSpec = {}) {
268268 }
269269}
270270
/**
 * Find where the balanced JSON object/array that opens at `startIndex` ends.
 *
 * Scans forward once, keeping a stack of the closing brackets still expected.
 * Double-quoted strings are skipped (backslash escapes are honoured) so that
 * brackets appearing inside string values never affect the nesting depth.
 *
 * @param {string} [text=''] - Text to scan.
 * @param {number} [startIndex=0] - Index of the opening `{` or `[`.
 * @returns {number|null} Exclusive end index of the balanced segment, or
 *   `null` when `text` is not a string, `startIndex` is not an in-range
 *   integer, the character at `startIndex` is not `{`/`[`, a closing bracket
 *   does not match its opener, or the text ends before the segment closes.
 */
function findBalancedJsonEnd(text = '', startIndex = 0) {
  // Default parameters only replace `undefined`; guard against null and other
  // non-string inputs instead of throwing on `.length`.
  if (typeof text !== 'string') return null
  // A non-integer (or numeric-string) index can never address an opener and
  // would turn `index += 1` into string concatenation below.
  if (!Number.isInteger(startIndex)) return null
  if (startIndex < 0 || startIndex >= text.length) return null
  if (text[startIndex] !== '{' && text[startIndex] !== '[') return null

  const expectedClosers = []
  let inString = false
  let escaped = false

  for (let index = startIndex; index < text.length; index += 1) {
    const char = text[index]

    if (inString) {
      if (escaped) {
        // The character after a backslash is consumed verbatim.
        escaped = false
      } else if (char === '\\') {
        escaped = true
      } else if (char === '"') {
        inString = false
      }
      continue
    }

    switch (char) {
      case '"':
        inString = true
        break
      case '{':
        expectedClosers.push('}')
        break
      case '[':
        expectedClosers.push(']')
        break
      case '}':
      case ']':
        // pop() yields undefined on an empty stack, which also fails the match.
        if (expectedClosers.pop() !== char) return null
        if (expectedClosers.length === 0) return index + 1
        break
      default:
        break
    }
  }

  // Ran out of text before the outermost bracket was closed.
  return null
}
320+
/**
 * Collect every outermost, parseable JSON object/array embedded in `text`.
 *
 * Each `{` or `[` is tried as the start of a balanced segment. When the
 * segment parses, it is recorded and scanning resumes right after it, so
 * values nested inside an accepted segment are not reported separately.
 * When it does not balance or does not parse, scanning resumes at the next
 * character.
 *
 * @param {string} [text=''] - Text that may contain JSON payloads.
 * @returns {Array<{start: number, end: number, rawJson: string, value: *}>}
 *   Segments in order of appearance; `end` is exclusive.
 */
function extractTopLevelJsonSegmentsFromText(text = '') {
  const segments = []
  if (!text) return segments

  let cursor = 0
  while (cursor < text.length) {
    const opener = text[cursor]
    const isOpener = opener === '{' || opener === '['
    const end = isOpener ? findBalancedJsonEnd(text, cursor) : null

    if (end) {
      const rawJson = text.slice(cursor, end)
      try {
        segments.push({ start: cursor, end, rawJson, value: JSON.parse(rawJson) })
        // Jump past the accepted segment so its inner brackets are not rescanned.
        cursor = end
        continue
      } catch {
        // Keep scanning in case a valid JSON payload starts later in the text.
      }
    }

    cursor += 1
  }

  return segments
}
346+
/**
 * Pull relayed tool calls out of an already-parsed JSON value.
 *
 * Recognised shapes, checked in this order:
 *   1. a bare array of calls
 *   2. an object with a `tool_calls` array
 *   3. an object with a truthy `tool_call`
 *   4. an object whose `name` is a string (treated as a single call)
 *
 * Every candidate is passed through `normalizeRelayedToolCall`; candidates
 * for which it returns a falsy value are dropped.
 *
 * @param {*} parsed - Value produced by `JSON.parse`.
 * @returns {Array} Normalised tool calls (empty when none are recognised).
 */
function extractRelayedToolCallsFromParsedJson(parsed) {
  const pickRawCalls = () => {
    if (Array.isArray(parsed)) return parsed
    if (Array.isArray(parsed?.tool_calls)) return parsed.tool_calls
    if (parsed?.tool_call) return [parsed.tool_call]

    const looksLikeSingleCall =
      parsed && typeof parsed === 'object' && typeof parsed.name === 'string'
    return looksLikeSingleCall ? [parsed] : []
  }

  const normalizedCalls = pickRawCalls().map(normalizeRelayedToolCall)
  return normalizedCalls.filter(Boolean)
}
362+
/**
 * Recursively gather graph specs from a parsed JSON value.
 *
 * Resolution order for each object/array visited:
 *   1. If `normalizeGraphSpec` accepts the value itself, record it and stop.
 *   2. If the value yields relayed tool calls, record the normalised
 *      `arguments` of every `create_basic_graph` call and stop (other tool
 *      calls are ignored, and the value is not searched any deeper).
 *   3. Otherwise descend into array items / object property values.
 *
 * @param {*} value - Parsed JSON value to inspect.
 * @param {Array} [graphs=[]] - Accumulator; mutated and returned.
 * @param {Set} [seen=new Set()] - Objects already visited, so shared or
 *   cyclic references are processed only once.
 * @returns {Array} The `graphs` accumulator.
 */
function extractGraphSpecsFromJsonValue(value, graphs = [], seen = new Set()) {
  const isContainer = Boolean(value) && typeof value === 'object'
  if (!isContainer || seen.has(value)) return graphs
  seen.add(value)

  const directGraph = normalizeGraphSpec(value)
  if (directGraph) {
    graphs.push(directGraph)
    return graphs
  }

  const toolCalls = extractRelayedToolCallsFromParsedJson(value)
  if (toolCalls.length > 0) {
    toolCalls.forEach((call) => {
      if (call.name !== 'create_basic_graph') return
      const graphFromCall = normalizeGraphSpec(call.arguments)
      if (graphFromCall) graphs.push(graphFromCall)
    })
    return graphs
  }

  // Primitive children return immediately at the top of the recursive call,
  // so arrays and plain objects can share one traversal.
  const children = Array.isArray(value) ? value : Object.values(value)
  for (const child of children) {
    extractGraphSpecsFromJsonValue(child, graphs, seen)
  }

  return graphs
}
399+
271400function extractGraphSpecsFromResponseText ( responseText = '' ) {
272401 if ( ! responseText ) return { textWithoutGraphs : responseText , graphs : [ ] }
273402
274403 const graphs = [ ]
275404
276- // Match code blocks with explicit graph language tags
277- const graphBlockRegex = / ` ` ` (?: v f b - g r a p h | v f b _ g r a p h | g r a p h j s o n | g r a p h - j s o n ) \s * ( [ \s \S ] * ?) ` ` ` / gi
405+ // Match explicit graph blocks first, then fall back to generic JSON code blocks
406+ // and inline JSON segments that parse to graph specs.
407+ const graphBlockRegex = / ` ` ` (?: v f b - g r a p h | v f b _ g r a p h | g r a p h j s o n | g r a p h - j s o n | j s o n ) ? \s * ( [ \s \S ] * ?) ` ` ` / gi
278408 let textWithoutGraphs = responseText . replace ( graphBlockRegex , ( match , rawJson ) => {
279409 try {
280410 const parsed = JSON . parse ( String ( rawJson || '' ) . trim ( ) )
281- const normalized = normalizeGraphSpec ( parsed )
282- if ( normalized ) {
283- graphs . push ( normalized )
411+ const extractedGraphs = dedupeGraphSpecs ( extractGraphSpecsFromJsonValue ( parsed ) )
412+ if ( extractedGraphs . length > 0 ) {
413+ graphs . push ( ... extractedGraphs )
284414 return ''
285415 }
286416 } catch {
@@ -289,44 +419,28 @@ function extractGraphSpecsFromResponseText(responseText = '') {
289419 return match
290420 } )
291421
292- // Also match any JSON code block or bare JSON that looks like a graph spec
293- // (has "nodes" and "edges" arrays) — the LLM sometimes dumps graph JSON
294- // without the special language tag
295- const genericJsonBlockRegex = / ` ` ` (?: j s o n ) ? \s * ( \{ [ \s \S ] * ?" n o d e s " \s * : \s * \[ [ \s \S ] * ?" e d g e s " \s * : \s * \[ [ \s \S ] * ?\} ) \s * ` ` ` / gi
296- textWithoutGraphs = textWithoutGraphs . replace ( genericJsonBlockRegex , ( match , rawJson ) => {
297- try {
298- const parsed = JSON . parse ( String ( rawJson || '' ) . trim ( ) )
299- const normalized = normalizeGraphSpec ( parsed )
300- if ( normalized ) {
301- graphs . push ( normalized )
302- return ''
303- }
304- } catch {
305- // Keep original block when parsing fails.
306- }
307- return match
308- } )
422+ const jsonSegments = extractTopLevelJsonSegmentsFromText ( textWithoutGraphs )
423+ if ( jsonSegments . length > 0 ) {
424+ let rebuiltText = ''
425+ let lastIndex = 0
309426
310- // Finally, try to catch bare JSON graph objects in the text (no code fences)
311- // Look for JSON objects that span multiple lines and contain both nodes and edges
312- const bareJsonGraphRegex = / ( \{ \s * (?: " [ ^ " ] * " \s * : \s * (?: " [ ^ " ] * " | [ ^ , } \] ] * | \[ [ ^ \] ] * \] ) \s * , \s * ) * " n o d e s " \s * : \s * \[ [ \s \S ] * ?" e d g e s " \s * : \s * \[ [ \s \S ] * ?\] \s * \} ) / gi
313- textWithoutGraphs = textWithoutGraphs . replace ( bareJsonGraphRegex , ( match , rawJson ) => {
314- try {
315- const parsed = JSON . parse ( String ( rawJson || '' ) . trim ( ) )
316- // Only treat as graph if it actually has nodes and edges arrays
317- if ( ! Array . isArray ( parsed . nodes ) || ! Array . isArray ( parsed . edges ) ) return match
318- const normalized = normalizeGraphSpec ( parsed )
319- if ( normalized ) {
320- graphs . push ( normalized )
321- return ''
322- }
323- } catch {
324- // Not valid JSON, keep as-is
427+ for ( const segment of jsonSegments ) {
428+ const extractedGraphs = dedupeGraphSpecs ( extractGraphSpecsFromJsonValue ( segment . value ) )
429+ if ( extractedGraphs . length === 0 ) continue
430+
431+ rebuiltText += textWithoutGraphs . slice ( lastIndex , segment . start )
432+ lastIndex = segment . end
433+ graphs . push ( ...extractedGraphs )
325434 }
326- return match
327- } )
328435
329- return { textWithoutGraphs, graphs }
436+ rebuiltText += textWithoutGraphs . slice ( lastIndex )
437+ textWithoutGraphs = rebuiltText
438+ }
439+
440+ return {
441+ textWithoutGraphs : textWithoutGraphs . replace ( / \n { 3 , } / g, '\n\n' ) . trim ( ) ,
442+ graphs : dedupeGraphSpecs ( graphs )
443+ }
330444}
331445
332446function extractGraphSpecsFromToolOutputs ( toolOutputs = [ ] ) {
@@ -379,28 +493,57 @@ function extractImagesFromResponseText(responseText = '') {
379493}
380494
/**
 * Remove tool-call JSON that leaked into assistant text and salvage any graph
 * specs carried by those payloads.
 *
 * Two passes are made:
 *   1. Fenced code blocks (optionally tagged `json`) whose content parses to
 *      one or more relayed tool calls are removed.
 *   2. Remaining inline JSON segments that yield relayed tool calls are cut
 *      out of the text.
 * Blocks/segments that do not parse, or contain no tool calls, are kept.
 *
 * @param {string} [text=''] - Raw assistant response text.
 * @returns {{cleanedText: string, graphs: Array}} Text with leaked payloads
 *   removed (runs of 3+ newlines collapsed to two, then trimmed) and the
 *   de-duplicated graph specs found in the removed payloads. Falsy `text` is
 *   returned unchanged as `cleanedText` with no graphs.
 */
function stripLeakedToolCallJson(text = '') {
  if (!text) return { cleanedText: text, graphs: [] }

  const graphs = []

  // Pass 1: fenced code blocks.
  const withoutFencedCalls = text.replace(/```(?:json)?\s*([\s\S]*?)```/gi, (match, rawJson) => {
    try {
      const parsed = JSON.parse(String(rawJson || '').trim())
      if (extractRelayedToolCallsFromParsedJson(parsed).length > 0) {
        graphs.push(...extractGraphSpecsFromJsonValue(parsed))
        return ''
      }
    } catch {
      // Keep original block when parsing fails.
    }
    return match
  })

  // Pass 2: inline JSON segments. Keep the text between removed segments.
  const keptPieces = []
  let resumeAt = 0
  for (const { start, end, value } of extractTopLevelJsonSegmentsFromText(withoutFencedCalls)) {
    if (extractRelayedToolCallsFromParsedJson(value).length === 0) continue

    keptPieces.push(withoutFencedCalls.slice(resumeAt, start))
    resumeAt = end
    graphs.push(...extractGraphSpecsFromJsonValue(value))
  }
  keptPieces.push(withoutFencedCalls.slice(resumeAt))

  const cleanedText = keptPieces.join('').replace(/\n{3,}/g, '\n\n').trim()
  return { cleanedText, graphs: dedupeGraphSpecs(graphs) }
}
395538
396539function buildSuccessfulTextResult ( { responseText, responseId, toolUsage, toolRounds, outboundAllowList, graphSpecs = [ ] } ) {
397- const strippedText = stripLeakedToolCallJson ( responseText )
398- const { sanitizedText, blockedDomains } = sanitizeAssistantOutput ( strippedText , outboundAllowList )
540+ const { cleanedText , graphs : leakedToolCallGraphs } = stripLeakedToolCallJson ( responseText )
541+ const { sanitizedText, blockedDomains } = sanitizeAssistantOutput ( cleanedText , outboundAllowList )
399542 const { textWithoutGraphs, graphs : inlineGraphs } = extractGraphSpecsFromResponseText ( sanitizedText )
400543 const linkedResponseText = linkifyFollowUpQueryItems ( textWithoutGraphs )
401544 const images = extractImagesFromResponseText ( linkedResponseText )
402- const graphs = dedupeGraphSpecs ( [ ...( Array . isArray ( graphSpecs ) ? graphSpecs : [ ] ) , ...inlineGraphs ] )
403- console . log ( `[VFBchat] Final result: ${ graphs . length } graph(s) (${ graphSpecs . length } from tools, ${ inlineGraphs . length } inline)` )
545+ const graphs = dedupeGraphSpecs ( [ ...( Array . isArray ( graphSpecs ) ? graphSpecs : [ ] ) , ...leakedToolCallGraphs , ... inlineGraphs ] )
546+ console . log ( `[VFBchat] Final result: ${ graphs . length } graph(s) (${ graphSpecs . length } from tools, ${ leakedToolCallGraphs . length } from leaked tool calls, ${ inlineGraphs . length } inline)` )
404547
405548 return {
406549 ok : true ,
@@ -745,7 +888,7 @@ function getToolConfig() {
745888 tools . push ( {
746889 type : 'function' ,
747890 name : 'create_basic_graph' ,
748- description : 'Create a lightweight graph specification for UI rendering. Use this to visualise connectivity as nodes and edges. IMPORTANT: Always set the "group" field on every node to a shared biological category (e.g. neurotransmitter type like "cholinergic", "GABAergic", "glutamatergic"; or system/region like "visual system", "central complex"; or cell class like "sensory neuron", "interneuron") so that nodes are colour-coded meaningfully. Choose the most informative grouping for the specific query context .' ,
891+ description : 'Create a lightweight graph specification for UI rendering. Use this to visualise connectivity as nodes and edges. IMPORTANT: Always set the "group" field on every node to a shared biological category (e.g. neurotransmitter type like "cholinergic", "GABAergic", "glutamatergic"; or system/region like "visual system", "central complex"; or cell class like "sensory neuron", "interneuron") so that nodes are colour-coded meaningfully. For directional connectivity graphs, prefer 2-3 reused groups aligned to the query sides (source-side, target-side, optional intermediate) rather than giving each node or subtype its own one-off group .' ,
749892 parameters : {
750893 type : 'object' ,
751894 properties : {
@@ -759,7 +902,7 @@ function getToolConfig() {
759902 properties : {
760903 id : { type : 'string' , description : 'Unique node identifier' } ,
761904 label : { type : 'string' , description : 'Display label for the node' } ,
762- group : { type : 'string' , description : 'REQUIRED: Shared biological category for colour-coding. Use neurotransmitter type (cholinergic, GABAergic, glutamatergic), system/region (visual system, central complex), cell class (sensory neuron, interneuron, projection neuron), or other contextually meaningful grouping.' } ,
905+ group : { type : 'string' , description : 'REQUIRED: Shared biological category for colour-coding. Use neurotransmitter type (cholinergic, GABAergic, glutamatergic), system/region (visual system, central complex), cell class (sensory neuron, interneuron, projection neuron), or other contextually meaningful grouping. For directional connectivity graphs, reuse coarse groups across many nodes, usually source-side, target-side, and optional intermediate. ' } ,
763906 size : { type : 'number' , description : 'Optional relative node size (1-3 recommended)' }
764907 } ,
765908 required : [ 'id' , 'group' ]
@@ -2572,6 +2715,8 @@ GRAPH VISUALS:
25722715 * Neurotransmitter type (cholinergic, GABAergic, glutamatergic, etc.) when NT data is available
25732716 * Brain region/system (visual system, central complex, mushroom body, etc.) when comparing across regions
25742717 * Cell class (sensory neuron, interneuron, projection neuron, motor neuron, etc.) as a general fallback
2718+ * For directional connectivity graphs, keep groups coarse and reusable: usually source-side, target-side, and optional intermediate
2719+ * Do NOT create a separate group for every named neuron class or subtype if that would produce one-off colours
25752720 * The LLM should use its knowledge of Drosophila neurobiology to assign the most useful grouping
25762721
25772722TOOL RELAY:
@@ -2800,23 +2945,20 @@ function normalizeRelayedToolCall(toolCall) {
28002945}
28012946
28022947function parseRelayedToolCalls ( responseText = '' ) {
2948+ const structuredSegments = extractTopLevelJsonSegmentsFromText ( responseText )
2949+ for ( const segment of structuredSegments ) {
2950+ const normalizedCalls = extractRelayedToolCallsFromParsedJson ( segment . value )
2951+ if ( normalizedCalls . length > 0 ) {
2952+ return normalizedCalls
2953+ }
2954+ }
2955+
28032956 const candidates = extractJsonCandidates ( responseText )
28042957
28052958 for ( const candidate of candidates ) {
28062959 try {
28072960 const parsed = JSON . parse ( candidate )
2808- const rawCalls = Array . isArray ( parsed )
2809- ? parsed
2810- : Array . isArray ( parsed ?. tool_calls )
2811- ? parsed . tool_calls
2812- : parsed ?. tool_call
2813- ? [ parsed . tool_call ]
2814- : [ ]
2815-
2816- const normalizedCalls = rawCalls
2817- . map ( normalizeRelayedToolCall )
2818- . filter ( Boolean )
2819-
2961+ const normalizedCalls = extractRelayedToolCallsFromParsedJson ( parsed )
28202962 if ( normalizedCalls . length > 0 ) {
28212963 return normalizedCalls
28222964 }
@@ -3013,6 +3155,7 @@ function buildToolPolicyCorrectionMessage({
30133155 '- For VFB query-type questions, prefer vfb_get_term_info + vfb_run_query as the first pass because vfb_run_query is typically cached and fast.' ,
30143156 '- Use more specialized tools (for example vfb_query_connectivity, vfb_resolve_entity, vfb_find_stocks, vfb_resolve_combination, vfb_find_combo_publications) when deeper refinement is needed.' ,
30153157 '- When connectivity data is returned, ALWAYS call create_basic_graph to visualise the connections as a node/edge graph with meaningful group labels for colour-coding.' ,
3158+ '- For directional connectivity graphs, keep graph groups coarse and reusable (usually source-side, target-side, and optional intermediate), not one unique group per node.' ,
30163159 '- Prefer direct data tools over documentation search when the question asks for concrete VFB data.' ,
30173160 '- If existing tool outputs already answer the question, provide the final answer instead of requesting more tools.'
30183161 ]
0 commit comments