Skip to content

Commit db37bd8

Browse files
committed
feat: implement connectivity endpoint normalization and scoring functions for improved search results
1 parent 9717820 commit db37bd8

1 file changed

Lines changed: 222 additions & 5 deletions

File tree

app/api/chat/route.js

Lines changed: 222 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1594,6 +1594,115 @@ function extractRowsFromRunQueryPayload(rawPayload) {
15941594
return rows
15951595
}
15961596

1597+
/**
 * Normalize free text so endpoint labels, synonyms and user queries can be
 * compared with plain string equality / substring checks.
 *
 * Steps, in order: lowercase, turn runs of `_` or `-` into one space, replace
 * every character that is not a letter, digit or whitespace with a space,
 * collapse whitespace runs, and trim.
 *
 * Letters and digits are matched with Unicode property escapes rather than
 * `a-z0-9`, so non-ASCII letters (for example Greek letters in "α lobe" and
 * "β lobe") are kept and such terms do not collapse to the same string.
 *
 * @param {*} [value=''] - Text to normalize; any falsy value yields ''.
 * @returns {string} The normalized text (possibly empty).
 */
function normalizeEndpointSearchText(value = '') {
  return String(value || '')
    .toLowerCase()
    .replace(/[_-]+/g, ' ')
    // \p{L}/\p{N} need the `u` flag; for ASCII input this matches exactly what [^a-z0-9\s] did.
    .replace(/[^\p{L}\p{N}\s]/gu, ' ')
    .replace(/\s+/g, ' ')
    .trim()
}
1605+
1606+
/**
 * Normalize `value` and convert its final word to a rough singular form so
 * that plural queries ("Kenyon cells") can match singular labels.
 *
 * Only the last word is touched. Rules, first match wins:
 *   - "species" / "series" are left alone
 *   - "...ies"            -> "...y"      (bodies -> body)
 *   - "...sses/shes/ches/xes" -> drop "es" (classes -> class)
 *   - "...ss", "...us", "...is" are treated as already singular
 *     (process, glomerulus, axis)
 *   - any other trailing "s" is dropped  (neurons -> neuron, PNs -> pn)
 *
 * This is a heuristic, not a full English inflector.
 *
 * @param {*} [value=''] - Text to normalize and singularize.
 * @returns {string} Normalized text with a singularized last word.
 */
function singularizeEndpointSearchText(value = '') {
  const text = normalizeEndpointSearchText(value)
  if (!text) return text

  // Normalized text is single-space separated, so the last word starts after the last space.
  const word = text.slice(text.lastIndexOf(' ') + 1)

  if (word === 'species' || word === 'series') return text
  if (word.length > 3 && word.endsWith('ies')) return `${text.slice(0, -3)}y`
  if (/(?:ss|sh|ch|x)es$/.test(word)) return text.slice(0, -2)
  // Latin-style singulars common in anatomy terms must not lose their final "s".
  if (/(?:ss|us|is)$/.test(word)) return text
  // Require a stem so a lone "s" is not reduced to an empty string.
  if (word.length > 1 && word.endsWith('s')) return text.slice(0, -1)
  return text
}
1615+
1616+
/**
 * Pull the list of result documents out of a `search_terms` tool payload.
 *
 * The payload is parsed with `parseJsonPayload`. Documents are looked for, in
 * this order:
 *   1. `payload.response.docs`
 *   2. `payload.docs`
 *   3. for every object-valued property of the payload, that property's
 *      `response.docs` or else its `docs`, concatenated in property order.
 *
 * @param {*} rawPayload - Raw tool output handed to `parseJsonPayload`.
 * @returns {Array} The docs array found (the same array instance for cases 1
 *   and 2), or an empty array when nothing usable is present.
 */
function extractDocsFromSearchTermsPayload(rawPayload) {
  // Returns the docs array carried directly by `node`, or null when it has none.
  const directDocsOf = node => {
    const nested = node.response?.docs
    if (Array.isArray(nested)) return nested
    return Array.isArray(node.docs) ? node.docs : null
  }

  const payload = parseJsonPayload(rawPayload)
  if (!payload || typeof payload !== 'object') return []

  const topLevelDocs = directDocsOf(payload)
  if (topLevelDocs) return topLevelDocs

  // Fall back to payloads that wrap one result set per key.
  return Object.values(payload).flatMap(entry => {
    if (!entry || typeof entry !== 'object') return []
    return directDocsOf(entry) ?? []
  })
}
1640+
1641+
/**
 * Score how well a search result document matches a free-text connectivity
 * endpoint query. Higher is better.
 *
 * Score components (additive):
 *   - +30  short form looks like `FBbt_` followed by 8 digits
 *   - +220 label equals the query (also comparing singularized forms),
 *          otherwise +180 when a synonym equals the query or its singular
 *   - +70  label contains the query or the query contains the label
 *   - +55  some synonym contains the query or is contained in it
 *   - +5   per query token that is also a label token, plus +35 when every
 *          query token is a label token
 *   - +10  facet `neuron`, +5 facet `class`
 *
 * @param {object} [doc={}] - Search doc; reads `short_form`/`shortForm`,
 *   `label`, `synonym` and `facets_annotation`.
 * @param {string} [queryText=''] - The user's endpoint text.
 * @returns {number} The score, or `Number.NEGATIVE_INFINITY` when the doc is
 *   not an object, has no short form, or the query normalizes to ''.
 */
function scoreSearchDocForConnectivityEndpoint(doc = {}, queryText = '') {
  // The `= {}` default only applies to `undefined`; guard null and primitives explicitly.
  if (!doc || typeof doc !== 'object') return Number.NEGATIVE_INFINITY

  const shortForm = String(doc.short_form || doc.shortForm || '').trim()
  const queryNorm = normalizeEndpointSearchText(queryText)
  if (!shortForm || !queryNorm) return Number.NEGATIVE_INFINITY

  // Accept a single string as well as an array for multi-valued fields.
  const asList = value => {
    if (Array.isArray(value)) return value
    return typeof value === 'string' ? [value] : []
  }

  const labelNorm = normalizeEndpointSearchText(doc.label || '')
  const querySingular = singularizeEndpointSearchText(queryNorm)
  const labelSingular = singularizeEndpointSearchText(labelNorm)
  const synonyms = asList(doc.synonym)
    .map(entry => normalizeEndpointSearchText(entry))
    .filter(Boolean)
  const facets = asList(doc.facets_annotation)
    .map(entry => String(entry || '').toLowerCase())

  let score = 0
  if (/^FBbt_\d{8}$/i.test(shortForm)) score += 30

  // Exact matches: a label match outranks a synonym match, and only one of the two is awarded.
  if (labelNorm === queryNorm || labelNorm === querySingular || labelSingular === queryNorm) {
    score += 220
  } else if (synonyms.includes(queryNorm) || synonyms.includes(querySingular)) {
    score += 180
  }

  // Partial matches in either direction.
  if (labelNorm && (labelNorm.includes(queryNorm) || queryNorm.includes(labelNorm))) {
    score += 70
  }

  if (synonyms.some(syn => syn.includes(queryNorm) || queryNorm.includes(syn))) {
    score += 55
  }

  // Token overlap between the query and the label.
  const queryTokens = queryNorm.split(' ').filter(Boolean)
  const labelTokens = new Set(labelNorm.split(' ').filter(Boolean))
  const overlap = queryTokens.filter(token => labelTokens.has(token)).length
  score += overlap * 5
  if (queryTokens.length > 0 && overlap === queryTokens.length) {
    score += 35
  }

  if (facets.includes('neuron')) score += 10
  if (facets.includes('class')) score += 5

  return score
}
1686+
1687+
/**
 * Choose the search document that best matches a connectivity endpoint query.
 *
 * Docs whose short form looks like `FBbt_` + 8 digits are preferred: when at
 * least one exists, only those are scored. Otherwise every object-valued doc
 * is scored. Scoring is delegated to `scoreSearchDocForConnectivityEndpoint`;
 * on ties the earliest doc wins.
 *
 * @param {Array} [docs=[]] - Candidate search docs.
 * @param {string} [queryText=''] - The user's endpoint text.
 * @returns {object|null} The best doc, or null when `docs` is not a non-empty
 *   array or no doc scores above `Number.NEGATIVE_INFINITY`.
 */
function pickBestConnectivityEndpointDoc(docs = [], queryText = '') {
  if (!Array.isArray(docs) || docs.length === 0) return null

  const fbbtShortFormPattern = /^FBbt_\d{8}$/i
  // Drop null/primitive entries up front so the fallback path never hands them to the scorer.
  const usableDocs = docs.filter(doc => doc && typeof doc === 'object')
  const fbbtDocs = usableDocs.filter(doc =>
    fbbtShortFormPattern.test(String(doc.short_form || doc.shortForm || '').trim())
  )
  const candidateDocs = fbbtDocs.length > 0 ? fbbtDocs : usableDocs

  let bestDoc = null
  let bestScore = Number.NEGATIVE_INFINITY
  for (const doc of candidateDocs) {
    const score = scoreSearchDocForConnectivityEndpoint(doc, queryText)
    // Strict comparison keeps the first doc on ties and rejects -Infinity scores.
    if (score > bestScore) {
      bestScore = score
      bestDoc = doc
    }
  }

  return bestDoc
}
1705+
15971706
function extractNeuronClassCandidatesFromRows(rows = [], limit = 10) {
15981707
if (!Array.isArray(rows) || rows.length === 0) return []
15991708

@@ -1676,15 +1785,43 @@ async function callVfbToolTextWithFallback(client, toolName, toolArguments = {})
16761785
}
16771786
}
16781787

1788+
/**
 * Resolve a connectivity endpoint value to a term ID where possible.
 *
 * - If the normalized value already contains a canonical ID matching
 *   `VFB_NEURON_CLASS_ID_REGEX`, that ID is returned.
 * - If it contains some other canonical ID, the normalized value is returned
 *   unchanged.
 * - Otherwise the text (with markdown link syntax stripped) is sent to the
 *   `search_terms` tool and the best-scoring doc's ID is returned.
 *
 * Resolution is best-effort: any failure while searching or parsing falls
 * back to the normalized value, with a warning logged so the degradation is
 * visible.
 *
 * @param {*} client - Passed through to `callVfbToolTextWithFallback`.
 * @param {string} [rawValue=''] - Endpoint text or ID supplied by the caller.
 * @returns {Promise<string>} A canonical term ID, or the normalized input
 *   when nothing better could be resolved.
 */
async function resolveConnectivityEndpointValue(client, rawValue = '') {
  const normalizedValue = normalizeConnectivityEndpointValue(rawValue)
  const canonicalId = extractCanonicalVfbTermId(normalizedValue)
  if (canonicalId && VFB_NEURON_CLASS_ID_REGEX.test(canonicalId)) return canonicalId
  if (canonicalId) return normalizedValue

  const queryText = stripMarkdownLinkText(normalizedValue).trim()
  if (!queryText) return normalizedValue

  try {
    const searchText = await callVfbToolTextWithFallback(client, 'search_terms', {
      query: queryText,
      rows: 25,
      minimize_results: true
    })
    const docs = extractDocsFromSearchTermsPayload(searchText)
    const bestDoc = pickBestConnectivityEndpointDoc(docs, queryText)
    if (!bestDoc) return normalizedValue

    const bestId = extractCanonicalVfbTermId(bestDoc.short_form || bestDoc.shortForm || bestDoc.id || '')
    return bestId || normalizedValue
  } catch (error) {
    // Keep the fallback, but do not hide the failure: a silent catch makes
    // broken endpoint resolution indistinguishable from "no match found".
    console.warn(`Connectivity endpoint resolution failed; using unresolved input. ${error?.message || error}`)
    return normalizedValue
  }
}
1813+
16791814
async function assessConnectivityEndpointForNeuronClass({ client, side, rawValue }) {
16801815
const normalizedValue = normalizeConnectivityEndpointValue(rawValue)
1681-
const termId = extractCanonicalVfbTermId(normalizedValue)
1816+
const resolvedValue = await resolveConnectivityEndpointValue(client, normalizedValue)
1817+
const termId = extractCanonicalVfbTermId(resolvedValue)
16821818

16831819
if (!termId || !VFB_NEURON_CLASS_ID_REGEX.test(termId)) {
16841820
return {
16851821
side,
16861822
raw_input: String(rawValue || ''),
16871823
normalized_input: normalizedValue,
1824+
resolved_input: resolvedValue,
16881825
requires_selection: false
16891826
}
16901827
}
@@ -1697,6 +1834,7 @@ async function assessConnectivityEndpointForNeuronClass({ client, side, rawValue
16971834
side,
16981835
raw_input: String(rawValue || ''),
16991836
normalized_input: normalizedValue,
1837+
resolved_input: termId,
17001838
term_id: termId,
17011839
requires_selection: false
17021840
}
@@ -1709,6 +1847,7 @@ async function assessConnectivityEndpointForNeuronClass({ client, side, rawValue
17091847
side,
17101848
raw_input: String(rawValue || ''),
17111849
normalized_input: normalizedValue,
1850+
resolved_input: termId,
17121851
term_id: termId,
17131852
term_name: termName,
17141853
requires_selection: false
@@ -1748,6 +1887,7 @@ async function assessConnectivityEndpointForNeuronClass({ client, side, rawValue
17481887
side,
17491888
raw_input: String(rawValue || ''),
17501889
normalized_input: normalizedValue,
1890+
resolved_input: termId,
17511891
term_id: termId,
17521892
term_name: termName,
17531893
super_types: Array.isArray(termRecord.SuperTypes) ? termRecord.SuperTypes : [],
@@ -1850,6 +1990,15 @@ async function executeFunctionTool(name, args) {
18501990
})
18511991
])
18521992

1993+
const upstreamCheck = endpointChecks.find(check => check.side === 'upstream')
1994+
const downstreamCheck = endpointChecks.find(check => check.side === 'downstream')
1995+
if (upstreamCheck?.resolved_input) {
1996+
cleanArgs.upstream_type = upstreamCheck.resolved_input
1997+
}
1998+
if (downstreamCheck?.resolved_input) {
1999+
cleanArgs.downstream_type = downstreamCheck.resolved_input
2000+
}
2001+
18532002
const selectionsNeeded = endpointChecks.filter(check => check.requires_selection)
18542003
if (selectionsNeeded.length > 0) {
18552004
return JSON.stringify({
@@ -2223,10 +2372,10 @@ function buildVfbQueryLinkSkill() {
22232372
- In term-info JSON, read short names from Queries[].query and user-facing descriptions from Queries[].label.
22242373
- Treat Queries[] from vfb_get_term_info as authoritative for the current term; use the static list below as a fallback reference.
22252374
- When you answer with query findings, include matching query-result links when useful.
2226-
- Examples:
2227-
- ${VFB_QUERY_LINK_BASE}FBbt_00100482,ListAllAvailableImages
2228-
- ${VFB_QUERY_LINK_BASE}FBbt_00100482,SubclassesOf
2229-
- ${VFB_QUERY_LINK_BASE}FBbt_00100482,ref_upstream_class_connectivity_query
2375+
- Example templates:
2376+
- ${VFB_QUERY_LINK_BASE}<TERM_ID>,ListAllAvailableImages
2377+
- ${VFB_QUERY_LINK_BASE}<TERM_ID>,SubclassesOf
2378+
- ${VFB_QUERY_LINK_BASE}<TERM_ID>,ref_upstream_class_connectivity_query
22302379
- Query short names and descriptions (from geppetto-vfb/model):
22312380
${queryLines}`
22322381
}
@@ -2285,6 +2434,9 @@ TOOL SELECTION:
22852434
- Questions about split-GAL4 combination names/synonyms (for example MB002B, SS04495): use vfb_resolve_combination first, then vfb_find_combo_publications (and optionally vfb_find_stocks if the user asks for lines)
22862435
- Questions about comparative connectivity between neuron classes across datasets: use vfb_query_connectivity (optionally vfb_list_connectome_datasets first to pick valid dataset symbols)
22872436
- vfb_query_connectivity requires neuron class inputs. If the user provides anatomy regions (for example medulla or central complex), use NeuronsPartHere first for each region, then ask the user to pick one neuron class per side before running vfb_query_connectivity.
2437+
- For directional requests like "connections from X to Y" or "between X and Y", treat X as upstream (presynaptic) and Y as downstream (postsynaptic), and prefer vfb_query_connectivity over a single-term run_query.
2438+
- Do not infer identity from examples in this prompt. Only map IDs to labels (or labels to IDs) using tool outputs from this turn.
2439+
- Never claim "TERM_A (ID) is TERM_B" unless vfb_get_term_info confirms that exact mapping.
22882440
- Questions about published papers or recent literature (only when explicitly asked): use PubMed first, optionally bioRxiv/medRxiv for preprints
22892441
- Questions about VFB, NeuroFly, VFB Connect Python documentation, or approved FlyBase documentation pages, news posts, workshops, conference pages, or event dates: use search_reviewed_docs, then use get_reviewed_page when you need page details
22902442
- For questions about how to run VFB queries in Python or how to use vfb-connect, prioritize search_reviewed_docs/get_reviewed_page on vfb-connect.readthedocs.io alongside VFB tool outputs when useful.
@@ -2700,10 +2852,16 @@ function hasConnectivityIntent(message = '') {
27002852
return /\b(connectome|connectivity|connection|connections|synapse|synaptic|presynaptic|postsynaptic|input|inputs|output|outputs|nblast)\b/i.test(message)
27012853
}
27022854

2855+
/**
 * Detect a connectivity request that names two endpoints with a direction or
 * pairing, i.e. "from ... to ..." or "between ... and ...".
 *
 * The message must first pass `hasConnectivityIntent`. The two keywords of a
 * pair must be whole words separated by 1 to 160 characters (newlines
 * included); matching is case-insensitive.
 *
 * @param {string} [message=''] - The user's message.
 * @returns {boolean} True when the message has connectivity intent and one of
 *   the two phrasings is present.
 */
function hasDirectionalConnectivityRequest(message = '') {
  const endpointPairPatterns = [
    /\bfrom\b[\s\S]{1,160}\bto\b/i,
    /\bbetween\b[\s\S]{1,160}\band\b/i
  ]

  return hasConnectivityIntent(message) &&
    endpointPairPatterns.some(pattern => pattern.test(message))
}
2859+
27032860
function buildToolPolicyCorrectionMessage({
27042861
userMessage = '',
27052862
explicitRunQueryRequested = false,
27062863
connectivityIntent = false,
2864+
requireConnectivityComparison = false,
27072865
missingRunQueryExecution = false,
27082866
requestedQueryTypes = [],
27092867
hasCanonicalIdInUserMessage = false
@@ -2734,6 +2892,11 @@ function buildToolPolicyCorrectionMessage({
27342892
policyBullets.push('- This is a connectivity-style request; favor VFB connectivity/query tools over docs-only search.')
27352893
}
27362894

2895+
if (requireConnectivityComparison) {
2896+
policyBullets.push('- This request is directional connectivity between two entities; call vfb_query_connectivity with upstream_type = source term and downstream_type = target term.')
2897+
policyBullets.push('- Do not conclude \"no connection\" from only NeuronsPresynapticHere/NeuronsPostsynapticHere on a single term. Use vfb_query_connectivity output as the primary evidence.')
2898+
}
2899+
27372900
if (missingRunQueryExecution) {
27382901
policyBullets.push('- You have not executed vfb_run_query yet in this turn; correct that now if feasible.')
27392902
}
@@ -3131,6 +3294,7 @@ async function processResponseStream({
31313294
const explicitRunQueryRequested = hasExplicitVfbRunQueryRequest(userMessage) || requestedQueryTypes.length > 0
31323295
const hasCanonicalIdInUserMessage = hasCanonicalVfbOrFlybaseId(userMessage)
31333296
const connectivityIntent = hasConnectivityIntent(userMessage)
3297+
const directionalConnectivityRequested = hasDirectionalConnectivityRequest(userMessage)
31343298
const collectedGraphSpecs = []
31353299
let currentResponse = apiResponse
31363300
let latestResponseId = null
@@ -3206,6 +3370,7 @@ async function processResponseStream({
32063370
const shouldCorrectToolChoice = toolPolicyCorrections < maxToolPolicyCorrections && (
32073371
(explicitRunQueryRequested && !hasVfbToolCall) ||
32083372
(explicitRunQueryRequested && !hasVfbRunQueryToolCall && !hasRunQueryPreparationCall) ||
3373+
(directionalConnectivityRequested && !hasConnectivityComparisonCall) ||
32093374
(requestedQueryTypes.length > 0 && hasConnectivityComparisonCall && !hasVfbRunQueryToolCall)
32103375
)
32113376

@@ -3222,6 +3387,7 @@ async function processResponseStream({
32223387
userMessage,
32233388
explicitRunQueryRequested,
32243389
connectivityIntent,
3390+
requireConnectivityComparison: directionalConnectivityRequested,
32253391
requestedQueryTypes,
32263392
hasCanonicalIdInUserMessage
32273393
})
@@ -3355,6 +3521,7 @@ async function processResponseStream({
33553521
userMessage,
33563522
explicitRunQueryRequested,
33573523
connectivityIntent,
3524+
requireConnectivityComparison: directionalConnectivityRequested,
33583525
missingRunQueryExecution: true,
33593526
requestedQueryTypes,
33603527
hasCanonicalIdInUserMessage
@@ -3392,6 +3559,56 @@ async function processResponseStream({
33923559
continue
33933560
}
33943561

3562+
if (directionalConnectivityRequested && (toolUsage.vfb_query_connectivity || 0) === 0 && toolPolicyCorrections < maxToolPolicyCorrections) {
3563+
sendEvent('status', { message: 'Honoring directional connectivity workflow', phase: 'llm' })
3564+
3565+
if (textAccumulator.trim()) {
3566+
accumulatedItems.push({ role: 'assistant', content: textAccumulator.trim() })
3567+
}
3568+
3569+
accumulatedItems.push({
3570+
role: 'user',
3571+
content: buildToolPolicyCorrectionMessage({
3572+
userMessage,
3573+
explicitRunQueryRequested,
3574+
connectivityIntent,
3575+
requireConnectivityComparison: true,
3576+
requestedQueryTypes,
3577+
hasCanonicalIdInUserMessage
3578+
})
3579+
})
3580+
3581+
const correctionResponse = await fetch(`${apiBaseUrl}${CHAT_COMPLETIONS_ENDPOINT}`, {
3582+
method: 'POST',
3583+
headers: {
3584+
'Content-Type': 'application/json',
3585+
...(apiKey ? { 'Authorization': `Bearer ${apiKey}` } : {})
3586+
},
3587+
body: JSON.stringify(createChatCompletionsRequestBody({
3588+
apiModel,
3589+
conversationInput: [...conversationInput, ...accumulatedItems],
3590+
allowToolRelay: true
3591+
}))
3592+
})
3593+
3594+
if (!correctionResponse.ok) {
3595+
const correctionErrorText = await correctionResponse.text()
3596+
return {
3597+
ok: false,
3598+
responseId: latestResponseId,
3599+
toolUsage,
3600+
toolRounds,
3601+
errorMessage: `Failed to honor directional connectivity flow. ${sanitizeApiError(correctionResponse.status, correctionErrorText)}`,
3602+
errorCategory: 'directional_connectivity_enforcement_failed',
3603+
errorStatus: correctionResponse.status
3604+
}
3605+
}
3606+
3607+
toolPolicyCorrections += 1
3608+
currentResponse = correctionResponse
3609+
continue
3610+
}
3611+
33953612
if (!textAccumulator.trim()) {
33963613
const clarification = await requestClarifyingFollowUp({
33973614
sendEvent,

0 commit comments

Comments
 (0)