Skip to content

Commit 6a9b878

Browse files
committed
improvement(connectors): audit and harden all 30 knowledge base connectors
1 parent 8906439 commit 6a9b878

File tree

25 files changed

+697
-188
lines changed

25 files changed

+697
-188
lines changed

.claude/commands/validate-connector.md

Lines changed: 316 additions & 0 deletions
Large diffs are not rendered by default.

apps/sim/connectors/confluence/confluence.ts

Lines changed: 49 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { createLogger } from '@sim/logger'
22
import { ConfluenceIcon } from '@/components/icons'
3-
import { fetchWithRetry } from '@/lib/knowledge/documents/utils'
3+
import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils'
44
import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types'
55
import { computeContentHash, htmlToPlainText, joinTagArray, parseTagDate } from '@/connectors/utils'
66
import { getConfluenceCloudId } from '@/tools/confluence/utils'
@@ -243,43 +243,54 @@ export const confluenceConnector: ConnectorConfig = {
243243
const domain = sourceConfig.domain as string
244244
const cloudId = await getConfluenceCloudId(domain, accessToken)
245245

246-
const url = `https://api.atlassian.com/ex/confluence/${cloudId}/wiki/api/v2/pages/${externalId}?body-format=storage`
247-
248-
const response = await fetchWithRetry(url, {
249-
method: 'GET',
250-
headers: {
251-
Accept: 'application/json',
252-
Authorization: `Bearer ${accessToken}`,
253-
},
254-
})
246+
// Try pages first, fall back to blogposts if not found
247+
let page: Record<string, unknown> | null = null
248+
for (const endpoint of ['pages', 'blogposts']) {
249+
const url = `https://api.atlassian.com/ex/confluence/${cloudId}/wiki/api/v2/${endpoint}/${externalId}?body-format=storage`
250+
const response = await fetchWithRetry(url, {
251+
method: 'GET',
252+
headers: {
253+
Accept: 'application/json',
254+
Authorization: `Bearer ${accessToken}`,
255+
},
256+
})
255257

256-
if (!response.ok) {
257-
if (response.status === 404) return null
258-
throw new Error(`Failed to get Confluence page: ${response.status}`)
258+
if (response.ok) {
259+
page = await response.json()
260+
break
261+
}
262+
if (response.status !== 404) {
263+
throw new Error(`Failed to get Confluence content: ${response.status}`)
264+
}
259265
}
260266

261-
const page = await response.json()
262-
const rawContent = page.body?.storage?.value || ''
267+
if (!page) return null
268+
const body = page.body as Record<string, unknown> | undefined
269+
const storage = body?.storage as Record<string, unknown> | undefined
270+
const rawContent = (storage?.value as string) || ''
263271
const plainText = htmlToPlainText(rawContent)
264272
const contentHash = await computeContentHash(plainText)
265273

266274
// Fetch labels for this page
267275
const labelMap = await fetchLabelsForPages(cloudId, accessToken, [String(page.id)])
268276
const labels = labelMap.get(String(page.id)) ?? []
269277

278+
const links = page._links as Record<string, unknown> | undefined
279+
const version = page.version as Record<string, unknown> | undefined
280+
270281
return {
271282
externalId: String(page.id),
272-
title: page.title || 'Untitled',
283+
title: (page.title as string) || 'Untitled',
273284
content: plainText,
274285
mimeType: 'text/plain',
275-
sourceUrl: page._links?.webui ? `https://${domain}/wiki${page._links.webui}` : undefined,
286+
sourceUrl: links?.webui ? `https://${domain}/wiki${links.webui}` : undefined,
276287
contentHash,
277288
metadata: {
278289
spaceId: page.spaceId,
279290
status: page.status,
280-
version: page.version?.number,
291+
version: version?.number,
281292
labels,
282-
lastModified: page.version?.createdAt,
293+
lastModified: version?.createdAt,
283294
},
284295
}
285296
},
@@ -302,7 +313,25 @@ export const confluenceConnector: ConnectorConfig = {
302313

303314
try {
304315
const cloudId = await getConfluenceCloudId(domain, accessToken)
305-
await resolveSpaceId(cloudId, accessToken, spaceKey)
316+
const spaceUrl = `https://api.atlassian.com/ex/confluence/${cloudId}/wiki/api/v2/spaces?keys=${encodeURIComponent(spaceKey)}&limit=1`
317+
const response = await fetchWithRetry(
318+
spaceUrl,
319+
{
320+
method: 'GET',
321+
headers: {
322+
Accept: 'application/json',
323+
Authorization: `Bearer ${accessToken}`,
324+
},
325+
},
326+
VALIDATE_RETRY_OPTIONS
327+
)
328+
if (!response.ok) {
329+
return { valid: false, error: `Failed to validate space: ${response.status}` }
330+
}
331+
const data = await response.json()
332+
if (!data.results?.length) {
333+
return { valid: false, error: `Space "${spaceKey}" not found` }
334+
}
306335
return { valid: true }
307336
} catch (error) {
308337
const message = error instanceof Error ? error.message : 'Failed to validate configuration'

apps/sim/connectors/github/github.ts

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { createLogger } from '@sim/logger'
22
import { GithubIcon } from '@/components/icons'
33
import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils'
44
import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types'
5-
import { computeContentHash } from '@/connectors/utils'
5+
import { computeContentHash, parseTagDate } from '@/connectors/utils'
66

77
const logger = createLogger('GitHubConnector')
88

@@ -82,7 +82,7 @@ async function fetchTree(
8282
const data = await response.json()
8383

8484
if (data.truncated) {
85-
logger.error('GitHub tree was truncated — some files may be missing', { owner, repo, branch })
85+
logger.warn('GitHub tree was truncated — some files may be missing', { owner, repo, branch })
8686
}
8787

8888
return (data.tree || []).filter((item: TreeItem) => item.type === 'blob')
@@ -139,7 +139,7 @@ async function treeItemToDocument(
139139
title: item.path.split('/').pop() || item.path,
140140
content,
141141
mimeType: 'text/plain',
142-
sourceUrl: `https://github.com/${owner}/${repo}/blob/${branch}/${item.path}`,
142+
sourceUrl: `https://github.com/${owner}/${repo}/blob/${encodeURIComponent(branch)}/${item.path.split('/').map(encodeURIComponent).join('/')}`,
143143
contentHash,
144144
metadata: {
145145
path: item.path,
@@ -302,6 +302,7 @@ export const githubConnector: ConnectorConfig = {
302302
throw new Error(`Failed to fetch file ${path}: ${response.status}`)
303303
}
304304

305+
const lastModifiedHeader = response.headers.get('last-modified') || undefined
305306
const data = await response.json()
306307
const content =
307308
data.encoding === 'base64'
@@ -314,14 +315,15 @@ export const githubConnector: ConnectorConfig = {
314315
title: path.split('/').pop() || path,
315316
content,
316317
mimeType: 'text/plain',
317-
sourceUrl: `https://github.com/${owner}/${repo}/blob/${branch}/${path}`,
318+
sourceUrl: `https://github.com/${owner}/${repo}/blob/${encodeURIComponent(branch)}/${path.split('/').map(encodeURIComponent).join('/')}`,
318319
contentHash,
319320
metadata: {
320321
path,
321322
sha: data.sha as string,
322323
size: data.size as number,
323324
branch,
324325
repository: `${owner}/${repo}`,
326+
lastModified: lastModifiedHeader,
325327
},
326328
}
327329
} catch (error) {
@@ -400,6 +402,7 @@ export const githubConnector: ConnectorConfig = {
400402
{ id: 'repository', displayName: 'Repository', fieldType: 'text' },
401403
{ id: 'branch', displayName: 'Branch', fieldType: 'text' },
402404
{ id: 'size', displayName: 'File Size', fieldType: 'number' },
405+
{ id: 'lastModified', displayName: 'Last Modified', fieldType: 'date' },
403406
],
404407

405408
mapTags: (metadata: Record<string, unknown>): Record<string, unknown> => {
@@ -414,6 +417,9 @@ export const githubConnector: ConnectorConfig = {
414417
if (!Number.isNaN(num)) result.size = num
415418
}
416419

420+
const lastModified = parseTagDate(metadata.lastModified)
421+
if (lastModified) result.lastModified = lastModified
422+
417423
return result
418424
},
419425
}

apps/sim/connectors/google-calendar/google-calendar.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -439,6 +439,8 @@ export const googleCalendarConnector: ConnectorConfig = {
439439
{ id: 'attendeeCount', displayName: 'Attendee Count', fieldType: 'number' },
440440
{ id: 'location', displayName: 'Location', fieldType: 'text' },
441441
{ id: 'eventDate', displayName: 'Event Date', fieldType: 'date' },
442+
{ id: 'lastModified', displayName: 'Last Modified', fieldType: 'date' },
443+
{ id: 'createdAt', displayName: 'Created', fieldType: 'date' },
442444
],
443445

444446
mapTags: (metadata: Record<string, unknown>): Record<string, unknown> => {
@@ -459,6 +461,12 @@ export const googleCalendarConnector: ConnectorConfig = {
459461
const eventDate = parseTagDate(metadata.eventDate)
460462
if (eventDate) result.eventDate = eventDate
461463

464+
const lastModified = parseTagDate(metadata.updatedTime)
465+
if (lastModified) result.lastModified = lastModified
466+
467+
const createdAt = parseTagDate(metadata.createdTime)
468+
if (createdAt) result.createdAt = createdAt
469+
462470
return result
463471
},
464472
}

apps/sim/connectors/google-docs/google-docs.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ function buildQuery(sourceConfig: Record<string, unknown>): string {
162162

163163
const folderId = sourceConfig.folderId as string | undefined
164164
if (folderId?.trim()) {
165-
parts.push(`'${folderId.trim()}' in parents`)
165+
parts.push(`'${folderId.trim().replace(/'/g, "\\'")}' in parents`)
166166
}
167167

168168
return parts.join(' and ')

apps/sim/connectors/google-drive/google-drive.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ function buildQuery(sourceConfig: Record<string, unknown>): string {
112112

113113
const folderId = sourceConfig.folderId as string | undefined
114114
if (folderId?.trim()) {
115-
parts.push(`'${folderId.trim()}' in parents`)
115+
parts.push(`'${folderId.trim().replace(/'/g, "\\'")}' in parents`)
116116
}
117117

118118
const fileType = (sourceConfig.fileType as string) || 'all'

apps/sim/connectors/google-sheets/google-sheets.ts

Lines changed: 72 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,12 @@ import { createLogger } from '@sim/logger'
22
import { GoogleSheetsIcon } from '@/components/icons'
33
import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils'
44
import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types'
5-
import { computeContentHash } from '@/connectors/utils'
5+
import { computeContentHash, parseTagDate } from '@/connectors/utils'
66

77
const logger = createLogger('GoogleSheetsConnector')
88

99
const SHEETS_API_BASE = 'https://sheets.googleapis.com/v4/spreadsheets'
10+
const DRIVE_API_BASE = 'https://www.googleapis.com/drive/v3/files'
1011
const MAX_ROWS = 10000
1112
const CONCURRENCY = 3
1213

@@ -102,14 +103,47 @@ async function fetchSpreadsheetMetadata(
102103
return (await response.json()) as SpreadsheetMetadata
103104
}
104105

106+
/**
107+
* Fetches the spreadsheet's modifiedTime from the Drive API.
*
* Issues a GET to `${DRIVE_API_BASE}/{spreadsheetId}` (the id is URI-encoded),
* requesting only the `modifiedTime` field and passing `supportsAllDrives=true`.
*
* This helper never rejects on request failure: a non-OK response or an error
* raised while fetching/parsing is logged via `logger.warn` and reported to the
* caller as `undefined`.
*
* @param accessToken - Token sent as `Authorization: Bearer <token>`.
* @param spreadsheetId - Id of the spreadsheet file to look up.
* @returns The `modifiedTime` string from the response body, or `undefined`
* when the field is absent, the response is not OK, or an error was caught.
108+
*/
109+
async function fetchSpreadsheetModifiedTime(
110+
accessToken: string,
111+
spreadsheetId: string
112+
): Promise<string | undefined> {
113+
try {
114+
// Restrict the response to the single field this helper needs.
const url = `${DRIVE_API_BASE}/${encodeURIComponent(spreadsheetId)}?fields=modifiedTime&supportsAllDrives=true`
115+
const response = await fetchWithRetry(url, {
116+
method: 'GET',
117+
headers: {
118+
Authorization: `Bearer ${accessToken}`,
119+
Accept: 'application/json',
120+
},
121+
})
122+
123+
// Non-OK status: log it and return undefined instead of throwing.
if (!response.ok) {
124+
logger.warn('Failed to fetch modifiedTime from Drive API', { status: response.status })
125+
return undefined
126+
}
127+
128+
// The body is treated as `{ modifiedTime?: string }`; no further validation is done here.
const data = (await response.json()) as { modifiedTime?: string }
129+
return data.modifiedTime
130+
} catch (error) {
131+
// Anything thrown above (request or JSON parsing) is logged and swallowed.
logger.warn('Error fetching modifiedTime from Drive API', {
132+
error: error instanceof Error ? error.message : String(error),
133+
})
134+
return undefined
135+
}
136+
}
137+
105138
/**
106139
* Converts a single sheet tab into an ExternalDocument.
107140
*/
108141
async function sheetToDocument(
109142
accessToken: string,
110143
spreadsheetId: string,
111144
spreadsheetTitle: string,
112-
sheet: SheetProperties
145+
sheet: SheetProperties,
146+
modifiedTime?: string
113147
): Promise<ExternalDocument | null> {
114148
try {
115149
const values = await fetchSheetValues(accessToken, spreadsheetId, sheet.title)
@@ -151,6 +185,7 @@ async function sheetToDocument(
151185
sheetId: sheet.sheetId,
152186
rowCount,
153187
columnCount: headers.length,
188+
...(modifiedTime ? { modifiedTime } : {}),
154189
},
155190
}
156191
} catch (error) {
@@ -208,7 +243,10 @@ export const googleSheetsConnector: ConnectorConfig = {
208243

209244
logger.info('Fetching spreadsheet metadata', { spreadsheetId })
210245

211-
const metadata = await fetchSpreadsheetMetadata(accessToken, spreadsheetId)
246+
const [metadata, modifiedTime] = await Promise.all([
247+
fetchSpreadsheetMetadata(accessToken, spreadsheetId),
248+
fetchSpreadsheetModifiedTime(accessToken, spreadsheetId),
249+
])
212250
const sheetFilter = (sourceConfig.sheetFilter as string) || 'all'
213251

214252
let sheets = metadata.sheets.map((s) => s.properties)
@@ -226,7 +264,13 @@ export const googleSheetsConnector: ConnectorConfig = {
226264
const batch = sheets.slice(i, i + CONCURRENCY)
227265
const results = await Promise.all(
228266
batch.map((sheet) =>
229-
sheetToDocument(accessToken, spreadsheetId, metadata.properties.title, sheet)
267+
sheetToDocument(
268+
accessToken,
269+
spreadsheetId,
270+
metadata.properties.title,
271+
sheet,
272+
modifiedTime
273+
)
230274
)
231275
)
232276
documents.push(...(results.filter(Boolean) as ExternalDocument[]))
@@ -257,7 +301,22 @@ export const googleSheetsConnector: ConnectorConfig = {
257301
return null
258302
}
259303

260-
const metadata = await fetchSpreadsheetMetadata(accessToken, spreadsheetId)
304+
let metadata: SpreadsheetMetadata
305+
let modifiedTime: string | undefined
306+
try {
307+
;[metadata, modifiedTime] = await Promise.all([
308+
fetchSpreadsheetMetadata(accessToken, spreadsheetId),
309+
fetchSpreadsheetModifiedTime(accessToken, spreadsheetId),
310+
])
311+
} catch (error) {
312+
const message = error instanceof Error ? error.message : String(error)
313+
if (message.includes('404')) {
314+
logger.info('Spreadsheet not found (possibly deleted)', { spreadsheetId })
315+
return null
316+
}
317+
throw error
318+
}
319+
261320
const sheetEntry = metadata.sheets.find((s) => s.properties.sheetId === sheetId)
262321

263322
if (!sheetEntry) {
@@ -269,7 +328,8 @@ export const googleSheetsConnector: ConnectorConfig = {
269328
accessToken,
270329
spreadsheetId,
271330
metadata.properties.title,
272-
sheetEntry.properties
331+
sheetEntry.properties,
332+
modifiedTime
273333
)
274334
},
275335

@@ -325,6 +385,7 @@ export const googleSheetsConnector: ConnectorConfig = {
325385
{ id: 'sheetTitle', displayName: 'Sheet Name', fieldType: 'text' },
326386
{ id: 'rowCount', displayName: 'Row Count', fieldType: 'number' },
327387
{ id: 'columnCount', displayName: 'Column Count', fieldType: 'number' },
388+
{ id: 'lastModified', displayName: 'Last Modified', fieldType: 'date' },
328389
],
329390

330391
mapTags: (metadata: Record<string, unknown>): Record<string, unknown> => {
@@ -342,6 +403,11 @@ export const googleSheetsConnector: ConnectorConfig = {
342403
result.columnCount = metadata.columnCount
343404
}
344405

406+
const lastModified = parseTagDate(metadata.modifiedTime)
407+
if (lastModified) {
408+
result.lastModified = lastModified
409+
}
410+
345411
return result
346412
},
347413
}

0 commit comments

Comments
 (0)