|
| 1 | +import { createLogger } from '@sim/logger' |
| 2 | +import { DiscordIcon } from '@/components/icons' |
| 3 | +import { fetchWithRetry, VALIDATE_RETRY_OPTIONS } from '@/lib/knowledge/documents/utils' |
| 4 | +import type { ConnectorConfig, ExternalDocument, ExternalDocumentList } from '@/connectors/types' |
| 5 | +import { computeContentHash, parseTagDate } from '@/connectors/utils' |
| 6 | + |
/** Module-scoped logger; entries are created under the 'DiscordConnector' name. */
const logger = createLogger('DiscordConnector')

/** Root URL of the Discord REST API (v10); request paths are appended to it. */
const DISCORD_API_BASE = 'https://discord.com/api/v10'

/** Message cap used when the connector config does not provide `maxMessages`. */
const DEFAULT_MAX_MESSAGES = 1_000

/** Largest `limit` value sent on a single channel-messages request. */
const MESSAGES_PER_PAGE = 100
| 12 | + |
/**
 * Subset of a Discord message object that this connector reads.
 */
interface DiscordMessage {
  /** Message ID; also used as the `before` pagination cursor. */
  id: string
  /** ID of the channel the message belongs to. */
  channel_id: string
  /** Account that posted the message. */
  author: {
    id: string
    /** Name written in front of each formatted line. */
    username: string
    discriminator?: string
    bot?: boolean
  }
  /** Text body; messages with empty content are dropped during formatting. */
  content: string
  /** Creation timestamp, emitted verbatim in the formatted output. */
  timestamp: string
  edited_timestamp?: string | null
  /** Discord message type; only 0 (DEFAULT) and 19 (REPLY) are kept during formatting. */
  type: number
}
| 27 | + |
/**
 * Subset of a Discord channel object that this connector reads.
 */
interface DiscordChannel {
  /** Channel ID; used as the document's external ID and in its source URL. */
  id: string
  /** Channel name; the connector falls back to the ID when it is absent. */
  name?: string
  /** Channel topic, copied into document metadata when present. */
  topic?: string | null
  /** Owning guild ID; the source URL uses '@me' when it is absent. */
  guild_id?: string
  /** Discord channel type. */
  type: number
}
| 35 | + |
/**
 * Issues an authenticated GET request to the Discord REST API and resolves with the parsed
 * JSON body. Discord signals failures through HTTP status codes (unlike Slack), so any
 * non-OK response is turned into a thrown error.
 *
 * @param path - API path appended to the base URL, e.g. `/channels/123`.
 * @param botToken - Bot token sent as `Authorization: Bot <token>`.
 * @param params - Optional query-string parameters.
 * @param retryOptions - Optional retry settings passed through to `fetchWithRetry`.
 * @returns The decoded JSON response body.
 * @throws Error with message `Discord API error <status>: <body>` for non-OK responses.
 */
async function discordApiGet(
  path: string,
  botToken: string,
  params?: Record<string, string>,
  retryOptions?: Parameters<typeof fetchWithRetry>[2]
): Promise<unknown> {
  let url = `${DISCORD_API_BASE}${path}`
  if (params) {
    url += `?${new URLSearchParams(params).toString()}`
  }

  const response = await fetchWithRetry(
    url,
    {
      method: 'GET',
      headers: {
        Authorization: `Bot ${botToken}`,
        Accept: 'application/json',
      },
    },
    retryOptions
  )

  if (response.ok) {
    return response.json()
  }

  // Reading the error body is best-effort; an unreadable body becomes an empty string.
  const errorBody = await response.text().catch(() => '')
  throw new Error(`Discord API error ${response.status}: ${errorBody}`)
}
| 68 | + |
/**
 * Fetches up to `maxMessages` messages from a channel using `before`-based pagination.
 * Discord returns messages newest-first, and that order is preserved in the result;
 * callers reverse it when they need chronological order.
 *
 * @param botToken - Bot token used to authenticate each request.
 * @param channelId - ID of the channel whose messages are fetched.
 * @param maxMessages - Upper bound on the number of messages returned. Fractions are
 *   rounded down; NaN, zero, and negative values yield an empty result without any request.
 * @returns The collected messages (newest first) and the timestamp of the newest message,
 *   if any message was returned.
 * @throws Error propagated from `discordApiGet` when a request fails.
 */
async function fetchChannelMessages(
  botToken: string,
  channelId: string,
  maxMessages: number
): Promise<{ messages: DiscordMessage[]; lastActivityTs?: string }> {
  // The `limit` query parameter must be an integer. Without flooring, a cap such as 150.5
  // would eventually request `limit=50.5`. NaN stays NaN, so the loop below never runs.
  const cap = Math.floor(maxMessages)

  const allMessages: DiscordMessage[] = []
  let beforeId: string | undefined
  let lastActivityTs: string | undefined

  while (allMessages.length < cap) {
    const limit = Math.min(MESSAGES_PER_PAGE, cap - allMessages.length)
    const params: Record<string, string> = { limit: String(limit) }
    if (beforeId) {
      params.before = beforeId
    }

    const page = await discordApiGet(`/channels/${channelId}/messages`, botToken, params)

    // Stop on an empty page, or on anything that is not the expected array of messages.
    if (!Array.isArray(page) || page.length === 0) break
    const messages = page as DiscordMessage[]

    // The first message of the first page is the newest one in the channel.
    if (!lastActivityTs) {
      lastActivityTs = messages[0].timestamp
    }

    allMessages.push(...messages)

    // The last message in the batch is the oldest; use its ID for the next page
    beforeId = messages[messages.length - 1].id

    // If we got fewer than requested, there are no more messages
    if (messages.length < limit) break
  }

  return { messages: allMessages.slice(0, cap), lastActivityTs }
}
| 112 | + |
/**
 * Renders fetched messages as one plain-text transcript.
 * Output lines have the form "[timestamp] username: message content" and are joined with
 * newlines, oldest message first.
 *
 * @param messages - Messages in the newest-first order returned by Discord.
 * @returns The transcript, or an empty string when no message qualifies.
 */
function formatMessages(messages: DiscordMessage[]): string {
  // Only user-authored messages are kept: type 0 = DEFAULT, type 19 = REPLY.
  const isUserMessage = (message: DiscordMessage): boolean =>
    message.type === 0 || message.type === 19

  return messages
    .filter((message) => isUserMessage(message) && Boolean(message.content))
    .map((message) => `[${message.timestamp}] ${message.author.username}: ${message.content}`)
    .reverse() // input is newest-first; flip to chronological order
    .join('\n')
}
| 135 | + |
/**
 * Reads the configured channel ID.
 * Returns the trimmed value, or an empty string when it is missing or not a string.
 */
function readChannelId(sourceConfig: Record<string, unknown>): string {
  const raw = sourceConfig.channelId
  return typeof raw === 'string' ? raw.trim() : ''
}

/**
 * Resolves the configured message cap.
 * Missing, non-numeric, zero, and negative values fall back to DEFAULT_MAX_MESSAGES;
 * positive fractions are rounded down to a whole number of at least 1.
 */
function resolveMaxMessages(raw: unknown): number {
  if (raw === undefined || raw === null || raw === '') return DEFAULT_MAX_MESSAGES
  const parsed = Number(raw)
  if (Number.isNaN(parsed) || parsed <= 0) return DEFAULT_MAX_MESSAGES
  return Math.max(1, Math.floor(parsed))
}

/** Builds the REST path for a channel, escaping the ID so it cannot alter the request path. */
function channelPath(channelId: string): string {
  return `/channels/${encodeURIComponent(channelId.trim())}`
}

/**
 * Extracts the HTTP status from an error message produced by `discordApiGet`
 * ("Discord API error <status>: <body>"). Returns undefined for any other message.
 */
function discordErrorStatus(message: string): number | undefined {
  const match = /^Discord API error (\d{3}):/.exec(message)
  return match ? Number(match[1]) : undefined
}

/**
 * Fetches a channel's messages and assembles the single document that represents the channel.
 * Returns null when the channel yields no formattable message content.
 */
async function buildChannelDocument(
  accessToken: string,
  channel: DiscordChannel,
  maxMessages: number
): Promise<ExternalDocument | null> {
  const { messages, lastActivityTs } = await fetchChannelMessages(
    accessToken,
    channel.id,
    maxMessages
  )

  const content = formatMessages(messages)
  if (!content.trim()) return null

  const contentHash = await computeContentHash(content)
  const channelName = channel.name || channel.id
  // Channels without a guild are linked under '@me'.
  const sourceUrl = `https://discord.com/channels/${channel.guild_id || '@me'}/${channel.id}`

  return {
    externalId: channel.id,
    title: `#${channelName}`,
    content,
    mimeType: 'text/plain',
    sourceUrl,
    contentHash,
    metadata: {
      channelName,
      messageCount: messages.length,
      lastActivity: lastActivityTs,
      topic: channel.topic ?? undefined,
    },
  }
}

/**
 * Connector that syncs the messages of one Discord channel into a knowledge base
 * as a single plain-text document, authenticated with a bot token.
 */
export const discordConnector: ConnectorConfig = {
  id: 'discord',
  name: 'Discord',
  description: 'Sync channel messages from Discord into your knowledge base',
  version: '1.0.0',
  icon: DiscordIcon,

  auth: {
    mode: 'apiKey',
    label: 'Bot Token',
    placeholder: 'Enter your Discord bot token',
  },

  configFields: [
    {
      id: 'channelId',
      title: 'Channel ID',
      type: 'short-input',
      placeholder: 'e.g. 123456789012345678',
      required: true,
      description: 'The Discord channel ID to sync messages from',
    },
    {
      id: 'maxMessages',
      title: 'Max Messages',
      type: 'short-input',
      required: false,
      placeholder: `e.g. 500 (default: ${DEFAULT_MAX_MESSAGES})`,
    },
  ],

  /**
   * Lists the documents for the configured channel: one document, or none when the channel
   * has no formattable messages. The result is never paginated (`hasMore` is always false).
   *
   * @throws Error when the channel ID is missing or a Discord request fails.
   */
  listDocuments: async (
    accessToken: string,
    sourceConfig: Record<string, unknown>,
    _cursor?: string,
    _syncContext?: Record<string, unknown>
  ): Promise<ExternalDocumentList> => {
    const channelId = readChannelId(sourceConfig)
    if (!channelId) {
      throw new Error('Channel ID is required')
    }

    const maxMessages = resolveMaxMessages(sourceConfig.maxMessages)

    logger.info('Syncing Discord channel', { channelId, maxMessages })

    const channel = (await discordApiGet(channelPath(channelId), accessToken)) as DiscordChannel

    const document = await buildChannelDocument(accessToken, channel, maxMessages)
    if (!document) {
      logger.info('No messages found in Discord channel', { channelId: channel.id })
      return { documents: [], hasMore: false }
    }

    return {
      documents: [document],
      hasMore: false,
    }
  },

  /**
   * Re-fetches the document for one channel.
   * Returns null when the channel has no formattable messages or when any request fails;
   * failures are logged as warnings rather than thrown.
   */
  getDocument: async (
    accessToken: string,
    sourceConfig: Record<string, unknown>,
    externalId: string
  ): Promise<ExternalDocument | null> => {
    const maxMessages = resolveMaxMessages(sourceConfig.maxMessages)

    try {
      const channel = (await discordApiGet(channelPath(externalId), accessToken)) as DiscordChannel

      // Messages are paged with the ID returned by Discord rather than the raw external ID.
      return await buildChannelDocument(accessToken, channel, maxMessages)
    } catch (error) {
      logger.warn('Failed to get Discord channel document', {
        externalId,
        error: error instanceof Error ? error.message : String(error),
      })
      return null
    }
  },

  /**
   * Checks the config locally, then confirms that the bot token can read the channel.
   * Never throws; problems are reported through the returned `error` string.
   */
  validateConfig: async (
    accessToken: string,
    sourceConfig: Record<string, unknown>
  ): Promise<{ valid: boolean; error?: string }> => {
    const channelId = readChannelId(sourceConfig)
    const maxMessages = sourceConfig.maxMessages

    if (!channelId) {
      return { valid: false, error: 'Channel ID is required' }
    }

    if (maxMessages && (Number.isNaN(Number(maxMessages)) || Number(maxMessages) <= 0)) {
      return { valid: false, error: 'Max messages must be a positive number' }
    }

    try {
      await discordApiGet(channelPath(channelId), accessToken, undefined, VALIDATE_RETRY_OPTIONS)
      return { valid: true }
    } catch (error) {
      const message = error instanceof Error ? error.message : 'Failed to validate configuration'

      // Classify by the status in the message prefix only. The response body that follows it
      // can contain arbitrary digits, so a substring search over the whole message misfires.
      const status = discordErrorStatus(message)
      if (status === 401 || status === 403) {
        return { valid: false, error: 'Invalid bot token or missing permissions for this channel' }
      }
      if (status === 404) {
        return { valid: false, error: `Channel not found: ${channelId}` }
      }
      return { valid: false, error: message }
    }
  },

  tagDefinitions: [
    { id: 'channelName', displayName: 'Channel Name', fieldType: 'text' },
    { id: 'messageCount', displayName: 'Message Count', fieldType: 'number' },
    { id: 'lastActivity', displayName: 'Last Activity', fieldType: 'date' },
  ],

  /**
   * Maps document metadata onto the connector's tag definitions.
   * Values that are missing or of the wrong type are omitted from the result.
   */
  mapTags: (metadata: Record<string, unknown>): Record<string, unknown> => {
    const result: Record<string, unknown> = {}

    if (typeof metadata.channelName === 'string') {
      result.channelName = metadata.channelName
    }

    if (typeof metadata.messageCount === 'number') {
      result.messageCount = metadata.messageCount
    }

    const lastActivity = parseTagDate(metadata.lastActivity)
    if (lastActivity) {
      result.lastActivity = lastActivity
    }

    return result
  },
}
0 commit comments