Skip to content

Commit 12fd2eb

Browse files
committed
fix(gmail): simplify RFC 2047 encoding to match Google's own sample
1 parent b05f0b3 commit 12fd2eb

File tree

2 files changed

+19
-108
lines changed

2 files changed

+19
-108
lines changed

apps/sim/tools/gmail/utils.test.ts

Lines changed: 16 additions & 70 deletions
Original file line number | Diff line number | Diff line change
@@ -4,87 +4,33 @@
44
import { describe, expect, it } from 'vitest'
55
import { encodeRfc2047 } from './utils'
66

7-
/**
8-
* Decode an RFC 2047 encoded header (single or multi-word) back to a string.
9-
*/
10-
function decodeRfc2047(encoded: string): string {
11-
const words = encoded.split(/\r\n\s+/)
12-
return words
13-
.map((word) => {
14-
const match = word.match(/^=\?UTF-8\?B\?(.+)\?=$/)
15-
if (!match) return word
16-
return Buffer.from(match[1], 'base64').toString('utf-8')
17-
})
18-
.join('')
19-
}
20-
217
describe('encodeRfc2047', () => {
228
it('returns ASCII text unchanged', () => {
23-
const input = 'Simple ASCII Subject'
24-
expect(encodeRfc2047(input)).toBe(input)
9+
expect(encodeRfc2047('Simple ASCII Subject')).toBe('Simple ASCII Subject')
2510
})
2611

2712
it('returns empty string unchanged', () => {
2813
expect(encodeRfc2047('')).toBe('')
2914
})
3015

31-
it('encodes short non-ASCII text in a single encoded word', () => {
32-
const input = 'Hello 世界'
33-
const result = encodeRfc2047(input)
34-
expect(result).toMatch(/^=\?UTF-8\?B\?[A-Za-z0-9+/=]+\?=$/)
35-
expect(result.length).toBeLessThanOrEqual(75)
36-
expect(decodeRfc2047(result)).toBe(input)
37-
})
38-
39-
it('encodes emojis correctly', () => {
40-
const input = 'Time to Stretch! 🧘'
41-
const result = encodeRfc2047(input)
42-
expect(result).toMatch(/^=\?UTF-8\?B\?[A-Za-z0-9+/=]+\?=$/)
43-
expect(decodeRfc2047(result)).toBe(input)
44-
})
45-
46-
it('splits long non-ASCII text into multiple encoded words', () => {
47-
const input = '今週のミーティングアジェンダについて検討します'
48-
const result = encodeRfc2047(input)
49-
const words = result.split('\r\n ')
50-
words.forEach((word) => {
51-
expect(word.length).toBeLessThanOrEqual(75)
52-
expect(word).toMatch(/^=\?UTF-8\?B\?[A-Za-z0-9+/=]+\?=$/)
53-
})
54-
expect(decodeRfc2047(result)).toBe(input)
55-
})
56-
57-
it('handles very long subjects with emojis without splitting characters', () => {
58-
const input = '🎉 '.repeat(30)
59-
const result = encodeRfc2047(input)
60-
const words = result.split('\r\n ')
61-
words.forEach((word) => {
62-
expect(word.length).toBeLessThanOrEqual(75)
63-
expect(word).toMatch(/^=\?UTF-8\?B\?[A-Za-z0-9+/=]+\?=$/)
64-
})
65-
expect(decodeRfc2047(result)).toBe(input)
66-
})
67-
68-
it('does not split already-encoded subjects (pure ASCII passthrough)', () => {
69-
const input = '=?UTF-8?B?VGltZSB0byBTdHJldGNoISDwn6eY?='
70-
const result = encodeRfc2047(input)
71-
expect(result).toBe(input)
16+
it('encodes emojis as RFC 2047 base64', () => {
17+
const result = encodeRfc2047('Time to Stretch! 🧘')
18+
expect(result).toBe('=?UTF-8?B?VGltZSB0byBTdHJldGNoISDwn6eY?=')
7219
})
7320

74-
it('handles accented characters', () => {
75-
const input = 'Café résumé'
76-
const result = encodeRfc2047(input)
77-
expect(result).toMatch(/^=\?UTF-8\?B\?[A-Za-z0-9+/=]+\?=$/)
78-
expect(decodeRfc2047(result)).toBe(input)
21+
it('round-trips non-ASCII subjects correctly', () => {
22+
const subjects = ['Hello 世界', 'Café résumé', '🎉🎊🎈 Party!', '今週のミーティング']
23+
for (const subject of subjects) {
24+
const encoded = encodeRfc2047(subject)
25+
const match = encoded.match(/^=\?UTF-8\?B\?(.+)\?=$/)
26+
expect(match).not.toBeNull()
27+
const decoded = Buffer.from(match![1], 'base64').toString('utf-8')
28+
expect(decoded).toBe(subject)
29+
}
7930
})
8031

81-
it('handles mixed ASCII and multi-byte characters in long subjects', () => {
82-
const input = 'Important: 会議の議事録をお送りします - please review by Friday 🙏'
83-
const result = encodeRfc2047(input)
84-
const words = result.split('\r\n ')
85-
words.forEach((word) => {
86-
expect(word.length).toBeLessThanOrEqual(75)
87-
})
88-
expect(decodeRfc2047(result)).toBe(input)
32+
it('does not double-encode already-encoded subjects', () => {
33+
const alreadyEncoded = '=?UTF-8?B?VGltZSB0byBTdHJldGNoISDwn6eY?='
34+
expect(encodeRfc2047(alreadyEncoded)).toBe(alreadyEncoded)
8935
})
9036
})

apps/sim/tools/gmail/utils.ts

Lines changed: 3 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -296,50 +296,15 @@ function generateBoundary(): string {
296296

297297
/**
298298
* Encode a header value using RFC 2047 Base64 encoding if it contains non-ASCII characters.
299-
* Email headers per RFC 2822 must be ASCII-only. Non-ASCII characters (emojis, accented
300-
* characters, etc.) must be encoded as =?UTF-8?B?<base64>?= to avoid mojibake.
301-
*
302-
* Per RFC 2047 §2, each encoded-word must not exceed 75 characters. Long values are split
303-
* into multiple encoded-words separated by CRLF + space (folding whitespace). Splits always
304-
* occur on character boundaries to avoid producing invalid UTF-8 fragments.
305-
* @param value The header value to encode
306-
* @returns The encoded header value, or the original if it's already ASCII
299+
* This matches Google's own Gmail API sample: `=?utf-8?B?${Buffer.from(subject).toString('base64')}?=`
300+
* @see https://github.com/googleapis/google-api-nodejs-client/blob/main/samples/gmail/send.js
307301
*/
308302
export function encodeRfc2047(value: string): string {
309303
// eslint-disable-next-line no-control-regex
310304
if (/^[\x00-\x7F]*$/.test(value)) {
311305
return value
312306
}
313-
314-
// =?UTF-8?B? (10) + ?= (2) = 12 chars overhead. Max 75 - 12 = 63 chars for base64 payload.
315-
// base64 encodes 3 bytes → 4 chars, so max raw bytes = floor(63 / 4) * 3 = 45 bytes per chunk.
316-
const MAX_BYTES_PER_CHUNK = 45
317-
const encodedWords: string[] = []
318-
319-
// Split on character boundaries by iterating characters, not raw bytes
320-
let currentChars: string[] = []
321-
let currentByteLen = 0
322-
323-
for (const char of value) {
324-
const charByteLen = Buffer.byteLength(char, 'utf-8')
325-
326-
if (currentByteLen + charByteLen > MAX_BYTES_PER_CHUNK && currentChars.length > 0) {
327-
const chunkStr = currentChars.join('')
328-
encodedWords.push(`=?UTF-8?B?${Buffer.from(chunkStr, 'utf-8').toString('base64')}?=`)
329-
currentChars = []
330-
currentByteLen = 0
331-
}
332-
333-
currentChars.push(char)
334-
currentByteLen += charByteLen
335-
}
336-
337-
if (currentChars.length > 0) {
338-
const chunkStr = currentChars.join('')
339-
encodedWords.push(`=?UTF-8?B?${Buffer.from(chunkStr, 'utf-8').toString('base64')}?=`)
340-
}
341-
342-
return encodedWords.join('\r\n ')
307+
return `=?UTF-8?B?${Buffer.from(value, 'utf-8').toString('base64')}?=`
343308
}
344309

345310
/**

0 commit comments

Comments
 (0)