fix(gmail): simplify RFC 2047 encoding to match Google's own sample

waleedlatif1 · waleedlatif1 · commit 12fd2eb84f82 · 2026-03-11T15:47:35.000-07:00
diff --git a/apps/sim/tools/gmail/utils.test.ts b/apps/sim/tools/gmail/utils.test.ts
@@ -4,87 +4,33 @@
 import { describe, expect, it } from 'vitest'
 import { encodeRfc2047 } from './utils'
 
-/**
- * Decode an RFC 2047 encoded header (single or multi-word) back to a string.
- */
-function decodeRfc2047(encoded: string): string {
-  const words = encoded.split(/\r\n\s+/)
-  return words
-    .map((word) => {
-      const match = word.match(/^=\?UTF-8\?B\?(.+)\?=$/)
-      if (!match) return word
-      return Buffer.from(match[1], 'base64').toString('utf-8')
-    })
-    .join('')
-}
-
 describe('encodeRfc2047', () => {
   it('returns ASCII text unchanged', () => {
-    const input = 'Simple ASCII Subject'
-    expect(encodeRfc2047(input)).toBe(input)
+    expect(encodeRfc2047('Simple ASCII Subject')).toBe('Simple ASCII Subject')
   })
 
   it('returns empty string unchanged', () => {
     expect(encodeRfc2047('')).toBe('')
   })
 
-  it('encodes short non-ASCII text in a single encoded word', () => {
-    const input = 'Hello 世界'
-    const result = encodeRfc2047(input)
-    expect(result).toMatch(/^=\?UTF-8\?B\?[A-Za-z0-9+/=]+\?=$/)
-    expect(result.length).toBeLessThanOrEqual(75)
-    expect(decodeRfc2047(result)).toBe(input)
-  })
-
-  it('encodes emojis correctly', () => {
-    const input = 'Time to Stretch! 🧘'
-    const result = encodeRfc2047(input)
-    expect(result).toMatch(/^=\?UTF-8\?B\?[A-Za-z0-9+/=]+\?=$/)
-    expect(decodeRfc2047(result)).toBe(input)
-  })
-
-  it('splits long non-ASCII text into multiple encoded words', () => {
-    const input = '今週のミーティングアジェンダについて検討します'
-    const result = encodeRfc2047(input)
-    const words = result.split('\r\n ')
-    words.forEach((word) => {
-      expect(word.length).toBeLessThanOrEqual(75)
-      expect(word).toMatch(/^=\?UTF-8\?B\?[A-Za-z0-9+/=]+\?=$/)
-    })
-    expect(decodeRfc2047(result)).toBe(input)
-  })
-
-  it('handles very long subjects with emojis without splitting characters', () => {
-    const input = '🎉 '.repeat(30)
-    const result = encodeRfc2047(input)
-    const words = result.split('\r\n ')
-    words.forEach((word) => {
-      expect(word.length).toBeLessThanOrEqual(75)
-      expect(word).toMatch(/^=\?UTF-8\?B\?[A-Za-z0-9+/=]+\?=$/)
-    })
-    expect(decodeRfc2047(result)).toBe(input)
-  })
-
-  it('does not split already-encoded subjects (pure ASCII passthrough)', () => {
-    const input = '=?UTF-8?B?VGltZSB0byBTdHJldGNoISDwn6eY?='
-    const result = encodeRfc2047(input)
-    expect(result).toBe(input)
+  it('encodes emojis as RFC 2047 base64', () => {
+    const result = encodeRfc2047('Time to Stretch! 🧘')
+    expect(result).toBe('=?UTF-8?B?VGltZSB0byBTdHJldGNoISDwn6eY?=')
   })
 
-  it('handles accented characters', () => {
-    const input = 'Café résumé'
-    const result = encodeRfc2047(input)
-    expect(result).toMatch(/^=\?UTF-8\?B\?[A-Za-z0-9+/=]+\?=$/)
-    expect(decodeRfc2047(result)).toBe(input)
+  it('round-trips non-ASCII subjects correctly', () => {
+    const subjects = ['Hello 世界', 'Café résumé', '🎉🎊🎈 Party!', '今週のミーティング']
+    for (const subject of subjects) {
+      const encoded = encodeRfc2047(subject)
+      const match = encoded.match(/^=\?UTF-8\?B\?(.+)\?=$/)
+      expect(match).not.toBeNull()
+      const decoded = Buffer.from(match![1], 'base64').toString('utf-8')
+      expect(decoded).toBe(subject)
+    }
   })
 
-  it('handles mixed ASCII and multi-byte characters in long subjects', () => {
-    const input = 'Important: 会議の議事録をお送りします - please review by Friday 🙏'
-    const result = encodeRfc2047(input)
-    const words = result.split('\r\n ')
-    words.forEach((word) => {
-      expect(word.length).toBeLessThanOrEqual(75)
-    })
-    expect(decodeRfc2047(result)).toBe(input)
+  it('does not double-encode already-encoded subjects', () => {
+    const alreadyEncoded = '=?UTF-8?B?VGltZSB0byBTdHJldGNoISDwn6eY?='
+    expect(encodeRfc2047(alreadyEncoded)).toBe(alreadyEncoded)
   })
 })
diff --git a/apps/sim/tools/gmail/utils.ts b/apps/sim/tools/gmail/utils.ts
@@ -296,50 +296,15 @@ function generateBoundary(): string {
 
 /**
  * Encode a header value using RFC 2047 Base64 encoding if it contains non-ASCII characters.
- * Email headers per RFC 2822 must be ASCII-only. Non-ASCII characters (emojis, accented
- * characters, etc.) must be encoded as =?UTF-8?B?<base64>?= to avoid mojibake.
- *
- * Per RFC 2047 §2, each encoded-word must not exceed 75 characters. Long values are split
- * into multiple encoded-words separated by CRLF + space (folding whitespace). Splits always
- * occur on character boundaries to avoid producing invalid UTF-8 fragments.
- * @param value The header value to encode
- * @returns The encoded header value, or the original if it's already ASCII
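+ * The whole value is emitted as a single encoded-word with no folding, so the RFC 2047 §2 75-character limit is not enforced; this matches Google's own Gmail API sample: `=?utf-8?B?${Buffer.from(subject).toString('base64')}?=`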
+ * @see https://github.com/googleapis/google-api-nodejs-client/blob/main/samples/gmail/send.js
  */
 export function encodeRfc2047(value: string): string {
   // eslint-disable-next-line no-control-regex
   if (/^[\x00-\x7F]*$/.test(value)) {
     return value
   }
-
-  // =?UTF-8?B? (10) + ?= (2) = 12 chars overhead. Max 75 - 12 = 63 chars for base64 payload.
-  // base64 encodes 3 bytes → 4 chars, so max raw bytes = floor(63 / 4) * 3 = 45 bytes per chunk.
-  const MAX_BYTES_PER_CHUNK = 45
-  const encodedWords: string[] = []
-
-  // Split on character boundaries by iterating characters, not raw bytes
-  let currentChars: string[] = []
-  let currentByteLen = 0
-
-  for (const char of value) {
-    const charByteLen = Buffer.byteLength(char, 'utf-8')
-
-    if (currentByteLen + charByteLen > MAX_BYTES_PER_CHUNK && currentChars.length > 0) {
-      const chunkStr = currentChars.join('')
-      encodedWords.push(`=?UTF-8?B?${Buffer.from(chunkStr, 'utf-8').toString('base64')}?=`)
-      currentChars = []
-      currentByteLen = 0
-    }
-
-    currentChars.push(char)
-    currentByteLen += charByteLen
-  }
-
-  if (currentChars.length > 0) {
-    const chunkStr = currentChars.join('')
-    encodedWords.push(`=?UTF-8?B?${Buffer.from(chunkStr, 'utf-8').toString('base64')}?=`)
-  }
-
-  return encodedWords.join('\r\n ')
+  return `=?UTF-8?B?${Buffer.from(value, 'utf-8').toString('base64')}?=`
 }
 
 /**