Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 20 additions & 8 deletions fetcher/fetcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ import (
"github.com/emersion/go-pgpmail"
"github.com/floatpane/matcha/config"
"go.mozilla.org/pkcs7"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/ianaindex"
"golang.org/x/text/encoding/unicode"
"golang.org/x/text/transform"
)

Expand Down Expand Up @@ -228,15 +230,22 @@ func decodePart(reader io.Reader, header mail.PartHeader) (string, error) {
}

func decodeReaderWithCharset(reader io.Reader, charset string) ([]byte, error) {
encoding, err := ianaindex.IANA.Encoding(charset)
if err != nil || encoding == nil {
encoding, _ = ianaindex.IANA.Encoding("utf-8")
}

transformReader := transform.NewReader(reader, encoding.NewDecoder())
enc := lookupCharsetEncoding(charset)
transformReader := transform.NewReader(reader, enc.NewDecoder())
return ioutil.ReadAll(transformReader)
}

// lookupCharsetEncoding returns the encoding registered in the IANA index for
// charset. If that lookup fails or yields no implementation, the "utf-8" index
// entry is tried next, and unicode.UTF8 is the last resort, so the result is
// never nil.
func lookupCharsetEncoding(charset string) encoding.Encoding {
	// Candidates in priority order: the requested label first, then UTF-8.
	for _, label := range [...]string{charset, "utf-8"} {
		resolved, err := ianaindex.IANA.Encoding(label)
		if err != nil || resolved == nil {
			continue
		}
		return resolved
	}
	return unicode.UTF8
}

func bestEffortCharset(contentType string) string {
for _, param := range strings.Split(contentType, ";") {
key, value, found := strings.Cut(param, "=")
Expand All @@ -256,11 +265,14 @@ func bestEffortCharset(contentType string) string {
func decodeHeader(header string) string {
dec := new(mime.WordDecoder)
dec.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
encoding, err := ianaindex.IANA.Encoding(charset)
enc, err := ianaindex.IANA.Encoding(charset)
if err != nil {
return nil, err
}
return transform.NewReader(input, encoding.NewDecoder()), nil
if enc == nil {
return nil, fmt.Errorf("fetcher: no encoding implementation for charset %q", charset)
}
return transform.NewReader(input, enc.NewDecoder()), nil
}
decoded, err := dec.DecodeHeader(header)
if err != nil {
Expand Down
21 changes: 21 additions & 0 deletions fetcher/fetcher_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,27 @@ func TestDecodePartFallsBackToUTF8WhenMalformedContentTypeHasNoCharset(t *testin
}
}

// TestDecodeReaderWithCharsetSurvivesUnknownCharset checks that an
// unrecognised charset name produces no error and that plain ASCII input is
// returned unchanged.
func TestDecodeReaderWithCharsetSurvivesUnknownCharset(t *testing.T) {
	const want = "hello"

	got, err := decodeReaderWithCharset(strings.NewReader(want), "bogus-charset-name")
	if err != nil {
		t.Fatalf("decodeReaderWithCharset() returned error: %v", err)
	}
	if text := string(got); text != want {
		t.Fatalf("decodeReaderWithCharset() = %q, want %q", text, want)
	}
}

// TestLookupCharsetEncodingAlwaysReturnsNonNil runs one subtest per charset
// label (empty, valid, and unknown names) and fails if any lookup returns a
// nil encoding.
func TestLookupCharsetEncodingAlwaysReturnsNonNil(t *testing.T) {
	for _, charset := range []string{
		"",
		"utf-8",
		"iso-8859-1",
		"bogus-charset-name",
		"this/is/not/real",
	} {
		t.Run(charset, func(t *testing.T) {
			enc := lookupCharsetEncoding(charset)
			if enc != nil {
				return
			}
			t.Fatalf("lookupCharsetEncoding(%q) returned nil", charset)
		})
	}
}

// TestFetchEmails is an integration test that requires a live IMAP server and valid credentials.
// NOTE: This test will be skipped if it cannot load a configuration file,
// making it safe to run in a CI environment without credentials.
Expand Down
Loading