diff --git a/fetcher/fetcher.go b/fetcher/fetcher.go index 74f7cb3..006998c 100644 --- a/fetcher/fetcher.go +++ b/fetcher/fetcher.go @@ -30,7 +30,9 @@ import ( "github.com/emersion/go-pgpmail" "github.com/floatpane/matcha/config" "go.mozilla.org/pkcs7" + "golang.org/x/text/encoding" "golang.org/x/text/encoding/ianaindex" + "golang.org/x/text/encoding/unicode" "golang.org/x/text/transform" ) @@ -228,15 +230,22 @@ func decodePart(reader io.Reader, header mail.PartHeader) (string, error) { } func decodeReaderWithCharset(reader io.Reader, charset string) ([]byte, error) { - encoding, err := ianaindex.IANA.Encoding(charset) - if err != nil || encoding == nil { - encoding, _ = ianaindex.IANA.Encoding("utf-8") - } - - transformReader := transform.NewReader(reader, encoding.NewDecoder()) + enc := lookupCharsetEncoding(charset) + transformReader := transform.NewReader(reader, enc.NewDecoder()) return ioutil.ReadAll(transformReader) } +// lookupCharsetEncoding resolves a charset name, falling back to UTF-8. +func lookupCharsetEncoding(charset string) encoding.Encoding { + if enc, err := ianaindex.IANA.Encoding(charset); err == nil && enc != nil { + return enc + } + if enc, err := ianaindex.IANA.Encoding("utf-8"); err == nil && enc != nil { + return enc + } + return unicode.UTF8 +} + func bestEffortCharset(contentType string) string { for _, param := range strings.Split(contentType, ";") { key, value, found := strings.Cut(param, "=") @@ -256,11 +265,14 @@ func bestEffortCharset(contentType string) string { func decodeHeader(header string) string { dec := new(mime.WordDecoder) dec.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { - encoding, err := ianaindex.IANA.Encoding(charset) + enc, err := ianaindex.IANA.Encoding(charset) if err != nil { return nil, err } - return transform.NewReader(input, encoding.NewDecoder()), nil + if enc == nil { + return nil, fmt.Errorf("fetcher: no encoding implementation for charset %q", charset) + } + return transform.NewReader(input, enc.NewDecoder()), nil } decoded, err := dec.DecodeHeader(header) if err != nil { diff --git a/fetcher/fetcher_test.go b/fetcher/fetcher_test.go index 9937ad2..af4c379 100644 --- a/fetcher/fetcher_test.go +++ b/fetcher/fetcher_test.go @@ -54,6 +54,27 @@ func TestDecodePartFallsBackToUTF8WhenMalformedContentTypeHasNoCharset(t *testin } } +func TestDecodeReaderWithCharsetSurvivesUnknownCharset(t *testing.T) { + decoded, err := decodeReaderWithCharset(strings.NewReader("hello"), "bogus-charset-name") + if err != nil { + t.Fatalf("decodeReaderWithCharset() returned error: %v", err) + } + if string(decoded) != "hello" { + t.Fatalf("decodeReaderWithCharset() = %q, want %q", string(decoded), "hello") + } +} + +func TestLookupCharsetEncodingAlwaysReturnsNonNil(t *testing.T) { + cases := []string{"", "utf-8", "iso-8859-1", "bogus-charset-name", "this/is/not/real"} + for _, name := range cases { + t.Run(name, func(t *testing.T) { + if enc := lookupCharsetEncoding(name); enc == nil { + t.Fatalf("lookupCharsetEncoding(%q) returned nil", name) + } + }) + } +} + // TestFetchEmails is an integration test that requires a live IMAP server and valid credentials. // NOTE: This test will be skipped if it cannot load a configuration file, // making it safe to run in a CI environment without credentials.