From f81e182595fd29d29a11916f7621e5dfdcc7b614 Mon Sep 17 00:00:00 2001 From: Gunir <134402102+gunir@users.noreply.github.com> Date: Thu, 15 Jan 2026 21:28:19 +0700 Subject: [PATCH 1/2] Update rawbody.go - Fix broken encoding due to mishandling Content-Type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix: https://github.com/ZenPrivacy/zen-core/issues/34 This is a classic "Mojibake" issue caused by double-decoding. The artifacts you are seeing (â†’ instead of →) occur when UTF-8 bytes are misinterpreted as Windows-1252 (Latin-1). The function getRawBodyReader relies on golang.org/x/net/html/charset.NewReader to handle character encoding. When the upstream website (e.g., data-star.dev) returns Content-Type: text/html without an explicit charset=utf-8 parameter, the Go charset library defaults to Windows-1252 (to be spec-compliant with legacy HTML). Your proxy takes the valid UTF-8 bytes (e.g., E2 86 92 for →), "decodes" them as Windows-1252 (resulting in â, †, ’), and then re-encodes them as UTF-8 for the output. The browser receives this re-encoded mess and displays â†’. Both StreamRewrite and BufferRewrite unconditionally force a Content-Type header with charset=utf-8 because they assume the content has been successfully converted to UTF-8 by getRawBodyReader. Original Response: Content-Type: text/html (No charset specified). getRawBodyReader: Sees no charset. The golang.org/x/net/html/charset library defaults to Windows-1252 for compatibility. It reads the valid UTF-8 bytes from the server as if they were Windows-1252 bytes. Result: UTF-8 → (bytes E2 86 92) becomes string â†’. StreamRewrite: Sets Content-Type: text/html; charset=utf-8. Browser: Sees charset=utf-8 header. It renders the string â†’ correctly as those characters, instead of the arrow you wanted. 
Signed-off-by: Gunir <134402102+gunir@users.noreply.github.com> --- httprewrite/rawbody.go | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/httprewrite/rawbody.go b/httprewrite/rawbody.go index 354e077..6ddbe3c 100644 --- a/httprewrite/rawbody.go +++ b/httprewrite/rawbody.go @@ -76,22 +76,31 @@ func BufferRewrite(res *http.Response, processor func(src []byte) []byte) error // getRawBodyReader extracts an uncompressed, UTF-8 decoded body from a potentially compressed and non-UTF-8 encoded HTTP response. func getRawBodyReader(res *http.Response) (body io.ReadCloser, mimeType string, err error) { - encoding := res.Header.Get("Content-Encoding") - contentType := res.Header.Get("Content-Type") + encoding := header.Get("Content-Encoding") + contentType := header.Get("Content-Type") mimeType, params, err := mime.ParseMediaType(contentType) if err != nil { - return nil, "", fmt.Errorf("parse content type %q: %v", contentType, err) + mimeType = "text/plain" // Fallback } - if encoding == "" && strings.ToLower(params["charset"]) == "utf-8" { - // The body is already UTF-8 encoded and not compressed. - return res.Body, mimeType, nil + + // [FIX 1] Treat missing charset as UTF-8 (Modern Web Default) + // If encoding is empty and no charset is specified, pass the body through raw. + charsetParam := strings.ToLower(params["charset"]) + if encoding == "" && (charsetParam == "utf-8" || charsetParam == "") { + return body, mimeType, nil } - decompressedReader, err := decompressReader(res.Body, encoding) + decompressedReader, err := decompressReader(body, encoding) if err != nil { return nil, "", fmt.Errorf("create decompressed reader for encoding %q: %v", encoding, err) } + // [FIX 2] If we reach here (because of compression), ensure we don't + // let charset.NewReader default to Windows-1252 if charset is missing. 
+ if charsetParam == "" { + contentType = mimeType + "; charset=utf-8" + } + decodedReader, err := charset.NewReader(decompressedReader, contentType) if err != nil { decompressedReader.Close() @@ -103,7 +112,7 @@ func getRawBodyReader(res *http.Response) (body io.ReadCloser, mimeType string, io.Closer }{ decodedReader, - &multiCloser{[]io.Closer{decompressedReader, res.Body}}, + &multiCloser{[]io.Closer{decompressedReader, body}}, }, mimeType, nil } From 0725a5787455de9d6d53c1e09baa650ee5badf42 Mon Sep 17 00:00:00 2001 From: Gunir <134402102+gunir@users.noreply.github.com> Date: Thu, 15 Jan 2026 21:34:14 +0700 Subject: [PATCH 2/2] Update rawbody.go - Fix some variable mismatches Update rawbody.go - Fix some variable mismatches Signed-off-by: Gunir <134402102+gunir@users.noreply.github.com> --- httprewrite/rawbody.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/httprewrite/rawbody.go b/httprewrite/rawbody.go index 6ddbe3c..9380afc 100644 --- a/httprewrite/rawbody.go +++ b/httprewrite/rawbody.go @@ -76,8 +76,8 @@ func BufferRewrite(res *http.Response, processor func(src []byte) []byte) error // getRawBodyReader extracts an uncompressed, UTF-8 decoded body from a potentially compressed and non-UTF-8 encoded HTTP response. func getRawBodyReader(res *http.Response) (body io.ReadCloser, mimeType string, err error) { - encoding := header.Get("Content-Encoding") - contentType := header.Get("Content-Type") + encoding := res.Header.Get("Content-Encoding") + contentType := res.Header.Get("Content-Type") mimeType, params, err := mime.ParseMediaType(contentType) if err != nil { mimeType = "text/plain" // Fallback @@ -87,10 +87,10 @@ func getRawBodyReader(res *http.Response) (body io.ReadCloser, mimeType string, // If encoding is empty and no charset is specified, pass the body through raw. 
charsetParam := strings.ToLower(params["charset"]) if encoding == "" && (charsetParam == "utf-8" || charsetParam == "") { - return body, mimeType, nil + return res.Body, mimeType, nil } - decompressedReader, err := decompressReader(body, encoding) + decompressedReader, err := decompressReader(res.Body, encoding) if err != nil { return nil, "", fmt.Errorf("create decompressed reader for encoding %q: %v", encoding, err) } @@ -112,7 +112,7 @@ func getRawBodyReader(res *http.Response) (body io.ReadCloser, mimeType string, io.Closer }{ decodedReader, - &multiCloser{[]io.Closer{decompressedReader, body}}, + &multiCloser{[]io.Closer{decompressedReader, res.Body}}, }, mimeType, nil }