From a258240a1e71e9fa401de5a20b8be2e7225248ce Mon Sep 17 00:00:00 2001 From: Pontus Leitzler Date: Fri, 25 Oct 2024 10:09:00 +0200 Subject: [PATCH] THRIFT-5828: reduce over-allocation in Go binary protocol Due to a subtle caveat in buffer.Bytes the Go binary protocol method ReadBinary() always over-allocated. We now allocate the asked size directly up to 10 MiB, and then use io.CopyN to maintain the malformed message protection. The existing benchmarks show that we reduce allocations for small payloads. Ran the existing benchmarks in lib/go/thrift with and without this change: % go test -run X -bench . -benchmem > % benchcmp -changed old.txt new.txt [...] benchmark old allocs new allocs delta BenchmarkSafeReadBytes/normal-20 5 2 -60.00% BenchmarkBinaryBinary_0-20 4 1 -75.00% BenchmarkBinaryBinary_1-20 4 1 -75.00% BenchmarkBinaryBinary_2-20 5 2 -60.00% BenchmarkCompactBinary0-20 4 1 -75.00% BenchmarkCompactBinary1-20 4 1 -75.00% BenchmarkCompactBinary2-20 5 2 -60.00% BenchmarkSerializer/baseline-20 8 5 -37.50% BenchmarkSerializer/plain-20 20 17 -15.00% BenchmarkSerializer/pool-20 8 5 -37.50% benchmark old bytes new bytes delta BenchmarkSafeReadBytes/normal-20 1656 160 -90.34% BenchmarkBinaryBinary_0-20 1608 160 -90.05% BenchmarkBinaryBinary_1-20 1608 160 -90.05% BenchmarkBinaryBinary_2-20 1634 184 -88.74% BenchmarkCompactBinary0-20 1608 160 -90.05% BenchmarkCompactBinary1-20 1608 160 -90.05% BenchmarkCompactBinary2-20 1634 184 -88.74% BenchmarkSerializer/baseline-20 1000 416 -58.40% BenchmarkSerializer/plain-20 3640 3056 -16.04% BenchmarkSerializer/pool-20 1002 417 -58.38% --- lib/go/thrift/binary_protocol.go | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/lib/go/thrift/binary_protocol.go b/lib/go/thrift/binary_protocol.go index 3f2843ef966..78d9f042065 100644 --- a/lib/go/thrift/binary_protocol.go +++ b/lib/go/thrift/binary_protocol.go @@ -597,14 +597,18 @@ func safeReadBytes(size int32, trans io.Reader) ([]byte, error) { if size < 0 { return nil, nil } - if size > bytes.MinRead { - // Use bytes.Buffer to prevent allocating size bytes when size is very large - buf := new(bytes.Buffer) - _, err := io.CopyN(buf, trans, int64(size)) - return buf.Bytes(), err - } - // Allocate size bytes - b := make([]byte, size) - n, err := io.ReadFull(trans, b) - return b[:n], err + + // Fast path for reads smaller than 10 MiB that only allocates exactly + // what is asked for. + const readLimit = 10 * 1024 * 1024 + if size < readLimit { + b := make([]byte, size) + n, err := io.ReadFull(trans, b) + return b[:n], err + } + + // Use bytes.Buffer to prevent allocating size bytes when size is very large + buf := new(bytes.Buffer) + _, err := io.CopyN(buf, trans, int64(size)) + return buf.Bytes(), err }