From cbbd3e3853704dd1a98ff6c149cc63675865f4a7 Mon Sep 17 00:00:00 2001 From: Roman Khimov Date: Tue, 21 Oct 2025 13:04:12 +0300 Subject: [PATCH 1/2] locodedb: allocate less, don't leave references to csv data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. We have a reference to data allocated by csv reader code which means we're wasting memory and have a lot of useless active objects (can be clearly seen in `inuse_objects` of a NeoFS node). 2. string is like 16 bytes while we can avoid storing it at all by writing 3 bytes more into the data blob. benchstat: goos: linux goarch: amd64 pkg: github.com/nspcc-dev/locode-db/pkg/locodedb cpu: AMD Ryzen 7 PRO 7840U w/ Radeon 780M Graphics │ code.old │ code.new │ │ sec/op │ sec/op vs base │ Unpack-16 132.5m ± 2% 130.4m ± 3% ~ (p=0.052 n=10) Get-16 351.4n ± 2% 385.9n ± 1% +9.83% (p=0.000 n=10) geomean 215.8µ 224.3µ +3.96% │ code.old │ code.new │ │ B/op │ B/op vs base │ Unpack-16 35.63Mi ± 0% 30.24Mi ± 0% -15.13% (p=0.000 n=10) Get-16 5.000 ± 0% 5.000 ± 0% ~ (p=1.000 n=10) ¹ geomean 13.35Ki 12.30Ki -7.88% ¹ all samples are equal │ code.old │ code.new │ │ allocs/op │ allocs/op vs base │ Unpack-16 191.4k ± 0% 191.4k ± 0% +0.02% (p=0.000 n=10) Get-16 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ geomean 437.5 437.5 +0.01% ¹ all samples are equal Get clearly costs a bit more, but 15% less memory is much more valuable here because this is not frequently accessed data. 
No traces of csv-allocated strings left, before: File: locodedb.test Build ID: 3b6646cf8a2a939e7bb4055621d8b1b1d4ae096f Type: inuse_objects Time: 2025-10-21 12:52:13 MSK Entering interactive mode (type "help" for commands, "o" for options) (pprof) top Showing nodes accounting for 86170, 100% of 86171 total Dropped 1 node (cum <= 430) Showing top 10 nodes out of 32 flat flat% sum% cum cum% 47515 55.14% 55.14% 47515 55.14% encoding/csv.(*Reader).readRecord 32768 38.03% 93.17% 32768 38.03% runtime.(*timers).addHeap 3277 3.80% 96.97% 3277 3.80% strings.NewReplacer 2308 2.68% 99.65% 2308 2.68% runtime.allocm 302 0.35% 100% 47818 55.49% github.com/nspcc-dev/locode-db/pkg/locodedb.unpackLocodesData 0 0% 100% 47515 55.14% encoding/csv.(*Reader).Read 0 0% 100% 47818 55.49% github.com/nspcc-dev/locode-db/pkg/locodedb.Get 0 0% 100% 47818 55.49% github.com/nspcc-dev/locode-db/pkg/locodedb.initLocodeData 0 0% 100% 47818 55.49% github.com/nspcc-dev/locode-db/pkg/locodedb.initLocodeData.func1 0 0% 100% 47818 55.49% github.com/nspcc-dev/locode-db/pkg/locodedb_test.TestGet.func1 After: File: locodedb.test Build ID: 7c43b87dd117ccb052a1a569160cbe68e44dbe25 Type: inuse_objects Time: 2025-10-21 12:52:02 MSK Entering interactive mode (type "help" for commands, "o" for options) (pprof) top Showing nodes accounting for 69083, 100% of 69094 total Dropped 12 nodes (cum <= 345) Showing top 10 nodes out of 18 flat flat% sum% cum cum% 65537 94.85% 94.85% 65537 94.85% runtime.(*timers).addHeap 2521 3.65% 98.50% 2521 3.65% net/http.init 1025 1.48% 100% 1025 1.48% runtime.allocm 0 0% 100% 65537 94.85% runtime.(*scavengerState).sleep 0 0% 100% 65537 94.85% runtime.(*timer).maybeAdd 0 0% 100% 65537 94.85% runtime.(*timer).modify 0 0% 100% 65537 94.85% runtime.(*timer).reset (inline) 0 0% 100% 65537 94.85% runtime.bgscavenge 0 0% 100% 2521 3.65% runtime.doInit 0 0% 100% 2521 3.65% runtime.doInit1 Signed-off-by: Roman Khimov --- pkg/locodedb/calls.go | 14 +++++++++----- pkg/locodedb/utils.go | 3 +-- 
2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/pkg/locodedb/calls.go b/pkg/locodedb/calls.go index 2a0436e..408373e 100644 --- a/pkg/locodedb/calls.go +++ b/pkg/locodedb/calls.go @@ -53,9 +53,9 @@ func Get(locodeStr string) (Record, error) { code := locodeStr[CountryCodeLen:] n, _ := slices.BinarySearchFunc(cd.locodes, code, func(csv locodesCSV, s string) int { - return cmp.Compare(csv.code, s) + return cmp.Compare(codeFromCSV(&csv), s) }) - if n == len(cd.locodes) || strings.Compare(cd.locodes[n].code, code) != 0 { + if n == len(cd.locodes) || strings.Compare(codeFromCSV(&cd.locodes[n]), code) != 0 { return Record{}, ErrNotFound } @@ -69,14 +69,18 @@ func Get(locodeStr string) (Record, error) { }, nil } +func codeFromCSV(c *locodesCSV) string { + return locodeStrings[c.offset : c.offset+LocationCodeLen] +} + func locFromCSV(c *locodesCSV) string { - return locodeStrings[c.offset : c.offset+uint32(c.locationLen)] + return locodeStrings[c.offset+LocationCodeLen : c.offset+LocationCodeLen+uint32(c.locationLen)] } func divCodeFromCSV(c *locodesCSV) string { - return locodeStrings[c.offset+uint32(c.locationLen) : c.offset+uint32(c.locationLen)+uint32(c.subDivCodeLen)] + return locodeStrings[c.offset+LocationCodeLen+uint32(c.locationLen) : c.offset+LocationCodeLen+uint32(c.locationLen)+uint32(c.subDivCodeLen)] } func divNameFromCSV(c *locodesCSV) string { - return locodeStrings[c.offset+uint32(c.locationLen)+uint32(c.subDivCodeLen) : c.offset+uint32(c.locationLen)+uint32(c.subDivCodeLen)+uint32(c.subDivNameLen)] + return locodeStrings[c.offset+LocationCodeLen+uint32(c.locationLen)+uint32(c.subDivCodeLen) : c.offset+LocationCodeLen+uint32(c.locationLen)+uint32(c.subDivCodeLen)+uint32(c.subDivNameLen)] } diff --git a/pkg/locodedb/utils.go b/pkg/locodedb/utils.go index 393a5f3..5d8fd5a 100644 --- a/pkg/locodedb/utils.go +++ b/pkg/locodedb/utils.go @@ -45,7 +45,6 @@ type countryData struct { type locodesCSV struct { point Point offset uint32 - code string 
locationLen uint8 subDivCodeLen uint8 subDivNameLen uint8 @@ -106,6 +105,7 @@ func unpackLocodesData(data []byte, mc map[countryCode]countryData) (string, err subDivNameLen = uint8(len(record[4])) ) + b.WriteString(record[0][CountryCodeLen:]) b.WriteString(record[1]) b.WriteString(record[3]) b.WriteString(record[4]) @@ -132,7 +132,6 @@ func unpackLocodesData(data []byte, mc map[countryCode]countryData) (string, err } rec.locodes = append(rec.locodes, locodesCSV{ point: Point{Latitude: float32(lat), Longitude: float32(lng)}, - code: record[0][CountryCodeLen:], offset: recOffset, locationLen: locationLen, subDivCodeLen: subDivCodeLen, From c20dea5a46540bbfe9e1a4dea87a93b39473d1d1 Mon Sep 17 00:00:00 2001 From: Roman Khimov Date: Tue, 21 Oct 2025 13:21:48 +0300 Subject: [PATCH 2/2] locodedb: slices.BinarySearchFunc is smarter than sort.Search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It returns a boolean flag for a reason, so 9bb4b2643bd3b0107494ccbc159782f8d8273906 could do a bit better here and avoid useless comparisons. goos: linux goarch: amd64 pkg: github.com/nspcc-dev/locode-db/pkg/locodedb cpu: AMD Ryzen 7 PRO 7840U w/ Radeon 780M Graphics │ search.old │ search.new │ │ sec/op │ sec/op vs base │ Get-16 383.6n ± 1% 375.6n ± 2% -2.07% (p=0.012 n=10) │ search.old │ search.new │ │ B/op │ B/op vs base │ Get-16 5.000 ± 0% 5.000 ± 0% ~ (p=1.000 n=10) ¹ ¹ all samples are equal │ search.old │ search.new │ │ allocs/op │ allocs/op vs base │ Get-16 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=10) ¹ ¹ all samples are equal Signed-off-by: Roman Khimov --- pkg/locodedb/calls.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pkg/locodedb/calls.go b/pkg/locodedb/calls.go index 408373e..7c5c392 100644 --- a/pkg/locodedb/calls.go +++ b/pkg/locodedb/calls.go @@ -4,7 +4,6 @@ import ( "cmp" "errors" "slices" - "strings" ) // ErrNotFound is returned when the record is not found in the location database. 
@@ -52,10 +51,10 @@ func Get(locodeStr string) (Record, error) { } code := locodeStr[CountryCodeLen:] - n, _ := slices.BinarySearchFunc(cd.locodes, code, func(csv locodesCSV, s string) int { + n, ok := slices.BinarySearchFunc(cd.locodes, code, func(csv locodesCSV, s string) int { return cmp.Compare(codeFromCSV(&csv), s) }) - if n == len(cd.locodes) || strings.Compare(codeFromCSV(&cd.locodes[n]), code) != 0 { + if !ok { return Record{}, ErrNotFound }