From 8ffbc52915a0676a3e3958f5942c27a7c63b4ab7 Mon Sep 17 00:00:00 2001 From: Roman Khimov Date: Thu, 19 Feb 2026 19:23:33 +0300 Subject: [PATCH] engine: use an unlocked version of Get() in processAddrDelete() processAddrDelete is supposed to be called with RLock taken on e.blockMtx and that's correct for its callers. Therefore it must not RLock it again, which is what it does by calling Get(). sync.RWMutex prohibits recursive read-locking: once Close() is waiting in Lock(), the nested RLock() blocks behind it while the outer RLock is still held, so neither can proceed. This leads to a deadlock between Close() and the GC routine running in the background: goroutine 787 [sync.RWMutex.Lock, 9 minutes]: sync.runtime_SemacquireRWMutex(0xc000304cfc?, 0x1?, 0x1002d90?) /opt/hostedtoolcache/go/1.24.13/x64/src/runtime/sema.go:105 +0x25 sync.(*RWMutex).Lock(0xc000304ce8) /opt/hostedtoolcache/go/1.24.13/x64/src/sync/rwmutex.go:155 +0x89 github.com/nspcc-dev/neofs-node/pkg/local_object_storage/engine.(*StorageEngine).setBlockExecErr(0xc000304cb0, {0x1546240, 0x1c12290}) /home/runner/work/neofs-node/neofs-node/pkg/local_object_storage/engine/control.go:106 +0x65 github.com/nspcc-dev/neofs-node/pkg/local_object_storage/engine.(*StorageEngine).Close(0xc000304cb0) /home/runner/work/neofs-node/neofs-node/pkg/local_object_storage/engine/control.go:76 +0xd0 github.com/nspcc-dev/neofs-node/pkg/local_object_storage/engine.TestChildrenExpiration.func1() /home/runner/work/neofs-node/neofs-node/pkg/local_object_storage/engine/gc_test.go:54 +0x2f testing.(*common).Cleanup.func1() /opt/hostedtoolcache/go/1.24.13/x64/src/testing/testing.go:1211 +0x170 testing.(*common).runCleanup(0xc0003af6c0, 0x0) /opt/hostedtoolcache/go/1.24.13/x64/src/testing/testing.go:1445 +0x2b4 testing.tRunner.func2() /opt/hostedtoolcache/go/1.24.13/x64/src/testing/testing.go:1786 +0x4d testing.tRunner(0xc0003af6c0, 0x14184f8) /opt/hostedtoolcache/go/1.24.13/x64/src/testing/testing.go:1798 +0x25f created by testing.(*T).Run in goroutine 1 /opt/hostedtoolcache/go/1.24.13/x64/src/testing/testing.go:1851 +0x8f3 goroutine 753 [sync.RWMutex.RLock, 9 minutes]: sync.runtime_SemacquireRWMutexR(0xc000304cf8?, 
0x1?, 0x1017465?) /opt/hostedtoolcache/go/1.24.13/x64/src/runtime/sema.go:100 +0x25 sync.(*RWMutex).RLock(0xc000304ce8) /opt/hostedtoolcache/go/1.24.13/x64/src/sync/rwmutex.go:74 +0x5b github.com/nspcc-dev/neofs-node/pkg/local_object_storage/engine.(*StorageEngine).Get(0xc000304cb0, {{0x3c, 0xa9, 0x7, 0xc9, 0x47, 0xd8, 0x5c, 0x4e, 0xb2, ...}, ...}) /home/runner/work/neofs-node/neofs-node/pkg/local_object_storage/engine/get.go:34 +0x211 github.com/nspcc-dev/neofs-node/pkg/local_object_storage/engine.(*StorageEngine).processAddrDelete(0xc000304cb0, {{0x3c, 0xa9, 0x7, 0xc9, 0x47, 0xd8, 0x5c, 0x4e, 0xb2, ...}, ...}, ...) /home/runner/work/neofs-node/neofs-node/pkg/local_object_storage/engine/inhume.go:99 +0x1065 github.com/nspcc-dev/neofs-node/pkg/local_object_storage/engine.(*StorageEngine).processExpiredObjects(0xc000304cb0, {0xc00050ee40, 0x1, 0x1336669?}) /home/runner/work/neofs-node/neofs-node/pkg/local_object_storage/engine/inhume.go:269 +0x4c6 github.com/nspcc-dev/neofs-node/pkg/local_object_storage/shard.(*Shard).collectExpiredObjects(0xc000035240) /home/runner/work/neofs-node/neofs-node/pkg/local_object_storage/shard/gc.go:238 +0xb2e github.com/nspcc-dev/neofs-node/pkg/local_object_storage/shard.(*Shard).removeGarbage(0xc000035240) /home/runner/work/neofs-node/neofs-node/pkg/local_object_storage/shard/gc.go:149 +0x125 github.com/nspcc-dev/neofs-node/pkg/local_object_storage/shard.(*gc).tickRemover(0xc000572960) /home/runner/work/neofs-node/neofs-node/pkg/local_object_storage/shard/gc.go:120 +0x116 created by github.com/nspcc-dev/neofs-node/pkg/local_object_storage/shard.(*gc).init in goroutine 787 /home/runner/work/neofs-node/neofs-node/pkg/local_object_storage/shard/gc.go:81 +0xa5 Fixes #3835. Related to 60d0d3a636dfbed424c3fdb17562eda836af3e1f. 
Signed-off-by: Roman Khimov --- CHANGELOG.md | 1 + pkg/local_object_storage/engine/get.go | 4 ++++ pkg/local_object_storage/engine/inhume.go | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d56477ee23..fb0b23582f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ Changelog for NeoFS Node ### Fixed - Resending the header after chunks have already been sent in object service `Get` handler (#3833) +- GC deadlock on local object storage shutdown (#3837) ### Changed diff --git a/pkg/local_object_storage/engine/get.go b/pkg/local_object_storage/engine/get.go index 5737642fdf..0312a314c9 100644 --- a/pkg/local_object_storage/engine/get.go +++ b/pkg/local_object_storage/engine/get.go @@ -37,7 +37,11 @@ func (e *StorageEngine) Get(addr oid.Address) (*object.Object, error) { if e.blockErr != nil { return nil, e.blockErr } + return e.getInt(addr) +} +// getInt is an unlocked version of Get. +func (e *StorageEngine) getInt(addr oid.Address) (*object.Object, error) { var ( err error obj *object.Object diff --git a/pkg/local_object_storage/engine/inhume.go b/pkg/local_object_storage/engine/inhume.go index cc0ba9ce48..cb459d71a2 100644 --- a/pkg/local_object_storage/engine/inhume.go +++ b/pkg/local_object_storage/engine/inhume.go @@ -96,7 +96,7 @@ func (e *StorageEngine) processAddrDelete(addr oid.Address, deleteFunc func(*sha linkAddr.SetContainer(addr.Container()) linkAddr.SetObject(linkID) - linkObj, err := e.Get(linkAddr) + linkObj, err := e.getInt(linkAddr) if err != nil { e.log.Debug("inhuming root object but no link object is found", zap.Stringer("linkAddr", linkAddr),