Skip to content

Commit 6203a95

Browse files
committed
fix race in moveRegularItemWith sync where insertOrReplace can cause move to fail
- updated slab release logic for move failure, but there is still an issue with slab movement. currently investigating.
1 parent 4f8f425 commit 6203a95

File tree

5 files changed

+212
-24
lines changed

5 files changed

+212
-24
lines changed

cachelib/allocator/CacheAllocator-inl.h

Lines changed: 77 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1293,8 +1293,21 @@ size_t CacheAllocator<CacheTrait>::wakeUpWaitersLocked(folly::StringPiece key,
12931293
}
12941294

12951295
template <typename CacheTrait>
1296-
void CacheAllocator<CacheTrait>::moveRegularItemWithSync(
1296+
bool CacheAllocator<CacheTrait>::moveRegularItemWithSync(
12971297
Item& oldItem, WriteHandle& newItemHdl) {
1298+
//on function exit - the new item handle is no longer moving
1299+
//and other threads may access it - but in case where
1300+
//we failed to replace in access container we can give the
1301+
//new item back to the allocator
1302+
auto guard = folly::makeGuard([&]() {
1303+
auto ref = newItemHdl->unmarkMoving();
1304+
if (UNLIKELY(ref == 0)) {
1305+
const auto res =
1306+
releaseBackToAllocator(*newItemHdl, RemoveContext::kNormal, false);
1307+
XDCHECK(res == ReleaseRes::kReleased);
1308+
}
1309+
});
1310+
12981311
XDCHECK(oldItem.isMoving());
12991312
XDCHECK(!oldItem.isExpired());
13001313
// TODO: should we introduce new latency tracker. E.g. evictRegularLatency_
@@ -1325,6 +1338,22 @@ void CacheAllocator<CacheTrait>::moveRegularItemWithSync(
13251338

13261339
auto replaced = accessContainer_->replaceIf(oldItem, *newItemHdl,
13271340
predicate);
1341+
// another thread may have called insertOrReplace which could have
1342+
// marked this item as unaccessible causing the replaceIf
1343+
// in the access container to fail - in this case we want
1344+
// to abort the move since the item is no longer valid
1345+
if (!replaced) {
1346+
return false;
1347+
}
1348+
// what if another thread calls insertOrReplace now when
1349+
// the item is moving and already replaced in the hash table?
1350+
// 1. it succeeds in updating the hash table - so there is
1351+
// no guarentee that isAccessible() is true
1352+
// 2. it will then try to remove from MM container
1353+
// - this operation will wait for newItemHdl to
1354+
// be unmarkedMoving via the waitContext
1355+
// 3. replaced handle is returned and eventually drops
1356+
// ref to 0 and the item is recycled back to allocator.
13281357

13291358
if (config_.moveCb) {
13301359
// Execute the move callback. We cannot make any guarantees about the
@@ -1366,14 +1395,7 @@ void CacheAllocator<CacheTrait>::moveRegularItemWithSync(
13661395
XDCHECK(newItemHdl->hasChainedItem());
13671396
}
13681397
newItemHdl.unmarkNascent();
1369-
auto ref = newItemHdl->unmarkMoving();
1370-
//remove because there is a chance the new item was not
1371-
//added to the access container
1372-
if (UNLIKELY(ref == 0)) {
1373-
const auto res =
1374-
releaseBackToAllocator(*newItemHdl, RemoveContext::kNormal, false);
1375-
XDCHECK(res == ReleaseRes::kReleased);
1376-
}
1398+
return true;
13771399
}
13781400

13791401
template <typename CacheTrait>
@@ -1626,28 +1648,43 @@ CacheAllocator<CacheTrait>::getNextCandidate(TierId tid,
16261648
auto evictedToNext = lastTier ? nullptr
16271649
: tryEvictToNextMemoryTier(*candidate, false);
16281650
if (!evictedToNext) {
1629-
if (!token.isValid()) {
1651+
//if insertOrReplace was called during move
1652+
//then candidate will not be accessible (failed replace during tryEvict)
1653+
// - therefore this was why we failed to
1654+
// evict to the next tier and insertOrReplace
1655+
// will remove from NVM cache
1656+
//however, if candidate is accessible
1657+
//that means the allocation in the next
1658+
//tier failed - so we will continue to
1659+
//evict the item to NVM cache
1660+
bool failedToReplace = !candidate->isAccessible();
1661+
if (!token.isValid() && !failedToReplace) {
16301662
token = createPutToken(*candidate);
16311663
}
1632-
// tryEvictToNextMemoryTier should only fail if allocation of the new item fails
1633-
// in that case, it should be still possible to mark item as exclusive.
1664+
// tryEvictToNextMemoryTier can fail if:
1665+
// a) allocation of the new item fails in that case,
1666+
// it should be still possible to mark item for eviction.
1667+
// b) another thread calls insertOrReplace and the item
1668+
// is no longer accessible
16341669
//
16351670
// in case that we are on the last tier, we whould have already marked
16361671
// as exclusive since we will not be moving the item to the next tier
16371672
// but rather just evicting all together, no need to
1638-
// markExclusiveWhenMoving
1673+
// markForEvictionWhenMoving
16391674
auto ret = lastTier ? true : candidate->markForEvictionWhenMoving();
16401675
XDCHECK(ret);
16411676

16421677
unlinkItemForEviction(*candidate);
1678+
1679+
if (token.isValid() && shouldWriteToNvmCacheExclusive(*candidate)
1680+
&& !failedToReplace) {
1681+
nvmCache_->put(*candidate, std::move(token));
1682+
}
16431683
// wake up any readers that wait for the move to complete
16441684
// it's safe to do now, as we have the item marked exclusive and
16451685
// no other reader can be added to the waiters list
16461686
wakeUpWaiters(*candidate, {});
16471687

1648-
if (token.isValid() && shouldWriteToNvmCacheExclusive(*candidate)) {
1649-
nvmCache_->put(*candidate, std::move(token));
1650-
}
16511688
} else {
16521689
XDCHECK(!evictedToNext->isMarkedForEviction() && !evictedToNext->isMoving());
16531690
XDCHECK(!candidate->isMarkedForEviction() && !candidate->isMoving());
@@ -1776,7 +1813,10 @@ CacheAllocator<CacheTrait>::tryEvictToNextMemoryTier(
17761813

17771814
if (newItemHdl) {
17781815
XDCHECK_EQ(newItemHdl->getSize(), item.getSize());
1779-
moveRegularItemWithSync(item, newItemHdl);
1816+
if (!moveRegularItemWithSync(item, newItemHdl)) {
1817+
return WriteHandle{};
1818+
}
1819+
XDCHECK_EQ(newItemHdl->getKey(),item.getKey());
17801820
item.unmarkMoving();
17811821
return newItemHdl;
17821822
} else {
@@ -1815,7 +1855,9 @@ CacheAllocator<CacheTrait>::tryPromoteToNextMemoryTier(
18151855

18161856
if (newItemHdl) {
18171857
XDCHECK_EQ(newItemHdl->getSize(), item.getSize());
1818-
moveRegularItemWithSync(item, newItemHdl);
1858+
if (!moveRegularItemWithSync(item, newItemHdl)) {
1859+
return WriteHandle{};
1860+
}
18191861
item.unmarkMoving();
18201862
return newItemHdl;
18211863
} else {
@@ -3175,9 +3217,23 @@ bool CacheAllocator<CacheTrait>::tryMovingForSlabRelease(
31753217
// TODO: add support for chained items
31763218
return false;
31773219
} else {
3178-
moveRegularItemWithSync(oldItem, newItemHdl);
3179-
removeFromMMContainer(oldItem);
3180-
return true;
3220+
//move can fail if another thread calls insertOrReplace
3221+
//in this case oldItem is no longer valid (not accessible,
3222+
//it gets removed from MMContainer and evictForSlabRelease
3223+
//will send it back to the allocator
3224+
bool ret = moveRegularItemWithSync(oldItem, newItemHdl);
3225+
if (!ret) {
3226+
//we failed to move - newItemHdl was released back to allocator
3227+
//by the moveRegularItemWithSync but oldItem is not accessible
3228+
//and no longer valid - we need to clean it up here
3229+
XDCHECK(!oldItem.isAccessible());
3230+
oldItem.markForEvictionWhenMoving();
3231+
unlinkItemForEviction(oldItem);
3232+
wakeUpWaiters(oldItem, {});
3233+
} else {
3234+
removeFromMMContainer(oldItem);
3235+
}
3236+
return ret;
31813237
}
31823238
}
31833239
}

cachelib/allocator/CacheAllocator.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1623,7 +1623,7 @@ class CacheAllocator : public CacheBase {
16231623
//
16241624
// @return true If the move was completed, and the containers were updated
16251625
// successfully.
1626-
void moveRegularItemWithSync(Item& oldItem, WriteHandle& newItemHdl);
1626+
bool moveRegularItemWithSync(Item& oldItem, WriteHandle& newItemHdl);
16271627

16281628
// Moves a regular item to a different slab. This should only be used during
16291629
// slab release after the item's exclusive bit has been set. The user supplied

cachelib/allocator/CacheItem.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ class BaseAllocatorTest;
4646
template <typename AllocatorT>
4747
class AllocatorHitStatsTest;
4848

49+
template <typename AllocatorT>
50+
class AllocatorMemoryTiersTest;
51+
4952
template <typename AllocatorT>
5053
class MapTest;
5154

@@ -473,6 +476,8 @@ class CACHELIB_PACKED_ATTR CacheItem {
473476
FRIEND_TEST(ItemTest, NonStringKey);
474477
template <typename AllocatorT>
475478
friend class facebook::cachelib::tests::AllocatorHitStatsTest;
479+
template <typename AllocatorT>
480+
friend class facebook::cachelib::tests::AllocatorMemoryTiersTest;
476481
};
477482

478483
// A chained item has a hook pointing to the next chained item. The hook is

cachelib/allocator/tests/AllocatorMemoryTiersTest.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,15 @@ namespace cachelib {
2121
namespace tests {
2222

2323
using LruAllocatorMemoryTiersTest = AllocatorMemoryTiersTest<LruAllocator>;
24-
24+
//using LruTestAllocatorMemoryTiersTest = AllocatorMemoryTiersTest<LruTestAllocator>;
2525
// TODO(MEMORY_TIER): add more tests with different eviction policies
2626
TEST_F(LruAllocatorMemoryTiersTest, MultiTiersInvalid) { this->testMultiTiersInvalid(); }
2727
TEST_F(LruAllocatorMemoryTiersTest, MultiTiersValid) { this->testMultiTiersValid(); }
2828
TEST_F(LruAllocatorMemoryTiersTest, MultiTiersValidMixed) { this->testMultiTiersValidMixed(); }
2929
TEST_F(LruAllocatorMemoryTiersTest, MultiTiersBackgroundMovers ) { this->testMultiTiersBackgroundMovers(); }
3030
TEST_F(LruAllocatorMemoryTiersTest, MultiTiersRemoveDuringEviction) { this->testMultiTiersRemoveDuringEviction(); }
3131
TEST_F(LruAllocatorMemoryTiersTest, MultiTiersReplaceDuringEviction) { this->testMultiTiersReplaceDuringEviction(); }
32+
TEST_F(LruAllocatorMemoryTiersTest, MultiTiersReplaceDuringEvictionWithReader) { this->testMultiTiersReplaceDuringEvictionWithReader(); }
3233

3334
} // end of namespace tests
3435
} // end of namespace cachelib

cachelib/allocator/tests/AllocatorMemoryTiersTest.h

Lines changed: 127 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@
2222
#include "cachelib/allocator/FreeThresholdStrategy.h"
2323
#include "cachelib/allocator/PromotionStrategy.h"
2424

25+
#include <fcntl.h>
26+
#include <unistd.h>
27+
#include <ctype.h>
28+
#include <semaphore.h>
2529
#include <folly/synchronization/Latch.h>
2630

2731
namespace facebook {
@@ -58,6 +62,7 @@ class AllocatorMemoryTiersTest : public AllocatorTest<AllocatorT> {
5862
ASSERT_NO_THROW(alloc->insertOrReplace(handle));
5963
}
6064
}
65+
6166
public:
6267
void testMultiTiersInvalid() {
6368
typename AllocatorT::Config config;
@@ -201,7 +206,7 @@ class AllocatorMemoryTiersTest : public AllocatorTest<AllocatorT> {
201206

202207
t->join();
203208
}
204-
209+
205210
void testMultiTiersReplaceDuringEviction() {
206211
std::unique_ptr<AllocatorT> alloc;
207212
PoolId pool;
@@ -234,6 +239,127 @@ class AllocatorMemoryTiersTest : public AllocatorTest<AllocatorT> {
234239
testMultiTiersAsyncOpDuringMove(alloc, pool, quit, moveCb);
235240

236241
t->join();
242+
243+
}
244+
245+
246+
void gdb_sync1() {}
247+
void gdb_sync2() {}
248+
void gdb_sync3() {}
249+
using ReadHandle = typename AllocatorT::ReadHandle;
250+
void testMultiTiersReplaceDuringEvictionWithReader() {
251+
sem_unlink ("/gdb1_sem");
252+
sem_t *sem = sem_open ("/gdb1_sem", O_CREAT | O_EXCL, S_IRUSR | S_IWUSR, 0);
253+
int gdbfd = open("/tmp/gdb1.gdb",O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR);
254+
char gdbcmds[] =
255+
"set attached=1\n"
256+
"break gdb_sync1\n"
257+
"break gdb_sync2\n"
258+
"break moveRegularItemWithSync\n"
259+
"c\n"
260+
"set scheduler-locking on\n"
261+
"thread 1\n"
262+
"c\n"
263+
"thread 4\n"
264+
"c\n"
265+
"thread 5\n"
266+
"break nativeFutexWaitImpl thread 5\n"
267+
"c\n"
268+
"thread 4\n"
269+
"break nativeFutexWaitImpl thread 4\n"
270+
"c\n"
271+
"thread 1\n"
272+
"break releaseBackToAllocator\n"
273+
"c\n"
274+
"c\n"
275+
"thread 5\n"
276+
"c\n"
277+
"thread 4\n"
278+
"c\n"
279+
"thread 1\n"
280+
"break gdb_sync3\n"
281+
"c\n"
282+
"quit\n";
283+
int ret = write(gdbfd,gdbcmds,strlen(gdbcmds));
284+
int ppid = getpid(); //parent pid
285+
//int pid = 0;
286+
int pid = fork();
287+
if (pid == 0) {
288+
sem_wait(sem);
289+
sem_close(sem);
290+
sem_unlink("/gdb1_sem");
291+
char cmdpid[256];
292+
sprintf(cmdpid,"%d",ppid);
293+
int f = execlp("gdb","gdb","--pid",cmdpid,"--batch-silent","--command=/tmp/gdb1.gdb",(char*) 0);
294+
ASSERT(f != -1);
295+
}
296+
sem_post(sem);
297+
//wait for gdb to run
298+
int attached = 0;
299+
while (attached == 0);
300+
301+
std::unique_ptr<AllocatorT> alloc;
302+
PoolId pool;
303+
bool quit = false;
304+
305+
typename AllocatorT::Config config;
306+
config.setCacheSize(4 * Slab::kSize);
307+
config.enableCachePersistence("/tmp");
308+
config.configureMemoryTiers({
309+
MemoryTierCacheConfig::fromShm()
310+
.setRatio(1).setMemBind(std::string("0")),
311+
MemoryTierCacheConfig::fromShm()
312+
.setRatio(1).setMemBind(std::string("0"))
313+
});
314+
315+
alloc = std::make_unique<AllocatorT>(AllocatorT::SharedMemNew, config);
316+
ASSERT(alloc != nullptr);
317+
pool = alloc->addPool("default", alloc->getCacheMemoryStats().ramCacheSize);
318+
319+
int i = 0;
320+
typename AllocatorT::Item* evicted;
321+
std::unique_ptr<std::thread> t;
322+
std::unique_ptr<std::thread> r;
323+
while(!quit) {
324+
auto handle = alloc->allocate(pool, std::to_string(++i), std::string("value").size());
325+
ASSERT(handle != nullptr);
326+
if (i == 1) {
327+
evicted = static_cast<typename AllocatorT::Item*>(handle.get());
328+
folly::Latch latch_t(1);
329+
t = std::make_unique<std::thread>([&](){
330+
auto handleNew = alloc->allocate(pool, std::to_string(1), std::string("new value").size());
331+
ASSERT(handleNew != nullptr);
332+
latch_t.count_down();
333+
//first breakpoint will be this one because
334+
//thread 1 still has more items to fill up the
335+
//cache before an evict is evicted
336+
gdb_sync1();
337+
ASSERT(evicted->isMoving());
338+
//need to suspend thread 1 - who is doing the eviction
339+
//gdb will do this for us
340+
folly::Latch latch(1);
341+
r = std::make_unique<std::thread>([&](){
342+
ASSERT(evicted->isMoving());
343+
latch.count_down();
344+
auto handleEvict = alloc->find(std::to_string(1));
345+
//does find block until done moving?? yes
346+
while (evicted->isMarkedForEviction()); //move will fail
347+
XDCHECK(handleEvict == nullptr) << handleEvict->toString();
348+
ASSERT(handleEvict == nullptr);
349+
});
350+
latch.wait();
351+
gdb_sync2();
352+
alloc->insertOrReplace(handleNew);
353+
ASSERT(!evicted->isAccessible()); //move failed
354+
quit = true;
355+
});
356+
latch_t.wait();
357+
}
358+
ASSERT_NO_THROW(alloc->insertOrReplace(handle));
359+
}
360+
t->join();
361+
r->join();
362+
gdb_sync3();
237363
}
238364
};
239365
} // namespace tests

0 commit comments

Comments
 (0)