Skip to content

Commit 6abb498

Browse files
igchorbyrnedj
authored andcommitted
Fix race in acquire (#68)
The assumption for moving items was that once item is unmarked no one can add new waiters for that item. However, since incrementing item ref count was not done under the MoveMap lock, there was a race: item could have been unmarked right after incRef returned incFailedMoving.
1 parent 6203a95 commit 6abb498

File tree

2 files changed

+24
-14
lines changed

2 files changed

+24
-14
lines changed

cachelib/allocator/CacheAllocator-inl.h

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1023,16 +1023,21 @@ CacheAllocator<CacheTrait>::acquire(Item* it) {
10231023
SCOPE_FAIL { stats_.numRefcountOverflow.inc(); };
10241024

10251025
// TODO: do not block incRef for child items to avoid deadlock
1026-
auto failIfMoving = getNumTiers() > 1 && !it->isChainedItem();
1027-
auto incRes = incRef(*it, failIfMoving);
1028-
if (LIKELY(incRes == RefcountWithFlags::incResult::incOk)) {
1029-
return WriteHandle{it, *this};
1030-
} else if (incRes == RefcountWithFlags::incResult::incFailedEviction){
1031-
// item is being evicted
1032-
return WriteHandle{};
1033-
} else {
1034-
// item is being moved - wait for completion
1035-
return handleWithWaitContextForMovingItem(*it);
1026+
const auto failIfMoving = getNumTiers() > 1 && !it->isChainedItem();
1027+
1028+
while (true) {
1029+
auto incRes = incRef(*it, failIfMoving);
1030+
if (LIKELY(incRes == RefcountWithFlags::incResult::incOk)) {
1031+
return WriteHandle{it, *this};
1032+
} else if (incRes == RefcountWithFlags::incResult::incFailedEviction){
1033+
// item is being evicted
1034+
return WriteHandle{};
1035+
} else {
1036+
// item is being moved - wait for completion
1037+
WriteHandle handle;
1038+
if (tryGetHandleWithWaitContextForMovingItem(*it, handle))
1039+
return handle;
1040+
}
10361041
}
10371042
}
10381043

@@ -1254,20 +1259,25 @@ CacheAllocator<CacheTrait>::insertOrReplace(const WriteHandle& handle) {
12541259
* 3. Wait until the moving thread will complete its job.
12551260
*/
12561261
template <typename CacheTrait>
1257-
typename CacheAllocator<CacheTrait>::WriteHandle
1258-
CacheAllocator<CacheTrait>::handleWithWaitContextForMovingItem(Item& item) {
1262+
bool
1263+
CacheAllocator<CacheTrait>::tryGetHandleWithWaitContextForMovingItem(Item& item, WriteHandle& handle) {
12591264
auto shard = getShardForKey(item.getKey());
12601265
auto& movesMap = getMoveMapForShard(shard);
12611266
{
12621267
auto lock = getMoveLockForShard(shard);
12631268

1269+
// item might have been evicted or moved before the lock was acquired
1270+
if (!item.isMoving())
1271+
return false;
1272+
12641273
WriteHandle hdl{*this};
12651274
auto waitContext = hdl.getItemWaitContext();
12661275

12671276
auto ret = movesMap.try_emplace(item.getKey(), std::make_unique<MoveCtx>());
12681277
ret.first->second->addWaiter(std::move(waitContext));
12691278

1270-
return hdl;
1279+
handle = std::move(hdl);
1280+
return true;
12711281
}
12721282
}
12731283

cachelib/allocator/CacheAllocator.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2306,7 +2306,7 @@ auto& mmContainer = getMMContainer(tid, pid, cid);
23062306

23072307
size_t memoryTierSize(TierId tid) const;
23082308

2309-
WriteHandle handleWithWaitContextForMovingItem(Item& item);
2309+
bool tryGetHandleWithWaitContextForMovingItem(Item& item, WriteHandle& handle);
23102310

23112311
size_t wakeUpWaitersLocked(folly::StringPiece key, WriteHandle&& handle);
23122312

0 commit comments

Comments
 (0)