@@ -1293,8 +1293,21 @@ size_t CacheAllocator<CacheTrait>::wakeUpWaitersLocked(folly::StringPiece key,
12931293}
12941294
12951295template <typename CacheTrait>
1296- void CacheAllocator<CacheTrait>::moveRegularItemWithSync(
1296+ bool CacheAllocator<CacheTrait>::moveRegularItemWithSync(
12971297 Item& oldItem, WriteHandle& newItemHdl) {
1298+ // on function exit - the new item handle is no longer moving
1299+ // and other threads may access it - but in case where
1300+ // we failed to replace in access container we can give the
1301+ // new item back to the allocator
1302+ auto guard = folly::makeGuard ([&]() {
1303+ auto ref = newItemHdl->unmarkMoving ();
1304+ if (UNLIKELY (ref == 0 )) {
1305+ const auto res =
1306+ releaseBackToAllocator (*newItemHdl, RemoveContext::kNormal , false );
1307+ XDCHECK (res == ReleaseRes::kReleased );
1308+ }
1309+ });
1310+
12981311 XDCHECK (oldItem.isMoving ());
12991312 XDCHECK (!oldItem.isExpired ());
13001313 // TODO: should we introduce new latency tracker. E.g. evictRegularLatency_
@@ -1325,6 +1338,22 @@ void CacheAllocator<CacheTrait>::moveRegularItemWithSync(
13251338
13261339 auto replaced = accessContainer_->replaceIf (oldItem, *newItemHdl,
13271340 predicate);
1341+ // another thread may have called insertOrReplace which could have
1342+ // marked this item as inaccessible causing the replaceIf
1343+ // in the access container to fail - in this case we want
1344+ // to abort the move since the item is no longer valid
1345+ if (!replaced) {
1346+ return false ;
1347+ }
1348+ // what if another thread calls insertOrReplace now when
1349+ // the item is moving and already replaced in the hash table?
1350+ // 1. it succeeds in updating the hash table - so there is
1351+ // no guarantee that isAccessible() is true
1352+ // 2. it will then try to remove from MM container
1353+ // - this operation will wait for newItemHdl to
1354+ // be unmarkedMoving via the waitContext
1355+ // 3. replaced handle is returned and eventually drops
1356+ // ref to 0 and the item is recycled back to allocator.
13281357
13291358 if (config_.moveCb ) {
13301359 // Execute the move callback. We cannot make any guarantees about the
@@ -1366,14 +1395,7 @@ void CacheAllocator<CacheTrait>::moveRegularItemWithSync(
13661395 XDCHECK (newItemHdl->hasChainedItem ());
13671396 }
13681397 newItemHdl.unmarkNascent ();
1369- auto ref = newItemHdl->unmarkMoving ();
1370- // remove because there is a chance the new item was not
1371- // added to the access container
1372- if (UNLIKELY (ref == 0 )) {
1373- const auto res =
1374- releaseBackToAllocator (*newItemHdl, RemoveContext::kNormal , false );
1375- XDCHECK (res == ReleaseRes::kReleased );
1376- }
1398+ return true ;
13771399}
13781400
13791401template <typename CacheTrait>
@@ -1626,28 +1648,43 @@ CacheAllocator<CacheTrait>::getNextCandidate(TierId tid,
16261648 auto evictedToNext = lastTier ? nullptr
16271649 : tryEvictToNextMemoryTier (*candidate, false );
16281650 if (!evictedToNext) {
1629- if (!token.isValid ()) {
1651+ // if insertOrReplace was called during move
1652+ // then candidate will not be accessible (failed replace during tryEvict)
1653+ // - therefore this was why we failed to
1654+ // evict to the next tier and insertOrReplace
1655+ // will remove from NVM cache
1656+ // however, if candidate is accessible
1657+ // that means the allocation in the next
1658+ // tier failed - so we will continue to
1659+ // evict the item to NVM cache
1660+ bool failedToReplace = !candidate->isAccessible ();
1661+ if (!token.isValid () && !failedToReplace) {
16301662 token = createPutToken (*candidate);
16311663 }
1632- // tryEvictToNextMemoryTier should only fail if allocation of the new item fails
1633- // in that case, it should be still possible to mark item as exclusive.
1664+ // tryEvictToNextMemoryTier can fail if:
1665+ // a) allocation of the new item fails in that case,
1666+ // it should be still possible to mark item for eviction.
1667+ // b) another thread calls insertOrReplace and the item
1668+ // is no longer accessible
16341669 //
16351670 // in case that we are on the last tier, we would have already marked
16361671 // as exclusive since we will not be moving the item to the next tier
16371672 // but rather just evicting all together, no need to
1638- // markExclusiveWhenMoving
1673+ // markForEvictionWhenMoving
16391674 auto ret = lastTier ? true : candidate->markForEvictionWhenMoving ();
16401675 XDCHECK (ret);
16411676
16421677 unlinkItemForEviction (*candidate);
1678+
1679+ if (token.isValid () && shouldWriteToNvmCacheExclusive (*candidate)
1680+ && !failedToReplace) {
1681+ nvmCache_->put (*candidate, std::move (token));
1682+ }
16431683 // wake up any readers that wait for the move to complete
16441684 // it's safe to do now, as we have the item marked exclusive and
16451685 // no other reader can be added to the waiters list
16461686 wakeUpWaiters (*candidate, {});
16471687
1648- if (token.isValid () && shouldWriteToNvmCacheExclusive (*candidate)) {
1649- nvmCache_->put (*candidate, std::move (token));
1650- }
16511688 } else {
16521689 XDCHECK (!evictedToNext->isMarkedForEviction () && !evictedToNext->isMoving ());
16531690 XDCHECK (!candidate->isMarkedForEviction () && !candidate->isMoving ());
@@ -1776,7 +1813,10 @@ CacheAllocator<CacheTrait>::tryEvictToNextMemoryTier(
17761813
17771814 if (newItemHdl) {
17781815 XDCHECK_EQ (newItemHdl->getSize (), item.getSize ());
1779- moveRegularItemWithSync (item, newItemHdl);
1816+ if (!moveRegularItemWithSync (item, newItemHdl)) {
1817+ return WriteHandle{};
1818+ }
1819+ XDCHECK_EQ (newItemHdl->getKey (),item.getKey ());
17801820 item.unmarkMoving ();
17811821 return newItemHdl;
17821822 } else {
@@ -1815,7 +1855,9 @@ CacheAllocator<CacheTrait>::tryPromoteToNextMemoryTier(
18151855
18161856 if (newItemHdl) {
18171857 XDCHECK_EQ (newItemHdl->getSize (), item.getSize ());
1818- moveRegularItemWithSync (item, newItemHdl);
1858+ if (!moveRegularItemWithSync (item, newItemHdl)) {
1859+ return WriteHandle{};
1860+ }
18191861 item.unmarkMoving ();
18201862 return newItemHdl;
18211863 } else {
@@ -3175,9 +3217,23 @@ bool CacheAllocator<CacheTrait>::tryMovingForSlabRelease(
31753217 // TODO: add support for chained items
31763218 return false ;
31773219 } else {
3178- moveRegularItemWithSync (oldItem, newItemHdl);
3179- removeFromMMContainer (oldItem);
3180- return true ;
3220+ // move can fail if another thread calls insertOrReplace
3221+ // in this case oldItem is no longer valid (not accessible,
3222+ // it gets removed from MMContainer and evictForSlabRelease
3223+ // will send it back to the allocator)
3224+ bool ret = moveRegularItemWithSync (oldItem, newItemHdl);
3225+ if (!ret) {
3226+ // we failed to move - newItemHdl was released back to allocator
3227+ // by the moveRegularItemWithSync but oldItem is not accessible
3228+ // and no longer valid - we need to clean it up here
3229+ XDCHECK (!oldItem.isAccessible ());
3230+ oldItem.markForEvictionWhenMoving ();
3231+ unlinkItemForEviction (oldItem);
3232+ wakeUpWaiters (oldItem, {});
3233+ } else {
3234+ removeFromMMContainer (oldItem);
3235+ }
3236+ return ret;
31813237 }
31823238 }
31833239}
0 commit comments