Skip to content

Commit f4e30a7

Browse files
authored
Fix race in acquire (#68)
The assumption for moving items was that once item is unmarked no one can add new waiters for that item. However, since incrementing item ref count was not done under the MoveMap lock, there was a race: item could have been unmarked right after incRef returned incFailedMoving.
1 parent 08bf0b4 commit f4e30a7

File tree

2 files changed

+24
-14
lines changed

2 files changed

+24
-14
lines changed

cachelib/allocator/CacheAllocator-inl.h

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1024,16 +1024,21 @@ CacheAllocator<CacheTrait>::acquire(Item* it) {
10241024
SCOPE_FAIL { stats_.numRefcountOverflow.inc(); };
10251025

10261026
// TODO: do not block incRef for child items to avoid deadlock
1027-
auto failIfMoving = getNumTiers() > 1 && !it->isChainedItem();
1028-
auto incRes = incRef(*it, failIfMoving);
1029-
if (LIKELY(incRes == RefcountWithFlags::incResult::incOk)) {
1030-
return WriteHandle{it, *this};
1031-
} else if (incRes == RefcountWithFlags::incResult::incFailedEviction){
1032-
// item is being evicted
1033-
return WriteHandle{};
1034-
} else {
1035-
// item is being moved - wait for completion
1036-
return handleWithWaitContextForMovingItem(*it);
1027+
const auto failIfMoving = getNumTiers() > 1 && !it->isChainedItem();
1028+
1029+
while (true) {
1030+
auto incRes = incRef(*it, failIfMoving);
1031+
if (LIKELY(incRes == RefcountWithFlags::incResult::incOk)) {
1032+
return WriteHandle{it, *this};
1033+
} else if (incRes == RefcountWithFlags::incResult::incFailedEviction){
1034+
// item is being evicted
1035+
return WriteHandle{};
1036+
} else {
1037+
// item is being moved - wait for completion
1038+
WriteHandle handle;
1039+
if (tryGetHandleWithWaitContextForMovingItem(*it, handle))
1040+
return handle;
1041+
}
10371042
}
10381043
}
10391044

@@ -1255,20 +1260,25 @@ CacheAllocator<CacheTrait>::insertOrReplace(const WriteHandle& handle) {
12551260
* 3. Wait until the moving thread will complete its job.
12561261
*/
12571262
template <typename CacheTrait>
1258-
typename CacheAllocator<CacheTrait>::WriteHandle
1259-
CacheAllocator<CacheTrait>::handleWithWaitContextForMovingItem(Item& item) {
1263+
bool
1264+
CacheAllocator<CacheTrait>::tryGetHandleWithWaitContextForMovingItem(Item& item, WriteHandle& handle) {
12601265
auto shard = getShardForKey(item.getKey());
12611266
auto& movesMap = getMoveMapForShard(shard);
12621267
{
12631268
auto lock = getMoveLockForShard(shard);
12641269

1270+
// item might have been evicted or moved before the lock was acquired
1271+
if (!item.isMoving())
1272+
return false;
1273+
12651274
WriteHandle hdl{*this};
12661275
auto waitContext = hdl.getItemWaitContext();
12671276

12681277
auto ret = movesMap.try_emplace(item.getKey(), std::make_unique<MoveCtx>());
12691278
ret.first->second->addWaiter(std::move(waitContext));
12701279

1271-
return hdl;
1280+
handle = std::move(hdl);
1281+
return true;
12721282
}
12731283
}
12741284

cachelib/allocator/CacheAllocator.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2250,7 +2250,7 @@ auto& mmContainer = getMMContainer(tid, pid, cid);
22502250

22512251
size_t memoryTierSize(TierId tid) const;
22522252

2253-
WriteHandle handleWithWaitContextForMovingItem(Item& item);
2253+
bool tryGetHandleWithWaitContextForMovingItem(Item& item, WriteHandle& handle);
22542254

22552255
size_t wakeUpWaitersLocked(folly::StringPiece key, WriteHandle&& handle);
22562256

0 commit comments

Comments
 (0)