Skip to content

Commit 8938190

Browse files
committed
Use moving bit for synchronization in findEviction
moving bit is used to give exclusive right to evict the item to a particular thread. Originially, there was an assumption that whoever marked the item as moving will try to free it until he succeeds. Since we don't want to do that in findEviction (potentially can take a long time) we need to make sure that unmarking is safe. This patch checks the flags after unmarking (atomically) and if ref is zero it also recyles the item. This is needed as there might be some concurrent thread releasing the item (and decrementing ref count). If moving bit is set, that thread would not free the memory back to allocator, resulting in memory leak on unmarkMoving().
1 parent ec20142 commit 8938190

File tree

7 files changed

+80
-188
lines changed

7 files changed

+80
-188
lines changed

cachelib/allocator/CacheAllocator-inl.h

Lines changed: 66 additions & 135 deletions
Original file line numberDiff line numberDiff line change
@@ -1186,14 +1186,13 @@ bool CacheAllocator<CacheTrait>::addWaitContextForMovingItem(
11861186
}
11871187

11881188
template <typename CacheTrait>
1189-
template <typename ItemPtr>
11901189
typename CacheAllocator<CacheTrait>::ItemHandle
11911190
CacheAllocator<CacheTrait>::moveRegularItemOnEviction(
1192-
ItemPtr& oldItemPtr, ItemHandle& newItemHdl) {
1191+
Item& oldItem, ItemHandle& newItemHdl) {
1192+
XDCHECK(oldItem.isMoving());
11931193
// TODO: should we introduce new latency tracker. E.g. evictRegularLatency_
11941194
// ??? util::LatencyTracker tracker{stats_.evictRegularLatency_};
11951195

1196-
Item& oldItem = *oldItemPtr;
11971196
if (!oldItem.isAccessible() || oldItem.isExpired()) {
11981197
return {};
11991198
}
@@ -1249,7 +1248,7 @@ CacheAllocator<CacheTrait>::moveRegularItemOnEviction(
12491248
// it is unsafe to replace the old item with a new one, so we should
12501249
// also abort.
12511250
if (!accessContainer_->replaceIf(oldItem, *newItemHdl,
1252-
itemEvictionPredicate)) {
1251+
itemMovingPredicate)) {
12531252
return {};
12541253
}
12551254

@@ -1269,7 +1268,7 @@ CacheAllocator<CacheTrait>::moveRegularItemOnEviction(
12691268

12701269
// Inside the MM container's lock, this checks if the old item exists to
12711270
// make sure that no other thread removed it, and only then replaces it.
1272-
if (!replaceInMMContainer(oldItemPtr, *newItemHdl)) {
1271+
if (!replaceInMMContainer(oldItem, *newItemHdl)) {
12731272
accessContainer_->remove(*newItemHdl);
12741273
return {};
12751274
}
@@ -1459,20 +1458,15 @@ CacheAllocator<CacheTrait>::findEviction(TierId tid, PoolId pid, ClassId cid) {
14591458
itr) {
14601459
++searchTries;
14611460

1462-
Item* candidate = itr.get();
1463-
1464-
// make sure no other thead is evicting the item
1465-
if (candidate->getRefCount() != 0) {
1466-
++itr;
1467-
continue;
1468-
}
1461+
Item* toRecycle = itr.get();
14691462

1470-
if (candidate->isChainedItem()) {
1471-
candidate = &candidate->asChainedItem().getParentItem(compressor_);
1472-
}
1463+
Item* candidate =
1464+
toRecycle->isChainedItem()
1465+
? &toRecycle->asChainedItem().getParentItem(compressor_)
1466+
: toRecycle;
14731467

1474-
auto toReleaseHandle = accessContainer_->find(*candidate);
1475-
if (!toReleaseHandle || !toReleaseHandle.isReady()) {
1468+
// make sure no other thead is evicting the item
1469+
if (candidate->getRefCount() != 0 || !candidate->markMoving()) {
14761470
++itr;
14771471
continue;
14781472
}
@@ -1482,52 +1476,57 @@ CacheAllocator<CacheTrait>::findEviction(TierId tid, PoolId pid, ClassId cid) {
14821476
// for chained items, the ownership of the parent can change. We try to
14831477
// evict what we think as parent and see if the eviction of parent
14841478
// recycles the child we intend to.
1485-
1486-
bool movedToNextTier = false;
1487-
bool evicted = false;
1488-
{
1489-
auto h = tryEvictToNextMemoryTier(tid, pid, candidate);
1490-
if (h) {
1491-
movedToNextTier = true;
1492-
evicted = true;
1493-
h.release();
1494-
toReleaseHandle.release();
1495-
1496-
decRef(*candidate);
1497-
}
1498-
}
1499-
if(!movedToNextTier) {
1500-
evicted = tryEvictItem(mmContainer, std::move(toReleaseHandle));
1501-
}
1502-
if (evicted) {
1479+
auto toReleaseHandle =
1480+
evictNormalItem(*candidate, true /* skipIfTokenInvalid */);
1481+
auto ref = candidate->unmarkMoving();
1482+
1483+
if (toReleaseHandle || ref == 0u) {
15031484
if (candidate->hasChainedItem()) {
15041485
(*stats_.chainedItemEvictions)[pid][cid].inc();
15051486
} else {
15061487
(*stats_.regularItemEvictions)[pid][cid].inc();
15071488
}
1489+
} else {
1490+
if (candidate->hasChainedItem()) {
1491+
stats_.evictFailParentAC.inc();
1492+
} else {
1493+
stats_.evictFailAC.inc();
1494+
}
1495+
}
1496+
1497+
if (toReleaseHandle) {
1498+
XDCHECK(toReleaseHandle.get() == candidate);
1499+
XDCHECK(toRecycle == candidate || toRecycle->isChainedItem());
1500+
XDCHECK_EQ(1u, toReleaseHandle->getRefCount());
15081501

1509-
XDCHECK_EQ(1u, candidate->getRefCount());
1502+
// We manually release the item here because we don't want to
1503+
// invoke the Item Handle's destructor which will be decrementing
1504+
// an already zero refcount, which will throw exception
1505+
auto& itemToRelease = *toReleaseHandle.release();
15101506

15111507
// Decrementing the refcount because we want to recycle the item
1512-
const auto ref = decRef(*candidate);
1508+
const auto ref = decRef(itemToRelease);
15131509
XDCHECK_EQ(0u, ref);
15141510

15151511
// check if by releasing the item we intend to, we actually
15161512
// recycle the candidate.
1513+
if (ReleaseRes::kRecycled ==
1514+
releaseBackToAllocator(itemToRelease, RemoveContext::kEviction,
1515+
/* isNascent */ false, toRecycle)) {
1516+
return toRecycle;
1517+
}
1518+
} else if (ref == 0u) {
1519+
// it's safe to recycle the item here as there are no more
1520+
// references and the item could not been marked as moving
1521+
// by other thread since it's detached from MMContainer.
15171522
if (ReleaseRes::kRecycled ==
15181523
releaseBackToAllocator(*candidate, RemoveContext::kEviction,
1519-
/* isNascent */ movedToNextTier, candidate)) {
1520-
return candidate;
1524+
/* isNascent */ false, toRecycle)) {
1525+
return toRecycle;
15211526
}
15221527
}
15231528

1524-
// If we destroyed the itr to possibly evict and failed, we restart
1525-
// from the beginning again
1526-
if (!itr) {
1527-
itr.resetToBegin();
1528-
for (int i = 0; i < searchTries; i++)
1529-
++itr;
1530-
}
1529+
itr.resetToBegin();
15311530
}
15321531
return nullptr;
15331532
}
@@ -1581,24 +1580,23 @@ bool CacheAllocator<CacheTrait>::shouldWriteToNvmCacheExclusive(
15811580
}
15821581

15831582
template <typename CacheTrait>
1584-
template <typename ItemPtr>
15851583
typename CacheAllocator<CacheTrait>::WriteHandle
15861584
CacheAllocator<CacheTrait>::tryEvictToNextMemoryTier(
1587-
TierId tid, PoolId pid, ItemPtr& item) {
1588-
if(item->isChainedItem()) return {}; // TODO: We do not support ChainedItem yet
1589-
if(item->isExpired()) return acquire(item);
1585+
TierId tid, PoolId pid, Item& item) {
1586+
if(item.isChainedItem()) return {}; // TODO: We do not support ChainedItem yet
1587+
if(item.isExpired()) return acquire(&item);
15901588

15911589
TierId nextTier = tid; // TODO - calculate this based on some admission policy
15921590
while (++nextTier < numTiers_) { // try to evict down to the next memory tiers
15931591
// allocateInternal might trigger another eviction
15941592
auto newItemHdl = allocateInternalTier(nextTier, pid,
1595-
item->getKey(),
1596-
item->getSize(),
1597-
item->getCreationTime(),
1598-
item->getExpiryTime());
1593+
item.getKey(),
1594+
item.getSize(),
1595+
item.getCreationTime(),
1596+
item.getExpiryTime());
15991597

16001598
if (newItemHdl) {
1601-
XDCHECK_EQ(newItemHdl->getSize(), item->getSize());
1599+
XDCHECK_EQ(newItemHdl->getSize(), item.getSize());
16021600

16031601
return moveRegularItemOnEviction(item, newItemHdl);
16041602
}
@@ -1609,83 +1607,10 @@ CacheAllocator<CacheTrait>::tryEvictToNextMemoryTier(
16091607

16101608
template <typename CacheTrait>
16111609
typename CacheAllocator<CacheTrait>::WriteHandle
1612-
CacheAllocator<CacheTrait>::tryEvictToNextMemoryTier(Item* item) {
1613-
auto tid = getTierId(*item);
1614-
auto pid = allocator_[tid]->getAllocInfo(item->getMemory()).poolId;
1615-
return tryEvictToNextMemoryTier(tid, pid, item);
1616-
}
1617-
1618-
template <typename CacheTrait>
1619-
bool CacheAllocator<CacheTrait>::tryEvictItem(MMContainer& mmContainer,
1620-
WriteHandle &&handle) {
1621-
auto &item = *handle;
1622-
1623-
// we should flush this to nvmcache if it is not already present in nvmcache
1624-
// and the item is not expired.
1625-
const bool evictToNvmCache = shouldWriteToNvmCache(item);
1626-
1627-
auto token = evictToNvmCache ? nvmCache_->createPutToken(item.getKey())
1628-
: typename NvmCacheT::PutToken{};
1629-
// record the in-flight eviciton. If not, we move on to next item to avoid
1630-
// stalling eviction.
1631-
if (evictToNvmCache && !token.isValid()) {
1632-
if (item.isChainedItem())
1633-
stats_.evictFailConcurrentFill.inc();
1634-
else
1635-
stats_.evictFailConcurrentFill.inc();
1636-
handle.release();
1637-
decRef(item);
1638-
return false;
1639-
}
1640-
1641-
// If there are other accessors, we should abort. We should be the only
1642-
// handle owner.
1643-
auto evictHandle = accessContainer_->removeIf(item, &itemEvictionPredicate);
1644-
1645-
/* We got new handle so it's safe to get rid of the old one. */
1646-
handle.release();
1647-
decRef(item);
1648-
1649-
if (!evictHandle) {
1650-
if (item.isChainedItem())
1651-
stats_.evictFailParentAC.inc();
1652-
else
1653-
stats_.evictFailAC.inc();
1654-
return false;
1655-
}
1656-
1657-
auto removed = mmContainer.remove(item);
1658-
XDCHECK(removed);
1659-
XDCHECK_EQ(reinterpret_cast<uintptr_t>(evictHandle.get()),
1660-
reinterpret_cast<uintptr_t>(&item));
1661-
XDCHECK(!evictHandle->isInMMContainer());
1662-
XDCHECK(!evictHandle->isAccessible());
1663-
1664-
// If the item is now marked as moving, that means its corresponding slab is
1665-
// being released right now. So, we look for the next item that is eligible
1666-
// for eviction. It is safe to destroy the handle here since the moving bit
1667-
// is set.
1668-
if (evictHandle->isMoving()) {
1669-
if (item.isChainedItem())
1670-
stats_.evictFailParentMove.inc();
1671-
else
1672-
stats_.evictFailMove.inc();
1673-
return false;
1674-
}
1675-
1676-
// Ensure that there are no accessors after removing from the access
1677-
// container.
1678-
XDCHECK(evictHandle->getRefCount() == 1);
1679-
1680-
if (evictToNvmCache && shouldWriteToNvmCacheExclusive(item)) {
1681-
XDCHECK(token.isValid());
1682-
nvmCache_->put(evictHandle, std::move(token));
1683-
}
1684-
1685-
/* Release the handle, item will be reused by the called. */
1686-
evictHandle.release();
1687-
1688-
return true;
1610+
CacheAllocator<CacheTrait>::tryEvictToNextMemoryTier(Item& item) {
1611+
auto tid = getTierId(item);
1612+
auto pid = allocator_[tid]->getAllocInfo(item.getMemory()).poolId;
1613+
return tryEvictToNextMemoryTier(tid, pid, item);
16891614
}
16901615

16911616
template <typename CacheTrait>
@@ -2885,7 +2810,7 @@ void CacheAllocator<CacheTrait>::evictForSlabRelease(
28852810
auto owningHandle =
28862811
item.isChainedItem()
28872812
? evictChainedItemForSlabRelease(item.asChainedItem())
2888-
: evictNormalItemForSlabRelease(item);
2813+
: evictNormalItem(item);
28892814

28902815
// we managed to evict the corresponding owner of the item and have the
28912816
// last handle for the owner.
@@ -2942,14 +2867,15 @@ void CacheAllocator<CacheTrait>::evictForSlabRelease(
29422867

29432868
template <typename CacheTrait>
29442869
typename CacheAllocator<CacheTrait>::ItemHandle
2945-
CacheAllocator<CacheTrait>::evictNormalItemForSlabRelease(Item& item) {
2870+
CacheAllocator<CacheTrait>::evictNormalItem(Item& item,
2871+
bool skipIfTokenInvalid) {
29462872
XDCHECK(item.isMoving());
29472873

29482874
if (item.isOnlyMoving()) {
29492875
return ItemHandle{};
29502876
}
29512877

2952-
auto evictHandle = tryEvictToNextMemoryTier(&item);
2878+
auto evictHandle = tryEvictToNextMemoryTier(item);
29532879
if(evictHandle) return evictHandle;
29542880

29552881
auto predicate = [](const Item& it) { return it.getRefCount() == 0; };
@@ -2958,6 +2884,11 @@ CacheAllocator<CacheTrait>::evictNormalItemForSlabRelease(Item& item) {
29582884
auto token = evictToNvmCache ? nvmCache_->createPutToken(item.getKey())
29592885
: typename NvmCacheT::PutToken{};
29602886

2887+
if (skipIfTokenInvalid && evictToNvmCache && !token.isValid()) {
2888+
stats_.evictFailConcurrentFill.inc();
2889+
return ItemHandle{};
2890+
}
2891+
29612892
// We remove the item from both access and mm containers. It doesn't matter
29622893
// if someone else calls remove on the item at this moment, the item cannot
29632894
// be freed as long as we have the moving bit set.

cachelib/allocator/CacheAllocator.h

Lines changed: 4 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1401,8 +1401,7 @@ class CacheAllocator : public CacheBase {
14011401
//
14021402
// @return true If the move was completed, and the containers were updated
14031403
// successfully.
1404-
template <typename ItemPtr>
1405-
ItemHandle moveRegularItemOnEviction(ItemPtr& oldItem, ItemHandle& newItemHdl);
1404+
ItemHandle moveRegularItemOnEviction(Item& oldItem, ItemHandle& newItemHdl);
14061405

14071406
// Moves a regular item to a different slab. This should only be used during
14081407
// slab release after the item's moving bit has been set. The user supplied
@@ -1561,14 +1560,6 @@ class CacheAllocator : public CacheBase {
15611560
// @return An evicted item or nullptr if there is no suitable candidate.
15621561
Item* findEviction(TierId tid, PoolId pid, ClassId cid);
15631562

1564-
// Try to evict a regular item.
1565-
//
1566-
// @param mmContainer the container to look for evictions.
1567-
// @param item item to evict
1568-
//
1569-
// @return whther eviction succeeded
1570-
bool tryEvictItem(MMContainer& mmContainer, WriteHandle&& handle);
1571-
15721563
// Try to move the item down to the next memory tier
15731564
//
15741565
// @param tid current tier ID of the item
@@ -1577,16 +1568,15 @@ class CacheAllocator : public CacheBase {
15771568
//
15781569
// @return valid handle to the item. This will be the last
15791570
// handle to the item. On failure an empty handle.
1580-
template <typename ItemPtr>
1581-
WriteHandle tryEvictToNextMemoryTier(TierId tid, PoolId pid, ItemPtr& item);
1571+
WriteHandle tryEvictToNextMemoryTier(TierId tid, PoolId pid, Item& item);
15821572

15831573
// Try to move the item down to the next memory tier
15841574
//
15851575
// @param item the item to evict
15861576
//
15871577
// @return valid handle to the item. This will be the last
15881578
// handle to the item. On failure an empty handle.
1589-
WriteHandle tryEvictToNextMemoryTier(Item* item);
1579+
WriteHandle tryEvictToNextMemoryTier(Item& item);
15901580

15911581
// Deserializer CacheAllocatorMetadata and verify the version
15921582
//
@@ -1713,7 +1703,7 @@ class CacheAllocator : public CacheBase {
17131703
//
17141704
// @return last handle for corresponding to item on success. empty handle on
17151705
// failure. caller can retry if needed.
1716-
ItemHandle evictNormalItemForSlabRelease(Item& item);
1706+
ItemHandle evictNormalItem(Item& item, bool skipIfTokenInvalid = false);
17171707

17181708
// Helper function to evict a child item for slab release
17191709
// As a side effect, the parent item is also evicted
@@ -1834,10 +1824,6 @@ class CacheAllocator : public CacheBase {
18341824
return item.getRefCount() == 0;
18351825
}
18361826

1837-
static bool itemEvictionPredicate(const Item& item) {
1838-
return item.getRefCount() == 1 && !item.isMoving();
1839-
}
1840-
18411827
static bool itemExpiryPredicate(const Item& item) {
18421828
return item.getRefCount() == 1 && item.isExpired();
18431829
}

cachelib/allocator/CacheItem-inl.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -229,8 +229,8 @@ bool CacheItem<CacheTrait>::markMoving() noexcept {
229229
}
230230

231231
template <typename CacheTrait>
232-
void CacheItem<CacheTrait>::unmarkMoving() noexcept {
233-
ref_.unmarkMoving();
232+
RefcountWithFlags::Value CacheItem<CacheTrait>::unmarkMoving() noexcept {
233+
return ref_.unmarkMoving();
234234
}
235235

236236
template <typename CacheTrait>

cachelib/allocator/CacheItem.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,7 @@ class CACHELIB_PACKED_ATTR CacheItem {
378378
* Unmarking moving does not depend on `isInMMContainer`
379379
*/
380380
bool markMoving() noexcept;
381-
void unmarkMoving() noexcept;
381+
RefcountWithFlags::Value unmarkMoving() noexcept;
382382
bool isMoving() const noexcept;
383383
bool isOnlyMoving() const noexcept;
384384

0 commit comments

Comments
 (0)