Skip to content

Commit d57dba8

Browse files
committed
Use moving bit for synchronization in findEviction
moving bit is used to give exclusive right to evict the item to a particular thread. Originially, there was an assumption that whoever marked the item as moving will try to free it until he succeeds. Since we don't want to do that in findEviction (potentially can take a long time) we need to make sure that unmarking is safe. This patch checks the flags after unmarking (atomically) and if ref is zero it also recyles the item. This is needed as there might be some concurrent thread releasing the item (and decrementing ref count). If moving bit is set, that thread would not free the memory back to allocator, resulting in memory leak on unmarkMoving().
1 parent fdb2163 commit d57dba8

File tree

7 files changed

+60
-139
lines changed

7 files changed

+60
-139
lines changed

cachelib/allocator/CacheAllocator-inl.h

Lines changed: 49 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -1225,20 +1225,15 @@ CacheAllocator<CacheTrait>::findEviction(PoolId pid, ClassId cid) {
12251225
itr) {
12261226
++searchTries;
12271227

1228-
Item* candidate = itr.get();
1228+
Item* toRecycle = itr.get();
12291229

1230-
// make sure no other thead is evicting the item
1231-
if (candidate->getRefCount() != 0 || candidate->isMoving()) {
1232-
++itr;
1233-
continue;
1234-
}
1235-
1236-
if (candidate->isChainedItem()) {
1237-
candidate = &candidate->asChainedItem().getParentItem(compressor_);
1238-
}
1230+
Item* candidate =
1231+
toRecycle->isChainedItem()
1232+
? &toRecycle->asChainedItem().getParentItem(compressor_)
1233+
: toRecycle;
12391234

1240-
auto toReleaseHandle = accessContainer_->find(*candidate);
1241-
if (!toReleaseHandle || !toReleaseHandle.isReady()) {
1235+
// make sure no other thead is evicting the item
1236+
if (candidate->getRefCount() != 0 || !candidate->markMoving()) {
12421237
++itr;
12431238
continue;
12441239
}
@@ -1248,8 +1243,11 @@ CacheAllocator<CacheTrait>::findEviction(PoolId pid, ClassId cid) {
12481243
// for chained items, the ownership of the parent can change. We try to
12491244
// evict what we think as parent and see if the eviction of parent
12501245
// recycles the child we intend to.
1251-
bool evicted = tryEvictItem(mmContainer, std::move(toReleaseHandle));
1252-
if (evicted) {
1246+
auto toReleaseHandle =
1247+
evictNormalItem(*candidate, true /* skipIfTokenInvalid */);
1248+
auto ref = candidate->unmarkMoving();
1249+
1250+
if (toReleaseHandle || ref == 0u) {
12531251
if (candidate->hasChainedItem()) {
12541252
(*stats_.chainedItemEvictions)[pid][cid].inc();
12551253
} else {
@@ -1262,17 +1260,43 @@ CacheAllocator<CacheTrait>::findEviction(PoolId pid, ClassId cid) {
12621260
AllocatorApiResult::DRAM_EVICTED, toReleaseHandle->getSize(),
12631261
toReleaseHandle->getConfiguredTTL().count());
12641262
}
1263+
} else {
1264+
if (candidate->hasChainedItem()) {
1265+
stats_.evictFailParentAC.inc();
1266+
} else {
1267+
stats_.evictFailAC.inc();
1268+
}
1269+
}
1270+
1271+
if (toReleaseHandle) {
1272+
XDCHECK(toReleaseHandle.get() == candidate);
1273+
XDCHECK(toRecycle == candidate || toRecycle->isChainedItem());
1274+
XDCHECK_EQ(1u, toReleaseHandle->getRefCount());
1275+
1276+
// We manually release the item here because we don't want to
1277+
// invoke the Item Handle's destructor which will be decrementing
1278+
// an already zero refcount, which will throw exception
1279+
auto& itemToRelease = *toReleaseHandle.release();
12651280

12661281
// Decrementing the refcount because we want to recycle the item
1267-
const auto ref = decRef(*candidate);
1282+
const auto ref = decRef(itemToRelease);
12681283
XDCHECK_EQ(0u, ref);
12691284

12701285
// check if by releasing the item we intend to, we actually
12711286
// recycle the candidate.
1287+
if (ReleaseRes::kRecycled ==
1288+
releaseBackToAllocator(itemToRelease, RemoveContext::kEviction,
1289+
/* isNascent */ false, toRecycle)) {
1290+
return toRecycle;
1291+
}
1292+
} else if (ref == 0u) {
1293+
// it's safe to recycle the item here as there are no more
1294+
// references and the item could not been marked as moving
1295+
// by other thread since it's detached from MMContainer.
12721296
if (ReleaseRes::kRecycled ==
12731297
releaseBackToAllocator(*candidate, RemoveContext::kEviction,
1274-
/* isNascent */ false, candidate)) {
1275-
return candidate;
1298+
/* isNascent */ false, toRecycle)) {
1299+
return toRecycle;
12761300
}
12771301
}
12781302

@@ -1329,77 +1353,6 @@ bool CacheAllocator<CacheTrait>::shouldWriteToNvmCacheExclusive(
13291353
return true;
13301354
}
13311355

1332-
template <typename CacheTrait>
1333-
bool CacheAllocator<CacheTrait>::tryEvictItem(MMContainer& mmContainer,
1334-
WriteHandle&& handle) {
1335-
auto& item = *handle;
1336-
1337-
// we should flush this to nvmcache if it is not already present in nvmcache
1338-
// and the item is not expired.
1339-
const bool evictToNvmCache = shouldWriteToNvmCache(item);
1340-
1341-
auto token = evictToNvmCache ? nvmCache_->createPutToken(item.getKey())
1342-
: typename NvmCacheT::PutToken{};
1343-
// record the in-flight eviciton. If not, we move on to next item to avoid
1344-
// stalling eviction.
1345-
if (evictToNvmCache && !token.isValid()) {
1346-
if (item.isChainedItem())
1347-
stats_.evictFailConcurrentFill.inc();
1348-
else
1349-
stats_.evictFailConcurrentFill.inc();
1350-
handle.reset();
1351-
return false;
1352-
}
1353-
1354-
// If there are other accessors, we should abort. We should be the only
1355-
// handle owner.
1356-
auto evictHandle = accessContainer_->removeIf(item, &itemEvictionPredicate);
1357-
1358-
/* We got new handle so it's safe to get rid of the old one. */
1359-
handle.reset();
1360-
1361-
if (!evictHandle) {
1362-
if (item.isChainedItem())
1363-
stats_.evictFailParentAC.inc();
1364-
else
1365-
stats_.evictFailAC.inc();
1366-
return false;
1367-
}
1368-
1369-
auto removed = mmContainer.remove(item);
1370-
XDCHECK(removed);
1371-
XDCHECK_EQ(reinterpret_cast<uintptr_t>(evictHandle.get()),
1372-
reinterpret_cast<uintptr_t>(&item));
1373-
XDCHECK(!evictHandle->isInMMContainer());
1374-
XDCHECK(!evictHandle->isAccessible());
1375-
1376-
// If the item is now marked as moving, that means its corresponding slab is
1377-
// being released right now. So, we look for the next item that is eligible
1378-
// for eviction. It is safe to destroy the handle here since the moving bit
1379-
// is set.
1380-
if (evictHandle->isMoving()) {
1381-
if (item.isChainedItem())
1382-
stats_.evictFailParentMove.inc();
1383-
else
1384-
stats_.evictFailMove.inc();
1385-
return false;
1386-
}
1387-
1388-
// Ensure that there are no accessors after removing from the access
1389-
// container.
1390-
XDCHECK(evictHandle->getRefCount() == 1);
1391-
1392-
if (evictToNvmCache && shouldWriteToNvmCacheExclusive(item)) {
1393-
XDCHECK(token.isValid());
1394-
nvmCache_->put(evictHandle, std::move(token));
1395-
}
1396-
1397-
/* Release the handle, item will be reused by the called. */
1398-
evictHandle.release();
1399-
1400-
return true;
1401-
}
1402-
14031356
template <typename CacheTrait>
14041357
typename CacheAllocator<CacheTrait>::RemoveRes
14051358
CacheAllocator<CacheTrait>::remove(typename Item::Key key) {
@@ -2589,7 +2542,7 @@ void CacheAllocator<CacheTrait>::evictForSlabRelease(
25892542
auto owningHandle =
25902543
item.isChainedItem()
25912544
? evictChainedItemForSlabRelease(item.asChainedItem())
2592-
: evictNormalItemForSlabRelease(item);
2545+
: evictNormalItem(item);
25932546

25942547
// we managed to evict the corresponding owner of the item and have the
25952548
// last handle for the owner.
@@ -2646,7 +2599,8 @@ void CacheAllocator<CacheTrait>::evictForSlabRelease(
26462599

26472600
template <typename CacheTrait>
26482601
typename CacheAllocator<CacheTrait>::WriteHandle
2649-
CacheAllocator<CacheTrait>::evictNormalItemForSlabRelease(Item& item) {
2602+
CacheAllocator<CacheTrait>::evictNormalItem(Item& item,
2603+
bool skipIfTokenInvalid) {
26502604
XDCHECK(item.isMoving());
26512605

26522606
if (item.isOnlyMoving()) {
@@ -2659,6 +2613,11 @@ CacheAllocator<CacheTrait>::evictNormalItemForSlabRelease(Item& item) {
26592613
auto token = evictToNvmCache ? nvmCache_->createPutToken(item.getKey())
26602614
: typename NvmCacheT::PutToken{};
26612615

2616+
if (skipIfTokenInvalid && evictToNvmCache && !token.isValid()) {
2617+
stats_.evictFailConcurrentFill.inc();
2618+
return WriteHandle{};
2619+
}
2620+
26622621
// We remove the item from both access and mm containers. It doesn't matter
26632622
// if someone else calls remove on the item at this moment, the item cannot
26642623
// be freed as long as we have the moving bit set.

cachelib/allocator/CacheAllocator.h

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1585,15 +1585,6 @@ class CacheAllocator : public CacheBase {
15851585

15861586
using EvictionIterator = typename MMContainer::Iterator;
15871587

1588-
// Try to evict an item.
1589-
//
1590-
// @param mmContainer the container to look for evictions.
1591-
// @param handle handle to item to evict. eviction will fail if
1592-
// this is not an owning handle.
1593-
//
1594-
// @return whether eviction succeeded
1595-
bool tryEvictItem(MMContainer& mmContainer, WriteHandle&& handle);
1596-
15971588
// Deserializer CacheAllocatorMetadata and verify the version
15981589
//
15991590
// @param deserializer Deserializer object
@@ -1719,7 +1710,7 @@ class CacheAllocator : public CacheBase {
17191710
//
17201711
// @return last handle for corresponding to item on success. empty handle on
17211712
// failure. caller can retry if needed.
1722-
WriteHandle evictNormalItemForSlabRelease(Item& item);
1713+
WriteHandle evictNormalItem(Item& item, bool skipIfTokenInvalid = false);
17231714

17241715
// Helper function to evict a child item for slab release
17251716
// As a side effect, the parent item is also evicted
@@ -1839,10 +1830,6 @@ class CacheAllocator : public CacheBase {
18391830
return item.getRefCount() == 0;
18401831
}
18411832

1842-
static bool itemEvictionPredicate(const Item& item) {
1843-
return item.getRefCount() == 1 && !item.isMoving();
1844-
}
1845-
18461833
static bool itemExpiryPredicate(const Item& item) {
18471834
return item.getRefCount() == 1 && item.isExpired();
18481835
}

cachelib/allocator/CacheItem-inl.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -219,8 +219,8 @@ bool CacheItem<CacheTrait>::markMoving() noexcept {
219219
}
220220

221221
template <typename CacheTrait>
222-
void CacheItem<CacheTrait>::unmarkMoving() noexcept {
223-
ref_.unmarkMoving();
222+
RefcountWithFlags::Value CacheItem<CacheTrait>::unmarkMoving() noexcept {
223+
return ref_.unmarkMoving();
224224
}
225225

226226
template <typename CacheTrait>

cachelib/allocator/CacheItem.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -357,7 +357,7 @@ class CACHELIB_PACKED_ATTR CacheItem {
357357
* Unmarking moving does not depend on `isInMMContainer`
358358
*/
359359
bool markMoving() noexcept;
360-
void unmarkMoving() noexcept;
360+
RefcountWithFlags::Value unmarkMoving() noexcept;
361361
bool isMoving() const noexcept;
362362
bool isOnlyMoving() const noexcept;
363363

cachelib/allocator/ChainedHashTable-inl.h

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -450,21 +450,6 @@ typename T::Handle ChainedHashTable::Container<T, HookPtr, LockT>::removeIf(
450450
}
451451
}
452452

453-
template <typename T,
454-
typename ChainedHashTable::Hook<T> T::*HookPtr,
455-
typename LockT>
456-
typename T::Handle ChainedHashTable::Container<T, HookPtr, LockT>::find(
457-
T& node) const {
458-
const auto bucket = ht_.getBucket(node.getKey());
459-
auto l = locks_.lockShared(bucket);
460-
461-
if (!node.isAccessible()) {
462-
return {};
463-
}
464-
465-
return handleMaker_(&node);
466-
}
467-
468453
template <typename T,
469454
typename ChainedHashTable::Hook<T> T::*HookPtr,
470455
typename LockT>

cachelib/allocator/ChainedHashTable.h

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -496,17 +496,6 @@ class ChainedHashTable {
496496
// creating this item handle.
497497
Handle find(Key key) const;
498498

499-
// returns a handle to specified node.
500-
//
501-
// @param node requested node
502-
//
503-
// @return handle to the node if find was sucessfull. returns a
504-
// null handle if the node was not in the container.
505-
//
506-
// @throw std::overflow_error is the maximum item refcount is execeeded by
507-
// creating this item handle.
508-
Handle find(T& node) const;
509-
510499
// for saving the state of the hash table
511500
//
512501
// precondition: serialization must happen without any reader or writer

cachelib/allocator/Refcount.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -247,10 +247,10 @@ class FOLLY_PACK_ATTR RefcountWithFlags {
247247
/**
248248
* The following four functions are used to track whether or not
249249
* an item is currently in the process of being moved. This happens during a
250-
* slab rebalance or resize operation.
250+
* slab rebalance or resize operation or during eviction.
251251
*
252-
* An item can only be marked moving when `isInMMContainer` returns true.
253-
* This operation is atomic.
252+
* An item can only be marked moving when `isInMMContainer` returns true and
253+
* the item is not yet marked as moving. This operation is atomic.
254254
*
255255
* User can also query if an item "isOnlyMoving". This returns true only
256256
* if the refcount is 0 and only the moving bit is set.
@@ -267,7 +267,8 @@ class FOLLY_PACK_ATTR RefcountWithFlags {
267267
Value curValue = __atomic_load_n(refPtr, __ATOMIC_RELAXED);
268268
while (true) {
269269
const bool flagSet = curValue & conditionBitMask;
270-
if (!flagSet) {
270+
const bool alreadyMoving = curValue & bitMask;
271+
if (!flagSet || alreadyMoving) {
271272
return false;
272273
}
273274

@@ -286,9 +287,9 @@ class FOLLY_PACK_ATTR RefcountWithFlags {
286287
}
287288
}
288289
}
289-
void unmarkMoving() noexcept {
290+
Value unmarkMoving() noexcept {
290291
Value bitMask = ~getAdminRef<kMoving>();
291-
__atomic_and_fetch(&refCount_, bitMask, __ATOMIC_ACQ_REL);
292+
return __atomic_and_fetch(&refCount_, bitMask, __ATOMIC_ACQ_REL) & kRefMask;
292293
}
293294
bool isMoving() const noexcept { return getRaw() & getAdminRef<kMoving>(); }
294295
bool isOnlyMoving() const noexcept {

0 commit comments

Comments
 (0)