From c572aa21d2cf3890706c55222696bd9e10d944d2 Mon Sep 17 00:00:00 2001 From: Daniel Byrne Date: Wed, 8 Nov 2023 07:10:49 -0800 Subject: [PATCH 1/2] test case for acquire parent --- cachelib/allocator/CacheAllocator-inl.h | 4 +- cachelib/allocator/Handle.h | 5 +- .../allocator/tests/AllocatorTypeTest.cpp | 5 ++ cachelib/allocator/tests/BaseAllocatorTest.h | 75 +++++++++++++++++++ cachelib/common/Exceptions.h | 5 ++ 5 files changed, 91 insertions(+), 3 deletions(-) diff --git a/cachelib/allocator/CacheAllocator-inl.h b/cachelib/allocator/CacheAllocator-inl.h index f9dfd24a30..84110a2048 100644 --- a/cachelib/allocator/CacheAllocator-inl.h +++ b/cachelib/allocator/CacheAllocator-inl.h @@ -862,13 +862,13 @@ CacheAllocator::releaseBackToAllocator(Item& it, headHandle.reset(); if (head == nullptr || &head->getParentItem(compressor_) != &it) { - throw std::runtime_error(folly::sformat( + throw exception::ChainedItemInvalid(folly::sformat( "Mismatch parent pointer. This should not happen. Key: {}", it.getKey())); } if (!chainedItemAccessContainer_->remove(*head)) { - throw std::runtime_error(folly::sformat( + throw exception::ChainedItemInvalid(folly::sformat( "Chained item associated with {} cannot be removed from hash table " "This should not happen here.", it.getKey())); diff --git a/cachelib/allocator/Handle.h b/cachelib/allocator/Handle.h index 11d2bed2be..9520b70c99 100644 --- a/cachelib/allocator/Handle.h +++ b/cachelib/allocator/Handle.h @@ -26,6 +26,7 @@ #include #include "cachelib/allocator/nvmcache/WaitContext.h" +#include "cachelib/common/Exceptions.h" namespace facebook { namespace cachelib { @@ -70,8 +71,10 @@ struct ReadHandleImpl { assert(alloc_ != nullptr); try { alloc_->release(it_, isNascent()); + } catch (const exception::ChainedItemInvalid& e) { + XDCHECK(false) << e.what(); } catch (const std::exception& e) { - XLOGF(CRITICAL, "Failed to release {:#10x} : {}", static_cast(it_), + XLOGF(CRITICAL, "Failed to release {} : {}", static_cast(it_), e.what()); } it_ = nullptr; diff --git a/cachelib/allocator/tests/AllocatorTypeTest.cpp b/cachelib/allocator/tests/AllocatorTypeTest.cpp index 2786946bc6..97ff04efea 100644 --- a/cachelib/allocator/tests/AllocatorTypeTest.cpp +++ b/cachelib/allocator/tests/AllocatorTypeTest.cpp @@ -292,6 +292,11 @@ TYPED_TEST(BaseAllocatorTest, TransferChainAfterMoving) { this->testTransferChainAfterMoving(); } +TYPED_TEST(BaseAllocatorTest, ChainedItemParentAcquireAfterMove) { + ASSERT_EXIT(this->testChainedItemParentAcquireAfterMoveLoop(), + testing::ExitedWithCode(0), ".*"); +} + TYPED_TEST(BaseAllocatorTest, AddAndPopChainedItemMultithread) { this->testAddAndPopChainedItemMultithread(); } diff --git a/cachelib/allocator/tests/BaseAllocatorTest.h b/cachelib/allocator/tests/BaseAllocatorTest.h index 4b25d0c5d3..f9dc647cc9 100644 --- a/cachelib/allocator/tests/BaseAllocatorTest.h +++ b/cachelib/allocator/tests/BaseAllocatorTest.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -2469,6 +2470,80 @@ class BaseAllocatorTest : public AllocatorTest { } } + // tests case that correct parent item is acquired after move + void testChainedItemParentAcquireAfterMoveLoop() { + // create an allocator worth 250 slabs + // first slab is for overhead, second is parent class + // third is chained item 1 and rest are for new chained item alloc + // to move to. + std::unique_ptr alloc; + typename AllocatorT::Config config; + config.configureChainedItems(); + config.setCacheSize(250 * Slab::kSize); + + const std::set allocSizes = {1024, 2048}; + auto sizes = std::vector{500, 1500}; + std::atomic numMoves{0}; + std::atomic numReplaces{0}; + PoolId pid; + + using Item = typename AllocatorT::Item; + config.enableMovingOnSlabRelease([&](Item& oldItem, Item& newItem, + Item* parentPtr) { + assert(oldItem.getSize() == newItem.getSize()); + assert(oldItem.isChainedItem()); + std::memcpy(newItem.getMemory(), oldItem.getMemory(), oldItem.getSize()); + folly::Latch latch(1); + auto insertThread = std::make_unique([&]() { + ASSERT_NO_THROW({ + auto parentReplacement = + alloc->allocate(pid, parentPtr->getKey(), sizes[0]); + Item* parentCopy = parentPtr; + latch.count_down(); + while (parentCopy->isMoving()) + ; + alloc->insertOrReplace(parentReplacement); + ++numReplaces; + }); + }); + insertThread->detach(); + latch.wait(); + ++numMoves; + }); + + alloc = std::make_unique(config); + + const size_t numBytes = alloc->getCacheMemoryStats().ramCacheSize; + const auto poolSize = numBytes; + pid = alloc->addPool("one", poolSize, allocSizes); + + auto allocFn = [&](std::string keyPrefix, std::vector sizes) { + for (unsigned int loop = 0; loop < 20; ++loop) { + for (unsigned int i = 0; i < 2048; ++i) { + const auto key = keyPrefix + folly::to(loop) + "_" + + folly::to(i); + auto itemHandle = + util::allocateAccessible(*alloc, pid, key, sizes[0]); + auto childItem = alloc->allocateChainedItem(itemHandle, sizes[1]); + ASSERT_NE(nullptr, childItem); + + alloc->addChainedItem(itemHandle, std::move(childItem)); + } + } + }; + allocFn(std::string{"yolo"}, sizes); + + ClassId cid = static_cast(1); + for (int i = 0; i < 20; i++) { + alloc->releaseSlab(pid, cid, SlabReleaseMode::kRebalance); + } + while (alloc->getSlabReleaseStats().numSlabReleaseForRebalance < 20) { + sleep(1); + } + // for ASSERT_EXIT + exit(0); + } + // create a chain of allocations, replace the allocation and ensure that the // order is preserved. void testChainedAllocsReplaceInChain() { diff --git a/cachelib/common/Exceptions.h b/cachelib/common/Exceptions.h index 02e62a9af3..9239938805 100644 --- a/cachelib/common/Exceptions.h +++ b/cachelib/common/Exceptions.h @@ -67,6 +67,11 @@ class SlabReleaseAborted : public std::runtime_error { using std::runtime_error::runtime_error; }; +class ChainedItemInvalid : public std::runtime_error { + public: + using std::runtime_error::runtime_error; +}; + // An allocation error. This could be a genuine std::bad_alloc from // the global allocator, or it can be an internal allocation error // from the backing cachelib item. From fb8e23c996fb1b61818da4022723535f6dae6486 Mon Sep 17 00:00:00 2001 From: Daniel Byrne Date: Wed, 8 Nov 2023 07:56:19 -0800 Subject: [PATCH 2/2] use findInternal to sync on ac lock --- cachelib/allocator/CacheAllocator-inl.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cachelib/allocator/CacheAllocator-inl.h b/cachelib/allocator/CacheAllocator-inl.h index 84110a2048..cebc70f4a9 100644 --- a/cachelib/allocator/CacheAllocator-inl.h +++ b/cachelib/allocator/CacheAllocator-inl.h @@ -2639,6 +2639,7 @@ bool CacheAllocator::moveForSlabRelease( const auto allocInfo = allocator_->getAllocInfo(oldItem.getMemory()); if (chainedItem) { newItemHdl.reset(); + auto parentKey = parentItem->getKey(); auto ref = parentItem->unmarkMoving(); if (UNLIKELY(ref == 0)) { wakeUpWaiters(*parentItem, {}); @@ -2648,7 +2649,7 @@ bool CacheAllocator::moveForSlabRelease( return true; } else { XDCHECK_NE(ref, 0); - auto parentHdl = acquire(parentItem); + auto parentHdl = findInternal(parentKey); if (parentHdl) { wakeUpWaiters(*parentItem, std::move(parentHdl)); }