diff --git a/.github/workflows/build-cachelib-centos.yml b/.github/workflows/build-cachelib-centos-long.yml similarity index 86% rename from .github/workflows/build-cachelib-centos.yml rename to .github/workflows/build-cachelib-centos-long.yml index 3b071a186a..92165f603b 100644 --- a/.github/workflows/build-cachelib-centos.yml +++ b/.github/workflows/build-cachelib-centos-long.yml @@ -1,7 +1,8 @@ name: build-cachelib-centos-latest on: schedule: - - cron: '30 5 * * 1,4' + - cron: '0 7 * * *' + jobs: build-cachelib-centos8-latest: name: "CentOS/latest - Build CacheLib with all dependencies" @@ -33,3 +34,6 @@ jobs: uses: actions/checkout@v2 - name: "build CacheLib using build script" run: ./contrib/build.sh -j -v -T + - name: "run tests" + timeout-minutes: 60 + run: cd opt/cachelib/tests && ../../../run_tests.sh long diff --git a/.github/workflows/build-cachelib-debian.yml b/.github/workflows/build-cachelib-debian.yml index a2ae44a569..5bc3ad3c70 100644 --- a/.github/workflows/build-cachelib-debian.yml +++ b/.github/workflows/build-cachelib-debian.yml @@ -1,7 +1,8 @@ name: build-cachelib-debian-10 on: schedule: - - cron: '30 5 * * 2,6' + - cron: '30 5 * * 0,3' + jobs: build-cachelib-debian-10: name: "Debian/Buster - Build CacheLib with all dependencies" @@ -37,3 +38,6 @@ jobs: uses: actions/checkout@v2 - name: "build CacheLib using build script" run: ./contrib/build.sh -j -v -T + - name: "run tests" + timeout-minutes: 60 + run: cd opt/cachelib/tests && ../../../run_tests.sh diff --git a/.github/workflows/build-cachelib-docker.yml b/.github/workflows/build-cachelib-docker.yml new file mode 100644 index 0000000000..f73339e0d9 --- /dev/null +++ b/.github/workflows/build-cachelib-docker.yml @@ -0,0 +1,49 @@ +name: build-cachelib-docker +on: + push: + pull_request: + +jobs: + build-cachelib-docker: + name: "CentOS/latest - Build CacheLib with all dependencies" + runs-on: ubuntu-latest + env: + REPO: cachelib + GITHUB_REPO: pmem/CacheLib + CONTAINER_REG: ghcr.io/pmem/cachelib + CONTAINER_REG_USER: ${{ secrets.GH_CR_USER }} + CONTAINER_REG_PASS: ${{ secrets.GH_CR_PAT }} + FORCE_IMAGE_ACTION: ${{ secrets.FORCE_IMAGE_ACTION }} + HOST_WORKDIR: ${{ github.workspace }} + WORKDIR: docker + IMG_VER: devel + strategy: + matrix: + CONFIG: ["OS=centos OS_VER=8streams PUSH_IMAGE=1"] + steps: + - name: "System Information" + run: | + echo === uname === + uname -a + echo === /etc/os-release === + cat /etc/os-release + echo === df -hl === + df -hl + echo === free -h === + free -h + echo === top === + top -b -n1 -1 -Eg || timeout 1 top -b -n1 + echo === env === + env + echo === gcc -v === + gcc -v + - name: "checkout sources" + uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - name: Pull the image or rebuild and push it + run: cd $WORKDIR && ${{ matrix.CONFIG }} ./pull-or-rebuild-image.sh $FORCE_IMAGE_ACTION + + - name: Run the build + run: cd $WORKDIR && ${{ matrix.CONFIG }} ./build.sh diff --git a/.github/workflows/build-cachelib.yml b/.github/workflows/build-cachelib.yml deleted file mode 100644 index 15161c40e0..0000000000 --- a/.github/workflows/build-cachelib.yml +++ /dev/null @@ -1,147 +0,0 @@ -# NOTES: -# 1. While Github-Actions enables cache of dependencies, -# Facebook's projects (folly,fizz,wangle,fbthrift) -# are fast-moving targets - so we always checkout the latest version -# (as opposed to using gitactions cache, which is recommended in the -# documentation). -# -# 2. 
Using docker containers to build on CentOS and Debian, -# Specifically CentOS v8.1.1911 as that -# version is closest to Facebook's internal dev machines. -# -# 3. When using docker containers we install 'sudo', -# as the docker images are typically very minimal and without -# 'sudo', while the ./contrib/ scripts use sudo. -# -# 4. When using the docker containers we install 'git' -# BEFORE getting the CacheLib source code (with the 'checkout' action). -# Otherwise, the 'checkout@v2' action script falls back to downloading -# the git repository files only, without the ".git" directory. -# We need the ".git" directory to updating the git-submodules -# (folly/wangle/fizz/fbthrift). See: -# https://github.com/actions/checkout/issues/126#issuecomment-570288731 -# -# 5. To reduce less-critical (and yet frequent) rebuilds, the jobs -# check the author of the commit, and SKIP the build if -# the author is "svcscm". These commits are automatic updates -# for the folly/fbthrift git-submodules, and can happen several times a day. -# While there is a possiblity that updating the git-submodules breaks -# CacheLib, it is less likely, and will be detected once an actual -# code change commit triggers a full build. -# e.g. https://github.com/facebookincubator/CacheLib/commit/9372a82190dd71a6e2bcb668828cfed9d1bd25c1 -# -# 6. The 'if' condition checking the author name of the commit (see #5 above) -# uses github actions metadata variable: -# 'github.event.head_commit.author.name' -# GitHub have changed in the past the metadata structure and broke -# such conditions. If you need to debug the metadata values, -# see the "dummy-show-github-event" job below. -# E.g. https://github.blog/changelog/2019-10-16-changes-in-github-actions-push-event-payload/ -# As of Jan-2021, the output is: -# { -# "author": { -# "email": "mimi@moo.moo", -# "name": "mimi" -# }, -# "committer": { -# "email": "assafgordon@gmail.com", -# "name": "Assaf Gordon", -# "username": "agordon" -# }, -# "distinct": true, -# "id": "6c3aab0970f4a07cc2af7658756a6ef9d82f3276", -# "message": "gitactions: test", -# "timestamp": "2021-01-26T11:11:57-07:00", -# "tree_id": "741cd1cb802df84362a51e5d01f28788845d08b7", -# "url": "https://github.com/agordon/CacheLib/commit/6c3aab0970f4a07cc2af7658756a6ef9d82f3276" -# } -# -# 7. When checking the commit's author name, we use '...author.name', -# NOT '...author.username' - because the 'svcscm' author does not -# have a github username (see the 'mimi' example above). 
-# - -name: build-cachelib -on: [push] -jobs: - dummy-show-github-event: - name: "Show GitHub Action event.head_commit variable" - runs-on: ubuntu-latest - steps: - - name: "GitHub Variable Content" - env: - CONTENT: ${{ toJSON(github.event.head_commit) }} - run: echo "$CONTENT" - - - build-cachelib-centos8-1-1911: - if: "!contains(github.event.head_commit.author.name, 'svcscm')" - name: "CentOS/8.1.1911 - Build CacheLib with all dependencies" - runs-on: ubuntu-latest - # Docker container image name - container: "centos:8.1.1911" - steps: - - name: "update packages" - # stock centos has a problem with CMAKE, fails with: - # "cmake: symbol lookup error: cmake: undefined symbol: archive_write_add_filter_zstd" - # updating solves it - run: dnf update -y - - name: "install sudo,git" - run: dnf install -y sudo git cmake gcc - - name: "System Information" - run: | - echo === uname === - uname -a - echo === /etc/os-release === - cat /etc/os-release - echo === df -hl === - df -hl - echo === free -h === - free -h - echo === top === - top -b -n1 -1 -Eg || timeout 1 top -b -n1 - echo === env === - env - echo === gcc -v === - gcc -v - - name: "checkout sources" - uses: actions/checkout@v2 - - name: "Install Prerequisites" - run: ./contrib/build.sh -S -B - - name: "Test: update-submodules" - run: ./contrib/update-submodules.sh - - name: "Install dependency: zstd" - run: ./contrib/build-package.sh -j -v -i zstd - - name: "Install dependency: googleflags" - run: ./contrib/build-package.sh -j -v -i googleflags - - name: "Install dependency: googlelog" - run: ./contrib/build-package.sh -j -v -i googlelog - - name: "Install dependency: googletest" - run: ./contrib/build-package.sh -j -v -i googletest - - name: "Install dependency: sparsemap" - run: ./contrib/build-package.sh -j -v -i sparsemap - - name: "Install dependency: fmt" - run: ./contrib/build-package.sh -j -v -i fmt - - name: "Install dependency: folly" - run: ./contrib/build-package.sh -j -v -i folly - - name: "Install dependency: fizz" - run: ./contrib/build-package.sh -j -v -i fizz - - name: "Install dependency: wangle" - run: ./contrib/build-package.sh -j -v -i wangle - - name: "Install dependency: fbthrift" - run: ./contrib/build-package.sh -j -v -i fbthrift - - name: "build CacheLib" - # Build cachelib in debug mode (-d) and with all tests (-t) - run: ./contrib/build-package.sh -j -v -i -d -t cachelib - - uses: actions/upload-artifact@v2 - if: failure() - with: - name: cachelib-cmake-logs - path: | - build-cachelib/CMakeFiles/*.log - build-cachelib/CMakeCache.txt - build-cachelib/Makefile - build-cachelib/**/Makefile - if-no-files-found: warn - retention-days: 1 - diff --git a/.github/workflows/clang-format-check.yml b/.github/workflows/clang-format-check.yml index 99370135ff..90c8d739c6 100644 --- a/.github/workflows/clang-format-check.yml +++ b/.github/workflows/clang-format-check.yml @@ -1,6 +1,6 @@ # From: https://github.com/marketplace/actions/clang-format-check#multiple-paths name: clang-format Check -on: [pull_request] +on: [] jobs: formatting-check: name: Formatting Check @@ -13,7 +13,7 @@ jobs: steps: - uses: actions/checkout@v2 - name: Run clang-format style check for C/C++ programs. 
- uses: jidicula/clang-format-action@v3.4.0 + uses: jidicula/clang-format-action@v4.6.2 with: - clang-format-version: '11' + clang-format-version: '13' check-path: ${{ matrix.path }} diff --git a/.packit.yaml b/.packit.yaml new file mode 100644 index 0000000000..bea307d9d0 --- /dev/null +++ b/.packit.yaml @@ -0,0 +1,25 @@ +# See the documentation for more information: +# https://packit.dev/docs/configuration + +specfile_path: cachelib.spec + +upstream_package_name: CacheLib +downstream_package_name: cachelib + +actions: + fix-spec-file: + - bash -c "sed -i cachelib.spec -e \"s/%global commit.*/%global commit $(git rev-parse HEAD)/\"" + - bash -c "sed -i cachelib.spec -e \"s/%global date.*/%global date $(git show -s --date=format:'%Y%m%d' --format=%cd)/\"" + create-archive: + - bash -c "COMMIT=$(git rev-parse HEAD); curl -ORL https://github.com/facebook/CacheLib/archive/${COMMIT}/cachelib-${COMMIT}.tar.gz; echo cachelib-${COMMIT}.tar.gz" + post-upstream-clone: "bash -c \"rm -rf cachelib-dist-git; git clone -b packit https://pagure.io/meta/cachelib.git cachelib-dist-git && mv cachelib-dist-git/cachelib*.{spec,patch} .\"" + +jobs: +- job: copr_build + trigger: pull_request + metadata: + targets: + - fedora-rawhide-aarch64 + - fedora-rawhide-x86_64 + - fedora-35-aarch64 + - fedora-35-x86_64 diff --git a/CHANGELOG.md b/CHANGELOG.md index ebe779f258..2cf2803a42 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## V17 +In this version, `CacheAllocator::ItemHandle` is removed. Updating to this version will cause compilation error if `ItemHandle` is still used. + ## V16 This version is incompatible with versions below 15. Downgrading from this version directly to a version below 15 will require the cache to be dropped. If you need to downgrade from this version, please make sure you downgrade to version 15 first to avoid dropping the cache. @@ -8,7 +11,7 @@ This version is incompatible with versions below 15. Downgrading from this versi This version is incompatible with any previous versions. -Updating to this version may cause compliation error because: +Updating to this version may cause compilation error because: - The following APIs are removed: 1. CacheAllocator::allocatePermanent_deprecated. diff --git a/cachelib/CMakeLists.txt b/cachelib/CMakeLists.txt index 917e164e3b..f666025093 100644 --- a/cachelib/CMakeLists.txt +++ b/cachelib/CMakeLists.txt @@ -17,7 +17,7 @@ # refer to the root source directory of the project as ${HELLO_SOURCE_DIR} and # to the root binary directory of the project as ${HELLO_BINARY_DIR}. 
-cmake_minimum_required (VERSION 3.19) +cmake_minimum_required (VERSION 3.12) ## TODO: get version from variable project (CacheLib VERSION 0.1) diff --git a/cachelib/allocator/CCacheAllocator.cpp b/cachelib/allocator/CCacheAllocator.cpp index 6f0bab6727..cff4bded4b 100644 --- a/cachelib/allocator/CCacheAllocator.cpp +++ b/cachelib/allocator/CCacheAllocator.cpp @@ -30,12 +30,12 @@ CCacheAllocator::CCacheAllocator(MemoryAllocator& allocator, PoolId poolId) CCacheAllocator::CCacheAllocator(MemoryAllocator& allocator, PoolId poolId, const SerializationType& object) - : CCacheAllocatorBase(*object.ccMetadata_ref()), + : CCacheAllocatorBase(*object.ccMetadata()), allocator_(allocator), poolId_(poolId), currentChunksIndex_(0) { auto& currentChunks = chunks_[currentChunksIndex_]; - for (auto chunk : *object.chunks_ref()) { + for (auto chunk : *object.chunks()) { currentChunks.push_back(allocator_.unCompress(CompressedPtr(chunk))); } } @@ -93,11 +93,11 @@ size_t CCacheAllocator::resize() { CCacheAllocator::SerializationType CCacheAllocator::saveState() { CCacheAllocator::SerializationType object; - *object.ccMetadata_ref() = ccType_.saveState(); + *object.ccMetadata() = ccType_.saveState(); std::lock_guard guard(resizeLock_); for (auto chunk : getCurrentChunks()) { - object.chunks_ref()->push_back(allocator_.compress(chunk).saveState()); + object.chunks()->push_back(allocator_.compress(chunk).saveState()); } return object; } diff --git a/cachelib/allocator/CCacheManager.cpp b/cachelib/allocator/CCacheManager.cpp index d9d1d6db7a..6750139ff0 100644 --- a/cachelib/allocator/CCacheManager.cpp +++ b/cachelib/allocator/CCacheManager.cpp @@ -24,7 +24,7 @@ CCacheManager::CCacheManager(const SerializationType& object, : memoryAllocator_(memoryAllocator) { std::lock_guard guard(lock_); - for (const auto& allocator : *object.allocators_ref()) { + for (const auto& allocator : *object.allocators()) { auto id = memoryAllocator_.getPoolId(allocator.first); allocators_.emplace( std::piecewise_construct, @@ -81,8 +81,7 @@ CCacheManager::SerializationType CCacheManager::saveState() { SerializationType object; for (auto& allocator : allocators_) { - object.allocators_ref()->emplace(allocator.first, - allocator.second.saveState()); + object.allocators()->emplace(allocator.first, allocator.second.saveState()); } return object; } diff --git a/cachelib/allocator/CMakeLists.txt b/cachelib/allocator/CMakeLists.txt index 0c19c720d8..9c0cd061b3 100644 --- a/cachelib/allocator/CMakeLists.txt +++ b/cachelib/allocator/CMakeLists.txt @@ -77,14 +77,16 @@ install(TARGETS cachelib_allocator DESTINATION ${LIB_INSTALL_DIR} ) if (BUILD_TESTS) - add_library (allocator_test_support + add_library (allocator_test_support OBJECT ${DATASTRUCT_TESTS_THRIFT_FILES} ./nvmcache/tests/NvmTestBase.cpp ./memory/tests/TestBase.cpp + ../common/TestUtils.cpp ) add_dependencies(allocator_test_support thrift_generated_files) target_link_libraries (allocator_test_support PUBLIC cachelib_allocator + common_test_utils glog::glog gflags GTest::gtest @@ -116,8 +118,11 @@ if (BUILD_TESTS) add_test (tests/ChainedHashTest.cpp) add_test (tests/AllocatorResizeTypeTest.cpp) add_test (tests/AllocatorHitStatsTypeTest.cpp) + add_test (tests/AllocatorMemoryTiersTest.cpp) + add_test (tests/MemoryTiersTest.cpp) add_test (tests/MultiAllocatorTest.cpp) add_test (tests/NvmAdmissionPolicyTest.cpp) + add_test (tests/CacheAllocatorConfigTest.cpp) add_test (nvmcache/tests/NvmItemTests.cpp) add_test (nvmcache/tests/InFlightPutsTest.cpp) add_test 
(nvmcache/tests/TombStoneTests.cpp) diff --git a/cachelib/allocator/Cache.cpp b/cachelib/allocator/Cache.cpp index 0e812fb10e..7f6bfe737c 100644 --- a/cachelib/allocator/Cache.cpp +++ b/cachelib/allocator/Cache.cpp @@ -23,6 +23,12 @@ namespace facebook { namespace cachelib { +CacheBase::CacheBase(unsigned numTiers): numTiers_(numTiers) {} + +unsigned CacheBase::getNumTiers() const { + return numTiers_; +} + void CacheBase::setRebalanceStrategy( PoolId pid, std::shared_ptr strategy) { std::unique_lock l(lock_); diff --git a/cachelib/allocator/Cache.h b/cachelib/allocator/Cache.h index a737074ac6..a31d168be3 100644 --- a/cachelib/allocator/Cache.h +++ b/cachelib/allocator/Cache.h @@ -74,7 +74,7 @@ enum class DestructorContext { // A base class of cache exposing members and status agnostic of template type. class CacheBase { public: - CacheBase() = default; + CacheBase(unsigned numTiers = 1); virtual ~CacheBase() = default; // Movable but not copyable @@ -83,9 +83,15 @@ class CacheBase { CacheBase(CacheBase&&) = default; CacheBase& operator=(CacheBase&&) = default; + // TODO: come up with some reasonable number + static constexpr unsigned kMaxTiers = 2; + // Get a string referring to the cache name for this cache virtual const std::string getCacheName() const = 0; + // Returns true for ObjectCacheBase, false for CacheAllocator. + virtual bool isObjectCache() const = 0; + // Get the reference to a memory pool, for stats purposes // // @param poolId The pool id to query @@ -97,6 +103,9 @@ class CacheBase { // @param poolId the pool id virtual PoolStats getPoolStats(PoolId poolId) const = 0; + virtual AllocationClassBaseStat getAllocationClassStats( + TierId, PoolId pid, ClassId cid) const = 0; + // @param poolId the pool id virtual AllSlabReleaseEvents getAllSlabReleaseEvents(PoolId poolId) const = 0; @@ -187,6 +196,10 @@ class CacheBase { // pool id virtual const ICompactCache& getCompactCache(PoolId pid) const = 0; + // return object cache stats + virtual void getObjectCacheCounters( + std::function) const {} + protected: // move bytes from one pool to another. The source pool should be at least // _bytes_ in size. @@ -271,6 +284,10 @@ class CacheBase { // @return The number of slabs that were actually reclaimed (<= numSlabs) virtual unsigned int reclaimSlabs(PoolId id, size_t numSlabs) = 0; + unsigned getNumTiers() const; + + unsigned numTiers_ = 1; + // Protect 'poolRebalanceStragtegies_' and `poolResizeStrategies_` // and `poolOptimizeStrategy_` mutable std::mutex lock_; diff --git a/cachelib/allocator/CacheAllocator-inl.h b/cachelib/allocator/CacheAllocator-inl.h index a512ed4b6b..6bd3c02989 100644 --- a/cachelib/allocator/CacheAllocator-inl.h +++ b/cachelib/allocator/CacheAllocator-inl.h @@ -16,23 +16,24 @@ #pragma once +#include + namespace facebook { namespace cachelib { template CacheAllocator::CacheAllocator(Config config) - : isOnShm_{config.memMonitoringEnabled()}, + : CacheBase(config.getMemoryTierConfigs().size()), + memoryTierConfigs(config.getMemoryTierConfigs()), + isOnShm_{config.memMonitoringEnabled()}, config_(config.validate()), - tempShm_(isOnShm_ ? std::make_unique(config_.size) + tempShm_(isOnShm_ ? std::make_unique( + config_.getCacheSize()) : nullptr), - allocator_(isOnShm_ ? 
std::make_unique( - getAllocatorConfig(config_), - tempShm_->getAddr(), - config_.size) - : std::make_unique( - getAllocatorConfig(config_), config_.size)), - compactCacheManager_(std::make_unique(*allocator_)), + allocator_(createPrivateAllocator()), + compactCacheManager_(std::make_unique(*allocator_[0] /* TODO */)), compressor_(createPtrCompressor()), + mmContainers_(numTiers_), accessContainer_(std::make_unique( config_.accessConfig, compressor_, @@ -43,21 +44,68 @@ CacheAllocator::CacheAllocator(Config config) [this](Item* it) -> ItemHandle { return acquire(it); })), chainedItemLocks_(config_.chainedItemsLockPower, std::make_shared()), - cacheCreationTime_{util::getCurrentTimeSec()}, - nvmCacheState_{config_.cacheDir, config_.isNvmCacheEncryptionEnabled(), - config_.isNvmCacheTruncateAllocSizeEnabled()} { + movesMap_(kShards), + moveLock_(kShards), + cacheCreationTime_{util::getCurrentTimeSec()} { + + if (numTiers_ > 1 || std::holds_alternative( + memoryTierConfigs[0].getShmTypeOpts())) { + throw std::runtime_error( + "Using custom memory tier or using more than one tier is only " + "supported for Shared Memory."); + } initCommon(false); } +template +std::vector> +CacheAllocator::createPrivateAllocator() { + std::vector> allocators; + + if (isOnShm_) + allocators.emplace_back(std::make_unique( + getAllocatorConfig(config_), + tempShm_->getAddr(), + config_.size)); + else + allocators.emplace_back(std::make_unique( + getAllocatorConfig(config_), config_.size)); + + return allocators; +} + +template +std::vector> +CacheAllocator::createAllocators() { + std::vector> allocators; + for (int tid = 0; tid < numTiers_; tid++) { + allocators.emplace_back(createNewMemoryAllocator(tid)); + } + return allocators; +} + +template +std::vector> +CacheAllocator::restoreAllocators() { + std::vector> allocators; + for (int tid = 0; tid < numTiers_; tid++) { + allocators.emplace_back(restoreMemoryAllocator(tid)); + } + return allocators; +} + template CacheAllocator::CacheAllocator(SharedMemNewT, Config config) - : isOnShm_{true}, + : CacheBase(config.getMemoryTierConfigs().size()), + memoryTierConfigs(config.getMemoryTierConfigs()), + isOnShm_{true}, config_(config.validate()), shmManager_( - std::make_unique(config_.cacheDir, config_.usePosixShm)), - allocator_(createNewMemoryAllocator()), - compactCacheManager_(std::make_unique(*allocator_)), + std::make_unique(config_.cacheDir, config_.isUsingPosixShm())), + allocator_(createAllocators()), + compactCacheManager_(std::make_unique(*allocator_[0] /* TODO */)), compressor_(createPtrCompressor()), + mmContainers_(numTiers_), accessContainer_(std::make_unique( config_.accessConfig, shmManager_ @@ -65,7 +113,8 @@ CacheAllocator::CacheAllocator(SharedMemNewT, Config config) AccessContainer::getRequiredSize( config_.accessConfig.getNumBuckets()), nullptr, - ShmSegmentOpts(config_.accessConfig.getPageSize())) + ShmSegmentOpts(config_.accessConfig.getPageSize(), + false, config_.isUsingPosixShm())) .addr, compressor_, [this](Item* it) -> ItemHandle { return acquire(it); })), @@ -76,48 +125,55 @@ CacheAllocator::CacheAllocator(SharedMemNewT, Config config) AccessContainer::getRequiredSize( config_.chainedItemAccessConfig.getNumBuckets()), nullptr, - ShmSegmentOpts(config_.accessConfig.getPageSize())) + ShmSegmentOpts(config_.accessConfig.getPageSize(), + false, config_.isUsingPosixShm())) .addr, compressor_, [this](Item* it) -> ItemHandle { return acquire(it); })), chainedItemLocks_(config_.chainedItemsLockPower, std::make_shared()), - 
cacheCreationTime_{util::getCurrentTimeSec()}, - nvmCacheState_{config_.cacheDir, config_.isNvmCacheEncryptionEnabled(), - config_.isNvmCacheTruncateAllocSizeEnabled()} { + movesMap_(kShards), + moveLock_(kShards), + cacheCreationTime_{util::getCurrentTimeSec()} { initCommon(false); - shmManager_->removeShm(detail::kShmInfoName); + shmManager_->removeShm(detail::kShmInfoName, + PosixSysVSegmentOpts(config_.isUsingPosixShm())); } template CacheAllocator::CacheAllocator(SharedMemAttachT, Config config) - : isOnShm_{true}, + : CacheBase(config.getMemoryTierConfigs().size()), + memoryTierConfigs(config.getMemoryTierConfigs()), + isOnShm_{true}, config_(config.validate()), shmManager_( std::make_unique(config_.cacheDir, config_.usePosixShm)), deserializer_(createDeserializer()), metadata_{deserializeCacheAllocatorMetadata(*deserializer_)}, - allocator_(restoreMemoryAllocator()), - compactCacheManager_(restoreCCacheManager()), + allocator_(restoreAllocators()), + compactCacheManager_(restoreCCacheManager(0 /* TODO - per tier */)), compressor_(createPtrCompressor()), mmContainers_(deserializeMMContainers(*deserializer_, compressor_)), accessContainer_(std::make_unique( deserializer_->deserialize(), config_.accessConfig, - shmManager_->attachShm(detail::kShmHashTableName), + shmManager_->attachShm(detail::kShmHashTableName, nullptr, + ShmSegmentOpts(PageSizeT::NORMAL, false, config_.isUsingPosixShm())), compressor_, [this](Item* it) -> ItemHandle { return acquire(it); })), chainedItemAccessContainer_(std::make_unique( deserializer_->deserialize(), config_.chainedItemAccessConfig, - shmManager_->attachShm(detail::kShmChainedItemHashTableName), + shmManager_->attachShm(detail::kShmChainedItemHashTableName, nullptr, + ShmSegmentOpts(PageSizeT::NORMAL, false, config_.isUsingPosixShm())), compressor_, [this](Item* it) -> ItemHandle { return acquire(it); })), chainedItemLocks_(config_.chainedItemsLockPower, std::make_shared()), - cacheCreationTime_{*metadata_.cacheCreationTime_ref()}, - nvmCacheState_{config_.cacheDir, config_.isNvmCacheEncryptionEnabled(), - config_.isNvmCacheTruncateAllocSizeEnabled()} { + movesMap_(kShards), + moveLock_(kShards), + cacheCreationTime_{*metadata_.cacheCreationTime_ref()} { + /* TODO - per tier? */ for (auto pid : *metadata_.compactCachePools_ref()) { isCompactCachePool_[pid] = true; } @@ -127,9 +183,57 @@ CacheAllocator::CacheAllocator(SharedMemAttachT, Config config) // We will create a new info shm segment on shutDown(). If we don't remove // this info shm segment here and the new info shm segment's size is larger // than this one, creating new one will fail. - shmManager_->removeShm(detail::kShmInfoName); + shmManager_->removeShm(detail::kShmInfoName, + PosixSysVSegmentOpts(config_.isUsingPosixShm())); } +template +CacheAllocator::CacheAllocator( + typename CacheAllocator::InitMemType type, Config config) + : isOnShm_{type != InitMemType::kNone ? true + : config.memMonitoringEnabled()}, + config_(config.validate()), + tempShm_(type == InitMemType::kNone && isOnShm_ + ? std::make_unique(config_.size) + : nullptr), + shmManager_(type != InitMemType::kNone + ? std::make_unique(config_.cacheDir, + config_.usePosixShm) + : nullptr), + deserializer_(type == InitMemType::kMemAttach ? createDeserializer() + : nullptr), + metadata_{type == InitMemType::kMemAttach + ? deserializeCacheAllocatorMetadata(*deserializer_) + : serialization::CacheAllocatorMetadata{}}, + allocator_(initAllocator(type)), + compactCacheManager_(type != InitMemType::kMemAttach + ? 
std::make_unique(*allocator_) + : restoreCCacheManager()), + compressor_(createPtrCompressor()), + mmContainers_(type == InitMemType::kMemAttach + ? deserializeMMContainers(*deserializer_, compressor_) + : MMContainers{}), + accessContainer_(initAccessContainer( + type, detail::kShmHashTableName, config.accessConfig)), + chainedItemAccessContainer_( + initAccessContainer(type, + detail::kShmChainedItemHashTableName, + config.chainedItemAccessConfig)), + chainedItemLocks_(config_.chainedItemsLockPower, + std::make_shared()), + cacheCreationTime_{ + type != InitMemType::kMemAttach + ? util::getCurrentTimeSec() + : static_cast(*metadata_.cacheCreationTime())}, + cacheInstanceCreationTime_{type != InitMemType::kMemAttach + ? cacheCreationTime_ + : util::getCurrentTimeSec()}, + // Pass in cacheInstnaceCreationTime_ as the current time to keep + // nvmCacheState's current time in sync + nvmCacheState_{cacheInstanceCreationTime_, config_.cacheDir, + config_.isNvmCacheEncryptionEnabled(), + config_.isNvmCacheTruncateAllocSizeEnabled()} {} + template CacheAllocator::~CacheAllocator() { XLOG(DBG, "destructing CacheAllocator"); @@ -141,44 +245,65 @@ CacheAllocator::~CacheAllocator() { } template -std::unique_ptr -CacheAllocator::createNewMemoryAllocator() { +ShmSegmentOpts CacheAllocator::createShmCacheOpts(TierId tid) { ShmSegmentOpts opts; opts.alignment = sizeof(Slab); + opts.typeOpts = memoryTierConfigs[tid].getShmTypeOpts(); + if (auto *v = std::get_if(&opts.typeOpts)) { + v->usePosix = config_.usePosixShm; + } + + return opts; +} + +template +size_t CacheAllocator::memoryTierSize(TierId tid) const +{ + auto partitions = std::accumulate(memoryTierConfigs.begin(), memoryTierConfigs.end(), 0UL, + [](const size_t i, const MemoryTierCacheConfig& config){ + return i + config.getRatio(); + }); + + return memoryTierConfigs[tid].calculateTierSize(config_.getCacheSize(), partitions); +} + +template +std::unique_ptr +CacheAllocator::createNewMemoryAllocator(TierId tid) { return std::make_unique( getAllocatorConfig(config_), shmManager_ - ->createShm(detail::kShmCacheName, config_.size, - config_.slabMemoryBaseAddr, opts) + ->createShm(detail::kShmCacheName + std::to_string(tid), + config_.getCacheSize(), config_.slabMemoryBaseAddr, + createShmCacheOpts(tid)) .addr, - config_.size); + memoryTierSize(tid) + ); } template std::unique_ptr -CacheAllocator::restoreMemoryAllocator() { - ShmSegmentOpts opts; - opts.alignment = sizeof(Slab); +CacheAllocator::restoreMemoryAllocator(TierId tid) { return std::make_unique( deserializer_->deserialize(), shmManager_ - ->attachShm(detail::kShmCacheName, config_.slabMemoryBaseAddr, opts) - .addr, - config_.size, + ->attachShm(detail::kShmCacheName + std::to_string(tid), + config_.slabMemoryBaseAddr, createShmCacheOpts(tid)).addr, + memoryTierSize(tid), config_.disableFullCoredump); } template std::unique_ptr -CacheAllocator::restoreCCacheManager() { +CacheAllocator::restoreCCacheManager(TierId tid) { return std::make_unique( deserializer_->deserialize(), - *allocator_); + *allocator_[tid]); } template void CacheAllocator::initCommon(bool dramCacheAttached) { - if (config_.nvmConfig.has_value()) { + if (config_.isNvmCacheEnabled()) { if (config_.nvmCacheAP) { nvmAdmissionPolicy_ = config_.nvmCacheAP; } else if (config_.rejectFirstAPNumEntries) { @@ -196,48 +321,54 @@ void CacheAllocator::initCommon(bool dramCacheAttached) { } initStats(); initNvmCache(dramCacheAttached); - initWorkers(); + + if (!config_.delayCacheWorkersStart) { + initWorkers(); + } } template void 
CacheAllocator::initNvmCache(bool dramCacheAttached) { - if (!config_.nvmConfig.has_value()) { + if (!config_.isNvmCacheEnabled()) { return; } + nvmCacheState_.emplace(NvmCacheState(config_.cacheDir, config_.isNvmCacheEncryptionEnabled(), + config_.isNvmCacheTruncateAllocSizeEnabled())); + // for some usecases that create pools, restoring nvmcache when dram cache // is not persisted is not supported. const bool shouldDrop = config_.dropNvmCacheOnShmNew && !dramCacheAttached; // if we are dealing with persistency, cache directory should be enabled const bool truncate = config_.cacheDir.empty() || - nvmCacheState_.shouldStartFresh() || shouldDrop; + nvmCacheState_.value().shouldStartFresh() || shouldDrop; if (truncate) { - nvmCacheState_.markTruncated(); + nvmCacheState_.value().markTruncated(); } nvmCache_ = std::make_unique(*this, *config_.nvmConfig, truncate, config_.itemDestructor); if (!config_.cacheDir.empty()) { - nvmCacheState_.clearPrevState(); + nvmCacheState_.value().clearPrevState(); } } template void CacheAllocator::initWorkers() { - if (config_.poolResizingEnabled()) { + if (config_.poolResizingEnabled() && !poolResizer_) { startNewPoolResizer(config_.poolResizeInterval, config_.poolResizeSlabsPerIter, config_.poolResizeStrategy); } - if (config_.poolRebalancingEnabled()) { + if (config_.poolRebalancingEnabled() && !poolRebalancer_) { startNewPoolRebalancer(config_.poolRebalanceInterval, config_.defaultPoolRebalanceStrategy, config_.poolRebalancerFreeAllocThreshold); } - if (config_.memMonitoringEnabled()) { + if (config_.memMonitoringEnabled() && !memMonitor_) { if (!isOnShm_) { throw std::invalid_argument( "Memory monitoring is not supported for cache on heap. It is " @@ -249,11 +380,11 @@ void CacheAllocator::initWorkers() { config_.poolAdviseStrategy); } - if (config_.itemsReaperEnabled()) { + if (config_.itemsReaperEnabled() && !reaper_) { startNewReaper(config_.reaperInterval, config_.reaperConfig); } - if (config_.poolOptimizerEnabled()) { + if (config_.poolOptimizerEnabled() && !poolOptimizer_) { startNewPoolOptimizer(config_.regularPoolOptimizeInterval, config_.compactCacheOptimizeInterval, config_.poolOptimizeStrategy, @@ -261,16 +392,76 @@ void CacheAllocator::initWorkers() { } } +template +std::unique_ptr CacheAllocator::initAllocator( + InitMemType type) { + if (type == InitMemType::kNone) { + if (isOnShm_ == true) { + return std::make_unique( + getAllocatorConfig(config_), tempShm_->getAddr(), config_.size); + } else { + return std::make_unique(getAllocatorConfig(config_), + config_.size); + } + } else if (type == InitMemType::kMemNew) { + return createNewMemoryAllocator(); + } else if (type == InitMemType::kMemAttach) { + return restoreMemoryAllocator(); + } + + // Invalid type + throw std::runtime_error(folly::sformat( + "Cannot initialize memory allocator, unknown InitMemType: {}.", + static_cast(type))); +} + +template +std::unique_ptr::AccessContainer> +CacheAllocator::initAccessContainer(InitMemType type, + const std::string name, + AccessConfig config) { + if (type == InitMemType::kNone) { + return std::make_unique( + config, compressor_, + [this](Item* it) -> WriteHandle { return acquire(it); }); + } else if (type == InitMemType::kMemNew) { + return std::make_unique( + config, + shmManager_ + ->createShm( + name, + AccessContainer::getRequiredSize(config.getNumBuckets()), + nullptr, + ShmSegmentOpts(config.getPageSize())) + .addr, + compressor_, + [this](Item* it) -> WriteHandle { return acquire(it); }); + } else if (type == InitMemType::kMemAttach) { + 
return std::make_unique( + deserializer_->deserialize(), + config, + shmManager_->attachShm(name), + compressor_, + [this](Item* it) -> WriteHandle { return acquire(it); }); + } + + // Invalid type + throw std::runtime_error(folly::sformat( + "Cannot initialize access container, unknown InitMemType: {}.", + static_cast(type))); +} + template std::unique_ptr CacheAllocator::createDeserializer() { - auto infoAddr = shmManager_->attachShm(detail::kShmInfoName); + auto infoAddr = shmManager_->attachShm(detail::kShmInfoName, nullptr, + ShmSegmentOpts(PageSizeT::NORMAL, false, config_.isUsingPosixShm())); return std::make_unique( reinterpret_cast(infoAddr.addr), reinterpret_cast(infoAddr.addr) + infoAddr.size); } template -typename CacheAllocator::ItemHandle +typename CacheAllocator::WriteHandle CacheAllocator::allocate(PoolId poolId, typename Item::Key key, uint32_t size, @@ -285,7 +476,8 @@ CacheAllocator::allocate(PoolId poolId, template typename CacheAllocator::ItemHandle -CacheAllocator::allocateInternal(PoolId pid, +CacheAllocator::allocateInternalTier(TierId tid, + PoolId pid, typename Item::Key key, uint32_t size, uint32_t creationTime, @@ -298,16 +490,20 @@ CacheAllocator::allocateInternal(PoolId pid, const auto requiredSize = Item::getRequiredSize(key, size); // the allocation class in our memory allocator. - const auto cid = allocator_->getAllocationClassId(pid, requiredSize); + const auto cid = allocator_[tid]->getAllocationClassId(pid, requiredSize); + util::RollingLatencyTracker rollTracker{(*stats_.classAllocLatency)[tid][pid][cid]}; + // TODO: per-tier (*stats_.allocAttempts)[pid][cid].inc(); - void* memory = allocator_->allocate(pid, requiredSize); + void* memory = allocator_[tid]->allocate(pid, requiredSize); + // TODO: Today disableEviction means do not evict from memory (DRAM). + // Should we support eviction between memory tiers (e.g. from DRAM to PMEM)? if (memory == nullptr && !config_.disableEviction) { - memory = findEviction(pid, cid); + memory = findEviction(tid, pid, cid); } - ItemHandle handle; + WriteHandle handle; if (memory != nullptr) { // At this point, we have a valid memory allocation that is ready for use. // Ensure that when we abort from here under any circumstances, we free up @@ -315,7 +511,7 @@ CacheAllocator::allocateInternal(PoolId pid, // for example. SCOPE_FAIL { // free back the memory to the allocator since we failed. - allocator_->free(memory); + allocator_[tid]->free(memory); }; handle = acquire(new (memory) Item(key, size, creationTime, expiryTime)); @@ -326,7 +522,7 @@ CacheAllocator::allocateInternal(PoolId pid, } } else { // failed to allocate memory. 
- (*stats_.allocFailures)[pid][cid].inc(); + (*stats_.allocFailures)[pid][cid].inc(); // TODO: per-tier // wake up rebalancer if (poolRebalancer_) { poolRebalancer_->wakeUp(); @@ -343,6 +539,21 @@ CacheAllocator::allocateInternal(PoolId pid, return handle; } +template +typename CacheAllocator::WriteHandle +CacheAllocator::allocateInternal(PoolId pid, + typename Item::Key key, + uint32_t size, + uint32_t creationTime, + uint32_t expiryTime) { + auto tid = 0; /* TODO: consult admission policy */ + for(TierId tid = 0; tid < numTiers_; ++tid) { + auto handle = allocateInternalTier(tid, pid, key, size, creationTime, expiryTime); + if (handle) return handle; + } + return {}; +} + template typename CacheAllocator::WriteHandle CacheAllocator::allocateChainedItem(const ReadHandle& parent, @@ -373,21 +584,28 @@ CacheAllocator::allocateChainedItemInternal( // number of bytes required for this item const auto requiredSize = ChainedItem::getRequiredSize(size); - const auto pid = allocator_->getAllocInfo(parent->getMemory()).poolId; - const auto cid = allocator_->getAllocationClassId(pid, requiredSize); + // TODO: is this correct? + auto tid = getTierId(*parent); + + const auto pid = allocator_[tid]->getAllocInfo(parent->getMemory()).poolId; + const auto cid = allocator_[tid]->getAllocationClassId(pid, requiredSize); + + util::RollingLatencyTracker rollTracker{(*stats_.classAllocLatency)[tid][pid][cid]}; + // TODO: per-tier? Right now stats_ are not used in any public periodic + // worker (*stats_.allocAttempts)[pid][cid].inc(); - void* memory = allocator_->allocate(pid, requiredSize); + void* memory = allocator_[tid]->allocate(pid, requiredSize); if (memory == nullptr) { - memory = findEviction(pid, cid); + memory = findEviction(tid, pid, cid); } if (memory == nullptr) { (*stats_.allocFailures)[pid][cid].inc(); - return ItemHandle{}; + return WriteHandle{}; } - SCOPE_FAIL { allocator_->free(memory); }; + SCOPE_FAIL { allocator_[tid]->free(memory); }; auto child = acquire( new (memory) ChainedItem(compressor_.compress(parent.getInternal()), size, @@ -403,8 +621,8 @@ CacheAllocator::allocateChainedItemInternal( } template -void CacheAllocator::addChainedItem(ItemHandle& parent, - ItemHandle child) { +void CacheAllocator::addChainedItem(WriteHandle& parent, + WriteHandle child) { if (!parent || !child || !child->isChainedItem()) { throw std::invalid_argument( folly::sformat("Invalid parent or child. parent: {}, child: {}", @@ -448,14 +666,14 @@ void CacheAllocator::addChainedItem(ItemHandle& parent, } template -typename CacheAllocator::ItemHandle -CacheAllocator::popChainedItem(ItemHandle& parent) { +typename CacheAllocator::WriteHandle +CacheAllocator::popChainedItem(WriteHandle& parent) { if (!parent || !parent->hasChainedItem()) { throw std::invalid_argument(folly::sformat( "Invalid parent {}", parent ? parent->toString() : nullptr)); } - ItemHandle head; + WriteHandle head; { // scope of chained item lock. auto l = chainedItemLocks_.lockExclusive(parent->getKey()); @@ -502,8 +720,8 @@ CacheAllocator::getParentKey(const Item& chainedItem) { } template -void CacheAllocator::transferChainLocked(ItemHandle& parent, - ItemHandle& newParent) { +void CacheAllocator::transferChainLocked(WriteHandle& parent, + WriteHandle& newParent) { // parent must be in a state to not have concurrent readers. Eviction code // paths rely on holding the last item handle. Since we hold on to an item // handle here, the chain will not be touched by any eviction code path. 
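
The new `allocateInternal` introduced above is the heart of the multi-tier allocation path: it walks the configured tiers in order, calls `allocateInternalTier` for each, and returns the first handle that succeeds (allocation within a tier may itself trigger an in-tier eviction). The patch leaves a TODO to consult an admission policy, so for now tier order alone decides placement. The standalone sketch below only models that fallback control flow; `Tier`, `Handle`, `tryAllocate`, and `allocateAcrossTiers` are simplified illustrative stand-ins, not CacheLib APIs.

```cpp
#include <cstddef>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

// Simplified stand-ins for illustration only; not CacheLib types.
struct Handle {
  std::string key;
  int tier;  // which tier ended up holding the item
};

struct Tier {
  std::size_t freeBytes;

  // Returns a handle if this tier can hold the item, otherwise nullopt
  // (standing in for "allocation and eviction both failed in this tier").
  std::optional<Handle> tryAllocate(const std::string& key,
                                    std::size_t size,
                                    int tierId) {
    if (size > freeBytes) {
      return std::nullopt;
    }
    freeBytes -= size;
    return Handle{key, tierId};
  }
};

// Mirrors the shape of the new allocateInternal(): try each tier in order
// and return the first successful allocation; an empty optional means the
// request failed in every tier.
std::optional<Handle> allocateAcrossTiers(std::vector<Tier>& tiers,
                                          const std::string& key,
                                          std::size_t size) {
  for (int tid = 0; tid < static_cast<int>(tiers.size()); ++tid) {
    if (auto handle = tiers[tid].tryAllocate(key, size, tid)) {
      return handle;
    }
  }
  return std::nullopt;
}

int main() {
  // e.g. a small DRAM-like tier followed by a larger second tier
  std::vector<Tier> tiers{{64}, {1024}};
  auto h = allocateAcrossTiers(tiers, "user:42", 256);
  std::cout << (h ? "allocated in tier " + std::to_string(h->tier)
                  : std::string("failed"))
            << "\n";
  return 0;
}
```

In this simplified model, the request that does not fit in the first tier falls through to the second, which is the same first-fit-by-tier-order behavior the patch implements until a real admission policy is plugged in.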
@@ -545,7 +763,7 @@ void CacheAllocator::transferChainLocked(ItemHandle& parent, template void CacheAllocator::transferChainAndReplace( - ItemHandle& parent, ItemHandle& newParent) { + WriteHandle& parent, WriteHandle& newParent) { if (!parent || !newParent) { throw std::invalid_argument("invalid parent or new parent"); } @@ -591,9 +809,9 @@ bool CacheAllocator::replaceIfAccessible(Item& oldItem, } template -typename CacheAllocator::ItemHandle +typename CacheAllocator::WriteHandle CacheAllocator::replaceChainedItem(Item& oldItem, - ItemHandle newItemHandle, + WriteHandle newItemHandle, Item& parent) { if (!newItemHandle) { throw std::invalid_argument("Empty handle for newItem"); @@ -618,9 +836,9 @@ CacheAllocator::replaceChainedItem(Item& oldItem, } template -typename CacheAllocator::ItemHandle +typename CacheAllocator::WriteHandle CacheAllocator::replaceChainedItemLocked(Item& oldItem, - ItemHandle newItemHdl, + WriteHandle newItemHdl, const Item& parent) { XDCHECK(newItemHdl != nullptr); XDCHECK_GE(1u, oldItem.getRefCount()); @@ -696,8 +914,8 @@ CacheAllocator::releaseBackToAllocator(Item& it, throw std::runtime_error( folly::sformat("cannot release this item: {}", it.toString())); } - - const auto allocInfo = allocator_->getAllocInfo(it.getMemory()); + const auto tid = getTierId(it); + const auto allocInfo = allocator_[tid]->getAllocInfo(it.getMemory()); if (ctx == RemoveContext::kEviction) { const auto timeNow = util::getCurrentTimeSec(); @@ -721,8 +939,7 @@ CacheAllocator::releaseBackToAllocator(Item& it, folly::sformat("Can not recycle a chained item {}, toRecyle", it.toString(), toRecycle->toString())); } - - allocator_->free(&it); + allocator_[tid]->free(&it); return ReleaseRes::kReleased; } @@ -781,7 +998,7 @@ CacheAllocator::releaseBackToAllocator(Item& it, auto next = head->getNext(compressor_); const auto childInfo = - allocator_->getAllocInfo(static_cast(head)); + allocator_[tid]->getAllocInfo(static_cast(head)); (*stats_.fragmentationSize)[childInfo.poolId][childInfo.classId].sub( util::getFragmentation(*this, *head)); @@ -814,7 +1031,7 @@ CacheAllocator::releaseBackToAllocator(Item& it, XDCHECK(ReleaseRes::kReleased != res); res = ReleaseRes::kRecycled; } else { - allocator_->free(head); + allocator_[tid]->free(head); } } @@ -829,7 +1046,7 @@ CacheAllocator::releaseBackToAllocator(Item& it, res = ReleaseRes::kRecycled; } else { XDCHECK(it.isDrained()); - allocator_->free(&it); + allocator_[tid]->free(&it); } return res; @@ -850,16 +1067,16 @@ RefcountWithFlags::Value CacheAllocator::decRef(Item& it) { } template -typename CacheAllocator::ItemHandle +typename CacheAllocator::WriteHandle CacheAllocator::acquire(Item* it) { if (UNLIKELY(!it)) { - return ItemHandle{}; + return WriteHandle{}; } SCOPE_FAIL { stats_.numRefcountOverflow.inc(); }; incRef(*it); - return ItemHandle{it, *this}; + return WriteHandle{it, *this}; } template @@ -901,6 +1118,25 @@ bool CacheAllocator::replaceInMMContainer(Item& oldItem, } } +template +bool CacheAllocator::replaceInMMContainer(Item* oldItem, + Item& newItem) { + return replaceInMMContainer(*oldItem, newItem); +} + +template +bool CacheAllocator::replaceInMMContainer(EvictionIterator& oldItemIt, + Item& newItem) { + auto& oldContainer = getMMContainer(*oldItemIt); + auto& newContainer = getMMContainer(newItem); + + // This function is used for eviction across tiers + XDCHECK(&oldContainer != &newContainer); + oldContainer.remove(oldItemIt); + + return newContainer.add(newItem); +} + template bool 
CacheAllocator::replaceChainedItemInMMContainer( Item& oldItem, Item& newItem) { @@ -943,12 +1179,12 @@ void CacheAllocator::insertInMMContainer(Item& item) { */ template -bool CacheAllocator::insert(const ItemHandle& handle) { +bool CacheAllocator::insert(const WriteHandle& handle) { return insertImpl(handle, AllocatorApiEvent::INSERT); } template -bool CacheAllocator::insertImpl(const ItemHandle& handle, +bool CacheAllocator::insertImpl(const WriteHandle& handle, AllocatorApiEvent event) { XDCHECK(handle); XDCHECK(event == AllocatorApiEvent::INSERT || @@ -984,17 +1220,19 @@ bool CacheAllocator::insertImpl(const ItemHandle& handle, } template -typename CacheAllocator::ItemHandle -CacheAllocator::insertOrReplace(const ItemHandle& handle) { +typename CacheAllocator::WriteHandle +CacheAllocator::insertOrReplace(const WriteHandle& handle) { XDCHECK(handle); if (handle->isAccessible()) { throw std::invalid_argument("Handle is already accessible"); } + HashedKey hk{handle->getKey()}; + insertInMMContainer(*(handle.getInternal())); - ItemHandle replaced; + WriteHandle replaced; try { - auto lock = nvmCache_ ? nvmCache_->getItemDestructorLock(handle->getKey()) + auto lock = nvmCache_ ? nvmCache_->getItemDestructorLock(hk) : std::unique_lock(); replaced = accessContainer_->insertOrReplace(*(handle.getInternal())); @@ -1003,7 +1241,7 @@ CacheAllocator::insertOrReplace(const ItemHandle& handle) { // item is to be replaced and the destructor will be executed // upon memory released, mark it in nvm to avoid destructor // executed from nvm - nvmCache_->markNvmItemRemovedLocked(handle->getKey()); + nvmCache_->markNvmItemRemovedLocked(hk); } } catch (const std::exception&) { removeFromMMContainer(*(handle.getInternal())); @@ -1026,8 +1264,7 @@ CacheAllocator::insertOrReplace(const ItemHandle& handle) { // We can avoid nvm delete only if we have non nvm clean item in cache. // In all other cases we must enqueue delete. if (!replaced || replaced->isNvmClean()) { - nvmCache_->remove(handle->getKey(), - nvmCache_->createDeleteTombStone(handle->getKey())); + nvmCache_->remove(hk, nvmCache_->createDeleteTombStone(hk)); } } @@ -1045,9 +1282,159 @@ CacheAllocator::insertOrReplace(const ItemHandle& handle) { return replaced; } +/* Next two methods are used to asynchronously move Item between memory tiers. + * + * The thread, which moves Item, allocates new Item in the tier we are moving to + * and calls moveRegularItemOnEviction() method. This method does the following: + * 1. Create MoveCtx and put it to the movesMap. + * 2. Update the access container with the new item from the tier we are + * moving to. This Item has kIncomplete flag set. + * 3. Copy data from the old Item to the new one. + * 4. Unset the kIncomplete flag and Notify MoveCtx + * + * Concurrent threads which are getting handle to the same key: + * 1. When a handle is created it checks if the kIncomplete flag is set + * 2. If so, Handle implementation creates waitContext and adds it to the + * MoveCtx by calling addWaitContextForMovingItem() method. + * 3. Wait until the moving thread will complete its job. 
+ */ +template +bool CacheAllocator::addWaitContextForMovingItem( + folly::StringPiece key, std::shared_ptr> waiter) { + auto shard = getShardForKey(key); + auto& movesMap = getMoveMapForShard(shard); + auto lock = getMoveLockForShard(shard); + auto it = movesMap.find(key); + if (it == movesMap.end()) { + return false; + } + auto ctx = it->second.get(); + ctx->addWaiter(std::move(waiter)); + return true; +} + +template +typename CacheAllocator::ItemHandle +CacheAllocator::moveRegularItemOnEviction( + Item& oldItem, ItemHandle& newItemHdl) { + XDCHECK(oldItem.isMoving()); + // TODO: should we introduce new latency tracker. E.g. evictRegularLatency_ + // ??? util::LatencyTracker tracker{stats_.evictRegularLatency_}; + + if (!oldItem.isAccessible() || oldItem.isExpired()) { + return {}; + } + + XDCHECK_EQ(newItemHdl->getSize(), oldItem.getSize()); + XDCHECK_NE(getTierId(oldItem), getTierId(*newItemHdl)); + + // take care of the flags before we expose the item to be accessed. this + // will ensure that when another thread removes the item from RAM, we issue + // a delete accordingly. See D7859775 for an example + if (oldItem.isNvmClean()) { + newItemHdl->markNvmClean(); + } + + folly::StringPiece key(oldItem.getKey()); + auto shard = getShardForKey(key); + auto& movesMap = getMoveMapForShard(shard); + MoveCtx* ctx(nullptr); + { + auto lock = getMoveLockForShard(shard); + auto res = movesMap.try_emplace(key, std::make_unique()); + if (!res.second) { + return {}; + } + ctx = res.first->second.get(); + } + + auto resHdl = ItemHandle{}; + auto guard = folly::makeGuard([key, this, ctx, shard, &resHdl]() { + auto& movesMap = getMoveMapForShard(shard); + if (resHdl) + resHdl->unmarkIncomplete(); + auto lock = getMoveLockForShard(shard); + ctx->setItemHandle(std::move(resHdl)); + movesMap.erase(key); + }); + + // TODO: Possibly we can use markMoving() instead. But today + // moveOnSlabRelease logic assume that we mark as moving old Item + // and than do copy and replace old Item with the new one in access + // container. Furthermore, Item can be marked as Moving only + // if it is linked to MM container. In our case we mark the new Item + // and update access container before the new Item is ready (content is + // copied). + newItemHdl->markIncomplete(); + + // Inside the access container's lock, this checks if the old item is + // accessible and its refcount is zero. If the item is not accessible, + // there is no point to replace it since it had already been removed + // or in the process of being removed. If the item is in cache but the + // refcount is non-zero, it means user could be attempting to remove + // this item through an API such as remove(ItemHandle). In this case, + // it is unsafe to replace the old item with a new one, so we should + // also abort. + if (!accessContainer_->replaceIf(oldItem, *newItemHdl, + itemMovingPredicate)) { + return {}; + } + + if (config_.moveCb) { + // Execute the move callback. We cannot make any guarantees about the + // consistency of the old item beyond this point, because the callback can + // do more than a simple memcpy() e.g. update external references. If there + // are any remaining handles to the old item, it is the caller's + // responsibility to invalidate them. The move can only fail after this + // statement if the old item has been removed or replaced, in which case it + // should be fine for it to be left in an inconsistent state. 
+ config_.moveCb(oldItem, *newItemHdl, nullptr); + } else { + std::memcpy(newItemHdl->getWritableMemory(), oldItem.getMemory(), + oldItem.getSize()); + } + + // Inside the MM container's lock, this checks if the old item exists to + // make sure that no other thread removed it, and only then replaces it. + if (!replaceInMMContainer(oldItem, *newItemHdl)) { + accessContainer_->remove(*newItemHdl); + return {}; + } + + // Replacing into the MM container was successful, but someone could have + // called insertOrReplace() or remove() before or after the + // replaceInMMContainer() operation, which would invalidate newItemHdl. + if (!newItemHdl->isAccessible()) { + removeFromMMContainer(*newItemHdl); + return {}; + } + + // no one can add or remove chained items at this point + if (oldItem.hasChainedItem()) { + // safe to acquire handle for a moving Item + auto oldHandle = acquire(&oldItem); + XDCHECK_EQ(1u, oldHandle->getRefCount()) << oldHandle->toString(); + XDCHECK(!newItemHdl->hasChainedItem()) << newItemHdl->toString(); + try { + auto l = chainedItemLocks_.lockExclusive(oldItem.getKey()); + transferChainLocked(oldHandle, newItemHdl); + } catch (const std::exception& e) { + // this should never happen because we drained all the handles. + XLOGF(DFATAL, "{}", e.what()); + throw; + } + + XDCHECK(!oldItem.hasChainedItem()); + XDCHECK(newItemHdl->hasChainedItem()); + } + newItemHdl.unmarkNascent(); + resHdl = std::move(newItemHdl); // guard will assign it to ctx under lock + return acquire(&oldItem); +} + template bool CacheAllocator::moveRegularItem(Item& oldItem, - ItemHandle& newItemHdl) { + WriteHandle& newItemHdl) { XDCHECK(config_.moveCb); util::LatencyTracker tracker{stats_.moveRegularLatency_}; @@ -1080,7 +1467,7 @@ bool CacheAllocator::moveRegularItem(Item& oldItem, // there is no point to replace it since it had already been removed // or in the process of being removed. If the item is in cache but the // refcount is non-zero, it means user could be attempting to remove - // this item through an API such as remove(ItemHandle). In this case, + // this item through an API such as remove(itemHandle). In this case, // it is unsafe to replace the old item with a new one, so we should // also abort. 
if (!accessContainer_->replaceIf(oldItem, *newItemHdl, itemMovingPredicate)) { @@ -1126,7 +1513,7 @@ bool CacheAllocator::moveRegularItem(Item& oldItem, template bool CacheAllocator::moveChainedItem(ChainedItem& oldItem, - ItemHandle& newItemHdl) { + WriteHandle& newItemHdl) { XDCHECK(config_.moveCb); util::LatencyTracker tracker{stats_.moveChainedLatency_}; @@ -1167,7 +1554,7 @@ bool CacheAllocator::moveChainedItem(ChainedItem& oldItem, return false; } - auto parentPtr = parentHandle.get(); + auto parentPtr = parentHandle.getInternal(); XDCHECK_EQ(reinterpret_cast(parentPtr), reinterpret_cast(&oldItem.getParentItem(compressor_))); @@ -1187,41 +1574,70 @@ bool CacheAllocator::moveChainedItem(ChainedItem& oldItem, template typename CacheAllocator::Item* -CacheAllocator::findEviction(PoolId pid, ClassId cid) { - auto& mmContainer = getMMContainer(pid, cid); +CacheAllocator::findEviction(TierId tid, PoolId pid, ClassId cid) { + auto& mmContainer = getMMContainer(tid, pid, cid); // Keep searching for a candidate until we were able to evict it // or until the search limit has been exhausted unsigned int searchTries = 0; - auto itr = mmContainer.getEvictionIterator(); while ((config_.evictionSearchTries == 0 || - config_.evictionSearchTries > searchTries) && - itr) { + config_.evictionSearchTries > searchTries)) { ++searchTries; - Item* candidate = itr.get(); + Item* toRecycle = nullptr; + Item* candidate = nullptr; + + mmContainer.withEvictionIterator([this, &candidate, &toRecycle, &searchTries](auto &&itr){ + while ((config_.evictionSearchTries == 0 || + config_.evictionSearchTries > searchTries) && itr) { + ++searchTries; + + auto *toRecycle_ = itr.get(); + auto *candidate_ = toRecycle_->isChainedItem() + ? &toRecycle_->asChainedItem().getParentItem(compressor_) + : toRecycle_; + + // make sure no other thead is evicting the item + if (candidate_->getRefCount() == 0 && candidate_->markMoving()) { + toRecycle = toRecycle_; + candidate = candidate_; + return; + } + + ++itr; + } + }); + + if (!toRecycle) + continue; + + XDCHECK(toRecycle); + XDCHECK(candidate); + // for chained items, the ownership of the parent can change. We try to // evict what we think as parent and see if the eviction of parent // recycles the child we intend to. auto toReleaseHandle = - itr->isChainedItem() - ? advanceIteratorAndTryEvictChainedItem(itr) - : advanceIteratorAndTryEvictRegularItem(mmContainer, itr); + evictNormalItem(*candidate, true /* skipIfTokenInvalid */); + auto ref = candidate->unmarkMoving(); - if (toReleaseHandle) { - if (toReleaseHandle->hasChainedItem()) { + if (toReleaseHandle || ref == 0u) { + if (candidate->hasChainedItem()) { (*stats_.chainedItemEvictions)[pid][cid].inc(); } else { (*stats_.regularItemEvictions)[pid][cid].inc(); } + } else { + if (candidate->hasChainedItem()) { + stats_.evictFailParentAC.inc(); + } else { + stats_.evictFailAC.inc(); + } + } - // Invalidate iterator since later on we may use this mmContainer - // again, which cannot be done unless we drop this iterator - itr.destroy(); - - // we must be the last handle and for chained items, this will be - // the parent. 
- XDCHECK(toReleaseHandle.get() == candidate || candidate->isChainedItem()); + if (toReleaseHandle) { + XDCHECK(toReleaseHandle.get() == candidate); + XDCHECK(toRecycle == candidate || toRecycle->isChainedItem()); XDCHECK_EQ(1u, toReleaseHandle->getRefCount()); // We manually release the item here because we don't want to @@ -1237,15 +1653,18 @@ CacheAllocator::findEviction(PoolId pid, ClassId cid) { // recycle the candidate. if (ReleaseRes::kRecycled == releaseBackToAllocator(itemToRelease, RemoveContext::kEviction, - /* isNascent */ false, candidate)) { - return candidate; + /* isNascent */ false, toRecycle)) { + return toRecycle; + } + } else if (ref == 0u) { + // it's safe to recycle the item here as there are no more + // references and the item could not been marked as moving + // by other thread since it's detached from MMContainer. + if (ReleaseRes::kRecycled == + releaseBackToAllocator(*candidate, RemoveContext::kEviction, + /* isNascent */ false, toRecycle)) { + return toRecycle; } - } - - // If we destroyed the itr to possibly evict and failed, we restart - // from the beginning again - if (!itr) { - itr.resetToBegin(); } } return nullptr; @@ -1300,140 +1719,37 @@ bool CacheAllocator::shouldWriteToNvmCacheExclusive( } template -typename CacheAllocator::ItemHandle -CacheAllocator::advanceIteratorAndTryEvictRegularItem( - MMContainer& mmContainer, EvictionIterator& itr) { - // we should flush this to nvmcache if it is not already present in nvmcache - // and the item is not expired. - Item& item = *itr; - const bool evictToNvmCache = shouldWriteToNvmCache(item); - - auto token = evictToNvmCache ? nvmCache_->createPutToken(item.getKey()) - : typename NvmCacheT::PutToken{}; - // record the in-flight eviciton. If not, we move on to next item to avoid - // stalling eviction. - if (evictToNvmCache && !token.isValid()) { - ++itr; - stats_.evictFailConcurrentFill.inc(); - return ItemHandle{}; - } - - // If there are other accessors, we should abort. Acquire a handle here since - // if we remove the item from both access containers and mm containers - // below, we will need a handle to ensure proper cleanup in case we end up - // not evicting this item - auto evictHandle = accessContainer_->removeIf(item, &itemEvictionPredicate); +typename CacheAllocator::WriteHandle +CacheAllocator::tryEvictToNextMemoryTier( + TierId tid, PoolId pid, Item& item) { + if(item.isChainedItem()) return {}; // TODO: We do not support ChainedItem yet + if(item.isExpired()) return acquire(&item); + + TierId nextTier = tid; // TODO - calculate this based on some admission policy + while (++nextTier < numTiers_) { // try to evict down to the next memory tiers + // allocateInternal might trigger another eviction + auto newItemHdl = allocateInternalTier(nextTier, pid, + item.getKey(), + item.getSize(), + item.getCreationTime(), + item.getExpiryTime()); - if (!evictHandle) { - ++itr; - stats_.evictFailAC.inc(); - return evictHandle; - } + if (newItemHdl) { + XDCHECK_EQ(newItemHdl->getSize(), item.getSize()); - mmContainer.remove(itr); - XDCHECK_EQ(reinterpret_cast(evictHandle.get()), - reinterpret_cast(&item)); - XDCHECK(!evictHandle->isInMMContainer()); - XDCHECK(!evictHandle->isAccessible()); - - // If the item is now marked as moving, that means its corresponding slab is - // being released right now. So, we look for the next item that is eligible - // for eviction. It is safe to destroy the handle here since the moving bit - // is set. Iterator was already advance by the remove call above. 
- if (evictHandle->isMoving()) { - stats_.evictFailMove.inc(); - return ItemHandle{}; + return moveRegularItemOnEviction(item, newItemHdl); + } } - // Invalidate iterator since later on if we are not evicting this - // item, we may need to rely on the handle we created above to ensure - // proper cleanup if the item's raw refcount has dropped to 0. - // And since this item may be a parent item that has some child items - // in this very same mmContainer, we need to make sure we drop this - // exclusive iterator so we can gain access to it when we're cleaning - // up the child items - itr.destroy(); - - // Ensure that there are no accessors after removing from the access - // container - XDCHECK(evictHandle->getRefCount() == 1); - - if (evictToNvmCache && shouldWriteToNvmCacheExclusive(item)) { - XDCHECK(token.isValid()); - nvmCache_->put(evictHandle, std::move(token)); - } - return evictHandle; + return {}; } template -typename CacheAllocator::ItemHandle -CacheAllocator::advanceIteratorAndTryEvictChainedItem( - EvictionIterator& itr) { - XDCHECK(itr->isChainedItem()); - - ChainedItem* candidate = &itr->asChainedItem(); - ++itr; - - // The parent could change at any point through transferChain. However, if - // that happens, we would realize that the releaseBackToAllocator return - // kNotRecycled and we would try another chained item, leading to transient - // failure. - auto& parent = candidate->getParentItem(compressor_); - - const bool evictToNvmCache = shouldWriteToNvmCache(parent); - - auto token = evictToNvmCache ? nvmCache_->createPutToken(parent.getKey()) - : typename NvmCacheT::PutToken{}; - - // if token is invalid, return. iterator is already advanced. - if (evictToNvmCache && !token.isValid()) { - stats_.evictFailConcurrentFill.inc(); - return ItemHandle{}; - } - - // check if the parent exists in the hashtable and refcount is drained. - auto parentHandle = - accessContainer_->removeIf(parent, &itemEvictionPredicate); - if (!parentHandle) { - stats_.evictFailParentAC.inc(); - return parentHandle; - } - - // Invalidate iterator since later on we may use the mmContainer - // associated with this iterator which cannot be done unless we - // drop this iterator - // - // This must be done once we know the parent is not nullptr. - // Since we can very well be the last holder of this parent item, - // which may have a chained item that is linked in this MM container. - itr.destroy(); - - // Ensure we have the correct parent and we're the only user of the - // parent, then free it from access container. Otherwise, we abort - XDCHECK_EQ(reinterpret_cast(&parent), - reinterpret_cast(parentHandle.get())); - XDCHECK_EQ(1u, parent.getRefCount()); - - removeFromMMContainer(*parentHandle); - - XDCHECK(!parent.isInMMContainer()); - XDCHECK(!parent.isAccessible()); - - // We need to make sure the parent is not marked as moving - // and we're the only holder of the parent item. Safe to destroy the handle - // here since moving bit is set. 
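Both of the removed helpers gate flash writes on a put token: the token is acquired before the item leaves DRAM, and an invalid token (a concurrent fill for the same key already in flight) makes the eviction skip the NVM write. A loose, self-contained illustration of that gating follows; `PutTokenRegistry` is invented for the sketch and is not the NvmCache API.

```cpp
#include <mutex>
#include <string>
#include <unordered_set>

// Only the first caller per key gets a "valid token"; later callers see an
// in-flight fill for the same key and skip writing it to flash.
class PutTokenRegistry {
 public:
  bool tryAcquire(const std::string& key) {
    std::lock_guard<std::mutex> g(m_);
    return inFlight_.insert(key).second;
  }
  void release(const std::string& key) {
    std::lock_guard<std::mutex> g(m_);
    inFlight_.erase(key);
  }

 private:
  std::mutex m_;
  std::unordered_set<std::string> inFlight_;
};
```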
- if (parentHandle->isMoving()) { - stats_.evictFailParentMove.inc(); - return ItemHandle{}; - } - - if (evictToNvmCache && shouldWriteToNvmCacheExclusive(*parentHandle)) { - XDCHECK(token.isValid()); - XDCHECK(parentHandle->hasChainedItem()); - nvmCache_->put(parentHandle, std::move(token)); - } - - return parentHandle; +typename CacheAllocator::WriteHandle +CacheAllocator::tryEvictToNextMemoryTier(Item& item) { + auto tid = getTierId(item); + auto pid = allocator_[tid]->getAllocInfo(item.getMemory()).poolId; + return tryEvictToNextMemoryTier(tid, pid, item); } template @@ -1483,14 +1799,15 @@ CacheAllocator::remove(typename Item::Key key) { // flight after removing from the hashtable. // stats_.numCacheRemoves.inc(); + HashedKey hk{key}; using Guard = typename NvmCacheT::DeleteTombStoneGuard; - auto tombStone = nvmCache_ ? nvmCache_->createDeleteTombStone(key) : Guard{}; + auto tombStone = nvmCache_ ? nvmCache_->createDeleteTombStone(hk) : Guard{}; auto handle = findInternal(key); if (!handle) { if (nvmCache_) { - nvmCache_->remove(key, std::move(tombStone)); + nvmCache_->remove(hk, std::move(tombStone)); } if (auto eventTracker = getEventTracker()) { eventTracker->record(AllocatorApiEvent::REMOVE, key, @@ -1499,13 +1816,13 @@ CacheAllocator::remove(typename Item::Key key) { return RemoveRes::kNotFoundInRam; } - return removeImpl(*handle, std::move(tombStone)); + return removeImpl(hk, *handle, std::move(tombStone)); } template bool CacheAllocator::removeFromRamForTesting( typename Item::Key key) { - return removeImpl(*findInternal(key), DeleteTombStoneGuard{}, + return removeImpl(HashedKey{key}, *findInternal(key), DeleteTombStoneGuard{}, false /* removeFromNvm */) == RemoveRes::kSuccess; } @@ -1513,7 +1830,8 @@ template void CacheAllocator::removeFromNvmForTesting( typename Item::Key key) { if (nvmCache_) { - nvmCache_->remove(key, nvmCache_->createDeleteTombStone(key)); + HashedKey hk{key}; + nvmCache_->remove(hk, nvmCache_->createDeleteTombStone(hk)); } } @@ -1546,9 +1864,10 @@ CacheAllocator::remove(AccessIterator& it) { AllocatorApiResult::REMOVED, it->getSize(), it->getConfiguredTTL().count()); } - auto tombstone = nvmCache_ ? nvmCache_->createDeleteTombStone(it->getKey()) - : DeleteTombStoneGuard{}; - return removeImpl(*it, std::move(tombstone)); + HashedKey hk{it->getKey()}; + auto tombstone = + nvmCache_ ? nvmCache_->createDeleteTombStone(hk) : DeleteTombStoneGuard{}; + return removeImpl(hk, *it, std::move(tombstone)); } template @@ -1558,20 +1877,22 @@ CacheAllocator::remove(const ReadHandle& it) { if (!it) { throw std::invalid_argument("Trying to remove a null item handle"); } - auto tombstone = nvmCache_ ? nvmCache_->createDeleteTombStone(it->getKey()) - : DeleteTombStoneGuard{}; - return removeImpl(*(it.getInternal()), std::move(tombstone)); + HashedKey hk{it->getKey()}; + auto tombstone = + nvmCache_ ? nvmCache_->createDeleteTombStone(hk) : DeleteTombStoneGuard{}; + return removeImpl(hk, *(it.getInternal()), std::move(tombstone)); } template typename CacheAllocator::RemoveRes -CacheAllocator::removeImpl(Item& item, +CacheAllocator::removeImpl(HashedKey hk, + Item& item, DeleteTombStoneGuard tombstone, bool removeFromNvm, bool recordApiEvent) { bool success = false; { - auto lock = nvmCache_ ? nvmCache_->getItemDestructorLock(item.getKey()) + auto lock = nvmCache_ ? 
nvmCache_->getItemDestructorLock(hk) : std::unique_lock(); success = accessContainer_->remove(item); @@ -1580,7 +1901,7 @@ CacheAllocator::removeImpl(Item& item, // item is to be removed and the destructor will be executed // upon memory released, mark it in nvm to avoid destructor // executed from nvm - nvmCache_->markNvmItemRemovedLocked(item.getKey()); + nvmCache_->markNvmItemRemovedLocked(hk); } } XDCHECK(!item.isAccessible()); @@ -1594,7 +1915,7 @@ CacheAllocator::removeImpl(Item& item, // have it be written to NVM. if (removeFromNvm && item.isNvmClean()) { XDCHECK(tombstone); - nvmCache_->remove(item.getKey(), std::move(tombstone)); + nvmCache_->remove(hk, std::move(tombstone)); } auto eventTracker = getEventTracker(); @@ -1617,49 +1938,68 @@ CacheAllocator::removeImpl(Item& item, template void CacheAllocator::invalidateNvm(Item& item) { if (nvmCache_ != nullptr && item.isAccessible() && item.isNvmClean()) { + HashedKey hk{item.getKey()}; { - auto lock = nvmCache_->getItemDestructorLock(item.getKey()); + auto lock = nvmCache_->getItemDestructorLock(hk); if (!item.isNvmEvicted() && item.isNvmClean() && item.isAccessible()) { // item is being updated and invalidated in nvm. Mark the item to avoid // destructor to be executed from nvm - nvmCache_->markNvmItemRemovedLocked(item.getKey()); + nvmCache_->markNvmItemRemovedLocked(hk); } item.unmarkNvmClean(); } - nvmCache_->remove(item.getKey(), - nvmCache_->createDeleteTombStone(item.getKey())); + nvmCache_->remove(hk, nvmCache_->createDeleteTombStone(hk)); + } +} + +template +TierId +CacheAllocator::getTierId(const Item& item) const { + return getTierId(item.getMemory()); +} + +template +TierId +CacheAllocator::getTierId(const void* ptr) const { + for (TierId tid = 0; tid < numTiers_; tid++) { + if (allocator_[tid]->isMemoryInAllocator(ptr)) + return tid; } + + throw std::invalid_argument("Item does not belong to any tier!"); } template typename CacheAllocator::MMContainer& CacheAllocator::getMMContainer(const Item& item) const noexcept { + const auto tid = getTierId(item); const auto allocInfo = - allocator_->getAllocInfo(static_cast(&item)); - return getMMContainer(allocInfo.poolId, allocInfo.classId); + allocator_[tid]->getAllocInfo(static_cast(&item)); + return getMMContainer(tid, allocInfo.poolId, allocInfo.classId); } template typename CacheAllocator::MMContainer& -CacheAllocator::getMMContainer(PoolId pid, +CacheAllocator::getMMContainer(TierId tid, + PoolId pid, ClassId cid) const noexcept { - XDCHECK_LT(static_cast(pid), mmContainers_.size()); - XDCHECK_LT(static_cast(cid), mmContainers_[pid].size()); - return *mmContainers_[pid][cid]; + XDCHECK_LT(static_cast(tid), mmContainers_.size()); + XDCHECK_LT(static_cast(pid), mmContainers_[tid].size()); + XDCHECK_LT(static_cast(cid), mmContainers_[tid][pid].size()); + return *mmContainers_[tid][pid][cid]; } template -typename CacheAllocator::ItemHandle +typename CacheAllocator::ReadHandle CacheAllocator::peek(typename Item::Key key) { - auto handle = findInternal(key); - return handle; + return findInternal(key); } template -std::pair::ItemHandle, - typename CacheAllocator::ItemHandle> +std::pair::ReadHandle, + typename CacheAllocator::ReadHandle> CacheAllocator::inspectCache(typename Item::Key key) { - std::pair res; + std::pair res; res.first = findInternal(key); res.second = nvmCache_ ? nvmCache_->peek(key) : nullptr; return res; @@ -1669,9 +2009,9 @@ CacheAllocator::inspectCache(typename Item::Key key) { // CacheAllocator. 
Hence the sprinkling of UNLIKELY/LIKELY to tell the // compiler which executions we don't want to optimize on. template -typename CacheAllocator::ItemHandle -CacheAllocator::findFastImpl(typename Item::Key key, - AccessMode mode) { +typename CacheAllocator::WriteHandle +CacheAllocator::findFastInternal(typename Item::Key key, + AccessMode mode) { auto handle = findInternal(key); stats_.numCacheGets.inc(); @@ -1685,9 +2025,10 @@ CacheAllocator::findFastImpl(typename Item::Key key, } template -typename CacheAllocator::ItemHandle -CacheAllocator::findFast(typename Item::Key key, AccessMode mode) { - auto handle = findFastImpl(key, mode); +typename CacheAllocator::WriteHandle +CacheAllocator::findFastImpl(typename Item::Key key, + AccessMode mode) { + auto handle = findFastInternal(key, mode); auto eventTracker = getEventTracker(); if (UNLIKELY(eventTracker != nullptr)) { if (handle) { @@ -1704,9 +2045,29 @@ CacheAllocator::findFast(typename Item::Key key, AccessMode mode) { } template -typename CacheAllocator::ItemHandle -CacheAllocator::find(typename Item::Key key, AccessMode mode) { - auto handle = findFastImpl(key, mode); +typename CacheAllocator::ReadHandle +CacheAllocator::findFast(typename Item::Key key) { + return findFastImpl(key, AccessMode::kRead); +} + +template +typename CacheAllocator::WriteHandle +CacheAllocator::findFastToWrite(typename Item::Key key, + bool doNvmInvalidation) { + auto handle = findFastImpl(key, AccessMode::kWrite); + if (handle == nullptr) { + return nullptr; + } + if (doNvmInvalidation) { + invalidateNvm(*handle); + } + return handle; +} + +template +typename CacheAllocator::WriteHandle +CacheAllocator::findImpl(typename Item::Key key, AccessMode mode) { + auto handle = findFastInternal(key, mode); if (handle) { if (UNLIKELY(handle->isExpired())) { @@ -1718,7 +2079,7 @@ CacheAllocator::find(typename Item::Key key, AccessMode mode) { eventTracker->record(AllocatorApiEvent::FIND, key, AllocatorApiResult::NOT_FOUND); } - ItemHandle ret; + WriteHandle ret; ret.markExpired(); return ret; } @@ -1735,7 +2096,7 @@ CacheAllocator::find(typename Item::Key key, AccessMode mode) { auto eventResult = AllocatorApiResult::NOT_FOUND; if (nvmCache_) { - handle = nvmCache_->find(key); + handle = nvmCache_->find(HashedKey{key}); eventResult = AllocatorApiResult::NOT_FOUND_IN_MEMORY; } @@ -1748,10 +2109,10 @@ CacheAllocator::find(typename Item::Key key, AccessMode mode) { } template -typename CacheAllocator::ItemHandle +typename CacheAllocator::WriteHandle CacheAllocator::findToWrite(typename Item::Key key, bool doNvmInvalidation) { - auto handle = find(key, AccessMode::kWrite); + auto handle = findImpl(key, AccessMode::kWrite); if (handle == nullptr) { return nullptr; } @@ -1764,11 +2125,11 @@ CacheAllocator::findToWrite(typename Item::Key key, template typename CacheAllocator::ReadHandle CacheAllocator::find(typename Item::Key key) { - return find(key, AccessMode::kRead); + return findImpl(key, AccessMode::kRead); } template -void CacheAllocator::markUseful(const ItemHandle& handle, +void CacheAllocator::markUseful(const ReadHandle& handle, AccessMode mode) { if (!handle) { return; @@ -1791,8 +2152,9 @@ void CacheAllocator::markUseful(const ItemHandle& handle, template bool CacheAllocator::recordAccessInMMContainer(Item& item, AccessMode mode) { + const auto tid = getTierId(item); const auto allocInfo = - allocator_->getAllocInfo(static_cast(&item)); + allocator_[tid]->getAllocInfo(static_cast(&item)); (*stats_.cacheHits)[allocInfo.poolId][allocInfo.classId].inc(); // track 
recently accessed items if needed @@ -1800,58 +2162,70 @@ bool CacheAllocator::recordAccessInMMContainer(Item& item, ring_->trackItem(reinterpret_cast(&item), item.getSize()); } - auto& mmContainer = getMMContainer(allocInfo.poolId, allocInfo.classId); + auto& mmContainer = getMMContainer(tid, allocInfo.poolId, allocInfo.classId); return mmContainer.recordAccess(item, mode); } template uint32_t CacheAllocator::getUsableSize(const Item& item) const { + const auto tid = getTierId(item); const auto allocSize = - allocator_->getAllocInfo(static_cast(&item)).allocSize; + allocator_[tid]->getAllocInfo(static_cast(&item)).allocSize; return item.isChainedItem() ? allocSize - ChainedItem::getRequiredSize(0) : allocSize - Item::getRequiredSize(item.getKey(), 0); } template -typename CacheAllocator::ItemHandle +typename CacheAllocator::ReadHandle CacheAllocator::getSampleItem() { + // TODO: is using random tier a good idea? + auto tid = folly::Random::rand32() % numTiers_; + const auto* item = - reinterpret_cast(allocator_->getRandomAlloc()); + reinterpret_cast(allocator_[tid]->getRandomAlloc()); if (!item) { - return ItemHandle{}; + return ReadHandle{}; } - auto handle = findInternal(item->getKey()); + ReadHandle handle = findInternal(item->getKey()); // Check that item returned is the same that was sampled if (handle.get() == item) { return handle; } - return ItemHandle{}; + return ReadHandle{}; } template std::vector CacheAllocator::dumpEvictionIterator( - PoolId pid, ClassId cid, size_t numItems) { + PoolId pid, ClassId cid, size_t numItems) { if (numItems == 0) { return {}; } - if (static_cast(pid) >= mmContainers_.size() || - static_cast(cid) >= mmContainers_[pid].size()) { + // Always evict from the lowest layer. + int tid = numTiers_ - 1; + + if (static_cast(tid) >= mmContainers_.size() || + static_cast(pid) >= mmContainers_[tid].size() || + static_cast(cid) >= mmContainers_[tid][pid].size()) { throw std::invalid_argument( - folly::sformat("Invalid PoolId: {} and ClassId: {}.", pid, cid)); + folly::sformat("Invalid TierId: {} and PoolId: {} and ClassId: {}.", tid, pid, cid)); } std::vector content; - auto& mm = *mmContainers_[pid][cid]; - auto evictItr = mm.getEvictionIterator(); size_t i = 0; - while (evictItr && i < numItems) { - content.push_back(evictItr->toString()); - ++evictItr; - ++i; + while (i < numItems && tid >= 0) { + auto& mm = *mmContainers_[tid][pid][cid]; + auto evictItr = mm.getEvictionIterator(); + while (evictItr && i < numItems) { + content.push_back(evictItr->toString()); + ++evictItr; + ++i; + } + + --tid; } return content; @@ -1873,8 +2247,8 @@ folly::IOBuf CacheAllocator::convertToIOBufT(Handle& handle) { ConvertChainedItem converter; // based on current refcount and threshold from config - // determine to use a new ItemHandle for each chain items - // or use shared ItemHandle for all chain items + // determine to use a new Item Handle for each chain items + // or use shared Item Handle for all chain items if (item->getRefCount() > config_.thresholdForConvertingToIOBuf) { auto sharedHdl = std::make_shared(std::move(handle)); @@ -1913,6 +2287,9 @@ folly::IOBuf CacheAllocator::convertToIOBufT(Handle& handle) { } } else { + // following IOBuf will take the item's ownership and trigger freeFunc to + // release the reference count. 
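The reordering around `handle.release()` matters because the IOBuf's `freeFunc` is what eventually drops the reference the handle used to hold, so the handle must give it up before the IOBuf is constructed. A generic sketch of the same ownership hand-off using `folly::IOBuf::takeOwnership` with a plain malloc'd buffer; the buffer and `freeBacking` are illustrative, not the item path above.

```cpp
#include <cstddef>
#include <cstdlib>
#include <memory>

#include <folly/io/IOBuf.h>

// freeFunc runs when the last IOBuf sharing this buffer is destroyed; the
// resource handed over (here, the heap allocation) is released exactly there.
void freeBacking(void* buf, void* /*userData*/) { std::free(buf); }

std::unique_ptr<folly::IOBuf> wrapBuffer(std::size_t len) {
  void* mem = std::malloc(len);
  if (mem == nullptr) {
    return nullptr;
  }
  // Ownership passes to the IOBuf before anyone else can touch the buffer,
  // mirroring how the hunk releases the item handle before building the IOBuf.
  return folly::IOBuf::takeOwnership(mem, len, freeBacking, nullptr);
}
```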
+ handle.release(); iobuf = folly::IOBuf{folly::IOBuf::TAKE_OWNERSHIP, item, // Since we'll be moving the IOBuf data pointer forward @@ -1926,7 +2303,6 @@ folly::IOBuf CacheAllocator::convertToIOBufT(Handle& handle) { .reset(); } /* freeFunc */, this /* userData for freeFunc */}; - handle.release(); if (item->hasChainedItem()) { converter = [this](Item* parentItem, ChainedItem& chainedItem) { @@ -2027,19 +2403,46 @@ PoolId CacheAllocator::addPool( std::shared_ptr resizeStrategy, bool ensureProvisionable) { folly::SharedMutex::WriteHolder w(poolsResizeAndRebalanceLock_); - auto pid = allocator_->addPool(name, size, allocSizes, ensureProvisionable); + + PoolId pid = 0; + std::vector tierPoolSizes; + const auto &tierConfigs = config_.getMemoryTierConfigs(); + size_t totalCacheSize = 0; + + for (TierId tid = 0; tid < numTiers_; tid++) { + totalCacheSize += allocator_[tid]->getMemorySize(); + } + + for (TierId tid = 0; tid < numTiers_; tid++) { + auto tierSizeRatio = + static_cast(allocator_[tid]->getMemorySize()) / totalCacheSize; + size_t tierPoolSize = static_cast(tierSizeRatio * size); + + tierPoolSizes.push_back(tierPoolSize); + } + + for (TierId tid = 0; tid < numTiers_; tid++) { + // TODO: what if we manage to add pool only in one tier? + // we should probably remove that on failure + auto res = allocator_[tid]->addPool( + name, tierPoolSizes[tid], allocSizes, ensureProvisionable); + XDCHECK(tid == 0 || res == pid); + pid = res; + } + createMMContainers(pid, std::move(config)); setRebalanceStrategy(pid, std::move(rebalanceStrategy)); setResizeStrategy(pid, std::move(resizeStrategy)); + return pid; } template void CacheAllocator::overridePoolRebalanceStrategy( PoolId pid, std::shared_ptr rebalanceStrategy) { - if (static_cast(pid) >= mmContainers_.size()) { + if (static_cast(pid) >= mmContainers_[0].size()) { throw std::invalid_argument(folly::sformat( - "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_.size())); + "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_[0].size())); } setRebalanceStrategy(pid, std::move(rebalanceStrategy)); } @@ -2047,9 +2450,9 @@ void CacheAllocator::overridePoolRebalanceStrategy( template void CacheAllocator::overridePoolResizeStrategy( PoolId pid, std::shared_ptr resizeStrategy) { - if (static_cast(pid) >= mmContainers_.size()) { + if (static_cast(pid) >= mmContainers_[0].size()) { throw std::invalid_argument(folly::sformat( - "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_.size())); + "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_[0].size())); } setResizeStrategy(pid, std::move(resizeStrategy)); } @@ -2061,14 +2464,14 @@ void CacheAllocator::overridePoolOptimizeStrategy( } template -void CacheAllocator::overridePoolConfig(PoolId pid, +void CacheAllocator::overridePoolConfig(TierId tid, PoolId pid, const MMConfig& config) { - if (static_cast(pid) >= mmContainers_.size()) { + // TODO: add generic tier id checking + if (static_cast(pid) >= mmContainers_[tid].size()) { throw std::invalid_argument(folly::sformat( - "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_.size())); + "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_[tid].size())); } - - auto& pool = allocator_->getPool(pid); + auto& pool = allocator_[tid]->getPool(pid); for (unsigned int cid = 0; cid < pool.getNumClassId(); ++cid) { MMConfig mmConfig = config; mmConfig.addExtraConfig( @@ -2076,29 +2479,35 @@ void CacheAllocator::overridePoolConfig(PoolId pid, ? 
pool.getAllocationClass(static_cast(cid)) .getAllocsPerSlab() : 0); - DCHECK_NOTNULL(mmContainers_[pid][cid].get()); - mmContainers_[pid][cid]->setConfig(mmConfig); + DCHECK_NOTNULL(mmContainers_[tid][pid][cid].get()); + mmContainers_[tid][pid][cid]->setConfig(mmConfig); } } template void CacheAllocator::createMMContainers(const PoolId pid, MMConfig config) { - auto& pool = allocator_->getPool(pid); + // pools on each layer should have the same number of class id, etc. + // TODO: think about deduplication + auto& pool = allocator_[0]->getPool(pid); + for (unsigned int cid = 0; cid < pool.getNumClassId(); ++cid) { config.addExtraConfig( config_.trackTailHits ? pool.getAllocationClass(static_cast(cid)) .getAllocsPerSlab() : 0); - mmContainers_[pid][cid].reset(new MMContainer(config, compressor_)); + for (TierId tid = 0; tid < numTiers_; tid++) { + mmContainers_[tid][pid][cid].reset(new MMContainer(config, compressor_)); + } } } template PoolId CacheAllocator::getPoolId( folly::StringPiece name) const noexcept { - return allocator_->getPoolId(name.str()); + // each tier has the same pools + return allocator_[0]->getPoolId(name.str()); } // The Function returns a consolidated vector of Release Slab @@ -2141,7 +2550,9 @@ std::set CacheAllocator::filterCompactCachePools( template std::set CacheAllocator::getRegularPoolIds() const { folly::SharedMutex::ReadHolder r(poolsResizeAndRebalanceLock_); - return filterCompactCachePools(allocator_->getPoolIds()); + // TODO - get rid of the duplication - right now, each tier + // holds pool objects with mostly the same info + return filterCompactCachePools(allocator_[0]->getPoolIds()); } template @@ -2166,10 +2577,9 @@ std::set CacheAllocator::getRegularPoolIdsForResize() // getAdvisedMemorySize - then pools may be overLimit even when // all slabs are not allocated. Otherwise, pools may be overLimit // only after all slabs are allocated. - // - return (allocator_->allSlabsAllocated()) || - (allocator_->getAdvisedMemorySize() != 0) - ? filterCompactCachePools(allocator_->getPoolsOverLimit()) + return (allocator_[currentTier()]->allSlabsAllocated()) || + (allocator_[currentTier()]->getAdvisedMemorySize() != 0) + ? filterCompactCachePools(allocator_[currentTier()]->getPoolsOverLimit()) : std::set{}; } @@ -2178,9 +2588,19 @@ const std::string CacheAllocator::getCacheName() const { return config_.cacheName; } +template +size_t CacheAllocator::getPoolSize(PoolId poolId) const { + size_t poolSize = 0; + for (auto& allocator: allocator_) { + const auto& pool = allocator->getPool(poolId); + poolSize += pool.getPoolSize(); + } + return poolSize; +} + template PoolStats CacheAllocator::getPoolStats(PoolId poolId) const { - const auto& pool = allocator_->getPool(poolId); + const auto& pool = allocator_[currentTier()]->getPool(poolId); const auto& allocSizes = pool.getAllocSizes(); auto mpStats = pool.getStats(); const auto& classIds = mpStats.classIds; @@ -2198,7 +2618,7 @@ PoolStats CacheAllocator::getPoolStats(PoolId poolId) const { // TODO export evictions, numItems etc from compact cache directly. 
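The `addPool` change earlier in this hunk splits a requested pool size across tiers in proportion to each tier's share of total cache memory. A small worked example of that arithmetic; `splitPoolAcrossTiers` and the sizes are illustrative only.

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

std::vector<std::size_t> splitPoolAcrossTiers(
    const std::vector<std::size_t>& tierMemSizes, std::size_t poolSize) {
  std::size_t total = 0;
  for (auto s : tierMemSizes) {
    total += s;
  }

  std::vector<std::size_t> perTier;
  for (auto s : tierMemSizes) {
    double ratio = static_cast<double>(s) / static_cast<double>(total);
    perTier.push_back(static_cast<std::size_t>(ratio * poolSize));
  }
  return perTier;
}

int main() {
  // e.g. 8 GiB of DRAM plus 24 GiB in a second tier, 4 GiB pool requested:
  auto sizes = splitPoolAcrossTiers({8ULL << 30, 24ULL << 30}, 4ULL << 30);
  std::cout << sizes[0] << " " << sizes[1] << "\n";  // 1 GiB and 3 GiB
  return 0;
}
```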
if (!isCompactCache) { for (const ClassId cid : classIds) { - const auto& container = getMMContainer(poolId, cid); + const auto& container = getMMContainer(currentTier(), poolId, cid); uint64_t classHits = (*stats_.cacheHits)[poolId][cid].get(); cacheStats.insert( {cid, @@ -2214,7 +2634,7 @@ PoolStats CacheAllocator::getPoolStats(PoolId poolId) const { PoolStats ret; ret.isCompactCache = isCompactCache; - ret.poolName = allocator_->getPoolName(poolId); + ret.poolName = allocator_[currentTier()]->getPoolName(poolId); ret.poolSize = pool.getPoolSize(); ret.poolUsableSize = pool.getPoolUsableSize(); ret.poolAdvisedSize = pool.getPoolAdvisedSize(); @@ -2226,29 +2646,66 @@ PoolStats CacheAllocator::getPoolStats(PoolId poolId) const { return ret; } +template +double CacheAllocator::slabsApproxFreePercentage(TierId tid) const +{ + return allocator_[tid]->approxFreeSlabsPercentage(); +} + +template +AllocationClassBaseStat CacheAllocator::getAllocationClassStats( + TierId tid, PoolId pid, ClassId cid) const { + const auto &ac = allocator_[tid]->getPool(pid).getAllocationClass(cid); + + AllocationClassBaseStat stats{}; + stats.allocSize = ac.getAllocSize(); + stats.memorySize = ac.getNumSlabs() * Slab::kSize; + + if (slabsApproxFreePercentage(tid) > 0.0) { + auto totalMemory = MemoryAllocator::getMemorySize(memoryTierSize(tid)); + auto freeMemory = static_cast(totalMemory) * slabsApproxFreePercentage(tid) / 100.0; + + // amount of free memory which has the same ratio to entire free memory as + // this allocation class memory size has to used memory + auto scaledFreeMemory = static_cast(freeMemory * stats.memorySize / totalMemory); + + auto acAllocatedMemory = (100.0 - ac.approxFreePercentage()) / 100.0 * ac.getNumSlabs() * Slab::kSize; + auto acMaxAvailableMemory = ac.getNumSlabs() * Slab::kSize + scaledFreeMemory; + + if (acMaxAvailableMemory == 0) { + stats.approxFreePercent = 100.0; + } else { + stats.approxFreePercent = 100.0 - 100.0 * acAllocatedMemory / acMaxAvailableMemory; + } + } else { + stats.approxFreePercent = ac.approxFreePercentage(); + } + stats.allocLatencyNs = (*stats_.classAllocLatency)[tid][pid][cid]; + + return stats; +} + template PoolEvictionAgeStats CacheAllocator::getPoolEvictionAgeStats( PoolId pid, unsigned int slabProjectionLength) const { PoolEvictionAgeStats stats; - - const auto& pool = allocator_->getPool(pid); + const auto& pool = allocator_[currentTier()]->getPool(pid); const auto& allocSizes = pool.getAllocSizes(); for (ClassId cid = 0; cid < static_cast(allocSizes.size()); ++cid) { - auto& mmContainer = getMMContainer(pid, cid); + auto& mmContainer = getMMContainer(currentTier(), pid, cid); const auto numItemsPerSlab = - allocator_->getPool(pid).getAllocationClass(cid).getAllocsPerSlab(); + allocator_[currentTier()]->getPool(pid).getAllocationClass(cid).getAllocsPerSlab(); const auto projectionLength = numItemsPerSlab * slabProjectionLength; stats.classEvictionAgeStats[cid] = mmContainer.getEvictionAgeStat(projectionLength); } - return stats; } template CacheMetadata CacheAllocator::getCacheMetadata() const noexcept { return CacheMetadata{kCachelibVersion, kCacheRamFormatVersion, - kCacheNvmFormatVersion, config_.size}; + kCacheNvmFormatVersion, config_.getCacheSize()}; } template @@ -2280,7 +2737,7 @@ void CacheAllocator::releaseSlab(PoolId pid, } try { - auto releaseContext = allocator_->startSlabRelease( + auto releaseContext = allocator_[currentTier()]->startSlabRelease( pid, victim, receiver, mode, hint, [this]() -> bool { return shutDownInProgress_; }); @@ 
-2289,15 +2746,15 @@ void CacheAllocator::releaseSlab(PoolId pid, return; } - releaseSlabImpl(releaseContext); - if (!allocator_->allAllocsFreed(releaseContext)) { + releaseSlabImpl(currentTier(), releaseContext); + if (!allocator_[currentTier()]->allAllocsFreed(releaseContext)) { throw std::runtime_error( folly::sformat("Was not able to free all allocs. PoolId: {}, AC: {}", releaseContext.getPoolId(), releaseContext.getClassId())); } - allocator_->completeSlabRelease(releaseContext); + allocator_[currentTier()]->completeSlabRelease(releaseContext); } catch (const exception::SlabReleaseAborted& e) { stats_.numAbortedSlabReleases.inc(); throw exception::SlabReleaseAborted(folly::sformat( @@ -2308,8 +2765,7 @@ void CacheAllocator::releaseSlab(PoolId pid, } template -SlabReleaseStats CacheAllocator::getSlabReleaseStats() - const noexcept { +SlabReleaseStats CacheAllocator::getSlabReleaseStats() const noexcept { std::lock_guard l(workersMutex_); return SlabReleaseStats{stats_.numActiveSlabReleases.get(), stats_.numReleasedForRebalance.get(), @@ -2322,13 +2778,31 @@ SlabReleaseStats CacheAllocator::getSlabReleaseStats() stats_.numMoveAttempts.get(), stats_.numMoveSuccesses.get(), stats_.numEvictionAttempts.get(), - stats_.numEvictionSuccesses.get()}; + stats_.numEvictionSuccesses.get(), + stats_.numSlabReleaseStuck.get()}; } template -void CacheAllocator::releaseSlabImpl( +void CacheAllocator::releaseSlabImpl(TierId tid, const SlabReleaseContext& releaseContext) { - util::Throttler throttler(config_.throttleConfig); + auto startTime = std::chrono::milliseconds(util::getCurrentTimeMs()); + bool releaseStuck = false; + + SCOPE_EXIT { + if (releaseStuck) { + stats_.numSlabReleaseStuck.dec(); + } + }; + + util::Throttler throttler( + config_.throttleConfig, + [this, &startTime, &releaseStuck](std::chrono::milliseconds curTime) { + if (!releaseStuck && + curTime >= startTime + config_.slabReleaseStuckThreshold) { + stats().numSlabReleaseStuck.inc(); + releaseStuck = true; + } + }); // Active allocations need to be freed before we can release this slab // The idea is: @@ -2354,7 +2828,7 @@ void CacheAllocator::releaseSlabImpl( if (!isMoved) { evictForSlabRelease(releaseContext, item, throttler); } - XDCHECK(allocator_->isAllocFreed(releaseContext, alloc)); + XDCHECK(allocator_[tid]->isAllocFreed(releaseContext, alloc)); } } @@ -2377,7 +2851,7 @@ bool CacheAllocator::moveForSlabRelease( bool isMoved = false; auto startTime = util::getCurrentTimeSec(); - ItemHandle newItemHdl = allocateNewItemForOldItem(oldItem); + WriteHandle newItemHdl = allocateNewItemForOldItem(oldItem); for (unsigned int itemMovingAttempts = 0; itemMovingAttempts < config_.movingTries; @@ -2434,8 +2908,11 @@ bool CacheAllocator::moveForSlabRelease( ctx.getPoolId(), ctx.getClassId()); }); } - const auto allocInfo = allocator_->getAllocInfo(oldItem.getMemory()); - allocator_->free(&oldItem); + + auto tid = getTierId(oldItem); + + const auto allocInfo = allocator_[tid]->getAllocInfo(oldItem.getMemory()); + allocator_[tid]->free(&oldItem); (*stats_.fragmentationSize)[allocInfo.poolId][allocInfo.classId].sub( util::getFragmentation(*this, oldItem)); @@ -2444,10 +2921,10 @@ bool CacheAllocator::moveForSlabRelease( } template -typename CacheAllocator::ItemHandle +typename CacheAllocator::ReadHandle CacheAllocator::validateAndGetParentHandleForChainedMoveLocked( const ChainedItem& item, const Key& parentKey) { - ItemHandle parentHandle{}; + ReadHandle parentHandle{}; try { parentHandle = findInternal(parentKey); // If the parent is not 
the same as the parent of the chained item, @@ -2464,7 +2941,7 @@ CacheAllocator::validateAndGetParentHandleForChainedMoveLocked( } template -typename CacheAllocator::ItemHandle +typename CacheAllocator::WriteHandle CacheAllocator::allocateNewItemForOldItem(const Item& oldItem) { if (oldItem.isChainedItem()) { const auto& oldChainedItem = oldItem.asChainedItem(); @@ -2488,7 +2965,7 @@ CacheAllocator::allocateNewItemForOldItem(const Item& oldItem) { } XDCHECK_EQ(newItemHdl->getSize(), oldChainedItem.getSize()); - auto parentPtr = parentHandle.get(); + auto parentPtr = parentHandle.getInternal(); XDCHECK_EQ(reinterpret_cast(parentPtr), reinterpret_cast( &oldChainedItem.getParentItem(compressor_))); @@ -2497,11 +2974,12 @@ CacheAllocator::allocateNewItemForOldItem(const Item& oldItem) { } const auto allocInfo = - allocator_->getAllocInfo(static_cast(&oldItem)); + allocator_[getTierId(oldItem)]->getAllocInfo(static_cast(&oldItem)); // Set up the destination for the move. Since oldItem would have the moving // bit set, it won't be picked for eviction. - auto newItemHdl = allocateInternal(allocInfo.poolId, + auto newItemHdl = allocateInternalTier(getTierId(oldItem), + allocInfo.poolId, oldItem.getKey(), oldItem.getSize(), oldItem.getCreationTime(), @@ -2519,7 +2997,7 @@ CacheAllocator::allocateNewItemForOldItem(const Item& oldItem) { template bool CacheAllocator::tryMovingForSlabRelease( - Item& oldItem, ItemHandle& newItemHdl) { + Item& oldItem, WriteHandle& newItemHdl) { // By holding onto a user-level synchronization object, we ensure moving // a regular item or chained item is synchronized with any potential // user-side mutation. @@ -2557,7 +3035,7 @@ bool CacheAllocator::tryMovingForSlabRelease( template void CacheAllocator::evictForSlabRelease( const SlabReleaseContext& ctx, Item& item, util::Throttler& throttler) { - XDCHECK(!config_.disableEviction); + XDCHECK(!config_.isEvictionDisabled()); auto startTime = util::getCurrentTimeSec(); while (true) { @@ -2580,13 +3058,13 @@ void CacheAllocator::evictForSlabRelease( auto owningHandle = item.isChainedItem() ? evictChainedItemForSlabRelease(item.asChainedItem()) - : evictNormalItemForSlabRelease(item); + : evictNormalItem(item); // we managed to evict the corresponding owner of the item and have the // last handle for the owner. if (owningHandle) { const auto allocInfo = - allocator_->getAllocInfo(static_cast(&item)); + allocator_[getTierId(item)]->getAllocInfo(static_cast(&item)); if (owningHandle->hasChainedItem()) { (*stats_.chainedItemEvictions)[allocInfo.poolId][allocInfo.classId] .inc(); @@ -2613,7 +3091,7 @@ void CacheAllocator::evictForSlabRelease( if (shutDownInProgress_) { item.unmarkMoving(); - allocator_->abortSlabRelease(ctx); + allocator_[getTierId(item)]->abortSlabRelease(ctx); throw exception::SlabReleaseAborted( folly::sformat("Slab Release aborted while trying to evict" " Item: {} Pool: {}, Class: {}.", @@ -2637,19 +3115,28 @@ void CacheAllocator::evictForSlabRelease( template typename CacheAllocator::ItemHandle -CacheAllocator::evictNormalItemForSlabRelease(Item& item) { +CacheAllocator::evictNormalItem(Item& item, + bool skipIfTokenInvalid) { XDCHECK(item.isMoving()); if (item.isOnlyMoving()) { - return ItemHandle{}; + return WriteHandle{}; } + auto evictHandle = tryEvictToNextMemoryTier(item); + if(evictHandle) return evictHandle; + auto predicate = [](const Item& it) { return it.getRefCount() == 0; }; const bool evictToNvmCache = shouldWriteToNvmCache(item); auto token = evictToNvmCache ? 
nvmCache_->createPutToken(item.getKey()) : typename NvmCacheT::PutToken{}; + if (skipIfTokenInvalid && evictToNvmCache && !token.isValid()) { + stats_.evictFailConcurrentFill.inc(); + return ItemHandle{}; + } + // We remove the item from both access and mm containers. It doesn't matter // if someone else calls remove on the item at this moment, the item cannot // be freed as long as we have the moving bit set. @@ -2674,7 +3161,7 @@ CacheAllocator::evictNormalItemForSlabRelease(Item& item) { } template -typename CacheAllocator::ItemHandle +typename CacheAllocator::WriteHandle CacheAllocator::evictChainedItemForSlabRelease(ChainedItem& child) { XDCHECK(child.isMoving()); @@ -2775,7 +3262,7 @@ CacheAllocator::evictChainedItemForSlabRelease(ChainedItem& child) { } template -bool CacheAllocator::removeIfExpired(const ItemHandle& handle) { +bool CacheAllocator::removeIfExpired(const ReadHandle& handle) { if (!handle) { return false; } @@ -2795,6 +3282,7 @@ bool CacheAllocator::removeIfExpired(const ItemHandle& handle) { template bool CacheAllocator::markMovingForSlabRelease( const SlabReleaseContext& ctx, void* alloc, util::Throttler& throttler) { + // MemoryAllocator::processAllocForRelease will execute the callback // if the item is not already free. So there are three outcomes here: // 1. Item not freed yet and marked as moving @@ -2808,6 +3296,7 @@ bool CacheAllocator::markMovingForSlabRelease( // At first, we assume this item was already freed bool itemFreed = true; bool markedMoving = false; + TierId tid = getTierId(alloc); const auto fn = [&markedMoving, &itemFreed](void* memory) { // Since this callback is executed, the item is not yet freed itemFreed = false; @@ -2819,7 +3308,7 @@ bool CacheAllocator::markMovingForSlabRelease( auto startTime = util::getCurrentTimeSec(); while (true) { - allocator_->processAllocForRelease(ctx, alloc, fn); + allocator_[tid]->processAllocForRelease(ctx, alloc, fn); // If item is already freed we give up trying to mark the item moving // and return false, otherwise if marked as moving, we return true. @@ -2835,7 +3324,7 @@ bool CacheAllocator::markMovingForSlabRelease( if (shutDownInProgress_) { XDCHECK(!static_cast(alloc)->isMoving()); - allocator_->abortSlabRelease(ctx); + allocator_[tid]->abortSlabRelease(ctx); throw exception::SlabReleaseAborted( folly::sformat("Slab Release aborted while still trying to mark" " as moving for Item: {}. Pool: {}, Class: {}.", @@ -2858,12 +3347,15 @@ template CCacheT* CacheAllocator::addCompactCache(folly::StringPiece name, size_t size, Args&&... 
args) { + if (numTiers_ != 1) + throw std::runtime_error("TODO: compact cache for multi-tier Cache not supported."); + if (!config_.isCompactCacheEnabled()) { throw std::logic_error("Compact cache is not enabled"); } folly::SharedMutex::WriteHolder lock(compactCachePoolsLock_); - auto poolId = allocator_->addPool(name, size, {Slab::kSize}); + auto poolId = allocator_[0]->addPool(name, size, {Slab::kSize}); isCompactCachePool_[poolId] = true; auto ptr = std::make_unique( @@ -2946,38 +3438,41 @@ folly::IOBufQueue CacheAllocator::saveStateToIOBuf() { "There are still slabs being released at the moment"); } - *metadata_.allocatorVersion_ref() = kCachelibVersion; - *metadata_.ramFormatVersion_ref() = kCacheRamFormatVersion; - *metadata_.cacheCreationTime_ref() = static_cast(cacheCreationTime_); - *metadata_.mmType_ref() = MMType::kId; - *metadata_.accessType_ref() = AccessType::kId; + *metadata_.allocatorVersion() = kCachelibVersion; + *metadata_.ramFormatVersion() = kCacheRamFormatVersion; + *metadata_.cacheCreationTime() = static_cast(cacheCreationTime_); + *metadata_.mmType() = MMType::kId; + *metadata_.accessType() = AccessType::kId; - metadata_.compactCachePools_ref()->clear(); + metadata_.compactCachePools()->clear(); const auto pools = getPoolIds(); { folly::SharedMutex::ReadHolder lock(compactCachePoolsLock_); for (PoolId pid : pools) { for (unsigned int cid = 0; cid < (*stats_.fragmentationSize)[pid].size(); ++cid) { - metadata_.fragmentationSize_ref()[pid][static_cast(cid)] = + metadata_.fragmentationSize()[pid][static_cast(cid)] = (*stats_.fragmentationSize)[pid][cid].get(); } if (isCompactCachePool_[pid]) { - metadata_.compactCachePools_ref()->push_back(pid); + metadata_.compactCachePools()->push_back(pid); } } } - *metadata_.numChainedParentItems_ref() = stats_.numChainedParentItems.get(); - *metadata_.numChainedChildItems_ref() = stats_.numChainedChildItems.get(); - *metadata_.numAbortedSlabReleases_ref() = stats_.numAbortedSlabReleases.get(); + *metadata_.numChainedParentItems() = stats_.numChainedParentItems.get(); + *metadata_.numChainedChildItems() = stats_.numChainedChildItems.get(); + *metadata_.numAbortedSlabReleases() = stats_.numAbortedSlabReleases.get(); + // TODO: implement serialization for multiple tiers auto serializeMMContainers = [](MMContainers& mmContainers) { MMSerializationTypeContainer state; - for (unsigned int i = 0; i < mmContainers.size(); ++i) { + for (unsigned int i = 0; i < 1 /* TODO: */ ; ++i) { for (unsigned int j = 0; j < mmContainers[i].size(); ++j) { - if (mmContainers[i][j]) { - state.pools_ref()[i][j] = mmContainers[i][j]->saveState(); + for (unsigned int k = 0; k < mmContainers[i][j].size(); ++k) { + if (mmContainers[i][j][k]) { + state.pools_ref()[j][k] = mmContainers[i][j][k]->saveState(); + } } } } @@ -2987,7 +3482,8 @@ folly::IOBufQueue CacheAllocator::saveStateToIOBuf() { serializeMMContainers(mmContainers_); AccessSerializationType accessContainerState = accessContainer_->saveState(); - MemoryAllocator::SerializationType allocatorState = allocator_->saveState(); + // TODO: foreach allocator + MemoryAllocator::SerializationType allocatorState = allocator_[0]->saveState(); CCacheManager::SerializationType ccState = compactCacheManager_->saveState(); AccessSerializationType chainedItemAccessContainerState = @@ -3049,6 +3545,8 @@ CacheAllocator::shutDown() { (shmShutDownStatus == ShmShutDownRes::kSuccess); shmManager_.reset(); + // TODO: save per-tier state + if (shmShutDownSucceeded) { if (!nvmShutDownStatusOpt || *nvmShutDownStatusOpt) 
return ShutDownStatus::kSuccess; @@ -3086,7 +3584,7 @@ std::optional CacheAllocator::saveNvmCache() { return false; } - nvmCacheState_.markSafeShutDown(); + nvmCacheState_.value().markSafeShutDown(); return true; } @@ -3097,8 +3595,11 @@ void CacheAllocator::saveRamCache() { std::unique_ptr ioBuf = serializedBuf.move(); ioBuf->coalesce(); - void* infoAddr = - shmManager_->createShm(detail::kShmInfoName, ioBuf->length()).addr; + ShmSegmentOpts opts; + opts.typeOpts = PosixSysVSegmentOpts(config_.isUsingPosixShm()); + + void* infoAddr = shmManager_->createShm(detail::kShmInfoName, ioBuf->length(), + nullptr, opts).addr; Serializer serializer(reinterpret_cast(infoAddr), reinterpret_cast(infoAddr) + ioBuf->length()); serializer.writeToBuffer(std::move(ioBuf)); @@ -3112,7 +3613,9 @@ CacheAllocator::deserializeMMContainers( const auto container = deserializer.deserialize(); - MMContainers mmContainers; + /* TODO: right now, we create empty containers becouse deserialization + * only works for a single (topmost) tier. */ + MMContainers mmContainers = createEmptyMMContainers(); for (auto& kvPool : *container.pools_ref()) { auto i = static_cast(kvPool.first); @@ -3127,12 +3630,12 @@ CacheAllocator::deserializeMMContainers( ? pool.getAllocationClass(j).getAllocsPerSlab() : 0); ptr->setConfig(config); - mmContainers[i][j] = std::move(ptr); + mmContainers[0 /* TODO */][i][j] = std::move(ptr); } } // We need to drop the unevictableMMContainer in the desierializer. // TODO: remove this at version 17. - if (metadata_.allocatorVersion_ref() <= 15) { + if (metadata_.allocatorVersion() <= 15) { deserializer.deserialize(); } return mmContainers; @@ -3141,14 +3644,16 @@ CacheAllocator::deserializeMMContainers( template typename CacheAllocator::MMContainers CacheAllocator::createEmptyMMContainers() { - MMContainers mmContainers; + MMContainers mmContainers(numTiers_); for (unsigned int i = 0; i < mmContainers_.size(); i++) { for (unsigned int j = 0; j < mmContainers_[i].size(); j++) { - if (mmContainers_[i][j]) { - MMContainerPtr ptr = - std::make_unique( - mmContainers_[i][j]->getConfig(), compressor_); - mmContainers[i][j] = std::move(ptr); + for (unsigned int k = 0; k < mmContainers_[i][j].size(); k++) { + if (mmContainers_[i][j][k]) { + MMContainerPtr ptr = + std::make_unique( + mmContainers_[i][j][k]->getConfig(), compressor_); + mmContainers[i][j][k] = std::move(ptr); + } } } } @@ -3163,22 +3668,22 @@ CacheAllocator::deserializeCacheAllocatorMetadata( // TODO: // Once everyone is on v8 or later, remove the outter if. if (kCachelibVersion > 8) { - if (*meta.ramFormatVersion_ref() != kCacheRamFormatVersion) { + if (*meta.ramFormatVersion() != kCacheRamFormatVersion) { throw std::runtime_error( folly::sformat("Expected cache ram format version {}. 
But found {}.", - kCacheRamFormatVersion, *meta.ramFormatVersion_ref())); + kCacheRamFormatVersion, *meta.ramFormatVersion())); } } - if (*meta.accessType_ref() != AccessType::kId) { + if (*meta.accessType() != AccessType::kId) { throw std::invalid_argument( - folly::sformat("Expected {}, got {} for AccessType", - *meta.accessType_ref(), AccessType::kId)); + folly::sformat("Expected {}, got {} for AccessType", *meta.accessType(), + AccessType::kId)); } - if (*meta.mmType_ref() != MMType::kId) { - throw std::invalid_argument(folly::sformat( - "Expected {}, got {} for MMType", *meta.mmType_ref(), MMType::kId)); + if (*meta.mmType() != MMType::kId) { + throw std::invalid_argument(folly::sformat("Expected {}, got {} for MMType", + *meta.mmType(), MMType::kId)); } return meta; } @@ -3209,7 +3714,7 @@ void CacheAllocator::initStats() { stats_.init(); // deserialize the fragmentation size of each thread. - for (const auto& pid : *metadata_.fragmentationSize_ref()) { + for (const auto& pid : *metadata_.fragmentationSize()) { for (const auto& cid : pid.second) { (*stats_.fragmentationSize)[pid.first][cid.first].set( static_cast(cid.second)); @@ -3217,10 +3722,10 @@ void CacheAllocator::initStats() { } // deserialize item counter stats - stats_.numChainedParentItems.set(*metadata_.numChainedParentItems_ref()); - stats_.numChainedChildItems.set(*metadata_.numChainedChildItems_ref()); + stats_.numChainedParentItems.set(*metadata_.numChainedParentItems()); + stats_.numChainedChildItems.set(*metadata_.numChainedChildItems()); stats_.numAbortedSlabReleases.set( - static_cast(*metadata_.numAbortedSlabReleases_ref())); + static_cast(*metadata_.numAbortedSlabReleases())); } template @@ -3241,7 +3746,7 @@ void CacheAllocator::forEachChainedItem( } template -typename CacheAllocator::ItemHandle +typename CacheAllocator::WriteHandle CacheAllocator::findChainedItem(const Item& parent) const { const auto cPtr = compressor_.compress(&parent); return chainedItemAccessContainer_->find( @@ -3279,21 +3784,29 @@ GlobalCacheStats CacheAllocator::getGlobalCacheStats() const { ret.numItems = accessContainer_->getStats().numKeys; const uint64_t currTime = util::getCurrentTimeSec(); + ret.cacheInstanceUpTime = currTime - cacheInstanceCreationTime_; ret.ramUpTime = currTime - cacheCreationTime_; - ret.nvmUpTime = currTime - nvmCacheState_.getCreationTime(); ret.nvmCacheEnabled = nvmCache_ ? 
nvmCache_->isEnabled() : false; + ret.nvmUpTime = currTime - getNVMCacheCreationTime(); ret.reaperStats = getReaperStats(); ret.numActiveHandles = getNumActiveHandles(); + ret.isNewRamCache = cacheCreationTime_ == cacheInstanceCreationTime_; + ret.isNewNvmCache = + nvmCacheState_.getCreationTime() == cacheInstanceCreationTime_; + return ret; } template CacheMemoryStats CacheAllocator::getCacheMemoryStats() const { - const auto totalCacheSize = allocator_->getMemorySize(); + size_t totalCacheSize = 0; + for(auto& allocator: allocator_) { + totalCacheSize += allocator->getMemorySize(); + } auto addSize = [this](size_t a, PoolId pid) { - return a + allocator_->getPool(pid).getPoolSize(); + return a + allocator_[currentTier()]->getPool(pid).getPoolSize(); }; const auto regularPoolIds = getRegularPoolIds(); const auto ccCachePoolIds = getCCachePoolIds(); @@ -3302,15 +3815,20 @@ CacheMemoryStats CacheAllocator::getCacheMemoryStats() const { size_t compactCacheSize = std::accumulate( ccCachePoolIds.begin(), ccCachePoolIds.end(), 0ULL, addSize); + std::vector slabsApproxFreePercentages; + for (TierId tid = 0; tid < numTiers_; tid++) + slabsApproxFreePercentages.push_back(slabsApproxFreePercentage(tid)); + return CacheMemoryStats{totalCacheSize, regularCacheSize, compactCacheSize, - allocator_->getAdvisedMemorySize(), + allocator_[currentTier()]->getAdvisedMemorySize(), memMonitor_ ? memMonitor_->getMaxAdvisePct() : 0, - allocator_->getUnreservedMemorySize(), + allocator_[currentTier()]->getUnreservedMemorySize(), nvmCache_ ? nvmCache_->getSize() : 0, util::getMemAvailable(), - util::getRSSBytes()}; + util::getRSSBytes(), + slabsApproxFreePercentages}; } template @@ -3325,6 +3843,11 @@ bool CacheAllocator::autoResizeEnabledForPool(PoolId pid) const { } } +template +void CacheAllocator::startCacheWorkers() { + initWorkers(); +} + template template bool CacheAllocator::stopWorker(folly::StringPiece name, @@ -3444,12 +3967,14 @@ bool CacheAllocator::stopReaper(std::chrono::seconds timeout) { template bool CacheAllocator::cleanupStrayShmSegments( - const std::string& cacheDir, bool posix) { + const std::string& cacheDir, bool posix /*TODO(SHM_FILE): const std::vector& config */) { if (util::getStatIfExists(cacheDir, nullptr) && util::isDir(cacheDir)) { try { // cache dir exists. clean up only if there are no other processes // attached. if another process was attached, the following would fail. ShmManager::cleanup(cacheDir, posix); + + // TODO: cleanup per-tier state } catch (const std::exception& e) { XLOGF(ERR, "Error cleaning up {}. Exception: ", cacheDir, e.what()); return false; @@ -3459,10 +3984,17 @@ bool CacheAllocator::cleanupStrayShmSegments( // Any other concurrent process can not be attached to the segments or // even if it does, we want to mark it for destruction. ShmManager::removeByName(cacheDir, detail::kShmInfoName, posix); - ShmManager::removeByName(cacheDir, detail::kShmCacheName, posix); + ShmManager::removeByName(cacheDir, detail::kShmCacheName + + std::to_string(0), posix); ShmManager::removeByName(cacheDir, detail::kShmHashTableName, posix); ShmManager::removeByName(cacheDir, detail::kShmChainedItemHashTableName, posix); + + // TODO(SHM_FILE): try to nuke segments of differente types (which require + // extra info) + // for (auto &tier : config) { + // ShmManager::removeByName(cacheDir, tierShmName, config_.memoryTiers[i].opts); + // } } return true; } @@ -3473,8 +4005,10 @@ uint64_t CacheAllocator::getItemPtrAsOffset(const void* ptr) { // the two differ (e.g. 
Mac OS 12) - causing templating instantiation // errors downstream. + auto tid = getTierId(ptr); + // if this succeeeds, the address is valid within the cache. - allocator_->getAllocInfo(ptr); + allocator_[tid]->getAllocInfo(ptr); if (!isOnShm_ || !shmManager_) { throw std::invalid_argument("Shared memory not used"); @@ -3493,7 +4027,7 @@ CacheAllocator::getNvmCacheStatsMap() const { : std::unordered_map{}; if (nvmAdmissionPolicy_) { auto policyStats = nvmAdmissionPolicy_->getCounters(); - for (const auto kv : policyStats) { + for (const auto& kv : policyStats) { ret[kv.first] = kv.second; } } diff --git a/cachelib/allocator/CacheAllocator.h b/cachelib/allocator/CacheAllocator.h index 63b4102c60..08c3d21475 100644 --- a/cachelib/allocator/CacheAllocator.h +++ b/cachelib/allocator/CacheAllocator.h @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include #include @@ -82,6 +84,17 @@ namespace cachelib { template class FbInternalRuntimeUpdateWrapper; +template +class ReadOnlyMap; + +namespace objcache2 { +template +class ObjectCache; + +template +class ObjectCacheBase; +} // namespace objcache2 + namespace cachebench { template class Cache; @@ -100,6 +113,12 @@ class AllocatorHitStatsTest; template class AllocatorResizeTest; +template +class FixedSizeArrayTest; + +template +class MapTest; + class NvmCacheTest; template @@ -111,6 +130,20 @@ class CacheAllocatorTestWrapper; class PersistenceCache; } // namespace tests +namespace objcache { +template +class ObjectCache; +namespace test { +#define GET_CLASS_NAME(test_case_name, test_name) \ + test_case_name##_##test_name##_Test + +#define GET_DECORATED_CLASS_NAME(namespace, test_case_name, test_name) \ + namespace ::GET_CLASS_NAME(test_case_name, test_name) + +class GET_CLASS_NAME(ObjectCache, ObjectHandleInvalid); +} // namespace test +} // namespace objcache + // CacheAllocator can provide an interface to make Keyed Allocations(Item) and // takes two templated types that control how the allocation is // maintained(MMType aka MemoryManagementType) and accessed(AccessType). The @@ -154,13 +187,16 @@ class CacheAllocator : public CacheBase { // the holder for the item when we hand it to the caller. This ensures // that the reference count is maintained when the caller is done with the - // item. The ItemHandle provides a getMemory() and getKey() interface. The - // caller is free to use the result of these two as long as the handle is - // active/alive. Using the result of the above interfaces after destroying - // the ItemHandle is UB. The ItemHandle safely wraps a pointer to the Item. + // item. The ReadHandle/WriteHandle provides a getMemory() and getKey() + // interface. The caller is free to use the result of these two as long as the + // handle is active/alive. Using the result of the above interfaces after + // destroying the ReadHandle/WriteHandle is UB. The ReadHandle/WriteHandle + // safely wraps a pointer to the "const Item"/"Item". 
using ReadHandle = typename Item::ReadHandle; using WriteHandle = typename Item::WriteHandle; - using ItemHandle = WriteHandle; + // Following is deprecated as of allocator version 17 and this line will be + // removed at a future date + // using ItemHandle = WriteHandle; template > @@ -327,11 +363,11 @@ class CacheAllocator : public CacheBase { // @throw std::invalid_argument if the poolId is invalid or the size // requested is invalid or if the key is invalid(key.size() == 0 or // key.size() > 255) - ItemHandle allocate(PoolId id, - Key key, - uint32_t size, - uint32_t ttlSecs = 0, - uint32_t creationTime = 0); + WriteHandle allocate(PoolId id, + Key key, + uint32_t size, + uint32_t ttlSecs = 0, + uint32_t creationTime = 0); // Allocate a chained item // @@ -358,7 +394,7 @@ class CacheAllocator : public CacheBase { // @param child chained item that will be linked to the parent // // @throw std::invalid_argument if parent is nullptr - void addChainedItem(ItemHandle& parent, ItemHandle child); + void addChainedItem(WriteHandle& parent, WriteHandle child); // Pop the first chained item assocaited with this parent and unmark this // parent handle as having chained allocations. @@ -369,7 +405,7 @@ class CacheAllocator : public CacheBase { // // @return ChainedItem head if there exists one // nullptr otherwise - ItemHandle popChainedItem(ItemHandle& parent); + WriteHandle popChainedItem(WriteHandle& parent); // Return the key to the parent item. // @@ -395,9 +431,9 @@ class CacheAllocator : public CacheBase { // @return handle to the oldItem on return. // // @throw std::invalid_argument if any of the pre-conditions fails - ItemHandle replaceChainedItem(Item& oldItem, - ItemHandle newItem, - Item& parent); + WriteHandle replaceChainedItem(Item& oldItem, + WriteHandle newItem, + Item& parent); // Transfers the ownership of the chain from the current parent to the new // parent and inserts the new parent into the cache. Parent will be unmarked @@ -418,7 +454,7 @@ class CacheAllocator : public CacheBase { // @throw std::invalid_argument if the parent does not have chained item or // incorrect state of chained item or if any of the pre-conditions // are not met - void transferChainAndReplace(ItemHandle& parent, ItemHandle& newParent); + void transferChainAndReplace(WriteHandle& parent, WriteHandle& newParent); // Inserts the allocated handle into the AccessContainer, making it // accessible for everyone. This needs to be the handle that the caller @@ -431,7 +467,7 @@ class CacheAllocator : public CacheBase { // and is now accessible to everyone. False if there was an error. // // @throw std::invalid_argument if the handle is already accessible. - bool insert(const ItemHandle& handle); + bool insert(const WriteHandle& handle); // Replaces the allocated handle into the AccessContainer, making it // accessible for everyone. If an existing handle is already in the @@ -445,7 +481,7 @@ class CacheAllocator : public CacheBase { // @throw cachelib::exception::RefcountOverflow if the item we are replacing // is already out of refcounts. // @return handle to the old item that had been replaced - ItemHandle insertOrReplace(const ItemHandle& handle); + WriteHandle insertOrReplace(const WriteHandle& handle); // look up an item by its key across the nvm cache as well if enabled. // @@ -455,16 +491,22 @@ class CacheAllocator : public CacheBase { // key does not exist. ReadHandle find(Key key); - // look up an item by its key across the nvm cache as well if enabled. 
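A usage sketch for the read/write handle API declared above: allocate into a pool, fill the memory, publish with `insertOrReplace`, then read back through a `ReadHandle`. The `Cache` and `PoolIdT` template parameters and the helper name are illustrative; the member signatures follow the declarations in this header.

```cpp
#include <cstdint>
#include <cstring>
#include <string>

template <typename Cache, typename PoolIdT>
bool putAndGet(Cache& cache,
               PoolIdT pid,
               const std::string& key,
               const std::string& payload) {
  auto wh = cache.allocate(pid, key, static_cast<uint32_t>(payload.size()));
  if (!wh) {
    return false;  // pool is full and eviction could not free space
  }
  std::memcpy(wh->getMemory(), payload.data(), payload.size());
  cache.insertOrReplace(wh);  // makes the item visible to other threads

  auto rh = cache.find(key);  // ReadHandle: const view of the item
  return rh && rh->getSize() == payload.size();
}
```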
- // - // @param key the key for lookup - // @param mode the mode of access for the lookup. - // AccessMode::kRead or AccessMode::kWrite + // Warning: this API is synchronous today with HybridCache. This means as + // opposed to find(), we will block on an item being read from + // flash until it is loaded into DRAM-cache. In find(), if an item + // is missing in dram, we will return a "not-ready" handle and + // user can choose to block or convert to folly::SemiFuture and + // process the item only when it becomes ready (loaded into DRAM). + // If blocking behavior is NOT what you want, a workaround is: + // auto readHandle = cache->find("my key"); + // if (!readHandle.isReady()) { + // auto sf = std::move(readHandle) + // .toSemiFuture() + // .defer([] (auto readHandle)) { + // return std::move(readHandle).toWriteHandle(); + // } + // } // - // @return the handle for the item or a handle to nullptr if the key does - // not exist. - ItemHandle find(Key key, AccessMode mode); - // look up an item by its key across the nvm cache as well if enabled. Users // should call this API only when they are going to mutate the item data. // @@ -472,20 +514,31 @@ class CacheAllocator : public CacheBase { // @param isNvmInvalidate whether to do nvm invalidation; // defaults to be true // - // @return the handle for the item or a handle to nullptr if the + // @return the write handle for the item or a handle to nullptr if the // key does not exist. - ItemHandle findToWrite(Key key, bool doNvmInvalidation = true); + WriteHandle findToWrite(Key key, bool doNvmInvalidation = true); // look up an item by its key. This ignores the nvm cache and only does RAM // lookup. // // @param key the key for lookup - // @param mode the mode of access for the lookup. defaults to - // AccessMode::kRead // - // @return the handle for the item or a handle to nullptr if the key does - // not exist. - FOLLY_ALWAYS_INLINE ItemHandle findFast(Key key, AccessMode mode); + // @return the read handle for the item or a handle to nullptr if the key + // does not exist. + FOLLY_ALWAYS_INLINE ReadHandle findFast(Key key); + + // look up an item by its key. This ignores the nvm cache and only does RAM + // lookup. Users should call this API only when they are going to mutate the + // item data. + // + // @param key the key for lookup + // @param isNvmInvalidate whether to do nvm invalidation; + // defaults to be true + // + // @return the write handle for the item or a handle to nullptr if the + // key does not exist. + FOLLY_ALWAYS_INLINE WriteHandle + findFastToWrite(Key key, bool doNvmInvalidation = true); // look up an item by its key. This ignores the nvm cache and only does RAM // lookup. This API does not update the stats related to cache gets and misses @@ -494,7 +547,7 @@ class CacheAllocator : public CacheBase { // @param key the key for lookup // @return the handle for the item or a handle to nullptr if the key does // not exist. - FOLLY_ALWAYS_INLINE ItemHandle peek(Key key); + FOLLY_ALWAYS_INLINE ReadHandle peek(Key key); // Mark an item that was fetched through peek as useful. This is useful when // users want to look into the cache and only mark items as useful when they @@ -503,7 +556,7 @@ class CacheAllocator : public CacheBase { // @param handle the item handle // @param mode the mode of access for the lookup. 
defaults to // AccessMode::kRead - void markUseful(const ItemHandle& handle, AccessMode mode); + void markUseful(const ReadHandle& handle, AccessMode mode); using AccessIterator = typename AccessContainer::Iterator; // Iterator interface for the cache. It guarantees that all keys that were @@ -531,7 +584,7 @@ class CacheAllocator : public CacheBase { // removes the allocation corresponding to the key, if present in the hash // table. The key will not be accessible through find() after this returns // success. The allocation for the key will be recycled once all active - // ItemHandles are released. + // Item handles are released. // // @param key the key for the allocation. // @return kSuccess if the key exists and was successfully removed. @@ -606,11 +659,11 @@ class CacheAllocator : public CacheBase { // Get a random item from memory // This is useful for profiling and sampling cachelib managed memory // - // @return ItemHandle if an valid item is found + // @return ReadHandle if an valid item is found // // nullptr if the randomly chosen memory does not belong // to an valid item - ItemHandle getSampleItem(); + ReadHandle getSampleItem(); // Convert a Read Handle to an IOBuf. The returned IOBuf gives a // read-only view to the user. The item's ownership is retained by @@ -699,7 +752,7 @@ class CacheAllocator : public CacheBase { // @param config new config for the pool // // @throw std::invalid_argument if the poolId is invalid - void overridePoolConfig(PoolId pid, const MMConfig& config); + void overridePoolConfig(TierId tid, PoolId pid, const MMConfig& config); // update an existing pool's rebalance strategy // @@ -740,8 +793,9 @@ class CacheAllocator : public CacheBase { // @return true if the operation succeeded. false if the size of the pool is // smaller than _bytes_ // @throw std::invalid_argument if the poolId is invalid. + // TODO: should call shrinkPool for specific tier? bool shrinkPool(PoolId pid, size_t bytes) { - return allocator_->shrinkPool(pid, bytes); + return allocator_[currentTier()]->shrinkPool(pid, bytes); } // grow an existing pool by _bytes_. This will fail if there is no @@ -750,8 +804,9 @@ class CacheAllocator : public CacheBase { // @return true if the pool was grown. false if the necessary number of // bytes were not available. // @throw std::invalid_argument if the poolId is invalid. + // TODO: should call growPool for specific tier? bool growPool(PoolId pid, size_t bytes) { - return allocator_->growPool(pid, bytes); + return allocator_[currentTier()]->growPool(pid, bytes); } // move bytes from one pool to another. The source pool should be at least @@ -764,7 +819,7 @@ class CacheAllocator : public CacheBase { // correct size to do the transfer. // @throw std::invalid_argument if src or dest is invalid pool bool resizePools(PoolId src, PoolId dest, size_t bytes) override { - return allocator_->resizePools(src, dest, bytes); + return allocator_[currentTier()]->resizePools(src, dest, bytes); } // Add a new compact cache with given name and size @@ -851,6 +906,11 @@ class CacheAllocator : public CacheBase { // kSavedOnlyDRAM and kSavedOnlyNvmCache - partial content saved ShutDownStatus shutDown(); + // No-op for workers that are already running. Typically user uses this in + // conjunction with `config.delayWorkerStart()` to avoid initialization + // ordering issues with user callback for cachelib's workers. 
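[Editor's note] The non-blocking lookup pattern described in the find() comment above can be written out as a small sketch. This is illustrative only: it assumes the common LruAllocator specialization and a HybridCache (NVM) setup, and it uses folly::SemiFuture::deferValue in place of the defer call shown in the comment; the exact combinator and executor choice are up to the caller.

#include <folly/futures/Future.h>
#include "cachelib/allocator/CacheAllocator.h"

using Cache = facebook::cachelib::LruAllocator;  // assumed cache type

// Look up a key without blocking the calling thread on a flash read.
folly::SemiFuture<bool> lookupAsync(Cache& cache, folly::StringPiece key) {
  auto readHandle = cache.find(key);
  if (readHandle.isReady()) {
    // Hit (or miss) resolved entirely in DRAM; nothing to wait for.
    return folly::makeSemiFuture(static_cast<bool>(readHandle));
  }
  // Item is being fetched from NVM; defer the work until it is resident
  // in DRAM instead of blocking this thread.
  return std::move(readHandle).toSemiFuture().deferValue(
      [](auto handle) { return static_cast<bool>(handle); });
}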
+ void startCacheWorkers(); + // Functions that stop existing ones (if any) and create new workers // start pool rebalancer @@ -964,12 +1024,13 @@ class CacheAllocator : public CacheBase { // @throw std::invalid_argument if the memory does not belong to this // cache allocator AllocInfo getAllocInfo(const void* memory) const { - return allocator_->getAllocInfo(memory); + return allocator_[getTierId(memory)]->getAllocInfo(memory); } // return the ids for the set of existing pools in this cache. std::set getPoolIds() const override final { - return allocator_->getPoolIds(); + // all tiers have the same pool ids. TODO: deduplicate + return allocator_[0]->getPoolIds(); } // return a list of pool ids that are backing compact caches. This includes @@ -981,18 +1042,18 @@ class CacheAllocator : public CacheBase { // return the pool with speicified id. const MemoryPool& getPool(PoolId pid) const override final { - return allocator_->getPool(pid); + return allocator_[currentTier()]->getPool(pid); } // calculate the number of slabs to be advised/reclaimed in each pool PoolAdviseReclaimData calcNumSlabsToAdviseReclaim() override final { auto regularPoolIds = getRegularPoolIds(); - return allocator_->calcNumSlabsToAdviseReclaim(regularPoolIds); + return allocator_[currentTier()]->calcNumSlabsToAdviseReclaim(regularPoolIds); } // update number of slabs to advise in the cache void updateNumSlabsToAdvise(int32_t numSlabsToAdvise) override final { - allocator_->updateNumSlabsToAdvise(numSlabsToAdvise); + allocator_[currentTier()]->updateNumSlabsToAdvise(numSlabsToAdvise); } // returns a valid PoolId corresponding to the name or kInvalidPoolId if the @@ -1001,7 +1062,8 @@ class CacheAllocator : public CacheBase { // returns the pool's name by its poolId. std::string getPoolName(PoolId poolId) const { - return allocator_->getPoolName(poolId); + // all tiers have the same pool names. + return allocator_[0]->getPoolName(poolId); } // get stats related to all kinds of slab release events. @@ -1023,6 +1085,11 @@ class CacheAllocator : public CacheBase { return accessContainer_->getStats(); } + // Get the total number of keys inserted into the access container + uint64_t getAccessContainerNumKeys() const { + return accessContainer_->getNumKeys(); + } + // returns the reaper stats ReaperStats getReaperStats() const { auto stats = reaper_ ? reaper_->getStats() : ReaperStats{}; @@ -1039,10 +1106,13 @@ class CacheAllocator : public CacheBase { // get cache name const std::string getCacheName() const override final; + // combined pool size for all memory tiers + size_t getPoolSize(PoolId pid) const; + // pool stats by pool id PoolStats getPoolStats(PoolId pid) const override final; - // This can be expensive so it is not part of PoolStats + // This can be expensive so it is not part of PoolStats. PoolEvictionAgeStats getPoolEvictionAgeStats( PoolId pid, unsigned int slabProjectionLength) const override final; @@ -1052,9 +1122,13 @@ class CacheAllocator : public CacheBase { // return the overall cache stats GlobalCacheStats getGlobalCacheStats() const override final; - // return cache's memory usage stats + // return cache's memory usage stats. 
CacheMemoryStats getCacheMemoryStats() const override final; + // return basic stats for Allocation Class + AllocationClassBaseStat getAllocationClassStats(TierId tid, PoolId pid, ClassId cid) + const override final; + // return the nvm cache stats map std::unordered_map getNvmCacheStatsMap() const override final; @@ -1082,16 +1156,25 @@ class CacheAllocator : public CacheBase { // // @return time when the cache was created. time_t getCacheCreationTime() const noexcept { return cacheCreationTime_; } + + // unix timestamp when the NVM cache was created. If NVM cahce isn't enaled, + // the cache creation time is returned instead. + // + // @return time when the NVM cache was created. time_t getNVMCacheCreationTime() const { - return nvmCacheState_.getCreationTime(); + auto result = getCacheCreationTime(); + if (nvmCacheState_.has_value()) { + result = nvmCacheState_.value().getCreationTime(); + } + return result; } // Inspects the cache without changing its state. // // @param key for the cache item - // @return std::pair the first represents the state + // @return std::pair the first represents the state // in the RAM and the second is a copy of the state in NVM - std::pair inspectCache(Key key); + std::pair inspectCache(Key key); // blocks until the inflight operations are flushed to nvmcache. Used for // benchmarking when we want to load up the cache first with some data and @@ -1153,7 +1236,8 @@ class CacheAllocator : public CacheBase { // returns true if there was no error in trying to cleanup the segment // because another process was attached. False if the user tried to clean up // and the cache was actually attached. - static bool cleanupStrayShmSegments(const std::string& cacheDir, bool posix); + static bool cleanupStrayShmSegments(const std::string& cacheDir, bool posix + /*TODO: const std::vector& config = {} */); // gives a relative offset to a pointer within the cache. uint64_t getItemPtrAsOffset(const void* ptr); @@ -1165,7 +1249,8 @@ class CacheAllocator : public CacheBase { sizeof(typename RefcountWithFlags::Value) + sizeof(uint32_t) + sizeof(uint32_t) + sizeof(KAllocation)) == sizeof(Item), "vtable overhead"); - static_assert(32 == sizeof(Item), "item overhead is 32 bytes"); + // XXX: this will fail due to CompressedPtr change + // static_assert(32 == sizeof(Item), "item overhead is 32 bytes"); // make sure there is no overhead in ChainedItem on top of a regular Item static_assert(sizeof(Item) == sizeof(ChainedItem), @@ -1188,6 +1273,8 @@ class CacheAllocator : public CacheBase { #pragma GCC diagnostic pop private: + double slabsApproxFreePercentage(TierId tid) const; + // wrapper around Item's refcount and active handle tracking FOLLY_ALWAYS_INLINE void incRef(Item& it); FOLLY_ALWAYS_INLINE RefcountWithFlags::Value decRef(Item& it); @@ -1196,6 +1283,14 @@ class CacheAllocator : public CacheBase { // allocator and executes the necessary callbacks. no-op if it is nullptr. FOLLY_ALWAYS_INLINE void release(Item* it, bool isNascent); + // Differtiate different memory setting for the initialization + enum class InitMemType { kNone, kMemNew, kMemAttach }; + // instantiates a cache allocator for common initialization + // + // @param types the type of the memory used + // @param config the configuration for the whole cache allocator + CacheAllocator(InitMemType types, Config config); + // This is the last step in item release. 
We also use this for the eviction // scenario where we have to do everything, but not release the allocation // to the allocator and instead recycle it for another new allocation. If @@ -1234,28 +1329,31 @@ class CacheAllocator : public CacheBase { // acquires an handle on the item. returns an empty handle if it is null. // @param it pointer to an item - // @return ItemHandle return a handle to this item + // @return WriteHandle return a handle to this item // @throw std::overflow_error is the maximum item refcount is execeeded by // creating this item handle. - ItemHandle acquire(Item* it); + WriteHandle acquire(Item* it); // creates an item handle with wait context. - ItemHandle createNvmCacheFillHandle() { return ItemHandle{*this}; } + WriteHandle createNvmCacheFillHandle() { return WriteHandle{*this}; } // acquires the wait context for the handle. This is used by NvmCache to // maintain a list of waiters std::shared_ptr> getWaitContext( - ItemHandle& hdl) const { + ReadHandle& hdl) const { return hdl.getItemWaitContext(); } using MMContainerPtr = std::unique_ptr; using MMContainers = - std::array, - MemoryPoolManager::kMaxPools>; + std::vector, + MemoryPoolManager::kMaxPools>>; void createMMContainers(const PoolId pid, MMConfig config); + TierId getTierId(const Item& item) const; + TierId getTierId(const void* ptr) const; + // acquire the MMContainer corresponding to the the Item's class and pool. // // @return pointer to the MMContainer. @@ -1263,13 +1361,11 @@ class CacheAllocator : public CacheBase { // allocation from the memory allocator. MMContainer& getMMContainer(const Item& item) const noexcept; - MMContainer& getMMContainer(PoolId pid, ClassId cid) const noexcept; - // acquire the MMContainer for the give pool and class id and creates one // if it does not exist. // - // @return pointer to a valid MMContainer that is initialized. - MMContainer& getEvictableMMContainer(PoolId pid, ClassId cid) const noexcept; + // @return pointer to a valid MMContainer that is initialized + MMContainer& getMMContainer(TierId tid, PoolId pid, ClassId cid) const noexcept; // create a new cache allocation. The allocation can be initialized // appropriately and made accessible through insert or insertOrReplace. @@ -1295,7 +1391,18 @@ class CacheAllocator : public CacheBase { // @throw std::invalid_argument if the poolId is invalid or the size // requested is invalid or if the key is invalid(key.size() == 0 or // key.size() > 255) - ItemHandle allocateInternal(PoolId id, + WriteHandle allocateInternal(PoolId id, + Key key, + uint32_t size, + uint32_t creationTime, + uint32_t expiryTime); + + // create a new cache allocation on specific memory tier. + // For description see allocateInternal. + // + // @param tid id a memory tier + ItemHandle allocateInternalTier(TierId tid, + PoolId id, Key key, uint32_t size, uint32_t creationTime, @@ -1325,19 +1432,19 @@ class CacheAllocator : public CacheBase { // @param parentKey key of the item's parent // // @return handle to the parent item if the validations pass - // otherwise, an empty ItemHandle is returned. + // otherwise, an empty Handle is returned. // - ItemHandle validateAndGetParentHandleForChainedMoveLocked( + ReadHandle validateAndGetParentHandleForChainedMoveLocked( const ChainedItem& item, const Key& parentKey); // Given an existing item, allocate a new one for the // existing one to later be moved into. 
// - // @param oldItem handle to item we want to allocate a new item for + // @param oldItem the item we want to allocate a new item for // // @return handle to the newly allocated item // - ItemHandle allocateNewItemForOldItem(const Item& oldItem); + WriteHandle allocateNewItemForOldItem(const Item& oldItem); // internal helper that grabs a refcounted handle to the item. This does // not record the access to reflect in the mmContainer. @@ -1348,7 +1455,7 @@ class CacheAllocator : public CacheBase { // // @throw std::overflow_error is the maximum item refcount is execeeded by // creating this item handle. - ItemHandle findInternal(Key key) { + WriteHandle findInternal(Key key) { // Note: this method can not be const because we need a non-const // reference to create the ItemReleaser. return accessContainer_->find(key); @@ -1358,12 +1465,42 @@ class CacheAllocator : public CacheBase { // lookup. // // @param key the key for lookup - // @param mode the mode of access for the lookup. defaults to - // AccessMode::kRead + // @param mode the mode of access for the lookup. + // AccessMode::kRead or AccessMode::kWrite + // + // @return the handle for the item or a handle to nullptr if the key does + // not exist. + FOLLY_ALWAYS_INLINE WriteHandle findFastInternal(Key key, AccessMode mode); + + // look up an item by its key across the nvm cache as well if enabled. + // + // @param key the key for lookup + // @param mode the mode of access for the lookup. + // AccessMode::kRead or AccessMode::kWrite + // + // @return the handle for the item or a handle to nullptr if the key does + // not exist. + FOLLY_ALWAYS_INLINE WriteHandle findImpl(Key key, AccessMode mode); + + // look up an item by its key. This ignores the nvm cache and only does RAM + // lookup. + // + // @param key the key for lookup + // @param mode the mode of access for the lookup. + // AccessMode::kRead or AccessMode::kWrite // // @return the handle for the item or a handle to nullptr if the key does // not exist. - FOLLY_ALWAYS_INLINE ItemHandle findFastImpl(Key key, AccessMode mode); + FOLLY_ALWAYS_INLINE WriteHandle findFastImpl(Key key, AccessMode mode); + + // Moves a regular item to a different memory tier. + // + // @param oldItem Reference to the item being moved + // @param newItemHdl Reference to the handle of the new item being moved into + // + // @return true If the move was completed, and the containers were updated + // successfully. + ItemHandle moveRegularItemOnEviction(Item& oldItem, ItemHandle& newItemHdl); // Moves a regular item to a different slab. This should only be used during // slab release after the item's moving bit has been set. The user supplied @@ -1375,7 +1512,7 @@ class CacheAllocator : public CacheBase { // // @return true If the move was completed, and the containers were updated // successfully. - bool moveRegularItem(Item& oldItem, ItemHandle& newItemHdl); + bool moveRegularItem(Item& oldItem, WriteHandle& newItemHdl); // template class for viewAsChainedAllocs that takes either ReadHandle or // WriteHandle @@ -1402,7 +1539,7 @@ class CacheAllocator : public CacheBase { // // @return true If the move was completed, and the containers were updated // successfully. - bool moveChainedItem(ChainedItem& oldItem, ItemHandle& newItemHdl); + bool moveChainedItem(ChainedItem& oldItem, WriteHandle& newItemHdl); // Transfers the chain ownership from parent to newParent. Parent // will be unmarked as having chained allocations. 
Parent will not be null @@ -1419,7 +1556,7 @@ class CacheAllocator : public CacheBase { // @param newParent the new parent for the chain // // @throw if any of the conditions for parent or newParent are not met. - void transferChainLocked(ItemHandle& parent, ItemHandle& newParent); + void transferChainLocked(WriteHandle& parent, WriteHandle& newParent); // replace a chained item in the existing chain. This needs to be called // with the chained item lock held exclusive @@ -1429,9 +1566,9 @@ class CacheAllocator : public CacheBase { // @param parent the parent for the chain // // @return handle to the oldItem - ItemHandle replaceChainedItemLocked(Item& oldItem, - ItemHandle newItemHdl, - const Item& parent); + WriteHandle replaceChainedItemLocked(Item& oldItem, + WriteHandle newItemHdl, + const Item& parent); // Insert an item into MM container. The caller must hold a valid handle for // the item. @@ -1450,6 +1587,10 @@ class CacheAllocator : public CacheBase { // false if the item is not in MMContainer bool removeFromMMContainer(Item& item); + using EvictionIterator = typename MMContainer::Iterator; + + ItemHandle acquire(EvictionIterator& it) { return acquire(it.get()); } + // Replaces an item in the MMContainer with another item, at the same // position. // @@ -1460,6 +1601,8 @@ class CacheAllocator : public CacheBase { // destination item did not exist in the container, or if the // source item already existed. bool replaceInMMContainer(Item& oldItem, Item& newItem); + bool replaceInMMContainer(Item* oldItem, Item& newItem); + bool replaceInMMContainer(EvictionIterator& oldItemIt, Item& newItem); // Replaces an item in the MMContainer with another item, at the same // position. Or, if the two chained items belong to two different MM @@ -1493,17 +1636,19 @@ class CacheAllocator : public CacheBase { // and is now accessible to everyone. False if there was an error. // // @throw std::invalid_argument if the handle is already accessible or invalid - bool insertImpl(const ItemHandle& handle, AllocatorApiEvent event); + bool insertImpl(const WriteHandle& handle, AllocatorApiEvent event); // Removes an item from the access container and MM container. // + // @param hk the hashed key for the item // @param it Item to remove // @param tombstone A tombstone for nvm::remove job created by // nvm::createDeleteTombStone, can be empty if nvm is // not enable, or removeFromNvm is false // @param removeFromNvm if true clear key from nvm // @param recordApiEvent should we record API event for this operation. - RemoveRes removeImpl(Item& it, + RemoveRes removeImpl(HashedKey hk, + Item& it, DeleteTombStoneGuard tombstone, bool removeFromNvm = true, bool recordApiEvent = true); @@ -1514,28 +1659,27 @@ class CacheAllocator : public CacheBase { // @param pid the id of the pool to look for evictions inside // @param cid the id of the class to look for evictions inside // @return An evicted item or nullptr if there is no suitable candidate. - Item* findEviction(PoolId pid, ClassId cid); - - using EvictionIterator = typename MMContainer::Iterator; + Item* findEviction(TierId tid, PoolId pid, ClassId cid); - // Advance the current iterator and try to evict a regular item + // Try to move the item down to the next memory tier // - // @param mmContainer the container to look for evictions. - // @param itr iterator holding the item + // @param tid current tier ID of the item + // @param pid the pool ID the item belong to. + // @param item the item to evict // - // @return valid handle to regular item on success. 
This will be the last - // handle to the item. On failure an empty handle. - ItemHandle advanceIteratorAndTryEvictRegularItem(MMContainer& mmContainer, - EvictionIterator& itr); + // @return valid handle to the item. This will be the last + // handle to the item. On failure an empty handle. + WriteHandle tryEvictToNextMemoryTier(TierId tid, PoolId pid, Item& item); - // Advance the current iterator and try to evict a chained item - // Iterator may also be reset during the course of this function + // Try to move the item down to the next memory tier // - // @param itr iterator holding the item + // @param item the item to evict // - // @return valid handle to the parent item on success. This will be the last - // handle to the item - ItemHandle advanceIteratorAndTryEvictChainedItem(EvictionIterator& itr); + // @return valid handle to the item. This will be the last + // handle to the item. On failure an empty handle. + WriteHandle tryEvictToNextMemoryTier(Item& item); + + size_t memoryTierSize(TierId tid) const; // Deserializer CacheAllocatorMetadata and verify the version // @@ -1548,16 +1692,8 @@ class CacheAllocator : public CacheBase { Deserializer& deserializer, const typename Item::PtrCompressor& compressor); - // Create a copy of empty MMContainers according to the configs of - // mmContainers_ This function is used when serilizing for persistence for the - // reason of backward compatibility. A copy of empty MMContainers from - // mmContainers_ will be created and serialized as unevictable mm containers - // and written to metadata so that previous CacheLib versions can restore from - // such a serialization. This function will be removed in the next version. - MMContainers createEmptyMMContainers(); - unsigned int reclaimSlabs(PoolId id, size_t numSlabs) final { - return allocator_->reclaimSlabsAndGrow(id, numSlabs); + return allocator_[currentTier()]->reclaimSlabsAndGrow(id, numSlabs); } FOLLY_ALWAYS_INLINE EventTracker* getEventTracker() const { @@ -1616,7 +1752,7 @@ class CacheAllocator : public CacheBase { const void* hint = nullptr) final; // @param releaseContext slab release context - void releaseSlabImpl(const SlabReleaseContext& releaseContext); + void releaseSlabImpl(TierId tid, const SlabReleaseContext& releaseContext); // @return true when successfully marked as moving, // fasle when this item has already been freed @@ -1646,7 +1782,7 @@ class CacheAllocator : public CacheBase { // // @return true if the item has been moved // false if we have exhausted moving attempts - bool tryMovingForSlabRelease(Item& item, ItemHandle& newItemHdl); + bool tryMovingForSlabRelease(Item& item, WriteHandle& newItemHdl); // Evict an item from access and mm containers and // ensure it is safe for freeing. @@ -1662,19 +1798,19 @@ class CacheAllocator : public CacheBase { // // @return last handle for corresponding to item on success. empty handle on // failure. caller can retry if needed. - ItemHandle evictNormalItemForSlabRelease(Item& item); + ItemHandle evictNormalItem(Item& item, bool skipIfTokenInvalid = false); // Helper function to evict a child item for slab release // As a side effect, the parent item is also evicted // // @return last handle to the parent item of the child on success. empty // handle on failure. caller can retry. - ItemHandle evictChainedItemForSlabRelease(ChainedItem& item); + WriteHandle evictChainedItemForSlabRelease(ChainedItem& item); // Helper function to remove a item if expired. // // @return true if it item expire and removed successfully. 
- bool removeIfExpired(const ItemHandle& handle); + bool removeIfExpired(const ReadHandle& handle); // exposed for the Reaper to iterate through the memory and find items to // reap under the super charged mode. This is faster if there are lots of @@ -1688,7 +1824,7 @@ class CacheAllocator : public CacheBase { // primitives. So we consciously exempt ourselves here from TSAN data race // detection. folly::annotate_ignore_thread_sanitizer_guard g(__FILE__, __LINE__); - allocator_->forEachAllocation(std::forward(f)); + allocator_[currentTier()]->forEachAllocation(std::forward(f)); } // returns true if nvmcache is enabled and we should write this item to @@ -1731,9 +1867,11 @@ class CacheAllocator : public CacheBase { std::unique_ptr& worker, std::chrono::seconds timeout = std::chrono::seconds{0}); - std::unique_ptr createNewMemoryAllocator(); - std::unique_ptr restoreMemoryAllocator(); - std::unique_ptr restoreCCacheManager(); + ShmSegmentOpts createShmCacheOpts(TierId tid); + + std::unique_ptr createNewMemoryAllocator(TierId tid); + std::unique_ptr restoreMemoryAllocator(TierId tid); + std::unique_ptr restoreCCacheManager(TierId tid); PoolIds filterCompactCachePools(const PoolIds& poolIds) const; @@ -1753,7 +1891,7 @@ class CacheAllocator : public CacheBase { } typename Item::PtrCompressor createPtrCompressor() const { - return allocator_->createPtrCompressor(); + return typename Item::PtrCompressor(allocator_); } // helper utility to throttle and optionally log. @@ -1774,6 +1912,19 @@ class CacheAllocator : public CacheBase { void initNvmCache(bool dramCacheAttached); void initWorkers(); + // @param type the type of initialization + // @return nullptr if the type is invalid + // @return pointer to memory allocator + // @throw std::runtime_error if type is invalid + std::unique_ptr initAllocator(InitMemType type); + // @param type the type of initialization + // @return nullptr if the type is invalid + // @return pointer to access container + // @throw std::runtime_error if type is invalid + std::unique_ptr initAccessContainer(InitMemType type, + const std::string name, + AccessConfig config); + std::optional saveNvmCache(); void saveRamCache(); @@ -1781,10 +1932,6 @@ class CacheAllocator : public CacheBase { return item.getRefCount() == 0; } - static bool itemEvictionPredicate(const Item& item) { - return item.getRefCount() == 0 && !item.isMoving(); - } - static bool itemExpiryPredicate(const Item& item) { return item.getRefCount() == 1 && item.isExpired(); } @@ -1809,7 +1956,7 @@ class CacheAllocator : public CacheBase { // @return true if successfully recorded in MMContainer bool recordAccessInMMContainer(Item& item, AccessMode mode); - ItemHandle findChainedItem(const Item& parent) const; + WriteHandle findChainedItem(const Item& parent) const; // Get the thread local version of the Stats detail::Stats& stats() const noexcept { return stats_; } @@ -1831,6 +1978,91 @@ class CacheAllocator : public CacheBase { // BEGIN private members + TierId currentTier() const { + // TODO: every function which calls this method should be refactored. + // We should go case by case and either make such function work on + // all tiers or expose separate parameter to describe the tier ID. + return 0; + } + + bool addWaitContextForMovingItem( + folly::StringPiece key, std::shared_ptr> waiter); + + class MoveCtx { + public: + MoveCtx() {} + + ~MoveCtx() { + // prevent any further enqueue to waiters + // Note: we don't need to hold locks since no one can enqueue + // after this point. 
+ wakeUpWaiters(); + } + + // record the item handle. Upon destruction we will wake up the waiters + // and pass a clone of the handle to the callBack. By default we pass + // a null handle + void setItemHandle(ItemHandle _it) { it = std::move(_it); } + + // enqueue a waiter into the waiter list + // @param waiter WaitContext + void addWaiter(std::shared_ptr> waiter) { + XDCHECK(waiter); + waiters.push_back(std::move(waiter)); + } + + private: + // notify all pending waiters that are waiting for the fetch. + void wakeUpWaiters() { + bool refcountOverflowed = false; + for (auto& w : waiters) { + // If refcount overflowed earlier, then we will return miss to + // all subsequent waitors. + if (refcountOverflowed) { + w->set(ItemHandle{}); + continue; + } + + try { + w->set(it.clone()); + } catch (const exception::RefcountOverflow&) { + // We'll return a miss to the user's pending read, + // so we should enqueue a delete via NvmCache. + // TODO: cache.remove(it); + refcountOverflowed = true; + } + } + } + + ItemHandle it; // will be set when Context is being filled + std::vector>> waiters; // list of + // waiters + }; + using MoveMap = + folly::F14ValueMap, + folly::HeterogeneousAccessHash>; + + static size_t getShardForKey(folly::StringPiece key) { + return folly::Hash()(key) % kShards; + } + + MoveMap& getMoveMapForShard(size_t shard) { + return movesMap_[shard].movesMap_; + } + + MoveMap& getMoveMap(folly::StringPiece key) { + return getMoveMapForShard(getShardForKey(key)); + } + + std::unique_lock getMoveLockForShard(size_t shard) { + return std::unique_lock(moveLock_[shard].moveLock_); + } + + std::unique_lock getMoveLock(folly::StringPiece key) { + return getMoveLockForShard(getShardForKey(key)); + } + // Whether the memory allocator for this cache allocator was created on shared // memory. The hash table, chained item hash table etc is also created on // shared memory except for temporary shared memory mode when they're created @@ -1839,6 +2071,8 @@ class CacheAllocator : public CacheBase { const Config config_{}; + const typename Config::MemoryTierConfigs memoryTierConfigs; + // Manages the temporary shared memory segment for memory allocator that // is not persisted when cache process exits. std::unique_ptr tempShm_; @@ -1856,9 +2090,14 @@ class CacheAllocator : public CacheBase { const MMConfig mmConfig_{}; // the memory allocator for allocating out of the available memory. - std::unique_ptr allocator_; + std::vector> allocator_; + + std::vector> createPrivateAllocator(); + std::vector> createAllocators(); + std::vector> restoreAllocators(); // compact cache allocator manager + // TODO: per tier? std::unique_ptr compactCacheManager_; // compact cache instances reside here when user "add" or "attach" compact @@ -1920,8 +2159,30 @@ class CacheAllocator : public CacheBase { // poolResizer_, poolOptimizer_, memMonitor_, reaper_ mutable std::mutex workersMutex_; + static constexpr size_t kShards = 8192; // TODO: need to define right value + + struct MovesMapShard { + alignas(folly::hardware_destructive_interference_size) MoveMap movesMap_; + }; + + struct MoveLock { + alignas(folly::hardware_destructive_interference_size) std::mutex moveLock_; + }; + + // a map of all pending moves + std::vector movesMap_; + + // a map of move locks for each shard + std::vector moveLock_; + // time when the ram cache was first created - const time_t cacheCreationTime_{0}; + const uint32_t cacheCreationTime_{0}; + + // time when CacheAllocator structure is created. 
Whenever a process restarts + // and even if cache content is persisted, this will be reset. It's similar + // to process uptime. (But alternatively if user explicitly shuts down and + // re-attach cache, this will be reset as well) + const uint32_t cacheInstanceCreationTime_{0}; // thread local accumulation of handle counts mutable util::FastStats handleCount_{}; @@ -1934,7 +2195,7 @@ class CacheAllocator : public CacheBase { folly::ThreadLocal ring_; // state for the nvmcache - NvmCacheState nvmCacheState_; + std::optional nvmCacheState_{}; // admission policy for nvmcache std::shared_ptr> nvmAdmissionPolicy_; @@ -1949,9 +2210,14 @@ class CacheAllocator : public CacheBase { friend ReaperAPIWrapper; friend class CacheAPIWrapperForNvm; friend class FbInternalRuntimeUpdateWrapper; + friend class objcache2::ObjectCache; + friend class objcache2::ObjectCacheBase; + template + friend class ReadOnlyMap; // tests friend class facebook::cachelib::tests::NvmCacheTest; + FRIEND_TEST(CachelibAdminTest, WorkingSetAnalysisLoggingTest); template friend class facebook::cachelib::tests::BaseAllocatorTest; template @@ -1963,11 +2229,26 @@ class CacheAllocator : public CacheBase { friend class facebook::cachelib::tests::NvmAdmissionPolicyTest; friend class facebook::cachelib::tests::CacheAllocatorTestWrapper; friend class facebook::cachelib::tests::PersistenceCache; + template + friend class facebook::cachelib::tests::FixedSizeArrayTest; + template + friend class facebook::cachelib::tests::MapTest; // benchmarks template friend class facebook::cachelib::cachebench::Cache; friend class facebook::cachelib::cachebench::tests::CacheTest; + friend void lookupCachelibBufferManagerOnly(); + friend void lookupCachelibMap(); + friend void benchCachelibMap(); + friend void benchCachelibRangeMap(); + + // objectCache + template + friend class facebook::cachelib::objcache::ObjectCache; + friend class GET_DECORATED_CLASS_NAME(objcache::test, + ObjectCache, + ObjectHandleInvalid); }; } // namespace cachelib } // namespace facebook diff --git a/cachelib/allocator/CacheAllocatorConfig.h b/cachelib/allocator/CacheAllocatorConfig.h index f06cadd929..f64089a0da 100644 --- a/cachelib/allocator/CacheAllocatorConfig.h +++ b/cachelib/allocator/CacheAllocatorConfig.h @@ -18,6 +18,7 @@ #include +#include #include #include #include @@ -25,8 +26,10 @@ #include #include "cachelib/allocator/Cache.h" +#include "cachelib/allocator/MemoryTierCacheConfig.h" #include "cachelib/allocator/MM2Q.h" #include "cachelib/allocator/MemoryMonitor.h" +#include "cachelib/allocator/MemoryTierCacheConfig.h" #include "cachelib/allocator/NvmAdmissionPolicy.h" #include "cachelib/allocator/PoolOptimizeStrategy.h" #include "cachelib/allocator/RebalanceStrategy.h" @@ -50,6 +53,7 @@ class CacheAllocatorConfig { using NvmCacheDeviceEncryptor = typename CacheT::NvmCacheT::DeviceEncryptor; using MoveCb = typename CacheT::MoveCb; using NvmCacheConfig = typename CacheT::NvmCacheT::Config; + using MemoryTierConfigs = std::vector; using Key = typename CacheT::Key; using EventTrackerSharedPtr = std::shared_ptr; using Item = typename CacheT::Item; @@ -92,6 +96,8 @@ class CacheAllocatorConfig { // Config for NvmCache. If enabled, cachelib will also make use of flash. CacheAllocatorConfig& enableNvmCache(NvmCacheConfig config); + bool isNvmCacheEnabled() const; + // enable the reject first admission policy through its parameters // @param numEntries the number of entries to track across all splits // @param numSplits the number of splits. 
we drop a whole split by @@ -191,14 +197,26 @@ class CacheAllocatorConfig { // This allows cache to be persisted across restarts. One example use case is // to preserve the cache when releasing a new version of your service. Refer // to our user guide for how to set up cache persistence. + // TODO: get rid of baseAddr or if set make sure all mapping are adjacent? + // We can also make baseAddr a per-tier configuration CacheAllocatorConfig& enableCachePersistence(std::string directory, void* baseAddr = nullptr); - // uses posix shm segments instead of the default sys-v shm segments. - // @throw std::invalid_argument if called without enabling - // cachePersistence() + // Uses posix shm segments instead of the default sys-v shm + // segments. @throw std::invalid_argument if called without enabling + // cachePersistence(). CacheAllocatorConfig& usePosixForShm(); + // Configures cache memory tiers. Each tier represents a cache region inside + // byte-addressable memory such as DRAM, Pmem, CXLmem. + // Accepts vector of MemoryTierCacheConfig. Each vector element describes + // configuration for a single memory cache tier. Tier sizes are specified as + // ratios, the number of parts of total cache size each tier would occupy. + CacheAllocatorConfig& configureMemoryTiers(const MemoryTierConfigs& configs); + + // Return reference to MemoryTierCacheConfigs. + const MemoryTierConfigs& getMemoryTierConfigs() const; + // This turns on a background worker that periodically scans through the // access container and look for expired items and remove them. CacheAllocatorConfig& enableItemReaperInBackground( @@ -259,7 +277,11 @@ class CacheAllocatorConfig { ChainedItemMovingSync sync = {}, uint32_t movingAttemptsLimit = 10); - // This customizes how many items we try to evict before giving up. + // Specify a threshold for detecting slab release stuck + CacheAllocatorConfig& setSlabReleaseStuckThreashold( + std::chrono::milliseconds threshold); + + // This customizes how many items we try to evict before giving up.s // We may fail to evict if someone else (another thread) is using an item. // Setting this to a high limit leads to a higher chance of successful // evictions but it can lead to higher allocation latency as well. @@ -292,9 +314,20 @@ class CacheAllocatorConfig { // smaller than this will always be rejected by NvmAdmissionPolicy. CacheAllocatorConfig& setNvmAdmissionMinTTL(uint64_t ttl); - // skip promote children items in chained when parent fail to promote + // Skip promote children items in chained when parent fail to promote CacheAllocatorConfig& setSkipPromoteChildrenWhenParentFailed(); + // (deprecated) Disable cache eviction. + // Please do not create new callers. CacheLib will stop supporting disabled + // eviction. 
+ [[deprecated]] CacheAllocatorConfig& deprecated_disableEviction(); + + bool isEvictionDisabled() const noexcept { return disableEviction; } + + // We will delay worker start until user explicitly calls + // CacheAllocator::startCacheWorkers() + CacheAllocatorConfig& setDelayCacheWorkersStart(); + // skip promote children items in chained when parent fail to promote bool isSkipPromoteChildrenWhenParentFailed() const noexcept { return skipPromoteChildrenWhenParentFailed; @@ -351,13 +384,19 @@ class CacheAllocatorConfig { bool validateStrategy( const std::shared_ptr& strategy) const; + // check that memory tier ratios are set properly + const CacheAllocatorConfig& validateMemoryTiers() const; + // @return a map representation of the configs std::map serialize() const; + // The max number of memory cache tiers + inline static const size_t kMaxCacheMemoryTiers = 2; + // Cache name for users to indentify their own cache. std::string cacheName{""}; - // Amount of memory for this cache instance + // Amount of memory for this cache instance (sum of all memory tiers' sizes) size_t size = 1 * 1024 * 1024 * 1024; // Directory for shared memory related metadata @@ -422,6 +461,10 @@ class CacheAllocatorConfig { std::shared_ptr defaultPoolRebalanceStrategy{ new RebalanceStrategy{}}; + // The slab release process is considered as being stuck if it does not + // make any progress for the below threshold + std::chrono::milliseconds slabReleaseStuckThreshold{std::chrono::seconds(60)}; + // time interval to sleep between iterations of pool size optimization, // for regular pools and compact caches std::chrono::seconds regularPoolOptimizeInterval{0}; @@ -461,11 +504,6 @@ class CacheAllocatorConfig { // ABOVE are the config for various cache workers // - // if turned on, cache allocator will not evict any item when the - // system is out of memory. The user must free previously allocated - // items to make more room. - bool disableEviction = false; - // the number of tries to search for an item to evict // 0 means it's infinite unsigned int evictionSearchTries{50}; @@ -559,12 +597,21 @@ class CacheAllocatorConfig { // cache. uint64_t nvmAdmissionMinTTL{0}; - // skip promote children items in chained when parent fail to promote + // Skip promote children items in chained when parent fail to promote bool skipPromoteChildrenWhenParentFailed{false}; + // If true, we will delay worker start until user explicitly calls + // CacheAllocator::startCacheWorkers() + bool delayCacheWorkersStart{false}; + friend CacheT; private: + // Configuration for memory tiers. + MemoryTierConfigs memoryTierConfigs{ + {MemoryTierCacheConfig::fromShm().setRatio(1)} + }; + void mergeWithPrefix( std::map& configMap, const std::map& configMapToMerge, @@ -572,6 +619,15 @@ class CacheAllocatorConfig { std::string stringifyAddr(const void* addr) const; std::string stringifyRebalanceStrategy( const std::shared_ptr& strategy) const; + + // Configuration for memory tiers. + MemoryTierConfigs memoryTierConfigs{ + {MemoryTierCacheConfig::fromShm().setRatio(1)}}; + + // if turned on, cache allocator will not evict any item when the + // system is out of memory. The user must free previously allocated + // items to make more room. 
+ bool disableEviction = false; }; template @@ -665,6 +721,11 @@ CacheAllocatorConfig& CacheAllocatorConfig::enableNvmCache( return *this; } +template +bool CacheAllocatorConfig::isNvmCacheEnabled() const { + return nvmConfig.has_value(); +} + template CacheAllocatorConfig& CacheAllocatorConfig::setNvmCacheAdmissionPolicy( std::shared_ptr> policy) { @@ -829,6 +890,28 @@ CacheAllocatorConfig& CacheAllocatorConfig::enableItemReaperInBackground( return *this; } +template +CacheAllocatorConfig& CacheAllocatorConfig::configureMemoryTiers( + const MemoryTierConfigs& config) { + if (config.size() > kMaxCacheMemoryTiers) { + throw std::invalid_argument(folly::sformat( + "Too many memory tiers. The number of supported tiers is {}.", + kMaxCacheMemoryTiers)); + } + if (!config.size()) { + throw std::invalid_argument( + "There must be at least one memory tier config."); + } + memoryTierConfigs = config; + return *this; +} + +template +const typename CacheAllocatorConfig::MemoryTierConfigs& +CacheAllocatorConfig::getMemoryTierConfigs() const { + return memoryTierConfigs; +} + template CacheAllocatorConfig& CacheAllocatorConfig::disableCacheEviction() { disableEviction = true; @@ -903,6 +986,13 @@ CacheAllocatorConfig& CacheAllocatorConfig::enableMovingOnSlabRelease( return *this; } +template +CacheAllocatorConfig& CacheAllocatorConfig::setSlabReleaseStuckThreashold( + std::chrono::milliseconds threshold) { + slabReleaseStuckThreshold = threshold; + return *this; +} + template CacheAllocatorConfig& CacheAllocatorConfig::setEvictionSearchLimit( uint32_t limit) { @@ -944,7 +1034,6 @@ CacheAllocatorConfig& CacheAllocatorConfig::setNvmAdmissionMinTTL( return *this; } -// skip promote children items in chained when parent fail to promote template CacheAllocatorConfig& CacheAllocatorConfig::setSkipPromoteChildrenWhenParentFailed() { @@ -975,7 +1064,8 @@ const CacheAllocatorConfig& CacheAllocatorConfig::validate() const { throw std::invalid_argument( "It's not allowed to enable both RemoveCB and ItemDestructor."); } - return *this; + + return validateMemoryTiers(); } template @@ -1002,13 +1092,31 @@ bool CacheAllocatorConfig::validateStrategy( (type != PoolOptimizeStrategy::MarginalHits || trackTailHits); } +template +const CacheAllocatorConfig& CacheAllocatorConfig::validateMemoryTiers() + const { + size_t parts = 0; + for (const auto& tierConfig : memoryTierConfigs) { + if (!tierConfig.getRatio()) { + throw std::invalid_argument("Tier ratio must be an integer number >=1."); + } + parts += tierConfig.getRatio(); + } + + if (parts > size) { + throw std::invalid_argument( + "Sum of tier ratios must be less than total cache size."); + } + return *this; +} + template std::map CacheAllocatorConfig::serialize() const { std::map configMap; configMap["size"] = std::to_string(size); configMap["cacheDir"] = cacheDir; - configMap["posixShm"] = usePosixShm ? "set" : "empty"; + configMap["posixShm"] = isUsingPosixShm() ? 
"set" : "empty"; configMap["defaultAllocSizes"] = ""; // Stringify std::set @@ -1026,6 +1134,8 @@ std::map CacheAllocatorConfig::serialize() const { configMap["poolResizeSlabsPerIter"] = std::to_string(poolResizeSlabsPerIter); configMap["poolRebalanceInterval"] = util::toString(poolRebalanceInterval); + configMap["slabReleaseStuckThreshold"] = + util::toString(slabReleaseStuckThreshold); configMap["trackTailHits"] = std::to_string(trackTailHits); // Stringify enum switch (memMonitorConfig.mode) { @@ -1081,6 +1191,8 @@ std::map CacheAllocatorConfig::serialize() const { stringifyRebalanceStrategy(defaultPoolRebalanceStrategy); configMap["eventTracker"] = eventTracker ? "set" : "empty"; configMap["nvmAdmissionMinTTL"] = std::to_string(nvmAdmissionMinTTL); + configMap["delayCacheWorkersStart"] = + delayCacheWorkersStart ? "true" : "false"; mergeWithPrefix(configMap, throttleConfig.serialize(), "throttleConfig"); mergeWithPrefix(configMap, chainedItemAccessConfig.serialize(), diff --git a/cachelib/allocator/CacheItem-inl.h b/cachelib/allocator/CacheItem-inl.h index f87ab9f101..db0bbe7ca8 100644 --- a/cachelib/allocator/CacheItem-inl.h +++ b/cachelib/allocator/CacheItem-inl.h @@ -73,16 +73,6 @@ void* CacheItem::getMemoryInternal() const noexcept { } } -// Deprecated -template -void* CacheItem::getWritableMemory() const { - if (isChainedItem()) { - return asChainedItem().getMemory(); - } else { - return alloc_.getMemory(); - } -} - template uint32_t CacheItem::getOffsetForMemory() const noexcept { return reinterpret_cast(getMemory()) - @@ -229,8 +219,8 @@ bool CacheItem::markMoving() noexcept { } template -void CacheItem::unmarkMoving() noexcept { - ref_.unmarkMoving(); +RefcountWithFlags::Value CacheItem::unmarkMoving() noexcept { + return ref_.unmarkMoving(); } template @@ -273,6 +263,21 @@ bool CacheItem::isNvmEvicted() const noexcept { return ref_.isNvmEvicted(); } +template +void CacheItem::markIncomplete() noexcept { + ref_.markIncomplete(); +} + +template +void CacheItem::unmarkIncomplete() noexcept { + ref_.unmarkIncomplete(); +} + +template +bool CacheItem::isIncomplete() const noexcept { + return ref_.isIncomplete(); +} + template void CacheItem::markIsChainedItem() noexcept { XDCHECK(!hasChainedItem()); diff --git a/cachelib/allocator/CacheItem.h b/cachelib/allocator/CacheItem.h index bde46f296e..61b374720e 100644 --- a/cachelib/allocator/CacheItem.h +++ b/cachelib/allocator/CacheItem.h @@ -141,6 +141,7 @@ class CACHELIB_PACKED_ATTR CacheItem { * to be mapped to different addresses on shared memory. */ using CompressedPtr = facebook::cachelib::CompressedPtr; + using SingleTierPtrCompressor = MemoryAllocator::SingleTierPtrCompressor; using PtrCompressor = MemoryAllocator::PtrCompressor; // Get the required size for a cache item given the size of memory @@ -171,11 +172,6 @@ class CACHELIB_PACKED_ATTR CacheItem { // piece of memory. void* getMemory() noexcept; - // (deprecated) Writable memory for this allocation. The caller is free to do - // whatever he wants with it and needs to ensure thread sage for access into - // this piece of memory. 
- [[deprecated("Use getMemory() instead")]] void* getWritableMemory() const; - // Cast item's readonly memory to a readonly user type template const T* getMemoryAs() const noexcept { @@ -188,13 +184,6 @@ class CACHELIB_PACKED_ATTR CacheItem { return reinterpret_cast(getMemory()); } - // (Deprecated) Cast item's writable memory to a writable user type - template - [[deprecated("Use getMemoryAs() instead")]] T* - getWritableMemoryAs() noexcept { - return reinterpret_cast(getWritableMemory()); - } - // This is the size of the memory allocation requested by the user. // The memory range [getMemory(), getMemory() + getSize()) is usable. uint32_t getSize() const noexcept; @@ -250,6 +239,14 @@ class CACHELIB_PACKED_ATTR CacheItem { void unmarkNvmEvicted() noexcept; bool isNvmEvicted() const noexcept; + /** + * Marks that the item is migrating between memory tiers and + * not ready for access now. Accessing thread should wait. + */ + void markIncomplete() noexcept; + void unmarkIncomplete() noexcept; + bool isIncomplete() const noexcept; + /** * Function to set the timestamp for when to expire an item * @@ -369,7 +366,7 @@ class CACHELIB_PACKED_ATTR CacheItem { * Unmarking moving does not depend on `isInMMContainer` */ bool markMoving() noexcept; - void unmarkMoving() noexcept; + RefcountWithFlags::Value unmarkMoving() noexcept; bool isMoving() const noexcept; bool isOnlyMoving() const noexcept; diff --git a/cachelib/allocator/CacheStats.cpp b/cachelib/allocator/CacheStats.cpp index 4f7811e5be..97ef6d47ca 100644 --- a/cachelib/allocator/CacheStats.cpp +++ b/cachelib/allocator/CacheStats.cpp @@ -42,6 +42,8 @@ void Stats::init() { initToZero(*fragmentationSize); initToZero(*chainedItemEvictions); initToZero(*regularItemEvictions); + + classAllocLatency = std::make_unique(); } template @@ -49,7 +51,7 @@ struct SizeVerify {}; void Stats::populateGlobalCacheStats(GlobalCacheStats& ret) const { #ifndef SKIP_SIZE_VERIFY - SizeVerify a = SizeVerify<16144>{}; + SizeVerify a = SizeVerify<16160>{}; std::ignore = a; #endif ret.numCacheGets = numCacheGets.get(); @@ -58,6 +60,7 @@ void Stats::populateGlobalCacheStats(GlobalCacheStats& ret) const { ret.numCacheRemoves = numCacheRemoves.get(); ret.numCacheRemoveRamHits = numCacheRemoveRamHits.get(); ret.numRamDestructorCalls = numRamDestructorCalls.get(); + ret.numDestructorExceptions = numDestructorExceptions.get(); ret.numNvmGets = numNvmGets.get(); ret.numNvmGetMiss = numNvmGetMiss.get(); @@ -133,6 +136,7 @@ void Stats::populateGlobalCacheStats(GlobalCacheStats& ret) const { ret.numEvictionFailureFromMoving = evictFailMove.get(); ret.numEvictionFailureFromParentMoving = evictFailParentMove.get(); ret.numAbortedSlabReleases = numAbortedSlabReleases.get(); + ret.numSkippedSlabReleases = numSkippedSlabReleases.get(); } } // namespace detail diff --git a/cachelib/allocator/CacheStats.h b/cachelib/allocator/CacheStats.h index 146de6bea7..7e8603cecd 100644 --- a/cachelib/allocator/CacheStats.h +++ b/cachelib/allocator/CacheStats.h @@ -25,6 +25,7 @@ #include "cachelib/allocator/memory/Slab.h" #include "cachelib/common/FastStats.h" #include "cachelib/common/PercentileStats.h" +#include "cachelib/common/RollingStats.h" #include "cachelib/common/Time.h" namespace facebook { @@ -34,8 +35,12 @@ namespace cachelib { struct EvictionStatPerType { // the age of the oldest element in seconds uint64_t oldestElementAge = 0ULL; + // number of elements in the eviction queue uint64_t size = 0ULL; + + // the estimated age after removing a slab worth of elements + uint64_t 
projectedAge = 0ULL; }; // stats class for one MM container (a.k.a one allocation class) related to @@ -46,9 +51,6 @@ struct EvictionAgeStat { EvictionStatPerType hotQueueStat; EvictionStatPerType coldQueueStat; - - // this is the estimated age after removing a slab worth of elements - uint64_t projectedAge; }; // stats related to evictions for a pool @@ -71,10 +73,6 @@ struct PoolEvictionAgeStats { const EvictionStatPerType& getColdEvictionStat(ClassId cid) const { return classEvictionAgeStats.at(cid).coldQueueStat; } - - uint64_t getProjectedAge(ClassId cid) const { - return classEvictionAgeStats.at(cid).projectedAge; - } }; // Stats for MM container @@ -98,6 +96,20 @@ struct MMContainerStat { uint64_t numTailAccesses; }; +struct AllocationClassBaseStat { + // size of allocation class + size_t allocSize{0}; + + // size of memory assigned to this allocation class + size_t memorySize{0}; + + // percent of free memory in this class + double approxFreePercent{0.0}; + + // Rolling allocation latency (in ns) + util::RollingStats allocLatencyNs; +}; + // cache related stats for a given allocation class. struct CacheStat { // allocation size for this container. @@ -257,6 +269,7 @@ struct SlabReleaseStats { uint64_t numMoveSuccesses; uint64_t numEvictionAttempts; uint64_t numEvictionSuccesses; + uint64_t numSlabReleaseStuck; }; // Stats for reaper @@ -458,12 +471,26 @@ struct GlobalCacheStats { util::PercentileStats::Estimates nvmEvictionSecondsToExpiry{}; util::PercentileStats::Estimates nvmPutSize{}; + // time when CacheAllocator structure is created. Whenever a process restarts + // and even if cache content is persisted, this will be reset. It's similar + // to process uptime. (But alternatively if user explicitly shuts down and + // re-attach cache, this will be reset as well) + uint64_t cacheInstanceUpTime{0}; + // time since the ram cache was created in seconds uint64_t ramUpTime{0}; // time since the nvm cache was created in seconds uint64_t nvmUpTime{0}; + // If true, it means ram cache is brand new, or it was not restored from a + // previous cache instance + bool isNewRamCache{false}; + + // If true, it means nvm cache is brand new, or it was not restored from a + // previous cache instance + bool isNewNvmCache{false}; + // if nvmcache is currently active and serving gets bool nvmCacheEnabled; @@ -481,6 +508,9 @@ struct GlobalCacheStats { // Number of times slab release was aborted due to shutdown uint64_t numAbortedSlabReleases{0}; + // Number of times slab was skipped when reaper runs + uint64_t numSkippedSlabReleases{0}; + // current active handles outstanding. This stat should // not go to negative. 
If it's negative, it means we have // leaked handles (or some sort of accounting bug internally) @@ -521,6 +551,9 @@ struct CacheMemoryStats { // rss size of the process size_t memRssSize{0}; + + // percentage of free slabs + std::vector slabsApproxFreePercentages{0.0}; }; // Stats for compact cache diff --git a/cachelib/allocator/CacheStatsInternal.h b/cachelib/allocator/CacheStatsInternal.h index 355afb594f..7ae57d4d23 100644 --- a/cachelib/allocator/CacheStatsInternal.h +++ b/cachelib/allocator/CacheStatsInternal.h @@ -21,6 +21,7 @@ #include "cachelib/allocator/Cache.h" #include "cachelib/allocator/memory/MemoryAllocator.h" #include "cachelib/common/AtomicCounter.h" +#include "cachelib/common/RollingStats.h" namespace facebook { namespace cachelib { @@ -175,6 +176,10 @@ struct Stats { AtomicCounter numReleasedForResize{0}; AtomicCounter numReleasedForAdvise{0}; AtomicCounter numAbortedSlabReleases{0}; + AtomicCounter numSkippedSlabReleases{0}; + + // Flag indicating the slab release stuck + AtomicCounter numSlabReleaseStuck{0}; // allocations with invalid parameters AtomicCounter invalidAllocs{0}; @@ -221,6 +226,14 @@ struct Stats { std::unique_ptr chainedItemEvictions{}; std::unique_ptr regularItemEvictions{}; + using PerTierPoolClassRollingStats = std::array< + std::array, + MemoryPoolManager::kMaxPools>, + CacheBase::kMaxTiers>; + + // rolling latency tracking for every alloc class in every pool + std::unique_ptr classAllocLatency{}; + // Eviction failures due to parent cannot be removed from access container AtomicCounter evictFailParentAC{0}; diff --git a/cachelib/allocator/CacheVersion.h b/cachelib/allocator/CacheVersion.h index 0189301d44..cd2ca7b2d7 100644 --- a/cachelib/allocator/CacheVersion.h +++ b/cachelib/allocator/CacheVersion.h @@ -28,7 +28,7 @@ namespace cachelib { // then you only need to bump this version. // I.e. you're rolling out a new feature that is cache compatible with previous // Cachelib instances. -constexpr uint64_t kCachelibVersion = 16; +constexpr uint64_t kCachelibVersion = 17; // Updating this version will cause RAM cache to be dropped for all // cachelib users!!! Proceed with care!! You must coordinate with diff --git a/cachelib/allocator/ChainedHashTable-inl.h b/cachelib/allocator/ChainedHashTable-inl.h index 1c0453855c..8a8e68ded1 100644 --- a/cachelib/allocator/ChainedHashTable-inl.h +++ b/cachelib/allocator/ChainedHashTable-inl.h @@ -70,7 +70,7 @@ ChainedHashTable::Impl::Impl(size_t numBuckets, } template T::*HookPtr> -ChainedHashTable::Impl::~Impl() { +ChainedHashTable::Impl::~Impl() { if (restorable_) { hashTable_.release(); } @@ -241,12 +241,12 @@ ChainedHashTable::Container::Container( ht_{config_.getNumBuckets(), memStart, compressor, config_.getHasher(), false /* resetMem */}, locks_{config_.getLocksPower(), config_.getHasher()}, - numKeys_(*object.numKeys_ref()) { + numKeys_(*object.numKeys()) { if (config_.getBucketsPower() != - static_cast(*object.bucketsPower_ref())) { + static_cast(*object.bucketsPower())) { throw std::invalid_argument(folly::sformat( "Hashtable bucket power not compatible. 
old = {}, new = {}", - *object.bucketsPower_ref(), + *object.bucketsPower(), config.getBucketsPower())); } @@ -260,11 +260,11 @@ ChainedHashTable::Container::Container( // checking hasher magic id not equal to 0 is to ensure it'll be // a warm roll going from a cachelib without hasher magic id to // one with a magic id - if (*object.hasherMagicId_ref() != 0 && - *object.hasherMagicId_ref() != config_.getHasher()->getMagicId()) { + if (*object.hasherMagicId() != 0 && + *object.hasherMagicId() != config_.getHasher()->getMagicId()) { throw std::invalid_argument(folly::sformat( "Hash object's ID mismatch. expected = {}, actual = {}", - *object.hasherMagicId_ref(), config_.getHasher()->getMagicId())); + *object.hasherMagicId(), config_.getHasher()->getMagicId())); } } @@ -476,10 +476,10 @@ ChainedHashTable::Container::saveState() const { } serialization::ChainedHashTableObject object; - *object.bucketsPower_ref() = config_.getBucketsPower(); - *object.locksPower_ref() = config_.getLocksPower(); - *object.numKeys_ref() = numKeys_; - *object.hasherMagicId_ref() = config_.getHasher()->getMagicId(); + *object.bucketsPower() = config_.getBucketsPower(); + *object.locksPower() = config_.getLocksPower(); + *object.numKeys() = numKeys_; + *object.hasherMagicId() = config_.getHasher()->getMagicId(); return object; } diff --git a/cachelib/allocator/ChainedHashTable.h b/cachelib/allocator/ChainedHashTable.h index 411606b148..f3c87bfa7a 100644 --- a/cachelib/allocator/ChainedHashTable.h +++ b/cachelib/allocator/ChainedHashTable.h @@ -272,13 +272,19 @@ class ChainedHashTable { } // Estimate bucketsPower and LocksPower based on cache entries. - void sizeBucketsPowerAndLocksPower(size_t cacheEntries) noexcept { + void sizeBucketsPowerAndLocksPower(size_t cacheEntries) { // The percentage of used buckets vs unused buckets is measured by a load // factor. For optimal performance, the load factor should not be more // than 60%. bucketsPower_ = static_cast(ceil(log2(cacheEntries * 1.6 /* load factor */))); + if (bucketsPower_ > kMaxBucketPower) { + throw std::invalid_argument(folly::sformat( + "Invalid arguments to the config constructor cacheEntries = {}", + cacheEntries)); + } + // 1 lock per 1000 buckets. locksPower_ = std::max(1, bucketsPower_ - 10); } @@ -630,7 +636,9 @@ class ChainedHashTable { Stats getStats() const noexcept { return {numKeys_, ht_.getNumBuckets()}; } // Get the total number of keys inserted into the hash table - uint64_t getNumKeys() const noexcept { return numKeys_; } + uint64_t getNumKeys() const noexcept { + return numKeys_.load(std::memory_order_relaxed); + } private: using Hashtable = Impl; diff --git a/cachelib/allocator/Handle.h b/cachelib/allocator/Handle.h index 1d97f8147c..ce455a0bca 100644 --- a/cachelib/allocator/Handle.h +++ b/cachelib/allocator/Handle.h @@ -43,6 +43,9 @@ enum class HandleFlags : uint8_t { kWentToNvm = 1 << 2, }; +template +struct WriteHandleImpl; + // RAII class that manages cache item pointer lifetime. These handles // can only be created by a CacheAllocator and upon destruction the handle // takes care of releasing the item to the correct cache allocator instance. @@ -189,6 +192,13 @@ struct ReadHandleImpl { } } + WriteHandleImpl toWriteHandle() && { + XDCHECK_NE(alloc_, nullptr); + XDCHECK_NE(getInternal(), nullptr); + alloc_->invalidateNvm(*getInternal()); + return WriteHandleImpl{std::move(*this)}; + } + using ReadyCallback = folly::Function; // Return true iff item handle is ready to use. 
@@ -235,8 +245,8 @@ struct ReadHandleImpl { bool isWriteHandle() const { return false; } protected: - // accessor. Calling get on handle with isReady() == false blocks the thread - // until the handle is ready. + // accessor. Calling getInternal() on handle with isReady() == false blocks + // the thread until the handle is ready. FOLLY_ALWAYS_INLINE Item* getInternal() const noexcept { return waitContext_ ? waitContext_->get() : it_; } @@ -325,7 +335,7 @@ struct ReadHandleImpl { // We will construct another handle that will be transferred to // another thread. So we will decrement a count locally to be back // to 0 on this thread. In the user thread, they must increment by - // 1. It is done automatically if the user converted their ItemHandle + // 1. It is done automatically if the user converted their Handle // to a SemiFuture via toSemiFuture(). auto readHandle = hdl.clone(); if (readHandle) { @@ -392,6 +402,12 @@ struct ReadHandleImpl { } } + protected: + friend class ReadHandleImpl; + // Method used only by ReadHandleImpl ctor + void discard() { + it_.store(nullptr, std::memory_order_relaxed); + } private: // we are waiting on Item* to be set to a value. One of the valid values is // nullptr. So choose something that we dont expect to indicate a ptr @@ -415,7 +431,7 @@ struct ReadHandleImpl { // cache->adjustHandleCountForThread_private(1); // This is needed because cachelib had previously moved a handle from an // internal thread to this callback, and cachelib internally removed a - // 1. It is done automatically if the user converted their ItemHandle + // 1. It is done automatically if the user converted their Handle // to a SemiFuture via toSemiFuture(). For more details, refer to comments // around ItemWaitContext. // @@ -471,11 +487,19 @@ struct ReadHandleImpl { // Handle which has the item already FOLLY_ALWAYS_INLINE ReadHandleImpl(Item* it, CacheT& alloc) noexcept - : alloc_(&alloc), it_(it) {} + : alloc_(&alloc), it_(it) { + if (it_ && it_->isIncomplete()) { + waitContext_ = std::make_shared(alloc); + if (!alloc_->addWaitContextForMovingItem(it->getKey(), waitContext_)) { + waitContext_->discard(); + waitContext_.reset(); + } + } + } // handle that has a wait context allocated. Used for async handles // In this case, the it_ will be filled in asynchronously and mulitple - // ItemHandles can wait on the one underlying handle + // Handles can wait on the one underlying handle explicit ReadHandleImpl(CacheT& alloc) noexcept : alloc_(&alloc), it_(nullptr), @@ -487,10 +511,10 @@ struct ReadHandleImpl { // Object-cache's c++ allocator will need to create a zero refcount handle in // order to access CacheAllocator API. Search for this function for details. - template - friend ItemHandle2* objcacheInitializeZeroRefcountHandle(void* handleStorage, - Item2* it, - Cache2& alloc); + template + friend HandleT* objcacheInitializeZeroRefcountHandle(void* handleStorage, + Item2* it, + Cache2& alloc); // A handle is marked as nascent when it was not yet inserted into the cache. // However, user can override it by marking an item as "not nascent" even if @@ -498,13 +522,13 @@ struct ReadHandleImpl { // item will still be processed by RemoveCallback if user frees it. Today, // the only user who can do this is Cachelib's ObjectCache API to ensure the // correct RAII behavior for an object. 
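
Stepping back to the ChainedHashTable.h change above: sizeBucketsPowerAndLocksPower() now throws instead of silently computing a bucketsPower beyond kMaxBucketPower, so an oversized cacheEntries argument fails fast with invalid_argument. A standalone sketch of the same arithmetic (the maxBucketPower default below is a placeholder, not the real constant):

    // Sketch only: reproduces the sizing arithmetic from the hunk above.
    #include <cmath>
    #include <cstddef>
    #include <cstdint>
    #include <stdexcept>
    #include <utility>

    std::pair<uint32_t, uint32_t> sizeForEntries(size_t cacheEntries,
                                                 uint32_t maxBucketPower = 32) {
      // Keep the load factor at or below ~60%: buckets >= cacheEntries * 1.6.
      const auto bucketsPower =
          static_cast<uint32_t>(std::ceil(std::log2(cacheEntries * 1.6)));
      if (bucketsPower > maxBucketPower) {
        throw std::invalid_argument("cacheEntries too large for the hash table");
      }
      // Roughly one lock per 2^10 buckets, with a floor of locksPower = 1.
      const uint32_t locksPower = bucketsPower > 11 ? bucketsPower - 10 : 1;
      return {bucketsPower, locksPower};
    }

    // Example: 1M entries -> log2(1.6e6) ~= 20.6 -> bucketsPower = 21,
    // locksPower = 11 (2^21 buckets guarded by 2^11 locks).
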
- template - friend void objcacheUnmarkNascent(const ItemHandle2& hdl); + template + friend void objcacheUnmarkNascent(const HandleT& hdl); // Object-cache's c++ allocator needs to access CacheAllocator directly from // an item handle in order to access CacheAllocator APIs. - template - friend typename ItemHandle2::CacheT& objcacheGetCache(const ItemHandle2& hdl); + template + friend typename HandleT::CacheT& objcacheGetCache(const HandleT& hdl); // instance of the cache this handle and item belong to. CacheT* alloc_ = nullptr; @@ -524,7 +548,7 @@ struct ReadHandleImpl { friend typename CacheT::NvmCacheT; // Following methods are only used in tests where we need to access private - // methods in ItemHandle + // methods in ReadHandle template friend T1 createHandleWithWaitContextForTest(T2&); template @@ -544,12 +568,6 @@ struct WriteHandleImpl : public ReadHandleImpl { using ReadHandle = ReadHandleImpl; using ReadHandle::ReadHandle; // inherit constructors - // TODO(jiayueb): remove this constructor after we finish R/W handle - // migration. In the end, WriteHandle should only be obtained via - // CacheAllocator APIs like findToWrite(). - explicit WriteHandleImpl(ReadHandle&& readHandle) - : ReadHandle(std::move(readHandle)) {} - // Accessors always return a non-const item. FOLLY_ALWAYS_INLINE Item* operator->() const noexcept { return ReadHandle::getInternal(); @@ -570,16 +588,17 @@ struct WriteHandleImpl : public ReadHandleImpl { bool isWriteHandle() const { return true; } // Friends + friend ReadHandle; // Only CacheAllocator and NvmCache can create non-default constructed handles friend CacheT; friend typename CacheT::NvmCacheT; // Object-cache's c++ allocator will need to create a zero refcount handle in // order to access CacheAllocator API. Search for this function for details. - template - friend ItemHandle2* objcacheInitializeZeroRefcountHandle(void* handleStorage, - Item2* it, - Cache2& alloc); + template + friend HandleT* objcacheInitializeZeroRefcountHandle(void* handleStorage, + Item2* it, + Cache2& alloc); // A handle is marked as nascent when it was not yet inserted into the cache. // However, user can override it by marking an item as "not nascent" even if @@ -587,16 +606,16 @@ struct WriteHandleImpl : public ReadHandleImpl { // item will still be processed by RemoveCallback if user frees it. Today, // the only user who can do this is Cachelib's ObjectCache API to ensure the // correct RAII behavior for an object. - template - friend void objcacheUnmarkNascent(const ItemHandle2& hdl); + template + friend void objcacheUnmarkNascent(const HandleT& hdl); // Object-cache's c++ allocator needs to access CacheAllocator directly from // an item handle in order to access CacheAllocator APIs. 
- template - friend typename ItemHandle2::CacheT& objcacheGetCache(const ItemHandle2& hdl); + template + friend typename HandleT::CacheT& objcacheGetCache(const HandleT& hdl); // Following methods are only used in tests where we need to access private - // methods in ItemHandle + // methods in WriteHandle template friend T1 createHandleWithWaitContextForTest(T2&); template @@ -605,6 +624,10 @@ struct WriteHandleImpl : public ReadHandleImpl { FRIEND_TEST(ItemHandleTest, WaitContext_readycb); FRIEND_TEST(ItemHandleTest, WaitContext_ready_immediate); FRIEND_TEST(ItemHandleTest, onReadyWithNoWaitContext); + + private: + explicit WriteHandleImpl(ReadHandle&& readHandle) + : ReadHandle(std::move(readHandle)) {} }; template diff --git a/cachelib/allocator/LruTailAgeStrategy.cpp b/cachelib/allocator/LruTailAgeStrategy.cpp index 210536683a..182b0cc6a3 100644 --- a/cachelib/allocator/LruTailAgeStrategy.cpp +++ b/cachelib/allocator/LruTailAgeStrategy.cpp @@ -29,6 +29,42 @@ namespace cachelib { LruTailAgeStrategy::LruTailAgeStrategy(Config config) : RebalanceStrategy(LruTailAge), config_(std::move(config)) {} +uint64_t LruTailAgeStrategy::getOldestElementAge( + const PoolEvictionAgeStats& poolEvictionAgeStats, ClassId cid) const { + switch (config_.queueSelector) { + case Config::QueueSelector::kHot: + return poolEvictionAgeStats.classEvictionAgeStats.at(cid) + .hotQueueStat.oldestElementAge; + case Config::QueueSelector::kWarm: + return poolEvictionAgeStats.classEvictionAgeStats.at(cid) + .warmQueueStat.oldestElementAge; + case Config::QueueSelector::kCold: + return poolEvictionAgeStats.classEvictionAgeStats.at(cid) + .coldQueueStat.oldestElementAge; + default: + XDCHECK(false) << "queue selector is invalid"; + return 0; + } +} + +uint64_t LruTailAgeStrategy::getProjectedAge( + const PoolEvictionAgeStats& poolEvictionAgeStats, ClassId cid) const { + switch (config_.queueSelector) { + case Config::QueueSelector::kHot: + return poolEvictionAgeStats.classEvictionAgeStats.at(cid) + .hotQueueStat.projectedAge; + case Config::QueueSelector::kWarm: + return poolEvictionAgeStats.classEvictionAgeStats.at(cid) + .warmQueueStat.projectedAge; + case Config::QueueSelector::kCold: + return poolEvictionAgeStats.classEvictionAgeStats.at(cid) + .coldQueueStat.projectedAge; + default: + XDCHECK(false) << "queue selector is invalid"; + return 0; + } +} + // The list of allocation classes to be rebalanced is determined by: // // 0. Filter out classes that have below minSlabThreshold_ @@ -70,9 +106,9 @@ ClassId LruTailAgeStrategy::pickVictim( return *std::max_element( victims.begin(), victims.end(), [&](ClassId a, ClassId b) { return ( - poolEvictionAgeStats.getProjectedAge(a) * + getProjectedAge(poolEvictionAgeStats, a) * (config.getWeight ? config.getWeight(pid, a, poolStats) : 1.0) < - poolEvictionAgeStats.getProjectedAge(b) * + getProjectedAge(poolEvictionAgeStats, b) * (config.getWeight ? config.getWeight(pid, b, poolStats) : 1.0)); }); } @@ -94,9 +130,9 @@ ClassId LruTailAgeStrategy::pickReceiver( // the youngest age among the potenital receivers return *std::min_element( receivers.begin(), receivers.end(), [&](ClassId a, ClassId b) { - return (poolEvictionAgeStats.getOldestElementAge(a) * + return (getOldestElementAge(poolEvictionAgeStats, a) * (config.getWeight ? config.getWeight(pid, a, stats) : 1.0) < - poolEvictionAgeStats.getOldestElementAge(b) * + getOldestElementAge(poolEvictionAgeStats, b) * (config.getWeight ? 
config.getWeight(pid, b, stats) : 1.0)); }); } @@ -131,9 +167,9 @@ RebalanceContext LruTailAgeStrategy::pickVictimAndReceiverImpl( if (!config.getWeight) { const auto victimProjectedTailAge = - poolEvictionAgeStats.getProjectedAge(ctx.victimClassId); + getProjectedAge(poolEvictionAgeStats, ctx.victimClassId); const auto receiverTailAge = - poolEvictionAgeStats.getOldestElementAge(ctx.receiverClassId); + getOldestElementAge(poolEvictionAgeStats, ctx.receiverClassId); XLOGF(DBG, "Rebalancing: receiver = {}, receiverTailAge = {}, victim = {}", static_cast(ctx.receiverClassId), receiverTailAge, diff --git a/cachelib/allocator/LruTailAgeStrategy.h b/cachelib/allocator/LruTailAgeStrategy.h index 086ed6701a..2a90045ba4 100644 --- a/cachelib/allocator/LruTailAgeStrategy.h +++ b/cachelib/allocator/LruTailAgeStrategy.h @@ -53,6 +53,14 @@ class LruTailAgeStrategy : public RebalanceStrategy { const PoolId pid, const ClassId classId, const PoolStats& pStats)>; WeightFn getWeight = {}; + // This lets us specify which queue's eviction age to use. + // Note not all eviction policies provide hot, warm, and cold queues. + // We leave it up to the policy to determine how to define hot, warm, cold + // eviction ages. For exmaple, in LRU, we use the same eviction-age + // for all three stats. + enum class QueueSelector { kHot, kWarm, kCold }; + QueueSelector queueSelector{QueueSelector::kWarm}; + // The free memory threshold to be used to pick victim class. size_t getFreeMemThreshold() const noexcept { return numSlabsFreeMem * Slab::kSize; @@ -110,6 +118,12 @@ class LruTailAgeStrategy : public RebalanceStrategy { ClassId victim, const PoolEvictionAgeStats& poolEvictionAgeStats) const; + uint64_t getOldestElementAge(const PoolEvictionAgeStats& poolEvictionAgeStats, + ClassId cid) const; + + uint64_t getProjectedAge(const PoolEvictionAgeStats& poolEvictionAgeStats, + ClassId cid) const; + // Config for this strategy, this can be updated anytime. // Do not access this directly, always use `getConfig()` to // obtain a copy first diff --git a/cachelib/allocator/MM2Q-inl.h b/cachelib/allocator/MM2Q-inl.h index c112f0b442..0b0df33413 100644 --- a/cachelib/allocator/MM2Q-inl.h +++ b/cachelib/allocator/MM2Q-inl.h @@ -21,9 +21,9 @@ namespace cachelib { template T::*HookPtr> MM2Q::Container::Container(const serialization::MM2QObject& object, PtrCompressor compressor) - : lru_(*object.lrus_ref(), compressor), - tailTrackingEnabled_(*object.tailTrackingEnabled_ref()), - config_(*object.config_ref()) { + : lru_(*object.lrus(), compressor), + tailTrackingEnabled_(*object.tailTrackingEnabled()), + config_(*object.config()) { lruRefreshTime_ = config_.lruRefreshTime; nextReconfigureTime_ = config_.mmReconfigureIntervalSecs.count() == 0 ? std::numeric_limits
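
Returning to the LruTailAgeStrategy.h hunk above: the new QueueSelector lets the tail-age strategy rank allocation classes by the hot, warm, or cold queue's eviction age instead of always the warm one (for plain LRU all three report the same age). A hedged configuration sketch; only the Config type, its queueSelector field, and the Config-taking constructor come from this diff, the rest is illustrative:

    // Sketch only: building a tail-age strategy that keys off the cold queue.
    #include <memory>

    #include "cachelib/allocator/LruTailAgeStrategy.h"

    std::shared_ptr<facebook::cachelib::RebalanceStrategy> makeColdTailAgeStrategy() {
      using facebook::cachelib::LruTailAgeStrategy;

      LruTailAgeStrategy::Config config;
      // Compare classes by the cold queue's oldest-element / projected age
      // (meaningful for MM2Q-style policies with hot/warm/cold queues).
      config.queueSelector = LruTailAgeStrategy::Config::QueueSelector::kCold;
      return std::make_shared<LruTailAgeStrategy>(config);
    }
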