diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000000..ac12399b46 --- /dev/null +++ b/.clang-format @@ -0,0 +1,45 @@ +--- +AccessModifierOffset: -1 +AlignEscapedNewlinesLeft: true +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakBeforeMultilineStrings: true +AlwaysBreakTemplateDeclarations: true +BasedOnStyle: Google +BinPackParameters: false +BreakBeforeBinaryOperators: false +BreakBeforeBraces: Attach +BreakConstructorInitializersBeforeComma: false +ColumnLimit: 80 +ConstructorInitializerAllOnOneLineOrOnePerLine: true +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +ExperimentalAutoDetectBinPacking: true +IndentCaseLabels: false +IndentFunctionDeclarationAfterType: false +IndentWidth: 2 +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCSpaceBeforeProtocolList: false +PenaltyBreakBeforeFirstCallParameter: 10 +PenaltyBreakComment: 60 +PenaltyBreakFirstLessLess: 20 +PenaltyBreakString: 1000 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 200 +PointerAlignment: Left +SpaceAfterControlStatementKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +Standard: Cpp11 +TabWidth: 8 +UseTab: Never +... diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md deleted file mode 100644 index 670f752033..0000000000 --- a/.github/ISSUE_TEMPLATE.md +++ /dev/null @@ -1,17 +0,0 @@ -# Summary of Issue or Feature request - -## Goal -Increase flash space efficiency or build is not handling X - -## Context -This is related to project Bar. It addresses a key gap in its design. - -## Suggested Approach -We will add an additional component between X and Y. We will introduce a new API called Hoop. - -## Success Metrics - -No regression on cpu -No regression on read and write latency to flash device -Increase space density by 20% -Added a new metrics here diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000000..dd84ea7824 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,38 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: '' +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Desktop (please complete the following information):** + - OS: [e.g. iOS] + - Browser [e.g. chrome, safari] + - Version [e.g. 22] + +**Smartphone (please complete the following information):** + - Device: [e.g. iPhone6] + - OS: [e.g. iOS8.1] + - Browser [e.g. stock browser, safari] + - Version [e.g. 22] + +**Additional context** +Add any other context about the problem here. 
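Returning to the .clang-format added at the top of this diff: to make its effect concrete, here is a small hand-written sketch (not a file from the repository) shaped the way those options dictate, Google base style, 2-space indents, attached braces, left-aligned pointers, and an 80-column limit.

```cpp
// Illustrative only: code formatted per the .clang-format settings above.
class LookupTable {
 public:  // AccessModifierOffset: -1 relative to IndentWidth: 2
  explicit LookupTable(int capacity) : capacity_{capacity} {}

  // AllowShortIfStatementsOnASingleLine: false => body on its own line.
  int* find(int key) {  // PointerAlignment: Left => "int* find"
    if (key < 0) {
      return nullptr;
    }
    return &slots_[key % capacity_];  // SpacesBeforeTrailingComments: 1
  }

 private:
  int capacity_;
  int slots_[64] = {};  // Cpp11BracedListStyle: true
};
```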
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000000..bbcbbe7d61 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: '' +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.github/ISSUE_TEMPLATE/general-question.md b/.github/ISSUE_TEMPLATE/general-question.md new file mode 100644 index 0000000000..49e480cb96 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/general-question.md @@ -0,0 +1,10 @@ +--- +name: General Question +about: General Questions about CacheLib usage, compilation, and anything else +title: '' +labels: '' +assignees: '' + +--- + + diff --git a/.github/workflows/build-cachelib-centos-long.yml b/.github/workflows/build-cachelib-centos-long.yml new file mode 100644 index 0000000000..92165f603b --- /dev/null +++ b/.github/workflows/build-cachelib-centos-long.yml @@ -0,0 +1,39 @@ +name: build-cachelib-centos-latest +on: + schedule: + - cron: '0 7 * * *' + +jobs: + build-cachelib-centos8-latest: + name: "CentOS/latest - Build CacheLib with all dependencies" + runs-on: ubuntu-latest + # Docker container image name + container: "centos:latest" + steps: + - name: "update packages" + run: dnf upgrade -y + - name: "install sudo,git" + run: dnf install -y sudo git cmake gcc + - name: "System Information" + run: | + echo === uname === + uname -a + echo === /etc/os-release === + cat /etc/os-release + echo === df -hl === + df -hl + echo === free -h === + free -h + echo === top === + top -b -n1 -1 -Eg || timeout 1 top -b -n1 + echo === env === + env + echo === gcc -v === + gcc -v + - name: "checkout sources" + uses: actions/checkout@v2 + - name: "build CacheLib using build script" + run: ./contrib/build.sh -j -v -T + - name: "run tests" + timeout-minutes: 60 + run: cd opt/cachelib/tests && ../../../run_tests.sh long diff --git a/.github/workflows/build-cachelib-centos.yml b/.github/workflows/build-cachelib-centos.yml index 3b071a186a..63b30e4821 100644 --- a/.github/workflows/build-cachelib-centos.yml +++ b/.github/workflows/build-cachelib-centos.yml @@ -1,18 +1,15 @@ name: build-cachelib-centos-latest on: - schedule: - - cron: '30 5 * * 1,4' + push: + pull_request: + jobs: build-cachelib-centos8-latest: name: "CentOS/latest - Build CacheLib with all dependencies" runs-on: ubuntu-latest # Docker container image name - container: "centos:latest" + container: "ghcr.io/igchor/cachelib-deps:streams8" steps: - - name: "update packages" - run: dnf upgrade -y - - name: "install sudo,git" - run: dnf install -y sudo git cmake gcc - name: "System Information" run: | echo === uname === @@ -31,5 +28,10 @@ jobs: gcc -v - name: "checkout sources" uses: actions/checkout@v2 + - name: "print workspace" + run: echo $GITHUB_WORKSPACE - name: "build CacheLib using build script" - run: ./contrib/build.sh -j -v -T + run: mkdir build && cd build && cmake ../cachelib -DBUILD_TESTS=ON -DCMAKE_INSTALL_PREFIX=/opt -DCMAKE_BUILD_TYPE=Debug && make 
install -j$(nproc) + - name: "run tests" + timeout-minutes: 60 + run: cd /opt/tests && $GITHUB_WORKSPACE/run_tests.sh diff --git a/.github/workflows/build-cachelib-debian.yml b/.github/workflows/build-cachelib-debian.yml index a2ae44a569..5bc3ad3c70 100644 --- a/.github/workflows/build-cachelib-debian.yml +++ b/.github/workflows/build-cachelib-debian.yml @@ -1,7 +1,8 @@ name: build-cachelib-debian-10 on: schedule: - - cron: '30 5 * * 2,6' + - cron: '30 5 * * 0,3' + jobs: build-cachelib-debian-10: name: "Debian/Buster - Build CacheLib with all dependencies" @@ -37,3 +38,6 @@ jobs: uses: actions/checkout@v2 - name: "build CacheLib using build script" run: ./contrib/build.sh -j -v -T + - name: "run tests" + timeout-minutes: 60 + run: cd opt/cachelib/tests && ../../../run_tests.sh diff --git a/.github/workflows/build-cachelib.yml b/.github/workflows/build-cachelib.yml deleted file mode 100644 index 15161c40e0..0000000000 --- a/.github/workflows/build-cachelib.yml +++ /dev/null @@ -1,147 +0,0 @@ -# NOTES: -# 1. While Github-Actions enables cache of dependencies, -# Facebook's projects (folly,fizz,wangle,fbthrift) -# are fast-moving targets - so we always checkout the latest version -# (as opposed to using gitactions cache, which is recommended in the -# documentation). -# -# 2. Using docker containers to build on CentOS and Debian, -# Specifically CentOS v8.1.1911 as that -# version is closest to Facebook's internal dev machines. -# -# 3. When using docker containers we install 'sudo', -# as the docker images are typically very minimal and without -# 'sudo', while the ./contrib/ scripts use sudo. -# -# 4. When using the docker containers we install 'git' -# BEFORE getting the CacheLib source code (with the 'checkout' action). -# Otherwise, the 'checkout@v2' action script falls back to downloading -# the git repository files only, without the ".git" directory. -# We need the ".git" directory to updating the git-submodules -# (folly/wangle/fizz/fbthrift). See: -# https://github.com/actions/checkout/issues/126#issuecomment-570288731 -# -# 5. To reduce less-critical (and yet frequent) rebuilds, the jobs -# check the author of the commit, and SKIP the build if -# the author is "svcscm". These commits are automatic updates -# for the folly/fbthrift git-submodules, and can happen several times a day. -# While there is a possiblity that updating the git-submodules breaks -# CacheLib, it is less likely, and will be detected once an actual -# code change commit triggers a full build. -# e.g. https://github.com/facebookincubator/CacheLib/commit/9372a82190dd71a6e2bcb668828cfed9d1bd25c1 -# -# 6. The 'if' condition checking the author name of the commit (see #5 above) -# uses github actions metadata variable: -# 'github.event.head_commit.author.name' -# GitHub have changed in the past the metadata structure and broke -# such conditions. If you need to debug the metadata values, -# see the "dummy-show-github-event" job below. -# E.g. 
https://github.blog/changelog/2019-10-16-changes-in-github-actions-push-event-payload/ -# As of Jan-2021, the output is: -# { -# "author": { -# "email": "mimi@moo.moo", -# "name": "mimi" -# }, -# "committer": { -# "email": "assafgordon@gmail.com", -# "name": "Assaf Gordon", -# "username": "agordon" -# }, -# "distinct": true, -# "id": "6c3aab0970f4a07cc2af7658756a6ef9d82f3276", -# "message": "gitactions: test", -# "timestamp": "2021-01-26T11:11:57-07:00", -# "tree_id": "741cd1cb802df84362a51e5d01f28788845d08b7", -# "url": "https://github.com/agordon/CacheLib/commit/6c3aab0970f4a07cc2af7658756a6ef9d82f3276" -# } -# -# 7. When checking the commit's author name, we use '...author.name', -# NOT '...author.username' - because the 'svcscm' author does not -# have a github username (see the 'mimi' example above). -# - -name: build-cachelib -on: [push] -jobs: - dummy-show-github-event: - name: "Show GitHub Action event.head_commit variable" - runs-on: ubuntu-latest - steps: - - name: "GitHub Variable Content" - env: - CONTENT: ${{ toJSON(github.event.head_commit) }} - run: echo "$CONTENT" - - - build-cachelib-centos8-1-1911: - if: "!contains(github.event.head_commit.author.name, 'svcscm')" - name: "CentOS/8.1.1911 - Build CacheLib with all dependencies" - runs-on: ubuntu-latest - # Docker container image name - container: "centos:8.1.1911" - steps: - - name: "update packages" - # stock centos has a problem with CMAKE, fails with: - # "cmake: symbol lookup error: cmake: undefined symbol: archive_write_add_filter_zstd" - # updating solves it - run: dnf update -y - - name: "install sudo,git" - run: dnf install -y sudo git cmake gcc - - name: "System Information" - run: | - echo === uname === - uname -a - echo === /etc/os-release === - cat /etc/os-release - echo === df -hl === - df -hl - echo === free -h === - free -h - echo === top === - top -b -n1 -1 -Eg || timeout 1 top -b -n1 - echo === env === - env - echo === gcc -v === - gcc -v - - name: "checkout sources" - uses: actions/checkout@v2 - - name: "Install Prerequisites" - run: ./contrib/build.sh -S -B - - name: "Test: update-submodules" - run: ./contrib/update-submodules.sh - - name: "Install dependency: zstd" - run: ./contrib/build-package.sh -j -v -i zstd - - name: "Install dependency: googleflags" - run: ./contrib/build-package.sh -j -v -i googleflags - - name: "Install dependency: googlelog" - run: ./contrib/build-package.sh -j -v -i googlelog - - name: "Install dependency: googletest" - run: ./contrib/build-package.sh -j -v -i googletest - - name: "Install dependency: sparsemap" - run: ./contrib/build-package.sh -j -v -i sparsemap - - name: "Install dependency: fmt" - run: ./contrib/build-package.sh -j -v -i fmt - - name: "Install dependency: folly" - run: ./contrib/build-package.sh -j -v -i folly - - name: "Install dependency: fizz" - run: ./contrib/build-package.sh -j -v -i fizz - - name: "Install dependency: wangle" - run: ./contrib/build-package.sh -j -v -i wangle - - name: "Install dependency: fbthrift" - run: ./contrib/build-package.sh -j -v -i fbthrift - - name: "build CacheLib" - # Build cachelib in debug mode (-d) and with all tests (-t) - run: ./contrib/build-package.sh -j -v -i -d -t cachelib - - uses: actions/upload-artifact@v2 - if: failure() - with: - name: cachelib-cmake-logs - path: | - build-cachelib/CMakeFiles/*.log - build-cachelib/CMakeCache.txt - build-cachelib/Makefile - build-cachelib/**/Makefile - if-no-files-found: warn - retention-days: 1 - diff --git a/.github/workflows/clang-format-check.yml 
b/.github/workflows/clang-format-check.yml new file mode 100644 index 0000000000..9f76f8ab6c --- /dev/null +++ b/.github/workflows/clang-format-check.yml @@ -0,0 +1,19 @@ +# From: https://github.com/marketplace/actions/clang-format-check#multiple-paths +name: clang-format Check +on: [] +jobs: + formatting-check: + name: Formatting Check + runs-on: ubuntu-latest + strategy: + matrix: + path: + - 'cachelib' + - 'examples' + steps: + - uses: actions/checkout@v2 + - name: Run clang-format style check for C/C++ programs. + uses: jidicula/clang-format-action@v3.4.0 + with: + clang-format-version: '11' + check-path: ${{ matrix.path }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 3058251a48..ebe779f258 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,8 @@ # Changelog -## V14 +## V16 -Inception version when Cachelib goes open source. +This version is incompatible with versions below 15. Downgrading from this version directly to a version below 15 will require the cache to be dropped. If you need to downgrade from this version, please make sure you downgrade to version 15 first to avoid dropping the cache. ## V15 @@ -13,3 +13,7 @@ Updating to this version may cause compliation error because: 1. CacheAllocator::allocatePermanent_deprecated. Updating to this version will not require dropping the cache. + +## V14 + +Inception version when Cachelib goes open source. diff --git a/cachelib/CMakeLists.txt b/cachelib/CMakeLists.txt index 3ff0b01ef0..917e164e3b 100644 --- a/cachelib/CMakeLists.txt +++ b/cachelib/CMakeLists.txt @@ -17,21 +17,25 @@ # refer to the root source directory of the project as ${HELLO_SOURCE_DIR} and # to the root binary directory of the project as ${HELLO_BINARY_DIR}. -cmake_minimum_required (VERSION 3.10) +cmake_minimum_required (VERSION 3.19) ## TODO: get version from variable project (CacheLib VERSION 0.1) #configure_file(cachelib/cachelib_config.h.in cachelib_config.h) -set(CACHELIB_MAJOR_VERSION 0) +if (NOT DEFINED CACHELIB_MAJOR_VERSION) + set(CACHELIB_MAJOR_VERSION 0) +endif () set(CACHELIB_MINOR_VERSION 1) set(CACHELIB_PATCH_VERSION 0) set(CACHELIB_VERSION ${CACHELIB_MAJOR_VERSION}.${CACHELIB_MINOR_VERSION}.${CACHELIB_PATCH_VERSION}) set(PACKAGE_NAME "cachelib") -set(PACKAGE_VERSION "${CACHELIB_VERSION}") +if (NOT DEFINED PACKAGE_VERSION) + set(PACKAGE_VERSION "${CACHELIB_VERSION}") +endif () set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") set(PACKAGE_TARNAME "${PACKAGE_NAME}-${PACKAGE_VERSION}") set(PACKAGE_BUGREPORT "https://github.com/facebook/TBD") @@ -361,6 +365,20 @@ install(EXPORT cachelib-exports #NAMESPACE cachelib:: DESTINATION ${CMAKE_INSTALL_DIR}) +if (BUILD_SHARED_LIBS) + set_target_properties( + cachelib_allocator + cachelib_cachebench + cachelib_common + cachelib_datatype + cachelib_navy + cachelib_shm + PROPERTIES + SOVERSION ${CACHELIB_MAJOR_VERSION} + VERSION ${PACKAGE_VERSION} + ) +endif () + if (BUILD_TESTS) get_property(TEST_BINARIES GLOBAL PROPERTY TEST_BINARIES) #message(STATUS "=== Test binaries : ${TEST_BINARIES} ===") diff --git a/cachelib/allocator/CMakeLists.txt b/cachelib/allocator/CMakeLists.txt index fc5d8610d8..b64d48d86f 100644 --- a/cachelib/allocator/CMakeLists.txt +++ b/cachelib/allocator/CMakeLists.txt @@ -65,6 +65,13 @@ target_link_libraries(cachelib_allocator PUBLIC cachelib_shm ) +if ((CMAKE_SYSTEM_NAME STREQUAL Linux) AND + (CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64)) +else() + target_compile_definitions(cachelib_allocator PRIVATE SKIP_SIZE_VERIFY) +endif() + + install(TARGETS cachelib_allocator EXPORT cachelib-exports 
DESTINATION ${LIB_INSTALL_DIR} ) @@ -74,6 +81,7 @@ if (BUILD_TESTS) ${DATASTRUCT_TESTS_THRIFT_FILES} ./nvmcache/tests/NvmTestBase.cpp ./memory/tests/TestBase.cpp + ../common/TestUtils.cpp ) add_dependencies(allocator_test_support thrift_generated_files) target_link_libraries (allocator_test_support PUBLIC @@ -109,6 +117,8 @@ if (BUILD_TESTS) add_test (tests/ChainedHashTest.cpp) add_test (tests/AllocatorResizeTypeTest.cpp) add_test (tests/AllocatorHitStatsTypeTest.cpp) + add_test (tests/AllocatorMemoryTiersTest.cpp) + add_test (tests/MemoryTiersTest.cpp) add_test (tests/MultiAllocatorTest.cpp) add_test (tests/NvmAdmissionPolicyTest.cpp) add_test (nvmcache/tests/NvmItemTests.cpp) diff --git a/cachelib/allocator/Cache.cpp b/cachelib/allocator/Cache.cpp index 0e812fb10e..7f6bfe737c 100644 --- a/cachelib/allocator/Cache.cpp +++ b/cachelib/allocator/Cache.cpp @@ -23,6 +23,12 @@ namespace facebook { namespace cachelib { +CacheBase::CacheBase(unsigned numTiers): numTiers_(numTiers) {} + +unsigned CacheBase::getNumTiers() const { + return numTiers_; +} + void CacheBase::setRebalanceStrategy( PoolId pid, std::shared_ptr<RebalanceStrategy> strategy) { std::unique_lock<std::mutex> l(lock_); diff --git a/cachelib/allocator/Cache.h b/cachelib/allocator/Cache.h index 02fd706588..c4a48506d3 100644 --- a/cachelib/allocator/Cache.h +++ b/cachelib/allocator/Cache.h @@ -49,14 +49,32 @@ struct SimplePoolOptimizeStrategy; // to differentiate between the access modes and do appropriate action. enum class AccessMode { kRead, kWrite }; -// enum value to indicate if the removal from the MMContainer was an eviction -// or not. +// used by RemoveCB, indicating if the removal from the MMContainer was an +// eviction or not. enum class RemoveContext { kEviction, kNormal }; +// used by ItemDestructor, indicating how the item is destructed +enum class DestructorContext { + // item was in dram and evicted from dram. it could have + // been present in nvm as well. + kEvictedFromRAM, + + // item was only in nvm and evicted from nvm + kEvictedFromNVM, + + // item was present in dram and removed by the user calling + // remove()/insertOrReplace, or removed because it expired. + // it could have been present in nvm as well. + kRemovedFromRAM, + + // item was present only in nvm and removed by the user calling + // remove()/insertOrReplace. + kRemovedFromNVM +}; // A base class of cache exposing members and status agnostic of template type.
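An aside before CacheBase: the DestructorContext values just added feed the new ItemDestructor callback that appears further down in this diff, where releaseBackToAllocator invokes config_.itemDestructor(DestructorData{ctx, it, ...}). As a rough sketch of how a user-supplied destructor might branch on it; the DestructorData field names (context, item) are assumptions inferred from that call site, not part of this patch:

```cpp
// Hypothetical ItemDestructor; `context` and `item` field names are
// inferred from the DestructorData{ctx, it, ...} construction below.
auto itemDestructor = [](const DestructorData& data) {
  switch (data.context) {
    case DestructorContext::kEvictedFromRAM:
    case DestructorContext::kEvictedFromNVM:
      // the cache aged the item out; e.g. bump an eviction metric
      break;
    case DestructorContext::kRemovedFromRAM:
    case DestructorContext::kRemovedFromNVM:
      // explicit remove()/insertOrReplace or expiry; e.g. release any
      // external state keyed by data.item.getKey()
      break;
  }
};
// presumably registered once on the config, e.g.
// config.setItemDestructor(std::move(itemDestructor));
```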
class CacheBase { public: - CacheBase() = default; + CacheBase(unsigned numTiers = 1); virtual ~CacheBase() = default; // Movable but not copyable @@ -65,6 +83,9 @@ class CacheBase { CacheBase(CacheBase&&) = default; CacheBase& operator=(CacheBase&&) = default; + // TODO: come up with some reasonable number + static constexpr unsigned kMaxTiers = 8; + // Get a string referring to the cache name for this cache virtual const std::string getCacheName() const = 0; @@ -253,6 +274,10 @@ class CacheBase { // @return The number of slabs that were actually reclaimed (<= numSlabs) virtual unsigned int reclaimSlabs(PoolId id, size_t numSlabs) = 0; + unsigned getNumTiers() const; + + unsigned numTiers_ = 1; + // Protect 'poolRebalanceStragtegies_' and `poolResizeStrategies_` // and `poolOptimizeStrategy_` mutable std::mutex lock_; diff --git a/cachelib/allocator/CacheAllocator-inl.h b/cachelib/allocator/CacheAllocator-inl.h index 15cfee7432..c8c11c77f5 100644 --- a/cachelib/allocator/CacheAllocator-inl.h +++ b/cachelib/allocator/CacheAllocator-inl.h @@ -16,26 +16,24 @@ #pragma once -#include "cachelib/allocator/CacheVersion.h" -#include "cachelib/common/Utils.h" +#include namespace facebook { namespace cachelib { template CacheAllocator::CacheAllocator(Config config) - : isOnShm_{config.memMonitoringEnabled()}, + : CacheBase(config.getMemoryTierConfigs().size()), + memoryTierConfigs(config.getMemoryTierConfigs()), + isOnShm_{config.memMonitoringEnabled()}, config_(config.validate()), - tempShm_(isOnShm_ ? std::make_unique(config_.size) + tempShm_(isOnShm_ ? std::make_unique( + config_.getCacheSize()) : nullptr), - allocator_(isOnShm_ ? std::make_unique( - getAllocatorConfig(config_), - tempShm_->getAddr(), - config_.size) - : std::make_unique( - getAllocatorConfig(config_), config_.size)), - compactCacheManager_(std::make_unique(*allocator_)), + allocator_(createPrivateAllocator()), + compactCacheManager_(std::make_unique(*allocator_[0] /* TODO */)), compressor_(createPtrCompressor()), + mmContainers_(numTiers_), accessContainer_(std::make_unique( config_.accessConfig, compressor_, @@ -46,21 +44,68 @@ CacheAllocator::CacheAllocator(Config config) [this](Item* it) -> ItemHandle { return acquire(it); })), chainedItemLocks_(config_.chainedItemsLockPower, std::make_shared()), - cacheCreationTime_{util::getCurrentTimeSec()}, - nvmCacheState_{config_.cacheDir, config_.isNvmCacheEncryptionEnabled(), - config_.isNvmCacheTruncateAllocSizeEnabled()} { + movesMap_(kShards), + moveLock_(kShards), + cacheCreationTime_{util::getCurrentTimeSec()} { + + if (numTiers_ > 1 || std::holds_alternative( + memoryTierConfigs[0].getShmTypeOpts())) { + throw std::runtime_error( + "Using custom memory tier or using more than one tier is only " + "supported for Shared Memory."); + } initCommon(false); } +template +std::vector> +CacheAllocator::createPrivateAllocator() { + std::vector> allocators; + + if (isOnShm_) + allocators.emplace_back(std::make_unique( + getAllocatorConfig(config_), + tempShm_->getAddr(), + config_.size)); + else + allocators.emplace_back(std::make_unique( + getAllocatorConfig(config_), config_.size)); + + return allocators; +} + +template +std::vector> +CacheAllocator::createAllocators() { + std::vector> allocators; + for (int tid = 0; tid < numTiers_; tid++) { + allocators.emplace_back(createNewMemoryAllocator(tid)); + } + return allocators; +} + +template +std::vector> +CacheAllocator::restoreAllocators() { + std::vector> allocators; + for (int tid = 0; tid < numTiers_; tid++) { + 
allocators.emplace_back(restoreMemoryAllocator(tid)); + } + return allocators; +} + template CacheAllocator::CacheAllocator(SharedMemNewT, Config config) - : isOnShm_{true}, + : CacheBase(config.getMemoryTierConfigs().size()), + memoryTierConfigs(config.getMemoryTierConfigs()), + isOnShm_{true}, config_(config.validate()), shmManager_( - std::make_unique(config_.cacheDir, config_.usePosixShm)), - allocator_(createNewMemoryAllocator()), - compactCacheManager_(std::make_unique(*allocator_)), + std::make_unique(config_.cacheDir, config_.isUsingPosixShm())), + allocator_(createAllocators()), + compactCacheManager_(std::make_unique(*allocator_[0] /* TODO */)), compressor_(createPtrCompressor()), + mmContainers_(numTiers_), accessContainer_(std::make_unique( config_.accessConfig, shmManager_ @@ -68,7 +113,8 @@ CacheAllocator::CacheAllocator(SharedMemNewT, Config config) AccessContainer::getRequiredSize( config_.accessConfig.getNumBuckets()), nullptr, - ShmSegmentOpts(config_.accessConfig.getPageSize())) + ShmSegmentOpts(config_.accessConfig.getPageSize(), + false, config_.isUsingPosixShm())) .addr, compressor_, [this](Item* it) -> ItemHandle { return acquire(it); })), @@ -79,48 +125,55 @@ CacheAllocator::CacheAllocator(SharedMemNewT, Config config) AccessContainer::getRequiredSize( config_.chainedItemAccessConfig.getNumBuckets()), nullptr, - ShmSegmentOpts(config_.accessConfig.getPageSize())) + ShmSegmentOpts(config_.accessConfig.getPageSize(), + false, config_.isUsingPosixShm())) .addr, compressor_, [this](Item* it) -> ItemHandle { return acquire(it); })), chainedItemLocks_(config_.chainedItemsLockPower, std::make_shared()), - cacheCreationTime_{util::getCurrentTimeSec()}, - nvmCacheState_{config_.cacheDir, config_.isNvmCacheEncryptionEnabled(), - config_.isNvmCacheTruncateAllocSizeEnabled()} { + movesMap_(kShards), + moveLock_(kShards), + cacheCreationTime_{util::getCurrentTimeSec()} { initCommon(false); - shmManager_->removeShm(detail::kShmInfoName); + shmManager_->removeShm(detail::kShmInfoName, + PosixSysVSegmentOpts(config_.isUsingPosixShm())); } template CacheAllocator::CacheAllocator(SharedMemAttachT, Config config) - : isOnShm_{true}, + : CacheBase(config.getMemoryTierConfigs().size()), + memoryTierConfigs(config.getMemoryTierConfigs()), + isOnShm_{true}, config_(config.validate()), shmManager_( std::make_unique(config_.cacheDir, config_.usePosixShm)), deserializer_(createDeserializer()), metadata_{deserializeCacheAllocatorMetadata(*deserializer_)}, - allocator_(restoreMemoryAllocator()), - compactCacheManager_(restoreCCacheManager()), + allocator_(restoreAllocators()), + compactCacheManager_(restoreCCacheManager(0 /* TODO - per tier */)), compressor_(createPtrCompressor()), mmContainers_(deserializeMMContainers(*deserializer_, compressor_)), accessContainer_(std::make_unique( deserializer_->deserialize(), config_.accessConfig, - shmManager_->attachShm(detail::kShmHashTableName), + shmManager_->attachShm(detail::kShmHashTableName, nullptr, + ShmSegmentOpts(PageSizeT::NORMAL, false, config_.isUsingPosixShm())), compressor_, [this](Item* it) -> ItemHandle { return acquire(it); })), chainedItemAccessContainer_(std::make_unique( deserializer_->deserialize(), config_.chainedItemAccessConfig, - shmManager_->attachShm(detail::kShmChainedItemHashTableName), + shmManager_->attachShm(detail::kShmChainedItemHashTableName, nullptr, + ShmSegmentOpts(PageSizeT::NORMAL, false, config_.isUsingPosixShm())), compressor_, [this](Item* it) -> ItemHandle { return acquire(it); })), 
chainedItemLocks_(config_.chainedItemsLockPower, std::make_shared()), - cacheCreationTime_{*metadata_.cacheCreationTime_ref()}, - nvmCacheState_{config_.cacheDir, config_.isNvmCacheEncryptionEnabled(), - config_.isNvmCacheTruncateAllocSizeEnabled()} { + movesMap_(kShards), + moveLock_(kShards), + cacheCreationTime_{*metadata_.cacheCreationTime_ref()} { + /* TODO - per tier? */ for (auto pid : *metadata_.compactCachePools_ref()) { isCompactCachePool_[pid] = true; } @@ -130,7 +183,8 @@ CacheAllocator::CacheAllocator(SharedMemAttachT, Config config) // We will create a new info shm segment on shutDown(). If we don't remove // this info shm segment here and the new info shm segment's size is larger // than this one, creating new one will fail. - shmManager_->removeShm(detail::kShmInfoName); + shmManager_->removeShm(detail::kShmInfoName, + PosixSysVSegmentOpts(config_.isUsingPosixShm())); } template @@ -144,44 +198,50 @@ CacheAllocator::~CacheAllocator() { } template -std::unique_ptr -CacheAllocator::createNewMemoryAllocator() { +ShmSegmentOpts CacheAllocator::createShmCacheOpts(TierId tid) { ShmSegmentOpts opts; opts.alignment = sizeof(Slab); + opts.typeOpts = memoryTierConfigs[tid].getShmTypeOpts(); + + return opts; +} + +template +std::unique_ptr +CacheAllocator::createNewMemoryAllocator(TierId tid) { return std::make_unique( getAllocatorConfig(config_), shmManager_ - ->createShm(detail::kShmCacheName, config_.size, - config_.slabMemoryBaseAddr, opts) + ->createShm(detail::kShmCacheName + std::to_string(tid), + config_.getCacheSize(), config_.slabMemoryBaseAddr, + createShmCacheOpts(tid)) .addr, - config_.size); + memoryTierConfigs[tid].getSize()); } template std::unique_ptr -CacheAllocator::restoreMemoryAllocator() { - ShmSegmentOpts opts; - opts.alignment = sizeof(Slab); +CacheAllocator::restoreMemoryAllocator(TierId tid) { return std::make_unique( deserializer_->deserialize(), shmManager_ - ->attachShm(detail::kShmCacheName, config_.slabMemoryBaseAddr, opts) - .addr, - config_.size, + ->attachShm(detail::kShmCacheName + std::to_string(tid), + config_.slabMemoryBaseAddr, createShmCacheOpts(tid)).addr, + memoryTierConfigs[tid].getSize(), config_.disableFullCoredump); } template std::unique_ptr -CacheAllocator::restoreCCacheManager() { +CacheAllocator::restoreCCacheManager(TierId tid) { return std::make_unique( deserializer_->deserialize(), - *allocator_); + *allocator_[tid]); } template void CacheAllocator::initCommon(bool dramCacheAttached) { - if (config_.nvmConfig.has_value()) { + if (config_.isNvmCacheEnabled()) { if (config_.nvmCacheAP) { nvmAdmissionPolicy_ = config_.nvmCacheAP; } else if (config_.rejectFirstAPNumEntries) { @@ -204,24 +264,28 @@ void CacheAllocator::initCommon(bool dramCacheAttached) { template void CacheAllocator::initNvmCache(bool dramCacheAttached) { - if (!config_.nvmConfig.has_value()) { + if (!config_.isNvmCacheEnabled()) { return; } + nvmCacheState_.emplace(NvmCacheState(config_.cacheDir, config_.isNvmCacheEncryptionEnabled(), + config_.isNvmCacheTruncateAllocSizeEnabled())); + // for some usecases that create pools, restoring nvmcache when dram cache // is not persisted is not supported. 
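Stepping back from initNvmCache for a moment: the constructors above only consume config.getMemoryTierConfigs(); the producer side of that API is not part of this diff. A rough sketch of what a two-tier setup could look like follows. configureMemoryTiers, MemoryTierCacheConfig, fromShm, and fromFile are hypothetical names here; only getMemoryTierConfigs(), getSize(), and getShmTypeOpts() are confirmed by this patch.

```cpp
// Hypothetical two-tier setup feeding getMemoryTierConfigs() above.
LruAllocator::Config config;
config.setCacheSize(30UL << 30);  // 30 GiB across all tiers
config.configureMemoryTiers({    // assumed builder API
    MemoryTierCacheConfig::fromShm().setSize(10UL << 30),  // DRAM
    MemoryTierCacheConfig::fromFile("/mnt/pmem0/cache")    // e.g. PMEM
        .setSize(20UL << 30),
});
// Per the runtime check in the first constructor, multiple tiers (or a
// file-backed tier) require the shared-memory variant:
LruAllocator cache(LruAllocator::SharedMemNew, config);
```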
const bool shouldDrop = config_.dropNvmCacheOnShmNew && !dramCacheAttached; // if we are dealing with persistency, cache directory should be enabled const bool truncate = config_.cacheDir.empty() || - nvmCacheState_.shouldStartFresh() || shouldDrop; + nvmCacheState_.value().shouldStartFresh() || shouldDrop; if (truncate) { - nvmCacheState_.markTruncated(); + nvmCacheState_.value().markTruncated(); } - nvmCache_ = std::make_unique(*this, *config_.nvmConfig, truncate); + nvmCache_ = std::make_unique(*this, *config_.nvmConfig, truncate, + config_.itemDestructor); if (!config_.cacheDir.empty()) { - nvmCacheState_.clearPrevState(); + nvmCacheState_.value().clearPrevState(); } } @@ -265,7 +329,8 @@ void CacheAllocator::initWorkers() { template std::unique_ptr CacheAllocator::createDeserializer() { - auto infoAddr = shmManager_->attachShm(detail::kShmInfoName); + auto infoAddr = shmManager_->attachShm(detail::kShmInfoName, nullptr, + ShmSegmentOpts(PageSizeT::NORMAL, false, config_.isUsingPosixShm())); return std::make_unique( reinterpret_cast(infoAddr.addr), reinterpret_cast(infoAddr.addr) + infoAddr.size); @@ -287,7 +352,8 @@ CacheAllocator::allocate(PoolId poolId, template typename CacheAllocator::ItemHandle -CacheAllocator::allocateInternal(PoolId pid, +CacheAllocator::allocateInternalTier(TierId tid, + PoolId pid, typename Item::Key key, uint32_t size, uint32_t creationTime, @@ -300,13 +366,16 @@ CacheAllocator::allocateInternal(PoolId pid, const auto requiredSize = Item::getRequiredSize(key, size); // the allocation class in our memory allocator. - const auto cid = allocator_->getAllocationClassId(pid, requiredSize); + const auto cid = allocator_[tid]->getAllocationClassId(pid, requiredSize); + // TODO: per-tier (*stats_.allocAttempts)[pid][cid].inc(); - void* memory = allocator_->allocate(pid, requiredSize); + void* memory = allocator_[tid]->allocate(pid, requiredSize); + // TODO: Today disableEviction means do not evict from memory (DRAM). + // Should we support eviction between memory tiers (e.g. from DRAM to PMEM)? if (memory == nullptr && !config_.disableEviction) { - memory = findEviction(pid, cid); + memory = findEviction(tid, pid, cid); } ItemHandle handle; @@ -317,7 +386,7 @@ CacheAllocator::allocateInternal(PoolId pid, // for example. SCOPE_FAIL { // free back the memory to the allocator since we failed. - allocator_->free(memory); + allocator_[tid]->free(memory); }; handle = acquire(new (memory) Item(key, size, creationTime, expiryTime)); @@ -328,7 +397,7 @@ CacheAllocator::allocateInternal(PoolId pid, } } else { // failed to allocate memory. 
- (*stats_.allocFailures)[pid][cid].inc(); + (*stats_.allocFailures)[pid][cid].inc(); // TODO: per-tier // wake up rebalancer if (poolRebalancer_) { poolRebalancer_->wakeUp(); @@ -347,7 +416,22 @@ CacheAllocator::allocateInternal(PoolId pid, template typename CacheAllocator::ItemHandle -CacheAllocator::allocateChainedItem(const ItemHandle& parent, +CacheAllocator::allocateInternal(PoolId pid, + typename Item::Key key, + uint32_t size, + uint32_t creationTime, + uint32_t expiryTime) { + auto tid = 0; /* TODO: consult admission policy */ + for(TierId tid = 0; tid < numTiers_; ++tid) { + auto handle = allocateInternalTier(tid, pid, key, size, creationTime, expiryTime); + if (handle) return handle; + } + return {}; +} + +template +typename CacheAllocator::WriteHandle +CacheAllocator::allocateChainedItem(const ReadHandle& parent, uint32_t size) { if (!parent) { throw std::invalid_argument( @@ -365,9 +449,9 @@ CacheAllocator::allocateChainedItem(const ItemHandle& parent, } template -typename CacheAllocator::ItemHandle +typename CacheAllocator::WriteHandle CacheAllocator::allocateChainedItemInternal( - const ItemHandle& parent, uint32_t size) { + const ReadHandle& parent, uint32_t size) { util::LatencyTracker tracker{stats().allocateLatency_}; SCOPE_FAIL { stats_.invalidAllocs.inc(); }; @@ -375,24 +459,30 @@ CacheAllocator::allocateChainedItemInternal( // number of bytes required for this item const auto requiredSize = ChainedItem::getRequiredSize(size); - const auto pid = allocator_->getAllocInfo(parent->getMemory()).poolId; - const auto cid = allocator_->getAllocationClassId(pid, requiredSize); + // TODO: is this correct? + auto tid = getTierId(*parent); + + const auto pid = allocator_[tid]->getAllocInfo(parent->getMemory()).poolId; + const auto cid = allocator_[tid]->getAllocationClassId(pid, requiredSize); + // TODO: per-tier? 
Right now stats_ are not used in any public periodic + // worker (*stats_.allocAttempts)[pid][cid].inc(); - void* memory = allocator_->allocate(pid, requiredSize); + void* memory = allocator_[tid]->allocate(pid, requiredSize); if (memory == nullptr) { - memory = findEviction(pid, cid); + memory = findEviction(tid, pid, cid); } if (memory == nullptr) { (*stats_.allocFailures)[pid][cid].inc(); return ItemHandle{}; } - SCOPE_FAIL { allocator_->free(memory); }; + SCOPE_FAIL { allocator_[tid]->free(memory); }; - auto child = acquire(new (memory) ChainedItem( - compressor_.compress(parent.get()), size, util::getCurrentTimeSec())); + auto child = acquire( + new (memory) ChainedItem(compressor_.compress(parent.getInternal()), size, + util::getCurrentTimeSec())); if (child) { child.markNascent(); @@ -697,8 +787,8 @@ CacheAllocator::releaseBackToAllocator(Item& it, throw std::runtime_error( folly::sformat("cannot release this item: {}", it.toString())); } - - const auto allocInfo = allocator_->getAllocInfo(it.getMemory()); + const auto tid = getTierId(it); + const auto allocInfo = allocator_[tid]->getAllocInfo(it.getMemory()); if (ctx == RemoveContext::kEviction) { const auto timeNow = util::getCurrentTimeSec(); @@ -722,8 +812,7 @@ CacheAllocator::releaseBackToAllocator(Item& it, folly::sformat("Can not recycle a chained item {}, toRecyle", it.toString(), toRecycle->toString())); } - - allocator_->free(&it); + allocator_[tid]->free(&it); return ReleaseRes::kReleased; } @@ -734,6 +823,22 @@ CacheAllocator::releaseBackToAllocator(Item& it, config_.removeCb(RemoveCbData{ctx, it, viewAsChainedAllocsRange(it)}); } + // only skip destructor for evicted items that are either in the queue to put + // into nvm or already in nvm + if (!nascent && config_.itemDestructor && + (ctx != RemoveContext::kEviction || !it.isNvmClean() || + it.isNvmEvicted())) { + try { + config_.itemDestructor(DestructorData{ + ctx, it, viewAsChainedAllocsRange(it), allocInfo.poolId}); + stats().numRamDestructorCalls.inc(); + } catch (const std::exception& e) { + stats().numDestructorExceptions.inc(); + XLOG_EVERY_N(INFO, 100) + << "Catch exception from user's item destructor: " << e.what(); + } + } + // If no `toRecycle` is set, then the result is kReleased // Because this function cannot fail to release "it" ReleaseRes res = @@ -766,7 +871,7 @@ CacheAllocator::releaseBackToAllocator(Item& it, auto next = head->getNext(compressor_); const auto childInfo = - allocator_->getAllocInfo(static_cast(head)); + allocator_[tid]->getAllocInfo(static_cast(head)); (*stats_.fragmentationSize)[childInfo.poolId][childInfo.classId].sub( util::getFragmentation(*this, *head)); @@ -799,7 +904,7 @@ CacheAllocator::releaseBackToAllocator(Item& it, XDCHECK(ReleaseRes::kReleased != res); res = ReleaseRes::kRecycled; } else { - allocator_->free(head); + allocator_[tid]->free(head); } } @@ -814,7 +919,7 @@ CacheAllocator::releaseBackToAllocator(Item& it, res = ReleaseRes::kRecycled; } else { XDCHECK(it.isDrained()); - allocator_->free(&it); + allocator_[tid]->free(&it); } return res; @@ -886,6 +991,25 @@ bool CacheAllocator::replaceInMMContainer(Item& oldItem, } } +template +bool CacheAllocator::replaceInMMContainer(Item* oldItem, + Item& newItem) { + return replaceInMMContainer(*oldItem, newItem); +} + +template +bool CacheAllocator::replaceInMMContainer(EvictionIterator& oldItemIt, + Item& newItem) { + auto& oldContainer = getMMContainer(*oldItemIt); + auto& newContainer = getMMContainer(newItem); + + // This function is used for eviction across tiers + 
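Before continuing with the cross-tier replaceInMMContainer, one note on the destructor guard added to releaseBackToAllocator above: its three-way condition is easy to misread. Unrolled, it is equivalent to the named restatement below (same logic, nothing new; nascent items additionally skip the destructor entirely).

```cpp
// Equivalent form of the guard around config_.itemDestructor above:
// skip the RAM destructor only when the item leaves RAM by eviction
// while its latest copy survives in NVM (nvm-clean and not nvm-evicted);
// in that case destruction is owned by the NVM side. Nascent items
// (never exposed to users) are filtered out separately by `!nascent`.
bool shouldRunRamDestructor(const Item& it, RemoveContext ctx) {
  const bool evictedFromRam = (ctx == RemoveContext::kEviction);
  const bool liveCopyInNvm = it.isNvmClean() && !it.isNvmEvicted();
  return !(evictedFromRam && liveCopyInNvm);
}
```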
XDCHECK(&oldContainer != &newContainer); + oldContainer.remove(oldItemIt); + + return newContainer.add(newItem); +} + template <typename CacheTrait> bool CacheAllocator<CacheTrait>::replaceChainedItemInMMContainer( Item& oldItem, Item& newItem) { @@ -979,8 +1103,18 @@ CacheAllocator<CacheTrait>::insertOrReplace(const ItemHandle& handle) { insertInMMContainer(*(handle.getInternal())); ItemHandle replaced; try { + auto lock = nvmCache_ ? nvmCache_->getItemDestructorLock(handle->getKey()) + : std::unique_lock<std::mutex>(); + replaced = accessContainer_->insertOrReplace(*(handle.getInternal())); - } catch (const exception::RefcountOverflow&) { + + if (replaced && replaced->isNvmClean() && !replaced->isNvmEvicted()) { + // item is to be replaced and the destructor will be executed + // upon memory release; mark it in nvm to avoid the destructor + // being executed from nvm + nvmCache_->markNvmItemRemovedLocked(handle->getKey()); + } + } catch (const std::exception&) { removeFromMMContainer(*(handle.getInternal())); if (auto eventTracker = getEventTracker()) { eventTracker->record(AllocatorApiEvent::INSERT_OR_REPLACE, @@ -1020,6 +1154,157 @@ CacheAllocator<CacheTrait>::insertOrReplace(const ItemHandle& handle) { return replaced; } +/* Next two methods are used to asynchronously move an Item between memory tiers. + * + * The thread that moves the Item allocates a new Item in the tier we are moving to + * and calls the moveRegularItemOnEviction() method. This method does the following: + * 1. Create a MoveCtx and put it into the movesMap. + * 2. Update the access container with the new item from the tier we are + * moving to. This Item has the kIncomplete flag set. + * 3. Copy data from the old Item to the new one. + * 4. Unset the kIncomplete flag and notify the MoveCtx. + * + * Concurrent threads that are getting a handle to the same key: + * 1. When a handle is created, it checks whether the kIncomplete flag is set. + * 2. If so, the Handle implementation creates a waitContext and adds it to the + * MoveCtx by calling the addWaitContextForMovingItem() method. + * 3. Wait until the moving thread completes its job. + */ +template <typename CacheTrait> +bool CacheAllocator<CacheTrait>::addWaitContextForMovingItem( + folly::StringPiece key, std::shared_ptr<WaitContext<ItemHandle>> waiter) { + auto shard = getShardForKey(key); + auto& movesMap = getMoveMapForShard(shard); + auto lock = getMoveLockForShard(shard); + auto it = movesMap.find(key); + if (it == movesMap.end()) { + return false; + } + auto ctx = it->second.get(); + ctx->addWaiter(std::move(waiter)); + return true; +} + +template <typename CacheTrait> +template <typename ItemPtr> +typename CacheAllocator<CacheTrait>::ItemHandle +CacheAllocator<CacheTrait>::moveRegularItemOnEviction( + ItemPtr& oldItemPtr, ItemHandle& newItemHdl) { + // TODO: should we introduce a new latency tracker? E.g. evictRegularLatency_ + // ??? util::LatencyTracker tracker{stats_.evictRegularLatency_}; + + Item& oldItem = *oldItemPtr; + if (!oldItem.isAccessible() || oldItem.isExpired()) { + return {}; + } + + XDCHECK_EQ(newItemHdl->getSize(), oldItem.getSize()); + XDCHECK_NE(getTierId(oldItem), getTierId(*newItemHdl)); + + // take care of the flags before we expose the item to be accessed. this + // will ensure that when another thread removes the item from RAM, we issue + // a delete accordingly. See D7859775 for an example + if (oldItem.isNvmClean()) { + newItemHdl->markNvmClean(); + } + + folly::StringPiece key(oldItem.getKey()); + auto shard = getShardForKey(key); + auto& movesMap = getMoveMapForShard(shard); + MoveCtx* ctx(nullptr); + { + auto lock = getMoveLockForShard(shard); + auto res = movesMap.try_emplace(key, std::make_unique<MoveCtx>()); + if (!res.second) { + return {}; + } + ctx = res.first->second.get(); + } + + auto resHdl = ItemHandle{}; + auto guard = folly::makeGuard([key, this, ctx, shard, &resHdl]() { + auto& movesMap = getMoveMapForShard(shard); + if (resHdl) + resHdl->unmarkIncomplete(); + auto lock = getMoveLockForShard(shard); + ctx->setItemHandle(std::move(resHdl)); + movesMap.erase(key); + }); + + // TODO: Possibly we can use markMoving() instead. But today the + // moveOnSlabRelease logic assumes that we mark the old Item as moving + // and then copy and replace the old Item with the new one in the access + // container. Furthermore, an Item can be marked as Moving only + // if it is linked to an MM container. In our case we mark the new Item + // and update the access container before the new Item is ready (its + // content is copied). + newItemHdl->markIncomplete(); + + // Inside the access container's lock, this checks if the old item is + // accessible and its refcount is zero. If the item is not accessible, + // there is no point in replacing it since it has already been removed + // or is in the process of being removed. If the item is in cache but the + // refcount is non-zero, it means the user could be attempting to remove + // this item through an API such as remove(ItemHandle). In this case, + // it is unsafe to replace the old item with a new one, so we should + // also abort. + if (!accessContainer_->replaceIf(oldItem, *newItemHdl, + itemEvictionPredicate)) { + return {}; + } + + if (config_.moveCb) { + // Execute the move callback. We cannot make any guarantees about the + // consistency of the old item beyond this point, because the callback can + // do more than a simple memcpy() e.g. update external references. If there + // are any remaining handles to the old item, it is the caller's + // responsibility to invalidate them. The move can only fail after this + // statement if the old item has been removed or replaced, in which case it + // should be fine for it to be left in an inconsistent state. + config_.moveCb(oldItem, *newItemHdl, nullptr); + } else { + std::memcpy(newItemHdl->getWritableMemory(), oldItem.getMemory(), + oldItem.getSize()); + } + + // Inside the MM container's lock, this checks if the old item exists to + // make sure that no other thread removed it, and only then replaces it. + if (!replaceInMMContainer(oldItemPtr, *newItemHdl)) { + accessContainer_->remove(*newItemHdl); + return {}; + } + + // Replacing into the MM container was successful, but someone could have + // called insertOrReplace() or remove() before or after the + // replaceInMMContainer() operation, which would invalidate newItemHdl.
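Pausing moveRegularItemOnEviction for a moment: MoveCtx itself is not shown in this hunk. From the calls above (addWaiter(), setItemHandle(), erasure from movesMap under the shard lock), one plausible shape is the sketch below; everything beyond those two method names is a guess.

```cpp
// Plausible MoveCtx, inferred from addWaiter()/setItemHandle() above;
// the real definition is not part of this excerpt. ItemHandle and
// WaitContext are the CacheLib types used elsewhere in this file.
#include <memory>
#include <vector>

class MoveCtx {
 public:
  // Readers that hit the kIncomplete item park a wait context here.
  void addWaiter(std::shared_ptr<WaitContext<ItemHandle>> waiter) {
    waiters_.push_back(std::move(waiter));
  }

  // The moving thread (via the scope guard above) publishes the finished
  // handle; every parked waiter gets its own copy.
  void setItemHandle(ItemHandle handle) {
    for (auto& waiter : waiters_) {
      waiter->set(handle.clone());
    }
    waiters_.clear();
  }

 private:
  std::vector<std::shared_ptr<WaitContext<ItemHandle>>> waiters_;
};
```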
+ if (!newItemHdl->isAccessible()) { + removeFromMMContainer(*newItemHdl); + return {}; + } + + // no one can add or remove chained items at this point + if (oldItem.hasChainedItem()) { + // safe to acquire handle for a moving Item + auto oldHandle = acquire(&oldItem); + XDCHECK_EQ(1u, oldHandle->getRefCount()) << oldHandle->toString(); + XDCHECK(!newItemHdl->hasChainedItem()) << newItemHdl->toString(); + try { + auto l = chainedItemLocks_.lockExclusive(oldItem.getKey()); + transferChainLocked(oldHandle, newItemHdl); + } catch (const std::exception& e) { + // this should never happen because we drained all the handles. + XLOGF(DFATAL, "{}", e.what()); + throw; + } + + XDCHECK(!oldItem.hasChainedItem()); + XDCHECK(newItemHdl->hasChainedItem()); + } + newItemHdl.unmarkNascent(); + resHdl = std::move(newItemHdl); // guard will assign it to ctx under lock + return acquire(&oldItem); +} + template bool CacheAllocator::moveRegularItem(Item& oldItem, ItemHandle& newItemHdl) { @@ -1162,8 +1447,8 @@ bool CacheAllocator::moveChainedItem(ChainedItem& oldItem, template typename CacheAllocator::Item* -CacheAllocator::findEviction(PoolId pid, ClassId cid) { - auto& mmContainer = getMMContainer(pid, cid); +CacheAllocator::findEviction(TierId tid, PoolId pid, ClassId cid) { + auto& mmContainer = getMMContainer(tid, pid, cid); // Keep searching for a candidate until we were able to evict it // or until the search limit has been exhausted @@ -1178,10 +1463,17 @@ CacheAllocator::findEviction(PoolId pid, ClassId cid) { // for chained items, the ownership of the parent can change. We try to // evict what we think as parent and see if the eviction of parent // recycles the child we intend to. - auto toReleaseHandle = - itr->isChainedItem() - ? advanceIteratorAndTryEvictChainedItem(itr) - : advanceIteratorAndTryEvictRegularItem(mmContainer, itr); + + ItemHandle toReleaseHandle = tryEvictToNextMemoryTier(tid, pid, itr); + bool movedToNextTier = false; + if(toReleaseHandle) { + movedToNextTier = true; + } else { + toReleaseHandle = + itr->isChainedItem() + ? advanceIteratorAndTryEvictChainedItem(tid, pid, itr) + : advanceIteratorAndTryEvictRegularItem(tid, pid, mmContainer, itr); + } if (toReleaseHandle) { if (toReleaseHandle->hasChainedItem()) { @@ -1212,7 +1504,7 @@ CacheAllocator::findEviction(PoolId pid, ClassId cid) { // recycle the candidate. 
if (ReleaseRes::kRecycled == releaseBackToAllocator(itemToRelease, RemoveContext::kEviction, - /* isNascent */ false, candidate)) { + /* isNascent */ movedToNextTier, candidate)) { return candidate; } } @@ -1274,13 +1566,47 @@ bool CacheAllocator::shouldWriteToNvmCacheExclusive( return true; } +template +template +typename CacheAllocator::ItemHandle +CacheAllocator::tryEvictToNextMemoryTier( + TierId tid, PoolId pid, ItemPtr& item) { + if(item->isChainedItem()) return {}; // TODO: We do not support ChainedItem yet + if(item->isExpired()) return acquire(item); + + TierId nextTier = tid; // TODO - calculate this based on some admission policy + while (++nextTier < numTiers_) { // try to evict down to the next memory tiers + // allocateInternal might trigger another eviction + auto newItemHdl = allocateInternalTier(nextTier, pid, + item->getKey(), + item->getSize(), + item->getCreationTime(), + item->getExpiryTime()); + + if (newItemHdl) { + XDCHECK_EQ(newItemHdl->getSize(), item->getSize()); + + return moveRegularItemOnEviction(item, newItemHdl); + } + } + + return {}; +} + +template +typename CacheAllocator::ItemHandle +CacheAllocator::tryEvictToNextMemoryTier(Item* item) { + auto tid = getTierId(*item); + auto pid = allocator_[tid]->getAllocInfo(item->getMemory()).poolId; + return tryEvictToNextMemoryTier(tid, pid, item); +} + template typename CacheAllocator::ItemHandle CacheAllocator::advanceIteratorAndTryEvictRegularItem( - MMContainer& mmContainer, EvictionIterator& itr) { - // we should flush this to nvmcache if it is not already present in nvmcache - // and the item is not expired. + TierId tid, PoolId pid, MMContainer& mmContainer, EvictionIterator& itr) { Item& item = *itr; + const bool evictToNvmCache = shouldWriteToNvmCache(item); auto token = evictToNvmCache ? nvmCache_->createPutToken(item.getKey()) @@ -1343,7 +1669,7 @@ CacheAllocator::advanceIteratorAndTryEvictRegularItem( template typename CacheAllocator::ItemHandle CacheAllocator::advanceIteratorAndTryEvictChainedItem( - EvictionIterator& itr) { + TierId tid, PoolId pid, EvictionIterator& itr) { XDCHECK(itr->isChainedItem()); ChainedItem* candidate = &itr->asChainedItem(); @@ -1394,6 +1720,8 @@ CacheAllocator::advanceIteratorAndTryEvictChainedItem( XDCHECK(!parent.isInMMContainer()); XDCHECK(!parent.isAccessible()); + // TODO: add multi-tier support (similar as for unchained items) + // We need to make sure the parent is not marked as moving // and we're the only holder of the parent item. Safe to destroy the handle // here since moving bit is set. @@ -1528,7 +1856,7 @@ CacheAllocator::remove(AccessIterator& it) { template typename CacheAllocator::RemoveRes -CacheAllocator::remove(const ItemHandle& it) { +CacheAllocator::remove(const ReadHandle& it) { stats_.numCacheRemoves.inc(); if (!it) { throw std::invalid_argument("Trying to remove a null item handle"); @@ -1544,21 +1872,34 @@ CacheAllocator::removeImpl(Item& item, DeleteTombStoneGuard tombstone, bool removeFromNvm, bool recordApiEvent) { - // Enqueue delete to nvmCache if we know from the item that it was pulled in - // from NVM. If the item was not pulled in from NVM, it is not possible to - // have it be written to NVM. - if (nvmCache_ && removeFromNvm && item.isNvmClean()) { - XDCHECK(tombstone); - nvmCache_->remove(item.getKey(), std::move(tombstone)); - } + bool success = false; + { + auto lock = nvmCache_ ? 
nvmCache_->getItemDestructorLock(item.getKey()) + : std::unique_lock(); + + success = accessContainer_->remove(item); - const bool success = accessContainer_->remove(item); + if (removeFromNvm && success && item.isNvmClean() && !item.isNvmEvicted()) { + // item is to be removed and the destructor will be executed + // upon memory released, mark it in nvm to avoid destructor + // executed from nvm + nvmCache_->markNvmItemRemovedLocked(item.getKey()); + } + } XDCHECK(!item.isAccessible()); // remove it from the mm container. this will be no-op if it is already // removed. removeFromMMContainer(item); + // Enqueue delete to nvmCache if we know from the item that it was pulled in + // from NVM. If the item was not pulled in from NVM, it is not possible to + // have it be written to NVM. + if (removeFromNvm && item.isNvmClean()) { + XDCHECK(tombstone); + nvmCache_->remove(item.getKey(), std::move(tombstone)); + } + auto eventTracker = getEventTracker(); if (recordApiEvent && eventTracker) { const auto result = @@ -1579,27 +1920,55 @@ CacheAllocator::removeImpl(Item& item, template void CacheAllocator::invalidateNvm(Item& item) { if (nvmCache_ != nullptr && item.isAccessible() && item.isNvmClean()) { - item.unmarkNvmClean(); + { + auto lock = nvmCache_->getItemDestructorLock(item.getKey()); + if (!item.isNvmEvicted() && item.isNvmClean() && item.isAccessible()) { + // item is being updated and invalidated in nvm. Mark the item to avoid + // destructor to be executed from nvm + nvmCache_->markNvmItemRemovedLocked(item.getKey()); + } + item.unmarkNvmClean(); + } nvmCache_->remove(item.getKey(), nvmCache_->createDeleteTombStone(item.getKey())); } } +template +TierId +CacheAllocator::getTierId(const Item& item) const { + return getTierId(item.getMemory()); +} + +template +TierId +CacheAllocator::getTierId(const void* ptr) const { + for (TierId tid = 0; tid < numTiers_; tid++) { + if (allocator_[tid]->isMemoryInAllocator(ptr)) + return tid; + } + + throw std::invalid_argument("Item does not belong to any tier!"); +} + template typename CacheAllocator::MMContainer& CacheAllocator::getMMContainer(const Item& item) const noexcept { + const auto tid = getTierId(item); const auto allocInfo = - allocator_->getAllocInfo(static_cast(&item)); - return getMMContainer(allocInfo.poolId, allocInfo.classId); + allocator_[tid]->getAllocInfo(static_cast(&item)); + return getMMContainer(tid, allocInfo.poolId, allocInfo.classId); } template typename CacheAllocator::MMContainer& -CacheAllocator::getMMContainer(PoolId pid, +CacheAllocator::getMMContainer(TierId tid, + PoolId pid, ClassId cid) const noexcept { - XDCHECK_LT(static_cast(pid), mmContainers_.size()); - XDCHECK_LT(static_cast(cid), mmContainers_[pid].size()); - return *mmContainers_[pid][cid]; + XDCHECK_LT(static_cast(tid), mmContainers_.size()); + XDCHECK_LT(static_cast(pid), mmContainers_[tid].size()); + XDCHECK_LT(static_cast(cid), mmContainers_[tid][pid].size()); + return *mmContainers_[tid][pid][cid]; } template @@ -1701,6 +2070,26 @@ CacheAllocator::find(typename Item::Key key, AccessMode mode) { return handle; } +template +typename CacheAllocator::ItemHandle +CacheAllocator::findToWrite(typename Item::Key key, + bool doNvmInvalidation) { + auto handle = find(key, AccessMode::kWrite); + if (handle == nullptr) { + return nullptr; + } + if (doNvmInvalidation) { + invalidateNvm(*handle); + } + return handle; +} + +template +typename CacheAllocator::ReadHandle +CacheAllocator::find(typename Item::Key key) { + return find(key, AccessMode::kRead); +} + 
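Alongside the tiering work, this diff starts migrating the API toward the read/write handle split: the one-argument find() added above returns a ReadHandle, while findToWrite() hands back a writable handle after (optionally) invalidating the NVM copy. A minimal usage sketch, assuming an already-constructed allocator:

```cpp
// Sketch of the read/write split introduced above; `cache` is assumed
// to be an existing CacheAllocator instance.
#include <cstring>

template <typename Cache>
void readThenWrite(Cache& cache) {
  if (auto rh = cache.find("key")) {  // ReadHandle: lookup only
    const void* p = rh->getMemory();  // read-side accessor
    (void)p;
  }
  if (auto wh = cache.findToWrite("key", /* doNvmInvalidation */ true)) {
    // safe to mutate: the stale NVM copy was invalidated first
    std::memcpy(wh->getWritableMemory(), "new", 3);
  }
}
```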
template void CacheAllocator::markUseful(const ItemHandle& handle, AccessMode mode) { @@ -1709,9 +2098,11 @@ void CacheAllocator::markUseful(const ItemHandle& handle, } auto& item = *(handle.getInternal()); - recordAccessInMMContainer(item, mode); + bool recorded = recordAccessInMMContainer(item, mode); - if (LIKELY(!item.hasChainedItem())) { + // if parent is not recorded, skip children as well when the config is set + if (LIKELY(!item.hasChainedItem() || + (!recorded && config_.isSkipPromoteChildrenWhenParentFailed()))) { return; } @@ -1721,10 +2112,11 @@ void CacheAllocator::markUseful(const ItemHandle& handle, } template -void CacheAllocator::recordAccessInMMContainer(Item& item, +bool CacheAllocator::recordAccessInMMContainer(Item& item, AccessMode mode) { + const auto tid = getTierId(item); const auto allocInfo = - allocator_->getAllocInfo(static_cast(&item)); + allocator_[tid]->getAllocInfo(static_cast(&item)); (*stats_.cacheHits)[allocInfo.poolId][allocInfo.classId].inc(); // track recently accessed items if needed @@ -1732,14 +2124,15 @@ void CacheAllocator::recordAccessInMMContainer(Item& item, ring_->trackItem(reinterpret_cast(&item), item.getSize()); } - auto& mmContainer = getMMContainer(allocInfo.poolId, allocInfo.classId); - mmContainer.recordAccess(item, mode); + auto& mmContainer = getMMContainer(tid, allocInfo.poolId, allocInfo.classId); + return mmContainer.recordAccess(item, mode); } template uint32_t CacheAllocator::getUsableSize(const Item& item) const { + const auto tid = getTierId(item); const auto allocSize = - allocator_->getAllocInfo(static_cast(&item)).allocSize; + allocator_[tid]->getAllocInfo(static_cast(&item)).allocSize; return item.isChainedItem() ? allocSize - ChainedItem::getRequiredSize(0) : allocSize - Item::getRequiredSize(item.getKey(), 0); @@ -1748,8 +2141,11 @@ uint32_t CacheAllocator::getUsableSize(const Item& item) const { template typename CacheAllocator::ItemHandle CacheAllocator::getSampleItem() { + // TODO: is using random tier a good idea? + auto tid = folly::Random::rand32() % numTiers_; + const auto* item = - reinterpret_cast(allocator_->getRandomAlloc()); + reinterpret_cast(allocator_[tid]->getRandomAlloc()); if (!item) { return ItemHandle{}; } @@ -1764,38 +2160,47 @@ CacheAllocator::getSampleItem() { template std::vector CacheAllocator::dumpEvictionIterator( - PoolId pid, ClassId cid, size_t numItems) { + PoolId pid, ClassId cid, size_t numItems) { if (numItems == 0) { return {}; } - if (static_cast(pid) >= mmContainers_.size() || - static_cast(cid) >= mmContainers_[pid].size()) { + // Always evict from the lowest layer. 
+ int tid = numTiers_ - 1; + + if (static_cast(tid) >= mmContainers_.size() || + static_cast(pid) >= mmContainers_[tid].size() || + static_cast(cid) >= mmContainers_[tid][pid].size()) { throw std::invalid_argument( - folly::sformat("Invalid PoolId: {} and ClassId: {}.", pid, cid)); + folly::sformat("Invalid TierId: {} and PoolId: {} and ClassId: {}.", tid, pid, cid)); } std::vector content; - auto& mm = *mmContainers_[pid][cid]; - auto evictItr = mm.getEvictionIterator(); size_t i = 0; - while (evictItr && i < numItems) { - content.push_back(evictItr->toString()); - ++evictItr; - ++i; + while (i < numItems && tid >= 0) { + auto& mm = *mmContainers_[tid][pid][cid]; + auto evictItr = mm.getEvictionIterator(); + while (evictItr && i < numItems) { + content.push_back(evictItr->toString()); + ++evictItr; + ++i; + } + + --tid; } return content; } template -folly::IOBuf CacheAllocator::convertToIOBuf(ItemHandle handle) { +template +folly::IOBuf CacheAllocator::convertToIOBufT(Handle& handle) { if (!handle) { throw std::invalid_argument("null item handle for converting to IOBUf"); } - Item* item = handle.get(); + Item* item = handle.getInternal(); const uint32_t dataOffset = item->getOffsetForMemory(); using ConvertChainedItem = std::function( @@ -1807,7 +2212,7 @@ folly::IOBuf CacheAllocator::convertToIOBuf(ItemHandle handle) { // determine to use a new ItemHandle for each chain items // or use shared ItemHandle for all chain items if (item->getRefCount() > config_.thresholdForConvertingToIOBuf) { - auto sharedHdl = std::make_shared(std::move(handle)); + auto sharedHdl = std::make_shared(std::move(handle)); iobuf = folly::IOBuf{ folly::IOBuf::TAKE_OWNERSHIP, item, @@ -1818,10 +2223,10 @@ folly::IOBuf CacheAllocator::convertToIOBuf(ItemHandle handle) { dataOffset + item->getSize(), [](void*, void* userData) { - auto* hdl = reinterpret_cast*>(userData); + auto* hdl = reinterpret_cast*>(userData); delete hdl; } /* freeFunc */, - new std::shared_ptr{sharedHdl} /* userData for freeFunc */}; + new std::shared_ptr{sharedHdl} /* userData for freeFunc */}; if (item->hasChainedItem()) { converter = [sharedHdl](Item*, ChainedItem& chainedItem) { @@ -1836,30 +2241,27 @@ folly::IOBuf CacheAllocator::convertToIOBuf(ItemHandle handle) { chainedItemDataOffset + chainedItem.getSize(), [](void*, void* userData) { - auto* hdl = - reinterpret_cast*>(userData); + auto* hdl = reinterpret_cast*>(userData); delete hdl; } /* freeFunc */, - new std::shared_ptr{ - sharedHdl} /* userData for freeFunc */); + new std::shared_ptr{sharedHdl} /* userData for freeFunc */); }; } } else { - iobuf = - folly::IOBuf{folly::IOBuf::TAKE_OWNERSHIP, item, + iobuf = folly::IOBuf{folly::IOBuf::TAKE_OWNERSHIP, item, - // Since we'll be moving the IOBuf data pointer forward - // by dataOffset, we need to adjust the IOBuf length - // accordingly - dataOffset + item->getSize(), + // Since we'll be moving the IOBuf data pointer forward + // by dataOffset, we need to adjust the IOBuf length + // accordingly + dataOffset + item->getSize(), - [](void* buf, void* userData) { - ItemHandle{reinterpret_cast(buf), + [](void* buf, void* userData) { + Handle{reinterpret_cast(buf), *reinterpret_cast(userData)} - .reset(); - } /* freeFunc */, - this /* userData for freeFunc */}; + .reset(); + } /* freeFunc */, + this /* userData for freeFunc */}; handle.release(); if (item->hasChainedItem()) { @@ -1889,7 +2291,7 @@ folly::IOBuf CacheAllocator::convertToIOBuf(ItemHandle handle) { auto* cache = reinterpret_cast(userData); auto* child = reinterpret_cast(buf); 
auto* parent = &child->getParentItem(cache->compressor_); - ItemHandle{parent, *cache}.reset(); + Handle{parent, *cache}.reset(); } /* freeFunc */, this /* userData for freeFunc */); }; @@ -1961,19 +2363,31 @@ PoolId CacheAllocator::addPool( std::shared_ptr resizeStrategy, bool ensureProvisionable) { folly::SharedMutex::WriteHolder w(poolsResizeAndRebalanceLock_); - auto pid = allocator_->addPool(name, size, allocSizes, ensureProvisionable); + + PoolId pid = 0; + auto tierConfigs = config_.getMemoryTierConfigs(); + for (TierId tid = 0; tid < numTiers_; tid++) { + auto tierSizeRatio = static_cast( + tierConfigs[tid].getSize()) / config_.getCacheSize(); + auto tierPoolSize = static_cast(tierSizeRatio * size); + auto res = allocator_[tid]->addPool(name, tierPoolSize, allocSizes, ensureProvisionable); + XDCHECK(tid == 0 || res == pid); + pid = res; + } + createMMContainers(pid, std::move(config)); setRebalanceStrategy(pid, std::move(rebalanceStrategy)); setResizeStrategy(pid, std::move(resizeStrategy)); + return pid; } template void CacheAllocator::overridePoolRebalanceStrategy( PoolId pid, std::shared_ptr rebalanceStrategy) { - if (static_cast(pid) >= mmContainers_.size()) { + if (static_cast(pid) >= mmContainers_[0].size()) { throw std::invalid_argument(folly::sformat( - "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_.size())); + "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_[0].size())); } setRebalanceStrategy(pid, std::move(rebalanceStrategy)); } @@ -1981,9 +2395,9 @@ void CacheAllocator::overridePoolRebalanceStrategy( template void CacheAllocator::overridePoolResizeStrategy( PoolId pid, std::shared_ptr resizeStrategy) { - if (static_cast(pid) >= mmContainers_.size()) { + if (static_cast(pid) >= mmContainers_[0].size()) { throw std::invalid_argument(folly::sformat( - "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_.size())); + "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_[0].size())); } setResizeStrategy(pid, std::move(resizeStrategy)); } @@ -1995,14 +2409,14 @@ void CacheAllocator::overridePoolOptimizeStrategy( } template -void CacheAllocator::overridePoolConfig(PoolId pid, +void CacheAllocator::overridePoolConfig(TierId tid, PoolId pid, const MMConfig& config) { - if (static_cast(pid) >= mmContainers_.size()) { + // TODO: add generic tier id checking + if (static_cast(pid) >= mmContainers_[tid].size()) { throw std::invalid_argument(folly::sformat( - "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_.size())); + "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_[tid].size())); } - - auto& pool = allocator_->getPool(pid); + auto& pool = allocator_[tid]->getPool(pid); for (unsigned int cid = 0; cid < pool.getNumClassId(); ++cid) { MMConfig mmConfig = config; mmConfig.addExtraConfig( @@ -2010,30 +2424,35 @@ void CacheAllocator::overridePoolConfig(PoolId pid, ? pool.getAllocationClass(static_cast(cid)) .getAllocsPerSlab() : 0); - DCHECK_NOTNULL(mmContainers_[pid][cid].get()); - - mmContainers_[pid][cid]->setConfig(mmConfig); + DCHECK_NOTNULL(mmContainers_[tid][pid][cid].get()); + mmContainers_[tid][pid][cid]->setConfig(mmConfig); } } template void CacheAllocator::createMMContainers(const PoolId pid, MMConfig config) { - auto& pool = allocator_->getPool(pid); + // pools on each layer should have the same number of class id, etc. + // TODO: think about deduplication + auto& pool = allocator_[0]->getPool(pid); + for (unsigned int cid = 0; cid < pool.getNumClassId(); ++cid) { config.addExtraConfig( config_.trackTailHits ? 
pool.getAllocationClass(static_cast(cid)) .getAllocsPerSlab() : 0); - mmContainers_[pid][cid].reset(new MMContainer(config, compressor_)); + for (TierId tid = 0; tid < numTiers_; tid++) { + mmContainers_[tid][pid][cid].reset(new MMContainer(config, compressor_)); + } } } template PoolId CacheAllocator::getPoolId( folly::StringPiece name) const noexcept { - return allocator_->getPoolId(name.str()); + // each tier has the same pools + return allocator_[0]->getPoolId(name.str()); } // The Function returns a consolidated vector of Release Slab @@ -2076,7 +2495,9 @@ std::set CacheAllocator::filterCompactCachePools( template std::set CacheAllocator::getRegularPoolIds() const { folly::SharedMutex::ReadHolder r(poolsResizeAndRebalanceLock_); - return filterCompactCachePools(allocator_->getPoolIds()); + // TODO - get rid of the duplication - right now, each tier + // holds pool objects with mostly the same info + return filterCompactCachePools(allocator_[0]->getPoolIds()); } template @@ -2101,10 +2522,9 @@ std::set CacheAllocator::getRegularPoolIdsForResize() // getAdvisedMemorySize - then pools may be overLimit even when // all slabs are not allocated. Otherwise, pools may be overLimit // only after all slabs are allocated. - // - return (allocator_->allSlabsAllocated()) || - (allocator_->getAdvisedMemorySize() != 0) - ? filterCompactCachePools(allocator_->getPoolsOverLimit()) + return (allocator_[currentTier()]->allSlabsAllocated()) || + (allocator_[currentTier()]->getAdvisedMemorySize() != 0) + ? filterCompactCachePools(allocator_[currentTier()]->getPoolsOverLimit()) : std::set{}; } @@ -2115,7 +2535,7 @@ const std::string CacheAllocator::getCacheName() const { template PoolStats CacheAllocator::getPoolStats(PoolId poolId) const { - const auto& pool = allocator_->getPool(poolId); + const auto& pool = allocator_[currentTier()]->getPool(poolId); const auto& allocSizes = pool.getAllocSizes(); auto mpStats = pool.getStats(); const auto& classIds = mpStats.classIds; @@ -2133,7 +2553,7 @@ PoolStats CacheAllocator::getPoolStats(PoolId poolId) const { // TODO export evictions, numItems etc from compact cache directly. 
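// Aside: the addPool() change above sizes each tier's slice of a new pool by
// that tier's share of the total cache size. The arithmetic, as a sketch
// (helper and parameter names are illustrative, not CacheLib APIs):
//
//   // tierSizes holds each tier's configured bytes; they sum to
//   // totalCacheSize.
//   std::vector<size_t> splitPoolAcrossTiers(
//       size_t poolSize, const std::vector<size_t>& tierSizes,
//       size_t totalCacheSize) {
//     std::vector<size_t> perTier;
//     for (size_t tierSize : tierSizes) {
//       auto ratio = static_cast<double>(tierSize) / totalCacheSize;
//       perTier.push_back(static_cast<size_t>(ratio * poolSize));
//     }
//     return perTier;
//   }
//
// For example, with two tiers sized 3:1, a 1 GiB pool becomes 768 MiB in
// tier 0 and 256 MiB in tier 1.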
if (!isCompactCache) { for (const ClassId cid : classIds) { - const auto& container = getMMContainer(poolId, cid); + const auto& container = getMMContainer(currentTier(), poolId, cid); uint64_t classHits = (*stats_.cacheHits)[poolId][cid].get(); cacheStats.insert( {cid, @@ -2149,7 +2569,7 @@ PoolStats CacheAllocator::getPoolStats(PoolId poolId) const { PoolStats ret; ret.isCompactCache = isCompactCache; - ret.poolName = allocator_->getPoolName(poolId); + ret.poolName = allocator_[currentTier()]->getPoolName(poolId); ret.poolSize = pool.getPoolSize(); ret.poolUsableSize = pool.getPoolUsableSize(); ret.poolAdvisedSize = pool.getPoolAdvisedSize(); @@ -2165,25 +2585,23 @@ template PoolEvictionAgeStats CacheAllocator::getPoolEvictionAgeStats( PoolId pid, unsigned int slabProjectionLength) const { PoolEvictionAgeStats stats; - - const auto& pool = allocator_->getPool(pid); + const auto& pool = allocator_[currentTier()]->getPool(pid); const auto& allocSizes = pool.getAllocSizes(); for (ClassId cid = 0; cid < static_cast(allocSizes.size()); ++cid) { - auto& mmContainer = getMMContainer(pid, cid); + auto& mmContainer = getMMContainer(currentTier(), pid, cid); const auto numItemsPerSlab = - allocator_->getPool(pid).getAllocationClass(cid).getAllocsPerSlab(); + allocator_[currentTier()]->getPool(pid).getAllocationClass(cid).getAllocsPerSlab(); const auto projectionLength = numItemsPerSlab * slabProjectionLength; stats.classEvictionAgeStats[cid] = mmContainer.getEvictionAgeStat(projectionLength); } - return stats; } template CacheMetadata CacheAllocator::getCacheMetadata() const noexcept { return CacheMetadata{kCachelibVersion, kCacheRamFormatVersion, - kCacheNvmFormatVersion, config_.size}; + kCacheNvmFormatVersion, config_.getCacheSize()}; } template @@ -2215,7 +2633,7 @@ void CacheAllocator::releaseSlab(PoolId pid, } try { - auto releaseContext = allocator_->startSlabRelease( + auto releaseContext = allocator_[currentTier()]->startSlabRelease( pid, victim, receiver, mode, hint, [this]() -> bool { return shutDownInProgress_; }); @@ -2224,15 +2642,15 @@ void CacheAllocator::releaseSlab(PoolId pid, return; } - releaseSlabImpl(releaseContext); - if (!allocator_->allAllocsFreed(releaseContext)) { + releaseSlabImpl(currentTier(), releaseContext); + if (!allocator_[currentTier()]->allAllocsFreed(releaseContext)) { throw std::runtime_error( folly::sformat("Was not able to free all allocs. 
PoolId: {}, AC: {}", releaseContext.getPoolId(), releaseContext.getClassId())); } - allocator_->completeSlabRelease(releaseContext); + allocator_[currentTier()]->completeSlabRelease(releaseContext); } catch (const exception::SlabReleaseAborted& e) { stats_.numAbortedSlabReleases.inc(); throw exception::SlabReleaseAborted(folly::sformat( @@ -2243,8 +2661,7 @@ void CacheAllocator::releaseSlab(PoolId pid, } template -SlabReleaseStats CacheAllocator::getSlabReleaseStats() - const noexcept { +SlabReleaseStats CacheAllocator::getSlabReleaseStats() const noexcept { std::lock_guard l(workersMutex_); return SlabReleaseStats{stats_.numActiveSlabReleases.get(), stats_.numReleasedForRebalance.get(), @@ -2261,7 +2678,7 @@ SlabReleaseStats CacheAllocator::getSlabReleaseStats() } template -void CacheAllocator::releaseSlabImpl( +void CacheAllocator::releaseSlabImpl(TierId tid, const SlabReleaseContext& releaseContext) { util::Throttler throttler(config_.throttleConfig); @@ -2289,7 +2706,7 @@ void CacheAllocator::releaseSlabImpl( if (!isMoved) { evictForSlabRelease(releaseContext, item, throttler); } - XDCHECK(allocator_->isAllocFreed(releaseContext, alloc)); + XDCHECK(allocator_[tid]->isAllocFreed(releaseContext, alloc)); } } @@ -2369,8 +2786,11 @@ bool CacheAllocator::moveForSlabRelease( ctx.getPoolId(), ctx.getClassId()); }); } - const auto allocInfo = allocator_->getAllocInfo(oldItem.getMemory()); - allocator_->free(&oldItem); + + auto tid = getTierId(oldItem); + + const auto allocInfo = allocator_[tid]->getAllocInfo(oldItem.getMemory()); + allocator_[tid]->free(&oldItem); (*stats_.fragmentationSize)[allocInfo.poolId][allocInfo.classId].sub( util::getFragmentation(*this, oldItem)); @@ -2432,11 +2852,12 @@ CacheAllocator::allocateNewItemForOldItem(const Item& oldItem) { } const auto allocInfo = - allocator_->getAllocInfo(static_cast(&oldItem)); + allocator_[getTierId(oldItem)]->getAllocInfo(static_cast(&oldItem)); // Set up the destination for the move. Since oldItem would have the moving // bit set, it won't be picked for eviction. - auto newItemHdl = allocateInternal(allocInfo.poolId, + auto newItemHdl = allocateInternalTier(getTierId(oldItem), + allocInfo.poolId, oldItem.getKey(), oldItem.getSize(), oldItem.getCreationTime(), @@ -2521,7 +2942,7 @@ void CacheAllocator::evictForSlabRelease( // last handle for the owner. 
if (owningHandle) { const auto allocInfo = - allocator_->getAllocInfo(static_cast(&item)); + allocator_[getTierId(item)]->getAllocInfo(static_cast(&item)); if (owningHandle->hasChainedItem()) { (*stats_.chainedItemEvictions)[allocInfo.poolId][allocInfo.classId] .inc(); @@ -2548,7 +2969,7 @@ void CacheAllocator::evictForSlabRelease( if (shutDownInProgress_) { item.unmarkMoving(); - allocator_->abortSlabRelease(ctx); + allocator_[getTierId(item)]->abortSlabRelease(ctx); throw exception::SlabReleaseAborted( folly::sformat("Slab Release aborted while trying to evict" " Item: {} Pool: {}, Class: {}.", @@ -2579,6 +3000,9 @@ CacheAllocator::evictNormalItemForSlabRelease(Item& item) { return ItemHandle{}; } + auto evictHandle = tryEvictToNextMemoryTier(&item); + if(evictHandle) return evictHandle; + auto predicate = [](const Item& it) { return it.getRefCount() == 0; }; const bool evictToNvmCache = shouldWriteToNvmCache(item); @@ -2730,6 +3154,7 @@ bool CacheAllocator::removeIfExpired(const ItemHandle& handle) { template bool CacheAllocator::markMovingForSlabRelease( const SlabReleaseContext& ctx, void* alloc, util::Throttler& throttler) { + // MemoryAllocator::processAllocForRelease will execute the callback // if the item is not already free. So there are three outcomes here: // 1. Item not freed yet and marked as moving @@ -2743,18 +3168,20 @@ bool CacheAllocator::markMovingForSlabRelease( // At first, we assume this item was already freed bool itemFreed = true; bool markedMoving = false; - const auto fn = [&markedMoving, &itemFreed](void* memory) { + TierId tid = 0; + const auto fn = [&markedMoving, &itemFreed, &tid, this /* TODO - necessary for getTierId */](void* memory) { // Since this callback is executed, the item is not yet freed itemFreed = false; Item* item = static_cast(memory); if (item->markMoving()) { markedMoving = true; } + tid = getTierId(*item); }; auto startTime = util::getCurrentTimeSec(); while (true) { - allocator_->processAllocForRelease(ctx, alloc, fn); + allocator_[tid]->processAllocForRelease(ctx, alloc, fn); // If item is already freed we give up trying to mark the item moving // and return false, otherwise if marked as moving, we return true. @@ -2770,7 +3197,7 @@ bool CacheAllocator::markMovingForSlabRelease( if (shutDownInProgress_) { XDCHECK(!static_cast(alloc)->isMoving()); - allocator_->abortSlabRelease(ctx); + allocator_[tid]->abortSlabRelease(ctx); throw exception::SlabReleaseAborted( folly::sformat("Slab Release aborted while still trying to mark" " as moving for Item: {}. Pool: {}, Class: {}.", @@ -2793,12 +3220,15 @@ template CCacheT* CacheAllocator::addCompactCache(folly::StringPiece name, size_t size, Args&&... 
args) { + if (numTiers_ != 1) + throw std::runtime_error("TODO: compact cache for multi-tier Cache not supported."); + if (!config_.isCompactCacheEnabled()) { throw std::logic_error("Compact cache is not enabled"); } folly::SharedMutex::WriteHolder lock(compactCachePoolsLock_); - auto poolId = allocator_->addPool(name, size, {Slab::kSize}); + auto poolId = allocator_[0]->addPool(name, size, {Slab::kSize}); isCompactCachePool_[poolId] = true; auto ptr = std::make_unique( @@ -2870,7 +3300,6 @@ typename CacheTrait::MMType::LruType CacheAllocator::getItemLruType( // --------------------------------- // | accessContainer_ | // | mmContainers_ | -// | emptyMMContainers | // | compactCacheManager_ | // | allocator_ | // | metadata_ | @@ -2908,12 +3337,15 @@ folly::IOBufQueue CacheAllocator::saveStateToIOBuf() { *metadata_.numChainedChildItems_ref() = stats_.numChainedChildItems.get(); *metadata_.numAbortedSlabReleases_ref() = stats_.numAbortedSlabReleases.get(); + // TODO: implement serialization for multiple tiers auto serializeMMContainers = [](MMContainers& mmContainers) { MMSerializationTypeContainer state; - for (unsigned int i = 0; i < mmContainers.size(); ++i) { + for (unsigned int i = 0; i < 1 /* TODO: */ ; ++i) { for (unsigned int j = 0; j < mmContainers[i].size(); ++j) { - if (mmContainers[i][j]) { - state.pools_ref()[i][j] = mmContainers[i][j]->saveState(); + for (unsigned int k = 0; k < mmContainers[i][j].size(); ++k) { + if (mmContainers[i][j][k]) { + state.pools_ref()[j][k] = mmContainers[i][j][k]->saveState(); + } } } } @@ -2922,15 +3354,9 @@ folly::IOBufQueue CacheAllocator::saveStateToIOBuf() { MMSerializationTypeContainer mmContainersState = serializeMMContainers(mmContainers_); - // On version 15, persist the empty unevictable mmcontainer. - // So that version <= 14 can still load a metadata saved by version 15. - // TODO: Remove this on version 16. 
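// Aside: serializeMMContainers below deliberately visits only tier 0 (note
// the "i < 1" loop bound). A full multi-tier walk over the
// [tier][pool][class] layout, which complete persistence would need, might
// look like this sketch (container and callback types assumed):
//
//   template <typename MMContainers, typename Fn>
//   void forEachMMContainer(MMContainers& mmContainers, Fn&& fn) {
//     for (unsigned int tid = 0; tid < mmContainers.size(); ++tid) {
//       for (unsigned int pid = 0; pid < mmContainers[tid].size(); ++pid) {
//         for (unsigned int cid = 0; cid < mmContainers[tid][pid].size();
//              ++cid) {
//           if (mmContainers[tid][pid][cid]) {
//             fn(tid, pid, cid, *mmContainers[tid][pid][cid]);
//           }
//         }
//       }
//     }
//   }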
- MMContainers dummyMMContainers = createEmptyMMContainers(); - MMSerializationTypeContainer unevictableMMContainersState = - serializeMMContainers(dummyMMContainers); - AccessSerializationType accessContainerState = accessContainer_->saveState(); - MemoryAllocator::SerializationType allocatorState = allocator_->saveState(); + // TODO: foreach allocator + MemoryAllocator::SerializationType allocatorState = allocator_[0]->saveState(); CCacheManager::SerializationType ccState = compactCacheManager_->saveState(); AccessSerializationType chainedItemAccessContainerState = @@ -2943,7 +3369,6 @@ folly::IOBufQueue CacheAllocator::saveStateToIOBuf() { Serializer::serializeToIOBufQueue(queue, allocatorState); Serializer::serializeToIOBufQueue(queue, ccState); Serializer::serializeToIOBufQueue(queue, mmContainersState); - Serializer::serializeToIOBufQueue(queue, unevictableMMContainersState); Serializer::serializeToIOBufQueue(queue, accessContainerState); Serializer::serializeToIOBufQueue(queue, chainedItemAccessContainerState); return queue; @@ -2993,6 +3418,8 @@ CacheAllocator::shutDown() { (shmShutDownStatus == ShmShutDownRes::kSuccess); shmManager_.reset(); + // TODO: save per-tier state + if (shmShutDownSucceeded) { if (!nvmShutDownStatusOpt || *nvmShutDownStatusOpt) return ShutDownStatus::kSuccess; @@ -3030,7 +3457,7 @@ std::optional CacheAllocator::saveNvmCache() { return false; } - nvmCacheState_.markSafeShutDown(); + nvmCacheState_.value().markSafeShutDown(); return true; } @@ -3041,8 +3468,11 @@ void CacheAllocator::saveRamCache() { std::unique_ptr ioBuf = serializedBuf.move(); ioBuf->coalesce(); - void* infoAddr = - shmManager_->createShm(detail::kShmInfoName, ioBuf->length()).addr; + ShmSegmentOpts opts; + opts.typeOpts = PosixSysVSegmentOpts(config_.isUsingPosixShm()); + + void* infoAddr = shmManager_->createShm(detail::kShmInfoName, ioBuf->length(), + nullptr, opts).addr; Serializer serializer(reinterpret_cast(infoAddr), reinterpret_cast(infoAddr) + ioBuf->length()); serializer.writeToBuffer(std::move(ioBuf)); @@ -3056,7 +3486,9 @@ CacheAllocator::deserializeMMContainers( const auto container = deserializer.deserialize(); - MMContainers mmContainers; + /* TODO: right now, we create empty containers because deserialization + * only works for a single (topmost) tier. */ + MMContainers mmContainers = createEmptyMMContainers(); for (auto& kvPool : *container.pools_ref()) { auto i = static_cast(kvPool.first); @@ -3071,11 +3503,11 @@ CacheAllocator::deserializeMMContainers( ? pool.getAllocationClass(j).getAllocsPerSlab() : 0); ptr->setConfig(config); - mmContainers[i][j] = std::move(ptr); + mmContainers[0 /* TODO */][i][j] = std::move(ptr); } } // We need to drop the unevictableMMContainer in the deserializer. - // TODO: remove this when all use case are later than version 15. + // TODO: remove this at version 17.
if (metadata_.allocatorVersion_ref() <= 15) { deserializer.deserialize(); } @@ -3085,14 +3517,16 @@ CacheAllocator::deserializeMMContainers( template typename CacheAllocator::MMContainers CacheAllocator::createEmptyMMContainers() { - MMContainers mmContainers; + MMContainers mmContainers(numTiers_); for (unsigned int i = 0; i < mmContainers_.size(); i++) { for (unsigned int j = 0; j < mmContainers_[i].size(); j++) { - if (mmContainers_[i][j]) { - MMContainerPtr ptr = - std::make_unique( - mmContainers_[i][j]->getConfig(), compressor_); - mmContainers[i][j] = std::move(ptr); + for (unsigned int k = 0; k < mmContainers_[i][j].size(); k++) { + if (mmContainers_[i][j][k]) { + MMContainerPtr ptr = + std::make_unique( + mmContainers_[i][j][k]->getConfig(), compressor_); + mmContainers[i][j][k] = std::move(ptr); + } } } } @@ -3193,8 +3627,9 @@ CacheAllocator::findChainedItem(const Item& parent) const { } template -typename CacheAllocator::ChainedAllocs -CacheAllocator::viewAsChainedAllocs(const ItemHandle& parent) { +template +CacheChainedAllocs, Handle, Iter> +CacheAllocator::viewAsChainedAllocsT(const Handle& parent) { XDCHECK(parent); auto handle = parent.clone(); if (!handle) { @@ -3210,7 +3645,8 @@ CacheAllocator::viewAsChainedAllocs(const ItemHandle& parent) { auto l = chainedItemLocks_.lockShared(handle->getKey()); auto head = findChainedItem(*handle); - return ChainedAllocs{std::move(l), std::move(handle), *head, compressor_}; + return CacheChainedAllocs, Handle, Iter>{ + std::move(l), std::move(handle), *head, compressor_}; } template @@ -3222,8 +3658,8 @@ GlobalCacheStats CacheAllocator::getGlobalCacheStats() const { const uint64_t currTime = util::getCurrentTimeSec(); ret.ramUpTime = currTime - cacheCreationTime_; - ret.nvmUpTime = currTime - nvmCacheState_.getCreationTime(); ret.nvmCacheEnabled = nvmCache_ ? nvmCache_->isEnabled() : false; + ret.nvmUpTime = currTime - getNVMCacheCreationTime(); ret.reaperStats = getReaperStats(); ret.numActiveHandles = getNumActiveHandles(); @@ -3232,10 +3668,13 @@ GlobalCacheStats CacheAllocator::getGlobalCacheStats() const { template CacheMemoryStats CacheAllocator::getCacheMemoryStats() const { - const auto totalCacheSize = allocator_->getMemorySize(); + size_t totalCacheSize = 0; + for(auto& allocator: allocator_) { + totalCacheSize += allocator->getMemorySize(); + } auto addSize = [this](size_t a, PoolId pid) { - return a + allocator_->getPool(pid).getPoolSize(); + return a + allocator_[currentTier()]->getPool(pid).getPoolSize(); }; const auto regularPoolIds = getRegularPoolIds(); const auto ccCachePoolIds = getCCachePoolIds(); @@ -3247,12 +3686,12 @@ CacheMemoryStats CacheAllocator::getCacheMemoryStats() const { return CacheMemoryStats{totalCacheSize, regularCacheSize, compactCacheSize, - allocator_->getAdvisedMemorySize(), + allocator_[currentTier()]->getAdvisedMemorySize(), memMonitor_ ? memMonitor_->getMaxAdvisePct() : 0, - allocator_->getUnreservedMemorySize(), + allocator_[currentTier()]->getUnreservedMemorySize(), nvmCache_ ? nvmCache_->getSize() : 0, - memMonitor_ ? memMonitor_->getMemAvailableSize() : 0, - memMonitor_ ? 
memMonitor_->getMemRssSize() : 0}; + util::getMemAvailable(), + util::getRSSBytes()}; } template @@ -3386,12 +3825,14 @@ bool CacheAllocator::stopReaper(std::chrono::seconds timeout) { template bool CacheAllocator::cleanupStrayShmSegments( - const std::string& cacheDir, bool posix) { + const std::string& cacheDir, bool posix /*TODO(SHM_FILE): const std::vector& config */) { if (util::getStatIfExists(cacheDir, nullptr) && util::isDir(cacheDir)) { try { // cache dir exists. clean up only if there are no other processes // attached. if another process was attached, the following would fail. ShmManager::cleanup(cacheDir, posix); + + // TODO: cleanup per-tier state } catch (const std::exception& e) { XLOGF(ERR, "Error cleaning up {}. Exception: ", cacheDir, e.what()); return false; @@ -3401,18 +3842,31 @@ bool CacheAllocator::cleanupStrayShmSegments( // Any other concurrent process can not be attached to the segments or // even if it does, we want to mark it for destruction. ShmManager::removeByName(cacheDir, detail::kShmInfoName, posix); - ShmManager::removeByName(cacheDir, detail::kShmCacheName, posix); + ShmManager::removeByName(cacheDir, detail::kShmCacheName + + std::to_string(0), posix); ShmManager::removeByName(cacheDir, detail::kShmHashTableName, posix); ShmManager::removeByName(cacheDir, detail::kShmChainedItemHashTableName, posix); + + // TODO(SHM_FILE): try to nuke segments of different types (which require + // extra info) + // for (auto &tier : config) { + // ShmManager::removeByName(cacheDir, tierShmName, config_.memoryTiers[i].opts); + // } } return true; } template -uintptr_t CacheAllocator::getItemPtrAsOffset(const void* ptr) { +uint64_t CacheAllocator::getItemPtrAsOffset(const void* ptr) { + // Return uint64_t instead of uintptr_t to accommodate platforms where + // the two differ (e.g. Mac OS 12) - causing templating instantiation + // errors downstream. + + auto tid = getTierId(ptr); + // if this succeeds, the address is valid within the cache. - allocator_->getAllocInfo(ptr); + allocator_[tid]->getAllocInfo(ptr); if (!isOnShm_ || !shmManager_) { throw std::invalid_argument("Shared memory not used"); @@ -3420,8 +3874,8 @@ uintptr_t CacheAllocator::getItemPtrAsOffset(const void* ptr) { const auto& shm = shmManager_->getShmByName(detail::kShmCacheName); - return reinterpret_cast(ptr) - - reinterpret_cast(shm.getCurrentMapping().addr); + return reinterpret_cast(ptr) - + reinterpret_cast(shm.getCurrentMapping().addr); } template diff --git a/cachelib/allocator/CacheAllocator.h b/cachelib/allocator/CacheAllocator.h index a065ff208f..319e66a626 100644 --- a/cachelib/allocator/CacheAllocator.h +++ b/cachelib/allocator/CacheAllocator.h @@ -21,9 +21,12 @@ #include #include #include +#include +#include #include #include +#include #include #include #include @@ -151,10 +154,28 @@ class CacheAllocator : public CacheBase { using Item = CacheItem; using ChainedItem = typename Item::ChainedItem; + // the holder for the item when we hand it to the caller. This ensures + // that the reference count is maintained when the caller is done with the + // item. The ItemHandle provides a getMemory() and getKey() interface. The + // caller is free to use the result of these two as long as the handle is + // active/alive. Using the result of the above interfaces after destroying + // the ItemHandle is UB. The ItemHandle safely wraps a pointer to the Item.
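// For illustration, the read/write handle split in action (a sketch; assumes
// a constructed cache instance and a present key; find() and findToWrite()
// are declared further below):
//
//   auto rh = cache.find("key1");          // ReadHandle: const view
//   if (rh) {
//     const void* data = rh->getMemory();  // read-only access
//   }
//   auto wh = cache.findToWrite("key1");   // WriteHandle: mutable view;
//   if (wh) {                              // also invalidates NVM by default
//     std::memcpy(wh->getMemory(), "new", 3);
//   }
//
// convertToIOBuf()/convertToIOBufForWrite(), declared below, hand these same
// handles to folly::IOBuf-based code without copying the payload.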
+ using ReadHandle = typename Item::ReadHandle; + using WriteHandle = typename Item::WriteHandle; + using ItemHandle = WriteHandle; + template > + using TypedHandle = TypedHandleImpl; + // TODO (sathya) some types take CacheT and some take CacheTrait. need to // clean this up and come up with a consistent policy that is intuitive. - using ChainedAllocs = CacheChainedAllocs; - using ChainedItemIter = CacheChainedItemIterator; + using ChainedItemIter = CacheChainedItemIterator; + using WritableChainedItemIter = CacheChainedItemIterator; + using ChainedAllocs = CacheChainedAllocs; + using WritableChainedAllocs = + CacheChainedAllocs; + using Key = typename Item::Key; using PoolIds = std::set; @@ -171,6 +192,49 @@ class CacheAllocator : public CacheBase { // Iterator range pointing to chained allocs associated with @item folly::Range chainedAllocs; }; + struct DestructorData { + DestructorData(DestructorContext ctx, + Item& it, + folly::Range iter, + PoolId id) + : context(ctx), item(it), chainedAllocs(iter), pool(id) {} + + // helps to convert RemoveContext to DestructorContext, + // the context for RemoveCB is re-used to create DestructorData, + // this can be removed if RemoveCB is dropped. + DestructorData(RemoveContext ctx, + Item& it, + folly::Range iter, + PoolId id) + : item(it), chainedAllocs(iter), pool(id) { + if (ctx == RemoveContext::kEviction) { + context = DestructorContext::kEvictedFromRAM; + } else { + context = DestructorContext::kRemovedFromRAM; + } + } + + // remove or eviction + DestructorContext context; + + // item about to be freed back to allocator + // when the item is evicted/removed from NVM, the item is created on the + // heap, functions (e.g. CacheAllocator::getAllocInfo) that assumes item is + // located in cache slab doesn't work in such case. + // chained items must be iterated though @chainedAllocs. + // Other APIs used to access chained items are not compatible and should not + // be used. + Item& item; + + // Iterator range pointing to chained allocs associated with @item + // when chained items are evicted/removed from NVM, items are created on the + // heap, functions (e.g. CacheAllocator::getAllocInfo) that assumes items + // are located in cache slab doesn't work in such case. + folly::Range chainedAllocs; + + // the pool that this item is/was + PoolId pool; + }; // call back to execute when moving an item, this could be a simple memcpy // or something more complex. @@ -180,20 +244,14 @@ class CacheAllocator : public CacheBase { std::function; // call back type that is executed when the cache item is removed - // (evicted / freed) + // (evicted / freed) from RAM, only items inserted into cache (not nascent) + // successfully are tracked using RemoveCb = std::function; - // the holder for the item when we hand it to the caller. This ensures - // that the reference count is maintained when the caller is done with the - // item. The ItemHandle provides a getMemory() and getKey() interface. The - // caller is free to use the result of these two as long as the handle is - // active/alive. Using the result of the above interfaces after destroying - // the ItemHandle is UB. The ItemHandle safely wraps a pointer to the Item. - using ItemHandle = typename Item::Handle; - template > - using TypedHandle = TypedHandleImpl; + // the destructor being executed when the item is removed from cache (both RAM + // and NVM), only items inserted into cache (not nascent) successfully are + // tracked. 
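// For illustration, a sketch of installing the destructor through the config
// API added later in this diff (assumes a CacheAllocatorConfig instance named
// config):
//
//   config.setItemDestructor([](const DestructorData& data) {
//     // runs exactly once per item, whether it leaves from RAM or from NVM
//     if (data.context == DestructorContext::kEvictedFromRAM) {
//       // e.g. count RAM evictions for data.pool
//     }
//   });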
+ using ItemDestructor = std::function; using NvmCacheT = NvmCache; using NvmCacheConfig = typename NvmCacheT::Config; @@ -291,7 +349,7 @@ class CacheAllocator : public CacheBase { // @return handle to the chained allocation // @throw std::invalid_argument if the size requested is invalid or // if the item is invalid - ItemHandle allocateChainedItem(const ItemHandle& parent, uint32_t size); + WriteHandle allocateChainedItem(const ReadHandle& parent, uint32_t size); // Link a chained item to a parent item and mark this parent handle as having // chained allocations. @@ -391,15 +449,34 @@ class CacheAllocator : public CacheBase { // @return handle to the old item that had been replaced ItemHandle insertOrReplace(const ItemHandle& handle); + // look up an item by its key across the nvm cache as well if enabled. + // + // @param key the key for lookup + // + // @return the read handle for the item or a handle to nullptr if the + // key does not exist. + ReadHandle find(Key key); + // look up an item by its key across the nvm cache as well if enabled. // // @param key the key for lookup - // @param mode the mode of access for the lookup. defaults to - // AccessMode::kRead + // @param mode the mode of access for the lookup. + // AccessMode::kRead or AccessMode::kWrite // // @return the handle for the item or a handle to nullptr if the key does // not exist. - ItemHandle find(Key key, AccessMode mode = AccessMode::kRead); + ItemHandle find(Key key, AccessMode mode); + + // look up an item by its key across the nvm cache as well if enabled. Users + // should call this API only when they are going to mutate the item data. + // + // @param key the key for lookup + // @param isNvmInvalidate whether to do nvm invalidation; + // defaults to be true + // + // @return the handle for the item or a handle to nullptr if the + // key does not exist. + ItemHandle findToWrite(Key key, bool doNvmInvalidation = true); // look up an item by its key. This ignores the nvm cache and only does RAM // lookup. @@ -478,28 +555,47 @@ class CacheAllocator : public CacheBase { // removes the allocation corresponding to the handle. The allocation will // be freed when all the existing handles are released. // - // @param it item handle + // @param it item read handle // // @return kSuccess if the item exists and was successfully removed. // kNotFoundInRam otherwise // // @throw std::invalid_argument if item handle is null - RemoveRes remove(const ItemHandle& it); + RemoveRes remove(const ReadHandle& it); + + // view a read-only parent item as a chain of allocations if it has chained + // alloc. The returned chained-alloc is good to iterate upon, but will block + // any concurrent addChainedItem or popChainedItem for the same key until the + // ChainedAllocs object is released. This is ideal for use cases which do + // very brief operations on the chain of allocations. + // + // The ordering of the iteration for the chain is LIFO. Check + // CacheChainedAllocs.h for the API and usage. + // + // @param parent the parent allocation of the chain from a ReadHandle. + // @return read-only chained alloc view of the parent + // + // @throw std::invalid_argument if the parent does not have chained allocs + ChainedAllocs viewAsChainedAllocs(const ReadHandle& parent) { + return viewAsChainedAllocsT(parent); + } - // view a parent item as a chain of allocations if it has chained alloc. 
- // The returned chained-alloc is good to iterate upon, but will block any - // concurrent addChainedItem or popChainedItem for the same key until the + // view a writable parent item as a chain of allocations if it has chained + // alloc. The returned chained-alloc is good to iterate upon, but will block + // any concurrent addChainedItem or popChainedItem for the same key until the // ChainedAllocs object is released. This is ideal for use cases which do // very brief operations on the chain of allocations. // // The ordering of the iteration for the chain is LIFO. Check // CacheChainedAllocs.h for the API and usage. // - // @param parent the parent allocation of the chain. - // @return chained alloc view of the paren + // @param parent the parent allocation of the chain from a WriteHandle. + // @return writable chained alloc view of the parent // // @throw std::invalid_argument if the parent does not have chained allocs - ChainedAllocs viewAsChainedAllocs(const ItemHandle& parent); + WritableChainedAllocs viewAsWritableChainedAllocs(const WriteHandle& parent) { + return viewAsChainedAllocsT(parent); + } // Returns the full usable size for this item // This can be bigger than item.getSize() @@ -518,24 +614,44 @@ class CacheAllocator : public CacheBase { // to an valid item ItemHandle getSampleItem(); - // TODO: When Read/Write Handles are ready, change this to allow - // const-only access to data manged by iobuf and offer a - // convertToWritableIOBuf() API. - // - // Convert an item handle to an IOBuf. The returned IOBuf gives a + // Convert a Read Handle to an IOBuf. The returned IOBuf gives a // read-only view to the user. The item's ownership is retained by // the IOBuf until its destruction. // - // When the item handle has one or more chained items attached to it, + // When the read handle has one or more chained items attached to it, // user will also get a series of IOBufs (first of which is the Parent). // - // @param handle item handle that will transfer its ownership to an IOBuf + // **WARNING**: folly::IOBuf allows mutation to a cachelib item even when the + // item is read-only. User is responsible to ensure no mutation occurs (i.e. + // only const functions are called). If mutation is required, please use + // `convertToIOBufForWrite`. + // + // @param handle read handle that will transfer its ownership to an IOBuf // // @return an IOBuf that contains the value of the item. - // This IOBuf acts as an Item Handle, on destruction, it will + // This IOBuf acts as a Read Handle, on destruction, it will // properly decrement the refcount (to release the item). - // @throw std::invalid_argument if ItemHandle is nullptr - folly::IOBuf convertToIOBuf(ItemHandle handle); + // @throw std::invalid_argument if ReadHandle is nullptr + folly::IOBuf convertToIOBuf(ReadHandle handle) { + return convertToIOBufT(handle); + } + + // Convert a Write Handle to an IOBuf. The returned IOBuf gives a + // writable view to the user. The item's ownership is retained by + // the IOBuf until its destruction. + // + // When the write handle has one or more chained items attached to it, + // user will also get a series of IOBufs (first of which is the Parent). + // + // @param handle write handle that will transfer its ownership to an IOBuf + // + // @return an IOBuf that contains the value of the item. + // This IOBuf acts as a Write Handle, on destruction, it will + // properly decrement the refcount (to release the item). 
+ // @throw std::invalid_argument if WriteHandle is nullptr + folly::IOBuf convertToIOBufForWrite(WriteHandle handle) { + return convertToIOBufT(handle); + } // TODO: When Read/Write Handles are ready, change this to allow // const-only access to data managed by iobuf and offer a @@ -585,7 +701,7 @@ class CacheAllocator : public CacheBase { // @param config new config for the pool // // @throw std::invalid_argument if the poolId is invalid - void overridePoolConfig(PoolId pid, const MMConfig& config); + void overridePoolConfig(TierId tid, PoolId pid, const MMConfig& config); // update an existing pool's rebalance strategy // @@ -626,8 +742,9 @@ class CacheAllocator : public CacheBase { // @return true if the operation succeeded. false if the size of the pool is // smaller than _bytes_ // @throw std::invalid_argument if the poolId is invalid. + // TODO: should call shrinkPool for specific tier? bool shrinkPool(PoolId pid, size_t bytes) { - return allocator_->shrinkPool(pid, bytes); + return allocator_[currentTier()]->shrinkPool(pid, bytes); } // grow an existing pool by _bytes_. This will fail if there is no @@ -636,8 +753,9 @@ class CacheAllocator : public CacheBase { // @return true if the pool was grown. false if the necessary number of // bytes were not available. // @throw std::invalid_argument if the poolId is invalid. + // TODO: should call growPool for specific tier? bool growPool(PoolId pid, size_t bytes) { - return allocator_->growPool(pid, bytes); + return allocator_[currentTier()]->growPool(pid, bytes); } // move bytes from one pool to another. The source pool should be at least @@ -650,7 +768,7 @@ class CacheAllocator : public CacheBase { // correct size to do the transfer. // @throw std::invalid_argument if src or dest is invalid pool bool resizePools(PoolId src, PoolId dest, size_t bytes) override { - return allocator_->resizePools(src, dest, bytes); + return allocator_[currentTier()]->resizePools(src, dest, bytes); } // Add a new compact cache with given name and size @@ -850,12 +968,13 @@ class CacheAllocator : public CacheBase { // @throw std::invalid_argument if the memory does not belong to this // cache allocator AllocInfo getAllocInfo(const void* memory) const { - return allocator_->getAllocInfo(memory); + return allocator_[getTierId(memory)]->getAllocInfo(memory); } // return the ids for the set of existing pools in this cache. std::set getPoolIds() const override final { - return allocator_->getPoolIds(); + // all tiers have the same pool ids. TODO: deduplicate + return allocator_[0]->getPoolIds(); } // return a list of pool ids that are backing compact caches. This includes @@ -867,18 +986,18 @@ class CacheAllocator : public CacheBase { // return the pool with specified id.
const MemoryPool& getPool(PoolId pid) const override final { - return allocator_->getPool(pid); + return allocator_[currentTier()]->getPool(pid); } // calculate the number of slabs to be advised/reclaimed in each pool PoolAdviseReclaimData calcNumSlabsToAdviseReclaim() override final { auto regularPoolIds = getRegularPoolIds(); - return allocator_->calcNumSlabsToAdviseReclaim(regularPoolIds); + return allocator_[currentTier()]->calcNumSlabsToAdviseReclaim(regularPoolIds); } // update number of slabs to advise in the cache void updateNumSlabsToAdvise(int32_t numSlabsToAdvise) override final { - allocator_->updateNumSlabsToAdvise(numSlabsToAdvise); + allocator_[currentTier()]->updateNumSlabsToAdvise(numSlabsToAdvise); } // returns a valid PoolId corresponding to the name or kInvalidPoolId if the @@ -887,7 +1006,8 @@ class CacheAllocator : public CacheBase { // returns the pool's name by its poolId. std::string getPoolName(PoolId poolId) const { - return allocator_->getPoolName(poolId); + // all tiers have the same pool names. + return allocator_[0]->getPoolName(poolId); } // get stats related to all kinds of slab release events. @@ -928,7 +1048,7 @@ class CacheAllocator : public CacheBase { // pool stats by pool id PoolStats getPoolStats(PoolId pid) const override final; - // This can be expensive so it is not part of PoolStats + // This can be expensive so it is not part of PoolStats. PoolEvictionAgeStats getPoolEvictionAgeStats( PoolId pid, unsigned int slabProjectionLength) const override final; @@ -938,7 +1058,7 @@ class CacheAllocator : public CacheBase { // return the overall cache stats GlobalCacheStats getGlobalCacheStats() const override final; - // return cache's memory usage stats + // return cache's memory usage stats. CacheMemoryStats getCacheMemoryStats() const override final; // return the nvm cache stats map @@ -968,8 +1088,17 @@ class CacheAllocator : public CacheBase { // // @return time when the cache was created. time_t getCacheCreationTime() const noexcept { return cacheCreationTime_; } + + // unix timestamp when the NVM cache was created. If NVM cache isn't enabled, + // the cache creation time is returned instead. + // + // @return time when the NVM cache was created. time_t getNVMCacheCreationTime() const { - return nvmCacheState_.getCreationTime(); + auto result = getCacheCreationTime(); + if (nvmCacheState_.has_value()) { + result = nvmCacheState_.value().getCreationTime(); + } + return result; } // Inspects the cache without changing its state. @@ -1026,6 +1155,10 @@ class CacheAllocator : public CacheBase { } } + // Mark the item as dirty and enqueue for deletion from nvmcache + // @param item item to invalidate. + void invalidateNvm(Item& item); + // Attempts to clean up left-over shared memory from previous instance of // cachelib cache for the cache directory. If there are other processes // using the same directory, we don't touch it. If the directory is not // returns true if there was no error in trying to cleanup the segment // because another process was attached. False if the user tried to clean up // and the cache was actually attached. - static bool cleanupStrayShmSegments(const std::string& cacheDir, bool posix); + static bool cleanupStrayShmSegments(const std::string& cacheDir, bool posix + /*TODO: const std::vector& config = {} */); // gives a relative offset to a pointer within the cache.
uint64_t getItemPtrAsOffset(const void* ptr); @@ -1047,7 +1181,8 @@ class CacheAllocator : public CacheBase { sizeof(typename RefcountWithFlags::Value) + sizeof(uint32_t) + sizeof(uint32_t) + sizeof(KAllocation)) == sizeof(Item), "vtable overhead"); - static_assert(32 == sizeof(Item), "item overhead is 32 bytes"); + // XXX: this will fail due to CompressedPtr change + // static_assert(32 == sizeof(Item), "item overhead is 32 bytes"); // make sure there is no overhead in ChainedItem on top of a regular Item static_assert(sizeof(Item) == sizeof(ChainedItem), @@ -1126,18 +1261,21 @@ class CacheAllocator : public CacheBase { // acquires the wait context for the handle. This is used by NvmCache to // maintain a list of waiters - std::shared_ptr> getWaitContext( + std::shared_ptr> getWaitContext( ItemHandle& hdl) const { return hdl.getItemWaitContext(); } using MMContainerPtr = std::unique_ptr; using MMContainers = - std::array, - MemoryPoolManager::kMaxPools>; + std::vector, + MemoryPoolManager::kMaxPools>>; void createMMContainers(const PoolId pid, MMConfig config); + TierId getTierId(const Item& item) const; + TierId getTierId(const void* ptr) const; + // acquire the MMContainer corresponding to the the Item's class and pool. // // @return pointer to the MMContainer. @@ -1145,13 +1283,11 @@ class CacheAllocator : public CacheBase { // allocation from the memory allocator. MMContainer& getMMContainer(const Item& item) const noexcept; - MMContainer& getMMContainer(PoolId pid, ClassId cid) const noexcept; - // acquire the MMContainer for the give pool and class id and creates one // if it does not exist. // - // @return pointer to a valid MMContainer that is initialized. - MMContainer& getEvictableMMContainer(PoolId pid, ClassId cid) const noexcept; + // @return pointer to a valid MMContainer that is initialized + MMContainer& getMMContainer(TierId tid, PoolId pid, ClassId cid) const noexcept; // create a new cache allocation. The allocation can be initialized // appropriately and made accessible through insert or insertOrReplace. @@ -1183,6 +1319,17 @@ class CacheAllocator : public CacheBase { uint32_t creationTime, uint32_t expiryTime); + // create a new cache allocation on specific memory tier. + // For description see allocateInternal. + // + // @param tid id a memory tier + ItemHandle allocateInternalTier(TierId tid, + PoolId id, + Key key, + uint32_t size, + uint32_t creationTime, + uint32_t expiryTime); + // Allocate a chained item // // The resulting chained item does not have a parent item and @@ -1197,8 +1344,8 @@ class CacheAllocator : public CacheBase { // @return handle to the chained allocation // @throw std::invalid_argument if the size requested is invalid or // if the item is invalid - ItemHandle allocateChainedItemInternal(const ItemHandle& parent, - uint32_t size); + WriteHandle allocateChainedItemInternal(const ReadHandle& parent, + uint32_t size); // Given an item and its parentKey, validate that the parentKey // corresponds to an item that's the parent of the supplied item. @@ -1247,6 +1394,16 @@ class CacheAllocator : public CacheBase { // not exist. FOLLY_ALWAYS_INLINE ItemHandle findFastImpl(Key key, AccessMode mode); + // Moves a regular item to a different memory tier. + // + // @param oldItem Reference to the item being moved + // @param newItemHdl Reference to the handle of the new item being moved into + // + // @return true If the move was completed, and the containers were updated + // successfully. 
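// For context, a much-simplified sketch of what moving an item to another
// tier involves (hedged; the real implementation also handles chained items,
// moving-bit synchronization, and failure paths):
//
//   1. allocate a same-key/size/TTL destination via allocateInternalTier()
//   2. copy the payload:
//        std::memcpy(newItemHdl->getMemory(), oldItem.getMemory(),
//                    oldItem.getSize());
//   3. swap the MM/access-container entries so lookups find the new copy
//   4. free the old allocation back to its tier's allocator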
+ template + ItemHandle moveRegularItemOnEviction(ItemPtr& oldItem, ItemHandle& newItemHdl); + // Moves a regular item to a different slab. This should only be used during // slab release after the item's moving bit has been set. The user supplied // callback is responsible for copying the contents and fixing the semantics @@ -1259,6 +1416,17 @@ class CacheAllocator : public CacheBase { // successfully. bool moveRegularItem(Item& oldItem, ItemHandle& newItemHdl); + // template class for viewAsChainedAllocs that takes either ReadHandle or + // WriteHandle + template + CacheChainedAllocs viewAsChainedAllocsT( + const Handle& parent); + + // template class for convertToIOBuf that takes either ReadHandle or + // WriteHandle + template + folly::IOBuf convertToIOBufT(Handle& handle); + // Moves a chained item to a different slab. This should only be used during // slab release after the item's moving bit has been set. The user supplied // callback is responsible for copying the contents and fixing the semantics @@ -1304,10 +1472,6 @@ class CacheAllocator : public CacheBase { ItemHandle newItemHdl, const Item& parent); - // Mark the item as dirty and enqueue for deletion from nvmcache - // @param hdl item to invalidate. - void invalidateNvm(Item& item); - // Insert an item into MM container. The caller must hold a valid handle for // the item. // @@ -1325,6 +1489,10 @@ class CacheAllocator : public CacheBase { // false if the item is not in MMContainer bool removeFromMMContainer(Item& item); + using EvictionIterator = typename MMContainer::Iterator; + + ItemHandle acquire(EvictionIterator& it) { return acquire(it.get()); } + // Replaces an item in the MMContainer with another item, at the same // position. // @@ -1335,6 +1503,8 @@ class CacheAllocator : public CacheBase { // destination item did not exist in the container, or if the // source item already existed. bool replaceInMMContainer(Item& oldItem, Item& newItem); + bool replaceInMMContainer(Item* oldItem, Item& newItem); + bool replaceInMMContainer(EvictionIterator& oldItemIt, Item& newItem); // Replaces an item in the MMContainer with another item, at the same // position. Or, if the two chained items belong to two different MM @@ -1361,7 +1531,7 @@ class CacheAllocator : public CacheBase { // @param event AllocatorApiEvent that corresponds to the current operation. // supported events are INSERT, corresponding to the client // insert call, and INSERT_FROM_NVM, cooresponding to the insert - // call that happens when an item is promoted from NVM storate + // call that happens when an item is promoted from NVM storage // to memory. // // @return true if the handle was successfully inserted into the hashtable @@ -1389,9 +1559,7 @@ class CacheAllocator : public CacheBase { // @param pid the id of the pool to look for evictions inside // @param cid the id of the class to look for evictions inside // @return An evicted item or nullptr if there is no suitable candidate. - Item* findEviction(PoolId pid, ClassId cid); - - using EvictionIterator = typename MMContainer::Iterator; + Item* findEviction(TierId tid, PoolId pid, ClassId cid); // Advance the current iterator and try to evict a regular item // @@ -1400,7 +1568,7 @@ class CacheAllocator : public CacheBase { // // @return valid handle to regular item on success. This will be the last // handle to the item. On failure an empty handle. 
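// Conceptually, findEviction(tid, pid, cid) scans one tier's MM container
// from the eviction end for an item that nobody else holds. A simplified
// sketch (the predicates mirror those used elsewhere in this file):
//
//   template <typename MMContainer, typename Item>
//   Item* findEvictionCandidate(MMContainer& mmContainer) {
//     for (auto itr = mmContainer.getEvictionIterator(); itr; ++itr) {
//       if (itr->getRefCount() == 0 && !itr->isMoving()) {
//         return itr.get();
//       }
//     }
//     return nullptr;
//   }
//
// With tiers, tryEvictToNextMemoryTier() first offers the candidate to tier
// tid + 1 before it is dropped from RAM entirely.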
- ItemHandle advanceIteratorAndTryEvictRegularItem(MMContainer& mmContainer, + ItemHandle advanceIteratorAndTryEvictRegularItem(TierId tid, PoolId pid, MMContainer& mmContainer, EvictionIterator& itr); // Advance the current iterator and try to evict a chained item @@ -1410,7 +1578,26 @@ // // @return valid handle to the parent item on success. This will be the last // handle to the item - ItemHandle advanceIteratorAndTryEvictChainedItem(EvictionIterator& itr); + ItemHandle advanceIteratorAndTryEvictChainedItem(TierId tid, PoolId pid, EvictionIterator& itr); + + // Try to move the item down to the next memory tier + // + // @param tid current tier ID of the item + // @param pid the pool ID the item belong to. + // @param item the item to evict + // + // @return valid handle to the item. This will be the last + // handle to the item. On failure an empty handle. + template + ItemHandle tryEvictToNextMemoryTier(TierId tid, PoolId pid, ItemPtr& item); + + // Try to move the item down to the next memory tier + // + // @param item the item to evict + // + // @return valid handle to the item. This will be the last + // handle to the item. On failure an empty handle. + ItemHandle tryEvictToNextMemoryTier(Item* item); // Deserializer CacheAllocatorMetadata and verify the version // @@ -1432,7 +1619,7 @@ class CacheAllocator : public CacheBase { MMContainers createEmptyMMContainers(); unsigned int reclaimSlabs(PoolId id, size_t numSlabs) final { - return allocator_->reclaimSlabsAndGrow(id, numSlabs); + return allocator_[currentTier()]->reclaimSlabsAndGrow(id, numSlabs); } FOLLY_ALWAYS_INLINE EventTracker* getEventTracker() const { @@ -1491,7 +1678,7 @@ class CacheAllocator : public CacheBase { const void* hint = nullptr) final; // @param releaseContext slab release context - void releaseSlabImpl(const SlabReleaseContext& releaseContext); + void releaseSlabImpl(TierId tid, const SlabReleaseContext& releaseContext); // @return true when successfully marked as moving, // false when this item has already been freed @@ -1563,7 +1750,7 @@ class CacheAllocator : public CacheBase { // primitives. So we consciously exempt ourselves here from TSAN data race // detection. folly::annotate_ignore_thread_sanitizer_guard g(__FILE__, __LINE__); - allocator_->forEachAllocation(std::forward(f)); + allocator_[currentTier()]->forEachAllocation(std::forward(f)); } // returns true if nvmcache is enabled and we should write this item to @@ -1606,9 +1793,11 @@ class CacheAllocator : public CacheBase { std::unique_ptr& worker, std::chrono::seconds timeout = std::chrono::seconds{0}); - std::unique_ptr createNewMemoryAllocator(); - std::unique_ptr restoreMemoryAllocator(); - std::unique_ptr restoreCCacheManager(); + ShmSegmentOpts createShmCacheOpts(TierId tid); + + std::unique_ptr createNewMemoryAllocator(TierId tid); + std::unique_ptr restoreMemoryAllocator(TierId tid); + std::unique_ptr restoreCCacheManager(TierId tid); PoolIds filterCompactCachePools(const PoolIds& poolIds) const; @@ -1628,7 +1817,7 @@ class CacheAllocator : public CacheBase { } typename Item::PtrCompressor createPtrCompressor() const { - return allocator_->createPtrCompressor(); + return typename Item::PtrCompressor(allocator_); } // helper utility to throttle and optionally log. @@ -1681,7 +1870,8 @@ class CacheAllocator : public CacheBase { // @param item Record the item has been accessed in its mmContainer // @param mode the mode of access // @param stats stats object to avoid a thread local lookup.
- void recordAccessInMMContainer(Item& item, AccessMode mode); + // @return true if successfully recorded in MMContainer + bool recordAccessInMMContainer(Item& item, AccessMode mode); ItemHandle findChainedItem(const Item& parent) const; @@ -1690,7 +1880,7 @@ class CacheAllocator : public CacheBase { void initStats(); - // return an iterator to the item's chained allocations. The order of + // return a read-only iterator to the item's chained allocations. The order of // iteration on the item will be LIFO of the addChainedItem calls. folly::Range viewAsChainedAllocsRange( const Item& parent) const; @@ -1705,6 +1895,91 @@ class CacheAllocator : public CacheBase { // BEGIN private members + TierId currentTier() const { + // TODO: every function which calls this method should be refactored. + // We should go case by case and either make such function work on + // all tiers or expose separate parameter to describe the tier ID. + return 0; + } + + bool addWaitContextForMovingItem( + folly::StringPiece key, std::shared_ptr> waiter); + + class MoveCtx { + public: + MoveCtx() {} + + ~MoveCtx() { + // prevent any further enqueue to waiters + // Note: we don't need to hold locks since no one can enqueue + // after this point. + wakeUpWaiters(); + } + + // record the item handle. Upon destruction we will wake up the waiters + // and pass a clone of the handle to the callBack. By default we pass + // a null handle + void setItemHandle(ItemHandle _it) { it = std::move(_it); } + + // enqueue a waiter into the waiter list + // @param waiter WaitContext + void addWaiter(std::shared_ptr> waiter) { + XDCHECK(waiter); + waiters.push_back(std::move(waiter)); + } + + private: + // notify all pending waiters that are waiting for the fetch. + void wakeUpWaiters() { + bool refcountOverflowed = false; + for (auto& w : waiters) { + // If refcount overflowed earlier, then we will return miss to + // all subsequent waiters. + if (refcountOverflowed) { + w->set(ItemHandle{}); + continue; + } + + try { + w->set(it.clone()); + } catch (const exception::RefcountOverflow&) { + // We'll return a miss to the user's pending read, + // so we should enqueue a delete via NvmCache. + // TODO: cache.remove(it); + refcountOverflowed = true; + } + } + } + + ItemHandle it; // will be set when Context is being filled + std::vector>> waiters; // list of + // waiters + }; + using MoveMap = + folly::F14ValueMap, + folly::HeterogeneousAccessHash>; + + static size_t getShardForKey(folly::StringPiece key) { + return folly::Hash()(key) % kShards; + } + + MoveMap& getMoveMapForShard(size_t shard) { + return movesMap_[shard].movesMap_; + } + + MoveMap& getMoveMap(folly::StringPiece key) { + return getMoveMapForShard(getShardForKey(key)); + } + + std::unique_lock getMoveLockForShard(size_t shard) { + return std::unique_lock(moveLock_[shard].moveLock_); + } + + std::unique_lock getMoveLock(folly::StringPiece key) { + return getMoveLockForShard(getShardForKey(key)); + } + // Whether the memory allocator for this cache allocator was created on shared // memory. The hash table, chained item hash table etc is also created on // shared memory except for temporary shared memory mode when they're created @@ -1713,6 +1988,8 @@ class CacheAllocator : public CacheBase { const Config config_{}; + const typename Config::MemoryTierConfigs memoryTierConfigs; + // Manages the temporary shared memory segment for memory allocator that // is not persisted when cache process exits.
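// Aside: the move map and its mutex above are sharded by key hash so that
// moves of unrelated keys do not contend on one lock. The same scheme as a
// standalone sketch (std::hash in place of folly::Hash, and a small shard
// count for illustration; this diff uses 8192):
//
//   constexpr size_t kNumShards = 8;
//   std::array<std::mutex, kNumShards> shardLocks;
//
//   std::unique_lock<std::mutex> lockForKey(const std::string& key) {
//     size_t shard = std::hash<std::string>{}(key) % kNumShards;
//     return std::unique_lock<std::mutex>(shardLocks[shard]);
//   }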
std::unique_ptr tempShm_; @@ -1730,9 +2007,14 @@ class CacheAllocator : public CacheBase { const MMConfig mmConfig_{}; // the memory allocator for allocating out of the available memory. - std::unique_ptr allocator_; + std::vector> allocator_; + + std::vector> createPrivateAllocator(); + std::vector> createAllocators(); + std::vector> restoreAllocators(); // compact cache allocator manager + // TODO: per tier? std::unique_ptr compactCacheManager_; // compact cache instances reside here when user "add" or "attach" compact @@ -1763,6 +2045,7 @@ class CacheAllocator : public CacheBase { std::unique_ptr chainedItemAccessContainer_{nullptr}; friend ChainedAllocs; + friend WritableChainedAllocs; // ensure any modification to a chain of chained items are synchronized using ChainedItemLock = facebook::cachelib::SharedMutexBuckets; ChainedItemLock chainedItemLocks_; @@ -1793,6 +2076,22 @@ class CacheAllocator : public CacheBase { // poolResizer_, poolOptimizer_, memMonitor_, reaper_ mutable std::mutex workersMutex_; + static constexpr size_t kShards = 8192; // TODO: need to define right value + + struct MovesMapShard { + alignas(folly::hardware_destructive_interference_size) MoveMap movesMap_; + }; + + struct MoveLock { + alignas(folly::hardware_destructive_interference_size) std::mutex moveLock_; + }; + + // a map of all pending moves + std::vector movesMap_; + + // a map of move locks for each shard + std::vector moveLock_; + // time when the ram cache was first created const time_t cacheCreationTime_{0}; @@ -1807,7 +2106,7 @@ class CacheAllocator : public CacheBase { folly::ThreadLocal ring_; // state for the nvmcache - NvmCacheState nvmCacheState_; + std::optional nvmCacheState_{}; // admission policy for nvmcache std::shared_ptr> nvmAdmissionPolicy_; @@ -1818,7 +2117,7 @@ class CacheAllocator : public CacheBase { // END private members // Make this friend to give access to acquire and release - friend ItemHandle; + friend ReadHandle; friend ReaperAPIWrapper; friend class CacheAPIWrapperForNvm; friend class FbInternalRuntimeUpdateWrapper; diff --git a/cachelib/allocator/CacheAllocatorConfig.h b/cachelib/allocator/CacheAllocatorConfig.h index 1207036a95..1d11b3ef14 100644 --- a/cachelib/allocator/CacheAllocatorConfig.h +++ b/cachelib/allocator/CacheAllocatorConfig.h @@ -25,6 +25,7 @@ #include #include "cachelib/allocator/Cache.h" +#include "cachelib/allocator/MemoryTierCacheConfig.h" #include "cachelib/allocator/MM2Q.h" #include "cachelib/allocator/MemoryMonitor.h" #include "cachelib/allocator/NvmAdmissionPolicy.h" @@ -44,11 +45,13 @@ class CacheAllocatorConfig { using AccessConfig = typename CacheT::AccessConfig; using ChainedItemMovingSync = typename CacheT::ChainedItemMovingSync; using RemoveCb = typename CacheT::RemoveCb; + using ItemDestructor = typename CacheT::ItemDestructor; using NvmCacheEncodeCb = typename CacheT::NvmCacheT::EncodeCB; using NvmCacheDecodeCb = typename CacheT::NvmCacheT::DecodeCB; using NvmCacheDeviceEncryptor = typename CacheT::NvmCacheT::DeviceEncryptor; using MoveCb = typename CacheT::MoveCb; using NvmCacheConfig = typename CacheT::NvmCacheT::Config; + using MemoryTierConfigs = std::vector; using Key = typename CacheT::Key; using EventTrackerSharedPtr = std::shared_ptr; using Item = typename CacheT::Item; @@ -81,12 +84,18 @@ class CacheAllocatorConfig { CacheAllocatorConfig& setAccessConfig(size_t numEntries); // RemoveCallback is invoked for each item that is evicted or removed - // explicitly + // explicitly from RAM CacheAllocatorConfig& setRemoveCallback(RemoveCb 
cb); + // ItemDestructor is invoked for each item that is evicted or removed + // explicitly from cache (both RAM and NVM) + CacheAllocatorConfig& setItemDestructor(ItemDestructor destructor); + // Config for NvmCache. If enabled, cachelib will also make use of flash. CacheAllocatorConfig& enableNvmCache(NvmCacheConfig config); + bool isNvmCacheEnabled() const; + // enable the reject first admission policy through its parameters // @param numEntries the number of entries to track across all splits // @param numSplits the number of splits. we drop a whole split by @@ -186,14 +195,26 @@ class CacheAllocatorConfig { // This allows cache to be persisted across restarts. One example use case is // to preserve the cache when releasing a new version of your service. Refer // to our user guide for how to set up cache persistence. + // TODO: get rid of baseAddr or, if it is set, make sure all mappings are adjacent? + // We can also make baseAddr a per-tier configuration CacheAllocatorConfig& enableCachePersistence(std::string directory, void* baseAddr = nullptr); - // uses posix shm segments instead of the default sys-v shm segments. - // @throw std::invalid_argument if called without enabling - // cachePersistence() + // Uses posix shm segments instead of the default sys-v shm + // segments. @throw std::invalid_argument if called without enabling + // cachePersistence(). CacheAllocatorConfig& usePosixForShm(); + // Configures cache memory tiers. Accepts a vector of MemoryTierCacheConfig. + // Each vector element describes the configuration for a single memory cache tier. + // @throw std::invalid_argument if: + // - the size of configs is 0 + // - memory tiers use both size and ratio parameters + CacheAllocatorConfig& configureMemoryTiers(const MemoryTierConfigs& configs); + + // Return the vector of memory tier configs. + MemoryTierConfigs getMemoryTierConfigs() const; + // This turns on a background worker that periodically scans through the access container and look for expired items and remove them. CacheAllocatorConfig& enableItemReaperInBackground( @@ -287,6 +308,14 @@ class CacheAllocatorConfig { // smaller than this will always be rejected by NvmAdmissionPolicy. CacheAllocatorConfig& setNvmAdmissionMinTTL(uint64_t ttl); + // skip promoting child items in a chain when the parent fails to promote + CacheAllocatorConfig& setSkipPromoteChildrenWhenParentFailed(); + + // skip promoting child items in a chain when the parent fails to promote + bool isSkipPromoteChildrenWhenParentFailed() const noexcept { + return skipPromoteChildrenWhenParentFailed; + } + // @return whether compact cache is enabled bool isCompactCacheEnabled() const noexcept { return enableZeroedSlabAllocs; } @@ -323,7 +352,7 @@ class CacheAllocatorConfig { const std::string& getCacheName() const noexcept { return cacheName; } - size_t getCacheSize() const noexcept { return size; } + size_t getCacheSize() const noexcept; bool isUsingPosixShm() const noexcept { return usePosixShm; } @@ -486,9 +515,14 @@ class CacheAllocatorConfig { // for all normal items AccessConfig accessConfig{}; - // user defined callback invoked when an item is being evicted or freed + // user defined callback invoked when an item is being evicted or freed from + // RAM RemoveCb removeCb{}; + // user defined item destructor invoked when an item is being + // evicted or freed from cache (both RAM and NVM) + ItemDestructor itemDestructor{}; + // user defined call back to move the item. This is executed while holding // the user provided movingSync.
For items without chained allocations, // there is no specific need for explicit movingSync and user can skip @@ -541,9 +575,19 @@ class CacheAllocatorConfig { // cache. uint64_t nvmAdmissionMinTTL{0}; + // skip promoting child items in a chain when the parent fails to promote + bool skipPromoteChildrenWhenParentFailed{false}; + friend CacheT; private: + void validateMemoryTiersWithSize(const MemoryTierConfigs&, size_t) const; + + // Configuration for memory tiers. + MemoryTierConfigs memoryTierConfigs{ + {MemoryTierCacheConfig::fromShm().setRatio(1)} + }; + void mergeWithPrefix( std::map& configMap, const std::map& configMapToMerge, @@ -562,6 +606,8 @@ CacheAllocatorConfig& CacheAllocatorConfig::setCacheName( template CacheAllocatorConfig& CacheAllocatorConfig::setCacheSize(size_t _size) { + validateMemoryTiersWithSize(this->memoryTierConfigs, _size); + size = _size; constexpr size_t maxCacheSizeWithCoredump = 64'424'509'440; // 60GB if (size <= maxCacheSizeWithCoredump) { @@ -613,6 +659,13 @@ CacheAllocatorConfig& CacheAllocatorConfig::setRemoveCallback( return *this; } +template +CacheAllocatorConfig& CacheAllocatorConfig::setItemDestructor( + ItemDestructor destructor) { + itemDestructor = std::move(destructor); + return *this; +} + template CacheAllocatorConfig& CacheAllocatorConfig::enableRejectFirstAPForNvm( uint64_t numEntries, @@ -637,6 +690,11 @@ CacheAllocatorConfig& CacheAllocatorConfig::enableNvmCache( return *this; } +template +bool CacheAllocatorConfig::isNvmCacheEnabled() const { + return nvmConfig.has_value(); +} + template CacheAllocatorConfig& CacheAllocatorConfig::setNvmCacheAdmissionPolicy( std::shared_ptr> policy) { @@ -801,6 +859,61 @@ CacheAllocatorConfig& CacheAllocatorConfig::enableItemReaperInBackground( return *this; } +template +CacheAllocatorConfig& CacheAllocatorConfig::configureMemoryTiers( + const MemoryTierConfigs& config) { + if (!config.size()) { + throw std::invalid_argument("There must be at least one memory tier."); + } + + for (auto tier_config: config) { + auto tier_size = tier_config.getSize(); + auto tier_ratio = tier_config.getRatio(); + if ((!tier_size and !tier_ratio) || (tier_size and tier_ratio)) { + throw std::invalid_argument( + "For each memory tier either size or ratio must be set."); + } + } + + validateMemoryTiersWithSize(config, this->size); + + memoryTierConfigs = config; + + return *this; +} + +template +typename CacheAllocatorConfig::MemoryTierConfigs +CacheAllocatorConfig::getMemoryTierConfigs() const { + MemoryTierConfigs config = memoryTierConfigs; + size_t sum_ratios = 0; + + for (auto &tier_config: config) { + if (auto *v = std::get_if(&tier_config.shmOpts)) { + v->usePosix = usePosixShm; + } + + sum_ratios += tier_config.getRatio(); + } + + if (sum_ratios == 0) + return config; + + // if ratios are used, size must be specified + XDCHECK(size); + + // Convert ratios to sizes, size must be non-zero + size_t sum_sizes = 0; + size_t partition_size = size / sum_ratios; + for (auto& tier_config: config) { + tier_config.setSize(partition_size * tier_config.getRatio()); + tier_config.setRatio(0); + sum_sizes += tier_config.getSize(); + } + + return config; +} +
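As a usage sketch of the tier API just defined (illustrative only: it assumes LruAllocator as the cache type, makeTieredConfig is a hypothetical helper, and fromShm()/setRatio() are the MemoryTierCacheConfig helpers introduced in this patch):

#include "cachelib/allocator/CacheAllocator.h"
#include "cachelib/allocator/MemoryTierCacheConfig.h"

using Cache = facebook::cachelib::LruAllocator;

// Builds a config whose 48 GB total is split 1:2 across two shm-backed
// tiers; getMemoryTierConfigs() later converts the ratios to 16 GB + 32 GB.
facebook::cachelib::CacheAllocatorConfig<Cache> makeTieredConfig() {
  facebook::cachelib::CacheAllocatorConfig<Cache> config;
  config.setCacheName("tiered-example")
      .setCacheSize(48ULL * 1024 * 1024 * 1024)
      .configureMemoryTiers(
          {facebook::cachelib::MemoryTierCacheConfig::fromShm().setRatio(1),
           facebook::cachelib::MemoryTierCacheConfig::fromShm().setRatio(2)});
  // Setting a size on one tier and a ratio on another would throw
  // std::invalid_argument, as enforced by configureMemoryTiers() above.
  return config;
}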
template CacheAllocatorConfig& CacheAllocatorConfig::disableCacheEviction() { disableEviction = true; @@ -916,6 +1029,54 @@ CacheAllocatorConfig& CacheAllocatorConfig::setNvmAdmissionMinTTL( return *this; } +// skip promoting child items in a chain when the parent fails to promote +template +CacheAllocatorConfig& +CacheAllocatorConfig::setSkipPromoteChildrenWhenParentFailed() { + skipPromoteChildrenWhenParentFailed = true; + return *this; +} + +template +size_t CacheAllocatorConfig::getCacheSize() const noexcept { + if (size) + return size; + + size_t sum_sizes = 0; + for (const auto &tier_config : getMemoryTierConfigs()) { + sum_sizes += tier_config.getSize(); + } + + return sum_sizes; +} + +template +void CacheAllocatorConfig::validateMemoryTiersWithSize( + const MemoryTierConfigs &config, size_t size) const { + size_t sum_ratios = 0; + size_t sum_sizes = 0; + + for (const auto &tier_config: config) { + sum_ratios += tier_config.getRatio(); + sum_sizes += tier_config.getSize(); + } + + if (sum_ratios && sum_sizes) { + throw std::invalid_argument("Cannot mix ratios and sizes."); + } else if (sum_sizes) { + if (size && sum_sizes != size) { + throw std::invalid_argument( + "Sum of tier sizes doesn't match the total cache size. " + "Setting the total cache size is not required when per-tier " + "sizes are specified; it is calculated as the sum of tier sizes."); + } + } else if (!sum_ratios && !sum_sizes) { + throw std::invalid_argument( + "Either the sum of all memory tier sizes or the sum of all ratios " + "must be greater than 0."); + } +} + template const CacheAllocatorConfig& CacheAllocatorConfig::validate() const { // we can track tail hits only if MMType is MM2Q @@ -924,11 +1085,7 @@ const CacheAllocatorConfig& CacheAllocatorConfig::validate() const { "Tail hits tracking cannot be enabled on MMTypes except MM2Q."); } - // The first part determines max number of "slots" we can address using - // CompressedPtr; - // The second part specifies the minimal allocation size for each slot. - // Multiplied, they inform us the maximal addressable space for cache. - size_t maxCacheSize = (1ul << CompressedPtr::kNumBits) * Slab::kMinAllocSize; + size_t maxCacheSize = CompressedPtr::getMaxAddressableSize(); // Configured cache size should not exceed the maximal addressable space for // cache. if (size > maxCacheSize) { @@ -937,6 +1094,29 @@ const CacheAllocatorConfig& CacheAllocatorConfig::validate() const { size, maxCacheSize)); } + + // we don't allow the user to enable both RemoveCB and ItemDestructor + if (removeCb && itemDestructor) { + throw std::invalid_argument( + "It's not allowed to enable both RemoveCB and ItemDestructor."); + } + + size_t sum_ratios = 0; + for (auto tier_config: memoryTierConfigs) { + sum_ratios += tier_config.getRatio(); + } + + if (sum_ratios) { + if (!size) { + throw std::invalid_argument( + "Total cache size must be specified when size ratios are " + "used to specify memory tier sizes."); + } else if (size < sum_ratios) { + throw std::invalid_argument( + "Sum of all tier size ratios is greater than total cache size."); + } + } + return *this; } @@ -970,7 +1150,7 @@ std::map CacheAllocatorConfig::serialize() const { configMap["size"] = std::to_string(size); configMap["cacheDir"] = cacheDir; - configMap["posixShm"] = usePosixShm ? "set" : "empty"; + configMap["posixShm"] = isUsingPosixShm() ? "set" : "empty"; configMap["defaultAllocSizes"] = ""; // Stringify std::set diff --git a/cachelib/allocator/CacheChainedItemIterator.h b/cachelib/allocator/CacheChainedItemIterator.h index 741e7511ef..2d55d5eebd 100644 --- a/cachelib/allocator/CacheChainedItemIterator.h +++ b/cachelib/allocator/CacheChainedItemIterator.h @@ -16,6 +16,8 @@ #pragma once +#include + #include #include "cachelib/common/Iterators.h" @@ -26,21 +28,28 @@ namespace cachelib { namespace tests { template class BaseAllocatorTest; -} +} // namespace tests // Class to iterate through chained items in the special case that the caller // has the item but no itemhandle (e.g. during release) -template +template class CacheChainedItemIterator - : public detail::IteratorFacade, - typename Cache::Item, + : public detail::IteratorFacade, + ItemT, std::forward_iterator_tag> { public: - using Item = typename Cache::Item; - + using Item = ItemT; CacheChainedItemIterator() = default; - Item& dereference() const { return *curr_; } + Item& dereference() const { + if (curr_) { + return *curr_; + } + if (curIOBuf_) { + return *reinterpret_cast(curIOBuf_->writableData()); + } + throw std::runtime_error("no item to dereference"); + } // advance the iterator. // Do nothing if uninitizliaed. @@ -48,10 +57,16 @@ class CacheChainedItemIterator if (curr_) { curr_ = curr_->asChainedItem().getNext(*compressor_); } + if (curIOBuf_) { + curIOBuf_ = curIOBuf_->next(); + } } - bool equal(const CacheChainedItemIterator& other) const { - return curr_ == other.curr_; + bool equal(const CacheChainedItemIterator& other) const { + if (curr_ || other.curr_) { + return curr_ == other.curr_; + } + return curIOBuf_ == other.curIOBuf_; } private: @@ -68,14 +83,34 @@ class CacheChainedItemIterator } } + // Only NvmCacheT can create an iterator via this constructor. + // It is used to construct chained items for ItemDestructor: + // with DipperItem on Navy, the Item is allocated on the heap (as an IOBuf) + // instead of in the allocator memory pool. + explicit CacheChainedItemIterator(folly::IOBuf* iobuf) : curIOBuf_(iobuf) { + // If @item is not nullptr, check that it is a chained item or a parent + // item. Since an IOBuf chain forms a cycle, we let the parent serve as + // the end iterator. + if (curIOBuf_ && !dereference().isChainedItem() && + !dereference().hasChainedItem()) { + throw std::invalid_argument( + "Cannot initialize ChainedAllocIterator, Item is not a ChainedItem"); + } + } +
// Current iterator position in chain Item* curr_{nullptr}; + // Removed/evicted from NVM + folly::IOBuf* curIOBuf_{nullptr}; + // Pointer compressor to traverse the chain. const PtrCompressor* compressor_{nullptr}; friend Cache; + friend typename Cache::NvmCacheT; friend typename Cache::ChainedAllocs; + friend typename Cache::WritableChainedAllocs; // For testing template diff --git a/cachelib/allocator/CacheItem-inl.h b/cachelib/allocator/CacheItem-inl.h index db6e1cea7d..dcdaf4444d 100644 --- a/cachelib/allocator/CacheItem-inl.h +++ b/cachelib/allocator/CacheItem-inl.h @@ -55,7 +55,17 @@ const typename CacheItem::Key CacheItem::getKey() } template -void* CacheItem::getMemory() const noexcept { +const void* CacheItem::getMemory() const noexcept { + return getMemoryInternal(); +} + +template +void* CacheItem::getMemory() noexcept { + return getMemoryInternal(); +} + +template +void* CacheItem::getMemoryInternal() const noexcept { if (isChainedItem()) { return asChainedItem().getMemory(); } else { @@ -63,10 +73,9 @@ void* CacheItem::getMemory() const noexcept { } } +// Deprecated template void* CacheItem::getWritableMemory() const { - // TODO : check AccessMode, throw exception if not writable - // TODO : add nvm invalidation logic if (isChainedItem()) { return asChainedItem().getMemory(); } else { @@ -264,6 +273,21 @@ bool CacheItem::isNvmEvicted() const noexcept { return ref_.isNvmEvicted(); } +template +void CacheItem::markIncomplete() noexcept { + ref_.markIncomplete(); +} + +template +void CacheItem::unmarkIncomplete() noexcept { + ref_.unmarkIncomplete(); +} + +template +bool CacheItem::isIncomplete() const noexcept { + return ref_.isIncomplete(); +} + template void CacheItem::markIsChainedItem() noexcept { XDCHECK(!hasChainedItem()); diff --git a/cachelib/allocator/CacheItem.h b/cachelib/allocator/CacheItem.h index dd8d9e0581..aa660b401b 100644 --- a/cachelib/allocator/CacheItem.h +++ b/cachelib/allocator/CacheItem.h @@ -65,7 +65,7 @@ class ChainedItemPayload; template class NvmCache; -template +template class CacheChainedAllocs; template @@ -129,7 +129,9 @@ class CACHELIB_PACKED_ATTR CacheItem { * the item is freed when it is not linked to access/mm containers * and its refcount drops to 0. */ - using Handle = detail::HandleImpl; + using ReadHandle = detail::ReadHandleImpl; + using WriteHandle = detail::WriteHandleImpl; + using Handle = WriteHandle; using HandleMaker = std::function; /** @@ -139,6 +141,7 @@ * to be mapped to different addresses on shared memory. */ using CompressedPtr = facebook::cachelib::CompressedPtr; + using SingleTierPtrCompressor = MemoryAllocator::SingleTierPtrCompressor; using PtrCompressor = MemoryAllocator::PtrCompressor; // Get the required size for a cache item given the size of memory @@ -162,26 +165,34 @@ class CACHELIB_PACKED_ATTR CacheItem { const Key getKey() const noexcept; // Readonly memory for this allocation. - // TODO: switch the return type to 'const void*' once all the callsites - // are modified to use getMemory() and getWritableMemory() correctly - void* getMemory() const noexcept; + const void* getMemory() const noexcept; // Writable memory for this allocation. The caller is free to do whatever he - // wants with it and needs to ensure thread sage for access into this + // wants with it and needs to ensure thread safety for access into this // piece of memory. - void* getWritableMemory() const; + void* getMemory() noexcept; + +
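A short sketch of what the new overload pair means for callers (LruAllocator and the function names are assumptions for illustration):

#include "cachelib/allocator/CacheAllocator.h"

using Cache = facebook::cachelib::LruAllocator;

// const item: overload resolution picks `const void* getMemory() const`,
// so the payload cannot be mutated through this path.
const void* peekPayload(const Cache::Item& item) { return item.getMemory(); }

// mutable item: the non-const `void* getMemory()` overload applies,
// replacing the deprecated getWritableMemory().
void* writablePayload(Cache::Item& item) { return item.getMemory(); }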
+ // (deprecated) Writable memory for this allocation. The caller is free to do + // whatever he wants with it and needs to ensure thread safety for access into + // this piece of memory. + [[deprecated("Use getMemory() instead")]] void* getWritableMemory() const; // Cast item's readonly memory to a readonly user type - // TODO: switch the return type to 'const T*' once all the callsites - // are modified to use getMemory() and getWritableMemory() correctly template - T* getMemoryAs() const noexcept { - return reinterpret_cast(getMemory()); + const T* getMemoryAs() const noexcept { + return reinterpret_cast(getMemory()); } // Cast item's writable memory to a writable user type template - T* getWritableMemoryAs() noexcept { + T* getMemoryAs() noexcept { + return reinterpret_cast(getMemory()); + } + + // (Deprecated) Cast item's writable memory to a writable user type + template + [[deprecated("Use getMemoryAs() instead")]] T* + getWritableMemoryAs() noexcept { return reinterpret_cast(getWritableMemory()); } @@ -240,22 +251,37 @@ class CACHELIB_PACKED_ATTR CacheItem { void unmarkNvmEvicted() noexcept; bool isNvmEvicted() const noexcept; + /** + * Marks that the item is migrating between memory tiers and + * is not yet ready for access. Accessing threads should wait. + */ + void markIncomplete() noexcept; + void unmarkIncomplete() noexcept; + bool isIncomplete() const noexcept; + /** * Function to set the timestamp for when to expire an item - * Employs a best-effort approach to update the expiryTime. Item's expiry - * time can only be updated when the item is a regular item and is part of - * the cache and not in the moving state. + * + * This API will only succeed when an item is a regular item, and the user + * has already inserted it into the cache (via @insert or @insertOrReplace). + * In addition, the item cannot be in a "moving" state. * * @param expiryTime the expiryTime value to update to * * @return boolean indicating whether expiry time was successfully updated + * false when the item is not linked in the cache, is in the moving + * state, or is a chained item */ bool updateExpiryTime(uint32_t expiryTimeSecs) noexcept; // Same as @updateExpiryTime, but sets expiry time to @ttl seconds from now. + // It has the same restrictions as @updateExpiryTime: the item must be a + // regular item that is part of the cache and NOT in the moving state. // // @param ttl TTL (from now) - // @return Boolean indicating whether expiry time was successfully updated. + // @return boolean indicating whether expiry time was successfully updated + // false when the item is not linked in the cache, is in the moving + // state, or is a chained item bool extendTTL(std::chrono::seconds ttl) noexcept; // Return the refcount of an item @@ -284,6 +310,8 @@ class CACHELIB_PACKED_ATTR CacheItem { // size does not match with the current key void changeKey(Key key); + void* getMemoryInternal() const noexcept; + /** * CacheItem's refcount contain admin references, access referneces, and * flags, refer to Refcount.h for details.
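A usage sketch for the best-effort TTL refresh described above (the key and the findToWrite() accessor, named elsewhere in this patch, are assumptions):

#include <chrono>

#include "cachelib/allocator/CacheAllocator.h"

using Cache = facebook::cachelib::LruAllocator;

void refreshTtl(Cache& cache) {
  if (auto handle = cache.findToWrite("key")) {
    // Returns false rather than throwing when the item is a chained item,
    // is not linked in the cache, or is currently in the moving state.
    if (!handle->extendTTL(std::chrono::seconds{300})) {
      // Best-effort: the item is in a transient state; nothing else to do.
    }
  }
}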
@@ -418,8 +446,10 @@ class CACHELIB_PACKED_ATTR CacheItem { friend AccessContainer; friend MMContainer; friend NvmCacheT; - friend CacheChainedAllocs>; - friend CacheChainedItemIterator>; + template + friend class CacheChainedAllocs; + template + friend class CacheChainedItemIterator; friend class facebook::cachelib::tests::CacheAllocatorTestWrapper; template friend class Map; @@ -460,8 +490,8 @@ class CACHELIB_PACKED_ATTR CacheItem { // | a | | y | | // | t | | l | | // | i | | o | | -// | o | | d | | -// | n | | | | +// | o | | a | | +// | n | | d | | // | --------------------- | template class CACHELIB_PACKED_ATTR CacheChainedItem : public CacheItem { @@ -538,8 +568,11 @@ class CACHELIB_PACKED_ATTR CacheChainedItem : public CacheItem { friend Payload; friend CacheAllocator; - friend CacheChainedAllocs>; - friend CacheChainedItemIterator>; + template + friend class CacheChainedAllocs; + template + friend class CacheChainedItemIterator; + friend NvmCache>; template friend class facebook::cachelib::tests::BaseAllocatorTest; FRIEND_TEST(ItemTest, ChainedItemConstruction); diff --git a/cachelib/allocator/CacheStats.cpp b/cachelib/allocator/CacheStats.cpp index 1d49f25865..4f7811e5be 100644 --- a/cachelib/allocator/CacheStats.cpp +++ b/cachelib/allocator/CacheStats.cpp @@ -48,18 +48,23 @@ template struct SizeVerify {}; void Stats::populateGlobalCacheStats(GlobalCacheStats& ret) const { - SizeVerify a = SizeVerify<15600>{}; +#ifndef SKIP_SIZE_VERIFY + SizeVerify a = SizeVerify<16144>{}; std::ignore = a; +#endif ret.numCacheGets = numCacheGets.get(); ret.numCacheGetMiss = numCacheGetMiss.get(); ret.numCacheGetExpiries = numCacheGetExpiries.get(); ret.numCacheRemoves = numCacheRemoves.get(); ret.numCacheRemoveRamHits = numCacheRemoveRamHits.get(); + ret.numRamDestructorCalls = numRamDestructorCalls.get(); ret.numNvmGets = numNvmGets.get(); ret.numNvmGetMiss = numNvmGetMiss.get(); ret.numNvmGetMissFast = numNvmGetMissFast.get(); ret.numNvmGetMissExpired = numNvmGetMissExpired.get(); + ret.numNvmGetMissDueToInflightRemove = numNvmGetMissDueToInflightRemove.get(); + ret.numNvmGetMissErrs = numNvmGetMissErrs.get(); ret.numNvmGetCoalesced = numNvmGetCoalesced.get(); ret.numNvmPuts = numNvmPuts.get(); ret.numNvmDeletes = numNvmDeletes.get(); @@ -71,6 +76,8 @@ void Stats::populateGlobalCacheStats(GlobalCacheStats& ret) const { ret.numNvmAbortedPutOnInflightGet = numNvmAbortedPutOnInflightGet.get(); ret.numNvmCleanEvict = numNvmCleanEvict.get(); ret.numNvmCleanDoubleEvict = numNvmCleanDoubleEvict.get(); + ret.numNvmDestructorCalls = numNvmDestructorCalls.get(); + ret.numNvmDestructorRefcountOverflow = numNvmDestructorRefcountOverflow.get(); ret.numNvmExpiredEvict = numNvmExpiredEvict.get(); ret.numNvmPutFromClean = numNvmPutFromClean.get(); ret.numNvmEvictions = numNvmEvictions.get(); @@ -85,6 +92,8 @@ void Stats::populateGlobalCacheStats(GlobalCacheStats& ret) const { ret.numChainedParentItems = numChainedParentItems.get(); ret.numChainedChildItems = numChainedChildItems.get(); ret.numNvmAllocAttempts = numNvmAllocAttempts.get(); + ret.numNvmAllocForItemDestructor = numNvmAllocForItemDestructor.get(); + ret.numNvmItemDestructorAllocErrors = numNvmItemDestructorAllocErrors.get(); ret.allocateLatencyNs = this->allocateLatency_.estimate(); ret.moveChainedLatencyNs = this->moveChainedLatency_.estimate(); @@ -178,9 +187,6 @@ PoolStats& PoolStats::operator+=(const PoolStats& other) { d.oldestTimeSec = s.oldestTimeSec; } - d.numLockByInserts += s.numLockByInserts; - d.numLockByRecordAccesses += 
s.numLockByRecordAccesses; - d.numLockByRemoves += s.numLockByRemoves; d.numHotAccesses += s.numHotAccesses; d.numColdAccesses += s.numColdAccesses; d.numWarmAccesses += s.numWarmAccesses; diff --git a/cachelib/allocator/CacheStats.h b/cachelib/allocator/CacheStats.h index 52d4e3ce43..146de6bea7 100644 --- a/cachelib/allocator/CacheStats.h +++ b/cachelib/allocator/CacheStats.h @@ -86,15 +86,6 @@ struct MMContainerStat { // the container. uint64_t oldestTimeSec; - // number of lock hits by inserts into the LRU - uint64_t numLockByInserts; - - // number of lock hits by recordAccess - uint64_t numLockByRecordAccesses; - - // number of lock hits by removes - uint64_t numLockByRemoves; - // refresh time for LRU uint64_t lruRefreshTime; @@ -331,12 +322,21 @@ struct GlobalCacheStats { // number of remove calls that resulted in a ram hit uint64_t numCacheRemoveRamHits{0}; + // number of item destructor calls from ram + uint64_t numRamDestructorCalls{0}; + // number of nvm gets uint64_t numNvmGets{0}; // number of nvm misses uint64_t numNvmGetMiss{0}; + // number of nvm misses due to internal errors + uint64_t numNvmGetMissErrs{0}; + + // number of nvm misses due to inflight remove on the same key + uint64_t numNvmGetMissDueToInflightRemove{0}; + // number of nvm misses that happened synchronously uint64_t numNvmGetMissFast{0}; @@ -386,12 +386,27 @@ struct GlobalCacheStats { // number of evictions that were already expired uint64_t numNvmExpiredEvict{0}; + // number of item destructor calls from nvm + uint64_t numNvmDestructorCalls{0}; + + // number of times RefcountOverflow happened, causing the item destructor + // to be skipped in nvm + uint64_t numNvmDestructorRefcountOverflow{0}; + // number of puts to nvm of a clean item in RAM due to nvm eviction. uint64_t numNvmPutFromClean{0}; // attempts made from nvm cache to allocate an item for promotion uint64_t numNvmAllocAttempts{0}; + // attempts made from nvm cache to allocate an item for its destructor + uint64_t numNvmAllocForItemDestructor{0}; + // heap allocation errors for the item destructor + uint64_t numNvmItemDestructorAllocErrors{0}; + + // size of itemRemoved_ hash set in nvm + uint64_t numNvmItemRemovedSetSize{0}; + // number of attempts to allocate an item uint64_t allocAttempts{0}; @@ -410,6 +425,9 @@ struct GlobalCacheStats { // number of refcount overflows uint64_t numRefcountOverflow{0}; + // number of exceptions that occurred inside the item destructor + uint64_t numDestructorExceptions{0}; + // number of allocated and CHAINED items that are parents (i.e., // consisting of at least one chained child) uint64_t numChainedChildItems{0}; diff --git a/cachelib/allocator/CacheStatsInternal.h b/cachelib/allocator/CacheStatsInternal.h index d56c854e49..355afb594f 100644 --- a/cachelib/allocator/CacheStatsInternal.h +++ b/cachelib/allocator/CacheStatsInternal.h @@ -51,6 +51,9 @@ struct Stats { // number of remove calls that resulted in a ram hit TLCounter numCacheRemoveRamHits{0}; + // number of item destructor calls from ram + TLCounter numRamDestructorCalls{0}; + // number of nvm gets TLCounter numNvmGets{0}; @@ -60,6 +63,12 @@ // number of nvm misses TLCounter numNvmGetMiss{0}; + // number of nvm misses due to internal errors + TLCounter numNvmGetMissErrs{0}; + + // number of nvm misses due to inflight remove on the same key + TLCounter numNvmGetMissDueToInflightRemove{0}; + // number of nvm gets that are expired TLCounter numNvmGetMissExpired{0};
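A sketch of how these counters pair with the new ItemDestructor: register a destructor, then watch the destructor counters. The shape of the callback argument (a DestructorData-style struct exposing the destroyed item) follows the upstream ItemDestructor API and is an assumption here; getGlobalCacheStats() is CacheAllocator's existing stats accessor, and releaseExternalState is a hypothetical helper:

#include <cinttypes>
#include <cstdio>

#include <folly/Range.h>

#include "cachelib/allocator/CacheAllocator.h"

using Cache = facebook::cachelib::LruAllocator;

void releaseExternalState(folly::StringPiece key); // hypothetical helper

void attachDestructor(facebook::cachelib::CacheAllocatorConfig<Cache>& config) {
  config.setItemDestructor([](const auto& data) {
    // Runs once per item when it permanently leaves the cache, whether the
    // copy lived in RAM (numRamDestructorCalls) or NVM (numNvmDestructorCalls).
    releaseExternalState(data.item.getKey());
  });
  // Note: validate() rejects configs that also set a RemoveCb.
}

void reportDestructorStats(const Cache& cache) {
  const auto stats = cache.getGlobalCacheStats();
  std::printf("ram destructor calls:  %" PRIu64 "\n", stats.numRamDestructorCalls);
  std::printf("nvm destructor calls:  %" PRIu64 "\n", stats.numNvmDestructorCalls);
  std::printf("destructor exceptions: %" PRIu64 "\n", stats.numDestructorExceptions);
}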
@@ -106,6 +115,13 @@ struct Stats { // number of evictions that were already expired AtomicCounter numNvmExpiredEvict{0}; + // number of item destructor calls from nvm + AtomicCounter numNvmDestructorCalls{0}; + + // number of times RefcountOverflow happened, causing the item destructor + // to be skipped in nvm + AtomicCounter numNvmDestructorRefcountOverflow{0}; + // number of entries that were clean in RAM, but evicted and rewritten to // nvmcache because the nvmcache version was evicted AtomicCounter numNvmPutFromClean{0}; @@ -122,6 +138,14 @@ // attempts made from nvm cache to allocate an item for promotion TLCounter numNvmAllocAttempts{0}; + // attempts made from nvm cache to allocate an item for its destructor + TLCounter numNvmAllocForItemDestructor{0}; + // heap allocation errors for the item destructor + TLCounter numNvmItemDestructorAllocErrors{0}; + + // the number of allocated items that are permanent + TLCounter numPermanentItems{0}; + // the number of allocated and CHAINED items that are parents (i.e., // consisting of at least one chained child) TLCounter numChainedParentItems{0}; @@ -140,6 +164,9 @@ // being thrown AtomicCounter numRefcountOverflow{0}; + // number of exceptions that occurred inside the item destructor + AtomicCounter numDestructorExceptions{0}; + // The number of slabs being released right now. // This must be zero when `saveState()` is called. AtomicCounter numActiveSlabReleases{0}; diff --git a/cachelib/allocator/CacheVersion.h b/cachelib/allocator/CacheVersion.h index 033504e693..0189301d44 100644 --- a/cachelib/allocator/CacheVersion.h +++ b/cachelib/allocator/CacheVersion.h @@ -28,7 +28,7 @@ namespace cachelib { // then you only need to bump this version. // I.e. you're rolling out a new feature that is cache compatible with previous // Cachelib instances. -constexpr uint64_t kCachelibVersion = 15; +constexpr uint64_t kCachelibVersion = 16; // Updating this version will cause RAM cache to be dropped for all // cachelib users!!! Proceed with care!! You must coordinate with diff --git a/cachelib/allocator/ChainedAllocs.h b/cachelib/allocator/ChainedAllocs.h index f0f062260a..fdf3ae1dcd 100644 --- a/cachelib/allocator/ChainedAllocs.h +++ b/cachelib/allocator/ChainedAllocs.h @@ -23,17 +23,16 @@ namespace cachelib { // index. The chain is traversed in the LIFO order. The caller needs to ensure // that there are no concurrent addChainedItem or popChainedItem while this // happens. -template +template class CacheChainedAllocs { public: using Item = typename Cache::Item; - using Iter = typename Cache::ChainedItemIter; + using ChainedItem = typename Iter::Item; CacheChainedAllocs(CacheChainedAllocs&&) = default; CacheChainedAllocs& operator=(CacheChainedAllocs&&) = default; // return the parent of the chain. - Item& getParentItem() noexcept { return *parent_; } const Item& getParentItem() const noexcept { return *parent_; }
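For orientation, user code typically reaches this view through CacheAllocator's viewAsChainedAllocs(); a traversal sketch (handle acquisition and types are illustrative):

#include "cachelib/allocator/CacheAllocator.h"

using Cache = facebook::cachelib::LruAllocator;

void walkChain(Cache& cache, const Cache::ItemHandle& parent) {
  auto allocs = cache.viewAsChainedAllocs(parent);
  // LIFO order: the most recently added child is visited first.
  for (const auto& child : allocs.getChain()) {
    (void)child.getMemory();
  }
  // getNthInChain(0) likewise returns the most recently inserted child.
  if (auto* firstChild = allocs.getNthInChain(0)) {
    (void)firstChild;
  }
}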
// iterate and compute the length of the chain. This is O(N) computation. // @@ -45,7 +44,7 @@ class CacheChainedAllocs { // return the nTh in the chain from the beginning. n = 0 is the first in the // chain and last inserted. - Item* getNthInChain(size_t n) { + ChainedItem* getNthInChain(size_t n) { size_t i = 0; for (auto& c : getChain()) { if (i++ == n) { @@ -64,7 +63,6 @@ class CacheChainedAllocs { using LockType = typename Cache::ChainedItemLock; using ReadLockHolder = typename LockType::ReadLockHolder; using PtrCompressor = typename Item::PtrCompressor; - using ItemHandle = typename Cache::ItemHandle; CacheChainedAllocs(const CacheChainedAllocs&) = delete; CacheChainedAllocs& operator=(const CacheChainedAllocs&) = delete; @@ -76,7 +74,7 @@ class CacheChainedAllocs { // @param head beginning of the chain of the allocations // @param c pointer compressor to traverse the chain CacheChainedAllocs(ReadLockHolder l, - ItemHandle parent, + Handle parent, Item& head, const PtrCompressor& c) : lock_(std::move(l)), @@ -97,7 +95,7 @@ class CacheChainedAllocs { // handle to the parent item. holding this ensures that remaining of the // chain is not evicted. - ItemHandle parent_; + Handle parent_; // verify this would not cause issues with the moving slab release logic. // Evicting logic is fine since it looks for the parent's refcount diff --git a/cachelib/allocator/ChainedHashTable.h b/cachelib/allocator/ChainedHashTable.h index f58b435455..411606b148 100644 --- a/cachelib/allocator/ChainedHashTable.h +++ b/cachelib/allocator/ChainedHashTable.h @@ -550,6 +550,7 @@ class ChainedHashTable { return !(*this == other); } + // TODO(jiayueb): change to return ReadHandle after fixing all the breaks const Handle& asHandle() { return curr(); } // reset the Iterator to begin of container diff --git a/cachelib/allocator/Handle.h b/cachelib/allocator/Handle.h index f253b963de..507e2968bc 100644 --- a/cachelib/allocator/Handle.h +++ b/cachelib/allocator/Handle.h @@ -49,12 +49,12 @@ enum class HandleFlags : uint8_t { // Handle must be destroyed *before* the instance of the CacheAllocator // gets destroyed. template -struct HandleImpl { +struct ReadHandleImpl { using Item = T; using CacheT = typename T::CacheT; - HandleImpl() = default; - HandleImpl(std::nullptr_t) {} + ReadHandleImpl() = default; + /*implicit*/ ReadHandleImpl(std::nullptr_t) {} // reset the handle by releasing the item it holds.
void reset() noexcept { @@ -87,58 +87,64 @@ struct HandleImpl { return ret; } - ~HandleImpl() noexcept { reset(); } + ~ReadHandleImpl() noexcept { reset(); } - HandleImpl(const HandleImpl&) = delete; - HandleImpl& operator=(const HandleImpl&) = delete; + ReadHandleImpl(const ReadHandleImpl&) = delete; + ReadHandleImpl& operator=(const ReadHandleImpl&) = delete; - FOLLY_ALWAYS_INLINE HandleImpl(HandleImpl&& other) noexcept + FOLLY_ALWAYS_INLINE ReadHandleImpl(ReadHandleImpl&& other) noexcept : alloc_(other.alloc_), it_(other.releaseItem()), waitContext_(std::move(other.waitContext_)), flags_(other.getFlags()) {} - FOLLY_ALWAYS_INLINE HandleImpl& operator=(HandleImpl&& other) noexcept { + FOLLY_ALWAYS_INLINE ReadHandleImpl& operator=( + ReadHandleImpl&& other) noexcept { if (this != &other) { - this->~HandleImpl(); - new (this) HandleImpl(std::move(other)); + this->~ReadHandleImpl(); + new (this) ReadHandleImpl(std::move(other)); } return *this; } // == and != operators for comparison with Item* - friend bool operator==(const HandleImpl& a, const Item* it) noexcept { + friend bool operator==(const ReadHandleImpl& a, const Item* it) noexcept { return a.get() == it; } - friend bool operator==(const Item* it, const HandleImpl& a) noexcept { + friend bool operator==(const Item* it, const ReadHandleImpl& a) noexcept { return a == it; } - friend bool operator!=(const HandleImpl& a, const Item* it) noexcept { + friend bool operator!=(const ReadHandleImpl& a, const Item* it) noexcept { return !(a == it); } - friend bool operator!=(const Item* it, const HandleImpl& a) noexcept { + friend bool operator!=(const Item* it, const ReadHandleImpl& a) noexcept { return !(a == it); } // == and != operators for comparison with nullptr - friend bool operator==(const HandleImpl& a, std::nullptr_t) noexcept { + friend bool operator==(const ReadHandleImpl& a, std::nullptr_t) noexcept { return a.get() == nullptr; } - friend bool operator==(std::nullptr_t nullp, const HandleImpl& a) noexcept { + friend bool operator==(std::nullptr_t nullp, + const ReadHandleImpl& a) noexcept { return a == nullp; } - friend bool operator!=(const HandleImpl& a, std::nullptr_t nullp) noexcept { + friend bool operator!=(const ReadHandleImpl& a, + std::nullptr_t nullp) noexcept { return !(a == nullp); } - friend bool operator!=(std::nullptr_t nullp, const HandleImpl& a) noexcept { + friend bool operator!=(std::nullptr_t nullp, + const ReadHandleImpl& a) noexcept { return !(a == nullp); } // == and != operator - friend bool operator==(const HandleImpl& a, const HandleImpl& b) noexcept { + friend bool operator==(const ReadHandleImpl& a, + const ReadHandleImpl& b) noexcept { return a.get() == b.get(); } - friend bool operator!=(const HandleImpl& a, const HandleImpl& b) noexcept { + friend bool operator!=(const ReadHandleImpl& a, + const ReadHandleImpl& b) noexcept { return !(a == b); } @@ -147,23 +153,23 @@ struct HandleImpl { return get() != nullptr; } - // accessors. Calling get on handle with isReady() == false blocks the thread - // until the handle is ready. - FOLLY_ALWAYS_INLINE const Item* operator->() const noexcept { return get(); } - FOLLY_ALWAYS_INLINE Item* operator->() noexcept { return get(); } - FOLLY_ALWAYS_INLINE const Item& operator*() const noexcept { return *get(); } - FOLLY_ALWAYS_INLINE Item& operator*() noexcept { return *get(); } + // Accessors always return a const item. 
+ FOLLY_ALWAYS_INLINE const Item* operator->() const noexcept { + return getInternal(); + } + FOLLY_ALWAYS_INLINE const Item& operator*() const noexcept { + return *getInternal(); + } FOLLY_ALWAYS_INLINE const Item* get() const noexcept { return getInternal(); } - FOLLY_ALWAYS_INLINE Item* get() noexcept { return getInternal(); } // Convert to semi future. - folly::SemiFuture toSemiFuture() && { + folly::SemiFuture toSemiFuture() && { if (isReady()) { - return folly::makeSemiFuture(std::forward(*this)); + return folly::makeSemiFuture(std::forward(*this)); } - folly::Promise promise; + folly::Promise promise; auto semiFuture = promise.getSemiFuture(); - auto cb = onReady([p = std::move(promise)](HandleImpl handle) mutable { + auto cb = onReady([p = std::move(promise)](ReadHandleImpl handle) mutable { p.setValue(std::move(handle)); }); if (cb) { @@ -172,7 +178,7 @@ struct HandleImpl { cb(std::move(*this)); return semiFuture; } else { - return std::move(semiFuture).deferValue([](HandleImpl handle) { + return std::move(semiFuture).deferValue([](ReadHandleImpl handle) { if (handle) { // Increment one refcount on user thread since we transferred a handle // from a cachelib internal thread. @@ -183,7 +189,7 @@ struct HandleImpl { } } - using ReadyCallback = folly::Function; + using ReadyCallback = folly::Function; // Return true iff item handle is ready to use. // Empty handles are considered ready with it_ == nullptr. @@ -217,12 +223,26 @@ struct HandleImpl { // @return HandleImpl return a handle to this item // @throw std::overflow_error is the maximum item refcount is execeeded by // creating this item handle. - HandleImpl clone() { return cloneInternal(); } + ReadHandleImpl clone() const { + ReadHandleImpl hdl{}; + if (alloc_) { + hdl = alloc_->acquire(getInternal()); + } + hdl.cloneFlags(*this); + return hdl; + } + + bool isWriteHandle() const { return false; } - const HandleImpl clone() const { return cloneInternal(); } + protected: + // accessor. Calling get on handle with isReady() == false blocks the thread + // until the handle is ready. + FOLLY_ALWAYS_INLINE Item* getInternal() const noexcept { + return waitContext_ ? waitContext_->get() : it_; + } private: - struct ItemWaitContext : public WaitContext { + struct ItemWaitContext : public WaitContext { explicit ItemWaitContext(CacheT& alloc) : alloc_(alloc) {} // @return managed item pointer @@ -286,12 +306,12 @@ struct HandleImpl { // In addition, we will be bumping the handle count by 1, when SemiFuture // is evaluated (via defer callback). This is because we have cloned // an item handle to be passed to the SemiFuture. - void set(HandleImpl hdl) override { + void set(ReadHandleImpl hdl) override { XDCHECK(!isReady()); SCOPE_EXIT { hdl.release(); }; flags_ = hdl.getFlags(); - auto it = hdl.get(); + auto it = hdl.getInternal(); it_.store(it, std::memory_order_release); // Handles are fulfilled by threads different from the owners. Adjust // the refcount tracking accordingly. use the local copy to not make @@ -307,11 +327,11 @@ struct HandleImpl { // to 0 on this thread. In the user thread, they must increment by // 1. It is done automatically if the user converted their ItemHandle // to a SemiFuture via toSemiFuture(). 
- auto itemHandle = hdl.clone(); - if (itemHandle) { + auto readHandle = hdl.clone(); + if (readHandle) { alloc_.adjustHandleCountForThread_private(-1); } - onReadyCallback_(std::move(itemHandle)); + onReadyCallback_(std::move(readHandle)); } } baton_.post(); @@ -372,6 +392,12 @@ struct HandleImpl { } } + protected: + friend class ReadHandleImpl; + // Method used only by ReadHandleImpl ctor + void discard() { + it_.store(nullptr, std::memory_order_relaxed); + } private: // we are waiting on Item* to be set to a value. One of the valid values is // nullptr. So choose something that we dont expect to indicate a ptr @@ -415,19 +441,6 @@ struct HandleImpl { return waitContext_; } - FOLLY_ALWAYS_INLINE Item* getInternal() const noexcept { - return waitContext_ ? waitContext_->get() : it_; - } - - HandleImpl cloneInternal() const { - HandleImpl hdl{}; - if (alloc_) { - hdl = alloc_->acquire(getInternal()); - } - hdl.cloneFlags(*this); - return hdl; - } - // Internal book keeping to track handles that correspond to items that are // not present in cache. This state is mutated, but does not affect the user // visible meaning of the item handle(public API). Hence this is const. @@ -452,7 +465,7 @@ struct HandleImpl { uint8_t getFlags() const { return waitContext_ ? waitContext_->getFlags() : flags_; } - void cloneFlags(const HandleImpl& other) { flags_ = other.getFlags(); } + void cloneFlags(const ReadHandleImpl& other) { flags_ = other.getFlags(); } Item* releaseItem() noexcept { return std::exchange(it_, nullptr); } @@ -463,13 +476,21 @@ struct HandleImpl { } // Handle which has the item already - FOLLY_ALWAYS_INLINE HandleImpl(Item* it, CacheT& alloc) noexcept - : alloc_(&alloc), it_(it) {} + FOLLY_ALWAYS_INLINE ReadHandleImpl(Item* it, CacheT& alloc) noexcept + : alloc_(&alloc), it_(it) { + if (it_ && it_->isIncomplete()) { + waitContext_ = std::make_shared(alloc); + if (!alloc_->addWaitContextForMovingItem(it->getKey(), waitContext_)) { + waitContext_->discard(); + waitContext_.reset(); + } + } + } // handle that has a wait context allocated. Used for async handles // In this case, the it_ will be filled in asynchronously and mulitple // ItemHandles can wait on the one underlying handle - explicit HandleImpl(CacheT& alloc) noexcept + explicit ReadHandleImpl(CacheT& alloc) noexcept : alloc_(&alloc), it_(nullptr), waitContext_(std::make_shared(alloc)) {} @@ -528,8 +549,80 @@ struct HandleImpl { FRIEND_TEST(ItemHandleTest, onReadyWithNoWaitContext); }; +// WriteHandleImpl is a subclass of ReadHandleImpl that functions as a mutable +// handle. Users can obtain a mutable item through a "write handle". +template +struct WriteHandleImpl : public ReadHandleImpl { + using Item = T; + using CacheT = typename T::CacheT; + using ReadHandle = ReadHandleImpl; + using ReadHandle::ReadHandle; // inherit constructors + + // TODO(jiayueb): remove this constructor after we finish R/W handle + // migration. In the end, WriteHandle should only be obtained via + // CacheAllocator APIs like findToWrite(). + explicit WriteHandleImpl(ReadHandle&& readHandle) + : ReadHandle(std::move(readHandle)) {} + + // Accessors always return a non-const item. + FOLLY_ALWAYS_INLINE Item* operator->() const noexcept { + return ReadHandle::getInternal(); + } + FOLLY_ALWAYS_INLINE Item& operator*() const noexcept { + return *ReadHandle::getInternal(); + } + FOLLY_ALWAYS_INLINE Item* get() const noexcept { + return ReadHandle::getInternal(); + } +
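What the split looks like from user code, sketched under the assumption that the post-migration accessors behave as the TODO above describes (find() yielding a read handle, findToWrite() a write handle):

#include "cachelib/allocator/CacheAllocator.h"

using Cache = facebook::cachelib::LruAllocator;

void readThenWrite(Cache& cache) {
  if (auto rh = cache.find("key")) {
    // Read handle: operator-> yields const Item*, so only const members
    // (e.g. the const getMemory() overload) are reachable.
    const void* ro = rh->getMemory();
    (void)ro;
  }
  if (auto wh = cache.findToWrite("key")) {
    // Write handle: operator-> yields Item*, enabling mutation.
    void* rw = wh->getMemory();
    (void)rw;
  }
}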
+ // Clones a write handle. Returns an empty handle if it is null. + // @return WriteHandleImpl a handle to this item + // @throw std::overflow_error if the maximum item refcount is exceeded by + // creating this item handle. + WriteHandleImpl clone() const { return WriteHandleImpl{ReadHandle::clone()}; } + + bool isWriteHandle() const { return true; } + + // Friends + // Only CacheAllocator and NvmCache can create non-default constructed handles + friend CacheT; + friend typename CacheT::NvmCacheT; + + // Object-cache's c++ allocator will need to create a zero refcount handle in + // order to access CacheAllocator API. Search for this function for details. + template + friend ItemHandle2* objcacheInitializeZeroRefcountHandle(void* handleStorage, + Item2* it, + Cache2& alloc); + + // A handle is marked as nascent when it has not yet been inserted into the + // cache. However, the user can override it by marking an item as "not + // nascent" even if it's not inserted into the cache. Unmarking it means a + // not-yet-inserted item will still be processed by RemoveCallback if the + // user frees it. Today, the only user who can do this is Cachelib's + // ObjectCache API to ensure the correct RAII behavior for an object. + template + friend void objcacheUnmarkNascent(const ItemHandle2& hdl); + + // Object-cache's c++ allocator needs to access CacheAllocator directly from + // an item handle in order to access CacheAllocator APIs. + template + friend typename ItemHandle2::CacheT& objcacheGetCache(const ItemHandle2& hdl); + + // The following methods are only used in tests where we need to access + // private methods in ItemHandle + template + friend T1 createHandleWithWaitContextForTest(T2&); + template + friend std::shared_ptr getWaitContextForTest( + T1&); + FRIEND_TEST(ItemHandleTest, WaitContext_readycb); + FRIEND_TEST(ItemHandleTest, WaitContext_ready_immediate); + FRIEND_TEST(ItemHandleTest, onReadyWithNoWaitContext); +}; + template -std::ostream& operator<<(std::ostream& os, const HandleImpl& it) { +std::ostream& operator<<(std::ostream& os, const ReadHandleImpl& it) { if (it) { os << it->toString(); } else { diff --git a/cachelib/allocator/MM2Q-inl.h b/cachelib/allocator/MM2Q-inl.h index 39fa573b61..c112f0b442 100644 --- a/cachelib/allocator/MM2Q-inl.h +++ b/cachelib/allocator/MM2Q-inl.h @@ -55,7 +55,6 @@ bool MM2Q::Container::recordAccess(T& node, lruRefreshTime_.load(std::memory_order_relaxed)))) { auto func = [&]() { reconfigureLocked(curr); - ++numLockByRecordAccesses_; if (!node.isInMMContainer()) { return false; } @@ -213,7 +212,6 @@ template T::*HookPtr> bool MM2Q::Container::add(T& node) noexcept { const auto currTime = static_cast