diff --git a/cachelib/allocator/Cache.h b/cachelib/allocator/Cache.h index ffbff0289e..ac985a7ae2 100644 --- a/cachelib/allocator/Cache.h +++ b/cachelib/allocator/Cache.h @@ -84,7 +84,7 @@ class CacheBase { CacheBase& operator=(CacheBase&&) = default; // TODO: come up with some reasonable number - static constexpr unsigned kMaxTiers = 8; + static constexpr unsigned kMaxTiers = 2; // Get a string referring to the cache name for this cache virtual const std::string getCacheName() const = 0; @@ -100,8 +100,8 @@ class CacheBase { // @param poolId the pool id virtual PoolStats getPoolStats(PoolId poolId) const = 0; - virtual AllocationClassBaseStat getAllocationClassStats(TierId, PoolId pid, ClassId cid) - const = 0; + virtual AllocationClassBaseStat getAllocationClassStats( + TierId, PoolId pid, ClassId cid) const = 0; // @param poolId the pool id virtual AllSlabReleaseEvents getAllSlabReleaseEvents(PoolId poolId) const = 0; diff --git a/cachelib/allocator/CacheAllocator-inl.h b/cachelib/allocator/CacheAllocator-inl.h index 59f8b1cc43..8e8583b4a8 100644 --- a/cachelib/allocator/CacheAllocator-inl.h +++ b/cachelib/allocator/CacheAllocator-inl.h @@ -382,6 +382,7 @@ CacheAllocator::allocateInternalTier(TierId tid, // the allocation class in our memory allocator. const auto cid = allocator_[tid]->getAllocationClassId(pid, requiredSize); + util::RollingLatencyTracker rollTracker{(*stats_.classAllocLatency)[tid][pid][cid]}; // TODO: per-tier (*stats_.allocAttempts)[pid][cid].inc(); @@ -480,6 +481,8 @@ CacheAllocator::allocateChainedItemInternal( const auto pid = allocator_[tid]->getAllocInfo(parent->getMemory()).poolId; const auto cid = allocator_[tid]->getAllocationClassId(pid, requiredSize); + util::RollingLatencyTracker rollTracker{(*stats_.classAllocLatency)[tid][pid][cid]}; + // TODO: per-tier? 
Right now stats_ are not used in any public periodic // worker (*stats_.allocAttempts)[pid][cid].inc(); @@ -2540,6 +2543,7 @@ AllocationClassBaseStat CacheAllocator::getAllocationClassStats( } else { stats.approxFreePercent = ac.approxFreePercentage(); } + stats.allocLatencyNs = (*stats_.classAllocLatency)[tid][pid][cid]; return stats; } diff --git a/cachelib/allocator/CacheStats.cpp b/cachelib/allocator/CacheStats.cpp index 4f7811e5be..98a02cad75 100644 --- a/cachelib/allocator/CacheStats.cpp +++ b/cachelib/allocator/CacheStats.cpp @@ -42,6 +42,8 @@ void Stats::init() { initToZero(*fragmentationSize); initToZero(*chainedItemEvictions); initToZero(*regularItemEvictions); + + classAllocLatency = std::make_unique(); } template diff --git a/cachelib/allocator/CacheStats.h b/cachelib/allocator/CacheStats.h index a24b13d35e..f82ba143e3 100644 --- a/cachelib/allocator/CacheStats.h +++ b/cachelib/allocator/CacheStats.h @@ -25,6 +25,7 @@ #include "cachelib/allocator/memory/Slab.h" #include "cachelib/common/FastStats.h" #include "cachelib/common/PercentileStats.h" +#include "cachelib/common/RollingStats.h" #include "cachelib/common/Time.h" namespace facebook { @@ -107,6 +108,9 @@ struct AllocationClassBaseStat { // percent of free memory in this class double approxFreePercent{0.0}; + + // Rolling allocation latency (in ns) + util::RollingStats allocLatencyNs; }; // cache related stats for a given allocation class. 
diff --git a/cachelib/allocator/CacheStatsInternal.h b/cachelib/allocator/CacheStatsInternal.h index 355afb594f..dbf3395623 100644 --- a/cachelib/allocator/CacheStatsInternal.h +++ b/cachelib/allocator/CacheStatsInternal.h @@ -21,6 +21,7 @@ #include "cachelib/allocator/Cache.h" #include "cachelib/allocator/memory/MemoryAllocator.h" #include "cachelib/common/AtomicCounter.h" +#include "cachelib/common/RollingStats.h" namespace facebook { namespace cachelib { @@ -221,6 +222,14 @@ struct Stats { std::unique_ptr chainedItemEvictions{}; std::unique_ptr regularItemEvictions{}; + using PerTierPoolClassRollingStats = std::array< + std::array, + MemoryPoolManager::kMaxPools>, + CacheBase::kMaxTiers>; + + // rolling latency tracking for every alloc class in every pool + std::unique_ptr classAllocLatency{}; + // Eviction failures due to parent cannot be removed from access container AtomicCounter evictFailParentAC{0}; diff --git a/cachelib/cachebench/cache/CacheStats.h b/cachelib/cachebench/cache/CacheStats.h index 377026dc20..c027773014 100644 --- a/cachelib/cachebench/cache/CacheStats.h +++ b/cachelib/cachebench/cache/CacheStats.h @@ -96,7 +96,8 @@ struct Stats { uint64_t invalidDestructorCount{0}; int64_t unDestructedItemCount{0}; - std::map>> allocationClassStats; + std::map>> + allocationClassStats; std::vector slabsApproxFreePercentages; @@ -122,7 +123,9 @@ struct Stats { if (FLAGS_report_memory_usage_stats) { for (TierId tid = 0; tid < slabsApproxFreePercentages.size(); tid++) { - out << folly::sformat("tid{:2} free slabs : {:.2f}%", tid, slabsApproxFreePercentages[tid]) << std::endl; + out << folly::sformat("tid{:2} free slabs : {:.2f}%", tid, + slabsApproxFreePercentages[tid]) + << std::endl; } auto formatMemory = [](size_t bytes) -> std::tuple { @@ -142,26 +145,25 @@ struct Stats { }; auto foreachAC = [&](auto cb) { - for (auto &tidStats : allocationClassStats) { - for (auto &pidStat : tidStats.second) { - for (auto &cidStat : pidStat.second) { + for (auto& tidStats 
: allocationClassStats) { + for (auto& pidStat : tidStats.second) { + for (auto& cidStat : pidStat.second) { cb(tidStats.first, pidStat.first, cidStat.first, cidStat.second); } } } }; - foreachAC([&](auto tid, auto pid, auto cid, auto stats){ + foreachAC([&](auto tid, auto pid, auto cid, auto stats) { auto [allocSizeSuffix, allocSize] = formatMemory(stats.allocSize); auto [memorySizeSuffix, memorySize] = formatMemory(stats.memorySize); - out << folly::sformat("tid{:2} pid{:2} cid{:4} {:8.2f}{} memorySize: {:8.2f}{}", - tid, pid, cid, allocSize, allocSizeSuffix, memorySize, memorySizeSuffix) << std::endl; - }); - - foreachAC([&](auto tid, auto pid, auto cid, auto stats){ - auto [allocSizeSuffix, allocSize] = formatMemory(stats.allocSize); - out << folly::sformat("tid{:2} pid{:2} cid{:4} {:8.2f}{} free: {:4.2f}%", - tid, pid, cid, allocSize, allocSizeSuffix, stats.approxFreePercent) << std::endl; + out << folly::sformat( + "tid{:2} pid{:2} cid{:4} {:8.2f}{} memorySize:{:8.2f}{} " + "free:{:4.2f}% rollingAvgAllocLatency:{:8.2f}ns", + tid, pid, cid, allocSize, allocSizeSuffix, memorySize, + memorySizeSuffix, stats.approxFreePercent, + stats.allocLatencyNs.estimate()) + << std::endl; }); } diff --git a/cachelib/common/RollingStats.h b/cachelib/common/RollingStats.h new file mode 100644 index 0000000000..4d179681ad --- /dev/null +++ b/cachelib/common/RollingStats.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
/*
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

// NOTE(review): the two angle-bracket includes of the original were not
// legible in the reviewed text. The standard headers below cover every name
// this file uses; restore the originals if other code relies on them.
#include <chrono>
#include <cstdint>
#include <limits>
#include <utility>

// Guarded so this header can also be compiled on its own, outside the
// project tree; inside the tree the include is always taken.
#if __has_include("cachelib/common/Utils.h")
#include "cachelib/common/Utils.h"
#endif

namespace facebook {
namespace cachelib {
namespace util {

// Running arithmetic mean of every value handed to trackValue().
//
// Only the current mean and the number of samples are stored. The class does
// no synchronization of its own.
// NOTE(review): confirm whether instances are updated from several threads
// at once and, if so, whether unsynchronized updates are acceptable.
class RollingStats {
 public:
  // Fold one sample into the mean.
  //
  // @param value  the sample to account for (RollingLatencyTracker passes
  //               elapsed nanoseconds)
  void trackValue(double value) {
    // Highly unlikely: the sample counter is saturated. Stop updating the
    // average from here on. The counter is deliberately NOT reset to zero,
    // because that would give the next sample a weight of 1 and discard the
    // whole accumulated mean.
    if (cnt_ == std::numeric_limits<uint64_t>::max()) {
      return;
    }
    ++cnt_;
    // Incremental mean: mean_n = mean_{n-1} + (x - mean_{n-1}) / n.
    avg_ += (value - avg_) / static_cast<double>(cnt_);
  }

  // Return the rolling average; 0 while no sample has been tracked.
  double estimate() const { return avg_; }

 private:
  double avg_{0};
  uint64_t cnt_{0};
};

// Scope guard that measures the std::chrono::steady_clock time between its
// construction and its destruction and reports the elapsed nanoseconds to a
// RollingStats.
//
// The type is move-only. A default-constructed or moved-from tracker is
// detached and reports nothing.
class RollingLatencyTracker {
 public:
  // Start timing now.
  //
  // @param stats  receives the sample; only its address is stored, so it
  //               must stay alive until this tracker has reported
  explicit RollingLatencyTracker(RollingStats& stats)
      : stats_(&stats), begin_(std::chrono::steady_clock::now()) {}

  // Create a detached tracker that never reports.
  RollingLatencyTracker() = default;

  // Report the elapsed time, unless detached.
  ~RollingLatencyTracker() { report(); }

  RollingLatencyTracker(const RollingLatencyTracker&) = delete;
  RollingLatencyTracker& operator=(const RollingLatencyTracker&) = delete;

  // Take over the measurement of `rhs`; `rhs` becomes detached, so the sample
  // is reported exactly once.
  RollingLatencyTracker(RollingLatencyTracker&& rhs) noexcept
      : stats_(std::exchange(rhs.stats_, nullptr)), begin_(rhs.begin_) {}

  // Finish the measurement currently owned by this tracker (reporting it if
  // attached), then take over the measurement of `rhs`.
  RollingLatencyTracker& operator=(RollingLatencyTracker&& rhs) noexcept {
    if (this != &rhs) {
      report();
      stats_ = std::exchange(rhs.stats_, nullptr);
      begin_ = rhs.begin_;
    }
    return *this;
  }

 private:
  // Push the time elapsed since begin_ (in nanoseconds) into stats_.
  // No-op for a detached tracker.
  void report() noexcept {
    if (stats_ == nullptr) {
      return;
    }
    const auto elapsed = std::chrono::steady_clock::now() - begin_;
    const auto diffNanos =
        std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed).count();
    stats_->trackValue(static_cast<double>(diffNanos));
  }

  RollingStats* stats_{nullptr};
  std::chrono::steady_clock::time_point begin_{};
};
} // namespace util
} // namespace cachelib
} // namespace facebook