From 7a6b1008e983198a2ae4f7f3695ebf4a0e6c0c90 Mon Sep 17 00:00:00 2001
From: "Chorazewicz, Igor" <igor.chorazewicz@intel.com>
Date: Tue, 28 Sep 2021 15:11:07 +0200
Subject: [PATCH 1/2] Initial multi-tier support implementation

---
 cachelib/allocator/Cache.cpp                  |   6 +
 cachelib/allocator/Cache.h                    |   9 +-
 cachelib/allocator/CacheAllocator-inl.h       | 411 ++++++++++++------
 cachelib/allocator/CacheAllocator.h           | 105 +++--
 cachelib/allocator/PoolOptimizer.cpp          |   2 +
 cachelib/allocator/memory/MemoryAllocator.h   |   7 +
 cachelib/allocator/memory/Slab.h              |   2 +
 cachelib/allocator/memory/SlabAllocator.h     |  17 +-
 .../allocator/tests/AllocatorResizeTest.h     |   8 +-
 cachelib/allocator/tests/BaseAllocatorTest.h  |   8 +-
 cachelib/allocator/tests/TestBase-inl.h       |   4 +-
 11 files changed, 391 insertions(+), 188 deletions(-)
diff --git a/cachelib/allocator/Cache.cpp b/cachelib/allocator/Cache.cpp
index 0e812fb10e..7f6bfe737c 100644
--- a/cachelib/allocator/Cache.cpp
+++ b/cachelib/allocator/Cache.cpp
@@ -23,6 +23,12 @@
 namespace facebook {
 namespace cachelib {
 
+CacheBase::CacheBase(unsigned numTiers): numTiers_(numTiers) {}
+
+unsigned CacheBase::getNumTiers() const {
+  return numTiers_;
+}
+
 void CacheBase::setRebalanceStrategy(
     PoolId pid, std::shared_ptr<RebalanceStrategy> strategy) {
   std::unique_lock<std::mutex> l(lock_);
diff --git a/cachelib/allocator/Cache.h b/cachelib/allocator/Cache.h
index 02fd706588..88fe53acb0 100644
--- a/cachelib/allocator/Cache.h
+++ b/cachelib/allocator/Cache.h
@@ -56,7 +56,7 @@ enum class RemoveContext { kEviction, kNormal };
 // A base class of cache exposing members and status agnostic of template type.
 class CacheBase {
  public:
-  CacheBase() = default;
+  CacheBase(unsigned numTiers = 1);
   virtual ~CacheBase() = default;
 
   // Movable but not copyable
@@ -65,6 +65,9 @@ class CacheBase {
   CacheBase(CacheBase&&) = default;
   CacheBase& operator=(CacheBase&&) = default;
 
+  // TODO: come up with some reasonable number
+  static constexpr unsigned kMaxTiers = 8;
+
   // Get a string referring to the cache name for this cache
   virtual const std::string getCacheName() const = 0;
 
@@ -253,6 +256,10 @@ class CacheBase {
   // @return The number of slabs that were actually reclaimed (<= numSlabs)
   virtual unsigned int reclaimSlabs(PoolId id, size_t numSlabs) = 0;
 
+  unsigned getNumTiers() const;
+
+  unsigned numTiers_ = 1;
+
   // Protect 'poolRebalanceStragtegies_' and `poolResizeStrategies_`
   // and `poolOptimizeStrategy_`
   mutable std::mutex lock_;
diff --git a/cachelib/allocator/CacheAllocator-inl.h b/cachelib/allocator/CacheAllocator-inl.h
index f178a0999a..5644b2ce89 100644
--- a/cachelib/allocator/CacheAllocator-inl.h
+++ b/cachelib/allocator/CacheAllocator-inl.h
@@ -17,6 +17,8 @@
 #pragma once
 
 #include "cachelib/allocator/CacheVersion.h"
+#include <folly/Random.h>
+
 #include "cachelib/common/Utils.h"
 
 namespace facebook {
@@ -24,20 +26,15 @@ namespace cachelib {
 
 template <typename CacheTrait>
 CacheAllocator<CacheTrait>::CacheAllocator(Config config)
-    : memoryTierConfigs(config.getMemoryTierConfigs()),
+    : CacheBase(config.getMemoryTierConfigs().size()),
+      memoryTierConfigs(config.getMemoryTierConfigs()),
       isOnShm_{config.memMonitoringEnabled()},
       config_(config.validate()),
       tempShm_(isOnShm_ ? std::make_unique<TempShmMapping>(
                             config_.getCacheSize())
                         : nullptr),
-      allocator_(isOnShm_ ? std::make_unique<MemoryAllocator>(
-                                getAllocatorConfig(config_),
-                                tempShm_->getAddr(),
-                                config_.getCacheSize())
-                          : std::make_unique<MemoryAllocator>(
-                                getAllocatorConfig(config_),
-                                config_.getCacheSize())),
-      compactCacheManager_(std::make_unique<CCacheManager>(*allocator_)),
+      allocator_(createPrivateAllocator()),
+      compactCacheManager_(std::make_unique<CCacheManager>(*allocator_[0] /* TODO */)),
       compressor_(createPtrCompressor()),
       accessContainer_(std::make_unique<AccessContainer>(
           config_.accessConfig,
@@ -50,24 +47,63 @@ CacheAllocator<CacheTrait>::CacheAllocator(Config config)
       chainedItemLocks_(config_.chainedItemsLockPower,
                         std::make_shared<MurmurHash2>()),
       cacheCreationTime_{util::getCurrentTimeSec()} {
-  // TODO(MEMORY_TIER)
-  if (std::holds_alternative<FileShmSegmentOpts>(
+
+  if (numTiers_ > 1 || std::holds_alternative<FileShmSegmentOpts>(
       memoryTierConfigs[0].getShmTypeOpts())) {
     throw std::runtime_error(
-      "Using custom memory tier is only supported for Shared Memory.");
+      "Using custom memory tier or using more than one tier is only "
+      "supported for Shared Memory.");
   }
   initCommon(false);
 }
 
+template <typename CacheTrait>
+std::vector<std::unique_ptr<MemoryAllocator>>
+CacheAllocator<CacheTrait>::createPrivateAllocator() {
+  std::vector<std::unique_ptr<MemoryAllocator>> allocators;
+
+  if (isOnShm_)
+    allocators.emplace_back(std::make_unique<MemoryAllocator>(
+                                getAllocatorConfig(config_),
+                                tempShm_->getAddr(),
+                                config_.size));
+  else
+    allocators.emplace_back(std::make_unique<MemoryAllocator>(
+                                getAllocatorConfig(config_), config_.size));
+
+  return allocators;
+}
+
+template <typename CacheTrait>
+std::vector<std::unique_ptr<MemoryAllocator>>
+CacheAllocator<CacheTrait>::createAllocators() {
+  std::vector<std::unique_ptr<MemoryAllocator>> allocators;
+  for (int tid = 0; tid < numTiers_; tid++) {
+    allocators.emplace_back(createNewMemoryAllocator(tid));
+  }
+  return allocators;
+}
+
+template <typename CacheTrait>
+std::vector<std::unique_ptr<MemoryAllocator>>
+CacheAllocator<CacheTrait>::restoreAllocators() {
+  std::vector<std::unique_ptr<MemoryAllocator>> allocators;
+  for (int tid = 0; tid < numTiers_; tid++) {
+    allocators.emplace_back(restoreMemoryAllocator(tid));
+  }
+  return allocators;
+}
+
 template <typename CacheTrait>
 CacheAllocator<CacheTrait>::CacheAllocator(SharedMemNewT, Config config)
-    : memoryTierConfigs(config.getMemoryTierConfigs()),
+    : CacheBase(config.getMemoryTierConfigs().size()),
+      memoryTierConfigs(config.getMemoryTierConfigs()),
       isOnShm_{true},
       config_(config.validate()),
       shmManager_(
           std::make_unique<ShmManager>(config_.cacheDir, config_.isUsingPosixShm())),
-      allocator_(createNewMemoryAllocator()),
-      compactCacheManager_(std::make_unique<CCacheManager>(*allocator_)),
+      allocator_(createAllocators()),
+      compactCacheManager_(std::make_unique<CCacheManager>(*allocator_[0] /* TODO */)),
       compressor_(createPtrCompressor()),
       accessContainer_(std::make_unique<AccessContainer>(
           config_.accessConfig,
@@ -103,15 +139,16 @@ CacheAllocator<CacheTrait>::CacheAllocator(SharedMemNewT, Config config)
 
 template <typename CacheTrait>
 CacheAllocator<CacheTrait>::CacheAllocator(SharedMemAttachT, Config config)
-    : memoryTierConfigs(config.getMemoryTierConfigs()),
+    : CacheBase(config.getMemoryTierConfigs().size()),
+      memoryTierConfigs(config.getMemoryTierConfigs()),
       isOnShm_{true},
       config_(config.validate()),
       shmManager_(
           std::make_unique<ShmManager>(config_.cacheDir, config_.usePosixShm)),
       deserializer_(createDeserializer()),
       metadata_{deserializeCacheAllocatorMetadata(*deserializer_)},
-      allocator_(restoreMemoryAllocator()),
-      compactCacheManager_(restoreCCacheManager()),
+      allocator_(restoreAllocators()),
+      compactCacheManager_(restoreCCacheManager(0 /* TODO - per tier */)),
       compressor_(createPtrCompressor()),
       mmContainers_(deserializeMMContainers(*deserializer_, compressor_)),
       accessContainer_(std::make_unique<AccessContainer>(
@@ -131,6 +168,7 @@ CacheAllocator<CacheTrait>::CacheAllocator(SharedMemAttachT, Config config)
       chainedItemLocks_(config_.chainedItemsLockPower,
                         std::make_shared<MurmurHash2>()),
       cacheCreationTime_{*metadata_.cacheCreationTime_ref()} {
+  /* TODO - per tier? */
   for (auto pid : *metadata_.compactCachePools_ref()) {
     isCompactCachePool_[pid] = true;
   }
@@ -155,48 +193,45 @@ CacheAllocator<CacheTrait>::~CacheAllocator() {
 }
 
 template <typename CacheTrait>
-ShmSegmentOpts CacheAllocator<CacheTrait>::createShmCacheOpts() {
-  if (memoryTierConfigs.size() > 1) {
-    throw std::invalid_argument("CacheLib only supports a single memory tier");
-  }
-
+ShmSegmentOpts CacheAllocator<CacheTrait>::createShmCacheOpts(TierId tid) {
   ShmSegmentOpts opts;
   opts.alignment = sizeof(Slab);
-  opts.typeOpts = memoryTierConfigs[0].getShmTypeOpts();
+  opts.typeOpts = memoryTierConfigs[tid].getShmTypeOpts();
 
   return opts;
 }
 
 template <typename CacheTrait>
 std::unique_ptr<MemoryAllocator>
-CacheAllocator<CacheTrait>::createNewMemoryAllocator() {
+CacheAllocator<CacheTrait>::createNewMemoryAllocator(TierId tid) {
   return std::make_unique<MemoryAllocator>(
       getAllocatorConfig(config_),
       shmManager_
-          ->createShm(detail::kShmCacheName, config_.getCacheSize(),
-                      config_.slabMemoryBaseAddr, createShmCacheOpts())
+          ->createShm(detail::kShmCacheName + std::to_string(tid),
+                      config_.getCacheSize(), config_.slabMemoryBaseAddr,
+                      createShmCacheOpts(tid))
           .addr,
       config_.getCacheSize());
 }
 
 template <typename CacheTrait>
 std::unique_ptr<MemoryAllocator>
-CacheAllocator<CacheTrait>::restoreMemoryAllocator() {
+CacheAllocator<CacheTrait>::restoreMemoryAllocator(TierId tid) {
   return std::make_unique<MemoryAllocator>(
       deserializer_->deserialize<MemoryAllocator::SerializationType>(),
       shmManager_
-          ->attachShm(detail::kShmCacheName, config_.slabMemoryBaseAddr,
-          createShmCacheOpts()).addr,
+          ->attachShm(detail::kShmCacheName + std::to_string(tid),
+            config_.slabMemoryBaseAddr, createShmCacheOpts(tid)).addr,
       config_.getCacheSize(),
       config_.disableFullCoredump);
 }
 
 template <typename CacheTrait>
 std::unique_ptr<CCacheManager>
-CacheAllocator<CacheTrait>::restoreCCacheManager() {
+CacheAllocator<CacheTrait>::restoreCCacheManager(TierId tid) {
   return std::make_unique<CCacheManager>(
       deserializer_->deserialize<CCacheManager::SerializationType>(),
-      *allocator_);
+      *allocator_[tid]);
 }
 
 template <typename CacheTrait>
@@ -311,7 +346,8 @@ CacheAllocator<CacheTrait>::allocate(PoolId poolId,
 
 template <typename CacheTrait>
 typename CacheAllocator<CacheTrait>::ItemHandle
-CacheAllocator<CacheTrait>::allocateInternal(PoolId pid,
+CacheAllocator<CacheTrait>::allocateInternalTier(TierId tid,
+                                             PoolId pid,
                                              typename Item::Key key,
                                              uint32_t size,
                                              uint32_t creationTime,
@@ -324,13 +360,16 @@ CacheAllocator<CacheTrait>::allocateInternal(PoolId pid,
   const auto requiredSize = Item::getRequiredSize(key, size);
 
   // the allocation class in our memory allocator.
-  const auto cid = allocator_->getAllocationClassId(pid, requiredSize);
+  const auto cid = allocator_[tid]->getAllocationClassId(pid, requiredSize);
 
+  // TODO: per-tier
   (*stats_.allocAttempts)[pid][cid].inc();
 
-  void* memory = allocator_->allocate(pid, requiredSize);
+  void* memory = allocator_[tid]->allocate(pid, requiredSize);
+  // TODO: Today disableEviction means do not evict from memory (DRAM).
+  //       Should we support eviction between memory tiers (e.g. from DRAM to PMEM)?
   if (memory == nullptr && !config_.disableEviction) {
-    memory = findEviction(pid, cid);
+    memory = findEviction(tid, pid, cid);
   }
 
   ItemHandle handle;
@@ -341,7 +380,7 @@ CacheAllocator<CacheTrait>::allocateInternal(PoolId pid,
     // for example.
     SCOPE_FAIL {
       // free back the memory to the allocator since we failed.
-      allocator_->free(memory);
+      allocator_[tid]->free(memory);
     };
 
     handle = acquire(new (memory) Item(key, size, creationTime, expiryTime));
@@ -352,7 +391,7 @@ CacheAllocator<CacheTrait>::allocateInternal(PoolId pid,
     }
 
   } else { // failed to allocate memory.
-    (*stats_.allocFailures)[pid][cid].inc();
+    (*stats_.allocFailures)[pid][cid].inc(); // TODO: per-tier
     // wake up rebalancer
     if (poolRebalancer_) {
       poolRebalancer_->wakeUp();
@@ -369,6 +408,21 @@ CacheAllocator<CacheTrait>::allocateInternal(PoolId pid,
   return handle;
 }
 
+template <typename CacheTrait>
+typename CacheAllocator<CacheTrait>::ItemHandle
+CacheAllocator<CacheTrait>::allocateInternal(PoolId pid,
+                                             typename Item::Key key,
+                                             uint32_t size,
+                                             uint32_t creationTime,
+                                             uint32_t expiryTime) {
+  auto tid = 0; /* TODO: consult admission policy */
+  for(TierId tid = 0; tid < numTiers_; ++tid) {
+    auto handle = allocateInternalTier(tid, pid, key, size, creationTime, expiryTime);
+    if (handle) return handle;
+  }
+  return {};
+}
+
 template <typename CacheTrait>
 typename CacheAllocator<CacheTrait>::ItemHandle
 CacheAllocator<CacheTrait>::allocateChainedItem(const ItemHandle& parent,
@@ -399,21 +453,26 @@ CacheAllocator<CacheTrait>::allocateChainedItemInternal(
   // number of bytes required for this item
   const auto requiredSize = ChainedItem::getRequiredSize(size);
 
-  const auto pid = allocator_->getAllocInfo(parent->getMemory()).poolId;
-  const auto cid = allocator_->getAllocationClassId(pid, requiredSize);
+  // TODO: is this correct?
+  auto tid = getTierId(*parent);
+
+  const auto pid = allocator_[tid]->getAllocInfo(parent->getMemory()).poolId;
+  const auto cid = allocator_[tid]->getAllocationClassId(pid, requiredSize);
 
+  // TODO: per-tier? Right now stats_ are not used in any public periodic
+  // worker
   (*stats_.allocAttempts)[pid][cid].inc();
 
-  void* memory = allocator_->allocate(pid, requiredSize);
+  void* memory = allocator_[tid]->allocate(pid, requiredSize);
   if (memory == nullptr) {
-    memory = findEviction(pid, cid);
+    memory = findEviction(tid, pid, cid);
   }
   if (memory == nullptr) {
     (*stats_.allocFailures)[pid][cid].inc();
     return ItemHandle{};
   }
 
-  SCOPE_FAIL { allocator_->free(memory); };
+  SCOPE_FAIL { allocator_[tid]->free(memory); };
 
   auto child = acquire(new (memory) ChainedItem(
       compressor_.compress(parent.get()), size, util::getCurrentTimeSec()));
@@ -721,8 +780,8 @@ CacheAllocator<CacheTrait>::releaseBackToAllocator(Item& it,
     throw std::runtime_error(
         folly::sformat("cannot release this item: {}", it.toString()));
   }
-
-  const auto allocInfo = allocator_->getAllocInfo(it.getMemory());
+  const auto tid = getTierId(it);
+  const auto allocInfo = allocator_[tid]->getAllocInfo(it.getMemory());
 
   if (ctx == RemoveContext::kEviction) {
     const auto timeNow = util::getCurrentTimeSec();
@@ -746,8 +805,7 @@ CacheAllocator<CacheTrait>::releaseBackToAllocator(Item& it,
           folly::sformat("Can not recycle a chained item {}, toRecyle",
                          it.toString(), toRecycle->toString()));
     }
-
-    allocator_->free(&it);
+    allocator_[tid]->free(&it);
     return ReleaseRes::kReleased;
   }
 
@@ -790,7 +848,7 @@ CacheAllocator<CacheTrait>::releaseBackToAllocator(Item& it,
       auto next = head->getNext(compressor_);
 
       const auto childInfo =
-          allocator_->getAllocInfo(static_cast<const void*>(head));
+          allocator_[tid]->getAllocInfo(static_cast<const void*>(head));
       (*stats_.fragmentationSize)[childInfo.poolId][childInfo.classId].sub(
           util::getFragmentation(*this, *head));
 
@@ -823,7 +881,7 @@ CacheAllocator<CacheTrait>::releaseBackToAllocator(Item& it,
           XDCHECK(ReleaseRes::kReleased != res);
           res = ReleaseRes::kRecycled;
         } else {
-          allocator_->free(head);
+          allocator_[tid]->free(head);
         }
       }
 
@@ -838,7 +896,7 @@ CacheAllocator<CacheTrait>::releaseBackToAllocator(Item& it,
     res = ReleaseRes::kRecycled;
   } else {
     XDCHECK(it.isDrained());
-    allocator_->free(&it);
+    allocator_[tid]->free(&it);
   }
 
   return res;
@@ -1186,8 +1244,8 @@ bool CacheAllocator<CacheTrait>::moveChainedItem(ChainedItem& oldItem,
 
 template <typename CacheTrait>
 typename CacheAllocator<CacheTrait>::Item*
-CacheAllocator<CacheTrait>::findEviction(PoolId pid, ClassId cid) {
-  auto& mmContainer = getMMContainer(pid, cid);
+CacheAllocator<CacheTrait>::findEviction(TierId tid, PoolId pid, ClassId cid) {
+  auto& mmContainer = getMMContainer(tid, pid, cid);
 
   // Keep searching for a candidate until we were able to evict it
   // or until the search limit has been exhausted
@@ -1204,8 +1262,8 @@ CacheAllocator<CacheTrait>::findEviction(PoolId pid, ClassId cid) {
     // recycles the child we intend to.
     auto toReleaseHandle =
         itr->isChainedItem()
-            ? advanceIteratorAndTryEvictChainedItem(itr)
-            : advanceIteratorAndTryEvictRegularItem(mmContainer, itr);
+            ? advanceIteratorAndTryEvictChainedItem(tid, pid, itr)
+            : advanceIteratorAndTryEvictRegularItem(tid, pid, mmContainer, itr);
 
     if (toReleaseHandle) {
       if (toReleaseHandle->hasChainedItem()) {
@@ -1301,10 +1359,9 @@ bool CacheAllocator<CacheTrait>::shouldWriteToNvmCacheExclusive(
 template <typename CacheTrait>
 typename CacheAllocator<CacheTrait>::ItemHandle
 CacheAllocator<CacheTrait>::advanceIteratorAndTryEvictRegularItem(
-    MMContainer& mmContainer, EvictionIterator& itr) {
-  // we should flush this to nvmcache if it is not already present in nvmcache
-  // and the item is not expired.
+    TierId tid, PoolId pid, MMContainer& mmContainer, EvictionIterator& itr) {
   Item& item = *itr;
+
   const bool evictToNvmCache = shouldWriteToNvmCache(item);
 
   auto token = evictToNvmCache ? nvmCache_->createPutToken(item.getKey())
@@ -1367,7 +1424,7 @@ CacheAllocator<CacheTrait>::advanceIteratorAndTryEvictRegularItem(
 template <typename CacheTrait>
 typename CacheAllocator<CacheTrait>::ItemHandle
 CacheAllocator<CacheTrait>::advanceIteratorAndTryEvictChainedItem(
-    EvictionIterator& itr) {
+    TierId tid, PoolId pid, EvictionIterator& itr) {
   XDCHECK(itr->isChainedItem());
 
   ChainedItem* candidate = &itr->asChainedItem();
@@ -1418,6 +1475,8 @@ CacheAllocator<CacheTrait>::advanceIteratorAndTryEvictChainedItem(
   XDCHECK(!parent.isInMMContainer());
   XDCHECK(!parent.isAccessible());
 
+  // TODO: add multi-tier support (similar as for unchained items)
+
   // We need to make sure the parent is not marked as moving
   // and we're the only holder of the parent item. Safe to destroy the handle
   // here since moving bit is set.
@@ -1609,21 +1668,41 @@ void CacheAllocator<CacheTrait>::invalidateNvm(Item& item) {
   }
 }
 
+template <typename CacheTrait>
+TierId
+CacheAllocator<CacheTrait>::getTierId(const Item& item) const {
+  return getTierId(item.getMemory());
+}
+
+template <typename CacheTrait>
+TierId
+CacheAllocator<CacheTrait>::getTierId(const void* ptr) const {
+  for (TierId tid = 0; tid < numTiers_; tid++) {
+    if (allocator_[tid]->isMemoryInAllocator(ptr))
+      return tid;
+  }
+
+  throw std::invalid_argument("Item does not belong to any tier!");
+}
+
 template <typename CacheTrait>
 typename CacheAllocator<CacheTrait>::MMContainer&
 CacheAllocator<CacheTrait>::getMMContainer(const Item& item) const noexcept {
+  const auto tid = getTierId(item);
   const auto allocInfo =
-      allocator_->getAllocInfo(static_cast<const void*>(&item));
-  return getMMContainer(allocInfo.poolId, allocInfo.classId);
+      allocator_[tid]->getAllocInfo(static_cast<const void*>(&item));
+  return getMMContainer(tid, allocInfo.poolId, allocInfo.classId);
 }
 
 template <typename CacheTrait>
 typename CacheAllocator<CacheTrait>::MMContainer&
-CacheAllocator<CacheTrait>::getMMContainer(PoolId pid,
+CacheAllocator<CacheTrait>::getMMContainer(TierId tid,
+                                           PoolId pid,
                                            ClassId cid) const noexcept {
-  XDCHECK_LT(static_cast<size_t>(pid), mmContainers_.size());
-  XDCHECK_LT(static_cast<size_t>(cid), mmContainers_[pid].size());
-  return *mmContainers_[pid][cid];
+  XDCHECK_LT(static_cast<size_t>(tid), mmContainers_.size());
+  XDCHECK_LT(static_cast<size_t>(pid), mmContainers_[tid].size());
+  XDCHECK_LT(static_cast<size_t>(cid), mmContainers_[tid][pid].size());
+  return *mmContainers_[tid][pid][cid];
 }
 
 template <typename CacheTrait>
@@ -1747,8 +1826,9 @@ void CacheAllocator<CacheTrait>::markUseful(const ItemHandle& handle,
 template <typename CacheTrait>
 void CacheAllocator<CacheTrait>::recordAccessInMMContainer(Item& item,
                                                            AccessMode mode) {
+  const auto tid = getTierId(item);
   const auto allocInfo =
-      allocator_->getAllocInfo(static_cast<const void*>(&item));
+      allocator_[tid]->getAllocInfo(static_cast<const void*>(&item));
   (*stats_.cacheHits)[allocInfo.poolId][allocInfo.classId].inc();
 
   // track recently accessed items if needed
@@ -1756,14 +1836,15 @@ void CacheAllocator<CacheTrait>::recordAccessInMMContainer(Item& item,
     ring_->trackItem(reinterpret_cast<uintptr_t>(&item), item.getSize());
   }
 
-  auto& mmContainer = getMMContainer(allocInfo.poolId, allocInfo.classId);
+  auto& mmContainer = getMMContainer(tid, allocInfo.poolId, allocInfo.classId);
   mmContainer.recordAccess(item, mode);
 }
 
 template <typename CacheTrait>
 uint32_t CacheAllocator<CacheTrait>::getUsableSize(const Item& item) const {
+  const auto tid = getTierId(item);
   const auto allocSize =
-      allocator_->getAllocInfo(static_cast<const void*>(&item)).allocSize;
+      allocator_[tid]->getAllocInfo(static_cast<const void*>(&item)).allocSize;
   return item.isChainedItem()
              ? allocSize - ChainedItem::getRequiredSize(0)
              : allocSize - Item::getRequiredSize(item.getKey(), 0);
@@ -1772,8 +1853,11 @@ uint32_t CacheAllocator<CacheTrait>::getUsableSize(const Item& item) const {
 template <typename CacheTrait>
 typename CacheAllocator<CacheTrait>::ItemHandle
 CacheAllocator<CacheTrait>::getSampleItem() {
+  // TODO: is using random tier a good idea?
+  auto tid = folly::Random::rand32() % numTiers_;
+
   const auto* item =
-      reinterpret_cast<const Item*>(allocator_->getRandomAlloc());
+      reinterpret_cast<const Item*>(allocator_[tid]->getRandomAlloc());
   if (!item) {
     return ItemHandle{};
   }
@@ -1788,26 +1872,34 @@ CacheAllocator<CacheTrait>::getSampleItem() {
 
 template <typename CacheTrait>
 std::vector<std::string> CacheAllocator<CacheTrait>::dumpEvictionIterator(
-    PoolId pid, ClassId cid, size_t numItems) {
+  PoolId pid, ClassId cid, size_t numItems) {
   if (numItems == 0) {
     return {};
   }
 
-  if (static_cast<size_t>(pid) >= mmContainers_.size() ||
-      static_cast<size_t>(cid) >= mmContainers_[pid].size()) {
+  // Always evict from the lowest layer.
+  int tid = numTiers_ - 1;
+
+  if (static_cast<size_t>(tid) >= mmContainers_.size() ||
+      static_cast<size_t>(pid) >= mmContainers_[tid].size() ||
+      static_cast<size_t>(cid) >= mmContainers_[tid][pid].size()) {
     throw std::invalid_argument(
-        folly::sformat("Invalid PoolId: {} and ClassId: {}.", pid, cid));
+        folly::sformat("Invalid TierId: {} and PoolId: {} and ClassId: {}.", tid, pid, cid));
   }
 
   std::vector<std::string> content;
 
-  auto& mm = *mmContainers_[pid][cid];
-  auto evictItr = mm.getEvictionIterator();
   size_t i = 0;
-  while (evictItr && i < numItems) {
-    content.push_back(evictItr->toString());
-    ++evictItr;
-    ++i;
+  while (i < numItems && tid >= 0) {
+    auto& mm = *mmContainers_[tid][pid][cid];
+    auto evictItr = mm.getEvictionIterator();
+    while (evictItr && i < numItems) {
+      content.push_back(evictItr->toString());
+      ++evictItr;
+      ++i;
+    }
+
+    --tid;
   }
 
   return content;
@@ -1985,19 +2077,31 @@ PoolId CacheAllocator<CacheTrait>::addPool(
     std::shared_ptr<RebalanceStrategy> resizeStrategy,
     bool ensureProvisionable) {
   folly::SharedMutex::WriteHolder w(poolsResizeAndRebalanceLock_);
-  auto pid = allocator_->addPool(name, size, allocSizes, ensureProvisionable);
+
+  PoolId pid = 0;
+  auto tierConfigs = config_.getMemoryTierConfigs();
+  for (TierId tid = 0; tid < numTiers_; tid++) {
+    auto tierSizeRatio = static_cast<double>(
+        tierConfigs[tid].getSize()) / config_.getCacheSize();
+    auto tierPoolSize = static_cast<size_t>(tierSizeRatio * size);
+    auto res = allocator_[tid]->addPool(name, tierPoolSize, allocSizes, ensureProvisionable);
+    XDCHECK(tid == 0 || res == pid);
+    pid = res;
+  }
+
   createMMContainers(pid, std::move(config));
   setRebalanceStrategy(pid, std::move(rebalanceStrategy));
   setResizeStrategy(pid, std::move(resizeStrategy));
+
   return pid;
 }
 
 template <typename CacheTrait>
 void CacheAllocator<CacheTrait>::overridePoolRebalanceStrategy(
     PoolId pid, std::shared_ptr<RebalanceStrategy> rebalanceStrategy) {
-  if (static_cast<size_t>(pid) >= mmContainers_.size()) {
+  if (static_cast<size_t>(pid) >= mmContainers_[0].size()) {
     throw std::invalid_argument(folly::sformat(
-        "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_.size()));
+        "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_[0].size()));
   }
   setRebalanceStrategy(pid, std::move(rebalanceStrategy));
 }
@@ -2005,9 +2109,9 @@ void CacheAllocator<CacheTrait>::overridePoolRebalanceStrategy(
 template <typename CacheTrait>
 void CacheAllocator<CacheTrait>::overridePoolResizeStrategy(
     PoolId pid, std::shared_ptr<RebalanceStrategy> resizeStrategy) {
-  if (static_cast<size_t>(pid) >= mmContainers_.size()) {
+  if (static_cast<size_t>(pid) >= mmContainers_[0].size()) {
     throw std::invalid_argument(folly::sformat(
-        "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_.size()));
+        "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_[0].size()));
   }
   setResizeStrategy(pid, std::move(resizeStrategy));
 }
@@ -2019,14 +2123,14 @@ void CacheAllocator<CacheTrait>::overridePoolOptimizeStrategy(
 }
 
 template <typename CacheTrait>
-void CacheAllocator<CacheTrait>::overridePoolConfig(PoolId pid,
+void CacheAllocator<CacheTrait>::overridePoolConfig(TierId tid, PoolId pid,
                                                     const MMConfig& config) {
-  if (static_cast<size_t>(pid) >= mmContainers_.size()) {
+  // TODO: add generic tier id checking
+  if (static_cast<size_t>(pid) >= mmContainers_[tid].size()) {
     throw std::invalid_argument(folly::sformat(
-        "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_.size()));
+        "Invalid PoolId: {}, size of pools: {}", pid, mmContainers_[tid].size()));
   }
-
-  auto& pool = allocator_->getPool(pid);
+  auto& pool = allocator_[tid]->getPool(pid);
   for (unsigned int cid = 0; cid < pool.getNumClassId(); ++cid) {
     MMConfig mmConfig = config;
     mmConfig.addExtraConfig(
@@ -2034,30 +2138,36 @@ void CacheAllocator<CacheTrait>::overridePoolConfig(PoolId pid,
             ? pool.getAllocationClass(static_cast<ClassId>(cid))
                   .getAllocsPerSlab()
             : 0);
-    DCHECK_NOTNULL(mmContainers_[pid][cid].get());
+    DCHECK_NOTNULL(mmContainers_[tid][pid][cid].get());
 
-    mmContainers_[pid][cid]->setConfig(mmConfig);
+    mmContainers_[tid][pid][cid]->setConfig(mmConfig);
   }
 }
 
 template <typename CacheTrait>
 void CacheAllocator<CacheTrait>::createMMContainers(const PoolId pid,
                                                     MMConfig config) {
-  auto& pool = allocator_->getPool(pid);
+  // pools on each layer should have the same number of class id, etc.
+  // TODO: think about deduplication
+  auto& pool = allocator_[0]->getPool(pid);
+
   for (unsigned int cid = 0; cid < pool.getNumClassId(); ++cid) {
     config.addExtraConfig(
         config_.trackTailHits
             ? pool.getAllocationClass(static_cast<ClassId>(cid))
                   .getAllocsPerSlab()
             : 0);
-    mmContainers_[pid][cid].reset(new MMContainer(config, compressor_));
+    for (TierId tid = 0; tid < numTiers_; tid++) {
+      mmContainers_[tid][pid][cid].reset(new MMContainer(config, compressor_));
+    }
   }
 }
 
 template <typename CacheTrait>
 PoolId CacheAllocator<CacheTrait>::getPoolId(
     folly::StringPiece name) const noexcept {
-  return allocator_->getPoolId(name.str());
+  // each tier has the same pools
+  return allocator_[0]->getPoolId(name.str());
 }
 
 // The Function returns a consolidated vector of Release Slab
@@ -2100,7 +2210,9 @@ std::set<PoolId> CacheAllocator<CacheTrait>::filterCompactCachePools(
 template <typename CacheTrait>
 std::set<PoolId> CacheAllocator<CacheTrait>::getRegularPoolIds() const {
   folly::SharedMutex::ReadHolder r(poolsResizeAndRebalanceLock_);
-  return filterCompactCachePools(allocator_->getPoolIds());
+  // TODO - get rid of the duplication - right now, each tier
+  // holds pool objects with mostly the same info
+  return filterCompactCachePools(allocator_[0]->getPoolIds());
 }
 
 template <typename CacheTrait>
@@ -2125,10 +2237,9 @@ std::set<PoolId> CacheAllocator<CacheTrait>::getRegularPoolIdsForResize()
   // getAdvisedMemorySize - then pools may be overLimit even when
   // all slabs are not allocated. Otherwise, pools may be overLimit
   // only after all slabs are allocated.
-  //
-  return (allocator_->allSlabsAllocated()) ||
-                 (allocator_->getAdvisedMemorySize() != 0)
-             ? filterCompactCachePools(allocator_->getPoolsOverLimit())
+  return (allocator_[currentTier()]->allSlabsAllocated()) ||
+                 (allocator_[currentTier()]->getAdvisedMemorySize() != 0)
+             ? filterCompactCachePools(allocator_[currentTier()]->getPoolsOverLimit())
              : std::set<PoolId>{};
 }
 
@@ -2139,7 +2250,7 @@ const std::string CacheAllocator<CacheTrait>::getCacheName() const {
 
 template <typename CacheTrait>
 PoolStats CacheAllocator<CacheTrait>::getPoolStats(PoolId poolId) const {
-  const auto& pool = allocator_->getPool(poolId);
+  const auto& pool = allocator_[currentTier()]->getPool(poolId);
   const auto& allocSizes = pool.getAllocSizes();
   auto mpStats = pool.getStats();
   const auto& classIds = mpStats.classIds;
@@ -2157,7 +2268,7 @@ PoolStats CacheAllocator<CacheTrait>::getPoolStats(PoolId poolId) const {
   // TODO export evictions, numItems etc from compact cache directly.
   if (!isCompactCache) {
     for (const ClassId cid : classIds) {
-      const auto& container = getMMContainer(poolId, cid);
+      const auto& container = getMMContainer(currentTier(), poolId, cid);
       uint64_t classHits = (*stats_.cacheHits)[poolId][cid].get();
       cacheStats.insert(
           {cid,
@@ -2173,7 +2284,7 @@ PoolStats CacheAllocator<CacheTrait>::getPoolStats(PoolId poolId) const {
 
   PoolStats ret;
   ret.isCompactCache = isCompactCache;
-  ret.poolName = allocator_->getPoolName(poolId);
+  ret.poolName = allocator_[currentTier()]->getPoolName(poolId);
   ret.poolSize = pool.getPoolSize();
   ret.poolUsableSize = pool.getPoolUsableSize();
   ret.poolAdvisedSize = pool.getPoolAdvisedSize();
@@ -2189,18 +2300,16 @@ template <typename CacheTrait>
 PoolEvictionAgeStats CacheAllocator<CacheTrait>::getPoolEvictionAgeStats(
     PoolId pid, unsigned int slabProjectionLength) const {
   PoolEvictionAgeStats stats;
-
-  const auto& pool = allocator_->getPool(pid);
+  const auto& pool = allocator_[currentTier()]->getPool(pid);
   const auto& allocSizes = pool.getAllocSizes();
   for (ClassId cid = 0; cid < static_cast<ClassId>(allocSizes.size()); ++cid) {
-    auto& mmContainer = getMMContainer(pid, cid);
+    auto& mmContainer = getMMContainer(currentTier(), pid, cid);
     const auto numItemsPerSlab =
-        allocator_->getPool(pid).getAllocationClass(cid).getAllocsPerSlab();
+        allocator_[currentTier()]->getPool(pid).getAllocationClass(cid).getAllocsPerSlab();
     const auto projectionLength = numItemsPerSlab * slabProjectionLength;
     stats.classEvictionAgeStats[cid] =
         mmContainer.getEvictionAgeStat(projectionLength);
   }
-
   return stats;
 }
 
@@ -2239,7 +2348,7 @@ void CacheAllocator<CacheTrait>::releaseSlab(PoolId pid,
   }
 
   try {
-    auto releaseContext = allocator_->startSlabRelease(
+    auto releaseContext = allocator_[currentTier()]->startSlabRelease(
         pid, victim, receiver, mode, hint,
         [this]() -> bool { return shutDownInProgress_; });
 
@@ -2248,15 +2357,15 @@ void CacheAllocator<CacheTrait>::releaseSlab(PoolId pid,
       return;
     }
 
-    releaseSlabImpl(releaseContext);
-    if (!allocator_->allAllocsFreed(releaseContext)) {
+    releaseSlabImpl(currentTier(), releaseContext);
+    if (!allocator_[currentTier()]->allAllocsFreed(releaseContext)) {
       throw std::runtime_error(
           folly::sformat("Was not able to free all allocs. PoolId: {}, AC: {}",
                          releaseContext.getPoolId(),
                          releaseContext.getClassId()));
     }
 
-    allocator_->completeSlabRelease(releaseContext);
+    allocator_[currentTier()]->completeSlabRelease(releaseContext);
   } catch (const exception::SlabReleaseAborted& e) {
     stats_.numAbortedSlabReleases.inc();
     throw exception::SlabReleaseAborted(folly::sformat(
@@ -2267,8 +2376,7 @@ void CacheAllocator<CacheTrait>::releaseSlab(PoolId pid,
 }
 
 template <typename CacheTrait>
-SlabReleaseStats CacheAllocator<CacheTrait>::getSlabReleaseStats()
-    const noexcept {
+SlabReleaseStats CacheAllocator<CacheTrait>::getSlabReleaseStats() const noexcept {
   std::lock_guard<std::mutex> l(workersMutex_);
   return SlabReleaseStats{stats_.numActiveSlabReleases.get(),
                           stats_.numReleasedForRebalance.get(),
@@ -2285,7 +2393,7 @@ SlabReleaseStats CacheAllocator<CacheTrait>::getSlabReleaseStats()
 }
 
 template <typename CacheTrait>
-void CacheAllocator<CacheTrait>::releaseSlabImpl(
+void CacheAllocator<CacheTrait>::releaseSlabImpl(TierId tid,
     const SlabReleaseContext& releaseContext) {
   util::Throttler throttler(config_.throttleConfig);
 
@@ -2313,7 +2421,7 @@ void CacheAllocator<CacheTrait>::releaseSlabImpl(
     if (!isMoved) {
       evictForSlabRelease(releaseContext, item, throttler);
     }
-    XDCHECK(allocator_->isAllocFreed(releaseContext, alloc));
+    XDCHECK(allocator_[tid]->isAllocFreed(releaseContext, alloc));
   }
 }
 
@@ -2393,8 +2501,11 @@ bool CacheAllocator<CacheTrait>::moveForSlabRelease(
             ctx.getPoolId(), ctx.getClassId());
     });
   }
-  const auto allocInfo = allocator_->getAllocInfo(oldItem.getMemory());
-  allocator_->free(&oldItem);
+
+  auto tid = getTierId(oldItem);
+
+  const auto allocInfo = allocator_[tid]->getAllocInfo(oldItem.getMemory());
+  allocator_[tid]->free(&oldItem);
 
   (*stats_.fragmentationSize)[allocInfo.poolId][allocInfo.classId].sub(
       util::getFragmentation(*this, oldItem));
@@ -2456,11 +2567,12 @@ CacheAllocator<CacheTrait>::allocateNewItemForOldItem(const Item& oldItem) {
   }
 
   const auto allocInfo =
-      allocator_->getAllocInfo(static_cast<const void*>(&oldItem));
+      allocator_[getTierId(oldItem)]->getAllocInfo(static_cast<const void*>(&oldItem));
 
   // Set up the destination for the move. Since oldItem would have the moving
   // bit set, it won't be picked for eviction.
-  auto newItemHdl = allocateInternal(allocInfo.poolId,
+  auto newItemHdl = allocateInternalTier(getTierId(oldItem),
+                                     allocInfo.poolId,
                                      oldItem.getKey(),
                                      oldItem.getSize(),
                                      oldItem.getCreationTime(),
@@ -2545,7 +2657,7 @@ void CacheAllocator<CacheTrait>::evictForSlabRelease(
     // last handle for the owner.
     if (owningHandle) {
       const auto allocInfo =
-          allocator_->getAllocInfo(static_cast<const void*>(&item));
+          allocator_[getTierId(item)]->getAllocInfo(static_cast<const void*>(&item));
       if (owningHandle->hasChainedItem()) {
         (*stats_.chainedItemEvictions)[allocInfo.poolId][allocInfo.classId]
             .inc();
@@ -2572,7 +2684,7 @@ void CacheAllocator<CacheTrait>::evictForSlabRelease(
 
     if (shutDownInProgress_) {
       item.unmarkMoving();
-      allocator_->abortSlabRelease(ctx);
+      allocator_[getTierId(item)]->abortSlabRelease(ctx);
       throw exception::SlabReleaseAborted(
           folly::sformat("Slab Release aborted while trying to evict"
                          " Item: {} Pool: {}, Class: {}.",
@@ -2754,6 +2866,7 @@ bool CacheAllocator<CacheTrait>::removeIfExpired(const ItemHandle& handle) {
 template <typename CacheTrait>
 bool CacheAllocator<CacheTrait>::markMovingForSlabRelease(
     const SlabReleaseContext& ctx, void* alloc, util::Throttler& throttler) {
+
   // MemoryAllocator::processAllocForRelease will execute the callback
   // if the item is not already free. So there are three outcomes here:
   //  1. Item not freed yet and marked as moving
@@ -2767,18 +2880,20 @@ bool CacheAllocator<CacheTrait>::markMovingForSlabRelease(
   // At first, we assume this item was already freed
   bool itemFreed = true;
   bool markedMoving = false;
-  const auto fn = [&markedMoving, &itemFreed](void* memory) {
+  TierId tid = 0;
+  const auto fn = [&markedMoving, &itemFreed, &tid, this /* TODO - necessary for getTierId */](void* memory) {
     // Since this callback is executed, the item is not yet freed
     itemFreed = false;
     Item* item = static_cast<Item*>(memory);
     if (item->markMoving()) {
       markedMoving = true;
     }
+    tid = getTierId(*item);
   };
 
   auto startTime = util::getCurrentTimeSec();
   while (true) {
-    allocator_->processAllocForRelease(ctx, alloc, fn);
+    allocator_[tid]->processAllocForRelease(ctx, alloc, fn);
 
     // If item is already freed we give up trying to mark the item moving
     // and return false, otherwise if marked as moving, we return true.
@@ -2794,7 +2909,7 @@ bool CacheAllocator<CacheTrait>::markMovingForSlabRelease(
 
     if (shutDownInProgress_) {
       XDCHECK(!static_cast<Item*>(alloc)->isMoving());
-      allocator_->abortSlabRelease(ctx);
+      allocator_[tid]->abortSlabRelease(ctx);
       throw exception::SlabReleaseAborted(
           folly::sformat("Slab Release aborted while still trying to mark"
                          " as moving for Item: {}. Pool: {}, Class: {}.",
@@ -2817,12 +2932,15 @@ template <typename CCacheT, typename... Args>
 CCacheT* CacheAllocator<CacheTrait>::addCompactCache(folly::StringPiece name,
                                                      size_t size,
                                                      Args&&... args) {
+  if (numTiers_ != 1)
+    throw std::runtime_error("TODO: compact cache for multi-tier Cache not supported.");
+
   if (!config_.isCompactCacheEnabled()) {
     throw std::logic_error("Compact cache is not enabled");
   }
 
   folly::SharedMutex::WriteHolder lock(compactCachePoolsLock_);
-  auto poolId = allocator_->addPool(name, size, {Slab::kSize});
+  auto poolId = allocator_[0]->addPool(name, size, {Slab::kSize});
   isCompactCachePool_[poolId] = true;
 
   auto ptr = std::make_unique<CCacheT>(
@@ -2932,12 +3050,13 @@ folly::IOBufQueue CacheAllocator<CacheTrait>::saveStateToIOBuf() {
   *metadata_.numChainedChildItems_ref() = stats_.numChainedChildItems.get();
   *metadata_.numAbortedSlabReleases_ref() = stats_.numAbortedSlabReleases.get();
 
+  // TODO: implement serialization for multiple tiers
   auto serializeMMContainers = [](MMContainers& mmContainers) {
     MMSerializationTypeContainer state;
-    for (unsigned int i = 0; i < mmContainers.size(); ++i) {
-      for (unsigned int j = 0; j < mmContainers[i].size(); ++j) {
-        if (mmContainers[i][j]) {
-          state.pools_ref()[i][j] = mmContainers[i][j]->saveState();
+    for (unsigned int i = 0; i < mmContainers[0 /* TODO */].size(); ++i) {
+      for (unsigned int j = 0; j < mmContainers[0 /* TODO */][i].size(); ++j) {
+        if (mmContainers[0 /* TODO */][i][j]) {
+          state.pools_ref()[i][j] = mmContainers[0 /* TODO */][i][j]->saveState();
         }
       }
     }
@@ -2954,7 +3073,8 @@ folly::IOBufQueue CacheAllocator<CacheTrait>::saveStateToIOBuf() {
       serializeMMContainers(dummyMMContainers);
 
   AccessSerializationType accessContainerState = accessContainer_->saveState();
-  MemoryAllocator::SerializationType allocatorState = allocator_->saveState();
+  // TODO: foreach allocator
+  MemoryAllocator::SerializationType allocatorState = allocator_[0]->saveState();
   CCacheManager::SerializationType ccState = compactCacheManager_->saveState();
 
   AccessSerializationType chainedItemAccessContainerState =
@@ -3017,6 +3137,8 @@ CacheAllocator<CacheTrait>::shutDown() {
       (shmShutDownStatus == ShmShutDownRes::kSuccess);
   shmManager_.reset();
 
+  // TODO: save per-tier state
+
   if (shmShutDownSucceeded) {
     if (!nvmShutDownStatusOpt || *nvmShutDownStatusOpt)
       return ShutDownStatus::kSuccess;
@@ -3098,7 +3220,7 @@ CacheAllocator<CacheTrait>::deserializeMMContainers(
                                 ? pool.getAllocationClass(j).getAllocsPerSlab()
                                 : 0);
       ptr->setConfig(config);
-      mmContainers[i][j] = std::move(ptr);
+      mmContainers[0 /* TODO */][i][j] = std::move(ptr);
     }
   }
   // We need to drop the unevictableMMContainer in the desierializer.
@@ -3115,11 +3237,13 @@ CacheAllocator<CacheTrait>::createEmptyMMContainers() {
   MMContainers mmContainers;
   for (unsigned int i = 0; i < mmContainers_.size(); i++) {
     for (unsigned int j = 0; j < mmContainers_[i].size(); j++) {
-      if (mmContainers_[i][j]) {
-        MMContainerPtr ptr =
-            std::make_unique<typename MMContainerPtr::element_type>(
-                mmContainers_[i][j]->getConfig(), compressor_);
-        mmContainers[i][j] = std::move(ptr);
+      for (unsigned int k = 0; k < mmContainers_[i][j].size(); k++) {
+        if (mmContainers_[i][j][k]) {
+          MMContainerPtr ptr =
+              std::make_unique<typename MMContainerPtr::element_type>(
+                  mmContainers_[i][j][k]->getConfig(), compressor_);
+          mmContainers[i][j][k] = std::move(ptr);
+        }
       }
     }
   }
@@ -3259,10 +3383,10 @@ GlobalCacheStats CacheAllocator<CacheTrait>::getGlobalCacheStats() const {
 
 template <typename CacheTrait>
 CacheMemoryStats CacheAllocator<CacheTrait>::getCacheMemoryStats() const {
-  const auto totalCacheSize = allocator_->getMemorySize();
+  const auto totalCacheSize = allocator_[currentTier()]->getMemorySize();
 
   auto addSize = [this](size_t a, PoolId pid) {
-    return a + allocator_->getPool(pid).getPoolSize();
+    return a + allocator_[currentTier()]->getPool(pid).getPoolSize();
   };
   const auto regularPoolIds = getRegularPoolIds();
   const auto ccCachePoolIds = getCCachePoolIds();
@@ -3274,9 +3398,9 @@ CacheMemoryStats CacheAllocator<CacheTrait>::getCacheMemoryStats() const {
   return CacheMemoryStats{totalCacheSize,
                           regularCacheSize,
                           compactCacheSize,
-                          allocator_->getAdvisedMemorySize(),
+                          allocator_[currentTier()]->getAdvisedMemorySize(),
                           memMonitor_ ? memMonitor_->getMaxAdvisePct() : 0,
-                          allocator_->getUnreservedMemorySize(),
+                          allocator_[currentTier()]->getUnreservedMemorySize(),
                           nvmCache_ ? nvmCache_->getSize() : 0,
                           memMonitor_ ? memMonitor_->getMemAvailableSize() : 0,
                           memMonitor_ ? memMonitor_->getMemRssSize() : 0};
@@ -3419,6 +3543,8 @@ bool CacheAllocator<CacheTrait>::cleanupStrayShmSegments(
       // cache dir exists. clean up only if there are no other processes
       // attached. if another process was attached, the following would fail.
       ShmManager::cleanup(cacheDir, posix);
+
+      // TODO: cleanup per-tier state
     } catch (const std::exception& e) {
       XLOGF(ERR, "Error cleaning up {}. Exception: ", cacheDir, e.what());
       return false;
@@ -3428,7 +3554,8 @@ bool CacheAllocator<CacheTrait>::cleanupStrayShmSegments(
     // Any other concurrent process can not be attached to the segments or
     // even if it does, we want to mark it for destruction.
     ShmManager::removeByName(cacheDir, detail::kShmInfoName, posix);
-    ShmManager::removeByName(cacheDir, detail::kShmCacheName, posix);
+    ShmManager::removeByName(cacheDir, detail::kShmCacheName
+                             + std::to_string(0), posix);
     ShmManager::removeByName(cacheDir, detail::kShmHashTableName, posix);
     ShmManager::removeByName(cacheDir, detail::kShmChainedItemHashTableName,
                              posix);
@@ -3444,8 +3571,10 @@ bool CacheAllocator<CacheTrait>::cleanupStrayShmSegments(
 
 template <typename CacheTrait>
 uintptr_t CacheAllocator<CacheTrait>::getItemPtrAsOffset(const void* ptr) {
+  auto tid = getTierId(ptr);
+
   // if this succeeeds, the address is valid within the cache.
-  allocator_->getAllocInfo(ptr);
+  allocator_[tid]->getAllocInfo(ptr);
 
   if (!isOnShm_ || !shmManager_) {
     throw std::invalid_argument("Shared memory not used");
diff --git a/cachelib/allocator/CacheAllocator.h b/cachelib/allocator/CacheAllocator.h
index abdc13485e..70b0cd61ea 100644
--- a/cachelib/allocator/CacheAllocator.h
+++ b/cachelib/allocator/CacheAllocator.h
@@ -585,7 +585,7 @@ class CacheAllocator : public CacheBase {
   // @param config    new config for the pool
   //
   // @throw std::invalid_argument if the poolId is invalid
-  void overridePoolConfig(PoolId pid, const MMConfig& config);
+  void overridePoolConfig(TierId tid, PoolId pid, const MMConfig& config);
 
   // update an existing pool's rebalance strategy
   //
@@ -626,8 +626,9 @@ class CacheAllocator : public CacheBase {
   // @return  true if the operation succeeded. false if the size of the pool is
   //          smaller than _bytes_
   // @throw   std::invalid_argument if the poolId is invalid.
+  // TODO: should call shrinkPool for specific tier?
   bool shrinkPool(PoolId pid, size_t bytes) {
-    return allocator_->shrinkPool(pid, bytes);
+    return allocator_[currentTier()]->shrinkPool(pid, bytes);
   }
 
   // grow an existing pool by _bytes_. This will fail if there is no
@@ -636,8 +637,9 @@ class CacheAllocator : public CacheBase {
   // @return    true if the pool was grown. false if the necessary number of
   //            bytes were not available.
   // @throw     std::invalid_argument if the poolId is invalid.
+  // TODO: should call growPool for specific tier?
   bool growPool(PoolId pid, size_t bytes) {
-    return allocator_->growPool(pid, bytes);
+    return allocator_[currentTier()]->growPool(pid, bytes);
   }
 
   // move bytes from one pool to another. The source pool should be at least
@@ -650,7 +652,7 @@ class CacheAllocator : public CacheBase {
   //          correct size to do the transfer.
   // @throw   std::invalid_argument if src or dest is invalid pool
   bool resizePools(PoolId src, PoolId dest, size_t bytes) override {
-    return allocator_->resizePools(src, dest, bytes);
+    return allocator_[currentTier()]->resizePools(src, dest, bytes);
   }
 
   // Add a new compact cache with given name and size
@@ -850,12 +852,13 @@ class CacheAllocator : public CacheBase {
   // @throw std::invalid_argument if the memory does not belong to this
   //        cache allocator
   AllocInfo getAllocInfo(const void* memory) const {
-    return allocator_->getAllocInfo(memory);
+    return allocator_[getTierId(memory)]->getAllocInfo(memory);
   }
 
   // return the ids for the set of existing pools in this cache.
   std::set<PoolId> getPoolIds() const override final {
-    return allocator_->getPoolIds();
+    // all tiers have the same pool ids. TODO: deduplicate
+    return allocator_[0]->getPoolIds();
   }
 
   // return a list of pool ids that are backing compact caches. This includes
@@ -867,18 +870,18 @@ class CacheAllocator : public CacheBase {
 
   // return the pool with speicified id.
   const MemoryPool& getPool(PoolId pid) const override final {
-    return allocator_->getPool(pid);
+    return allocator_[currentTier()]->getPool(pid);
   }
 
   // calculate the number of slabs to be advised/reclaimed in each pool
   PoolAdviseReclaimData calcNumSlabsToAdviseReclaim() override final {
     auto regularPoolIds = getRegularPoolIds();
-    return allocator_->calcNumSlabsToAdviseReclaim(regularPoolIds);
+    return allocator_[currentTier()]->calcNumSlabsToAdviseReclaim(regularPoolIds);
   }
 
   // update number of slabs to advise in the cache
   void updateNumSlabsToAdvise(int32_t numSlabsToAdvise) override final {
-    allocator_->updateNumSlabsToAdvise(numSlabsToAdvise);
+    allocator_[currentTier()]->updateNumSlabsToAdvise(numSlabsToAdvise);
   }
 
   // returns a valid PoolId corresponding to the name or kInvalidPoolId if the
@@ -887,7 +890,8 @@ class CacheAllocator : public CacheBase {
 
   // returns the pool's name by its poolId.
   std::string getPoolName(PoolId poolId) const {
-    return allocator_->getPoolName(poolId);
+    // all tiers have the same pool names.
+    return allocator_[0]->getPoolName(poolId);
   }
 
   // get stats related to all kinds of slab release events.
@@ -928,7 +932,7 @@ class CacheAllocator : public CacheBase {
   // pool stats by pool id
   PoolStats getPoolStats(PoolId pid) const override final;
 
-  // This can be expensive so it is not part of PoolStats
+  // This can be expensive so it is not part of PoolStats.
   PoolEvictionAgeStats getPoolEvictionAgeStats(
       PoolId pid, unsigned int slabProjectionLength) const override final;
 
@@ -938,7 +942,7 @@ class CacheAllocator : public CacheBase {
   // return the overall cache stats
   GlobalCacheStats getGlobalCacheStats() const override final;
 
-  // return cache's memory usage stats
+  // return cache's memory usage stats.
   CacheMemoryStats getCacheMemoryStats() const override final;
 
   // return the nvm cache stats map
@@ -1143,11 +1147,14 @@ class CacheAllocator : public CacheBase {
 
   using MMContainerPtr = std::unique_ptr<MMContainer>;
   using MMContainers =
-      std::array<std::array<MMContainerPtr, MemoryAllocator::kMaxClasses>,
-                 MemoryPoolManager::kMaxPools>;
+      std::array<std::array<std::array<MMContainerPtr, MemoryAllocator::kMaxClasses>,
+                 MemoryPoolManager::kMaxPools>, kMaxTiers>;
 
   void createMMContainers(const PoolId pid, MMConfig config);
 
+  TierId getTierId(const Item& item) const;
+  TierId getTierId(const void* ptr) const;
+
   // acquire the MMContainer corresponding to the the Item's class and pool.
   //
   // @return pointer to the MMContainer.
@@ -1155,13 +1162,11 @@ class CacheAllocator : public CacheBase {
   // allocation from the memory allocator.
   MMContainer& getMMContainer(const Item& item) const noexcept;
 
-  MMContainer& getMMContainer(PoolId pid, ClassId cid) const noexcept;
-
   // acquire the MMContainer for the give pool and class id and creates one
   // if it does not exist.
   //
-  // @return pointer to a valid MMContainer that is initialized.
-  MMContainer& getEvictableMMContainer(PoolId pid, ClassId cid) const noexcept;
+  // @return pointer to a valid MMContainer that is initialized
+  MMContainer& getMMContainer(TierId tid, PoolId pid, ClassId cid) const noexcept;
 
   // create a new cache allocation. The allocation can be initialized
   // appropriately and made accessible through insert or insertOrReplace.
@@ -1193,6 +1198,17 @@ class CacheAllocator : public CacheBase {
                               uint32_t creationTime,
                               uint32_t expiryTime);
 
+  // create a new cache allocation on specific memory tier.
+  // For description see allocateInternal.
+  //
+  // @param tid id a memory tier
+  ItemHandle allocateInternalTier(TierId tid,
+                              PoolId id,
+                              Key key,
+                              uint32_t size,
+                              uint32_t creationTime,
+                              uint32_t expiryTime);
+
   // Allocate a chained item
   //
   // The resulting chained item does not have a parent item and
@@ -1257,6 +1273,15 @@ class CacheAllocator : public CacheBase {
   //              not exist.
   FOLLY_ALWAYS_INLINE ItemHandle findFastImpl(Key key, AccessMode mode);
 
+  // Moves a regular item to a different memory tier.
+  //
+  // @param oldItem     Reference to the item being moved
+  // @param newItemHdl  Reference to the handle of the new item being moved into
+  //
+  // @return true  If the move was completed, and the containers were updated
+  //               successfully.
+  bool moveRegularItemOnEviction(Item& oldItem, ItemHandle& newItemHdl);
+
   // Moves a regular item to a different slab. This should only be used during
   // slab release after the item's moving bit has been set. The user supplied
   // callback is responsible for copying the contents and fixing the semantics
@@ -1399,7 +1424,7 @@ class CacheAllocator : public CacheBase {
   // @param  pid  the id of the pool to look for evictions inside
   // @param  cid  the id of the class to look for evictions inside
   // @return An evicted item or nullptr  if there is no suitable candidate.
-  Item* findEviction(PoolId pid, ClassId cid);
+  Item* findEviction(TierId tid, PoolId pid, ClassId cid);
 
   using EvictionIterator = typename MMContainer::Iterator;
 
@@ -1410,7 +1435,7 @@ class CacheAllocator : public CacheBase {
   //
   // @return  valid handle to regular item on success. This will be the last
   //          handle to the item. On failure an empty handle.
-  ItemHandle advanceIteratorAndTryEvictRegularItem(MMContainer& mmContainer,
+  ItemHandle advanceIteratorAndTryEvictRegularItem(TierId tid, PoolId pid, MMContainer& mmContainer,
                                                    EvictionIterator& itr);
 
   // Advance the current iterator and try to evict a chained item
@@ -1420,7 +1445,15 @@ class CacheAllocator : public CacheBase {
   //
   // @return  valid handle to the parent item on success. This will be the last
   //          handle to the item
-  ItemHandle advanceIteratorAndTryEvictChainedItem(EvictionIterator& itr);
+  ItemHandle advanceIteratorAndTryEvictChainedItem(TierId tid, PoolId pid, EvictionIterator& itr);
+
+  // Try to move the item down to the next memory tier
+  //
+  // @param item the item to evict
+  //
+  // @return valid handle to the item. This will be the last
+  //         handle to the item. On failure an empty handle. 
+  ItemHandle tryEvictToNextMemoryTier(TierId tid, PoolId pid, Item& item);
 
   // Deserializer CacheAllocatorMetadata and verify the version
   //
@@ -1442,7 +1475,7 @@ class CacheAllocator : public CacheBase {
   MMContainers createEmptyMMContainers();
 
   unsigned int reclaimSlabs(PoolId id, size_t numSlabs) final {
-    return allocator_->reclaimSlabsAndGrow(id, numSlabs);
+    return allocator_[currentTier()]->reclaimSlabsAndGrow(id, numSlabs);
   }
 
   FOLLY_ALWAYS_INLINE EventTracker* getEventTracker() const {
@@ -1501,7 +1534,7 @@ class CacheAllocator : public CacheBase {
                    const void* hint = nullptr) final;
 
   // @param releaseContext  slab release context
-  void releaseSlabImpl(const SlabReleaseContext& releaseContext);
+  void releaseSlabImpl(TierId tid, const SlabReleaseContext& releaseContext);
 
   // @return  true when successfully marked as moving,
   //          fasle when this item has already been freed
@@ -1573,7 +1606,7 @@ class CacheAllocator : public CacheBase {
     // primitives. So we consciously exempt ourselves here from TSAN data race
     // detection.
     folly::annotate_ignore_thread_sanitizer_guard g(__FILE__, __LINE__);
-    allocator_->forEachAllocation(std::forward<Fn>(f));
+    allocator_[currentTier()]->forEachAllocation(std::forward<Fn>(f));
   }
 
   // returns true if nvmcache is enabled and we should write this item to
@@ -1616,11 +1649,11 @@ class CacheAllocator : public CacheBase {
                   std::unique_ptr<T>& worker,
                   std::chrono::seconds timeout = std::chrono::seconds{0});
 
-  ShmSegmentOpts createShmCacheOpts();
+  ShmSegmentOpts createShmCacheOpts(TierId tid);
 
-  std::unique_ptr<MemoryAllocator> createNewMemoryAllocator();
-  std::unique_ptr<MemoryAllocator> restoreMemoryAllocator();
-  std::unique_ptr<CCacheManager> restoreCCacheManager();
+  std::unique_ptr<MemoryAllocator> createNewMemoryAllocator(TierId tid);
+  std::unique_ptr<MemoryAllocator> restoreMemoryAllocator(TierId tid);
+  std::unique_ptr<CCacheManager> restoreCCacheManager(TierId tid);
 
   PoolIds filterCompactCachePools(const PoolIds& poolIds) const;
 
@@ -1640,7 +1673,7 @@ class CacheAllocator : public CacheBase {
   }
 
   typename Item::PtrCompressor createPtrCompressor() const {
-    return allocator_->createPtrCompressor<Item>();
+    return allocator_[0 /* TODO */]->createPtrCompressor<Item>();
   }
 
   // helper utility to throttle and optionally log.
@@ -1717,6 +1750,13 @@ class CacheAllocator : public CacheBase {
 
   // BEGIN private members
 
+  TierId currentTier() const {
+    // TODO: every function which calls this method should be refactored.
+    // We should go case by case and either make such function work on
+    // all tiers or expose separate parameter to describe the tier ID.
+    return 0;
+  }
+
   // Whether the memory allocator for this cache allocator was created on shared
   // memory. The hash table, chained item hash table etc is also created on
   // shared memory except for temporary shared memory mode when they're created
@@ -1744,9 +1784,14 @@ class CacheAllocator : public CacheBase {
   const MMConfig mmConfig_{};
 
   // the memory allocator for allocating out of the available memory.
-  std::unique_ptr<MemoryAllocator> allocator_;
+  std::vector<std::unique_ptr<MemoryAllocator>> allocator_;
+
+  std::vector<std::unique_ptr<MemoryAllocator>> createPrivateAllocator();
+  std::vector<std::unique_ptr<MemoryAllocator>> createAllocators();
+  std::vector<std::unique_ptr<MemoryAllocator>> restoreAllocators();
 
   // compact cache allocator manager
+  // TODO: per tier?
   std::unique_ptr<CCacheManager> compactCacheManager_;
 
   // compact cache instances reside here when user "add" or "attach" compact
diff --git a/cachelib/allocator/PoolOptimizer.cpp b/cachelib/allocator/PoolOptimizer.cpp
index b1b3ff26b1..bf31325be1 100644
--- a/cachelib/allocator/PoolOptimizer.cpp
+++ b/cachelib/allocator/PoolOptimizer.cpp
@@ -51,6 +51,8 @@ void PoolOptimizer::optimizeRegularPoolSizes() {
 
 void PoolOptimizer::optimizeCompactCacheSizes() {
   try {
+    // TODO: should optimizer look at each tier individually?
+    // If yes, then resizePools should be per-tier
     auto strategy = cache_.getPoolOptimizeStrategy();
     if (!strategy) {
       strategy = strategy_;
diff --git a/cachelib/allocator/memory/MemoryAllocator.h b/cachelib/allocator/memory/MemoryAllocator.h
index cc92cdf2a8..32982fc3af 100644
--- a/cachelib/allocator/memory/MemoryAllocator.h
+++ b/cachelib/allocator/memory/MemoryAllocator.h
@@ -630,6 +630,13 @@ class MemoryAllocator {
     memoryPoolManager_.updateNumSlabsToAdvise(numSlabs);
   }
 
+  // returns ture if ptr points to memory which is managed by this
+  // allocator
+  bool isMemoryInAllocator(const void *ptr) {
+    return ptr && ptr >= slabAllocator_.getSlabMemoryBegin()
+      && ptr < slabAllocator_.getSlabMemoryEnd();
+  }
+
  private:
   // @param memory    pointer to the memory.
   // @return          the MemoryPool corresponding to the memory.
diff --git a/cachelib/allocator/memory/Slab.h b/cachelib/allocator/memory/Slab.h
index 823147affc..b6fd8f21a4 100644
--- a/cachelib/allocator/memory/Slab.h
+++ b/cachelib/allocator/memory/Slab.h
@@ -50,6 +50,8 @@ namespace cachelib {
  * independantly by the SlabAllocator.
  */
 
+// identifier for the memory tier
+using TierId = int8_t;
 // identifier for the memory pool
 using PoolId = int8_t;
 // identifier for the allocation class
diff --git a/cachelib/allocator/memory/SlabAllocator.h b/cachelib/allocator/memory/SlabAllocator.h
index d5773ba30c..fa5e00a892 100644
--- a/cachelib/allocator/memory/SlabAllocator.h
+++ b/cachelib/allocator/memory/SlabAllocator.h
@@ -312,6 +312,17 @@ class SlabAllocator {
     return PtrCompressor<PtrType, SlabAllocator>(*this);
   }
 
+  // returns starting address of memory we own.
+  const Slab* getSlabMemoryBegin() const noexcept {
+    return reinterpret_cast<Slab*>(memoryStart_);
+  }
+
+  // returns first byte after the end of memory region we own.
+  const Slab* getSlabMemoryEnd() const noexcept {
+    return reinterpret_cast<Slab*>(reinterpret_cast<uint8_t*>(memoryStart_) +
+                                   memorySize_);
+  }
+
  private:
   // null Slab* presenttation. With 4M Slab size, a valid slab index would never
   // reach 2^16 - 1;
@@ -329,12 +340,6 @@ class SlabAllocator {
   // @throw std::invalid_argument if the state is invalid.
   void checkState() const;
 
-  // returns first byte after the end of memory region we own.
-  const Slab* getSlabMemoryEnd() const noexcept {
-    return reinterpret_cast<Slab*>(reinterpret_cast<uint8_t*>(memoryStart_) +
-                                   memorySize_);
-  }
-
   // returns true if we have slabbed all the memory that is available to us.
   // false otherwise.
   bool allMemorySlabbed() const noexcept {
diff --git a/cachelib/allocator/tests/AllocatorResizeTest.h b/cachelib/allocator/tests/AllocatorResizeTest.h
index 3eac3fd475..06c7ae0e81 100644
--- a/cachelib/allocator/tests/AllocatorResizeTest.h
+++ b/cachelib/allocator/tests/AllocatorResizeTest.h
@@ -959,23 +959,23 @@ class AllocatorResizeTest : public AllocatorTest<AllocatorT> {
       for (i = 1; i <= numItersToMaxAdviseAway + 1; i++) {
         alloc.memMonitor_->adviseAwaySlabs();
         std::this_thread::sleep_for(std::chrono::seconds{2});
-        ASSERT_EQ(alloc.allocator_->getAdvisedMemorySize(), i * perIterAdvSize);
+        ASSERT_EQ(alloc.allocator_[0 /* TODO - extend test */]->getAdvisedMemorySize(), i * perIterAdvSize);
       }
       i--;
       // This should fail
       alloc.memMonitor_->adviseAwaySlabs();
       std::this_thread::sleep_for(std::chrono::seconds{2});
-      auto totalAdvisedAwayMemory = alloc.allocator_->getAdvisedMemorySize();
+      auto totalAdvisedAwayMemory = alloc.allocator_[0 /* TODO - extend test */]->getAdvisedMemorySize();
       ASSERT_EQ(totalAdvisedAwayMemory, i * perIterAdvSize);
 
       // Try to reclaim back
       for (i = 1; i <= numItersToMaxAdviseAway + 1; i++) {
         alloc.memMonitor_->reclaimSlabs();
         std::this_thread::sleep_for(std::chrono::seconds{2});
-        ASSERT_EQ(alloc.allocator_->getAdvisedMemorySize(),
+        ASSERT_EQ(alloc.allocator_[0 /* TODO - extend test */]->getAdvisedMemorySize(),
                   totalAdvisedAwayMemory - i * perIterAdvSize);
       }
-      totalAdvisedAwayMemory = alloc.allocator_->getAdvisedMemorySize();
+      totalAdvisedAwayMemory = alloc.allocator_[0 /* TODO - extend test */]->getAdvisedMemorySize();
       ASSERT_EQ(totalAdvisedAwayMemory, 0);
     }
   }
diff --git a/cachelib/allocator/tests/BaseAllocatorTest.h b/cachelib/allocator/tests/BaseAllocatorTest.h
index dce17f7ceb..38bf0ec78a 100644
--- a/cachelib/allocator/tests/BaseAllocatorTest.h
+++ b/cachelib/allocator/tests/BaseAllocatorTest.h
@@ -4078,13 +4078,13 @@ class BaseAllocatorTest : public AllocatorTest<AllocatorT> {
     // Had a bug: D4799860 where we allocated the wrong size for chained item
     {
       const auto parentAllocInfo =
-          alloc.allocator_->getAllocInfo(itemHandle->getMemory());
+          alloc.allocator_[0 /* TODO - extend test */]->getAllocInfo(itemHandle->getMemory());
       const auto child1AllocInfo =
-          alloc.allocator_->getAllocInfo(chainedItemHandle->getMemory());
+          alloc.allocator_[0 /* TODO - extend test */]->getAllocInfo(chainedItemHandle->getMemory());
       const auto child2AllocInfo =
-          alloc.allocator_->getAllocInfo(chainedItemHandle2->getMemory());
+          alloc.allocator_[0 /* TODO - extend test */]->getAllocInfo(chainedItemHandle2->getMemory());
       const auto child3AllocInfo =
-          alloc.allocator_->getAllocInfo(chainedItemHandle3->getMemory());
+          alloc.allocator_[0 /* TODO - extend test */]->getAllocInfo(chainedItemHandle3->getMemory());
 
       const auto parentCid = parentAllocInfo.classId;
       const auto child1Cid = child1AllocInfo.classId;
diff --git a/cachelib/allocator/tests/TestBase-inl.h b/cachelib/allocator/tests/TestBase-inl.h
index fc6544103c..407f1e8046 100644
--- a/cachelib/allocator/tests/TestBase-inl.h
+++ b/cachelib/allocator/tests/TestBase-inl.h
@@ -312,7 +312,7 @@ void AllocatorTest<AllocatorT>::testShmIsRemoved(
   ASSERT_FALSE(AllocatorT::ShmManager::segmentExists(
       config.getCacheDir(), detail::kShmHashTableName, config.usePosixShm));
   ASSERT_FALSE(AllocatorT::ShmManager::segmentExists(
-      config.getCacheDir(), detail::kShmCacheName, config.usePosixShm));
+      config.getCacheDir(), detail::kShmCacheName + std::to_string(0), config.usePosixShm));
   ASSERT_FALSE(AllocatorT::ShmManager::segmentExists(
       config.getCacheDir(), detail::kShmChainedItemHashTableName,
       config.usePosixShm));
@@ -326,7 +326,7 @@ void AllocatorTest<AllocatorT>::testShmIsNotRemoved(
   ASSERT_TRUE(AllocatorT::ShmManager::segmentExists(
       config.getCacheDir(), detail::kShmHashTableName, config.usePosixShm));
   ASSERT_TRUE(AllocatorT::ShmManager::segmentExists(
-      config.getCacheDir(), detail::kShmCacheName, config.usePosixShm));
+      config.getCacheDir(), detail::kShmCacheName + std::to_string(0), config.usePosixShm));
   ASSERT_TRUE(AllocatorT::ShmManager::segmentExists(
       config.getCacheDir(), detail::kShmChainedItemHashTableName,
       config.usePosixShm));

From 9ed5abcfbe158d249bfced4020e798924f34a925 Mon Sep 17 00:00:00 2001
From: Igor Chorazewicz <Igor.Chorazewicz@intel.com>
Date: Fri, 10 Dec 2021 21:45:58 -0500
Subject: [PATCH 2/2] Extend CompressedPtr to work with multiple tiers

Now it's size is 8 bytes intead of 4.

Original CompressedPtr stored only some offset with a memory Allocator.
For multi-tier implementation, this is not enough. We must also store
tierId and when uncompressing, select a proper allocator.

An alternative could be to just resign from CompressedPtr but they
are leveraged to allow the cache to be mapped to different addresses on shared memory.

Changing CompressedPtr impacted CacheItem size - it increased from 32 to 44 bytes.
---
 cachelib/allocator/CacheAllocator.h           |  5 +-
 cachelib/allocator/CacheAllocatorConfig.h     |  2 +-
 cachelib/allocator/CacheItem.h                |  1 +
 cachelib/allocator/memory/AllocationClass.cpp | 10 +-
 cachelib/allocator/memory/AllocationClass.h   |  2 +-
 cachelib/allocator/memory/CompressedPtr.h     | 95 ++++++++++++++++---
 cachelib/allocator/memory/MemoryAllocator.h   |  9 +-
 cachelib/allocator/memory/SlabAllocator.cpp   |  4 +
 cachelib/allocator/memory/SlabAllocator.h     |  4 +-
 .../allocator/tests/AllocatorResizeTest.h     |  4 +-
 10 files changed, 105 insertions(+), 31 deletions(-)

diff --git a/cachelib/allocator/CacheAllocator.h b/cachelib/allocator/CacheAllocator.h
index 70b0cd61ea..32758bd1ad 100644
--- a/cachelib/allocator/CacheAllocator.h
+++ b/cachelib/allocator/CacheAllocator.h
@@ -1061,7 +1061,8 @@ class CacheAllocator : public CacheBase {
                  sizeof(typename RefcountWithFlags::Value) + sizeof(uint32_t) +
                  sizeof(uint32_t) + sizeof(KAllocation)) == sizeof(Item),
                 "vtable overhead");
-  static_assert(32 == sizeof(Item), "item overhead is 32 bytes");
+  // XXX: this will fail due to CompressedPtr change
+  // static_assert(32 == sizeof(Item), "item overhead is 32 bytes");
 
   // make sure there is no overhead in ChainedItem on top of a regular Item
   static_assert(sizeof(Item) == sizeof(ChainedItem),
@@ -1673,7 +1674,7 @@ class CacheAllocator : public CacheBase {
   }
 
   typename Item::PtrCompressor createPtrCompressor() const {
-    return allocator_[0 /* TODO */]->createPtrCompressor<Item>();
+    return typename Item::PtrCompressor(allocator_);
   }
 
   // helper utility to throttle and optionally log.
diff --git a/cachelib/allocator/CacheAllocatorConfig.h b/cachelib/allocator/CacheAllocatorConfig.h
index a5d2058687..e8fc20c141 100644
--- a/cachelib/allocator/CacheAllocatorConfig.h
+++ b/cachelib/allocator/CacheAllocatorConfig.h
@@ -1059,7 +1059,7 @@ const CacheAllocatorConfig<T>& CacheAllocatorConfig<T>::validate() const {
   // CompressedPtr;
   // The second part specifies the minimal allocation size for each slot.
   // Multiplied, they inform us the maximal addressable space for cache.
-  size_t maxCacheSize = (1ul << CompressedPtr::kNumBits) * Slab::kMinAllocSize;
+  size_t maxCacheSize = CompressedPtr::getMaxAddressableSize() * Slab::kMinAllocSize;
   // Configured cache size should not exceed the maximal addressable space for
   // cache.
   if (size > maxCacheSize) {
diff --git a/cachelib/allocator/CacheItem.h b/cachelib/allocator/CacheItem.h
index dd8d9e0581..fa67fc256b 100644
--- a/cachelib/allocator/CacheItem.h
+++ b/cachelib/allocator/CacheItem.h
@@ -139,6 +139,7 @@ class CACHELIB_PACKED_ATTR CacheItem {
    * to be mapped to different addresses on shared memory.
    */
   using CompressedPtr = facebook::cachelib::CompressedPtr;
+  using SingleTierPtrCompressor = MemoryAllocator::SingleTierPtrCompressor<Item>;
   using PtrCompressor = MemoryAllocator::PtrCompressor<Item>;
 
   // Get the required size for a cache item given the size of memory
diff --git a/cachelib/allocator/memory/AllocationClass.cpp b/cachelib/allocator/memory/AllocationClass.cpp
index 7648798722..c8d97035a1 100644
--- a/cachelib/allocator/memory/AllocationClass.cpp
+++ b/cachelib/allocator/memory/AllocationClass.cpp
@@ -50,7 +50,7 @@ AllocationClass::AllocationClass(ClassId classId,
       poolId_(poolId),
       allocationSize_(allocSize),
       slabAlloc_(s),
-      freedAllocations_{slabAlloc_.createPtrCompressor<FreeAlloc>()} {
+      freedAllocations_{slabAlloc_.createSingleTierPtrCompressor<FreeAlloc>()} {
   checkState();
 }
 
@@ -102,7 +102,7 @@ AllocationClass::AllocationClass(
       currSlab_(s.getSlabForIdx(*object.currSlabIdx_ref())),
       slabAlloc_(s),
       freedAllocations_(*object.freedAllocationsObject_ref(),
-                        slabAlloc_.createPtrCompressor<FreeAlloc>()),
+                        slabAlloc_.createSingleTierPtrCompressor<FreeAlloc>()),
       canAllocate_(*object.canAllocate_ref()) {
   if (!slabAlloc_.isRestorable()) {
     throw std::logic_error("The allocation class cannot be restored.");
@@ -356,9 +356,9 @@ std::pair<bool, std::vector<void*>> AllocationClass::pruneFreeAllocs(
   // allocated slab, release any freed allocations belonging to this slab.
   // Set the bit to true if the corresponding allocation is freed, false
   // otherwise.
-  FreeList freeAllocs{slabAlloc_.createPtrCompressor<FreeAlloc>()};
-  FreeList notInSlab{slabAlloc_.createPtrCompressor<FreeAlloc>()};
-  FreeList inSlab{slabAlloc_.createPtrCompressor<FreeAlloc>()};
+  FreeList freeAllocs{slabAlloc_.createSingleTierPtrCompressor<FreeAlloc>()};
+  FreeList notInSlab{slabAlloc_.createSingleTierPtrCompressor<FreeAlloc>()};
+  FreeList inSlab{slabAlloc_.createSingleTierPtrCompressor<FreeAlloc>()};
 
   lock_->lock_combine([&]() {
     // Take the allocation class free list offline
diff --git a/cachelib/allocator/memory/AllocationClass.h b/cachelib/allocator/memory/AllocationClass.h
index 4071062119..47925a0da0 100644
--- a/cachelib/allocator/memory/AllocationClass.h
+++ b/cachelib/allocator/memory/AllocationClass.h
@@ -446,7 +446,7 @@ class AllocationClass {
   struct CACHELIB_PACKED_ATTR FreeAlloc {
     using CompressedPtr = facebook::cachelib::CompressedPtr;
     using PtrCompressor =
-        facebook::cachelib::PtrCompressor<FreeAlloc, SlabAllocator>;
+        facebook::cachelib::SingleTierPtrCompressor<FreeAlloc, SlabAllocator>;
     SListHook<FreeAlloc> hook_{};
   };
 
diff --git a/cachelib/allocator/memory/CompressedPtr.h b/cachelib/allocator/memory/CompressedPtr.h
index 4b6f956658..cbda038502 100644
--- a/cachelib/allocator/memory/CompressedPtr.h
+++ b/cachelib/allocator/memory/CompressedPtr.h
@@ -27,6 +27,9 @@ namespace cachelib {
 
 class SlabAllocator;
 
+template <typename PtrType, typename AllocatorContainer>
+class PtrCompressor;
+
 // the following are for pointer compression for the memory allocator.  We
 // compress pointers by storing the slab index and the alloc index of the
 // allocation inside the slab. With slab worth kNumSlabBits of data, if we
@@ -41,7 +44,7 @@ class SlabAllocator;
 // decompress a CompressedPtr than compress a pointer while creating one.
 class CACHELIB_PACKED_ATTR CompressedPtr {
  public:
-  using PtrType = uint32_t;
+  using PtrType = uint64_t;
   // Thrift doesn't support unsigned type
   using SerializedPtrType = int64_t;
 
@@ -83,14 +86,14 @@ class CACHELIB_PACKED_ATTR CompressedPtr {
  private:
   // null pointer representation. This is almost never guaranteed to be a
   // valid pointer that we can compress to.
-  static constexpr PtrType kNull = 0xffffffff;
+  static constexpr PtrType kNull = 0x00000000ffffffff;
 
   // default construct to null.
   PtrType ptr_{kNull};
 
   // create a compressed pointer for a valid memory allocation.
-  CompressedPtr(uint32_t slabIdx, uint32_t allocIdx)
-      : ptr_(compress(slabIdx, allocIdx)) {}
+  CompressedPtr(uint32_t slabIdx, uint32_t allocIdx, TierId tid = 0)
+      : ptr_(compress(slabIdx, allocIdx, tid)) {}
 
   constexpr explicit CompressedPtr(PtrType ptr) noexcept : ptr_{ptr} {}
 
@@ -100,40 +103,60 @@ class CACHELIB_PACKED_ATTR CompressedPtr {
   static constexpr unsigned int kNumAllocIdxBits =
       Slab::kNumSlabBits - Slab::kMinAllocPower;
 
+  // Use topmost 32 bits for TierId
+  // XXX: optimize
+  static constexpr unsigned int kNumTierIdxOffset = 32;
+
   static constexpr PtrType kAllocIdxMask = ((PtrType)1 << kNumAllocIdxBits) - 1;
 
+  // kNumTierIdxBits most significant bits
+  static constexpr PtrType kTierIdxMask = (((PtrType)1 << kNumTierIdxOffset) - 1) << (NumBits<PtrType>::value - kNumTierIdxOffset);
+
   // Number of bits for the slab index. This will be the top 16 bits of the
   // compressed ptr.
   static constexpr unsigned int kNumSlabIdxBits =
-      NumBits<PtrType>::value - kNumAllocIdxBits;
+      NumBits<PtrType>::value - kNumTierIdxOffset - kNumAllocIdxBits; 
 
-  // Compress the given slabIdx and allocIdx into a 32-bit compressed
+  // Compress the given slabIdx and allocIdx into a 64-bit compressed
   // pointer.
-  static PtrType compress(uint32_t slabIdx, uint32_t allocIdx) noexcept {
+  static PtrType compress(uint32_t slabIdx, uint32_t allocIdx, TierId tid) noexcept {
     XDCHECK_LE(allocIdx, kAllocIdxMask);
     XDCHECK_LT(slabIdx, (1u << kNumSlabIdxBits) - 1);
-    return (slabIdx << kNumAllocIdxBits) + allocIdx;
+    return (static_cast<uint64_t>(tid) << kNumTierIdxOffset) + (slabIdx << kNumAllocIdxBits) + allocIdx;
   }
 
   // Get the slab index of the compressed ptr
   uint32_t getSlabIdx() const noexcept {
     XDCHECK(!isNull());
-    return static_cast<uint32_t>(ptr_ >> kNumAllocIdxBits);
+    auto noTierIdPtr = ptr_ & ~kTierIdxMask;
+    return static_cast<uint32_t>(noTierIdPtr >> kNumAllocIdxBits);
   }
 
   // Get the allocation index of the compressed ptr
   uint32_t getAllocIdx() const noexcept {
     XDCHECK(!isNull());
-    return static_cast<uint32_t>(ptr_ & kAllocIdxMask);
+    auto noTierIdPtr = ptr_ & ~kTierIdxMask;
+    return static_cast<uint32_t>(noTierIdPtr & kAllocIdxMask);
+  }
+
+  uint32_t getTierId() const noexcept {
+    XDCHECK(!isNull());
+    return static_cast<uint32_t>(ptr_ >> kNumTierIdxOffset);
+  }
+
+  void setTierId(TierId tid) noexcept {
+    ptr_ += static_cast<uint64_t>(tid) << kNumTierIdxOffset;
   }
 
   friend SlabAllocator;
+  template <typename CPtrType, typename AllocatorContainer>
+  friend class PtrCompressor;
 };
 
 template <typename PtrType, typename AllocatorT>
-class PtrCompressor {
+class SingleTierPtrCompressor {
  public:
-  explicit PtrCompressor(const AllocatorT& allocator) noexcept
+  explicit SingleTierPtrCompressor(const AllocatorT& allocator) noexcept
       : allocator_(allocator) {}
 
   const CompressedPtr compress(const PtrType* uncompressed) const {
@@ -144,11 +167,11 @@ class PtrCompressor {
     return static_cast<PtrType*>(allocator_.unCompress(compressed));
   }
 
-  bool operator==(const PtrCompressor& rhs) const noexcept {
+  bool operator==(const SingleTierPtrCompressor& rhs) const noexcept {
     return &allocator_ == &rhs.allocator_;
   }
 
-  bool operator!=(const PtrCompressor& rhs) const noexcept {
+  bool operator!=(const SingleTierPtrCompressor& rhs) const noexcept {
     return !(*this == rhs);
   }
 
@@ -156,5 +179,49 @@ class PtrCompressor {
   // memory allocator that does the pointer compression.
   const AllocatorT& allocator_;
 };
+
+template <typename PtrType, typename AllocatorContainer>
+class PtrCompressor {
+ public:
+  explicit PtrCompressor(const AllocatorContainer& allocators) noexcept
+      : allocators_(allocators) {}
+
+  const CompressedPtr compress(const PtrType* uncompressed) const {
+    if (uncompressed == nullptr)
+      return CompressedPtr{};
+
+    TierId tid;
+    for (tid = 0; tid < allocators_.size(); tid++) {
+      if (allocators_[tid]->isMemoryInAllocator(static_cast<const void*>(uncompressed)))
+        break;
+    }
+
+    auto cptr = allocators_[tid]->compress(uncompressed);
+    cptr.setTierId(tid);
+
+    return cptr;
+  }
+
+  PtrType* unCompress(const CompressedPtr compressed) const {
+    if (compressed.isNull()) {
+      return nullptr;
+    }
+
+    auto &allocator = *allocators_[compressed.getTierId()];
+    return static_cast<PtrType*>(allocator.unCompress(compressed));
+  }
+
+  bool operator==(const PtrCompressor& rhs) const noexcept {
+    return &allocators_ == &rhs.allocators_;
+  }
+
+  bool operator!=(const PtrCompressor& rhs) const noexcept {
+    return !(*this == rhs);
+  }
+
+ private:
+  // memory allocator that does the pointer compression.
+  const AllocatorContainer& allocators_;
+};
 } // namespace cachelib
 } // namespace facebook
diff --git a/cachelib/allocator/memory/MemoryAllocator.h b/cachelib/allocator/memory/MemoryAllocator.h
index 32982fc3af..182058e76d 100644
--- a/cachelib/allocator/memory/MemoryAllocator.h
+++ b/cachelib/allocator/memory/MemoryAllocator.h
@@ -513,12 +513,13 @@ class MemoryAllocator {
   using CompressedPtr = facebook::cachelib::CompressedPtr;
   template <typename PtrType>
   using PtrCompressor =
-      facebook::cachelib::PtrCompressor<PtrType, SlabAllocator>;
+      facebook::cachelib::PtrCompressor<PtrType,
+      std::vector<std::unique_ptr<MemoryAllocator>>>;
 
   template <typename PtrType>
-  PtrCompressor<PtrType> createPtrCompressor() {
-    return slabAllocator_.createPtrCompressor<PtrType>();
-  }
+  using SingleTierPtrCompressor =
+      facebook::cachelib::PtrCompressor<PtrType,
+      SlabAllocator>;
 
   // compress a given pointer to a valid allocation made out of this allocator
   // through an allocate() or nullptr. Calling this otherwise with invalid
diff --git a/cachelib/allocator/memory/SlabAllocator.cpp b/cachelib/allocator/memory/SlabAllocator.cpp
index d29227660d..139e690472 100644
--- a/cachelib/allocator/memory/SlabAllocator.cpp
+++ b/cachelib/allocator/memory/SlabAllocator.cpp
@@ -519,6 +519,8 @@ serialization::SlabAllocatorObject SlabAllocator::saveState() {
 // for benchmarking purposes.
 const unsigned int kMarkerBits = 6;
 CompressedPtr SlabAllocator::compressAlt(const void* ptr) const {
+  // XXX: do we need to set tierId here?
+
   if (ptr == nullptr) {
     return CompressedPtr{};
   }
@@ -530,6 +532,8 @@ CompressedPtr SlabAllocator::compressAlt(const void* ptr) const {
 }
 
 void* SlabAllocator::unCompressAlt(const CompressedPtr cPtr) const {
+  // XXX: do we need to set tierId here?
+
   if (cPtr.isNull()) {
     return nullptr;
   }
diff --git a/cachelib/allocator/memory/SlabAllocator.h b/cachelib/allocator/memory/SlabAllocator.h
index fa5e00a892..875a8f5c2b 100644
--- a/cachelib/allocator/memory/SlabAllocator.h
+++ b/cachelib/allocator/memory/SlabAllocator.h
@@ -308,8 +308,8 @@ class SlabAllocator {
   }
 
   template <typename PtrType>
-  PtrCompressor<PtrType, SlabAllocator> createPtrCompressor() const {
-    return PtrCompressor<PtrType, SlabAllocator>(*this);
+  SingleTierPtrCompressor<PtrType, SlabAllocator> createSingleTierPtrCompressor() const {
+    return SingleTierPtrCompressor<PtrType, SlabAllocator>(*this);
   }
 
   // returns starting address of memory we own.
diff --git a/cachelib/allocator/tests/AllocatorResizeTest.h b/cachelib/allocator/tests/AllocatorResizeTest.h
index 06c7ae0e81..5f99cfcc93 100644
--- a/cachelib/allocator/tests/AllocatorResizeTest.h
+++ b/cachelib/allocator/tests/AllocatorResizeTest.h
@@ -1098,7 +1098,7 @@ class AllocatorResizeTest : public AllocatorTest<AllocatorT> {
         size_t allocBytes = 0;
         for (size_t k = 0; k < expectedIters * Slab::kSize / sz; k++) {
           const auto key = this->getRandomNewKey(alloc, keyLen);
-          auto handle = util::allocateAccessible(alloc, poolId, key, sz - 45);
+          auto handle = util::allocateAccessible(alloc, poolId, key, sz - 45 - 9 /* TODO: compressed ptr size */);
           if (!handle.get()) {
             break;
           }
@@ -1110,7 +1110,7 @@ class AllocatorResizeTest : public AllocatorTest<AllocatorT> {
         for (size_t k = 0; k < expectedIters * Slab::kSize / sz; k++) {
           const auto key = this->getRandomNewKey(alloc, keyLen);
           size_t allocBytes = 0;
-          auto handle = util::allocateAccessible(alloc, poolId, key, sz - 45);
+          auto handle = util::allocateAccessible(alloc, poolId, key, sz - 45 - 9 /* TODO: compressed ptr size */);
           allocBytes += handle->getSize();
         }
       }