Skip to content

Commit 19473e9

Browse files
committed
Implement NUMA binding support for SysVShmSegment
1 parent 6122ab9 commit 19473e9

File tree

7 files changed

+72
-1
lines changed

7 files changed

+72
-1
lines changed

cachelib/allocator/CacheAllocator-inl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ ShmSegmentOpts CacheAllocator<CacheTrait>::createShmCacheOpts(TierId tid) {
125125
ShmSegmentOpts opts;
126126
opts.alignment = sizeof(Slab);
127127
opts.typeOpts = memoryTierConfigs[tid].getShmTypeOpts();
128+
opts.memBindNumaNodes = memoryTierConfigs[tid].getMemBind();
128129
if (auto *v = std::get_if<PosixSysVSegmentOpts>(&opts.typeOpts)) {
129130
v->usePosix = config_.usePosixShm;
130131
}

cachelib/allocator/MemoryTierCacheConfig.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,16 @@ class MemoryTierCacheConfig {
5353

5454
size_t getRatio() const noexcept { return ratio; }
5555

56+
// Allocate memory only from specified NUMA nodes
57+
// Stores a copy of the given NUMA node ids as this tier's binding and
// returns *this so that further setters can be chained.
//
// @param _numaNodes  ids of the NUMA nodes to remember for this tier
// @return            reference to this config object
MemoryTierCacheConfig& setMemBind(const std::vector<size_t>& _numaNodes) {
  this->numaNodes = _numaNodes;
  return *this;
}
61+
62+
std::vector<size_t> getMemBind() const {
63+
return numaNodes;
64+
}
65+
5666
size_t calculateTierSize(size_t totalCacheSize, size_t partitionNum) const {
5767
// TODO: Call this method when tiers are enabled in allocator
5868
// to calculate tier sizes in bytes.
@@ -82,6 +92,9 @@ class MemoryTierCacheConfig {
8292
// Options specific to shm type
8393
ShmTypeOpts shmOpts;
8494

95+
// NUMA node(s) to bind this tier's memory to
96+
std::vector<size_t> numaNodes;
97+
8598
MemoryTierCacheConfig() = default;
8699
};
87100
} // namespace cachelib

cachelib/shm/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ add_library (cachelib_shm
2525
add_dependencies(cachelib_shm thrift_generated_files)
2626
target_link_libraries(cachelib_shm PUBLIC
2727
cachelib_common
28+
numa
2829
)
2930

3031
install(TARGETS cachelib_shm

cachelib/shm/ShmCommon.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ struct ShmSegmentOpts {
9393
PageSizeT pageSize{PageSizeT::NORMAL};
9494
bool readOnly{false};
9595
size_t alignment{1}; // alignment for mapping.
96+
std::vector<size_t> memBindNumaNodes;
9697
// opts specific to segment type
9798
ShmTypeOpts typeOpts{PosixSysVSegmentOpts(false)};
9899

cachelib/shm/SysVShmSegment.cpp

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,11 @@
1818

1919
#include <folly/hash/Hash.h>
2020
#include <folly/logging/xlog.h>
21+
#include <folly/ScopeGuard.h>
2122
#include <sys/mman.h>
2223
#include <sys/shm.h>
24+
#include <numa.h>
25+
#include <numaif.h>
2326

2427
#include "cachelib/common/Utils.h"
2528

@@ -184,6 +187,50 @@ void shmCtlImpl(int shmid, int cmd, shmid_ds* buf) {
184187
}
185188
}
186189

190+
void mbindImpl(void *addr, unsigned long len, int mode,
191+
const std::vector<size_t>& memBindNumaNodes,
192+
unsigned int flags) {
193+
struct bitmask *nodesMask = numa_allocate_nodemask();
194+
auto guard = folly::makeGuard([&] { numa_bitmask_free(nodesMask); });
195+
196+
for(auto node : memBindNumaNodes) {
197+
numa_bitmask_setbit(nodesMask, node);
198+
}
199+
200+
long ret = mbind(addr, len, mode, nodesMask->maskp, nodesMask->size, flags);
201+
if(ret == 0) return;
202+
203+
switch (errno) {
204+
case EFAULT:
205+
util::throwSystemError(errno);
206+
break;
207+
case EINVAL:
208+
util::throwSystemError(errno, "Invalid parameters when bind segment to NUMA node(s)");
209+
break;
210+
case EIO:
211+
if(flags & MPOL_MF_STRICT) {
212+
util::throwSystemError(errno, "Segment already allocated on another NUMA node that does not follow the policy.");
213+
}
214+
if(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL )) {
215+
util::throwSystemError(errno, "Segment already allocated but kernel was unable to move it to specified NUMA node(s).");
216+
}
217+
util::throwSystemError(errno, "Invalid errno");
218+
break;
219+
case ENOMEM:
220+
util::throwSystemError(errno, "Could not bind memory. Insufficient kernel memory was available");
221+
break;
222+
case EPERM:
223+
if(flags & MPOL_MF_MOVE_ALL) {
224+
util::throwSystemError(errno, "Process does not have the CAP_SYS_NICE privilege to bind segment with MPOL_MF_MOVE_ALL flag");
225+
}
226+
util::throwSystemError(errno, "Invalid errno");
227+
break;
228+
default:
229+
XDCHECK(false);
230+
util::throwSystemError(errno, "Invalid errno");
231+
}
232+
}
233+
187234
} // namespace detail
188235

189236
void ensureSizeforHugePage(size_t size) {
@@ -270,11 +317,17 @@ void* SysVShmSegment::mapAddress(void* addr) const {
270317

271318
void* retAddr = detail::shmAttachImpl(shmid_, addr, shmFlags);
272319
XDCHECK(retAddr == addr || addr == nullptr);
320+
memBind(retAddr);
273321
return retAddr;
274322
}
275323

276324
void SysVShmSegment::unMap(void* addr) const { detail::shmDtImpl(addr); }
277325

326+
// Binds the mapping that starts at addr and spans getSize() bytes to the
// NUMA nodes listed in opts_.memBindNumaNodes, using the MPOL_BIND policy
// with no extra flags. Does nothing when the node list is empty.
void SysVShmSegment::memBind(void* addr) const {
  const auto& numaNodes = opts_.memBindNumaNodes;
  if (!numaNodes.empty()) {
    detail::mbindImpl(addr, getSize(), MPOL_BIND, numaNodes, 0);
  }
}
330+
278331
void SysVShmSegment::markForRemoval() {
279332
if (isMarkedForRemoval()) {
280333
return;

cachelib/shm/SysVShmSegment.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ class SysVShmSegment : public ShmBase {
100100
void lockPagesInMemory() const;
101101
void createReferenceMapping();
102102
void deleteReferenceMapping() const;
103+
void memBind(void* addr) const;
103104

104105
// the key identifier for the shared memory
105106
KeyType key_{kInvalidKey};

contrib/prerequisites-centos8.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,8 @@ sudo dnf --enablerepo="$POWERTOOLS_REPO" install -y \
5757
libsodium-static \
5858
libdwarf-static \
5959
boost-static \
60-
double-conversion-static
60+
double-conversion-static \
61+
numactl-devel
6162

6263
#Do not install these from OS packages - they are typically outdated.
6364
#gflags-devel \

0 commit comments

Comments
 (0)