Skip to content

Commit 07b5303

Browse files
authored
dml batch async (#8)
1 parent ea12bd0 commit 07b5303

File tree

3 files changed

+70
-83
lines changed

3 files changed

+70
-83
lines changed

accel-config.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,11 @@ do
2727
done
2828

2929
sudo accel-config config-wq ${SWQ} --group-id=0
30-
sudo accel-config config-wq ${SWQ} --priority=5
30+
sudo accel-config config-wq ${SWQ} --priority=1
3131
sudo accel-config config-wq ${SWQ} --wq-size=128
3232
sudo accel-config config-wq ${SWQ} --max-batch-size=1024
33+
sudo accel-config config-wq ${SWQ} --max-transfer-size=4194304
34+
sudo accel-config config-wq ${SWQ} --block-on-fault=0
3335
sudo accel-config config-wq ${SWQ} --type=user
3436
sudo accel-config config-wq ${SWQ} --name="dsa-test"
3537
sudo accel-config config-wq ${SWQ} --mode=shared

cachelib/allocator/CacheAllocator.h

Lines changed: 65 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -2079,7 +2079,7 @@ auto& mmContainer = getMMContainer(tid, pid, cid);
20792079
return evictions;
20802080
}
20812081

2082-
static std::string dsa_error_string(dml::mem_copy_result& result) {
2082+
static std::string dsa_error_string(dml::batch_result& result) {
20832083
std::string error;
20842084
switch (result.status) {
20852085
case dml::status_code::false_predicate: /**< Operation completed successfully: but result is unexpected */
@@ -2139,8 +2139,12 @@ auto& mmContainer = getMMContainer(tid, pid, cid);
21392139
}
21402140
return error;
21412141
}
2142+
21422143
// exposed for the background evictor to iterate through the memory and evict
21432144
// in batch. This should improve the insertion path for tiered memory configs
2145+
using allocator_t = std::allocator<dml::byte_t>;
2146+
using batch_handler_t = dml::handler<dml::batch_operation, allocator_t>;
2147+
21442148
size_t traverseAndEvictItemsUsingDsa(unsigned int tid,
21452149
unsigned int pid,
21462150
unsigned int cid,
@@ -2217,68 +2221,52 @@ auto& mmContainer = getMMContainer(tid, pid, cid);
22172221
}
22182222
XDCHECK_EQ(newItemHandles.size(),validHandleCnt);
22192223

2220-
/* for DML async op */
2221-
auto dmlHandles = std::vector<dml::handler<dml::mem_copy_operation,
2222-
std::allocator<dml::byte_t>>>();
2223-
//dmlHandles.reserve(validHandleCnt);
2224-
2225-
//std::default_random_engine generator;
2226-
//std::bernoulli_distribution distribution(1.0);
2227-
2224+
auto sequence = dml::sequence<allocator_t>(validHandleCnt);
22282225
for (auto index = 0U; index < candidates.size(); index++) {
22292226
XDCHECK_EQ(newItemHandles[index].get(),newItemPtr[index]);
22302227
if (newItemHandles[index].get() != newItemPtr[index]) {
2231-
throw std::runtime_error(folly::sformat("dml error - newItemHdls {:}, newItemPtr {:}",newItemHandles[index].get(),newItemPtr[index]));
2228+
throw std::runtime_error(folly::sformat(
2229+
"dml error - newItemHdls {:}, newItemPtr {:}",
2230+
newItemHandles[index].get(), newItemPtr[index]));
22322231
}
22332232
XDCHECK_EQ(newItemHandles[index]->getKey(), candidates[index]->getKey());
22342233
if (newItemHandles[index]->getKey() != candidates[index]->getKey()) {
2235-
throw std::runtime_error(folly::sformat("dml error - newItemHdl key {}, ptr {:}, old key {}, ptr {:}",newItemHandles[index]->getKey(), newItemHandles[index].get(),
2236-
candidates[index]->getKey(),candidates[index]));
2234+
throw std::runtime_error(folly::sformat(
2235+
"dml error - newItemHdl key {}, ptr {:}, old key {}, ptr {:}",
2236+
newItemHandles[index]->getKey(), newItemHandles[index].get(),
2237+
candidates[index]->getKey(),candidates[index]));
22372238
}
22382239
XDCHECK_EQ(newItemHandles[index]->getSize(), candidates[index]->getSize());
22392240
XDCHECK_EQ(newItemHandles[index]->getRefCount(), 1);
22402241
XDCHECK_EQ(candidates[index]->getRefCount(), 0);
22412242

2242-
//if (distribution(generator)) {
2243-
dml::const_data_view srcView = dml::make_view(
2243+
dml::const_data_view srcView = dml::make_view(
22442244
reinterpret_cast<uint8_t*>(candidates[index]->getMemory()),
22452245
candidates[index]->getSize());
2246-
dml::data_view dstView = dml::make_view(
2246+
dml::data_view dstView = dml::make_view(
22472247
reinterpret_cast<uint8_t*>(newItemHandles[index]->getMemory()),
22482248
newItemHandles[index]->getSize());
2249-
(*stats_.dsaEvictionSubmits)[tid][pid][cid].inc();
2250-
if (config_.dsaAsync) {
2251-
if (cid < 11) {
2252-
dmlHandles.emplace_back(
2253-
dml::submit<dml::software>(dml::mem_copy, srcView, dstView));
2254-
} else {
2255-
dmlHandles.emplace_back(
2256-
dml::submit<dml::hardware>(dml::mem_copy, srcView, dstView));
2257-
}
2258-
} else {
2259-
auto dmlHandle = dml::submit<dml::hardware>(dml::mem_copy, srcView, dstView);
2260-
auto result = dmlHandle.get();
2261-
if (result.status != dml::status_code::ok) {
2262-
std::string error = dsa_error_string(result);
2263-
throw std::runtime_error(folly::sformat("dml error: {} for item: {} count {}",
2264-
error, candidates[index]->toString(), (*stats_.dsaEvictionSubmits)[tid][pid][cid].get()));
2265-
}
2266-
XDCHECK_EQ(result.status,dml::status_code::ok);
2267-
}
2268-
//} else {
2269-
// std::memcpy(newItemHandles[index]->getMemory(),
2270-
// candidates[index]->getMemory(),
2271-
// candidates[index]->getSize());
2272-
// }
2249+
if (sequence.add(dml::mem_copy, srcView, dstView) != dml::status_code::ok) {
2250+
throw std::runtime_error("failed to add dml::mem_copy operation to the sequence.");
2251+
}
22732252
}
2274-
if (config_.dsaAsync) {
2275-
if (newItemHandles.size() != dmlHandles.size()) {
2276-
throw std::runtime_error(folly::sformat("dml error - newItemHdls {:}, dmlHandles {:}",newItemHandles.size(),dmlHandles.size()));
2277-
}
2278-
XDCHECK_EQ(newItemHandles.size(),dmlHandles.size());
2279-
XDCHECK_EQ(newItemHandles.size(),candidates.size());
2280-
} else {
2281-
XDCHECK_EQ(0,dmlHandles.size());
2253+
2254+
batch_handler_t handler{};
2255+
/* Use software path (memcpy) if class size is small (identified by class id < 11) */
2256+
if (cid < 11) {
2257+
handler = dml::submit<dml::software>(dml::batch, sequence);
2258+
XDCHECK(handler.valid());
2259+
if (!handler.valid()) {
2260+
throw std::runtime_error("Failed dml::software sequence submission");
2261+
}
2262+
} else { // larger items
2263+
handler = dml::submit<dml::hardware>(dml::batch, sequence,
2264+
dml::default_execution_interface<dml::hardware>{}, 0);
2265+
XDCHECK(handler.valid());
2266+
if (!handler.valid()) {
2267+
throw std::runtime_error("Failed dml::hardware sequence submission");
2268+
}
2269+
(*stats_.dsaEvictionSubmits)[tid][pid][cid].inc();
22822270
}
22832271

22842272
// Adding the item to mmContainer has to succeed since no one
@@ -2289,8 +2277,6 @@ auto& mmContainer = getMMContainer(tid, pid, cid);
22892277

22902278
for (auto index = 0U; index < candidates.size(); index++) {
22912279
XDCHECK(newItemHandles[index]);
2292-
2293-
22942280
// no one can add or remove chained items at this point
22952281
if (candidates[index]->hasChainedItem()) {
22962282
// safe to acquire handle for a moving Item
@@ -2307,32 +2293,31 @@ auto& mmContainer = getMMContainer(tid, pid, cid);
23072293
XLOGF(DFATAL, "{}", e.what());
23082294
throw;
23092295
}
2310-
23112296
XDCHECK(!candidates[index]->hasChainedItem());
23122297
XDCHECK(newItemHandles[index]->hasChainedItem());
23132298
}
2314-
auto predicate = [&](const Item& item) {
2315-
// we rely on moving flag being set (it should block all readers)
2316-
XDCHECK(item.getRefCount() == 0);
2317-
return true;
2318-
};
2319-
2320-
if (config_.dsaAsync) {
2321-
auto result = dmlHandles[index].get();
2322-
if (result.status != dml::status_code::ok) {
2323-
uint64_t dsaCallsPost = 0;
2324-
for (ClassId cid : classIds) {
2325-
dsaCallsPost += (*stats_.dsaEvictionSubmits)[tid][pid][cid].get();
2326-
}
2327-
uint64_t dsaCalls = dsaCallsPost - dsaCallsPre;
2328-
std::string error = dsa_error_string(result);
2329-
auto errorStr = folly::sformat("dml error: {} for item: {}, submitsi {:}", error, candidates[index]->toString(),dsaCalls);
2330-
XDCHECK_EQ(result.status,dml::status_code::ok) << errorStr;
2331-
throw std::runtime_error(errorStr);
2332-
}
2333-
XDCHECK_EQ(result.status,dml::status_code::ok);
2334-
//verify that queue has decreased in size
2299+
}
2300+
2301+
auto predicate = [&](const Item& item) {
2302+
// we rely on moving flag being set (it should block all readers)
2303+
XDCHECK(item.getRefCount() == 0);
2304+
return true;
2305+
};
2306+
2307+
auto result = handler.get();
2308+
if (result.status != dml::status_code::ok) {
2309+
uint64_t dsaCallsPost = 0;
2310+
for (ClassId cid : classIds) {
2311+
dsaCallsPost += (*stats_.dsaEvictionSubmits)[tid][pid][cid].get();
23352312
}
2313+
uint64_t dsaCalls = dsaCallsPost - dsaCallsPre;
2314+
std::string error = dsa_error_string(result);
2315+
auto errorStr = folly::sformat("dml error: submits {:}", error, dsaCalls);
2316+
throw std::runtime_error(errorStr);
2317+
}
2318+
XDCHECK_EQ(result.status, dml::status_code::ok);
2319+
2320+
for (auto index = 0U; index < candidates.size(); index++) {
23362321
// another thread may have called insertOrReplace() which
23372322
// could have marked this item as inaccessible causing the
23382323
// replaceIf() in the access container to fail - in this
@@ -2352,22 +2337,21 @@ auto& mmContainer = getMMContainer(tid, pid, cid);
23522337
(*stats_.numWritebacks)[tid][pid][cid].inc();
23532338
wakeUpWaiters(*candidates[index], std::move(newItemHandles[index]));
23542339
} else {
2355-
auto token = createPutToken(*candidates[index]);
2356-
auto ret = candidates[index]->markForEvictionWhenMoving();
2357-
XDCHECK(ret);
2340+
auto token = createPutToken(*candidates[index]);
2341+
auto ret = candidates[index]->markForEvictionWhenMoving();
2342+
XDCHECK(ret);
23582343

23592344
unlinkItemForEviction(*candidates[index]);
2360-
// wake up any readers that wait for the move to complete
2361-
// it's safe to do now, as we have the item marked exclusive and
2362-
// no other reader can be added to the waiters list
2363-
wakeUpWaiters(*candidates[index], WriteHandle{});
2345+
// wake up any readers that wait for the move to complete
2346+
// it's safe to do now, as we have the item marked exclusive and
2347+
// no other reader can be added to the waiters list
2348+
wakeUpWaiters(*candidates[index], WriteHandle{});
23642349

2365-
if (token.isValid() && shouldWriteToNvmCacheExclusive(*candidates[index])) {
2366-
nvmCache_->put(*candidates[index], std::move(token));
2350+
if (token.isValid() && shouldWriteToNvmCacheExclusive(*candidates[index])) {
2351+
nvmCache_->put(*candidates[index], std::move(token));
23672352
}
23682353
}
23692354

2370-
23712355
evictions++;
23722356
if (candidates[index]->hasChainedItem()) {
23732357
(*stats_.chainedItemEvictions)[tid][pid][cid].inc();

cachelib/cachebench/test_configs/hit_ratio/cdn/dsa_config_async.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
{
22
"cache_config": {
33
"cacheSizeMB": 32768,
4+
"poolRebalanceIntervalSec": 0,
45
"htBucketPower": 27,
56
"htBucketLock": 27,
67
"backgroundEvictorIntervalMilSec": 1,
@@ -15,7 +16,7 @@
1516
},
1617
{
1718
"ratio": 1,
18-
"memBindNodes": 0
19+
"memBindNodes": 1
1920
}
2021
]
2122
},

0 commit comments

Comments
 (0)