From 48e0df3ab44898a96a1934af9b1454a2faed1c28 Mon Sep 17 00:00:00 2001
From: David Kyle
Date: Tue, 20 Jun 2023 10:14:11 +0100
Subject: [PATCH 1/4] Catch exceptions thrown during inference and report as errors

---
 bin/pytorch_inference/CCommandParser.h |  8 +++++--
 bin/pytorch_inference/Main.cc          | 29 +++++++++++++++++---------
 bin/pytorch_inference/evaluate.py      |  2 +-
 include/core/CCompressedLfuCache.h     | 17 ++++++++++-----
 4 files changed, 38 insertions(+), 18 deletions(-)

diff --git a/bin/pytorch_inference/CCommandParser.h b/bin/pytorch_inference/CCommandParser.h
index e726deeae3..5deb462b29 100644
--- a/bin/pytorch_inference/CCommandParser.h
+++ b/bin/pytorch_inference/CCommandParser.h
@@ -19,6 +19,7 @@
 #include
 #include
 #include
+#include <optional>
 #include
 #include
@@ -58,7 +59,7 @@ class CCommandParser {
     //! \brief Inference request cache interface.
     class CRequestCacheInterface {
     public:
-        using TComputeResponse = std::function<std::string(SRequest)>;
+        using TComputeResponse = std::function<std::optional<std::string>(SRequest)>;
         using TReadResponse = std::function<void(const std::string&, bool)>;
 
     public:
@@ -102,7 +103,10 @@
         bool lookup(SRequest request,
                     const TComputeResponse& computeResponse,
                     const TReadResponse& readResponse) override {
-            readResponse(computeResponse(std::move(request)), false);
+            auto computed = computeResponse(std::move(request));
+            if (computed) {
+                readResponse(*computed, false);
+            }
             return false;
         }

diff --git a/bin/pytorch_inference/Main.cc b/bin/pytorch_inference/Main.cc
index 884797a5fd..cce4dcb2db 100644
--- a/bin/pytorch_inference/Main.cc
+++ b/bin/pytorch_inference/Main.cc
@@ -18,6 +18,7 @@
 #include
 #include
+#include <optional>
 #include
 #include
@@ -92,16 +93,24 @@ bool handleRequest(ml::torch::CCommandParser::CRequestCacheInterface& cache,
         // We time the combination of the cache lookup and (if necessary)
         // the inference.
         ml::core::CStopWatch stopWatch(true);
-        cache.lookup(std::move(capturedRequest),
-                     [&](auto request_) -> std::string {
-                         torch::Tensor results = infer(module_, request_);
-                         return resultWriter.createInnerResult(results);
-                     },
-                     [&](const auto& innerResponseJson_, bool isCacheHit) {
-                         resultWriter.wrapAndWriteInnerResponse(innerResponseJson_,
-                                                                requestId, isCacheHit,
-                                                                stopWatch.stop());
-                     });
+        cache.lookup(
+            std::move(capturedRequest),
+            [&](auto request_) -> std::optional<std::string> {
+                try {
+                    torch::Tensor results = infer(module_, request_);
+                    return resultWriter.createInnerResult(results);
+                } catch (const c10::Error& e) {
+                    resultWriter.writeError(request_.s_RequestId, e.what());
+                    return std::nullopt;
+                } catch (std::runtime_error& e) {
+                    resultWriter.writeError(request_.s_RequestId, e.what());
+                    return std::nullopt;
+                }
+            },
+            [&](const auto& innerResponseJson_, bool isCacheHit) {
+                resultWriter.wrapAndWriteInnerResponse(
+                    innerResponseJson_, requestId, isCacheHit, stopWatch.stop());
+            });
     });
     return true;
 }

diff --git a/bin/pytorch_inference/evaluate.py b/bin/pytorch_inference/evaluate.py
index ac01ded113..6845c906ef 100644
--- a/bin/pytorch_inference/evaluate.py
+++ b/bin/pytorch_inference/evaluate.py
@@ -288,7 +288,7 @@ def test_evaluation(args):
 
     for result in result_docs:
         if 'error' in result:
-            print(f"Inference failed. Request: {result['error']['request_id']}, Msg: {result['error']['error']}")
+            print(f"Inference failed. Request: {result['request_id']}, Msg: {result['error']['error']}")
             results_match = False
             continue

diff --git a/include/core/CCompressedLfuCache.h b/include/core/CCompressedLfuCache.h
index b19f09ea12..17529ebe68 100644
--- a/include/core/CCompressedLfuCache.h
+++ b/include/core/CCompressedLfuCache.h
@@ -30,6 +30,7 @@
 #include
 #include
 #include
+#include <optional>
 #include
 #include
 #include
@@ -65,7 +66,7 @@ class CCompressedLfuCache {
     using TDictionary = CCompressedDictionary;
     using TCompressedKey = typename TDictionary::CWord;
     using TCompressKey = std::function<TCompressedKey(const TDictionary&, const KEY&)>;
-    using TComputeValueCallback = std::function<VALUE(KEY)>;
+    using TComputeValueCallback = std::function<std::optional<VALUE>(KEY)>;
     using TReadValueCallback = std::function<void(const VALUE&, bool)>;
 
 public:
@@ -96,6 +97,9 @@
     //! Lookup an item with \p key in the cache or else fall back to computing.
     //!
+    //! \warning If \p computeValue fails to produce a value (returns std::nullopt)
+    //! then \p readValue will not be called.
+    //!
     //! \param[in] key The item key.
     //! \param[in] computeValue Computes the value in the case of a cache miss.
     //! \param[in] readValue Processes the value.
@@ -137,15 +141,18 @@
         }
 
         auto value = computeValue(std::move(key));
+        if (!value) {
+            return false;
+        }
 
-        std::size_t itemMemoryUsage{memory::dynamicSize(value)};
+        std::size_t itemMemoryUsage{memory::dynamicSize(*value)};
 
         if (this->guardWrite(TIME_OUT, [&] {
                 // It is possible that two values with the same key check the cache
                 // before either takes the write lock. So check if this is already
                 // in the cache before going any further.
                 if (m_ItemCache.find(compressedKey) != m_ItemCache.end()) {
-                    readValue(value, true);
+                    readValue(*value, true);
                     this->incrementCount(compressedKey);
                     return;
                 }
@@ -158,14 +165,14 @@
                     // It's possible that the cache is empty yet isn't big
                     // enough to hold this new item.
                     if (itemToEvict == m_ItemStats.end()) {
-                        readValue(value, false);
+                        readValue(*value, false);
                         return;
                     }
                     m_RemovedCount += lastEvictedCount;
                     lastEvictedCount = itemToEvict->count();
                     this->removeFromCache(itemToEvict);
                 }
-                readValue(this->insert(compressedKey, value, itemMemoryUsage,
+                readValue(this->insert(compressedKey, *value, itemMemoryUsage,
                                        count + lastEvictedCount), false);
             }) == false) {

From 0647c3fbfec91b30e0850b310a47a2e40fb06280 Mon Sep 17 00:00:00 2001
From: David Kyle
Date: Tue, 20 Jun 2023 11:33:11 +0100
Subject: [PATCH 2/4] changelog

---
 docs/CHANGELOG.asciidoc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc
index 48edb4d2c1..fce4843043 100644
--- a/docs/CHANGELOG.asciidoc
+++ b/docs/CHANGELOG.asciidoc
@@ -38,6 +38,7 @@
 === Bug Fixes
 
 * Prevent high memory usage by evaluating batch inference singularly. (See {ml-pull}2538[#2538].)
+* Catch exceptions thrown during inference and report as errors. (See {ml-pull}2542[#2542].)
 
 == {es} version 8.8.0

From b6f1b5d531893a1cdb977b5b8348b6ee31e37b86 Mon Sep 17 00:00:00 2001
From: David Kyle
Date: Tue, 20 Jun 2023 14:38:26 +0100
Subject: [PATCH 3/4] test

---
 lib/core/unittest/CCompressedLfuCacheTest.cc | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/lib/core/unittest/CCompressedLfuCacheTest.cc b/lib/core/unittest/CCompressedLfuCacheTest.cc
index 3de709ee5a..812dfef594 100644
--- a/lib/core/unittest/CCompressedLfuCacheTest.cc
+++ b/lib/core/unittest/CCompressedLfuCacheTest.cc
@@ -24,6 +24,7 @@
 #include
 #include
+#include <optional>
 #include
 #include
 #include
@@ -612,4 +613,19 @@ BOOST_AUTO_TEST_CASE(testClear) {
     BOOST_TEST_REQUIRE(cache.checkInvariants());
 }
 
+BOOST_AUTO_TEST_CASE(testComputeValueReturnsNullOpt) {
+    TStrStrCache cache{32 * core::constants::BYTES_IN_KILOBYTES,
+                       [](const TStrStrCache::TDictionary& dictionary,
+                          const std::string& key) { return dictionary.word(key); }};
+
+    bool valueRead{false};
+
+    BOOST_REQUIRE_EQUAL(
+        false,
+        cache.lookup("key_1", [](std::string) { return std::nullopt; },
+                     [&valueRead](const std::string&, bool) { valueRead = true; }));
+    BOOST_REQUIRE_EQUAL(false, valueRead);
+}
+
 BOOST_AUTO_TEST_SUITE_END()

From 1c694d26c35419c8db7f1661f5f2378b413927b1 Mon Sep 17 00:00:00 2001
From: David Kyle
Date: Tue, 20 Jun 2023 16:20:29 +0100
Subject: [PATCH 4/4] address comments

---
 bin/pytorch_inference/Main.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bin/pytorch_inference/Main.cc b/bin/pytorch_inference/Main.cc
index cce4dcb2db..27d99bcb84 100644
--- a/bin/pytorch_inference/Main.cc
+++ b/bin/pytorch_inference/Main.cc
@@ -18,7 +18,6 @@
 #include
 #include
-#include <optional>
 #include
 #include
@@ -38,6 +37,7 @@
 #include
 #include
+#include <optional>
 #include
 
 torch::Tensor infer(torch::jit::script::Module& module_,
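
Note: the pattern running through these patches — a compute callback that returns std::optional so a failed computation is reported as an error, skips the read callback, and is never inserted into the cache — can be modelled in isolation. The sketch below is illustrative only, not code from the patches: SimpleCache, TCompute, TRead and every other name in it are hypothetical stand-ins for the lookup contract of CCompressedLfuCache.

#include <functional>
#include <iostream>
#include <optional>
#include <stdexcept>
#include <string>
#include <unordered_map>

// Toy stand-in for the cache: no compression, no LFU eviction, just the
// optional-aware lookup contract introduced in PATCH 1.
class SimpleCache {
public:
    using TCompute = std::function<std::optional<std::string>(const std::string&)>;
    using TRead = std::function<void(const std::string&, bool /*isCacheHit*/)>;

    // Returns true on a cache hit. If compute returns std::nullopt, read is
    // never called and nothing is inserted, mirroring the early return added
    // to CCompressedLfuCache::lookup.
    bool lookup(const std::string& key, const TCompute& compute, const TRead& read) {
        auto hit = m_Items.find(key);
        if (hit != m_Items.end()) {
            read(hit->second, true);
            return true;
        }
        std::optional<std::string> value{compute(key)};
        if (!value) {
            // The compute callback is expected to have reported the failure
            // itself, as Main.cc does via resultWriter.writeError.
            return false;
        }
        read(*value, false);
        m_Items.emplace(key, std::move(*value));
        return false;
    }

private:
    std::unordered_map<std::string, std::string> m_Items;
};

int main() {
    SimpleCache cache;
    auto compute = [](const std::string& key) -> std::optional<std::string> {
        try {
            if (key == "bad") {
                // Stands in for the c10::Error / std::runtime_error that
                // infer() might throw.
                throw std::runtime_error{"inference failed"};
            }
            return "result for " + key;
        } catch (const std::runtime_error& e) {
            std::cerr << "error: " << e.what() << '\n';
            return std::nullopt;
        }
    };
    auto read = [](const std::string& value, bool isCacheHit) {
        std::cout << value << (isCacheHit ? " (cached)" : " (computed)") << '\n';
    };
    cache.lookup("good", compute, read); // computed, then inserted
    cache.lookup("good", compute, read); // cache hit
    cache.lookup("bad", compute, read);  // error logged, read never called
    return 0;
}

The key design point the patches make, which the sketch preserves, is that error reporting stays inside the compute callback (which knows the request id), while the cache itself only needs to understand "no value was produced" and must not cache or propagate the failure.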