
Commit 4b769f8

[NLP] Catch exceptions thrown during inference and report as errors (#2542)
1 parent 23b6900 commit 4b769f8

File tree (6 files changed: +55, -18 lines)

bin/pytorch_inference/CCommandParser.h
bin/pytorch_inference/Main.cc
bin/pytorch_inference/evaluate.py
docs/CHANGELOG.asciidoc
include/core/CCompressedLfuCache.h
lib/core/unittest/CCompressedLfuCacheTest.cc
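
The common thread in this commit: compute callbacks that previously returned a result directly now return std::optional, so an exception caught inside the callback can be reported as an error document and signalled to the caller as std::nullopt, in which case the read callback is skipped. A minimal, self-contained sketch of that pattern (hypothetical names, not the actual ml-cpp API):

    #include <functional>
    #include <iostream>
    #include <optional>
    #include <stdexcept>
    #include <string>

    using TCompute = std::function<std::optional<std::string>(const std::string&)>;
    using TRead = std::function<void(const std::string&)>;

    // Hypothetical stand-in for infer(): always throws, to exercise the error path.
    std::string runInference(const std::string&) {
        throw std::runtime_error{"inference failed"};
    }

    // Mimics CRequestCacheInterface::lookup: the read callback only runs when
    // the compute callback actually produced a value.
    void lookup(const std::string& request, const TCompute& compute, const TRead& read) {
        if (auto computed = compute(request)) {
            read(*computed);
        }
    }

    int main() {
        lookup("request_1",
               [](const std::string& request) -> std::optional<std::string> {
                   try {
                       return runInference(request);
                   } catch (const std::runtime_error& e) {
                       // Report the failure as an error, then signal "no result".
                       std::cerr << "error for '" << request << "': " << e.what() << '\n';
                       return std::nullopt;
                   }
               },
               [](const std::string& response) { std::cout << response << '\n'; });
        return 0;
    }

The diffs below apply this pattern to the dummy request cache (CCommandParser.h), the inference loop (Main.cc), and the LFU cache (CCompressedLfuCache.h).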

bin/pytorch_inference/CCommandParser.h
Lines changed: 6 additions & 2 deletions

@@ -19,6 +19,7 @@
 #include <functional>
 #include <iosfwd>
 #include <memory>
+#include <optional>
 #include <string>
 #include <vector>

@@ -58,7 +59,7 @@ class CCommandParser {
     //! \brief Inference request cache interface.
     class CRequestCacheInterface {
     public:
-        using TComputeResponse = std::function<std::string(SRequest)>;
+        using TComputeResponse = std::function<std::optional<std::string>(SRequest)>;
         using TReadResponse = std::function<void(const std::string&, bool)>;

     public:
@@ -102,7 +103,10 @@ class CCommandParser {
        bool lookup(SRequest request,
                    const TComputeResponse& computeResponse,
                    const TReadResponse& readResponse) override {
-            readResponse(computeResponse(std::move(request)), false);
+            auto computed = computeResponse(std::move(request));
+            if (computed) {
+                readResponse(*computed, false);
+            }
             return false;
         }
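
Note the contract this introduces for the non-caching implementation: readResponse runs only when the compute callback actually produced a value; a std::nullopt result means the callback has already reported the failure, so there is nothing to forward.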

bin/pytorch_inference/Main.cc
Lines changed: 19 additions & 10 deletions

@@ -37,6 +37,7 @@

 #include <cstdint>
 #include <memory>
+#include <optional>
 #include <string>

 torch::Tensor infer(torch::jit::script::Module& module_,
@@ -92,16 +93,24 @@ bool handleRequest(ml::torch::CCommandParser::CRequestCacheInterface& cache,
         // We time the combination of the cache lookup and (if necessary)
         // the inference.
         ml::core::CStopWatch stopWatch(true);
-        cache.lookup(std::move(capturedRequest),
-                     [&](auto request_) -> std::string {
-                         torch::Tensor results = infer(module_, request_);
-                         return resultWriter.createInnerResult(results);
-                     },
-                     [&](const auto& innerResponseJson_, bool isCacheHit) {
-                         resultWriter.wrapAndWriteInnerResponse(innerResponseJson_,
-                                                                requestId, isCacheHit,
-                                                                stopWatch.stop());
-                     });
+        cache.lookup(
+            std::move(capturedRequest),
+            [&](auto request_) -> std::optional<std::string> {
+                try {
+                    torch::Tensor results = infer(module_, request_);
+                    return resultWriter.createInnerResult(results);
+                } catch (const c10::Error& e) {
+                    resultWriter.writeError(request_.s_RequestId, e.what());
+                    return std::nullopt;
+                } catch (std::runtime_error& e) {
+                    resultWriter.writeError(request_.s_RequestId, e.what());
+                    return std::nullopt;
+                }
+            },
+            [&](const auto& innerResponseJson_, bool isCacheHit) {
+                resultWriter.wrapAndWriteInnerResponse(
+                    innerResponseJson_, requestId, isCacheHit, stopWatch.stop());
+            });
     });
     return true;
 }
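
c10::Error is LibTorch's exception type, so both Torch-specific failures and generic std::runtime_error failures raised during infer() are now converted into error documents via resultWriter.writeError rather than escaping handleRequest; returning std::nullopt then tells the cache that no response was produced.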

bin/pytorch_inference/evaluate.py
Lines changed: 1 addition & 1 deletion

@@ -288,7 +288,7 @@ def test_evaluation(args):
     for result in result_docs:

         if 'error' in result:
-            print(f"Inference failed. Request: {result['error']['request_id']}, Msg: {result['error']['error']}")
+            print(f"Inference failed. Request: {result['request_id']}, Msg: {result['error']['error']}")
             results_match = False
             continue

docs/CHANGELOG.asciidoc
Lines changed: 1 addition & 0 deletions

@@ -38,6 +38,7 @@

 === Bug Fixes
 * Prevent high memory usage by evaluating batch inference singularly. (See {ml-pull}2538[#2538].)
+* Catch exceptions thrown during inference and report as errors. (See {ml-pull}2542[#2542].)

 == {es} version 8.8.0

include/core/CCompressedLfuCache.h
Lines changed: 12 additions & 5 deletions

@@ -30,6 +30,7 @@
 #include <limits>
 #include <memory>
 #include <mutex>
+#include <optional>
 #include <set>
 #include <shared_mutex>
 #include <string>
@@ -65,7 +66,7 @@ class CCompressedLfuCache {
     using TDictionary = CCompressedDictionary<COMPRESSED_KEY_BITS / 64>;
     using TCompressedKey = typename TDictionary::CWord;
     using TCompressKey = std::function<TCompressedKey(const TDictionary&, const KEY&)>;
-    using TComputeValueCallback = std::function<VALUE(KEY)>;
+    using TComputeValueCallback = std::function<std::optional<VALUE>(KEY)>;
     using TReadValueCallback = std::function<void(const VALUE&, bool)>;

 public:
@@ -96,6 +97,9 @@ class CCompressedLfuCache {

     //! Lookup an item with \p key in the cache or else fall back to computing.
     //!
+    //! \warning If \p computeValue fails to produce a value (returns std::nullopt)
+    //! then \p readValue will not be called.
+    //!
     //! \param[in] key The item key.
     //! \param[in] computeValue Computes the value in the case of a cache miss.
     //! \param[in] readValue Processes the value.
@@ -137,15 +141,18 @@
         }

         auto value = computeValue(std::move(key));
+        if (!value) {
+            return false;
+        }

-        std::size_t itemMemoryUsage{memory::dynamicSize(value)};
+        std::size_t itemMemoryUsage{memory::dynamicSize(*value)};

         if (this->guardWrite(TIME_OUT, [&] {
                 // It is possible that two values with the same key check the cache
                 // before either takes the write lock. So check if this is already
                 // in the cache before going any further.
                 if (m_ItemCache.find(compressedKey) != m_ItemCache.end()) {
-                    readValue(value, true);
+                    readValue(*value, true);
                     this->incrementCount(compressedKey);
                     return;
                 }
@@ -158,14 +165,14 @@
                     // It's possible that the cache is empty yet isn't big
                     // enough to hold this new item.
                     if (itemToEvict == m_ItemStats.end()) {
-                        readValue(value, false);
+                        readValue(*value, false);
                         return;
                     }
                     m_RemovedCount += lastEvictedCount;
                     lastEvictedCount = itemToEvict->count();
                     this->removeFromCache(itemToEvict);
                 }
-                readValue(this->insert(compressedKey, value, itemMemoryUsage,
+                readValue(this->insert(compressedKey, *value, itemMemoryUsage,
                                        count + lastEvictedCount),
                           false);
             }) == false) {
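
When computeValue returns std::nullopt, lookup now returns false immediately: nothing is inserted into the cache and readValue is never invoked, matching the \warning added above. The new unit test below verifies exactly this behaviour.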

lib/core/unittest/CCompressedLfuCacheTest.cc
Lines changed: 16 additions & 0 deletions

@@ -24,6 +24,7 @@
 #include <boost/test/unit_test.hpp>

 #include <chrono>
+#include <optional>
 #include <sstream>
 #include <string>
 #include <thread>
@@ -612,4 +613,19 @@ BOOST_AUTO_TEST_CASE(testClear) {
     BOOST_TEST_REQUIRE(cache.checkInvariants());
 }

+BOOST_AUTO_TEST_CASE(testComputeValueReturnsNullOpt) {
+    TStrStrCache cache{32 * core::constants::BYTES_IN_KILOBYTES,
+                       [](const TStrStrCache::TDictionary& dictionary, const std::string& key) {
+                           return dictionary.word(key);
+                       }};
+
+    bool valueRead{false};
+
+    BOOST_REQUIRE_EQUAL(
+        false,
+        cache.lookup("key_1", [](std::string) { return std::nullopt; },
+                     [&valueRead](const std::string&, bool) { valueRead = true; }));
+    BOOST_REQUIRE_EQUAL(false, valueRead);
+}
+
 BOOST_AUTO_TEST_SUITE_END()
