Skip to content

Commit a03b0c7

Browse files
authored
[8.0] [ML] fail inference processor more consistently on certain error types (#81475) (#81546)
* [ML] fail inference processor more consistently on certain error types (#81475)

This updates the following scenarios and causes NER/native inference to fail and not write a warning:

- missing vocabulary values
- missing model/deployment
- native process failed
- native process stopping
- request timed out
- misconfigured inference task update type

* fixing for backport

* fixing backport

* fixing backport
1 parent c355885 commit a03b0c7

File tree

6 files changed

+126
-34
lines changed

6 files changed

+126
-34
lines changed

x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/PyTorchModelIT.java

Lines changed: 81 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040

4141
import static org.elasticsearch.xpack.ml.integration.InferenceIngestIT.putPipeline;
4242
import static org.elasticsearch.xpack.ml.integration.InferenceIngestIT.simulateRequest;
43+
import static org.hamcrest.Matchers.allOf;
4344
import static org.hamcrest.Matchers.containsString;
4445
import static org.hamcrest.Matchers.equalTo;
4546
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
@@ -465,7 +466,11 @@ public void testInferencePipelineAgainstUnallocatedModel() throws IOException {
465466
String response = EntityUtils.toString(client().performRequest(simulateRequest(source)).getEntity());
466467
assertThat(
467468
response,
468-
containsString("model [not-deployed] must be deployed to use. Please deploy with the start trained model deployment API.")
469+
allOf(
470+
containsString("model [not-deployed] must be deployed to use. Please deploy with the start trained model deployment API."),
471+
containsString("error"),
472+
not(containsString("warning"))
473+
)
469474
);
470475

471476
client().performRequest(
@@ -528,6 +533,81 @@ public void testStopUsedDeploymentByIngestProcessor() throws IOException {
528533
stopDeployment(modelId, true);
529534
}
530535

536+
public void testPipelineWithBadProcessor() throws IOException {
537+
String model = "deployed";
538+
createTrainedModel(model);
539+
putVocabulary(List.of("once", "twice"), model);
540+
putModelDefinition(model);
541+
startDeployment(model);
542+
String source = """
543+
{
544+
"pipeline": {
545+
"processors": [
546+
{
547+
"inference": {
548+
"model_id": "deployed",
549+
"inference_config": {
550+
"ner": {}
551+
}
552+
}
553+
}
554+
]
555+
},
556+
"docs": [
557+
{"_source": {"input": "my words"}}]
558+
}
559+
""";
560+
561+
String response = EntityUtils.toString(client().performRequest(simulateRequest(source)).getEntity());
562+
assertThat(
563+
response,
564+
allOf(
565+
containsString("inference not possible. Task is configured with [pass_through] but received update of type [ner]"),
566+
containsString("error"),
567+
not(containsString("warning"))
568+
)
569+
);
570+
571+
source = """
572+
{
573+
"pipeline": {
574+
"processors": [
575+
{
576+
"inference": {
577+
"model_id": "deployed"
578+
}
579+
}
580+
]
581+
},
582+
"docs": [
583+
{"_source": {"input": "my words"}}]
584+
}
585+
""";
586+
587+
response = EntityUtils.toString(client().performRequest(simulateRequest(source)).getEntity());
588+
assertThat(response, allOf(containsString("error"), not(containsString("warning"))));
589+
590+
// Missing input field is a warning
591+
source = """
592+
{
593+
"pipeline": {
594+
"processors": [
595+
{
596+
"inference": {
597+
"model_id": "deployed"
598+
}
599+
}
600+
]
601+
},
602+
"docs": [
603+
{"_source": {"something": "my words"}}]
604+
}
605+
""";
606+
607+
response = EntityUtils.toString(client().performRequest(simulateRequest(source)).getEntity());
608+
assertThat(response, containsString("warning"));
609+
}
610+
531611
private int sumInferenceCountOnNodes(List<Map<String, Object>> nodes) {
532612
int inferenceCount = 0;
533613
for (var node : nodes) {

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportInternalInferModelAction.java

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@
66
*/
77
package org.elasticsearch.xpack.ml.action;
88

9-
import org.elasticsearch.ElasticsearchStatusException;
10-
import org.elasticsearch.ExceptionsHelper;
119
import org.elasticsearch.action.ActionListener;
1210
import org.elasticsearch.action.support.ActionFilters;
1311
import org.elasticsearch.action.support.HandledTransportAction;
@@ -17,7 +15,6 @@
1715
import org.elasticsearch.core.TimeValue;
1816
import org.elasticsearch.license.LicenseUtils;
1917
import org.elasticsearch.license.XPackLicenseState;
20-
import org.elasticsearch.rest.RestStatus;
2118
import org.elasticsearch.tasks.Task;
2219
import org.elasticsearch.tasks.TaskId;
2320
import org.elasticsearch.threadpool.ThreadPool;
@@ -30,7 +27,6 @@
3027
import org.elasticsearch.xpack.core.ml.action.InternalInferModelAction.Request;
3128
import org.elasticsearch.xpack.core.ml.action.InternalInferModelAction.Response;
3229
import org.elasticsearch.xpack.core.ml.inference.results.InferenceResults;
33-
import org.elasticsearch.xpack.core.ml.inference.results.WarningInferenceResults;
3430
import org.elasticsearch.xpack.core.ml.inference.trainedmodel.InferenceConfigUpdate;
3531
import org.elasticsearch.xpack.ml.inference.allocation.TrainedModelAllocationMetadata;
3632
import org.elasticsearch.xpack.ml.inference.loadingservice.LocalModel;
@@ -191,19 +187,7 @@ private void inferSingleDocAgainstAllocatedModel(
191187
ML_ORIGIN,
192188
InferTrainedModelDeploymentAction.INSTANCE,
193189
request,
194-
ActionListener.wrap(r -> listener.onResponse(r.getResults()), e -> {
195-
Throwable unwrapped = ExceptionsHelper.unwrapCause(e);
196-
if (unwrapped instanceof ElasticsearchStatusException) {
197-
ElasticsearchStatusException ex = (ElasticsearchStatusException) unwrapped;
198-
if (ex.status().equals(RestStatus.TOO_MANY_REQUESTS)) {
199-
listener.onFailure(ex);
200-
} else {
201-
listener.onResponse(new WarningInferenceResults(ex.getMessage()));
202-
}
203-
} else {
204-
listener.onResponse(new WarningInferenceResults(e.getMessage()));
205-
}
206-
})
190+
ActionListener.wrap(r -> listener.onResponse(r.getResults()), listener::onFailure)
207191
);
208192
}
209193
}

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/deployment/DeploymentManager.java

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import org.apache.logging.log4j.Logger;
1212
import org.apache.logging.log4j.message.ParameterizedMessage;
1313
import org.apache.lucene.util.SetOnce;
14+
import org.elasticsearch.ElasticsearchException;
1415
import org.elasticsearch.ElasticsearchStatusException;
1516
import org.elasticsearch.ResourceNotFoundException;
1617
import org.elasticsearch.action.ActionListener;
@@ -33,6 +34,7 @@
3334
import org.elasticsearch.xpack.core.ml.inference.TrainedModelConfig;
3435
import org.elasticsearch.xpack.core.ml.inference.TrainedModelInput;
3536
import org.elasticsearch.xpack.core.ml.inference.results.InferenceResults;
37+
import org.elasticsearch.xpack.core.ml.inference.results.WarningInferenceResults;
3638
import org.elasticsearch.xpack.core.ml.inference.trainedmodel.IndexLocation;
3739
import org.elasticsearch.xpack.core.ml.inference.trainedmodel.InferenceConfig;
3840
import org.elasticsearch.xpack.core.ml.inference.trainedmodel.NlpConfig;
@@ -227,16 +229,18 @@ public void infer(
227229
) {
228230
if (task.isStopped()) {
229231
listener.onFailure(
230-
new IllegalStateException(
231-
"[" + task.getModelId() + "] is stopping or stopped due to [" + task.stoppedReason().orElse("") + "]"
232+
ExceptionsHelper.conflictStatusException(
233+
"[{}] is stopping or stopped due to [{}]",
234+
task.getModelId(),
235+
task.stoppedReason().orElse("")
232236
)
233237
);
234238
return;
235239
}
236240

237241
ProcessContext processContext = processContextByAllocation.get(task.getId());
238242
if (processContext == null) {
239-
listener.onFailure(new IllegalStateException("[" + task.getModelId() + "] process context missing"));
243+
listener.onFailure(ExceptionsHelper.conflictStatusException("[{}] process context missing", task.getModelId()));
240244
return;
241245
}
242246

@@ -258,7 +262,7 @@ public void infer(
258262
}
259263
}
260264

261-
static class InferenceAction extends AbstractRunnable {
265+
static class InferenceAction extends AbstractRunnable implements ActionListener<InferenceResults> {
262266
private final String modelId;
263267
private final long requestId;
264268
private final TimeValue timeout;
@@ -304,6 +308,11 @@ void onTimeout() {
304308
logger.debug("[{}] request [{}] received timeout after [{}] but listener already alerted", modelId, requestId, timeout);
305309
}
306310

311+
@Override
312+
public void onResponse(InferenceResults inferenceResults) {
313+
onSuccess(inferenceResults);
314+
}
315+
307316
void onSuccess(InferenceResults inferenceResults) {
308317
timeoutHandler.cancel();
309318
if (notified.compareAndSet(false, true)) {
@@ -360,17 +369,32 @@ protected void doRun() throws Exception {
360369
processContext,
361370
request.tokenization,
362371
processor.getResultProcessor((NlpConfig) config),
363-
ActionListener.wrap(this::onSuccess, this::onFailure)
372+
ActionListener.wrap(this::onSuccess, f -> handleFailure(f, this))
364373
),
365374
this::onFailure
366375
)
367376
);
368377
processContext.process.get().writeInferenceRequest(request.processInput);
369378
} catch (IOException e) {
370379
logger.error(new ParameterizedMessage("[{}] error writing to process", processContext.task.getModelId()), e);
371-
onFailure(ExceptionsHelper.serverError("error writing to process", e));
380+
handleFailure(ExceptionsHelper.serverError("error writing to process", e), this);
372381
} catch (Exception e) {
373-
onFailure(e);
382+
handleFailure(e, this);
383+
}
384+
}
385+
386+
private static void handleFailure(Exception e, ActionListener<InferenceResults> listener) {
387+
Throwable unwrapped = org.elasticsearch.ExceptionsHelper.unwrapCause(e);
388+
if (unwrapped instanceof ElasticsearchException ex) {
389+
if (ex.status() == RestStatus.BAD_REQUEST) {
390+
listener.onResponse(new WarningInferenceResults(ex.getMessage()));
391+
} else {
392+
listener.onFailure(ex);
393+
}
394+
} else if (unwrapped instanceof IllegalArgumentException) {
395+
listener.onResponse(new WarningInferenceResults(e.getMessage()));
396+
} else {
397+
listener.onFailure(e);
374398
}
375399
}
376400

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/deployment/TrainedModelDeploymentTask.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,12 @@
1010
import org.apache.logging.log4j.LogManager;
1111
import org.apache.logging.log4j.Logger;
1212
import org.apache.lucene.util.SetOnce;
13+
import org.elasticsearch.ElasticsearchStatusException;
1314
import org.elasticsearch.action.ActionListener;
1415
import org.elasticsearch.core.TimeValue;
1516
import org.elasticsearch.license.LicensedFeature;
1617
import org.elasticsearch.license.XPackLicenseState;
18+
import org.elasticsearch.rest.RestStatus;
1719
import org.elasticsearch.tasks.CancellableTask;
1820
import org.elasticsearch.tasks.TaskId;
1921
import org.elasticsearch.xpack.core.ml.MlTasks;
@@ -110,14 +112,15 @@ protected void onCancelled() {
110112
public void infer(Map<String, Object> doc, InferenceConfigUpdate update, TimeValue timeout, ActionListener<InferenceResults> listener) {
111113
if (inferenceConfigHolder.get() == null) {
112114
listener.onFailure(
113-
ExceptionsHelper.badRequestException("[{}] inference not possible against uninitialized model", params.getModelId())
115+
ExceptionsHelper.conflictStatusException("[{}] inference not possible against uninitialized model", params.getModelId())
114116
);
115117
return;
116118
}
117119
if (update.isSupported(inferenceConfigHolder.get()) == false) {
118120
listener.onFailure(
119-
ExceptionsHelper.badRequestException(
121+
new ElasticsearchStatusException(
120122
"[{}] inference not possible. Task is configured with [{}] but received update of type [{}]",
123+
RestStatus.FORBIDDEN,
121124
params.getModelId(),
122125
inferenceConfigHolder.get().getName(),
123126
update.getName()

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/FillMaskProcessor.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import org.elasticsearch.xpack.core.ml.inference.results.WarningInferenceResults;
1414
import org.elasticsearch.xpack.core.ml.inference.trainedmodel.FillMaskConfig;
1515
import org.elasticsearch.xpack.core.ml.inference.trainedmodel.NlpConfig;
16+
import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
1617
import org.elasticsearch.xpack.ml.inference.deployment.PyTorchResult;
1718
import org.elasticsearch.xpack.ml.inference.nlp.tokenizers.BertTokenizer;
1819
import org.elasticsearch.xpack.ml.inference.nlp.tokenizers.NlpTokenizer;
@@ -36,18 +37,18 @@ public class FillMaskProcessor implements NlpTask.Processor {
3637
@Override
3738
public void validateInputs(List<String> inputs) {
3839
if (inputs.isEmpty()) {
39-
throw new IllegalArgumentException("input request is empty");
40+
throw ExceptionsHelper.badRequestException("input request is empty");
4041
}
4142

4243
for (String input : inputs) {
4344
int maskIndex = input.indexOf(BertTokenizer.MASK_TOKEN);
4445
if (maskIndex < 0) {
45-
throw new IllegalArgumentException("no " + BertTokenizer.MASK_TOKEN + " token could be found");
46+
throw ExceptionsHelper.badRequestException("no {} token could be found", BertTokenizer.MASK_TOKEN);
4647
}
4748

4849
maskIndex = input.indexOf(BertTokenizer.MASK_TOKEN, maskIndex + BertTokenizer.MASK_TOKEN.length());
4950
if (maskIndex > 0) {
50-
throw new IllegalArgumentException("only one " + BertTokenizer.MASK_TOKEN + " token should exist in the input");
51+
throw ExceptionsHelper.badRequestException("only one {} token should exist in the input", BertTokenizer.MASK_TOKEN);
5152
}
5253
}
5354
}
@@ -59,8 +60,7 @@ public NlpTask.RequestBuilder getRequestBuilder(NlpConfig config) {
5960

6061
@Override
6162
public NlpTask.ResultProcessor getResultProcessor(NlpConfig config) {
62-
if (config instanceof FillMaskConfig) {
63-
FillMaskConfig fillMaskConfig = (FillMaskConfig) config;
63+
if (config instanceof FillMaskConfig fillMaskConfig) {
6464
return (tokenization, result) -> processResult(
6565
tokenization,
6666
result,

x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/FillMaskProcessorTests.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
package org.elasticsearch.xpack.ml.inference.nlp;
99

10+
import org.elasticsearch.ElasticsearchStatusException;
1011
import org.elasticsearch.test.ESTestCase;
1112
import org.elasticsearch.xpack.core.ml.inference.results.FillMaskResults;
1213
import org.elasticsearch.xpack.core.ml.inference.results.TopClassEntry;
@@ -88,7 +89,7 @@ public void testValidate_GivenMissingMaskToken() {
8889
FillMaskConfig config = new FillMaskConfig(new VocabularyConfig("test-index"), null, null, null);
8990
FillMaskProcessor processor = new FillMaskProcessor(mock(BertTokenizer.class), config);
9091

91-
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> processor.validateInputs(input));
92+
ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, () -> processor.validateInputs(input));
9293
assertThat(e.getMessage(), containsString("no [MASK] token could be found"));
9394
}
9495

@@ -98,7 +99,7 @@ public void testProcessResults_GivenMultipleMaskTokens() {
9899
FillMaskConfig config = new FillMaskConfig(new VocabularyConfig("test-index"), null, null, null);
99100
FillMaskProcessor processor = new FillMaskProcessor(mock(BertTokenizer.class), config);
100101

101-
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> processor.validateInputs(input));
102+
ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, () -> processor.validateInputs(input));
102103
assertThat(e.getMessage(), containsString("only one [MASK] token should exist in the input"));
103104
}
104105
}

0 commit comments

Comments (0)