
Commit 2dec141

[ML] fail inference processor more consistently on certain error types (#81475)
This updates the following scenarios and causes NER/native inference to fail instead of writing a warning:
- missing vocabulary values
- missing model/deployment
- native process failed
- native process stopping
- request timed out
- misconfigured inference task update type
1 parent 3acf0e8 commit 2dec141
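The changes below converge on one routing rule for inference failures, implemented in DeploymentManager.handleFailure in the diff: only 400-class (bad request) validation problems are still downgraded to per-document warnings; everything else now fails the processor. A hedged sketch of that rule, simplified from the commit (the real method also downgrades a bare IllegalArgumentException; the class wrapper here is illustrative only):

import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.xpack.core.ml.inference.results.InferenceResults;
import org.elasticsearch.xpack.core.ml.inference.results.WarningInferenceResults;

final class FailureRouting {
    static void route(Exception e, ActionListener<InferenceResults> listener) {
        Throwable unwrapped = ExceptionsHelper.unwrapCause(e);
        if (unwrapped instanceof ElasticsearchException ex && ex.status() == RestStatus.BAD_REQUEST) {
            // 400-class validation problems still surface as per-document warnings
            listener.onResponse(new WarningInferenceResults(ex.getMessage()));
        } else {
            // missing or stopping deployments, native process errors, timeouts and
            // misconfigured task updates now propagate as hard failures
            listener.onFailure(e);
        }
    }
}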

6 files changed, +129 −39 lines

x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/PyTorchModelIT.java

Lines changed: 82 additions & 3 deletions
@@ -40,6 +40,7 @@
 
 import static org.elasticsearch.xpack.ml.integration.InferenceIngestIT.putPipeline;
 import static org.elasticsearch.xpack.ml.integration.InferenceIngestIT.simulateRequest;
+import static org.hamcrest.Matchers.allOf;
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.greaterThanOrEqualTo;
@@ -465,7 +466,11 @@ public void testInferencePipelineAgainstUnallocatedModel() throws IOException {
         String response = EntityUtils.toString(client().performRequest(simulateRequest(source)).getEntity());
         assertThat(
             response,
-            containsString("model [not-deployed] must be deployed to use. Please deploy with the start trained model deployment API.")
+            allOf(
+                containsString("model [not-deployed] must be deployed to use. Please deploy with the start trained model deployment API."),
+                containsString("error"),
+                not(containsString("warning"))
+            )
         );
 
         client().performRequest(
@@ -520,9 +525,8 @@ public void testTruncation() throws IOException {
         startDeployment(modelId, AllocationStatus.State.FULLY_ALLOCATED.toString());
 
         String input = "once twice thrice";
-        ResponseException ex = expectThrows(ResponseException.class, () -> infer("once twice thrice", modelId));
         assertThat(
-            ex.getMessage(),
+            EntityUtils.toString(infer("once twice thrice", modelId).getEntity()),
             containsString("Input too large. The tokenized input length [3] exceeds the maximum sequence length [2]")
         );
 
@@ -578,6 +582,81 @@ public void testStopUsedDeploymentByIngestProcessor() throws IOException {
         stopDeployment(modelId, true);
     }
 
+    public void testPipelineWithBadProcessor() throws IOException {
+        String model = "deployed";
+        createTrainedModel(model);
+        putVocabulary(List.of("once", "twice"), model);
+        putModelDefinition(model);
+        startDeployment(model);
+        String source = """
+            {
+              "pipeline": {
+                "processors": [
+                  {
+                    "inference": {
+                      "model_id": "deployed",
+                      "inference_config": {
+                        "ner": {}
+                      }
+                    }
+                  }
+                ]
+              },
+              "docs": [
+                {"_source": {"input": "my words"}}]
+            }
+            """;
+
+        String response = EntityUtils.toString(client().performRequest(simulateRequest(source)).getEntity());
+        assertThat(
+            response,
+            allOf(
+                containsString("inference not possible. Task is configured with [pass_through] but received update of type [ner]"),
+                containsString("error"),
+                not(containsString("warning"))
+            )
+        );
+
+        source = """
+            {
+              "pipeline": {
+                "processors": [
+                  {
+                    "inference": {
+                      "model_id": "deployed"
+                    }
+                  }
+                ]
+              },
+              "docs": [
+                {"_source": {"input": "my words"}}]
+            }
+            """;
+
+        response = EntityUtils.toString(client().performRequest(simulateRequest(source)).getEntity());
+        assertThat(response, allOf(containsString("error"), not(containsString("warning"))));
+
+        // Missing input field is a warning
+        source = """
+            {
+              "pipeline": {
+                "processors": [
+                  {
+                    "inference": {
+                      "model_id": "deployed"
+                    }
+                  }
+                ]
+              },
+              "docs": [
+                {"_source": {"something": "my words"}}]
+            }
+            """;
+
+        response = EntityUtils.toString(client().performRequest(simulateRequest(source)).getEntity());
+        assertThat(response, containsString("warning"));
+    }
+
     public void testDeleteModelWithDeploymentUsedByIngestProcessor() throws IOException {
         String modelId = "test_delete_model_with_used_deployment";
         createTrainedModel(modelId);

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportInternalInferModelAction.java

Lines changed: 1 addition & 17 deletions
@@ -6,8 +6,6 @@
  */
 package org.elasticsearch.xpack.ml.action;
 
-import org.elasticsearch.ElasticsearchStatusException;
-import org.elasticsearch.ExceptionsHelper;
 import org.elasticsearch.action.ActionListener;
 import org.elasticsearch.action.support.ActionFilters;
 import org.elasticsearch.action.support.HandledTransportAction;
@@ -18,7 +16,6 @@
 import org.elasticsearch.license.License;
 import org.elasticsearch.license.LicenseUtils;
 import org.elasticsearch.license.XPackLicenseState;
-import org.elasticsearch.rest.RestStatus;
 import org.elasticsearch.tasks.Task;
 import org.elasticsearch.tasks.TaskId;
 import org.elasticsearch.threadpool.ThreadPool;
@@ -31,7 +28,6 @@
 import org.elasticsearch.xpack.core.ml.action.InternalInferModelAction.Request;
 import org.elasticsearch.xpack.core.ml.action.InternalInferModelAction.Response;
 import org.elasticsearch.xpack.core.ml.inference.results.InferenceResults;
-import org.elasticsearch.xpack.core.ml.inference.results.WarningInferenceResults;
 import org.elasticsearch.xpack.core.ml.inference.trainedmodel.InferenceConfigUpdate;
 import org.elasticsearch.xpack.ml.inference.allocation.TrainedModelAllocationMetadata;
 import org.elasticsearch.xpack.ml.inference.loadingservice.LocalModel;
@@ -195,19 +191,7 @@ private void inferSingleDocAgainstAllocatedModel(
             ML_ORIGIN,
             InferTrainedModelDeploymentAction.INSTANCE,
             request,
-            ActionListener.wrap(r -> listener.onResponse(r.getResults()), e -> {
-                Throwable unwrapped = ExceptionsHelper.unwrapCause(e);
-                if (unwrapped instanceof ElasticsearchStatusException) {
-                    ElasticsearchStatusException ex = (ElasticsearchStatusException) unwrapped;
-                    if (ex.status().equals(RestStatus.TOO_MANY_REQUESTS)) {
-                        listener.onFailure(ex);
-                    } else {
-                        listener.onResponse(new WarningInferenceResults(ex.getMessage()));
-                    }
-                } else {
-                    listener.onResponse(new WarningInferenceResults(e.getMessage()));
-                }
-            })
+            ActionListener.wrap(r -> listener.onResponse(r.getResults()), listener::onFailure)
         );
     }
 }
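With the warning-wrapping removed from the transport action, failures now reach the caller's listener directly. A hedged sketch of the caller side (not part of this commit; handleResults, logger, and the exact Response accessor are assumptions for illustration):

// Hypothetical caller: exceptions that this action previously converted
// into WarningInferenceResults now arrive through onFailure.
client.execute(
    InternalInferModelAction.INSTANCE,
    request,
    ActionListener.wrap(
        response -> handleResults(response.getInferenceResults()),
        e -> logger.error("inference request failed", e)
    )
);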

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/deployment/DeploymentManager.java

Lines changed: 31 additions & 7 deletions
@@ -11,6 +11,7 @@
 import org.apache.logging.log4j.Logger;
 import org.apache.logging.log4j.message.ParameterizedMessage;
 import org.apache.lucene.util.SetOnce;
+import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.ElasticsearchStatusException;
 import org.elasticsearch.ResourceNotFoundException;
 import org.elasticsearch.action.ActionListener;
@@ -33,6 +34,7 @@
 import org.elasticsearch.xpack.core.ml.inference.TrainedModelConfig;
 import org.elasticsearch.xpack.core.ml.inference.TrainedModelInput;
 import org.elasticsearch.xpack.core.ml.inference.results.InferenceResults;
+import org.elasticsearch.xpack.core.ml.inference.results.WarningInferenceResults;
 import org.elasticsearch.xpack.core.ml.inference.trainedmodel.IndexLocation;
 import org.elasticsearch.xpack.core.ml.inference.trainedmodel.InferenceConfig;
 import org.elasticsearch.xpack.core.ml.inference.trainedmodel.NlpConfig;
@@ -227,16 +229,18 @@ public void infer(
     ) {
         if (task.isStopped()) {
             listener.onFailure(
-                new IllegalStateException(
-                    "[" + task.getModelId() + "] is stopping or stopped due to [" + task.stoppedReason().orElse("") + "]"
+                ExceptionsHelper.conflictStatusException(
+                    "[{}] is stopping or stopped due to [{}]",
+                    task.getModelId(),
+                    task.stoppedReason().orElse("")
                 )
             );
             return;
         }
 
         ProcessContext processContext = processContextByAllocation.get(task.getId());
         if (processContext == null) {
-            listener.onFailure(new IllegalStateException("[" + task.getModelId() + "] process context missing"));
+            listener.onFailure(ExceptionsHelper.conflictStatusException("[{}] process context missing", task.getModelId()));
             return;
         }
 
@@ -258,7 +262,7 @@ public void infer(
         }
     }
 
-    static class InferenceAction extends AbstractRunnable {
+    static class InferenceAction extends AbstractRunnable implements ActionListener<InferenceResults> {
         private final String modelId;
         private final long requestId;
         private final TimeValue timeout;
@@ -304,6 +308,11 @@ void onTimeout() {
             logger.debug("[{}] request [{}] received timeout after [{}] but listener already alerted", modelId, requestId, timeout);
         }
 
+        @Override
+        public void onResponse(InferenceResults inferenceResults) {
+            onSuccess(inferenceResults);
+        }
+
         void onSuccess(InferenceResults inferenceResults) {
             timeoutHandler.cancel();
             if (notified.compareAndSet(false, true)) {
@@ -362,17 +371,32 @@ protected void doRun() throws Exception {
                         processContext,
                         request.tokenization,
                         processor.getResultProcessor((NlpConfig) config),
-                        ActionListener.wrap(this::onSuccess, this::onFailure)
+                        ActionListener.wrap(this::onSuccess, f -> handleFailure(f, this))
                     ),
                     this::onFailure
                 )
            );
            processContext.process.get().writeInferenceRequest(request.processInput);
        } catch (IOException e) {
            logger.error(new ParameterizedMessage("[{}] error writing to process", processContext.task.getModelId()), e);
-           onFailure(ExceptionsHelper.serverError("error writing to process", e));
+           handleFailure(ExceptionsHelper.serverError("error writing to process", e), this);
        } catch (Exception e) {
-           onFailure(e);
+           handleFailure(e, this);
+       }
+   }
+
+   private static void handleFailure(Exception e, ActionListener<InferenceResults> listener) {
+       Throwable unwrapped = org.elasticsearch.ExceptionsHelper.unwrapCause(e);
+       if (unwrapped instanceof ElasticsearchException ex) {
+           if (ex.status() == RestStatus.BAD_REQUEST) {
+               listener.onResponse(new WarningInferenceResults(ex.getMessage()));
+           } else {
+               listener.onFailure(ex);
+           }
+       } else if (unwrapped instanceof IllegalArgumentException) {
+           listener.onResponse(new WarningInferenceResults(e.getMessage()));
+       } else {
+           listener.onFailure(e);
       }
   }

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/deployment/TrainedModelDeploymentTask.java

Lines changed: 5 additions & 2 deletions
@@ -10,10 +10,12 @@
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 import org.apache.lucene.util.SetOnce;
+import org.elasticsearch.ElasticsearchStatusException;
 import org.elasticsearch.action.ActionListener;
 import org.elasticsearch.core.TimeValue;
 import org.elasticsearch.license.LicensedFeature;
 import org.elasticsearch.license.XPackLicenseState;
+import org.elasticsearch.rest.RestStatus;
 import org.elasticsearch.tasks.CancellableTask;
 import org.elasticsearch.tasks.TaskId;
 import org.elasticsearch.xpack.core.ml.MlTasks;
@@ -110,14 +112,15 @@ protected void onCancelled() {
     public void infer(Map<String, Object> doc, InferenceConfigUpdate update, TimeValue timeout, ActionListener<InferenceResults> listener) {
         if (inferenceConfigHolder.get() == null) {
             listener.onFailure(
-                ExceptionsHelper.badRequestException("[{}] inference not possible against uninitialized model", params.getModelId())
+                ExceptionsHelper.conflictStatusException("[{}] inference not possible against uninitialized model", params.getModelId())
             );
             return;
         }
         if (update.isSupported(inferenceConfigHolder.get()) == false) {
             listener.onFailure(
-                ExceptionsHelper.badRequestException(
+                new ElasticsearchStatusException(
                     "[{}] inference not possible. Task is configured with [{}] but received update of type [{}]",
+                    RestStatus.FORBIDDEN,
                     params.getModelId(),
                     inferenceConfigHolder.get().getName(),
                     update.getName()
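Read together with DeploymentManager.handleFailure above, the status choice here is what flips the mismatched-update case from warning to error. A small hedged illustration (model and task names borrowed from the new integration test):

// FORBIDDEN is not BAD_REQUEST, so handleFailure routes this exception to
// listener.onFailure(...) and the simulate response carries "error", not "warning".
ElasticsearchStatusException mismatch = new ElasticsearchStatusException(
    "[{}] inference not possible. Task is configured with [{}] but received update of type [{}]",
    RestStatus.FORBIDDEN,
    "deployed", "pass_through", "ner"
);
assert mismatch.status() != RestStatus.BAD_REQUEST;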

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/FillMaskProcessor.java

Lines changed: 7 additions & 8 deletions
@@ -13,12 +13,12 @@
 import org.elasticsearch.xpack.core.ml.inference.results.WarningInferenceResults;
 import org.elasticsearch.xpack.core.ml.inference.trainedmodel.FillMaskConfig;
 import org.elasticsearch.xpack.core.ml.inference.trainedmodel.NlpConfig;
+import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
 import org.elasticsearch.xpack.ml.inference.deployment.PyTorchResult;
 import org.elasticsearch.xpack.ml.inference.nlp.tokenizers.NlpTokenizer;
 import org.elasticsearch.xpack.ml.inference.nlp.tokenizers.TokenizationResult;
 
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.List;
 import java.util.Optional;
 
@@ -35,19 +35,19 @@ public class FillMaskProcessor implements NlpTask.Processor {
     @Override
     public void validateInputs(List<String> inputs) {
         if (inputs.isEmpty()) {
-            throw new IllegalArgumentException("input request is empty");
+            throw ExceptionsHelper.badRequestException("input request is empty");
         }
 
         final String mask = tokenizer.getMaskToken();
         for (String input : inputs) {
             int maskIndex = input.indexOf(mask);
             if (maskIndex < 0) {
-                throw new IllegalArgumentException("no " + mask + " token could be found");
+                throw ExceptionsHelper.badRequestException("no {} token could be found", mask);
             }
 
             maskIndex = input.indexOf(mask, maskIndex + mask.length());
             if (maskIndex > 0) {
-                throw new IllegalArgumentException("only one " + mask + " token should exist in the input");
+                throw ExceptionsHelper.badRequestException("only one {} token should exist in the input", mask);
             }
         }
     }
@@ -59,8 +59,7 @@ public NlpTask.RequestBuilder getRequestBuilder(NlpConfig config) {
 
     @Override
     public NlpTask.ResultProcessor getResultProcessor(NlpConfig config) {
-        if (config instanceof FillMaskConfig) {
-            FillMaskConfig fillMaskConfig = (FillMaskConfig) config;
+        if (config instanceof FillMaskConfig fillMaskConfig) {
             return (tokenization, result) -> processResult(
                 tokenization,
                 result,
@@ -91,7 +90,7 @@ static InferenceResults processResult(
         }
 
         if (tokenizer.getMaskTokenId().isEmpty()) {
-            return new WarningInferenceResults(
+            throw ExceptionsHelper.conflictStatusException(
                 "The token id for the mask token {} is not known in the tokenizer. Check the vocabulary contains the mask token",
                 tokenizer.getMaskToken()
             );
@@ -109,7 +108,7 @@ static InferenceResults processResult(
             return new WarningInferenceResults(
                 "mask token id [{}] not found in the tokenization {}",
                 maskTokenId,
-                Arrays.asList(tokenization.getTokenizations().get(0).getTokenIds())
+                List.of(tokenization.getTokenizations().get(0).getTokenIds())
             );
         }
 

x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/FillMaskProcessorTests.java

Lines changed: 3 additions & 2 deletions
@@ -7,6 +7,7 @@
 
 package org.elasticsearch.xpack.ml.inference.nlp;
 
+import org.elasticsearch.ElasticsearchStatusException;
 import org.elasticsearch.test.ESTestCase;
 import org.elasticsearch.xpack.core.ml.inference.results.FillMaskResults;
 import org.elasticsearch.xpack.core.ml.inference.results.TopClassEntry;
@@ -103,7 +104,7 @@ public void testValidate_GivenMissingMaskToken() {
         FillMaskConfig config = new FillMaskConfig(new VocabularyConfig("test-index"), null, null, null);
         FillMaskProcessor processor = new FillMaskProcessor(tokenizer, config);
 
-        IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> processor.validateInputs(input));
+        ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, () -> processor.validateInputs(input));
         assertThat(e.getMessage(), containsString("no [MASK] token could be found"));
     }
 
@@ -116,7 +117,7 @@ public void testProcessResults_GivenMultipleMaskTokens() {
         FillMaskConfig config = new FillMaskConfig(new VocabularyConfig("test-index"), null, null, null);
         FillMaskProcessor processor = new FillMaskProcessor(tokenizer, config);
 
-        IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> processor.validateInputs(input));
+        ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, () -> processor.validateInputs(input));
         assertThat(e.getMessage(), containsString("only one [MASK] token should exist in the input"));
     }
 }
