|
| 1 | +/* |
| 2 | + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one |
| 3 | + * or more contributor license agreements. Licensed under the Elastic License; |
| 4 | + * you may not use this file except in compliance with the Elastic License. |
| 5 | + */ |
| 6 | +package org.elasticsearch.xpack.ml.integration; |
| 7 | + |
| 8 | +import org.elasticsearch.ElasticsearchException; |
| 9 | +import org.elasticsearch.Version; |
| 10 | +import org.elasticsearch.action.support.PlainActionFuture; |
| 11 | +import org.elasticsearch.common.collect.Tuple; |
| 12 | +import org.elasticsearch.common.xcontent.NamedXContentRegistry; |
| 13 | +import org.elasticsearch.license.License; |
| 14 | +import org.elasticsearch.xpack.core.action.util.PageParams; |
| 15 | +import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig; |
| 16 | +import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsDest; |
| 17 | +import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsSource; |
| 18 | +import org.elasticsearch.xpack.core.ml.dataframe.analyses.Regression; |
| 19 | +import org.elasticsearch.xpack.core.ml.inference.MlInferenceNamedXContentProvider; |
| 20 | +import org.elasticsearch.xpack.core.ml.inference.TrainedModelConfig; |
| 21 | +import org.elasticsearch.xpack.core.ml.inference.TrainedModelDefinition; |
| 22 | +import org.elasticsearch.xpack.core.ml.inference.TrainedModelDefinitionTests; |
| 23 | +import org.elasticsearch.xpack.core.ml.inference.TrainedModelInputTests; |
| 24 | +import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TargetType; |
| 25 | +import org.elasticsearch.xpack.ml.MlSingleNodeTestCase; |
| 26 | +import org.elasticsearch.xpack.ml.dataframe.process.ChunkedTrainedModelPersister; |
| 27 | +import org.elasticsearch.xpack.ml.dataframe.process.results.TrainedModelDefinitionChunk; |
| 28 | +import org.elasticsearch.xpack.ml.extractor.DocValueField; |
| 29 | +import org.elasticsearch.xpack.ml.extractor.ExtractedField; |
| 30 | +import org.elasticsearch.xpack.ml.extractor.ExtractedFields; |
| 31 | +import org.elasticsearch.xpack.ml.inference.modelsize.MlModelSizeNamedXContentProvider; |
| 32 | +import org.elasticsearch.xpack.ml.inference.modelsize.ModelSizeInfo; |
| 33 | +import org.elasticsearch.xpack.ml.inference.modelsize.ModelSizeInfoTests; |
| 34 | +import org.elasticsearch.xpack.ml.inference.persistence.TrainedModelProvider; |
| 35 | +import org.elasticsearch.xpack.ml.notifications.DataFrameAnalyticsAuditor; |
| 36 | +import org.junit.Before; |
| 37 | + |
| 38 | +import java.io.IOException; |
| 39 | +import java.util.ArrayList; |
| 40 | +import java.util.Collections; |
| 41 | +import java.util.List; |
| 42 | +import java.util.Set; |
| 43 | + |
| 44 | +import static org.hamcrest.Matchers.equalTo; |
| 45 | + |
| 46 | +public class ChunkedTrainedModelPersisterIT extends MlSingleNodeTestCase { |
| 47 | + |
| 48 | + private TrainedModelProvider trainedModelProvider; |
| 49 | + |
| 50 | + @Before |
| 51 | + public void createComponents() throws Exception { |
| 52 | + trainedModelProvider = new TrainedModelProvider(client(), xContentRegistry()); |
| 53 | + waitForMlTemplates(); |
| 54 | + } |
| 55 | + |
| 56 | + public void testStoreModelViaChunkedPersister() throws IOException { |
| 57 | + String modelId = "stored-chunked-model"; |
| 58 | + DataFrameAnalyticsConfig analyticsConfig = new DataFrameAnalyticsConfig.Builder() |
| 59 | + .setId(modelId) |
| 60 | + .setSource(new DataFrameAnalyticsSource(new String[] {"my_source"}, null, null)) |
| 61 | + .setDest(new DataFrameAnalyticsDest("my_dest", null)) |
| 62 | + .setAnalysis(new Regression("foo")) |
| 63 | + .build(); |
| 64 | + List<ExtractedField> extractedFieldList = Collections.singletonList(new DocValueField("foo", Collections.emptySet())); |
| 65 | + TrainedModelConfig.Builder configBuilder = buildTrainedModelConfigBuilder(modelId); |
| 66 | + String compressedDefinition = configBuilder.build().getCompressedDefinition(); |
| 67 | + int totalSize = compressedDefinition.length(); |
| 68 | + List<String> chunks = chunkStringWithSize(compressedDefinition, totalSize/3); |
| 69 | + |
| 70 | + ChunkedTrainedModelPersister persister = new ChunkedTrainedModelPersister(trainedModelProvider, |
| 71 | + analyticsConfig, |
| 72 | + new DataFrameAnalyticsAuditor(client(), "test-node"), |
| 73 | + (ex) -> { throw new ElasticsearchException(ex); }, |
| 74 | + new ExtractedFields(extractedFieldList, Collections.emptyMap()) |
| 75 | + ); |
| 76 | + |
| 77 | + //Accuracy for size is not tested here |
| 78 | + ModelSizeInfo modelSizeInfo = ModelSizeInfoTests.createRandom(); |
| 79 | + persister.createAndIndexInferenceModelMetadata(modelSizeInfo); |
| 80 | + for (int i = 0; i < chunks.size(); i++) { |
| 81 | + persister.createAndIndexInferenceModelDoc(new TrainedModelDefinitionChunk(chunks.get(i), i, i == (chunks.size() - 1))); |
| 82 | + } |
| 83 | + |
| 84 | + PlainActionFuture<Tuple<Long, Set<String>>> getIdsFuture = new PlainActionFuture<>(); |
| 85 | + trainedModelProvider.expandIds(modelId + "*", false, PageParams.defaultParams(), Collections.emptySet(), getIdsFuture); |
| 86 | + Tuple<Long, Set<String>> ids = getIdsFuture.actionGet(); |
| 87 | + assertThat(ids.v1(), equalTo(1L)); |
| 88 | + |
| 89 | + PlainActionFuture<TrainedModelConfig> getTrainedModelFuture = new PlainActionFuture<>(); |
| 90 | + trainedModelProvider.getTrainedModel(ids.v2().iterator().next(), true, getTrainedModelFuture); |
| 91 | + |
| 92 | + TrainedModelConfig storedConfig = getTrainedModelFuture.actionGet(); |
| 93 | + assertThat(storedConfig.getCompressedDefinition(), equalTo(compressedDefinition)); |
| 94 | + assertThat(storedConfig.getEstimatedOperations(), equalTo((long)modelSizeInfo.numOperations())); |
| 95 | + assertThat(storedConfig.getEstimatedHeapMemory(), equalTo(modelSizeInfo.ramBytesUsed())); |
| 96 | + } |
| 97 | + |
| 98 | + private static TrainedModelConfig.Builder buildTrainedModelConfigBuilder(String modelId) { |
| 99 | + TrainedModelDefinition.Builder definitionBuilder = TrainedModelDefinitionTests.createRandomBuilder(); |
| 100 | + long bytesUsed = definitionBuilder.build().ramBytesUsed(); |
| 101 | + long operations = definitionBuilder.build().getTrainedModel().estimatedNumOperations(); |
| 102 | + return TrainedModelConfig.builder() |
| 103 | + .setCreatedBy("ml_test") |
| 104 | + .setParsedDefinition(TrainedModelDefinitionTests.createRandomBuilder(TargetType.REGRESSION)) |
| 105 | + .setDescription("trained model config for test") |
| 106 | + .setModelId(modelId) |
| 107 | + .setVersion(Version.CURRENT) |
| 108 | + .setLicenseLevel(License.OperationMode.PLATINUM.description()) |
| 109 | + .setEstimatedHeapMemory(bytesUsed) |
| 110 | + .setEstimatedOperations(operations) |
| 111 | + .setInput(TrainedModelInputTests.createRandomInput()); |
| 112 | + } |
| 113 | + |
| 114 | + public static List<String> chunkStringWithSize(String str, int chunkSize) { |
| 115 | + List<String> subStrings = new ArrayList<>((str.length() + chunkSize - 1) / chunkSize); |
| 116 | + for (int i = 0; i < str.length(); i += chunkSize) { |
| 117 | + subStrings.add(str.substring(i, Math.min(i + chunkSize, str.length()))); |
| 118 | + } |
| 119 | + return subStrings; |
| 120 | + } |
| 121 | + |
| 122 | + @Override |
| 123 | + public NamedXContentRegistry xContentRegistry() { |
| 124 | + List<NamedXContentRegistry.Entry> namedXContent = new ArrayList<>(); |
| 125 | + namedXContent.addAll(new MlInferenceNamedXContentProvider().getNamedXContentParsers()); |
| 126 | + namedXContent.addAll(new MlModelSizeNamedXContentProvider().getNamedXContentParsers()); |
| 127 | + return new NamedXContentRegistry(namedXContent); |
| 128 | + } |
| 129 | + |
| 130 | +} |
0 commit comments