From 5004872532e991541755a7891b83836437a44557 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Thu, 14 Oct 2021 15:37:25 -0400 Subject: [PATCH 1/3] Add support for configuring HNSW parameters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR extends the dense_vector type to allow configure HNSW params in `index_options`: `m` – max number of connections for each node, `ef_construction` – number of candidate neighbors to track while searching the graph for each newly inserted node. ``` "mappings": { "properties": { "my_vector": { "type": "dense_vector", "dims": 128, "index": true, "similarity": "l2_norm", "index_options": { "type" : "hnsw", "m" : 15, "ef_construction" : 50 } } } } ``` index_options as an object, and all parameters underneath are optional. If `m` or `ef_contruction` are not provided, the default values from the current codec will be used. Relates to #78473 --- .../index/codec/CodecService.java | 4 +- ...atCodec.java => PerFieldMappingCodec.java} | 30 ++-- .../index/mapper/MappingLookup.java | 15 ++ .../index/mapper/VectorFieldMapper.java | 128 ++++++++++++++++++ .../elasticsearch/index/codec/CodecTests.java | 2 +- .../mapper/CompletionFieldMapperTests.java | 6 +- .../test/vectors/10_dense_vector_basic.yml | 4 + .../vectors/20_dense_vector_special_cases.yml | 3 + .../mapper/DenseVectorFieldMapper.java | 20 ++- .../mapper/DenseVectorFieldMapperTests.java | 63 +++++++++ 10 files changed, 252 insertions(+), 23 deletions(-) rename server/src/main/java/org/elasticsearch/index/codec/{PerFieldMappingPostingFormatCodec.java => PerFieldMappingCodec.java} (57%) create mode 100644 server/src/main/java/org/elasticsearch/index/mapper/VectorFieldMapper.java diff --git a/server/src/main/java/org/elasticsearch/index/codec/CodecService.java b/server/src/main/java/org/elasticsearch/index/codec/CodecService.java index 1e623ccbb0456..47cc25ea9cd2b 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/CodecService.java +++ b/server/src/main/java/org/elasticsearch/index/codec/CodecService.java @@ -38,9 +38,9 @@ public CodecService(@Nullable MapperService mapperService) { codecs.put(BEST_COMPRESSION_CODEC, new Lucene90Codec(Lucene90Codec.Mode.BEST_COMPRESSION)); } else { codecs.put(DEFAULT_CODEC, - new PerFieldMappingPostingFormatCodec(Lucene90Codec.Mode.BEST_SPEED, mapperService)); + new PerFieldMappingCodec(Lucene90Codec.Mode.BEST_SPEED, mapperService)); codecs.put(BEST_COMPRESSION_CODEC, - new PerFieldMappingPostingFormatCodec(Lucene90Codec.Mode.BEST_COMPRESSION, mapperService)); + new PerFieldMappingCodec(Lucene90Codec.Mode.BEST_COMPRESSION, mapperService)); } codecs.put(LUCENE_DEFAULT_CODEC, Codec.getDefault()); for (String codec : Codec.availableCodecs()) { diff --git a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMappingCodec.java similarity index 57% rename from server/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java rename to server/src/main/java/org/elasticsearch/index/codec/PerFieldMappingCodec.java index 240728b3eec2f..e11194e0da576 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java +++ b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMappingCodec.java @@ -10,6 +10,7 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.lucene90.Lucene90Codec; import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat; @@ -17,24 +18,24 @@ import org.elasticsearch.index.mapper.MapperService; /** - * {@link PerFieldMappingPostingFormatCodec This postings format} is the default - * {@link PostingsFormat} for Elasticsearch. It utilizes the - * {@link MapperService} to lookup a {@link PostingsFormat} per field. This - * allows users to change the low level postings format for individual fields - * per index in real time via the mapping API. If no specific postings format is - * configured for a specific field the default postings format is used. + * {@link PerFieldMappingCodec This postings format} is the default + * {@link PostingsFormat} and {@link KnnVectorsFormat} for Elasticsearch. It utilizes the + * {@link MapperService} to lookup a {@link PostingsFormat} and {@link KnnVectorsFormat} per field. This + * allows users to change the low level postings format and vectors format for individual fields + * per index in real time via the mapping API. If no specific postings format or vector format is + * configured for a specific field the default postings or vector format is used. */ -public class PerFieldMappingPostingFormatCodec extends Lucene90Codec { +public class PerFieldMappingCodec extends Lucene90Codec { private final MapperService mapperService; private final DocValuesFormat docValuesFormat = new Lucene90DocValuesFormat(); static { - assert Codec.forName(Lucene.LATEST_CODEC).getClass().isAssignableFrom(PerFieldMappingPostingFormatCodec.class) : - "PerFieldMappingPostingFormatCodec must subclass the latest " + "lucene codec: " + Lucene.LATEST_CODEC; + assert Codec.forName(Lucene.LATEST_CODEC).getClass().isAssignableFrom(PerFieldMappingCodec.class) : + "PerFieldMappingCodec must subclass the latest " + "lucene codec: " + Lucene.LATEST_CODEC; } - public PerFieldMappingPostingFormatCodec(Mode compressionMode, MapperService mapperService) { + public PerFieldMappingCodec(Mode compressionMode, MapperService mapperService) { super(compressionMode); this.mapperService = mapperService; } @@ -48,6 +49,15 @@ public PostingsFormat getPostingsFormatForField(String field) { return format; } + @Override + public KnnVectorsFormat getKnnVectorsFormatForField(String field) { + KnnVectorsFormat format = mapperService.mappingLookup().getKnnVectorsFormatForField(field); + if (format == null) { + return super.getKnnVectorsFormatForField(field); + } + return format; + } + @Override public DocValuesFormat getDocValuesFormatForField(String field) { return docValuesFormat; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java b/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java index 02e1698b7f5d2..29a154e707536 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java @@ -8,6 +8,7 @@ package org.elasticsearch.index.mapper; +import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.PostingsFormat; import org.elasticsearch.cluster.metadata.DataStream; import org.elasticsearch.index.IndexSettings; @@ -228,6 +229,20 @@ public PostingsFormat getPostingsFormat(String field) { return completionFields.contains(field) ? CompletionFieldMapper.postingsFormat() : null; } + /** + * Returns the knn vectors format for a particular field + * @param field the field to retrieve a knn vectors format for + * @return the knn vectors format for the field, or {@code null} if the default format should be used + */ + public KnnVectorsFormat getKnnVectorsFormatForField(String field) { + Mapper fieldMapper = fieldMappers.get(field); + if (fieldMapper instanceof VectorFieldMapper) { + return ((VectorFieldMapper) fieldMapper).getKnnVectorsFormatForField(); + } else { + return null; + } + } + void checkLimits(IndexSettings settings) { checkFieldLimit(settings.getMappingTotalFieldsLimit()); checkObjectDepthLimit(settings.getMappingDepthLimit()); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/VectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/VectorFieldMapper.java new file mode 100644 index 0000000000000..2c1c38a4b3927 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/mapper/VectorFieldMapper.java @@ -0,0 +1,128 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ +package org.elasticsearch.index.mapper; + +import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat; +import org.elasticsearch.common.xcontent.support.XContentMapValues; +import org.elasticsearch.xcontent.ToXContent; +import org.elasticsearch.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.Map; +import java.util.Objects; + +import static org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat.DEFAULT_MAX_CONN; +import static org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat.DEFAULT_BEAM_WIDTH; + +/** + * Field mapper for a vector field for ann search. + */ + +public abstract class VectorFieldMapper extends FieldMapper { + public static final IndexOptions DEFAULT_INDEX_OPTIONS = new HNSWIndexOptions(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH); + protected final IndexOptions indexOptions; + + protected VectorFieldMapper(String simpleName, MappedFieldType mappedFieldType, MultiFields multiFields, CopyTo copyTo, + IndexOptions indexOptions) { + super(simpleName, mappedFieldType, multiFields, copyTo); + this.indexOptions = indexOptions; + } + + /** + * Returns the knn vectors format that is customly set up for this field or {@code null} if + * the format is not set up or if the set up format matches the default format. + * @return the knn vectors format for the field, or {@code null} if the default format should be used + */ + public KnnVectorsFormat getKnnVectorsFormatForField() { + if (indexOptions == null && indexOptions == DEFAULT_INDEX_OPTIONS) { + return null; + } else { + HNSWIndexOptions hnswIndexOptions = (HNSWIndexOptions) indexOptions; + return new Lucene90HnswVectorsFormat(hnswIndexOptions.m, hnswIndexOptions.efConstruction); + } + } + + public static IndexOptions parseVectorIndexOptions(String fieldName, Object propNode) { + if (propNode == null) { + return null; + } + Map indexOptionsMap = (Map) propNode; + String type = XContentMapValues.nodeStringValue(indexOptionsMap.remove("type"), "hnsw"); + if (type.equals("hnsw")) { + return HNSWIndexOptions.parseIndexOptions(fieldName, indexOptionsMap); + } else { + throw new MapperParsingException("Unknown vector index options type [" + type + "] for field [" + fieldName + "]"); + } + } + + public abstract static class IndexOptions implements ToXContent { + protected final String type; + public IndexOptions(String type) { + this.type = type; + } + } + + public static class HNSWIndexOptions extends IndexOptions { + private final int m; + private final int efConstruction; + + public HNSWIndexOptions(int m, int efConstruction) { + super("hnsw"); + this.m = m; + this.efConstruction = efConstruction; + } + + public int m() { + return m; + } + + public int efConstruction() { + return efConstruction; + } + + public static IndexOptions parseIndexOptions(String fieldName, Map indexOptionsMap) { + int m = XContentMapValues.nodeIntegerValue(indexOptionsMap.remove("m"), DEFAULT_MAX_CONN); + int efConstruction = XContentMapValues.nodeIntegerValue(indexOptionsMap.remove("ef_construction"), DEFAULT_BEAM_WIDTH); + MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); + if (m == DEFAULT_MAX_CONN && efConstruction == DEFAULT_BEAM_WIDTH) { + return VectorFieldMapper.DEFAULT_INDEX_OPTIONS; + } else { + return new HNSWIndexOptions(m, efConstruction); + } + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field("type", type); + builder.field("m", m); + builder.field("ef_construction", efConstruction); + builder.endObject(); + return builder; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + HNSWIndexOptions that = (HNSWIndexOptions) o; + return m == that.m && efConstruction == that.efConstruction; + } + + @Override + public int hashCode() { + return Objects.hash(type, m, efConstruction); + } + + @Override + public String toString() { + return "{type=" + type + ", m=" + m + ", ef_construction=" + efConstruction + " }"; + } + } +} diff --git a/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java b/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java index ec1a49dafe75c..9a666f9057e5d 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java @@ -40,7 +40,7 @@ public class CodecTests extends ESTestCase { public void testResolveDefaultCodecs() throws Exception { CodecService codecService = createCodecService(); - assertThat(codecService.codec("default"), instanceOf(PerFieldMappingPostingFormatCodec.class)); + assertThat(codecService.codec("default"), instanceOf(PerFieldMappingCodec.class)); assertThat(codecService.codec("default"), instanceOf(Lucene90Codec.class)); } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/CompletionFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/CompletionFieldMapperTests.java index 15b2af9211f37..a240f1da193b4 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/CompletionFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/CompletionFieldMapperTests.java @@ -29,6 +29,7 @@ import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.unit.Fuzziness; +import org.elasticsearch.index.codec.PerFieldMappingCodec; import org.elasticsearch.xcontent.ToXContent; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentFactory; @@ -38,7 +39,6 @@ import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.codec.CodecService; -import org.elasticsearch.index.codec.PerFieldMappingPostingFormatCodec; import org.hamcrest.FeatureMatcher; import org.hamcrest.Matcher; import org.hamcrest.Matchers; @@ -122,8 +122,8 @@ public void testPostingsFormat() throws IOException { MapperService mapperService = createMapperService(fieldMapping(this::minimalMapping)); CodecService codecService = new CodecService(mapperService); Codec codec = codecService.codec("default"); - assertThat(codec, instanceOf(PerFieldMappingPostingFormatCodec.class)); - PerFieldMappingPostingFormatCodec perFieldCodec = (PerFieldMappingPostingFormatCodec) codec; + assertThat(codec, instanceOf(PerFieldMappingCodec.class)); + PerFieldMappingCodec perFieldCodec = (PerFieldMappingCodec) codec; assertThat(perFieldCodec.getPostingsFormatForField("field"), instanceOf(Completion90PostingsFormat.class)); } diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/10_dense_vector_basic.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/10_dense_vector_basic.yml index 6d3456182901f..952ee4654f040 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/10_dense_vector_basic.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/10_dense_vector_basic.yml @@ -18,6 +18,10 @@ setup: dims: 5 index: true similarity: dot_product + index_options: + type: hnsw + m: 15 + ef_construction: 80 - do: index: index: test-index diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/20_dense_vector_special_cases.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/20_dense_vector_special_cases.yml index 6b486c6299c61..9d097c55a4510 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/20_dense_vector_special_cases.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/20_dense_vector_special_cases.yml @@ -19,6 +19,9 @@ setup: dims: 3 index: true similarity: l2_norm + index_options: + type: hnsw + m: 15 --- "Indexing of Dense vectors should error when dims don't match defined in the mapping": diff --git a/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/mapper/DenseVectorFieldMapper.java b/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/mapper/DenseVectorFieldMapper.java index b336cc19466c1..41299c925168a 100644 --- a/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/mapper/DenseVectorFieldMapper.java +++ b/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/mapper/DenseVectorFieldMapper.java @@ -16,6 +16,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; import org.elasticsearch.Version; +import org.elasticsearch.index.mapper.VectorFieldMapper; import org.elasticsearch.xcontent.XContentParser.Token; import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.index.fielddata.IndexFieldData; @@ -47,7 +48,7 @@ /** * A {@link FieldMapper} for indexing a dense vector of floats. */ -public class DenseVectorFieldMapper extends FieldMapper { +public class DenseVectorFieldMapper extends VectorFieldMapper { public static final String CONTENT_TYPE = "dense_vector"; public static short MAX_DIMS_COUNT = 2048; //maximum allowed number of dimensions @@ -73,6 +74,8 @@ public static class Builder extends FieldMapper.Builder { private final Parameter indexed = Parameter.indexParam(m -> toType(m).indexed, false); private final Parameter similarity = Parameter.enumParam( "similarity", false, m -> toType(m).similarity, null, VectorSimilarity.class); + private final Parameter indexOptions = new Parameter<>("index_options", false, () -> null, + (n, c, o) -> VectorFieldMapper.parseVectorIndexOptions(n, o), m -> toType(m).indexOptions); private final Parameter> meta = Parameter.metaParam(); final Version indexVersionCreated; @@ -84,11 +87,13 @@ public Builder(String name, Version indexVersionCreated) { this.indexed.requiresParameters(similarity); this.similarity.setSerializerCheck((id, ic, v) -> v != null); this.similarity.requiresParameters(indexed); + this.indexOptions.requiresParameters(indexed); + this.indexOptions.setSerializerCheck((id, ic, v) -> v != null); } @Override protected List> getParameters() { - return List.of(dims, indexed, similarity, meta); + return List.of(dims, indexed, similarity, indexOptions, meta); } @Override @@ -102,7 +107,8 @@ public DenseVectorFieldMapper build(MapperBuilderContext context) { similarity.getValue(), indexVersionCreated, multiFieldsBuilder.build(this, context), - copyTo.build()); + copyTo.build(), + indexOptions.getValue()); } } @@ -187,10 +193,10 @@ public Query termQuery(Object value, SearchExecutionContext context) { private final VectorSimilarity similarity; private final Version indexCreatedVersion; - private DenseVectorFieldMapper(String simpleName, MappedFieldType mappedFieldType, int dims, - boolean indexed, VectorSimilarity similarity, - Version indexCreatedVersion, MultiFields multiFields, CopyTo copyTo) { - super(simpleName, mappedFieldType, multiFields, copyTo); + private DenseVectorFieldMapper(String simpleName, MappedFieldType mappedFieldType, int dims, boolean indexed, + VectorSimilarity similarity, Version indexCreatedVersion, MultiFields multiFields, + CopyTo copyTo, VectorFieldMapper.IndexOptions indexOptions) { + super(simpleName, mappedFieldType, multiFields, copyTo, indexOptions); this.dims = dims; this.indexed = indexed; this.similarity = similarity; diff --git a/x-pack/plugin/vectors/src/test/java/org/elasticsearch/xpack/vectors/mapper/DenseVectorFieldMapperTests.java b/x-pack/plugin/vectors/src/test/java/org/elasticsearch/xpack/vectors/mapper/DenseVectorFieldMapperTests.java index 43dec82107f79..1e321b68843ec 100644 --- a/x-pack/plugin/vectors/src/test/java/org/elasticsearch/xpack/vectors/mapper/DenseVectorFieldMapperTests.java +++ b/x-pack/plugin/vectors/src/test/java/org/elasticsearch/xpack/vectors/mapper/DenseVectorFieldMapperTests.java @@ -9,6 +9,9 @@ import com.carrotsearch.randomizedtesting.generators.RandomPicks; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat; import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.KnnVectorField; import org.apache.lucene.index.IndexableField; @@ -16,6 +19,9 @@ import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; import org.elasticsearch.Version; +import org.elasticsearch.index.codec.CodecService; +import org.elasticsearch.index.codec.PerFieldMappingCodec; +import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.LuceneDocument; @@ -35,15 +41,19 @@ import java.util.Collection; import java.util.List; +import static org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat.DEFAULT_BEAM_WIDTH; +import static org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat.DEFAULT_MAX_CONN; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; public class DenseVectorFieldMapperTests extends MapperTestCase { private final boolean indexed; + private final boolean indexOptionsSet; public DenseVectorFieldMapperTests() { this.indexed = randomBoolean(); + this.indexOptionsSet = randomBoolean(); } @Override @@ -56,6 +66,13 @@ protected void minimalMapping(XContentBuilder b) throws IOException { b.field("type", "dense_vector").field("dims", 4); if (indexed) { b.field("index", true).field("similarity", "dot_product"); + if (indexOptionsSet) { + b.startObject("index_options"); + b.field("type", "hnsw"); + b.field("m", 5); + b.field("ef_construction", 50); + b.endObject(); + } } } @@ -86,6 +103,21 @@ protected void registerParameters(ParameterChecker checker) throws IOException { fieldMapping(b -> b.field("type", "dense_vector") .field("dims", 4) .field("index", false))); + checker.registerConflictCheck("index_options", + fieldMapping(b -> b.field("type", "dense_vector") + .field("dims", 4) + .field("index", true) + .field("similarity", "dot_product") + .startObject("index_options") + .field("m", 5) + .field("ef_construction", 80) + .endObject()), + fieldMapping(b -> b.field("type", "dense_vector") + .field("dims", 4) + .field("index", true) + .field("similarity", "dot_product") + .startObject("index_options") + .endObject())); } @Override @@ -203,6 +235,14 @@ public void testInvalidParameters() { .field("dims", 3) .field("similarity", "l2_norm")))); assertThat(e.getMessage(), containsString("Field [similarity] requires field [index] to be configured")); + + e = expectThrows(MapperParsingException.class, + () -> createDocumentMapper(fieldMapping(b -> b + .field("type", "dense_vector") + .field("dims", 3) + .startObject("index_options") + .endObject()))); + assertThat(e.getMessage(), containsString("Field [index_options] requires field [index] to be configured")); } public void testAddDocumentsToIndexBefore_V_7_5_0() throws Exception { @@ -288,4 +328,27 @@ public void testCannotBeUsedInMultifields() { }))); assertThat(e.getMessage(), containsString("Field [vectors] of type [dense_vector] can't be used in multifields")); } + + protected void mappingWithCustomIndexOptions(XContentBuilder b, int m, int efConstruction) throws IOException { + b.field("type", "dense_vector"); + b.field("dims", 4); + b.field("index", true); + b.field("similarity", "dot_product"); + b.startObject("index_options"); + b.field("m", m) ; + b.field("ef_construction", efConstruction); + b.endObject(); + } + + public void testKnnVectorsFormat() throws IOException { + int m = randomIntBetween(1, DEFAULT_MAX_CONN + 10); + int efConstruction = randomIntBetween(1, DEFAULT_BEAM_WIDTH + 10); + MapperService mapperService = createMapperService(fieldMapping(b -> mappingWithCustomIndexOptions(b, m, efConstruction))); + CodecService codecService = new CodecService(mapperService); + Codec codec = codecService.codec("default"); + assertThat(codec, instanceOf(PerFieldMappingCodec.class)); + KnnVectorsFormat knnVectorsFormat = ((PerFieldMappingCodec) codec).getKnnVectorsFormatForField("field"); + assertThat(knnVectorsFormat, instanceOf(Lucene90HnswVectorsFormat.class)); + //TODO: add more assertions once LUCENE-10178 is implemented + } } From d5cc59f2a5a8a8fdf56fdad015dd545ba8aad1e8 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Fri, 15 Oct 2021 15:29:34 -0400 Subject: [PATCH 2/3] Address Julie's comments --- .../index/codec/PerFieldMappingCodec.java | 2 +- .../index/mapper/MappingLookup.java | 4 +- .../PerFieldKnnVectorsFormatFieldMapper.java | 25 ++++ .../index/mapper/VectorFieldMapper.java | 128 ------------------ .../test/vectors/10_dense_vector_basic.yml | 5 +- .../vectors/20_dense_vector_special_cases.yml | 1 + .../mapper/DenseVectorFieldMapper.java | 117 ++++++++++++++-- .../mapper/DenseVectorFieldMapperTests.java | 79 ++++++++--- 8 files changed, 196 insertions(+), 165 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/index/mapper/PerFieldKnnVectorsFormatFieldMapper.java delete mode 100644 server/src/main/java/org/elasticsearch/index/mapper/VectorFieldMapper.java diff --git a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMappingCodec.java b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMappingCodec.java index e11194e0da576..4c23fe9089899 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMappingCodec.java +++ b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMappingCodec.java @@ -18,7 +18,7 @@ import org.elasticsearch.index.mapper.MapperService; /** - * {@link PerFieldMappingCodec This postings format} is the default + * {@link PerFieldMappingCodec This Lucene codec} provides the default * {@link PostingsFormat} and {@link KnnVectorsFormat} for Elasticsearch. It utilizes the * {@link MapperService} to lookup a {@link PostingsFormat} and {@link KnnVectorsFormat} per field. This * allows users to change the low level postings format and vectors format for individual fields diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java b/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java index 29a154e707536..c0eb5837cee05 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java @@ -236,8 +236,8 @@ public PostingsFormat getPostingsFormat(String field) { */ public KnnVectorsFormat getKnnVectorsFormatForField(String field) { Mapper fieldMapper = fieldMappers.get(field); - if (fieldMapper instanceof VectorFieldMapper) { - return ((VectorFieldMapper) fieldMapper).getKnnVectorsFormatForField(); + if (fieldMapper instanceof PerFieldKnnVectorsFormatFieldMapper) { + return ((PerFieldKnnVectorsFormatFieldMapper) fieldMapper).getKnnVectorsFormatForField(); } else { return null; } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/PerFieldKnnVectorsFormatFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/PerFieldKnnVectorsFormatFieldMapper.java new file mode 100644 index 0000000000000..91f2a05ed9275 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/mapper/PerFieldKnnVectorsFormatFieldMapper.java @@ -0,0 +1,25 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ +package org.elasticsearch.index.mapper; + +import org.apache.lucene.codecs.KnnVectorsFormat; + +/** + * Field mapper used for the only purpose to provide a custom knn vectors format. + * For internal use only. + */ + +public interface PerFieldKnnVectorsFormatFieldMapper { + + /** + * Returns the knn vectors format that is customly set up for this field or {@code null} if + * the format is not set up or if the set up format matches the default format. + * @return the knn vectors format for the field, or {@code null} if the default format should be used + */ + KnnVectorsFormat getKnnVectorsFormatForField(); +} diff --git a/server/src/main/java/org/elasticsearch/index/mapper/VectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/VectorFieldMapper.java deleted file mode 100644 index 2c1c38a4b3927..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/mapper/VectorFieldMapper.java +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0 and the Server Side Public License, v 1; you may not use this file except - * in compliance with, at your election, the Elastic License 2.0 or the Server - * Side Public License, v 1. - */ -package org.elasticsearch.index.mapper; - -import org.apache.lucene.codecs.KnnVectorsFormat; -import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat; -import org.elasticsearch.common.xcontent.support.XContentMapValues; -import org.elasticsearch.xcontent.ToXContent; -import org.elasticsearch.xcontent.XContentBuilder; - -import java.io.IOException; -import java.util.Map; -import java.util.Objects; - -import static org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat.DEFAULT_MAX_CONN; -import static org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat.DEFAULT_BEAM_WIDTH; - -/** - * Field mapper for a vector field for ann search. - */ - -public abstract class VectorFieldMapper extends FieldMapper { - public static final IndexOptions DEFAULT_INDEX_OPTIONS = new HNSWIndexOptions(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH); - protected final IndexOptions indexOptions; - - protected VectorFieldMapper(String simpleName, MappedFieldType mappedFieldType, MultiFields multiFields, CopyTo copyTo, - IndexOptions indexOptions) { - super(simpleName, mappedFieldType, multiFields, copyTo); - this.indexOptions = indexOptions; - } - - /** - * Returns the knn vectors format that is customly set up for this field or {@code null} if - * the format is not set up or if the set up format matches the default format. - * @return the knn vectors format for the field, or {@code null} if the default format should be used - */ - public KnnVectorsFormat getKnnVectorsFormatForField() { - if (indexOptions == null && indexOptions == DEFAULT_INDEX_OPTIONS) { - return null; - } else { - HNSWIndexOptions hnswIndexOptions = (HNSWIndexOptions) indexOptions; - return new Lucene90HnswVectorsFormat(hnswIndexOptions.m, hnswIndexOptions.efConstruction); - } - } - - public static IndexOptions parseVectorIndexOptions(String fieldName, Object propNode) { - if (propNode == null) { - return null; - } - Map indexOptionsMap = (Map) propNode; - String type = XContentMapValues.nodeStringValue(indexOptionsMap.remove("type"), "hnsw"); - if (type.equals("hnsw")) { - return HNSWIndexOptions.parseIndexOptions(fieldName, indexOptionsMap); - } else { - throw new MapperParsingException("Unknown vector index options type [" + type + "] for field [" + fieldName + "]"); - } - } - - public abstract static class IndexOptions implements ToXContent { - protected final String type; - public IndexOptions(String type) { - this.type = type; - } - } - - public static class HNSWIndexOptions extends IndexOptions { - private final int m; - private final int efConstruction; - - public HNSWIndexOptions(int m, int efConstruction) { - super("hnsw"); - this.m = m; - this.efConstruction = efConstruction; - } - - public int m() { - return m; - } - - public int efConstruction() { - return efConstruction; - } - - public static IndexOptions parseIndexOptions(String fieldName, Map indexOptionsMap) { - int m = XContentMapValues.nodeIntegerValue(indexOptionsMap.remove("m"), DEFAULT_MAX_CONN); - int efConstruction = XContentMapValues.nodeIntegerValue(indexOptionsMap.remove("ef_construction"), DEFAULT_BEAM_WIDTH); - MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); - if (m == DEFAULT_MAX_CONN && efConstruction == DEFAULT_BEAM_WIDTH) { - return VectorFieldMapper.DEFAULT_INDEX_OPTIONS; - } else { - return new HNSWIndexOptions(m, efConstruction); - } - } - - @Override - public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(); - builder.field("type", type); - builder.field("m", m); - builder.field("ef_construction", efConstruction); - builder.endObject(); - return builder; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - HNSWIndexOptions that = (HNSWIndexOptions) o; - return m == that.m && efConstruction == that.efConstruction; - } - - @Override - public int hashCode() { - return Objects.hash(type, m, efConstruction); - } - - @Override - public String toString() { - return "{type=" + type + ", m=" + m + ", ef_construction=" + efConstruction + " }"; - } - } -} diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/10_dense_vector_basic.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/10_dense_vector_basic.yml index 952ee4654f040..91ee333e5e924 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/10_dense_vector_basic.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/10_dense_vector_basic.yml @@ -18,10 +18,7 @@ setup: dims: 5 index: true similarity: dot_product - index_options: - type: hnsw - m: 15 - ef_construction: 80 + - do: index: index: test-index diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/20_dense_vector_special_cases.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/20_dense_vector_special_cases.yml index 9d097c55a4510..eb7b9850f4399 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/20_dense_vector_special_cases.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/20_dense_vector_special_cases.yml @@ -22,6 +22,7 @@ setup: index_options: type: hnsw m: 15 + ef_construction: 50 --- "Indexing of Dense vectors should error when dims don't match defined in the mapping": diff --git a/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/mapper/DenseVectorFieldMapper.java b/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/mapper/DenseVectorFieldMapper.java index 41299c925168a..11ef4eea05562 100644 --- a/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/mapper/DenseVectorFieldMapper.java +++ b/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/mapper/DenseVectorFieldMapper.java @@ -8,6 +8,8 @@ package org.elasticsearch.xpack.vectors.mapper; +import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat; import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Field; import org.apache.lucene.document.KnnVectorField; @@ -16,7 +18,10 @@ import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; import org.elasticsearch.Version; -import org.elasticsearch.index.mapper.VectorFieldMapper; +import org.elasticsearch.index.mapper.MappingParser; +import org.elasticsearch.index.mapper.PerFieldKnnVectorsFormatFieldMapper; +import org.elasticsearch.xcontent.ToXContent; +import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentParser.Token; import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.index.fielddata.IndexFieldData; @@ -41,17 +46,21 @@ import java.time.ZoneId; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.function.Supplier; +import static org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat.DEFAULT_BEAM_WIDTH; +import static org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat.DEFAULT_MAX_CONN; import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; /** * A {@link FieldMapper} for indexing a dense vector of floats. */ -public class DenseVectorFieldMapper extends VectorFieldMapper { +public class DenseVectorFieldMapper extends FieldMapper implements PerFieldKnnVectorsFormatFieldMapper { public static final String CONTENT_TYPE = "dense_vector"; public static short MAX_DIMS_COUNT = 2048; //maximum allowed number of dimensions + public static final IndexOptions DEFAULT_INDEX_OPTIONS = new HnswIndexOptions(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH); private static final byte INT_BYTES = 4; private static DenseVectorFieldMapper toType(FieldMapper in) { @@ -75,7 +84,7 @@ public static class Builder extends FieldMapper.Builder { private final Parameter similarity = Parameter.enumParam( "similarity", false, m -> toType(m).similarity, null, VectorSimilarity.class); private final Parameter indexOptions = new Parameter<>("index_options", false, () -> null, - (n, c, o) -> VectorFieldMapper.parseVectorIndexOptions(n, o), m -> toType(m).indexOptions); + (n, c, o) -> o == null ? null : parseIndexOptions(n, o), m -> toType(m).indexOptions); private final Parameter> meta = Parameter.metaParam(); final Version indexVersionCreated; @@ -105,10 +114,10 @@ public DenseVectorFieldMapper build(MapperBuilderContext context) { dims.getValue(), indexed.getValue(), similarity.getValue(), + indexOptions.getValue(), indexVersionCreated, multiFieldsBuilder.build(this, context), - copyTo.build(), - indexOptions.getValue()); + copyTo.build()); } } @@ -122,6 +131,71 @@ enum VectorSimilarity { } } + abstract static class IndexOptions implements ToXContent { + final String type; + IndexOptions(String type) { + this.type = type; + } + } + + static class HnswIndexOptions extends IndexOptions { + private final int m; + private final int efConstruction; + + static IndexOptions parseIndexOptions(String fieldName, Map indexOptionsMap) { + Object mNode = indexOptionsMap.remove("m"); + Object efConstructionNode = indexOptionsMap.remove("ef_construction"); + if (mNode == null) { + throw new MapperParsingException("[index_options] of type [hnsw] requires field [m] to be configured"); + } + if (efConstructionNode == null) { + throw new MapperParsingException("[index_options] of type [hnsw] requires field [ef_construction] to be configured"); + } + int m = XContentMapValues.nodeIntegerValue(mNode); + int efConstruction = XContentMapValues.nodeIntegerValue(efConstructionNode); + MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); + if (m == DEFAULT_MAX_CONN && efConstruction == DEFAULT_BEAM_WIDTH) { + return DEFAULT_INDEX_OPTIONS; + } else { + return new HnswIndexOptions(m, efConstruction); + } + } + + private HnswIndexOptions(int m, int efConstruction) { + super("hnsw"); + this.m = m; + this.efConstruction = efConstruction; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field("type", type); + builder.field("m", m); + builder.field("ef_construction", efConstruction); + builder.endObject(); + return builder; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + HnswIndexOptions that = (HnswIndexOptions) o; + return m == that.m && efConstruction == that.efConstruction; + } + + @Override + public int hashCode() { + return Objects.hash(type, m, efConstruction); + } + + @Override + public String toString() { + return "{type=" + type + ", m=" + m + ", ef_construction=" + efConstruction + " }"; + } + } + public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated()), notInMultiFields(CONTENT_TYPE)); @@ -191,15 +265,17 @@ public Query termQuery(Object value, SearchExecutionContext context) { private final int dims; private final boolean indexed; private final VectorSimilarity similarity; + private final IndexOptions indexOptions; private final Version indexCreatedVersion; private DenseVectorFieldMapper(String simpleName, MappedFieldType mappedFieldType, int dims, boolean indexed, - VectorSimilarity similarity, Version indexCreatedVersion, MultiFields multiFields, - CopyTo copyTo, VectorFieldMapper.IndexOptions indexOptions) { - super(simpleName, mappedFieldType, multiFields, copyTo, indexOptions); + VectorSimilarity similarity, IndexOptions indexOptions, + Version indexCreatedVersion, MultiFields multiFields, CopyTo copyTo) { + super(simpleName, mappedFieldType, multiFields, copyTo); this.dims = dims; this.indexed = indexed; this.similarity = similarity; + this.indexOptions = indexOptions; this.indexCreatedVersion = indexCreatedVersion; } @@ -295,4 +371,29 @@ protected String contentType() { public FieldMapper.Builder getMergeBuilder() { return new Builder(simpleName(), indexCreatedVersion).init(this); } + + public static IndexOptions parseIndexOptions(String fieldName, Object propNode) { + @SuppressWarnings("unchecked") + Map indexOptionsMap = (Map) propNode; + Object typeNode = indexOptionsMap.remove("type"); + if (typeNode == null) { + throw new MapperParsingException("[index_options] requires field [type] to be configured"); + } + String type = XContentMapValues.nodeStringValue(typeNode); + if (type.equals("hnsw")) { + return HnswIndexOptions.parseIndexOptions(fieldName, indexOptionsMap); + } else { + throw new MapperParsingException("Unknown vector index options type [" + type + "] for field [" + fieldName + "]"); + } + } + + @Override + public KnnVectorsFormat getKnnVectorsFormatForField() { + if (indexOptions == null || indexOptions == DEFAULT_INDEX_OPTIONS) { + return null; // use default format + } else { + HnswIndexOptions hnswIndexOptions = (HnswIndexOptions) indexOptions; + return new Lucene90HnswVectorsFormat(hnswIndexOptions.m, hnswIndexOptions.efConstruction); + } + } } diff --git a/x-pack/plugin/vectors/src/test/java/org/elasticsearch/xpack/vectors/mapper/DenseVectorFieldMapperTests.java b/x-pack/plugin/vectors/src/test/java/org/elasticsearch/xpack/vectors/mapper/DenseVectorFieldMapperTests.java index 1e321b68843ec..4fa96288ffca2 100644 --- a/x-pack/plugin/vectors/src/test/java/org/elasticsearch/xpack/vectors/mapper/DenseVectorFieldMapperTests.java +++ b/x-pack/plugin/vectors/src/test/java/org/elasticsearch/xpack/vectors/mapper/DenseVectorFieldMapperTests.java @@ -68,9 +68,9 @@ protected void minimalMapping(XContentBuilder b) throws IOException { b.field("index", true).field("similarity", "dot_product"); if (indexOptionsSet) { b.startObject("index_options"); - b.field("type", "hnsw"); - b.field("m", 5); - b.field("ef_construction", 50); + b.field("type", "hnsw"); + b.field("m", 5); + b.field("ef_construction", 50); b.endObject(); } } @@ -107,16 +107,15 @@ protected void registerParameters(ParameterChecker checker) throws IOException { fieldMapping(b -> b.field("type", "dense_vector") .field("dims", 4) .field("index", true) - .field("similarity", "dot_product") - .startObject("index_options") - .field("m", 5) - .field("ef_construction", 80) - .endObject()), + .field("similarity", "dot_product")), fieldMapping(b -> b.field("type", "dense_vector") .field("dims", 4) .field("index", true) .field("similarity", "dot_product") .startObject("index_options") + .field("type" , "hnsw") + .field("m", 5) + .field("ef_construction", 80) .endObject())); } @@ -241,8 +240,45 @@ public void testInvalidParameters() { .field("type", "dense_vector") .field("dims", 3) .startObject("index_options") + .field("type", "hnsw") + .field("m", 5) + .field("ef_construction", 100) .endObject()))); assertThat(e.getMessage(), containsString("Field [index_options] requires field [index] to be configured")); + + e = expectThrows(MapperParsingException.class, + () -> createDocumentMapper(fieldMapping(b -> b + .field("type", "dense_vector") + .field("dims", 3) + .field("similarity", "l2_norm") + .field("index", true) + .startObject("index_options") + .endObject()))); + assertThat(e.getMessage(), containsString("[index_options] requires field [type] to be configured")); + + e = expectThrows(MapperParsingException.class, + () -> createDocumentMapper(fieldMapping(b -> b + .field("type", "dense_vector") + .field("dims", 3) + .field("similarity", "l2_norm") + .field("index", true) + .startObject("index_options") + .field("type", "hnsw") + .field("ef_construction", 100) + .endObject()))); + assertThat(e.getMessage(), containsString("[index_options] of type [hnsw] requires field [m] to be configured")); + + e = expectThrows(MapperParsingException.class, + () -> createDocumentMapper(fieldMapping(b -> b + .field("type", "dense_vector") + .field("dims", 3) + .field("similarity", "l2_norm") + .field("index", true) + .startObject("index_options") + .field("type", "hnsw") + .field("m", 5) + .endObject()))); + assertThat(e.getMessage(), containsString("[index_options] of type [hnsw] requires field [ef_construction] to be configured")); } public void testAddDocumentsToIndexBefore_V_7_5_0() throws Exception { @@ -329,21 +365,20 @@ public void testCannotBeUsedInMultifields() { assertThat(e.getMessage(), containsString("Field [vectors] of type [dense_vector] can't be used in multifields")); } - protected void mappingWithCustomIndexOptions(XContentBuilder b, int m, int efConstruction) throws IOException { - b.field("type", "dense_vector"); - b.field("dims", 4); - b.field("index", true); - b.field("similarity", "dot_product"); - b.startObject("index_options"); - b.field("m", m) ; - b.field("ef_construction", efConstruction); - b.endObject(); - } - public void testKnnVectorsFormat() throws IOException { - int m = randomIntBetween(1, DEFAULT_MAX_CONN + 10); - int efConstruction = randomIntBetween(1, DEFAULT_BEAM_WIDTH + 10); - MapperService mapperService = createMapperService(fieldMapping(b -> mappingWithCustomIndexOptions(b, m, efConstruction))); + final int m = randomIntBetween(1, DEFAULT_MAX_CONN + 10); + final int efConstruction = randomIntBetween(1, DEFAULT_BEAM_WIDTH + 10); + MapperService mapperService = createMapperService(fieldMapping(b -> { + b.field("type", "dense_vector"); + b.field("dims", 4); + b.field("index", true); + b.field("similarity", "dot_product"); + b.startObject("index_options"); + b.field("type", "hnsw"); + b.field("m", m); + b.field("ef_construction", efConstruction); + b.endObject(); + })); CodecService codecService = new CodecService(mapperService); Codec codec = codecService.codec("default"); assertThat(codec, instanceOf(PerFieldMappingCodec.class)); From db231e95509f4b9c62f3ac4c736862e7dbc4a46c Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Sat, 16 Oct 2021 17:46:08 -0400 Subject: [PATCH 3/3] Address Julie's comments 2 --- .../elasticsearch/index/codec/CodecService.java | 4 ++-- ...ppingCodec.java => PerFieldMapperCodec.java} | 10 +++++----- .../PerFieldKnnVectorsFormatFieldMapper.java | 4 ++-- .../elasticsearch/index/codec/CodecTests.java | 2 +- .../mapper/CompletionFieldMapperTests.java | 6 +++--- .../vectors/mapper/DenseVectorFieldMapper.java | 17 +++++------------ .../mapper/DenseVectorFieldMapperTests.java | 6 +++--- 7 files changed, 21 insertions(+), 28 deletions(-) rename server/src/main/java/org/elasticsearch/index/codec/{PerFieldMappingCodec.java => PerFieldMapperCodec.java} (86%) diff --git a/server/src/main/java/org/elasticsearch/index/codec/CodecService.java b/server/src/main/java/org/elasticsearch/index/codec/CodecService.java index 47cc25ea9cd2b..bd2850bc6a5a8 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/CodecService.java +++ b/server/src/main/java/org/elasticsearch/index/codec/CodecService.java @@ -38,9 +38,9 @@ public CodecService(@Nullable MapperService mapperService) { codecs.put(BEST_COMPRESSION_CODEC, new Lucene90Codec(Lucene90Codec.Mode.BEST_COMPRESSION)); } else { codecs.put(DEFAULT_CODEC, - new PerFieldMappingCodec(Lucene90Codec.Mode.BEST_SPEED, mapperService)); + new PerFieldMapperCodec(Lucene90Codec.Mode.BEST_SPEED, mapperService)); codecs.put(BEST_COMPRESSION_CODEC, - new PerFieldMappingCodec(Lucene90Codec.Mode.BEST_COMPRESSION, mapperService)); + new PerFieldMapperCodec(Lucene90Codec.Mode.BEST_COMPRESSION, mapperService)); } codecs.put(LUCENE_DEFAULT_CODEC, Codec.getDefault()); for (String codec : Codec.availableCodecs()) { diff --git a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMappingCodec.java b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java similarity index 86% rename from server/src/main/java/org/elasticsearch/index/codec/PerFieldMappingCodec.java rename to server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java index 4c23fe9089899..22ed0c32975df 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMappingCodec.java +++ b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java @@ -18,24 +18,24 @@ import org.elasticsearch.index.mapper.MapperService; /** - * {@link PerFieldMappingCodec This Lucene codec} provides the default + * {@link PerFieldMapperCodec This Lucene codec} provides the default * {@link PostingsFormat} and {@link KnnVectorsFormat} for Elasticsearch. It utilizes the * {@link MapperService} to lookup a {@link PostingsFormat} and {@link KnnVectorsFormat} per field. This * allows users to change the low level postings format and vectors format for individual fields * per index in real time via the mapping API. If no specific postings format or vector format is * configured for a specific field the default postings or vector format is used. */ -public class PerFieldMappingCodec extends Lucene90Codec { +public class PerFieldMapperCodec extends Lucene90Codec { private final MapperService mapperService; private final DocValuesFormat docValuesFormat = new Lucene90DocValuesFormat(); static { - assert Codec.forName(Lucene.LATEST_CODEC).getClass().isAssignableFrom(PerFieldMappingCodec.class) : - "PerFieldMappingCodec must subclass the latest " + "lucene codec: " + Lucene.LATEST_CODEC; + assert Codec.forName(Lucene.LATEST_CODEC).getClass().isAssignableFrom(PerFieldMapperCodec.class) : + "PerFieldMapperCodec must subclass the latest " + "lucene codec: " + Lucene.LATEST_CODEC; } - public PerFieldMappingCodec(Mode compressionMode, MapperService mapperService) { + public PerFieldMapperCodec(Mode compressionMode, MapperService mapperService) { super(compressionMode); this.mapperService = mapperService; } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/PerFieldKnnVectorsFormatFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/PerFieldKnnVectorsFormatFieldMapper.java index 91f2a05ed9275..efd9b5a604a9c 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/PerFieldKnnVectorsFormatFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/PerFieldKnnVectorsFormatFieldMapper.java @@ -17,8 +17,8 @@ public interface PerFieldKnnVectorsFormatFieldMapper { /** - * Returns the knn vectors format that is customly set up for this field or {@code null} if - * the format is not set up or if the set up format matches the default format. + * Returns the knn vectors format that is customly set up for this field + * or {@code null} if the format is not set up. * @return the knn vectors format for the field, or {@code null} if the default format should be used */ KnnVectorsFormat getKnnVectorsFormatForField(); diff --git a/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java b/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java index 9a666f9057e5d..4e2453834e5f1 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java @@ -40,7 +40,7 @@ public class CodecTests extends ESTestCase { public void testResolveDefaultCodecs() throws Exception { CodecService codecService = createCodecService(); - assertThat(codecService.codec("default"), instanceOf(PerFieldMappingCodec.class)); + assertThat(codecService.codec("default"), instanceOf(PerFieldMapperCodec.class)); assertThat(codecService.codec("default"), instanceOf(Lucene90Codec.class)); } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/CompletionFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/CompletionFieldMapperTests.java index a240f1da193b4..6b4b05c346cd0 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/CompletionFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/CompletionFieldMapperTests.java @@ -29,7 +29,7 @@ import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.unit.Fuzziness; -import org.elasticsearch.index.codec.PerFieldMappingCodec; +import org.elasticsearch.index.codec.PerFieldMapperCodec; import org.elasticsearch.xcontent.ToXContent; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentFactory; @@ -122,8 +122,8 @@ public void testPostingsFormat() throws IOException { MapperService mapperService = createMapperService(fieldMapping(this::minimalMapping)); CodecService codecService = new CodecService(mapperService); Codec codec = codecService.codec("default"); - assertThat(codec, instanceOf(PerFieldMappingCodec.class)); - PerFieldMappingCodec perFieldCodec = (PerFieldMappingCodec) codec; + assertThat(codec, instanceOf(PerFieldMapperCodec.class)); + PerFieldMapperCodec perFieldCodec = (PerFieldMapperCodec) codec; assertThat(perFieldCodec.getPostingsFormatForField("field"), instanceOf(Completion90PostingsFormat.class)); } diff --git a/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/mapper/DenseVectorFieldMapper.java b/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/mapper/DenseVectorFieldMapper.java index 11ef4eea05562..51f25db3fecc6 100644 --- a/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/mapper/DenseVectorFieldMapper.java +++ b/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/mapper/DenseVectorFieldMapper.java @@ -49,8 +49,6 @@ import java.util.Objects; import java.util.function.Supplier; -import static org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat.DEFAULT_BEAM_WIDTH; -import static org.apache.lucene.codecs.lucene90.Lucene90HnswVectorsFormat.DEFAULT_MAX_CONN; import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; /** @@ -60,7 +58,6 @@ public class DenseVectorFieldMapper extends FieldMapper implements PerFieldKnnVe public static final String CONTENT_TYPE = "dense_vector"; public static short MAX_DIMS_COUNT = 2048; //maximum allowed number of dimensions - public static final IndexOptions DEFAULT_INDEX_OPTIONS = new HnswIndexOptions(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH); private static final byte INT_BYTES = 4; private static DenseVectorFieldMapper toType(FieldMapper in) { @@ -131,14 +128,14 @@ enum VectorSimilarity { } } - abstract static class IndexOptions implements ToXContent { + private abstract static class IndexOptions implements ToXContent { final String type; IndexOptions(String type) { this.type = type; } } - static class HnswIndexOptions extends IndexOptions { + private static class HnswIndexOptions extends IndexOptions { private final int m; private final int efConstruction; @@ -154,11 +151,7 @@ static IndexOptions parseIndexOptions(String fieldName, Map indexOpti int m = XContentMapValues.nodeIntegerValue(mNode); int efConstruction = XContentMapValues.nodeIntegerValue(efConstructionNode); MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); - if (m == DEFAULT_MAX_CONN && efConstruction == DEFAULT_BEAM_WIDTH) { - return DEFAULT_INDEX_OPTIONS; - } else { - return new HnswIndexOptions(m, efConstruction); - } + return new HnswIndexOptions(m, efConstruction); } private HnswIndexOptions(int m, int efConstruction) { @@ -372,7 +365,7 @@ public FieldMapper.Builder getMergeBuilder() { return new Builder(simpleName(), indexCreatedVersion).init(this); } - public static IndexOptions parseIndexOptions(String fieldName, Object propNode) { + private static IndexOptions parseIndexOptions(String fieldName, Object propNode) { @SuppressWarnings("unchecked") Map indexOptionsMap = (Map) propNode; Object typeNode = indexOptionsMap.remove("type"); @@ -389,7 +382,7 @@ public static IndexOptions parseIndexOptions(String fieldName, Object propNode) @Override public KnnVectorsFormat getKnnVectorsFormatForField() { - if (indexOptions == null || indexOptions == DEFAULT_INDEX_OPTIONS) { + if (indexOptions == null) { return null; // use default format } else { HnswIndexOptions hnswIndexOptions = (HnswIndexOptions) indexOptions; diff --git a/x-pack/plugin/vectors/src/test/java/org/elasticsearch/xpack/vectors/mapper/DenseVectorFieldMapperTests.java b/x-pack/plugin/vectors/src/test/java/org/elasticsearch/xpack/vectors/mapper/DenseVectorFieldMapperTests.java index 4fa96288ffca2..ff80198eff6d9 100644 --- a/x-pack/plugin/vectors/src/test/java/org/elasticsearch/xpack/vectors/mapper/DenseVectorFieldMapperTests.java +++ b/x-pack/plugin/vectors/src/test/java/org/elasticsearch/xpack/vectors/mapper/DenseVectorFieldMapperTests.java @@ -20,7 +20,7 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.Version; import org.elasticsearch.index.codec.CodecService; -import org.elasticsearch.index.codec.PerFieldMappingCodec; +import org.elasticsearch.index.codec.PerFieldMapperCodec; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.index.mapper.DocumentMapper; @@ -381,8 +381,8 @@ public void testKnnVectorsFormat() throws IOException { })); CodecService codecService = new CodecService(mapperService); Codec codec = codecService.codec("default"); - assertThat(codec, instanceOf(PerFieldMappingCodec.class)); - KnnVectorsFormat knnVectorsFormat = ((PerFieldMappingCodec) codec).getKnnVectorsFormatForField("field"); + assertThat(codec, instanceOf(PerFieldMapperCodec.class)); + KnnVectorsFormat knnVectorsFormat = ((PerFieldMapperCodec) codec).getKnnVectorsFormatForField("field"); assertThat(knnVectorsFormat, instanceOf(Lucene90HnswVectorsFormat.class)); //TODO: add more assertions once LUCENE-10178 is implemented }