From 168cca32d4caa441f1388ae45fe8cd12da923bd0 Mon Sep 17 00:00:00 2001 From: Yannick Welsch Date: Wed, 10 Nov 2021 16:06:05 +0100 Subject: [PATCH 1/3] Add field usage support for vectors --- .../index/search/stats/FieldUsageStats.java | 29 ++++++- .../search/stats/ShardFieldUsageTracker.java | 14 ++- .../FieldUsageTrackingDirectoryReader.java | 23 +++++ .../vectors/50_dense_vector_field_usage.yml | 87 +++++++++++++++++++ 4 files changed, 149 insertions(+), 4 deletions(-) create mode 100644 x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/50_dense_vector_field_usage.yml diff --git a/server/src/main/java/org/elasticsearch/index/search/stats/FieldUsageStats.java b/server/src/main/java/org/elasticsearch/index/search/stats/FieldUsageStats.java index 4c21e557b4e26..9669da82c7c3e 100644 --- a/server/src/main/java/org/elasticsearch/index/search/stats/FieldUsageStats.java +++ b/server/src/main/java/org/elasticsearch/index/search/stats/FieldUsageStats.java @@ -8,6 +8,7 @@ package org.elasticsearch.index.search.stats; +import org.elasticsearch.Version; import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; @@ -39,6 +40,7 @@ public class FieldUsageStats implements ToXContentObject, Writeable { public static final String TERM_VECTORS = "term_vectors"; // possibly refine this one public static final String POINTS = "points"; public static final String PROXIMITY = "proximity"; + public static final String VECTORS = "vectors"; private final Map stats; @@ -122,11 +124,12 @@ public enum UsageContext { PAYLOADS, TERM_VECTORS, // possibly refine this one POINTS, + VECTORS, } public static class PerFieldUsageStats implements ToXContentFragment, Writeable { - static final PerFieldUsageStats EMPTY = new PerFieldUsageStats(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + static final PerFieldUsageStats EMPTY = new PerFieldUsageStats(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); private final long any; private final long proximity; @@ -141,6 +144,7 @@ public static class PerFieldUsageStats implements ToXContentFragment, Writeable private final long payloads; private final long termVectors; private final long points; + private final long vectors; public PerFieldUsageStats( long any, @@ -155,7 +159,8 @@ public PerFieldUsageStats( long norms, long payloads, long termVectors, - long points + long points, + long vectors ) { this.any = any; this.proximity = proximity; @@ -170,6 +175,7 @@ public PerFieldUsageStats( this.payloads = payloads; this.termVectors = termVectors; this.points = points; + this.vectors = vectors; } private PerFieldUsageStats add(PerFieldUsageStats other) { @@ -186,7 +192,8 @@ private PerFieldUsageStats add(PerFieldUsageStats other) { norms + other.norms, payloads + other.payloads, termVectors + other.termVectors, - points + other.points + points + other.points, + vectors + other.vectors ); } @@ -204,6 +211,11 @@ public PerFieldUsageStats(StreamInput in) throws IOException { payloads = in.readVLong(); termVectors = in.readVLong(); points = in.readVLong(); + if (in.getVersion().onOrAfter(Version.V_8_1_0)) { + vectors = in.readVLong(); + } else { + vectors = 0; + } } @Override @@ -221,6 +233,9 @@ public void writeTo(StreamOutput out) throws IOException { out.writeVLong(payloads); out.writeVLong(termVectors); out.writeVLong(points); + if (out.getVersion().onOrAfter(Version.V_8_1_0)) { + out.writeVLong(vectors); + } } @Override @@ -240,6 +255,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(POINTS, points); builder.field(NORMS, norms); builder.field(TERM_VECTORS, termVectors); + builder.field(VECTORS, vectors); return builder; } @@ -278,6 +294,9 @@ public Set keySet() { if (points > 0L) { set.add(UsageContext.POINTS); } + if (vectors > 0L) { + set.add(UsageContext.VECTORS); + } return set; } @@ -325,6 +344,10 @@ public long getPoints() { return points; } + public long getVectors() { + return vectors; + } + public long getProximity() { return proximity; } diff --git a/server/src/main/java/org/elasticsearch/index/search/stats/ShardFieldUsageTracker.java b/server/src/main/java/org/elasticsearch/index/search/stats/ShardFieldUsageTracker.java index d166eebc50fdd..40acf896a27cd 100644 --- a/server/src/main/java/org/elasticsearch/index/search/stats/ShardFieldUsageTracker.java +++ b/server/src/main/java/org/elasticsearch/index/search/stats/ShardFieldUsageTracker.java @@ -61,7 +61,8 @@ public FieldUsageStats stats(String... fields) { ifs.norms.longValue(), ifs.payloads.longValue(), ifs.termVectors.longValue(), - ifs.points.longValue() + ifs.points.longValue(), + ifs.vectors.longValue() ); stats.put(entry.getKey(), pf); } @@ -83,6 +84,7 @@ static class InternalFieldStats { final LongAdder payloads = new LongAdder(); final LongAdder termVectors = new LongAdder(); final LongAdder points = new LongAdder(); + final LongAdder vectors = new LongAdder(); } static class PerField { @@ -98,6 +100,7 @@ static class PerField { volatile boolean payloads; volatile boolean termVectors; volatile boolean points; + volatile boolean vectors; } public class FieldUsageStatsTrackingSession implements FieldUsageNotifier, Releasable { @@ -159,6 +162,10 @@ public void close() { any = true; fieldStats.termVectors.increment(); } + if (pf.vectors) { + any = true; + fieldStats.vectors.increment(); + } if (any) { fieldStats.any.increment(); } @@ -227,5 +234,10 @@ public void onPointsUsed(String field) { public void onTermVectorsUsed(String field) { getOrAdd(field).termVectors = true; } + + @Override + public void onVectorsUsed(String field) { + getOrAdd(field).vectors = true; + } } } diff --git a/server/src/main/java/org/elasticsearch/search/internal/FieldUsageTrackingDirectoryReader.java b/server/src/main/java/org/elasticsearch/search/internal/FieldUsageTrackingDirectoryReader.java index 87ab025a813c3..70e5273cdec2b 100644 --- a/server/src/main/java/org/elasticsearch/search/internal/FieldUsageTrackingDirectoryReader.java +++ b/server/src/main/java/org/elasticsearch/search/internal/FieldUsageTrackingDirectoryReader.java @@ -25,7 +25,10 @@ import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.VectorValues; +import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.suggest.document.CompletionTerms; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.automaton.CompiledAutomaton; import org.elasticsearch.common.lucene.index.SequentialStoredFieldsLeafReader; @@ -83,6 +86,8 @@ public interface FieldUsageNotifier { void onPointsUsed(String field); void onTermVectorsUsed(String field); + + void onVectorsUsed(String field); } public static final class FieldUsageTrackingLeafReader extends SequentialStoredFieldsLeafReader { @@ -181,6 +186,24 @@ public NumericDocValues getNormValues(String field) throws IOException { return numericDocValues; } + @Override + public VectorValues getVectorValues(String field) throws IOException { + VectorValues vectorValues = super.getVectorValues(field); + if (vectorValues != null) { + notifier.onVectorsUsed(field); + } + return vectorValues; + } + + @Override + public TopDocs searchNearestVectors(String field, float[] target, int k, Bits acceptDocs) throws IOException { + TopDocs topDocs = super.searchNearestVectors(field, target, k, acceptDocs); + if (topDocs != null) { + notifier.onVectorsUsed(field); + } + return topDocs; + } + @Override public String toString() { final StringBuilder sb = new StringBuilder("FieldUsageTrackingLeafReader(reader="); diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/50_dense_vector_field_usage.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/50_dense_vector_field_usage.yml new file mode 100644 index 0000000000000..565e0f96c4e55 --- /dev/null +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/50_dense_vector_field_usage.yml @@ -0,0 +1,87 @@ +setup: + - skip: + features: headers + + - do: + indices.create: + index: test + body: + settings: + routing.rebalance.enable: none + index.number_of_shards: 1 + index.number_of_replicas: 0 + mappings: + properties: + name: + type: keyword + vector: + type: dense_vector + dims: 5 + index: true + similarity: l2_norm + + - do: + index: + index: test + body: + name: cow.jpg + vector: [ 230.0, 300.33, -34.8988, 15.555, -200.0 ] + comments: + - body: "free the cows" + vector: [ 0.75, 100.0, 0.33, 16.2, -10.2 ] + + - do: + index: + index: test + id: 2 + body: + name: moose.jpg + vector: [ -0.5, 100.0, -13, 14.8, -156.0 ] + comments: + - body: "what a great moose" + vector: [ 11.4, 99.0, 1.55, -2.9, -10.2 ] + + - do: + index: + index: test + id: 3 + body: + name: rabbit.jpg + vector: [ 0.5, 111.3, -13.0, 14.8, -156.0 ] + + - do: + indices.refresh: { } + +--- +"Field usage": + - do: + knn_search: + index: test + body: + fields: [ "name" ] + knn: + field: vector + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + k: 2 + num_candidates: 3 + + - match: {hits.hits.0._id: "2"} + - match: {hits.hits.0.fields.name.0: "moose.jpg"} + + - match: {hits.hits.1._id: "3"} + - match: {hits.hits.1.fields.name.0: "rabbit.jpg"} + + - do: + indices.field_usage_stats: { index: test } + + - is_true: test + - length: { test.shards: 1 } + + - gt: { test.shards.0.stats.all_fields.any: 0 } + - gt: { test.shards.0.stats.all_fields.vectors: 0 } + + - gt: { test.shards.0.stats.fields.vector.any: 0 } + - gt: { test.shards.0.stats.fields.vector.vectors: 0 } + + - gt: { test.shards.0.stats.fields._id.stored_fields: 0 } + - match: { test.shards.0.stats.fields._id.vectors: 0 } From d68406df1990b9f2aee10ff447fa7087cfde3fbf Mon Sep 17 00:00:00 2001 From: Yannick Welsch Date: Wed, 10 Nov 2021 16:39:08 +0100 Subject: [PATCH 2/3] fix --- .../indices/field-usage-stats.asciidoc | 6 ++-- .../vectors/50_dense_vector_field_usage.yml | 28 +++++++++---------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/docs/reference/indices/field-usage-stats.asciidoc b/docs/reference/indices/field-usage-stats.asciidoc index d63faffb25fde..ae89c8dab9a86 100644 --- a/docs/reference/indices/field-usage-stats.asciidoc +++ b/docs/reference/indices/field-usage-stats.asciidoc @@ -128,7 +128,8 @@ The API returns the following response: "doc_values" : 1, "points" : 0, "norms" : 1, - "term_vectors" : 0 + "term_vectors" : 0, + "vectors" : 0 }, "fields": { "_id": { @@ -146,7 +147,8 @@ The API returns the following response: "doc_values" : 0, "points" : 0, "norms" : 0, - "term_vectors" : 0 + "term_vectors" : 0, + "vectors" : 0 }, "_source": {...}, "context": {...}, diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/50_dense_vector_field_usage.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/50_dense_vector_field_usage.yml index 565e0f96c4e55..29cef52131deb 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/50_dense_vector_field_usage.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/50_dense_vector_field_usage.yml @@ -4,7 +4,7 @@ setup: - do: indices.create: - index: test + index: futest body: settings: routing.rebalance.enable: none @@ -22,7 +22,7 @@ setup: - do: index: - index: test + index: futest body: name: cow.jpg vector: [ 230.0, 300.33, -34.8988, 15.555, -200.0 ] @@ -32,7 +32,7 @@ setup: - do: index: - index: test + index: futest id: 2 body: name: moose.jpg @@ -43,7 +43,7 @@ setup: - do: index: - index: test + index: futest id: 3 body: name: rabbit.jpg @@ -56,7 +56,7 @@ setup: "Field usage": - do: knn_search: - index: test + index: futest body: fields: [ "name" ] knn: @@ -72,16 +72,16 @@ setup: - match: {hits.hits.1.fields.name.0: "rabbit.jpg"} - do: - indices.field_usage_stats: { index: test } + indices.field_usage_stats: { index: futest } - - is_true: test - - length: { test.shards: 1 } + - is_true: futest + - length: { futest.shards: 1 } - - gt: { test.shards.0.stats.all_fields.any: 0 } - - gt: { test.shards.0.stats.all_fields.vectors: 0 } + - gt: { futest.shards.0.stats.all_fields.any: 0 } + - gt: { futest.shards.0.stats.all_fields.vectors: 0 } - - gt: { test.shards.0.stats.fields.vector.any: 0 } - - gt: { test.shards.0.stats.fields.vector.vectors: 0 } + - gt: { futest.shards.0.stats.fields.vector.any: 0 } + - gt: { futest.shards.0.stats.fields.vector.vectors: 0 } - - gt: { test.shards.0.stats.fields._id.stored_fields: 0 } - - match: { test.shards.0.stats.fields._id.vectors: 0 } + - gt: { futest.shards.0.stats.fields._id.stored_fields: 0 } + - match: { futest.shards.0.stats.fields._id.vectors: 0 } From 6e42856c70954dfa2bfc30ad62f84dd4ced2a12e Mon Sep 17 00:00:00 2001 From: Yannick Welsch Date: Thu, 11 Nov 2021 16:36:25 +0100 Subject: [PATCH 3/3] review feedback --- .../indices/field-usage-stats.asciidoc | 4 +-- .../index/search/stats/FieldUsageStats.java | 28 +++++++++---------- .../search/stats/ShardFieldUsageTracker.java | 14 +++++----- .../FieldUsageTrackingDirectoryReader.java | 6 ++-- .../vectors/50_dense_vector_field_usage.yml | 12 ++------ 5 files changed, 29 insertions(+), 35 deletions(-) diff --git a/docs/reference/indices/field-usage-stats.asciidoc b/docs/reference/indices/field-usage-stats.asciidoc index ae89c8dab9a86..b48d17cb1c2a9 100644 --- a/docs/reference/indices/field-usage-stats.asciidoc +++ b/docs/reference/indices/field-usage-stats.asciidoc @@ -129,7 +129,7 @@ The API returns the following response: "points" : 0, "norms" : 1, "term_vectors" : 0, - "vectors" : 0 + "knn_vectors" : 0 }, "fields": { "_id": { @@ -148,7 +148,7 @@ The API returns the following response: "points" : 0, "norms" : 0, "term_vectors" : 0, - "vectors" : 0 + "knn_vectors" : 0 }, "_source": {...}, "context": {...}, diff --git a/server/src/main/java/org/elasticsearch/index/search/stats/FieldUsageStats.java b/server/src/main/java/org/elasticsearch/index/search/stats/FieldUsageStats.java index 9669da82c7c3e..4f068591be42b 100644 --- a/server/src/main/java/org/elasticsearch/index/search/stats/FieldUsageStats.java +++ b/server/src/main/java/org/elasticsearch/index/search/stats/FieldUsageStats.java @@ -40,7 +40,7 @@ public class FieldUsageStats implements ToXContentObject, Writeable { public static final String TERM_VECTORS = "term_vectors"; // possibly refine this one public static final String POINTS = "points"; public static final String PROXIMITY = "proximity"; - public static final String VECTORS = "vectors"; + public static final String KNN_VECTORS = "knn_vectors"; private final Map stats; @@ -124,7 +124,7 @@ public enum UsageContext { PAYLOADS, TERM_VECTORS, // possibly refine this one POINTS, - VECTORS, + KNN_VECTORS, } public static class PerFieldUsageStats implements ToXContentFragment, Writeable { @@ -144,7 +144,7 @@ public static class PerFieldUsageStats implements ToXContentFragment, Writeable private final long payloads; private final long termVectors; private final long points; - private final long vectors; + private final long knnVectors; public PerFieldUsageStats( long any, @@ -160,7 +160,7 @@ public PerFieldUsageStats( long payloads, long termVectors, long points, - long vectors + long knnVectors ) { this.any = any; this.proximity = proximity; @@ -175,7 +175,7 @@ public PerFieldUsageStats( this.payloads = payloads; this.termVectors = termVectors; this.points = points; - this.vectors = vectors; + this.knnVectors = knnVectors; } private PerFieldUsageStats add(PerFieldUsageStats other) { @@ -193,7 +193,7 @@ private PerFieldUsageStats add(PerFieldUsageStats other) { payloads + other.payloads, termVectors + other.termVectors, points + other.points, - vectors + other.vectors + knnVectors + other.knnVectors ); } @@ -212,9 +212,9 @@ public PerFieldUsageStats(StreamInput in) throws IOException { termVectors = in.readVLong(); points = in.readVLong(); if (in.getVersion().onOrAfter(Version.V_8_1_0)) { - vectors = in.readVLong(); + knnVectors = in.readVLong(); } else { - vectors = 0; + knnVectors = 0; } } @@ -234,7 +234,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeVLong(termVectors); out.writeVLong(points); if (out.getVersion().onOrAfter(Version.V_8_1_0)) { - out.writeVLong(vectors); + out.writeVLong(knnVectors); } } @@ -255,7 +255,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(POINTS, points); builder.field(NORMS, norms); builder.field(TERM_VECTORS, termVectors); - builder.field(VECTORS, vectors); + builder.field(KNN_VECTORS, knnVectors); return builder; } @@ -294,8 +294,8 @@ public Set keySet() { if (points > 0L) { set.add(UsageContext.POINTS); } - if (vectors > 0L) { - set.add(UsageContext.VECTORS); + if (knnVectors > 0L) { + set.add(UsageContext.KNN_VECTORS); } return set; } @@ -344,8 +344,8 @@ public long getPoints() { return points; } - public long getVectors() { - return vectors; + public long getKnnVectors() { + return knnVectors; } public long getProximity() { diff --git a/server/src/main/java/org/elasticsearch/index/search/stats/ShardFieldUsageTracker.java b/server/src/main/java/org/elasticsearch/index/search/stats/ShardFieldUsageTracker.java index 40acf896a27cd..e4e2c3e80a926 100644 --- a/server/src/main/java/org/elasticsearch/index/search/stats/ShardFieldUsageTracker.java +++ b/server/src/main/java/org/elasticsearch/index/search/stats/ShardFieldUsageTracker.java @@ -62,7 +62,7 @@ public FieldUsageStats stats(String... fields) { ifs.payloads.longValue(), ifs.termVectors.longValue(), ifs.points.longValue(), - ifs.vectors.longValue() + ifs.knnVectors.longValue() ); stats.put(entry.getKey(), pf); } @@ -84,7 +84,7 @@ static class InternalFieldStats { final LongAdder payloads = new LongAdder(); final LongAdder termVectors = new LongAdder(); final LongAdder points = new LongAdder(); - final LongAdder vectors = new LongAdder(); + final LongAdder knnVectors = new LongAdder(); } static class PerField { @@ -100,7 +100,7 @@ static class PerField { volatile boolean payloads; volatile boolean termVectors; volatile boolean points; - volatile boolean vectors; + volatile boolean knnVectors; } public class FieldUsageStatsTrackingSession implements FieldUsageNotifier, Releasable { @@ -162,9 +162,9 @@ public void close() { any = true; fieldStats.termVectors.increment(); } - if (pf.vectors) { + if (pf.knnVectors) { any = true; - fieldStats.vectors.increment(); + fieldStats.knnVectors.increment(); } if (any) { fieldStats.any.increment(); @@ -236,8 +236,8 @@ public void onTermVectorsUsed(String field) { } @Override - public void onVectorsUsed(String field) { - getOrAdd(field).vectors = true; + public void onKnnVectorsUsed(String field) { + getOrAdd(field).knnVectors = true; } } } diff --git a/server/src/main/java/org/elasticsearch/search/internal/FieldUsageTrackingDirectoryReader.java b/server/src/main/java/org/elasticsearch/search/internal/FieldUsageTrackingDirectoryReader.java index 70e5273cdec2b..c4cf55f3113b3 100644 --- a/server/src/main/java/org/elasticsearch/search/internal/FieldUsageTrackingDirectoryReader.java +++ b/server/src/main/java/org/elasticsearch/search/internal/FieldUsageTrackingDirectoryReader.java @@ -87,7 +87,7 @@ public interface FieldUsageNotifier { void onTermVectorsUsed(String field); - void onVectorsUsed(String field); + void onKnnVectorsUsed(String field); } public static final class FieldUsageTrackingLeafReader extends SequentialStoredFieldsLeafReader { @@ -190,7 +190,7 @@ public NumericDocValues getNormValues(String field) throws IOException { public VectorValues getVectorValues(String field) throws IOException { VectorValues vectorValues = super.getVectorValues(field); if (vectorValues != null) { - notifier.onVectorsUsed(field); + notifier.onKnnVectorsUsed(field); } return vectorValues; } @@ -199,7 +199,7 @@ public VectorValues getVectorValues(String field) throws IOException { public TopDocs searchNearestVectors(String field, float[] target, int k, Bits acceptDocs) throws IOException { TopDocs topDocs = super.searchNearestVectors(field, target, k, acceptDocs); if (topDocs != null) { - notifier.onVectorsUsed(field); + notifier.onKnnVectorsUsed(field); } return topDocs; } diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/50_dense_vector_field_usage.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/50_dense_vector_field_usage.yml index 29cef52131deb..004c815556fe1 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/50_dense_vector_field_usage.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/vectors/50_dense_vector_field_usage.yml @@ -26,9 +26,6 @@ setup: body: name: cow.jpg vector: [ 230.0, 300.33, -34.8988, 15.555, -200.0 ] - comments: - - body: "free the cows" - vector: [ 0.75, 100.0, 0.33, 16.2, -10.2 ] - do: index: @@ -37,9 +34,6 @@ setup: body: name: moose.jpg vector: [ -0.5, 100.0, -13, 14.8, -156.0 ] - comments: - - body: "what a great moose" - vector: [ 11.4, 99.0, 1.55, -2.9, -10.2 ] - do: index: @@ -78,10 +72,10 @@ setup: - length: { futest.shards: 1 } - gt: { futest.shards.0.stats.all_fields.any: 0 } - - gt: { futest.shards.0.stats.all_fields.vectors: 0 } + - gt: { futest.shards.0.stats.all_fields.knn_vectors: 0 } - gt: { futest.shards.0.stats.fields.vector.any: 0 } - - gt: { futest.shards.0.stats.fields.vector.vectors: 0 } + - gt: { futest.shards.0.stats.fields.vector.knn_vectors: 0 } - gt: { futest.shards.0.stats.fields._id.stored_fields: 0 } - - match: { futest.shards.0.stats.fields._id.vectors: 0 } + - match: { futest.shards.0.stats.fields._id.knn_vectors: 0 }