Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions docs/reference/indices/field-usage-stats.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,8 @@ The API returns the following response:
"doc_values" : 1,
"points" : 0,
"norms" : 1,
"term_vectors" : 0
"term_vectors" : 0,
"knn_vectors" : 0
},
"fields": {
"_id": {
Expand All @@ -146,7 +147,8 @@ The API returns the following response:
"doc_values" : 0,
"points" : 0,
"norms" : 0,
"term_vectors" : 0
"term_vectors" : 0,
"knn_vectors" : 0
},
"_source": {...},
"context": {...},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

package org.elasticsearch.index.search.stats;

import org.elasticsearch.Version;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
Expand Down Expand Up @@ -39,6 +40,7 @@ public class FieldUsageStats implements ToXContentObject, Writeable {
public static final String TERM_VECTORS = "term_vectors"; // possibly refine this one
public static final String POINTS = "points";
public static final String PROXIMITY = "proximity";
public static final String KNN_VECTORS = "knn_vectors";

private final Map<String, PerFieldUsageStats> stats;

Expand Down Expand Up @@ -122,11 +124,12 @@ public enum UsageContext {
PAYLOADS,
TERM_VECTORS, // possibly refine this one
POINTS,
KNN_VECTORS,
}

public static class PerFieldUsageStats implements ToXContentFragment, Writeable {

static final PerFieldUsageStats EMPTY = new PerFieldUsageStats(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
static final PerFieldUsageStats EMPTY = new PerFieldUsageStats(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);

private final long any;
private final long proximity;
Expand All @@ -141,6 +144,7 @@ public static class PerFieldUsageStats implements ToXContentFragment, Writeable
private final long payloads;
private final long termVectors;
private final long points;
private final long knnVectors;

public PerFieldUsageStats(
long any,
Expand All @@ -155,7 +159,8 @@ public PerFieldUsageStats(
long norms,
long payloads,
long termVectors,
long points
long points,
long knnVectors
) {
this.any = any;
this.proximity = proximity;
Expand All @@ -170,6 +175,7 @@ public PerFieldUsageStats(
this.payloads = payloads;
this.termVectors = termVectors;
this.points = points;
this.knnVectors = knnVectors;
}

private PerFieldUsageStats add(PerFieldUsageStats other) {
Expand All @@ -186,7 +192,8 @@ private PerFieldUsageStats add(PerFieldUsageStats other) {
norms + other.norms,
payloads + other.payloads,
termVectors + other.termVectors,
points + other.points
points + other.points,
knnVectors + other.knnVectors
);
}

Expand All @@ -204,6 +211,11 @@ public PerFieldUsageStats(StreamInput in) throws IOException {
payloads = in.readVLong();
termVectors = in.readVLong();
points = in.readVLong();
if (in.getVersion().onOrAfter(Version.V_8_1_0)) {
knnVectors = in.readVLong();
} else {
knnVectors = 0;
}
}

@Override
Expand All @@ -221,6 +233,9 @@ public void writeTo(StreamOutput out) throws IOException {
out.writeVLong(payloads);
out.writeVLong(termVectors);
out.writeVLong(points);
if (out.getVersion().onOrAfter(Version.V_8_1_0)) {
out.writeVLong(knnVectors);
}
}

@Override
Expand All @@ -240,6 +255,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
builder.field(POINTS, points);
builder.field(NORMS, norms);
builder.field(TERM_VECTORS, termVectors);
builder.field(KNN_VECTORS, knnVectors);
return builder;
}

Expand Down Expand Up @@ -278,6 +294,9 @@ public Set<UsageContext> keySet() {
if (points > 0L) {
set.add(UsageContext.POINTS);
}
if (knnVectors > 0L) {
set.add(UsageContext.KNN_VECTORS);
}
return set;
}

Expand Down Expand Up @@ -325,6 +344,10 @@ public long getPoints() {
return points;
}

/**
 * Returns the value of the {@code knn_vectors} usage counter held by this instance.
 *
 * @return the recorded k-NN vectors usage count
 */
public long getKnnVectors() {
    return this.knnVectors;
}

/**
 * Returns the value of the {@code proximity} usage counter held by this instance.
 *
 * @return the recorded proximity usage count
 */
public long getProximity() {
    return this.proximity;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ public FieldUsageStats stats(String... fields) {
ifs.norms.longValue(),
ifs.payloads.longValue(),
ifs.termVectors.longValue(),
ifs.points.longValue()
ifs.points.longValue(),
ifs.knnVectors.longValue()
);
stats.put(entry.getKey(), pf);
}
Expand All @@ -83,6 +84,7 @@ static class InternalFieldStats {
final LongAdder payloads = new LongAdder();
final LongAdder termVectors = new LongAdder();
final LongAdder points = new LongAdder();
final LongAdder knnVectors = new LongAdder();
}

static class PerField {
Expand All @@ -98,6 +100,7 @@ static class PerField {
volatile boolean payloads;
volatile boolean termVectors;
volatile boolean points;
volatile boolean knnVectors;
}

public class FieldUsageStatsTrackingSession implements FieldUsageNotifier, Releasable {
Expand Down Expand Up @@ -159,6 +162,10 @@ public void close() {
any = true;
fieldStats.termVectors.increment();
}
if (pf.knnVectors) {
any = true;
fieldStats.knnVectors.increment();
}
if (any) {
fieldStats.any.increment();
}
Expand Down Expand Up @@ -227,5 +234,10 @@ public void onPointsUsed(String field) {
public void onTermVectorsUsed(String field) {
getOrAdd(field).termVectors = true;
}

/**
 * Marks the k-NN vectors of {@code field} as used within this tracking session.
 *
 * @param field name of the field whose vectors were accessed
 */
@Override
public void onKnnVectorsUsed(String field) {
// Only a boolean flag is set here; the flag is presumably what close() reads
// (pf.knnVectors) before incrementing the shared counter - confirm against close().
getOrAdd(field).knnVectors = true;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.VectorValues;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.suggest.document.CompletionTerms;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.elasticsearch.common.lucene.index.SequentialStoredFieldsLeafReader;
Expand Down Expand Up @@ -83,6 +86,8 @@ public interface FieldUsageNotifier {
void onPointsUsed(String field);

void onTermVectorsUsed(String field);

void onKnnVectorsUsed(String field);
}

public static final class FieldUsageTrackingLeafReader extends SequentialStoredFieldsLeafReader {
Expand Down Expand Up @@ -181,6 +186,24 @@ public NumericDocValues getNormValues(String field) throws IOException {
return numericDocValues;
}

/**
 * Delegates to the wrapped reader and reports k-NN vector usage for {@code field}
 * whenever the delegate actually returns vector values.
 *
 * @param field name of the vector field
 * @return the delegate's vector values, or {@code null} if the delegate returned none
 * @throws IOException if the delegate fails to read the values
 */
@Override
public VectorValues getVectorValues(String field) throws IOException {
    final VectorValues values = super.getVectorValues(field);
    if (values == null) {
        // Nothing was handed out, so there is no usage to record.
        return null;
    }
    notifier.onKnnVectorsUsed(field);
    return values;
}

/**
 * Delegates the nearest-vector search to the wrapped reader and reports k-NN vector
 * usage for {@code field} whenever the delegate returns a result.
 *
 * @param field      name of the vector field to search
 * @param target     query vector
 * @param k          number of nearest neighbours requested
 * @param acceptDocs filter passed through unchanged to the delegate
 * @return the delegate's result, or {@code null} if the delegate returned none
 * @throws IOException if the delegate search fails
 */
@Override
public TopDocs searchNearestVectors(String field, float[] target, int k, Bits acceptDocs) throws IOException {
    final TopDocs nearest = super.searchNearestVectors(field, target, k, acceptDocs);
    if (nearest == null) {
        // No result from the delegate, so there is no usage to record.
        return null;
    }
    notifier.onKnnVectorsUsed(field);
    return nearest;
}

@Override
public String toString() {
final StringBuilder sb = new StringBuilder("FieldUsageTrackingLeafReader(reader=");
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
setup:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test might need a skip section for <8.1 for mixed cluster tests.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I don't really understand why this passes the tests. Maybe we don't run these in x-pack on mixed clusters?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this validates my understanding (I deliberately did not add the version constraint, to check whether anything breaks on CI). I find the result very worrying, though.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good to see that we both expected this to fail in some way. Maybe worth some parallel investigation, I'd be interested in the results.

Copy link
Contributor

@mark-vieira mark-vieira Nov 11, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems we don't have "mixed cluster" tests for x-pack like we do for the core apis. So we simply aren't running this test under that scenario at all. See #31096

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wow, that's scary (and lol for linking to my own issue, guess my memory is failing me).

Perhaps that one needs to be reprioritized!

Copy link
Contributor

@mark-vieira mark-vieira Nov 11, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The issue seems to be tagged as core/infra but I'm not sure if that's strictly correct. There's no real clear ownership of these large cross-functional test suites. It's really just a matter of adding a qa project for this under x-pack similar to the existing core one.

- skip:
    features: headers
    # The knn_vectors counter is only serialized between nodes from 8.1.0 onwards,
    # so nodes older than that cannot satisfy the assertions in this test.
    version: " - 8.0.99"
    reason: "knn_vectors field usage stats were added in 8.1.0"

# Single-shard, zero-replica index so the stats response contains exactly one shard entry.
- do:
    indices.create:
      index: futest
      body:
        settings:
          routing.rebalance.enable: none
          index.number_of_shards: 1
          index.number_of_replicas: 0
        mappings:
          properties:
            name:
              type: keyword
            vector:
              type: dense_vector
              dims: 5
              index: true
              similarity: l2_norm

# Three documents; the first one is indexed without an explicit id.
- do:
    index:
      index: futest
      body:
        name: cow.jpg
        vector: [ 230.0, 300.33, -34.8988, 15.555, -200.0 ]

- do:
    index:
      index: futest
      id: 2
      body:
        name: moose.jpg
        vector: [ -0.5, 100.0, -13, 14.8, -156.0 ]

- do:
    index:
      index: futest
      id: 3
      body:
        name: rabbit.jpg
        vector: [ 0.5, 111.3, -13.0, 14.8, -156.0 ]

# Make the indexed documents visible to the search in the test below.
- do:
    indices.refresh: { }

---
# Runs one kNN search against the "vector" field and then checks that the field
# usage stats API reports knn_vectors usage for that field.
"Field usage":
# Ask for the 2 nearest neighbours out of 3 candidates, returning only the "name" field.
- do:
knn_search:
index: futest
body:
fields: [ "name" ]
knn:
field: vector
query_vector: [-0.5, 90.0, -10, 14.8, -156.0]
k: 2
num_candidates: 3

# Expected order of hits: document 2 (moose.jpg), then document 3 (rabbit.jpg).
- match: {hits.hits.0._id: "2"}
- match: {hits.hits.0.fields.name.0: "moose.jpg"}

- match: {hits.hits.1._id: "3"}
- match: {hits.hits.1.fields.name.0: "rabbit.jpg"}

# Fetch the usage stats that the search above should have produced.
- do:
indices.field_usage_stats: { index: futest }

- is_true: futest
- length: { futest.shards: 1 }

# Aggregated counters across all fields must show knn_vectors usage.
- gt: { futest.shards.0.stats.all_fields.any: 0 }
- gt: { futest.shards.0.stats.all_fields.knn_vectors: 0 }

# The searched vector field itself must show knn_vectors usage.
- gt: { futest.shards.0.stats.fields.vector.any: 0 }
- gt: { futest.shards.0.stats.fields.vector.knn_vectors: 0 }

# _id is expected to show stored_fields usage but no knn_vectors usage.
- gt: { futest.shards.0.stats.fields._id.stored_fields: 0 }
- match: { futest.shards.0.stats.fields._id.knn_vectors: 0 }