Skip to content

Commit 6d4673f

Browse files
Isabel Drost-Fromm authored and cbuescher committed
Initial commit for Module to compute metrics on queries
This is an initial squashed commit of the work on a new feature for query metrics proposed in #18798.
1 parent 5903966 commit 6d4673f

File tree

22 files changed

+1540
-0
lines changed

22 files changed

+1540
-0
lines changed

core/src/main/java/org/elasticsearch/client/transport/support/TransportProxyClient.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import org.elasticsearch.common.settings.Settings;
3333
import org.elasticsearch.transport.TransportService;
3434

35+
import java.util.Collections;
3536
import java.util.HashMap;
3637
import java.util.Map;
3738

modules/rank-eval/build.gradle

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
// Plugin metadata for the rank-eval module: the description text and the
// fully qualified name of the plugin class.
esplugin {
21+
description 'The Rank Eval module adds APIs to evaluate ranking quality.'
22+
classname 'org.elasticsearch.index.rankeval.RankEvalPlugin'
23+
}
24+
25+
// Integration-test cluster configuration: sets the 'script.inline' and
// 'script.stored' cluster settings to 'true'.
integTest {
26+
cluster {
27+
setting 'script.inline', 'true'
28+
setting 'script.stored', 'true'
29+
}
30+
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.index.rankeval;
21+
22+
import java.util.Collection;
23+
24+
/**
 * Returned for each search intent and search specification combination. Summarises the document ids found that were not
 * annotated and the quality level of the result set in that particular combination based on the annotations given.
 *
 * Instances are plain value holders: both fields are assigned once in the constructor and never changed afterwards.
 * The collection of unknown document ids is stored and returned as-is (no defensive copy is made).
 */
public class EvalQueryQuality {

    /** Quality level computed for one intent/specification combination. */
    private final double qualityLevel;

    /** Ids of the documents that were found but carried no annotation. */
    private final Collection<String> unknownDocs;

    /**
     * @param qualityLevel the quality level computed for the result set
     * @param unknownDocs  ids of the returned documents that were not annotated; the reference is kept, not copied
     */
    public EvalQueryQuality(double qualityLevel, Collection<String> unknownDocs) {
        this.qualityLevel = qualityLevel;
        this.unknownDocs = unknownDocs;
    }

    /**
     * @return the collection of un-annotated document ids exactly as passed to the constructor
     */
    public Collection<String> getUnknownDocs() {
        return unknownDocs;
    }

    /**
     * @return the quality level exactly as passed to the constructor
     */
    public double getQualityLevel() {
        return qualityLevel;
    }
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.index.rankeval;
21+
22+
import org.elasticsearch.common.io.stream.NamedWriteable;
23+
import org.elasticsearch.search.SearchHit;
24+
25+
public interface Evaluator extends NamedWriteable {
26+
27+
public Object evaluate(SearchHit[] hits, RatedQuery intent);
28+
}
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.index.rankeval;
21+
22+
import org.elasticsearch.common.io.stream.StreamInput;
23+
import org.elasticsearch.common.io.stream.StreamOutput;
24+
import org.elasticsearch.search.SearchHit;
25+
26+
import java.io.IOException;
27+
import java.util.ArrayList;
28+
import java.util.Collection;
29+
import java.util.Map;
30+
import java.util.Map.Entry;
31+
32+
import javax.naming.directory.SearchResult;
33+
34+
/**
35+
* Evaluate Precision at N, N being the number of search results to consider for precision calculation.
36+
*
37+
* Documents of unkonwn quality are ignored in the precision at n computation and returned by document id.
38+
* */
39+
public class PrecisionAtN implements RankedListQualityMetric {
40+
41+
/** Number of results to check against a given set of relevant results. */
42+
private int n;
43+
44+
public static final String NAME = "precisionatn";
45+
46+
public PrecisionAtN(StreamInput in) throws IOException {
47+
n = in.readInt();
48+
}
49+
50+
@Override
51+
public void writeTo(StreamOutput out) throws IOException {
52+
out.writeInt(n);
53+
}
54+
55+
@Override
56+
public String getWriteableName() {
57+
return NAME;
58+
}
59+
60+
/**
61+
* Initialises n with 10
62+
* */
63+
public PrecisionAtN() {
64+
this.n = 10;
65+
}
66+
67+
/**
68+
* @param n number of top results to check against a given set of relevant results.
69+
* */
70+
public PrecisionAtN(int n) {
71+
this.n= n;
72+
}
73+
74+
/**
75+
* Return number of search results to check for quality.
76+
* */
77+
public int getN() {
78+
return n;
79+
}
80+
81+
/** Compute precisionAtN based on provided relevant document IDs.
82+
* @return precision at n for above {@link SearchResult} list.
83+
**/
84+
@Override
85+
public EvalQueryQuality evaluate(SearchHit[] hits, RatedQuery intent) {
86+
Map<String, Integer> ratedDocIds = intent.getRatedDocuments();
87+
88+
Collection<String> relevantDocIds = new ArrayList<>();
89+
for (Entry<String, Integer> entry : ratedDocIds.entrySet()) {
90+
if (Rating.RELEVANT.equals(RatingMapping.mapTo(entry.getValue()))) {
91+
relevantDocIds.add(entry.getKey());
92+
}
93+
}
94+
95+
Collection<String> irrelevantDocIds = new ArrayList<>();
96+
for (Entry<String, Integer> entry : ratedDocIds.entrySet()) {
97+
if (Rating.IRRELEVANT.equals(RatingMapping.mapTo(entry.getValue()))) {
98+
irrelevantDocIds.add(entry.getKey());
99+
}
100+
}
101+
102+
int good = 0;
103+
int bad = 0;
104+
Collection<String> unknownDocIds = new ArrayList<String>();
105+
for (int i = 0; (i < n && i < hits.length); i++) {
106+
String id = hits[i].getId();
107+
if (relevantDocIds.contains(id)) {
108+
good++;
109+
} else if (irrelevantDocIds.contains(id)) {
110+
bad++;
111+
} else {
112+
unknownDocIds.add(id);
113+
}
114+
}
115+
116+
double precision = (double) good / (good + bad);
117+
118+
return new EvalQueryQuality(precision, unknownDocIds);
119+
}
120+
121+
public enum Rating {
122+
RELEVANT, IRRELEVANT;
123+
}
124+
125+
/**
126+
* Needed to get the enum accross serialisation boundaries.
127+
* */
128+
public static class RatingMapping {
129+
public static Integer mapFrom(Rating rating) {
130+
if (Rating.RELEVANT.equals(rating)) {
131+
return 0;
132+
}
133+
return 1;
134+
}
135+
136+
public static Rating mapTo(Integer rating) {
137+
if (rating == 0) {
138+
return Rating.RELEVANT;
139+
}
140+
return Rating.IRRELEVANT;
141+
}
142+
}
143+
144+
}
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.index.rankeval;
21+
22+
import org.elasticsearch.common.io.stream.StreamInput;
23+
import org.elasticsearch.common.io.stream.StreamOutput;
24+
import org.elasticsearch.common.io.stream.Writeable;
25+
import org.elasticsearch.search.builder.SearchSourceBuilder;
26+
27+
import java.io.IOException;
28+
import java.util.ArrayList;
29+
import java.util.List;
30+
31+
/**
32+
* Defines a QA specification: All end user supplied query intents will be mapped to the search request specified in this search request
33+
* template and executed against the targetIndex given. Any filters that should be applied in the target system can be specified as well.
34+
*
35+
* The resulting document lists can then be compared against what was specified in the set of rated documents as part of a QAQuery.
36+
* */
37+
public class QuerySpec implements Writeable {
38+
39+
private int specId = 0;
40+
private SearchSourceBuilder testRequest;
41+
private List<String> indices = new ArrayList<>();
42+
private List<String> types = new ArrayList<>();
43+
44+
public QuerySpec(
45+
int specId, SearchSourceBuilder testRequest, List<String> indices, List<String> types) {
46+
this.specId = specId;
47+
this.testRequest = testRequest;
48+
this.indices = indices;
49+
this.types = types;
50+
}
51+
52+
public QuerySpec(StreamInput in) throws IOException {
53+
this.specId = in.readInt();
54+
testRequest = new SearchSourceBuilder(in);
55+
int indicesSize = in.readInt();
56+
indices = new ArrayList<String>(indicesSize);
57+
for (int i = 0; i < indicesSize; i++) {
58+
this.indices.add(in.readString());
59+
}
60+
int typesSize = in.readInt();
61+
types = new ArrayList<String>(typesSize);
62+
for (int i = 0; i < typesSize; i++) {
63+
this.types.add(in.readString());
64+
}
65+
}
66+
67+
@Override
68+
public void writeTo(StreamOutput out) throws IOException {
69+
out.writeInt(specId);
70+
testRequest.writeTo(out);
71+
out.writeInt(indices.size());
72+
for (String index : indices) {
73+
out.writeString(index);
74+
}
75+
out.writeInt(types.size());
76+
for (String type : types) {
77+
out.writeString(type);
78+
}
79+
}
80+
81+
public SearchSourceBuilder getTestRequest() {
82+
return testRequest;
83+
}
84+
85+
public void setTestRequest(SearchSourceBuilder testRequest) {
86+
this.testRequest = testRequest;
87+
}
88+
89+
public List<String> getIndices() {
90+
return indices;
91+
}
92+
93+
public void setIndices(List<String> indices) {
94+
this.indices = indices;
95+
}
96+
97+
public List<String> getTypes() {
98+
return types;
99+
}
100+
101+
public void setTypes(List<String> types) {
102+
this.types = types;
103+
}
104+
105+
/** Returns a user supplied spec id for easier referencing. */
106+
public int getSpecId() {
107+
return specId;
108+
}
109+
110+
/** Sets a user supplied spec id for easier referencing. */
111+
public void setSpecId(int specId) {
112+
this.specId = specId;
113+
}
114+
}

0 commit comments

Comments
 (0)