From 7c84f6541c0fb53e580b0691ec4c582f1e914773 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Thu, 3 Feb 2022 19:08:05 +0100 Subject: [PATCH 1/5] Enable `_terms_enum` on ip fields The _terms_enum API currently does not support ip fields. However, type-ahead-like completion is useful for UI purposes. This change adds the ability to query `ip` fields via the _terms_enum API by leveraging the terms enumeration available when doc_values are enabled on the field, which is the default. In order to make prefix filtering fast, we internally create a fast prefix automaton from the user-supplied prefix that gets intersected with the shards terms enumeration, similar to what we do for keyword fields already. Closes #89933 --- docs/reference/search/terms-enum.asciidoc | 4 +- .../index/mapper/IpFieldMapper.java | 35 ++- .../index/mapper/IpPrefixAutomatonUtil.java | 223 ++++++++++++++++++ .../index/mapper/KeywordFieldMapper.java | 107 --------- .../index/mapper/SortedSetDocValuesTerms.java | 126 ++++++++++ .../mapper/IpPrefixAutomatonUtilTests.java | 217 +++++++++++++++++ .../xpack/core/termsenum/TermsEnumTests.java | 180 ++++++++++++++ 7 files changed, 782 insertions(+), 110 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/index/mapper/IpPrefixAutomatonUtil.java create mode 100644 server/src/main/java/org/elasticsearch/index/mapper/SortedSetDocValuesTerms.java create mode 100644 server/src/test/java/org/elasticsearch/index/mapper/IpPrefixAutomatonUtilTests.java create mode 100644 x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/termsenum/TermsEnumTests.java diff --git a/docs/reference/search/terms-enum.asciidoc b/docs/reference/search/terms-enum.asciidoc index 3b694a6d5142c..c0abe6858258e 100644 --- a/docs/reference/search/terms-enum.asciidoc +++ b/docs/reference/search/terms-enum.asciidoc @@ -6,8 +6,8 @@ The terms enum API can be used to discover terms in the index that match a partial string. 
Supported field types are <>, -<>, <> -and <>. This is used for auto-complete: +<>, <>, +<> and <>. This is used for auto-complete: [source,console] -------------------------------------------------- diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java index 68b6fc55c7380..ab695063e1227 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java @@ -12,12 +12,16 @@ import org.apache.lucene.document.InetAddressPoint; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.document.StoredField; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.PointRangeQuery; import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.automaton.CompiledAutomaton; import org.elasticsearch.Version; import org.elasticsearch.common.logging.DeprecationCategory; import org.elasticsearch.common.logging.DeprecationLogger; @@ -49,7 +53,11 @@ import java.util.Objects; import java.util.function.BiFunction; -/** A {@link FieldMapper} for ip addresses. */ +import static org.elasticsearch.index.mapper.IpPrefixAutomatonUtil.buildIpPrefixAutomaton; + +/** + * A {@link FieldMapper} for ip addresses. 
+ */ public class IpFieldMapper extends FieldMapper { private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(IpFieldMapper.class); @@ -417,6 +425,31 @@ public DocValueFormat docValueFormat(@Nullable String format, ZoneId timeZone) { return DocValueFormat.IP; } + @Override + public TermsEnum getTerms(IndexReader reader, String prefix, boolean caseInsensitive, String searchAfter) throws IOException { + + Terms terms = null; + // terms_enum for ip only works if doc values are enabled + if (hasDocValues()) { + terms = SortedSetDocValuesTerms.getTerms(reader, name()); + } + if (terms == null) { + // Field does not exist on this shard. + return null; + } + BytesRef searchBytes = searchAfter == null ? null : new BytesRef(InetAddressPoint.encode(InetAddress.getByName(searchAfter))); + CompiledAutomaton prefixAutomaton = buildIpPrefixAutomaton(prefix); + + if (prefixAutomaton.type == CompiledAutomaton.AUTOMATON_TYPE.ALL) { + TermsEnum result = terms.iterator(); + if (searchAfter != null) { + result = new SearchAfterTermsEnum(result, searchBytes); + } + return result; + } + return terms.intersect(prefixAutomaton, searchBytes); + } + /** * @return true if field has been marked as a dimension field */ diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IpPrefixAutomatonUtil.java b/server/src/main/java/org/elasticsearch/index/mapper/IpPrefixAutomatonUtil.java new file mode 100644 index 0000000000000..d03c53974f540 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/mapper/IpPrefixAutomatonUtil.java @@ -0,0 +1,223 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.index.mapper; + +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.automaton.Automata; +import org.apache.lucene.util.automaton.Automaton; +import org.apache.lucene.util.automaton.CompiledAutomaton; +import org.apache.lucene.util.automaton.MinimizationOperations; +import org.apache.lucene.util.automaton.Operations; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.IntStream; + +import static org.apache.lucene.util.automaton.Operations.concatenate; + +/** + * This class contains utility functionality to build an Automaton based + * on a prefix String on an `ip` field. + */ +public class IpPrefixAutomatonUtil { + + private static final Automaton EMPTY_AUTOMATON = Automata.makeEmpty(); + private static final Automaton IPV4_PREFIX = Automata.makeBinary(new BytesRef(new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1 })); + + static final Map INCOMPLETE_IP4_GROUP_AUTOMATON_LOOKUP = new HashMap<>(); + static { + for (int c = 0; c <= 255; c++) { + Automaton a = Automata.makeChar(c); + if (c > 0 && c < 10) { + // all one digit prefixes expand to the two digit range, i.e. 1 -> [10..19] + a = Operations.union(a, Automata.makeCharRange(c * 10, c * 10 + 9)); + // 1 and 2 even to three digit ranges + if (c == 1) { + a = Operations.union(a, Automata.makeCharRange(100, 199)); + } + if (c == 2) { + a = Operations.union(a, Automata.makeCharRange(200, 255)); + } + } + if (c >= 10 && c < 26) { + int min = c * 10; + int max = Math.min(c * 10 + 9, 255); + a = Operations.union(a, Automata.makeCharRange(min, max)); + } + INCOMPLETE_IP4_GROUP_AUTOMATON_LOOKUP.put(c, a); + } + } + + /** + * Create a {@link CompiledAutomaton} from the ip Prefix. + * If the prefix is empty, the automaton returned will accept everything. 
+ */ + static CompiledAutomaton buildIpPrefixAutomaton(String ipPrefix) { + Automaton result; + if (ipPrefix.isEmpty() == false) { + Automaton ipv4Automaton = createIp4Automaton(ipPrefix); + if (ipv4Automaton != null) { + ipv4Automaton = concatenate(IPV4_PREFIX, ipv4Automaton); + } + Automaton ipv6Automaton = getIpv6Automaton(ipPrefix); + result = Operations.union(ipv4Automaton, ipv6Automaton); + } else { + result = Automata.makeAnyBinary(); + } + result = MinimizationOperations.minimize(result, Integer.MAX_VALUE); + return new CompiledAutomaton(result, null, false, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT, true); + } + + private static Automaton getIpv6Automaton(String ipPrefix) { + Automaton ipv6Automaton = EMPTY_AUTOMATON; + List ip6Groups = parseIp6Prefix(ipPrefix); + if (ip6Groups.isEmpty() == false) { + ipv6Automaton = Automata.makeString(""); + int groupsAdded = 0; + for (String group : ip6Groups) { + if (group.contains(".")) { + // try to parse this as ipv4 ending part, but only if we already have some ipv6 specific stuff in front + if (groupsAdded > 0) { + ipv6Automaton = concatenate(ipv6Automaton, createIp4Automaton(group)); + groupsAdded += 2; // this counts as two bytes, missing bytes are padded already + } else { + return EMPTY_AUTOMATON; + } + } else if (group.endsWith(":")) { + groupsAdded++; + // full block + if (group.length() > 1) { + group = group.substring(0, group.length() - 1); + ipv6Automaton = concatenate(ipv6Automaton, automatonFromIPv6Group(padWithZeros(group, 4 - group.length()))); + } else { + // single colon denotes left out zeros + ipv6Automaton = concatenate(ipv6Automaton, Operations.repeat(Automata.makeChar(0))); + } + } else { + groupsAdded++; + // partial block, we need to create all possibilities of byte sequences this could match + ipv6Automaton = concatenate(ipv6Automaton, automatonFromIPv6Group(group)); + } + } + // fill up the remainder of the 16 address bytes with wildcard matches, each group added so far counts for two 
bytes + for (int i = 0; i < 16 - groupsAdded * 2; i++) { + ipv6Automaton = concatenate(ipv6Automaton, Operations.optional(Automata.makeCharRange(0, 255))); + } + } + return ipv6Automaton; + } + + static Automaton automatonFromIPv6Group(String ipv6Group) { + assert ipv6Group.length() > 0 && ipv6Group.length() <= 4 : "expected a full ipv6 group or prefix"; + Automaton result = Automata.makeString(""); + for (int leadingZeros = 0; leadingZeros <= 4 - ipv6Group.length(); leadingZeros++) { + int bytesAdded = 0; + String padded = padWithZeros(ipv6Group, leadingZeros); + Automaton a = Automata.makeString(""); + while (padded.length() >= 2) { + a = concatenate(a, Automata.makeChar(Integer.parseInt(padded.substring(0, 2), 16))); + padded = padded.substring(2); + bytesAdded++; + } + if (padded.length() == 1) { + int value = Integer.parseInt(padded, 16); + a = concatenate(a, Operations.union(Automata.makeChar(value), Automata.makeCharRange(value * 16, value * 16 + 15))); + bytesAdded++; + } + if (bytesAdded != 2) { + a = concatenate(a, Automata.makeCharRange(0, 255)); + } + result = Operations.union(result, a); + } + return result; + } + + private static Pattern IPV4_GROUP_MATCHER = Pattern.compile( + "^((?:0|[1-9][0-9]{0,2})\\.)?" + "((?:0|[1-9][0-9]{0,2})\\.)?" + "((?:0|[1-9][0-9]{0,2})\\.)?" + "((?:0|[1-9][0-9]{0,2}))?$" + ); + + /** + * Creates an {@link Automaton} that accepts all ipv4 address byte representation + * that start with the given prefix. If the prefix is no valid ipv4 prefix, an automaton + * that accepts the empty language is returned. 
+ */ + static Automaton createIp4Automaton(String prefix) { + Matcher ip4Matcher = IPV4_GROUP_MATCHER.matcher(prefix); + if (ip4Matcher.matches() == false) { + return EMPTY_AUTOMATON; + } + int prefixBytes = 0; + byte[] completeByteGroups = new byte[4]; + int completeBytes = 0; + // scan the groups the prefix matches + Automaton incompleteGroupAutomaton = Automata.makeString(""); + for (int g = 1; g <= 4; g++) { + String group = ip4Matcher.group(g); + // note that intermediate groups might not match anything and can be empty + if (group != null) { + if (group.endsWith(".")) { + // complete group found + int value = Integer.parseInt(group.substring(0, group.length() - 1)); + if (value < 0 || value > 255) { + // invalid value, append the empty result to the current one to make it match nothing + return EMPTY_AUTOMATON; + } else { + completeByteGroups[completeBytes] = (byte) value; + completeBytes++; + prefixBytes++; + } + } else { + // if present, this is the last group + incompleteGroupAutomaton = INCOMPLETE_IP4_GROUP_AUTOMATON_LOOKUP.get(Integer.parseInt(group)); + prefixBytes++; + } + } + } + return concatenate( + List.of( + Automata.makeBinary(new BytesRef(completeByteGroups, 0, completeBytes)), + incompleteGroupAutomaton, + Operations.repeat(Automata.makeCharRange(0, 255), 4 - prefixBytes, 4 - prefixBytes) + ) + ); + } + + private static String padWithZeros(String input, int leadingZeros) { + return new StringBuilder("0".repeat(leadingZeros)).append(input).toString(); + } + + private static Pattern IP6_BLOCK_MATCHER = Pattern.compile( + "([a-f0-9]{0,4}:)|([a-f0-9]{1,4}$)" // the ipv6 specific notation + + "|((?:(?:0|[1-9][0-9]{0,2})\\.){1,3}(?:0|[1-9][0-9]{0,2})?$)" // the optional ipv4 part + ); + + static List parseIp6Prefix(String ip6Prefix) { + Matcher ip6blockMatcher = IP6_BLOCK_MATCHER.matcher(ip6Prefix); + int position = 0; + List groups = new ArrayList<>(); + while (ip6blockMatcher.find(position)) { + if (ip6blockMatcher.start() == position) { + position 
= ip6blockMatcher.end(); + IntStream.rangeClosed(1, 3).mapToObj(i -> ip6blockMatcher.group(i)).filter(s -> s != null).forEach(groups::add); + } else { + return Collections.emptyList(); + } + } + if (position != ip6Prefix.length()) { + // no full match, return empty list + return Collections.emptyList(); + } + return groups; + } +} diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index 31a9995f09e0a..04ab0af646af3 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -21,8 +21,6 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiTerms; -import org.apache.lucene.index.ReaderSlice; -import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.sandbox.search.DocValuesTermsQuery; @@ -68,11 +66,9 @@ import java.io.IOException; import java.io.UncheckedIOException; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; -import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Objects; @@ -558,109 +554,6 @@ public TermsEnum getTerms(IndexReader reader, String prefix, boolean caseInsensi return terms.intersect(automaton, searchBytes); } - /** - * A simple terms implementation for SortedSetDocValues that only provides access to {@link TermsEnum} via - * {@link #iterator} and {@link #intersect(CompiledAutomaton, BytesRef)} methods. 
- * We have this custom implementation based on {@link MultiTerms} instead of using - * {@link org.apache.lucene.index.MultiDocValues#getSortedSetValues(IndexReader, String)} - * because {@link org.apache.lucene.index.MultiDocValues} builds global ordinals up-front whereas - * {@link MultiTerms}, which exposes the terms enum via {@link org.apache.lucene.index.MultiTermsEnum}, - * merges terms on the fly. - */ - static class SortedSetDocValuesTerms extends Terms { - - public static Terms getTerms(IndexReader r, String field) throws IOException { - final List leaves = r.leaves(); - if (leaves.size() == 1) { - SortedSetDocValues sortedSetDocValues = leaves.get(0).reader().getSortedSetDocValues(field); - if (sortedSetDocValues == null) { - return null; - } else { - return new SortedSetDocValuesTerms(sortedSetDocValues); - } - } - - final List termsPerLeaf = new ArrayList<>(leaves.size()); - final List slicePerLeaf = new ArrayList<>(leaves.size()); - - for (int leafIdx = 0; leafIdx < leaves.size(); leafIdx++) { - LeafReaderContext ctx = leaves.get(leafIdx); - SortedSetDocValues sortedSetDocValues = ctx.reader().getSortedSetDocValues(field); - if (sortedSetDocValues != null) { - termsPerLeaf.add(new SortedSetDocValuesTerms(sortedSetDocValues)); - slicePerLeaf.add(new ReaderSlice(ctx.docBase, r.maxDoc(), leafIdx)); - } - } - - if (termsPerLeaf.isEmpty()) { - return null; - } else { - return new MultiTerms(termsPerLeaf.toArray(EMPTY_ARRAY), slicePerLeaf.toArray(ReaderSlice.EMPTY_ARRAY)); - } - } - - private final SortedSetDocValues values; - - SortedSetDocValuesTerms(SortedSetDocValues values) { - this.values = values; - } - - @Override - public TermsEnum iterator() throws IOException { - return values.termsEnum(); - } - - @Override - public TermsEnum intersect(CompiledAutomaton compiled, final BytesRef startTerm) throws IOException { - if (startTerm == null) { - return values.intersect(compiled); - } else { - return super.intersect(compiled, startTerm); - } - } - - 
@Override - public long size() throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public long getSumTotalTermFreq() throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public long getSumDocFreq() throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public int getDocCount() throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public boolean hasFreqs() { - throw new UnsupportedOperationException(); - } - - @Override - public boolean hasOffsets() { - throw new UnsupportedOperationException(); - } - - @Override - public boolean hasPositions() { - throw new UnsupportedOperationException(); - } - - @Override - public boolean hasPayloads() { - throw new UnsupportedOperationException(); - } - - } - @Override public String typeName() { return CONTENT_TYPE; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/SortedSetDocValuesTerms.java b/server/src/main/java/org/elasticsearch/index/mapper/SortedSetDocValuesTerms.java new file mode 100644 index 0000000000000..4fbfc38b9a543 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/mapper/SortedSetDocValuesTerms.java @@ -0,0 +1,126 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.index.mapper; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.MultiTerms; +import org.apache.lucene.index.ReaderSlice; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.automaton.CompiledAutomaton; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * A simple terms implementation for SortedSetDocValues that only provides access to {@link TermsEnum} via + * {@link #iterator} and {@link #intersect(CompiledAutomaton, BytesRef)} methods. + * We have this custom implementation based on {@link MultiTerms} instead of using + * {@link org.apache.lucene.index.MultiDocValues#getSortedSetValues(IndexReader, String)} + * because {@link org.apache.lucene.index.MultiDocValues} builds global ordinals up-front whereas + * {@link MultiTerms}, which exposes the terms enum via {@link org.apache.lucene.index.MultiTermsEnum}, + * merges terms on the fly. 
+ */ +class SortedSetDocValuesTerms extends Terms { + + public static Terms getTerms(IndexReader r, String field) throws IOException { + final List leaves = r.leaves(); + if (leaves.size() == 1) { + SortedSetDocValues sortedSetDocValues = leaves.get(0).reader().getSortedSetDocValues(field); + if (sortedSetDocValues == null) { + return null; + } else { + return new org.elasticsearch.index.mapper.SortedSetDocValuesTerms(sortedSetDocValues); + } + } + + final List termsPerLeaf = new ArrayList<>(leaves.size()); + final List slicePerLeaf = new ArrayList<>(leaves.size()); + + for (int leafIdx = 0; leafIdx < leaves.size(); leafIdx++) { + LeafReaderContext ctx = leaves.get(leafIdx); + SortedSetDocValues sortedSetDocValues = ctx.reader().getSortedSetDocValues(field); + if (sortedSetDocValues != null) { + termsPerLeaf.add(new org.elasticsearch.index.mapper.SortedSetDocValuesTerms(sortedSetDocValues)); + slicePerLeaf.add(new ReaderSlice(ctx.docBase, r.maxDoc(), leafIdx)); + } + } + + if (termsPerLeaf.isEmpty()) { + return null; + } else { + return new MultiTerms(termsPerLeaf.toArray(EMPTY_ARRAY), slicePerLeaf.toArray(ReaderSlice.EMPTY_ARRAY)); + } + } + + private final SortedSetDocValues values; + + SortedSetDocValuesTerms(SortedSetDocValues values) { + this.values = values; + } + + @Override + public TermsEnum iterator() throws IOException { + return values.termsEnum(); + } + + @Override + public TermsEnum intersect(CompiledAutomaton compiled, final BytesRef startTerm) throws IOException { + if (startTerm == null) { + return values.intersect(compiled); + } else { + return super.intersect(compiled, startTerm); + } + } + + @Override + public long size() throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public long getSumTotalTermFreq() throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public long getSumDocFreq() throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public 
int getDocCount() throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public boolean hasFreqs() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean hasOffsets() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean hasPositions() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean hasPayloads() { + throw new UnsupportedOperationException(); + } + +} diff --git a/server/src/test/java/org/elasticsearch/index/mapper/IpPrefixAutomatonUtilTests.java b/server/src/test/java/org/elasticsearch/index/mapper/IpPrefixAutomatonUtilTests.java new file mode 100644 index 0000000000000..5d5be345c8e9e --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/mapper/IpPrefixAutomatonUtilTests.java @@ -0,0 +1,217 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.index.mapper; + +import org.apache.commons.codec.binary.Hex; +import org.apache.lucene.document.InetAddressPoint; +import org.apache.lucene.util.automaton.Automaton; +import org.apache.lucene.util.automaton.CompiledAutomaton; +import org.apache.lucene.util.automaton.MinimizationOperations; +import org.apache.lucene.util.automaton.Operations; +import org.elasticsearch.common.network.NetworkAddress; +import org.elasticsearch.test.ESTestCase; + +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.util.Arrays; + +import static org.elasticsearch.index.mapper.IpPrefixAutomatonUtil.parseIp6Prefix; +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.empty; +import static org.hamcrest.Matchers.is; + +public class IpPrefixAutomatonUtilTests extends ESTestCase { + + public void testCreateIp4PrefixAutomaton() throws UnknownHostException { + InetAddress randomIp = randomIp(true); + String ipString = NetworkAddress.format(randomIp); + + // get a random prefix, some emphasis on shorter ones, and compile a prefix automaton for it + String randomPrefix = ipString.substring(0, randomBoolean() ? 
randomIntBetween(1, 6) : randomIntBetween(1, ipString.length())); + CompiledAutomaton ip4Automaton = compileAutomaton(IpPrefixAutomatonUtil.createIp4Automaton(randomPrefix)); + + // check that the original ip is accepted + assertTrue(ip4Automaton.runAutomaton.run(randomIp.getAddress(), 0, randomIp.getAddress().length)); + + // check that another random ip that doesn't have the same prefix isn't accepted + byte[] nonMatchingIp = randomValueOtherThanMany(ipv4 -> { + try { + return NetworkAddress.format(InetAddress.getByAddress(ipv4)).startsWith(randomPrefix); + } catch (UnknownHostException e) { + throw new RuntimeException(e); + } + }, () -> randomByteArrayOfLength(4)); + assertFalse(ip4Automaton.runAutomaton.run(nonMatchingIp, 0, nonMatchingIp.length)); + + // no bytes sequence longer than four bytes should be accepted + byte[] fiveBytes = Arrays.copyOf(randomIp.getAddress(), 5); + fiveBytes[4] = randomByte(); + assertFalse(ip4Automaton.runAutomaton.run(fiveBytes, 0, 5)); + + // the empty prefix should create an automaton that accepts every four bytes address + CompiledAutomaton acceptAll = compileAutomaton(IpPrefixAutomatonUtil.createIp4Automaton("")); + assertTrue(acceptAll.runAutomaton.run(randomByteArrayOfLength(4), 0, 4)); + } + + public void testIncompleteDecimalGroupAutomaton() throws UnknownHostException { + for (int p = 0; p <= 255; p++) { + String prefix = String.valueOf(p); + Automaton automaton = IpPrefixAutomatonUtil.INCOMPLETE_IP4_GROUP_AUTOMATON_LOOKUP.get(Integer.parseInt(prefix)); + CompiledAutomaton compiledAutomaton = compileAutomaton(automaton); + for (int i = 0; i < 256; i++) { + if (String.valueOf(i).startsWith(prefix)) { + assertTrue(compiledAutomaton.runAutomaton.run(new byte[] { (byte) i }, 0, 1)); + } else { + assertFalse(compiledAutomaton.runAutomaton.run(new byte[] { (byte) i }, 0, 1)); + } + } + } + } + + public void testBuildPrefixAutomaton() throws UnknownHostException { + { + CompiledAutomaton compiledAutomaton = 
IpPrefixAutomatonUtil.buildIpPrefixAutomaton("10"); + byte[] encode = InetAddressPoint.encode(InetAddress.getByName("1.2.3.4")); + assertFalse(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); + encode = InetAddressPoint.encode(InetAddress.getByName("10.2.3.4")); + assertTrue(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); + encode = InetAddressPoint.encode(InetAddress.getByName("2.2.3.4")); + assertFalse(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); + encode = InetAddressPoint.encode(InetAddress.getByName("1::1")); + assertFalse(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); + encode = InetAddressPoint.encode(InetAddress.getByName("10::1")); + assertTrue(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); + encode = InetAddressPoint.encode(InetAddress.getByName("100::1")); + assertTrue(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); + encode = InetAddressPoint.encode(InetAddress.getByName("1000::1")); + assertTrue(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); + encode = InetAddressPoint.encode(InetAddress.getByName("1000::1.2.3.4")); + assertTrue(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); + } + { + CompiledAutomaton compiledAutomaton = IpPrefixAutomatonUtil.buildIpPrefixAutomaton("1"); + byte[] encode = InetAddressPoint.encode(InetAddress.getByName("1.2.3.4")); + assertTrue(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); + encode = InetAddressPoint.encode(InetAddress.getByName("10.2.3.4")); + assertTrue(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); + encode = InetAddressPoint.encode(InetAddress.getByName("2.2.3.4")); + assertFalse(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); + encode = InetAddressPoint.encode(InetAddress.getByName("1af::1:2")); + assertTrue(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); + encode = InetAddressPoint.encode(InetAddress.getByName("1f::1:2")); + 
assertTrue(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); + encode = InetAddressPoint.encode(InetAddress.getByName("::1:2")); + assertFalse(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); + encode = InetAddressPoint.encode(InetAddress.getByName("1cce:e003:0:0:9279:d8d3:ffff:ffff")); + assertTrue(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); + + } + { + CompiledAutomaton compiledAutomaton = IpPrefixAutomatonUtil.buildIpPrefixAutomaton("1."); + byte[] encode = InetAddressPoint.encode(InetAddress.getByName("1.2.3.4")); + assertTrue(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); + encode = InetAddressPoint.encode(InetAddress.getByName("10.2.3.4")); + assertFalse(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); + encode = InetAddressPoint.encode(InetAddress.getByName("2.2.3.4")); + assertFalse(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); + } + { + CompiledAutomaton compiledAutomaton = IpPrefixAutomatonUtil.buildIpPrefixAutomaton("1:2"); + byte[] encode = InetAddressPoint.encode(InetAddress.getByName("1:2::1")); + assertTrue(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); + encode = InetAddressPoint.encode(InetAddress.getByName("1:2a::1")); + assertTrue(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); + encode = InetAddressPoint.encode(InetAddress.getByName("1:2ab::1")); + assertTrue(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); + encode = InetAddressPoint.encode(InetAddress.getByName("1:2ab5::1")); + assertTrue(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); + encode = InetAddressPoint.encode(InetAddress.getByName("10:2::3:4")); + assertFalse(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); + encode = InetAddressPoint.encode(InetAddress.getByName("::1:2:3:4")); + assertFalse(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); + } + { + CompiledAutomaton compiledAutomaton = 
IpPrefixAutomatonUtil.buildIpPrefixAutomaton("::1:2"); + byte[] encode = InetAddressPoint.encode(InetAddress.getByName("::1:2")); + assertTrue(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); + encode = InetAddressPoint.encode(InetAddress.getByName("0:0:1:2::1")); + assertTrue(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); + encode = InetAddressPoint.encode(InetAddress.getByName("1:2ab::1")); + assertFalse(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); + } + { + CompiledAutomaton compiledAutomaton = IpPrefixAutomatonUtil.buildIpPrefixAutomaton("1::1.2"); + byte[] encode = InetAddressPoint.encode(InetAddress.getByName("1::1.2.3.4")); + assertTrue(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); + encode = InetAddressPoint.encode(InetAddress.getByName("1::1.3.2.4")); + assertFalse(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); + encode = InetAddressPoint.encode(InetAddress.getByName("1::1.22.3.4")); + assertTrue(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); + } + { + CompiledAutomaton compiledAutomaton = IpPrefixAutomatonUtil.buildIpPrefixAutomaton("201."); + byte[] encode = InetAddressPoint.encode(InetAddress.getByName("c935:1902::643f:9e65:0:0")); + assertFalse(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); + } + } + + public void testParseIp6Prefix() { + assertThat(parseIp6Prefix("123"), contains("123")); + assertThat(parseIp6Prefix("123:12"), contains("123:", "12")); + assertThat(parseIp6Prefix("123::12"), contains("123:", ":", "12")); + assertThat(parseIp6Prefix("123::12:00ab"), contains("123:", ":", "12:", "00ab")); + assertThat(parseIp6Prefix("123::12:00ah"), is(empty())); + assertThat(parseIp6Prefix("12345:"), is(empty())); + assertThat( + parseIp6Prefix("2001:0db8:85a3:08d3:1319:8a2e:0370:7344"), + contains("2001:", "0db8:", "85a3:", "08d3:", "1319:", "8a2e:", "0370:", "7344") + ); + assertThat(parseIp6Prefix("2001:db8:0:8d3:0:8a2e:70:7344"), 
contains("2001:", "db8:", "0:", "8d3:", "0:", "8a2e:", "70:", "7344")); + assertThat(parseIp6Prefix("2001:db8::1428:57ab"), contains("2001:", "db8:", ":", "1428:", "57ab")); + assertThat(parseIp6Prefix("::ffff:7f00:1"), contains(":", ":", "ffff:", "7f00:", "1")); + assertThat(parseIp6Prefix("::ffff:127.0.0.1"), contains(":", ":", "ffff:", "127.0.0.1")); + assertThat(parseIp6Prefix("::127."), contains(":", ":", "127.")); + assertThat(parseIp6Prefix("::127.1.2"), contains(":", ":", "127.1.2")); + assertThat(parseIp6Prefix("::127.1.1f"), is(empty())); + assertThat(parseIp6Prefix("::127.1234.1.3"), is(empty())); + assertThat(parseIp6Prefix("::127.1234.1:3"), is(empty())); + } + + public void testAutomatonFromIPv6Group() throws UnknownHostException { + expectThrows(AssertionError.class, () -> IpPrefixAutomatonUtil.automatonFromIPv6Group("")); + expectThrows(AssertionError.class, () -> IpPrefixAutomatonUtil.automatonFromIPv6Group("12345")); + + // start with a 4-char hex string, build automaton for random prefix of it, then assure its accepted + byte[] bytes = randomByteArrayOfLength(2); + String randomHex = new String(Hex.encodeHex(bytes)).replaceAll("^0+", ""); + String prefix = randomHex.substring(0, randomIntBetween(1, randomHex.length())); + Automaton automaton = IpPrefixAutomatonUtil.automatonFromIPv6Group(prefix); + CompiledAutomaton compiledAutomaton = compileAutomaton(automaton); + assertTrue(compiledAutomaton.runAutomaton.run(bytes, 0, bytes.length)); + + // create random 4-char hex that isn't prefixed by the current prefix and check it isn't accepted + byte[] badGroup = randomValueOtherThanMany( + b -> new String(Hex.encodeHex(b)).replaceAll("^0+", "").startsWith(prefix), + () -> randomByteArrayOfLength(2) + ); + assertFalse(compiledAutomaton.runAutomaton.run(badGroup, 0, badGroup.length)); + } + + private static CompiledAutomaton compileAutomaton(Automaton automaton) { + automaton = MinimizationOperations.minimize(automaton, Integer.MAX_VALUE); + 
CompiledAutomaton compiledAutomaton = new CompiledAutomaton( + automaton, + null, + false, + Operations.DEFAULT_DETERMINIZE_WORK_LIMIT, + true + ); + return compiledAutomaton; + } +} diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/termsenum/TermsEnumTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/termsenum/TermsEnumTests.java new file mode 100644 index 0000000000000..50407b76ebe97 --- /dev/null +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/termsenum/TermsEnumTests.java @@ -0,0 +1,180 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.core.termsenum; + +import org.elasticsearch.action.bulk.BulkRequestBuilder; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.test.ESSingleNodeTestCase; +import org.elasticsearch.xcontent.XContentFactory; +import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin; +import org.elasticsearch.xpack.core.XPackSettings; +import org.elasticsearch.xpack.core.termsenum.action.TermsEnumAction; +import org.elasticsearch.xpack.core.termsenum.action.TermsEnumRequest; +import org.elasticsearch.xpack.core.termsenum.action.TermsEnumResponse; + +import java.net.InetAddress; +import java.util.Collection; +import java.util.List; + +import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder; +import static org.hamcrest.Matchers.contains; + +public class TermsEnumTests extends ESSingleNodeTestCase { + + @Override + protected Collection> getPlugins() { + return List.of(LocalStateCompositeXPackPlugin.class); + } + + @Override + protected Settings nodeSettings() { + return Settings.builder().put(XPackSettings.SECURITY_ENABLED.getKey(), "false").build(); + 
} + + public void testTermsEnumIPBasic() throws Exception { + String indexName = "test"; + createIndex(indexName); + + client().admin() + .indices() + .preparePutMapping(indexName) + .setSource( + XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject("ip_addr") + .field("type", "ip") + .endObject() + .endObject() + .endObject() + .endObject() + ) + .get(); + ensureGreen(); + + client().prepareIndex(indexName).setId("1").setSource(jsonBuilder().startObject().field("ip_addr", "1.2.3.4").endObject()).get(); + client().prepareIndex(indexName).setId("2").setSource(jsonBuilder().startObject().field("ip_addr", "205.0.1.2").endObject()).get(); + client().prepareIndex(indexName).setId("3").setSource(jsonBuilder().startObject().field("ip_addr", "2.2.2.2").endObject()).get(); + client().prepareIndex(indexName) + .setId("4") + .setSource(jsonBuilder().startObject().field("ip_addr", "2001:db8::1:0:0:1").endObject()) + .get(); + client().prepareIndex(indexName).setId("5").setSource(jsonBuilder().startObject().field("ip_addr", "13.3.3.3").endObject()).get(); + client().admin().indices().prepareRefresh().get(); + + { + TermsEnumResponse response = client().execute(TermsEnumAction.INSTANCE, new TermsEnumRequest(indexName).field("ip_addr")).get(); + List terms = response.getTerms(); + assertEquals(5, terms.size()); + assertThat(terms, contains("1.2.3.4", "2.2.2.2", "13.3.3.3", "205.0.1.2", "2001:db8::1:0:0:1")); + } + { + TermsEnumResponse response = client().execute( + TermsEnumAction.INSTANCE, + new TermsEnumRequest(indexName).field("ip_addr").searchAfter("13.3.3.3") + ).get(); + List terms = response.getTerms(); + assertEquals(2, terms.size()); + assertThat(terms, contains("205.0.1.2", "2001:db8::1:0:0:1")); + } + { + TermsEnumResponse response = client().execute( + TermsEnumAction.INSTANCE, + new TermsEnumRequest(indexName).field("ip_addr").string("2") + ).get(); + List terms = response.getTerms(); + assertEquals(3, 
terms.size()); + assertThat(terms, contains("2.2.2.2", "205.0.1.2", "2001:db8::1:0:0:1")); + } + { + TermsEnumResponse response = client().execute( + TermsEnumAction.INSTANCE, + new TermsEnumRequest(indexName).field("ip_addr").string("20") + ).get(); + List terms = response.getTerms(); + assertEquals(2, terms.size()); + assertThat(terms, contains("205.0.1.2", "2001:db8::1:0:0:1")); + } + } + + public void testTermsEnumIPRandomized() throws Exception { + String indexName = "test_random"; + createIndex(indexName); + int numDocs = 500; + + client().admin() + .indices() + .preparePutMapping(indexName) + .setSource( + XContentFactory.jsonBuilder() + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject("ip_addr") + .field("type", "ip") + .endObject() + .endObject() + .endObject() + .endObject() + ) + .get(); + ensureGreen(); + + // create random ip test data + InetAddress[] randomIps = new InetAddress[numDocs]; + BulkRequestBuilder bulkRequestBuilder = client().prepareBulk(indexName); + for (int i = 0; i < numDocs; i++) { + randomIps[i] = randomIp(randomBoolean()); + bulkRequestBuilder.add( + client().prepareIndex(indexName) + .setSource(jsonBuilder().startObject().field("ip_addr", randomIps[i].getHostAddress()).endObject()) + ); + } + bulkRequestBuilder.get(); + client().admin().indices().prepareRefresh().get(); + + // test for short random prefixes, max length 7 should at least include some separators but not be too long for short ipv4 + for (int prefixLength = 1; prefixLength < 7; prefixLength++) { + String randomPrefix = randomIps[randomIntBetween(0, numDocs)].getHostAddress().substring(0, prefixLength).replaceAll("^0*", ""); + int expectedResults = 0; + for (int i = 0; i < numDocs; i++) { + if (randomIps[i].getHostAddress().startsWith(randomPrefix)) { + expectedResults++; + } + } + TermsEnumResponse response = client().execute( + TermsEnumAction.INSTANCE, + new 
TermsEnumRequest(indexName).field("ip_addr").string(randomPrefix).size(numDocs) + ).get(); + List terms = response.getTerms(); + assertEquals( + "expected " + expectedResults + " for prefix " + randomPrefix + " but was " + terms.size() + ", " + terms, + expectedResults, + terms.size() + ); + + // test search after functionality + int searchAfterPosition = randomIntBetween(0, terms.size() - 1); + expectedResults = expectedResults - searchAfterPosition - 1; + response = client().execute( + TermsEnumAction.INSTANCE, + new TermsEnumRequest(indexName).field("ip_addr") + .string(randomPrefix) + .size(numDocs) + .searchAfter(terms.get(searchAfterPosition)) + ).get(); + assertEquals( + "expected " + expectedResults + " for prefix " + randomPrefix + " but was " + response.getTerms().size() + ", " + terms, + expectedResults, + response.getTerms().size() + ); + } + } +} From 8b78d8636ffb7b418a9884d86646e064701114d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Mon, 6 Mar 2023 12:19:43 +0100 Subject: [PATCH 2/5] Update docs/changelog/94322.yaml --- docs/changelog/94322.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 docs/changelog/94322.yaml diff --git a/docs/changelog/94322.yaml b/docs/changelog/94322.yaml new file mode 100644 index 0000000000000..418f2564abcdf --- /dev/null +++ b/docs/changelog/94322.yaml @@ -0,0 +1,6 @@ +pr: 94322 +summary: Enable `_terms_enum` on `ip` fields +area: Mapping +type: enhancement +issues: + - 89933 From 418629f74287e0ea37cffb2ff53b258cf86f7526 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Mon, 6 Mar 2023 13:38:51 +0100 Subject: [PATCH 3/5] forbidden apis --- .../xpack/core/termsenum/TermsEnumTests.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/termsenum/TermsEnumTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/termsenum/TermsEnumTests.java index 
50407b76ebe97..a6e7f9733550f 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/termsenum/TermsEnumTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/termsenum/TermsEnumTests.java @@ -8,6 +8,7 @@ package org.elasticsearch.xpack.core.termsenum; import org.elasticsearch.action.bulk.BulkRequestBuilder; +import org.elasticsearch.common.network.NetworkAddress; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESSingleNodeTestCase; @@ -134,7 +135,7 @@ public void testTermsEnumIPRandomized() throws Exception { randomIps[i] = randomIp(randomBoolean()); bulkRequestBuilder.add( client().prepareIndex(indexName) - .setSource(jsonBuilder().startObject().field("ip_addr", randomIps[i].getHostAddress()).endObject()) + .setSource(jsonBuilder().startObject().field("ip_addr", NetworkAddress.format(randomIps[i])).endObject()) ); } bulkRequestBuilder.get(); @@ -142,10 +143,12 @@ public void testTermsEnumIPRandomized() throws Exception { // test for short random prefixes, max length 7 should at least include some separators but not be too long for short ipv4 for (int prefixLength = 1; prefixLength < 7; prefixLength++) { - String randomPrefix = randomIps[randomIntBetween(0, numDocs)].getHostAddress().substring(0, prefixLength).replaceAll("^0*", ""); + String randomPrefix = NetworkAddress.format(randomIps[randomIntBetween(0, numDocs - 1)]) + .substring(0, prefixLength) + .replaceAll("^0*", ""); int expectedResults = 0; for (int i = 0; i < numDocs; i++) { - if (randomIps[i].getHostAddress().startsWith(randomPrefix)) { + if (NetworkAddress.format(randomIps[i]).startsWith(randomPrefix)) { expectedResults++; } } From 03c4d79dbdaefe2a034f82c683e0450aac33fbb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Tue, 7 Mar 2023 17:28:22 +0100 Subject: [PATCH 4/5] fixing one more edge case --- .../index/mapper/IpPrefixAutomatonUtil.java | 10 ++++++--
.../index/mapper/IpPrefixAutomatonUtilTests.java | 5 +++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IpPrefixAutomatonUtil.java b/server/src/main/java/org/elasticsearch/index/mapper/IpPrefixAutomatonUtil.java index d03c53974f540..d467479f5d6c1 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/IpPrefixAutomatonUtil.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/IpPrefixAutomatonUtil.java @@ -179,8 +179,14 @@ static Automaton createIp4Automaton(String prefix) { } } else { // if present, this is the last group - incompleteGroupAutomaton = INCOMPLETE_IP4_GROUP_AUTOMATON_LOOKUP.get(Integer.parseInt(group)); - prefixBytes++; + int numberPrefix = Integer.parseInt(group); + if (numberPrefix < 255) { + incompleteGroupAutomaton = INCOMPLETE_IP4_GROUP_AUTOMATON_LOOKUP.get(numberPrefix); + prefixBytes++; + } else { + // this cannot be a valid ip4 group + return EMPTY_AUTOMATON; + } } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/IpPrefixAutomatonUtilTests.java b/server/src/test/java/org/elasticsearch/index/mapper/IpPrefixAutomatonUtilTests.java index 5d5be345c8e9e..da2fddc1be7e2 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/IpPrefixAutomatonUtilTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/IpPrefixAutomatonUtilTests.java @@ -159,6 +159,11 @@ public void testBuildPrefixAutomaton() throws UnknownHostException { byte[] encode = InetAddressPoint.encode(InetAddress.getByName("c935:1902::643f:9e65:0:0")); assertFalse(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); } + { + CompiledAutomaton compiledAutomaton = IpPrefixAutomatonUtil.buildIpPrefixAutomaton("935"); + byte[] encode = InetAddressPoint.encode(InetAddress.getByName("0935:1902::643f:9e65:0:0")); + assertTrue(compiledAutomaton.runAutomaton.run(encode, 0, encode.length)); + } } public void testParseIp6Prefix() { From
7f45f2aa7f1aed9ba2aed60ce803b6673d664fcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Tue, 7 Mar 2023 17:40:16 +0100 Subject: [PATCH 5/5] spotless --- .../org/elasticsearch/index/mapper/IpPrefixAutomatonUtil.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IpPrefixAutomatonUtil.java b/server/src/main/java/org/elasticsearch/index/mapper/IpPrefixAutomatonUtil.java index d467479f5d6c1..b46fa86f2bf5e 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/IpPrefixAutomatonUtil.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/IpPrefixAutomatonUtil.java @@ -179,7 +179,7 @@ static Automaton createIp4Automaton(String prefix) { } } else { // if present, this is the last group - int numberPrefix = Integer.parseInt(group); + int numberPrefix = Integer.parseInt(group); if (numberPrefix < 255) { incompleteGroupAutomaton = INCOMPLETE_IP4_GROUP_AUTOMATON_LOOKUP.get(numberPrefix); prefixBytes++;