From 730da697eb5b8347fc1eb6968329e45bea41415b Mon Sep 17 00:00:00 2001 From: Jake Landis Date: Mon, 23 Jul 2018 14:06:20 -0500 Subject: [PATCH 1/9] Introduce the dissect library The dissect library will be used for the ingest node as an alternative to Grok to split a string based on a pattern. Dissect differs from Grok in that regular expressions are not used to split the string. Note - Regular expressions are used during construction of the objects, but not in the hot path. A dissect pattern takes the form of: '%{a} %{b},%{c}' which is composed of 3 keys (a,b,c) and two delimiters (space and comma). This dissect pattern will match a string of the form: 'foo bar,baz' and will result in a key/value pairing of 'a=foo, b=bar, and c=baz'. See the comments in DissectParser for a full explanation. This commit does not include the ingest node processor that will consume it. However, the consumption should be a trivial mapping between the key/value pairing returned by the parser and the key/value pairing needed for the IngestDocument. 
--- libs/dissect/build.gradle | 47 +++ libs/dissect/src/main/eclipse-build.gradle | 3 + .../dissect/DissectException.java | 57 +++ .../org/elasticsearch/dissect/DissectKey.java | 169 +++++++++ .../elasticsearch/dissect/DissectPair.java | 82 +++++ .../elasticsearch/dissect/DissectParser.java | 340 ++++++++++++++++++ libs/dissect/src/test/eclipse-build.gradle | 7 + .../dissect/DissectKeyTests.java | 156 ++++++++ .../dissect/DissectPairTests.java | 75 ++++ .../dissect/DissectParserTests.java | 312 ++++++++++++++++ 10 files changed, 1248 insertions(+) create mode 100644 libs/dissect/build.gradle create mode 100644 libs/dissect/src/main/eclipse-build.gradle create mode 100644 libs/dissect/src/main/java/org/elasticsearch/dissect/DissectException.java create mode 100644 libs/dissect/src/main/java/org/elasticsearch/dissect/DissectKey.java create mode 100644 libs/dissect/src/main/java/org/elasticsearch/dissect/DissectPair.java create mode 100644 libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java create mode 100644 libs/dissect/src/test/eclipse-build.gradle create mode 100644 libs/dissect/src/test/java/org/elasticsearch/dissect/DissectKeyTests.java create mode 100644 libs/dissect/src/test/java/org/elasticsearch/dissect/DissectPairTests.java create mode 100644 libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java diff --git a/libs/dissect/build.gradle b/libs/dissect/build.gradle new file mode 100644 index 0000000000000..2ff61c10237d3 --- /dev/null +++ b/libs/dissect/build.gradle @@ -0,0 +1,47 @@ +import org.elasticsearch.gradle.precommit.PrecommitTasks + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +archivesBaseName = 'elasticsearch-dissect' + +dependencies { + if (isEclipse == false || project.path == ":libs:dissect-tests") { + testCompile("org.elasticsearch.test:framework:${version}") { + exclude group: 'org.elasticsearch', module: 'dissect' + } + } +} + +forbiddenApisMain { + signaturesURLs = [PrecommitTasks.getResource('/forbidden/jdk-signatures.txt')] +} + +if (isEclipse) { + // in eclipse the project is under a fake root, we need to change around the source sets + sourceSets { + if (project.path == ":libs:dissect") { + main.java.srcDirs = ['java'] + main.resources.srcDirs = ['resources'] + } else { + test.java.srcDirs = ['java'] + test.resources.srcDirs = ['resources'] + } + } +} diff --git a/libs/dissect/src/main/eclipse-build.gradle b/libs/dissect/src/main/eclipse-build.gradle new file mode 100644 index 0000000000000..3188c7aff01f7 --- /dev/null +++ b/libs/dissect/src/main/eclipse-build.gradle @@ -0,0 +1,3 @@ + +// this is just shell gradle file for eclipse to have separate projects for grok src and tests +apply from: '../../build.gradle' diff --git a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectException.java b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectException.java new file mode 100644 index 0000000000000..a2f1ab336401b --- /dev/null +++ b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectException.java @@ -0,0 +1,57 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.dissect; + +/** + * Parent class for all dissect related exceptions. Consumers may catch this exception or more specific child exceptions. + */ +public abstract class DissectException extends RuntimeException { + DissectException(String message) { + super(message); + } + + /** + * Error while parsing a dissect pattern + */ + static class PatternParse extends DissectException { + PatternParse(String pattern, String reason) { + super("Unable to parse pattern: " + pattern + " Reason: " + reason); + } + } + + /** + * Error while parsing a dissect key + */ + static class KeyParse extends DissectException { + KeyParse(String key, String reason) { + super("Unable to parse key: " + key + " Reason: " + reason); + } + } + + /** + * Unable to find a match between pattern and source string + */ + static class FindMatch extends DissectException { + FindMatch(String pattern, String source) { + super("Unable to find match for dissect pattern: " + pattern + " against source: " + source); + + } + } +} diff --git a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectKey.java b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectKey.java new file mode 100644 index 0000000000000..2772466f70aa1 --- /dev/null +++ 
b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectKey.java @@ -0,0 +1,169 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.dissect; + +import java.util.EnumSet; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + *

A Key of a dissect pattern. This class models the name and modifiers and provides some validation.

+ *

For a dissect pattern of {@code %{a->} %{b}} the dissect keys are {@code a->} and {@code b}. + *

+ * This class represents a single key. + *

A single key is composed of a name and it's modifiers. For the key {@code a->}, {@code a} is the name and {@code ->} is the modifier. + */ +public final class DissectKey { + private static final Pattern LEFT_MODIFIER_PATTERN = Pattern.compile("([+?&])(.*?)(->)?$", Pattern.DOTALL); + private static final Pattern RIGHT_PADDING_PATTERN = Pattern.compile("^(.*?)(->)?$", Pattern.DOTALL); + private static final Pattern APPEND_WITH_ORDER_PATTERN = Pattern.compile("[+](.*?)(/)([0-9]+)(->)?$", Pattern.DOTALL); + private final Modifier modifier; + private boolean skip; + private boolean skipRightPadding; + private int orderPosition; + private String name; + + /** + * Constructor - parses the String key into it's name and modifier(s) + * + * @param key The key without the leading %{ or trailing }, for example a-> + */ + DissectKey(String key) { + skip = key == null || key.isEmpty(); + modifier = Modifier.findModifier(key); + switch (modifier) { + case NONE: + Matcher matcher = RIGHT_PADDING_PATTERN.matcher(key); + while (matcher.find()) { + name = matcher.group(1); + skipRightPadding = matcher.group(2) != null; + } + skip = name.isEmpty(); + break; + case APPEND: + matcher = LEFT_MODIFIER_PATTERN.matcher(key); + while (matcher.find()) { + name = matcher.group(2); + skipRightPadding = matcher.group(3) != null; + } + break; + case FIELD_NAME: + matcher = LEFT_MODIFIER_PATTERN.matcher(key); + while (matcher.find()) { + name = matcher.group(2); + skipRightPadding = matcher.group(3) != null; + } + break; + case FIELD_VALUE: + matcher = LEFT_MODIFIER_PATTERN.matcher(key); + while (matcher.find()) { + name = matcher.group(2); + skipRightPadding = matcher.group(3) != null; + } + break; + case APPEND_WITH_ORDER: + matcher = APPEND_WITH_ORDER_PATTERN.matcher(key); + while (matcher.find()) { + name = matcher.group(1); + orderPosition = Short.valueOf(matcher.group(3)); + skipRightPadding = matcher.group(4) != null; + } + break; + + } + + if (name == null || (name.isEmpty() && !skip)) { + 
throw new DissectException.KeyParse(key, "The key name could be determined"); + } + } + + Modifier getModifier() { + return modifier; + } + + boolean skip() { + return skip; + } + + public boolean skipRightPadding() { + return skipRightPadding; + } + + int getOrderPosition() { + return orderPosition; + } + + public String getName() { + return name; + } + + //generated + @Override + public String toString() { + return "DissectKey{" + + "modifier=" + modifier + + ", skip=" + skip + + ", orderPosition=" + orderPosition + + ", name='" + name + '\'' + + '}'; + } + + + public enum Modifier { + NONE(""), APPEND_WITH_ORDER("/"), APPEND("+"), FIELD_NAME("?"), FIELD_VALUE("&"); + + private static final Pattern MODIFIER_PATTERN = Pattern.compile("[/+?&]"); + + private final String modifier; + + @Override + public String toString() { + return modifier; + } + + Modifier(final String modifier) { + this.modifier = modifier; + } + + static Modifier fromString(String modifier) { + return EnumSet.allOf(Modifier.class).stream().filter(km -> km.modifier.equals(modifier)) + .findFirst().orElseThrow(() -> new IllegalArgumentException("Found invalid modifier.")); //throw should never happen + } + + static Modifier findModifier(String key) { + Modifier modifier = Modifier.NONE; + if (key != null && !key.isEmpty()) { + Matcher matcher = MODIFIER_PATTERN.matcher(key); + int matches = 0; + while (matcher.find()) { + Modifier priorModifier = modifier; + modifier = Modifier.fromString(matcher.group()); + if (++matches > 1 && !(APPEND.equals(priorModifier) && APPEND_WITH_ORDER.equals(modifier))) { + throw new DissectException.KeyParse(key, "multiple modifiers are not allowed."); + } + } + } + return modifier; + } + } +} diff --git a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectPair.java b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectPair.java new file mode 100644 index 0000000000000..6b2d9ea7e7f85 --- /dev/null +++ 
b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectPair.java @@ -0,0 +1,82 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.dissect; + +import java.util.Objects; + +/** + * A tuple class holds a {@link DissectKey} and an associated String. The value associated with the key may have different meanings + * based on the context. For example, the associated value may be the delimiter associated with the key or the parsed value associated with + * the key. 
+ */ +public final class DissectPair implements Comparable { + + private final DissectKey key; + private final String value; + + DissectPair(DissectKey key, String value) { + this.key = key; + this.value = value; + } + + public DissectKey getKey() { + return key; + } + + public String getValue() { + return value; + } + + //generated + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + DissectPair that = (DissectPair) o; + return Objects.equals(key, that.key) && + Objects.equals(value, that.value); + } + + //generated + @Override + public int hashCode() { + return Objects.hash(key, value); + } + + //generated + @Override + public String toString() { + return "DissectPair{" + + "key=" + key + + ", value='" + value + '\'' + + '}'; + } + + @Override + public int compareTo(DissectPair o) { + if(this.getKey().getModifier().equals(DissectKey.Modifier.FIELD_NAME)){ + return -1; + } + if(this.getKey().getModifier().equals(DissectKey.Modifier.FIELD_VALUE)){ + return 1; + } + return Integer.compare(this.getKey().getOrderPosition(), o.getKey().getOrderPosition()); + } +} diff --git a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java new file mode 100644 index 0000000000000..be07cea35132f --- /dev/null +++ b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java @@ -0,0 +1,340 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.dissect; + +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.EnumSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Function; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +/** + *

Splits (dissects) a string into its parts based on a pattern.

A dissect pattern is composed of a set of keys and delimiters. + * For example the dissect pattern:

%{a} %{b},%{c}
has 3 keys (a,b,c) and two delimiters (space and comma). This pattern will + * match a string of the form:
foo bar,baz
and will result in a key/value pairing of
a=foo, b=bar, and c=baz.
+ *

Matches are all or nothing. For example, the same pattern will NOT match

foo bar baz
since not all of the delimiters + * matched. (The comma did not match.) + *

Dissect patterns can optionally have modifiers. These modifiers instruct the parser to change its behavior. For example the + * dissect pattern of

%{a},%{b}:%{c}
would not match
foo,bar,baz
since the colon never matches. + *

Modifiers appear to the left or the right of the key name. The supported modifiers are: + *

+ *

Patterns with empty key names are also supported. Keys with empty names are simply ignored in the result. Example: + *

+ * pattern: {@code %{a} %{} %{c}}
+ * string: {@code foo bar baz}
+ * result: {@code a=foo, c=baz}
+ * 
+ *

+ * Inspired by the Logstash Dissect Filter by Guy Boertje + */ +public final class DissectParser { + private static final Pattern LEADING_DELIMITER_PATTERN = Pattern.compile("^(.*?)%"); + private static final Pattern KEY_DELIMITER_FIELD_PATTERN = Pattern.compile("%\\{([^}]*?)}([^%]*)", Pattern.DOTALL); + private static final EnumSet POST_PROCESSING_MODIFIERS = EnumSet.of( + DissectKey.Modifier.APPEND_WITH_ORDER, + DissectKey.Modifier.APPEND, + DissectKey.Modifier.FIELD_NAME, + DissectKey.Modifier.FIELD_VALUE); + private static final EnumSet ASSOCIATE_MODIFIERS = EnumSet.of( + DissectKey.Modifier.FIELD_NAME, + DissectKey.Modifier.FIELD_VALUE); + private static final EnumSet APPEND_MODIFIERS = EnumSet.of( + DissectKey.Modifier.APPEND, + DissectKey.Modifier.APPEND_WITH_ORDER); + private static final Function KEY_NAME = val -> val.getKey().getName(); + private final List matchPairs; + private final boolean needsPostParsing; + private final EnumSet allModifiers; + private final String appendSeparator; + private final String pattern; + private String leadingDelimiter = ""; + + public DissectParser(String pattern, String appendSeparator) { + this.pattern = pattern; + this.appendSeparator = appendSeparator == null ? 
"" : appendSeparator; + Matcher matcher = LEADING_DELIMITER_PATTERN.matcher(pattern); + while (matcher.find()) { + leadingDelimiter = matcher.group(1); + } + List matchPairs = new ArrayList<>(); + matcher = KEY_DELIMITER_FIELD_PATTERN.matcher(pattern.substring(leadingDelimiter.length())); + while (matcher.find()) { + DissectKey key = new DissectKey(matcher.group(1)); + String delimiter = matcher.group(2); + matchPairs.add(new DissectPair(key, delimiter)); + } + if (matchPairs.isEmpty()) { + throw new DissectException.PatternParse(pattern, "Unable to find any keys or delimiters."); + } + + List keys = matchPairs.stream().map(DissectPair::getKey).collect(Collectors.toList()); + this.allModifiers = getAllModifiers(keys); + + if (allModifiers.contains(DissectKey.Modifier.FIELD_NAME) || allModifiers.contains(DissectKey.Modifier.FIELD_VALUE)) { + Map> keyNameToDissectPairs = getAssociateMap(matchPairs); + for (Map.Entry> entry : keyNameToDissectPairs.entrySet()) { + List sameKeyNameList = entry.getValue(); + if (sameKeyNameList.size() != 2) { + throw new DissectException.PatternParse(pattern, "Found invalid key/reference associations: '" + + sameKeyNameList.stream().map(KEY_NAME).collect(Collectors.joining(",")) + + "' Please ensure each '?' is matched with a matching '&"); + } + } + } + needsPostParsing = POST_PROCESSING_MODIFIERS.stream().anyMatch(allModifiers::contains); + this.matchPairs = Collections.unmodifiableList(matchPairs); + } + + /** + *

Entry point to dissect a string into its parts.

+ *

+ * This implements a naive string matching algorithm. The string is walked left to right, comparing each byte against + * another string's bytes looking for matches. If the bytes match, then a second cursor looks ahead to see if all the bytes + * of the other string match. If they all match, record it and advance the primary cursor to the match point. If it cannot match + * all of the bytes then progress the main cursor. Repeat till the end of the input string. Since the string being searched for + * (the delimiter) is generally small and rare the naive approach is efficient. + *

+ * In this case the string that is walked is the input string, and the string being searched for is the current delimiter. + * For example for a dissect pattern of {@code %{a},%{b}:%{c}} the delimiters (comma then colon) are searched for in the + * input string. At class construction the list of keys+delimiters is found, which allows the use of that list to know which delimiter + * to use for the search. That list of delimiters is progressed once the current delimiter is matched. + *

+ * There are two special cases that require additional parsing beyond the standard naive algorithm. Consecutive delimiters should + * result in empty matches unless the {@code ->} modifier is provided. For example given the dissect pattern of + * {@code %{a},%{b},%{c},%{d}} and input string of {@code foo,,,} the match should be successful with empty values for b,c and d. + * However, if the key modifier {@code ->} is present it will simply skip over any delimiters just to the right of the key + * without assigning any values. + *

+ * Once the full string is parsed, it is validated that each key has a corresponding value and sent off for post processing. + * Key modifiers may instruct the parser to perform operations where the entire result set is needed. Post processing is used to + * obey those instructions, and doing it after parsing helps to keep the string parsing logic simple. + * All post processing will occur before this method returns. + *

+ * + * @param inputString The string to dissect + * @return a List of {@link DissectPair}s that have the matched key/value pairs that results from the parse. + * @throws DissectException if unable to dissect a pair into it's parts. + */ + public List parse(String inputString) { + Iterator it = matchPairs.iterator(); + List results = new ArrayList<>(); + //ensure leading delimiter matches + if (inputString != null && leadingDelimiter.equals(inputString.substring(0, leadingDelimiter.length()))) { + byte[] input = inputString.getBytes(StandardCharsets.UTF_8); + //grab the first key/delimiter pair + DissectPair dissectPair = it.next(); + DissectKey key = dissectPair.getKey(); + byte[] delimiter = dissectPair.getValue().getBytes(StandardCharsets.UTF_8); + //start dissection after the first delimiter + int i = leadingDelimiter.length(); + int valueStart = i; + int lookAheadMatches; + //start walking the input string byte by byte, look ahead for matches where needed + //if a match is found jump forward to the end of the match + for (; i < input.length; i++) { + lookAheadMatches = 0; + //potential match between delimiter and input string + if (delimiter.length > 0 && input[i] == delimiter[0]) { + //look ahead to see if the entire delimiter matches the input string + for (int j = 0; j < delimiter.length; j++) { + if (i + j < input.length && input[i + j] == delimiter[j]) { + lookAheadMatches++; + } + } + //found a full delimiter match + if (lookAheadMatches == delimiter.length) { + //record the key/value tuple + byte[] value = Arrays.copyOfRange(input, valueStart, i); + results.add(new DissectPair(key, new String(value, StandardCharsets.UTF_8))); + //jump to the end of the match + i += lookAheadMatches; + //look for consecutive delimiters (e.g. 
a,,,,d,e) + while (i < input.length) { + lookAheadMatches = 0; + for (int j = 0; j < delimiter.length; j++) { + if (i + j < input.length && input[i + j] == delimiter[j]) { + lookAheadMatches++; + } + } + //found consecutive delimiters + if (lookAheadMatches == delimiter.length) { + //jump to the end of the match + i += lookAheadMatches; + if (!key.skipRightPadding()) { + //progress the keys/delimiter if possible + if (!it.hasNext()) { + break; //the while loop + } + dissectPair = it.next(); + key = dissectPair.getKey(); + //add the key with an empty value for the empty delimiter + results.add(new DissectPair(key, "")); + } + } else { + break; + } + } + //progress the keys/delimiter if possible + if (!it.hasNext()) { + break; //the while loop + } + dissectPair = it.next(); + key = dissectPair.getKey(); + delimiter = dissectPair.getValue().getBytes(StandardCharsets.UTF_8); + //i is always one byte after the last found delimiter, aka the start of the next value + valueStart = i; + } + } + } + //the last key, grab the rest of the input (unless consecutive delimiters already grabbed the last key) + if (results.size() < matchPairs.size()) { + byte[] value = Arrays.copyOfRange(input, valueStart, input.length); + String valueString = new String(value, StandardCharsets.UTF_8); + results.add(new DissectPair(key, key.skipRightPadding() ? 
valueString.replaceFirst("\\s++$", "") : valueString)); + } + } + if (!isValid(results)) { + throw new DissectException.FindMatch(pattern, inputString); + } + return postProcess(results.stream().filter(dissectPair -> !dissectPair.getKey().skip()).collect(Collectors.toList())); + } + + + /** + * Verify that each key has a entry in the result, don't rely only on size since some error cases would result in false positives + */ + private boolean isValid(List results) { + boolean valid = false; + if (results.size() == matchPairs.size()) { + Set resultKeys = results.stream().map(DissectPair::getKey).collect(Collectors.toSet()); + Set sourceKeys = matchPairs.stream().map(DissectPair::getKey).collect(Collectors.toSet()); + long intersectionCount = resultKeys.stream().filter(sourceKeys::contains).count(); + valid = intersectionCount == results.size(); + } + return valid; + } + + + private List postProcess(List results) { + if (needsPostParsing) { + if (allModifiers.contains(DissectKey.Modifier.APPEND) || allModifiers.contains(DissectKey.Modifier.APPEND_WITH_ORDER)) { + results = append(results); + } + if (allModifiers.contains(DissectKey.Modifier.FIELD_NAME)) { //FIELD_VALUE is guaranteed to also be present + results = associate(results); + } + } + return results; + } + + private List append(List parserResult) { + List results = new ArrayList<>(parserResult.size() - 1); + Map> keyNameToDissectPairs = parserResult.stream().collect(Collectors.groupingBy(KEY_NAME)); + for (Map.Entry> entry : keyNameToDissectPairs.entrySet()) { + List sameKeyNameList = entry.getValue(); + long appendCount = sameKeyNameList.stream() + .filter(dissectPair -> APPEND_MODIFIERS.contains(dissectPair.getKey().getModifier())).count(); + // grouped by key name may not include append modifiers, for example associate pairs...don't + if (appendCount > 0) { + Collections.sort(sameKeyNameList); + String value = sameKeyNameList.stream().map(DissectPair::getValue).collect(Collectors.joining(appendSeparator)); 
+ results.add(new DissectPair(sameKeyNameList.get(0).getKey(), value)); + } else { + sameKeyNameList.forEach(results::add); + } + } + return results; + } + + private List associate(List parserResult) { + List results = new ArrayList<>(parserResult.size() - 1); + Map> keyNameToDissectPairs = getAssociateMap(parserResult); + for (Map.Entry> entry : keyNameToDissectPairs.entrySet()) { + List sameKeyNameList = entry.getValue(); + assert (sameKeyNameList.size() == 2); + Collections.sort(sameKeyNameList); + //based on the sort the key will always be first and value second. + String key = sameKeyNameList.get(0).getValue(); + String value = sameKeyNameList.get(1).getValue(); + results.add(new DissectPair(new DissectKey(key), value)); + } + //add non associate modifiers to results + results.addAll(parserResult.stream() + .filter(dissectPair -> !ASSOCIATE_MODIFIERS.contains(dissectPair.getKey().getModifier())) + .collect(Collectors.toList())); + return results; + } + + + private Map> getAssociateMap(List dissectPairs) { + return dissectPairs.stream() + .filter(dissectPair -> ASSOCIATE_MODIFIERS.contains(dissectPair.getKey().getModifier())) + .collect(Collectors.groupingBy(KEY_NAME)); + } + + private EnumSet getAllModifiers(Collection keys) { + Set modifiers = keys.stream().map(DissectKey::getModifier).collect(Collectors.toSet()); + return modifiers.isEmpty() ? 
EnumSet.noneOf(DissectKey.Modifier.class) : EnumSet.copyOf(modifiers); + } +} + + + diff --git a/libs/dissect/src/test/eclipse-build.gradle b/libs/dissect/src/test/eclipse-build.gradle new file mode 100644 index 0000000000000..56d632f23b129 --- /dev/null +++ b/libs/dissect/src/test/eclipse-build.gradle @@ -0,0 +1,7 @@ + +// this is just shell gradle file for eclipse to have separate projects for dissect src and tests +apply from: '../../build.gradle' + +dependencies { + testCompile project(':libs:dissect') +} diff --git a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectKeyTests.java b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectKeyTests.java new file mode 100644 index 0000000000000..6e6dde059d9f0 --- /dev/null +++ b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectKeyTests.java @@ -0,0 +1,156 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.dissect; + +import org.elasticsearch.test.ESTestCase; +import org.hamcrest.CoreMatchers; + +import java.util.EnumSet; +import java.util.List; +import java.util.stream.Collectors; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; + +public class DissectKeyTests extends ESTestCase { + + public void testNoModifier() { + String keyName = randomAlphaOfLengthBetween(1, 10); + DissectKey dissectKey = new DissectKey(keyName); + assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NONE)); + assertThat(dissectKey.skip(), is(false)); + assertThat(dissectKey.skipRightPadding(), is(false)); + assertThat(dissectKey.getOrderPosition(), equalTo(0)); + assertThat(dissectKey.getName(), equalTo(keyName)); + } + + public void testAppendModifier() { + String keyName = randomAlphaOfLengthBetween(1, 10); + DissectKey dissectKey = new DissectKey("+" + keyName); + assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.APPEND)); + assertThat(dissectKey.skip(), is(false)); + assertThat(dissectKey.skipRightPadding(), is(false)); + assertThat(dissectKey.getOrderPosition(), equalTo(0)); + assertThat(dissectKey.getName(), equalTo(keyName)); + } + + public void testAppendWithOrderModifier() { + String keyName = randomAlphaOfLengthBetween(1, 10); + int length = randomIntBetween(1, 100); + DissectKey dissectKey = new DissectKey("+" + keyName + "/" + length); + assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.APPEND_WITH_ORDER)); + assertThat(dissectKey.skip(), is(false)); + assertThat(dissectKey.skipRightPadding(), is(false)); + assertThat(dissectKey.getOrderPosition(), equalTo(length)); + assertThat(dissectKey.getName(), equalTo(keyName)); + } + + public void testAppendWithOrderModifierNoName() { + int length = randomIntBetween(1, 100); + DissectException e = expectThrows(DissectException.class, () -> new DissectKey("+/" + length)); + assertThat(e.getMessage(), 
CoreMatchers.containsString("Unable to parse key")); + } + + public void testOrderModifierWithoutAppend() { + String keyName = randomAlphaOfLengthBetween(1, 10); + int length = randomIntBetween(1, 100); + DissectException e = expectThrows(DissectException.class, () -> new DissectKey(keyName + "/" + length)); + assertThat(e.getMessage(), CoreMatchers.containsString("Unable to parse key")); + } + + public void testFieldNameModifier() { + String keyName = randomAlphaOfLengthBetween(1, 10); + DissectKey dissectKey = new DissectKey("?" + keyName); + assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.FIELD_NAME)); + assertThat(dissectKey.skip(), is(false)); + assertThat(dissectKey.skipRightPadding(), is(false)); + assertThat(dissectKey.getOrderPosition(), equalTo(0)); + assertThat(dissectKey.getName(), equalTo(keyName)); + } + + public void testFieldValueModifiers() { + String keyName = randomAlphaOfLengthBetween(1, 10); + DissectKey dissectKey = new DissectKey("&" + keyName); + assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.FIELD_VALUE)); + assertThat(dissectKey.skip(), is(false)); + assertThat(dissectKey.skipRightPadding(), is(false)); + assertThat(dissectKey.getOrderPosition(), equalTo(0)); + assertThat(dissectKey.getName(), equalTo(keyName)); + } + + public void testRightPaddingModifiers() { + String keyName = randomAlphaOfLengthBetween(1, 10); + DissectKey dissectKey = new DissectKey(keyName + "->"); + assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NONE)); + assertThat(dissectKey.skip(), is(false)); + assertThat(dissectKey.skipRightPadding(), is(true)); + assertThat(dissectKey.getOrderPosition(), equalTo(0)); + assertThat(dissectKey.getName(), equalTo(keyName)); + + dissectKey = new DissectKey("?" 
+ keyName + "->"); + assertThat(dissectKey.skipRightPadding(), is(true)); + + dissectKey = new DissectKey("&" + keyName + "->"); + assertThat(dissectKey.skipRightPadding(), is(true)); + + dissectKey = new DissectKey("+" + keyName + "->"); + assertThat(dissectKey.skipRightPadding(), is(true)); + + dissectKey = new DissectKey("+" + keyName + "/2->"); + assertThat(dissectKey.skipRightPadding(), is(true)); + } + + public void testMultipleLeftModifiers() { + String keyName = randomAlphaOfLengthBetween(1, 10); + List validModifiers = EnumSet.allOf(DissectKey.Modifier.class).stream() + .filter(m -> !m.equals(DissectKey.Modifier.NONE)) + .map(DissectKey.Modifier::toString) + .collect(Collectors.toList()); + String modifier1 = randomFrom(validModifiers); + String modifier2 = randomFrom(validModifiers); + DissectException e = expectThrows(DissectException.class, () -> new DissectKey(modifier1 + modifier2 + keyName)); + assertThat(e.getMessage(), CoreMatchers.containsString("Unable to parse key")); + } + + public void testSkipKey() { + String keyName = ""; + DissectKey dissectKey = new DissectKey(keyName); + assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NONE)); + assertThat(dissectKey.skip(), is(true)); + assertThat(dissectKey.skipRightPadding(), is(false)); + assertThat(dissectKey.getOrderPosition(), equalTo(0)); + assertThat(dissectKey.getName(), equalTo(keyName)); + } + public void testSkipKeyWithPadding() { + String keyName = ""; + DissectKey dissectKey = new DissectKey(keyName + "->"); + assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NONE)); + assertThat(dissectKey.skip(), is(true)); + assertThat(dissectKey.skipRightPadding(), is(true)); + assertThat(dissectKey.getOrderPosition(), equalTo(0)); + assertThat(dissectKey.getName(), equalTo(keyName)); + } + + public void testInvalidModifiers() { + //should never happen due to regex + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> 
DissectKey.Modifier.fromString("x")); + assertThat(e.getMessage(), CoreMatchers.containsString("invalid modifier")); + } +} diff --git a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectPairTests.java b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectPairTests.java new file mode 100644 index 0000000000000..7e2f357f557e5 --- /dev/null +++ b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectPairTests.java @@ -0,0 +1,75 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.dissect; + +import org.elasticsearch.test.ESTestCase; + +import static org.hamcrest.Matchers.equalTo; + +public class DissectPairTests extends ESTestCase { + + public void testNoModifierSameOrder(){ + String keyName1 = randomAlphaOfLengthBetween(1, 10); + String keyName2 = randomAlphaOfLengthBetween(1, 10); + String value = randomAlphaOfLengthBetween(1, 10); + DissectPair pair1 = new DissectPair(new DissectKey(keyName1), value); + DissectPair pair2 = new DissectPair(new DissectKey(keyName2), value); + assertThat(pair1.compareTo(pair2), equalTo(0)); + assertThat(pair2.compareTo(pair1), equalTo(0)); + } + + public void testAppendDifferentOrder(){ + String keyName = randomAlphaOfLengthBetween(1, 10); + String value = randomAlphaOfLengthBetween(1, 10); + int length = randomIntBetween(1, 100); + DissectPair pair1 = new DissectPair(new DissectKey("+" + keyName + "/" + length), value); + DissectPair pair2 = new DissectPair(new DissectKey("+" + keyName + "/" + length + 1), value); + assertThat(pair1.compareTo(pair2), equalTo(-1)); + assertThat(pair2.compareTo(pair1), equalTo(1)); + } + public void testAppendWithImplicitZeroOrder(){ + String keyName = randomAlphaOfLengthBetween(1, 10); + String value = randomAlphaOfLengthBetween(1, 10); + int length = randomIntBetween(1, 100); + DissectPair pair1 = new DissectPair(new DissectKey("keyName"), value); + DissectPair pair2 = new DissectPair(new DissectKey("+" + keyName + "/" + length), value); + assertThat(pair1.compareTo(pair2), equalTo(-1)); + assertThat(pair2.compareTo(pair1), equalTo(1)); + } + + public void testAppendSameOrder(){ + String keyName = randomAlphaOfLengthBetween(1, 10); + String value = randomAlphaOfLengthBetween(1, 10); + int length = randomIntBetween(1, 100); + DissectPair pair1 = new DissectPair(new DissectKey("+" + keyName + "/" + length), value); + DissectPair pair2 = new DissectPair(new DissectKey("+" + keyName + "/" + length), value); + assertThat(pair1.compareTo(pair2), 
equalTo(0)); + assertThat(pair2.compareTo(pair1), equalTo(0)); + } + + public void testFieldNameOrder(){ + String keyName = randomAlphaOfLengthBetween(1, 10); + String value = randomAlphaOfLengthBetween(1, 10); + DissectPair pair1 = new DissectPair(new DissectKey("?" + keyName), value); + DissectPair pair2 = new DissectPair(new DissectKey("&" + keyName), value); + assertThat(pair1.compareTo(pair2), equalTo(-1)); + assertThat(pair2.compareTo(pair1), equalTo(1)); + } +} diff --git a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java new file mode 100644 index 0000000000000..a2b86a03abba5 --- /dev/null +++ b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java @@ -0,0 +1,312 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.dissect; + +import org.elasticsearch.test.ESTestCase; +import org.hamcrest.CoreMatchers; +import org.hamcrest.Matchers; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + +import static com.carrotsearch.randomizedtesting.RandomizedTest.randomAsciiAlphanumOfLengthBetween; + +public class DissectParserTests extends ESTestCase { + + public void testJavaDocExamples() { + assertMatch("%{a} %{b},%{c}", "foo bar,baz", Arrays.asList("a", "b", "c"), Arrays.asList("foo", "bar", "baz")); + assertMiss("%{a},%{b}:%{c}", "foo,bar,baz"); + assertMatch("%{a->} %{b} %{c}", "foo bar baz", Arrays.asList("a", "b", "c"), Arrays.asList("foo", "bar", "baz")); + assertMatch("%{a} %{+a} %{+a}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foobarbaz")); + assertMatch("%{a} %{+a/2} %{+a/1}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foobazbar")); + assertMatch("%{?a} %{b} %{&a}", "foo bar baz", Arrays.asList("foo", "b"), Arrays.asList("baz", "bar")); + assertMatch("%{a} %{} %{c}", "foo bar baz", Arrays.asList("a", "c"), Arrays.asList("foo", "baz")); + assertMatch("%{a},%{b},%{c},%{d}", "foo,,,", Arrays.asList("a", "b", "c", "d"), Arrays.asList("foo", "", "", "")); + } + + /** + * Borrowed from Logstash's test cases: + * https://github.com/logstash-plugins/logstash-filter-dissect/blob/master/src/test/java/org/logstash/dissect/DissectorTest.java + * Append Note - Logstash appends with the delimiter as the separator between values, this uses a user defined separator + */ + public void testLogstashSpecs() { + assertMatch("%{a} %{b->} %{c}", "foo bar baz", Arrays.asList("a", "b", "c"), Arrays.asList("foo", "bar", "baz")); + assertMiss("%{a}%{b} %{c}", null); + assertMiss("%{a} %{b}%{c} %{d}", "foo bar baz"); + assertMiss("%{a} %{b} %{c}%{d}", "foo bar baz quux"); + assertMatch("%{a} %{b->} %{c}", "foo bar baz", Arrays.asList("a", "b", "c"), 
Arrays.asList("foo", "bar", "baz")); + assertMatch("%{a} %{} %{c}", "foo bar baz", Arrays.asList("a", "c"), Arrays.asList("foo", "baz")); + assertMatch("%{a} %{b} %{+b} %{z}", "foo bar baz quux", Arrays.asList("a", "b", "z"), Arrays.asList("foo", "bar baz", "quux"), " "); + assertMatch("%{a}------->%{b}", "foo------->bar baz quux", Arrays.asList("a", "b"), Arrays.asList("foo", "bar baz quux")); + assertMatch("%{a}------->%{}", "foo------->bar baz quux", Arrays.asList("a"), Arrays.asList("foo")); + assertMatch("%{a} » %{b}»%{c}€%{d}", "foo » bar»baz€quux", + Arrays.asList("a", "b", "c", "d"), Arrays.asList("foo", "bar", "baz", "quux")); + assertMatch("%{a} %{b} %{+a}", "foo bar baz quux", Arrays.asList("a", "b"), Arrays.asList("foo baz quux", "bar"), " "); + //Logstash supports implicit ordering based anchored by the the key without the '+' + //This implementation will only honor implicit ordering for appending right to left else explicit order (/N) is required. + //The results of this test differ from Logstash. + assertMatch("%{+a} %{a} %{+a} %{b}", "December 31 1999 quux", + Arrays.asList("a", "b"), Arrays.asList("December 31 1999", "quux"), " "); + //Same test as above, but with same result as Logstash using explicit ordering in the pattern + assertMatch("%{+a/1} %{a} %{+a/2} %{b}", "December 31 1999 quux", + Arrays.asList("a", "b"), Arrays.asList("31 December 1999", "quux"), " "); + assertMatch("%{+a/2} %{+a/4} %{+a/1} %{+a/3}", "bar quux foo baz", Arrays.asList("a"), Arrays.asList("foo bar baz quux"), " "); + assertMatch("%{+a} %{b}", "foo bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); + assertMatch("%{+a} %{b} %{+a} %{c}", "foo bar baz quux", + Arrays.asList("a", "b", "c"), Arrays.asList("foo baz", "bar", "quux"), " "); + //Logstash allows implicit '?' for association, which allows this dissect patterns without matching '?' and '&' to work. 
+ //For example, "%{k1}=%{&k1}, %{k2}=%{&k2}" will match in Logstash but not here due to the requirement of matching '?' and '&' + //begin: The following tests match in Logstash, but are considered bad patterns here: + assertBadPattern("%{k1}=%{&k1}, %{k2}=%{&k2}"); + assertBadPattern("%{k1}=%{&k3}, %{k2}=%{&k4}"); + assertBadPattern("%{?k1}=%{&k3}, %{?k2}=%{&k4}"); + assertBadPattern("%{&k1}, %{&k2}, %{&k3}"); + //end + assertMatch("%{} %{syslog_timestamp} %{hostname} %{rt}: %{reason} %{+reason} %{src_ip}/%{src_port}->%{dst_ip}/%{dst_port} " + + "%{polrt} %{+polrt} %{+polrt} %{from_zone} %{to_zone} %{rest}", + "42 2016-05-25T14:47:23Z host.name.com RT_FLOW - RT_FLOW_SESSION_DENY: session denied 2.2.2.20/60000->1.1.1.10/8090 None " + + "6(0) DEFAULT-DENY ZONE-UNTRUST ZONE-DMZ UNKNOWN UNKNOWN N/A(N/A) ge-0/0/0.0", + Arrays.asList("syslog_timestamp", "hostname", "rt", "reason", "src_ip", "src_port", "dst_ip", "dst_port", "polrt" + , "from_zone", "to_zone", "rest"), + Arrays.asList("2016-05-25T14:47:23Z", "host.name.com", "RT_FLOW - RT_FLOW_SESSION_DENY", "session denied", "2.2.2.20", "60000" + , "1.1.1.10", "8090", "None 6(0) DEFAULT-DENY", "ZONE-UNTRUST", "ZONE-DMZ", "UNKNOWN UNKNOWN N/A(N/A) ge-0/0/0.0"), " "); + assertBadKey("%{+/2}"); + assertBadKey("%{&+a_field}"); + assertMatch("%{a->} %{b->}---%{c}", "foo bar------------baz", + Arrays.asList("a", "b", "c"), Arrays.asList("foo", "bar", "baz")); + //Logstash will match "%{?->}-%{a}" to "-----666", however '?' without a corresponding '&' is not allowed here, so the syntax is + //the same minus the '?' 
as tested below + assertBadKey("%{?->}-%{a}", "?->"); + assertMatch("%{->}-%{a}", "-----666", Arrays.asList("a"), Arrays.asList("666")); + assertMatch("%{a},%{b},%{c},%{d},%{e},%{f}", "111,,333,,555,666", + Arrays.asList("a", "b", "c", "d", "e", "f"), Arrays.asList("111", "", "333", "", "555", "666")); + assertMatch("%{a}.࿏.%{b}", "⟳༒.࿏.༒⟲", Arrays.asList("a", "b"), Arrays.asList("⟳༒", "༒⟲")); + assertMatch("%{a}", "子", Arrays.asList("a"), Arrays.asList("子")); + assertMatch("%{a}{\n}%{b}", "aaa{\n}bbb", Arrays.asList("a", "b"), Arrays.asList("aaa", "bbb")); + assertMiss("MACHINE[%{a}] %{b}", "1234567890 MACHINE[foo] bar"); + assertMiss("%{a} %{b} %{c}", "foo:bar:baz"); + assertMatch("/var/%{key1}/log/%{key2}.log", "/var/foo/log/bar.log", Arrays.asList("key1", "key2"), Arrays.asList("foo", "bar")); + assertMatch("%{a->} %{b}-.-%{c}-%{d}-..-%{e}-%{f}-%{g}-%{h}", "foo bar-.-baz-1111-..-22-333-4444-55555", + Arrays.asList("a", "b", "c", "d", "e", "f", "g", "h"), + Arrays.asList("foo", "bar", "baz", "1111", "22", "333", "4444", "55555")); + } + + public void testBasicMatch() { + String valueFirstInput = ""; + String keyFirstPattern = ""; + String delimiterFirstInput = ""; + String delimiterFirstPattern = ""; + //parallel arrays + List expectedKeys = Arrays.asList(generateRandomStringArray(100, 10, false, false)); + List expectedValues = new ArrayList<>(expectedKeys.size()); + for (String key : expectedKeys) { + String value = randomAsciiAlphanumOfLengthBetween(1, 100); + String delimiter = Integer.toString(randomInt()); //int to ensures values and delimiters don't overlap, else validation can fail + keyFirstPattern += "%{" + key + "}" + delimiter; + valueFirstInput += value + delimiter; + delimiterFirstPattern += delimiter + "%{" + key + "}"; + delimiterFirstInput += delimiter + value; + expectedValues.add(value); + } + assertMatch(keyFirstPattern, valueFirstInput, expectedKeys, expectedValues); + assertMatch(delimiterFirstPattern, delimiterFirstInput, expectedKeys, 
expectedValues); + } + + public void testBasicMatchUnicode() { + String valueFirstInput = ""; + String keyFirstPattern = ""; + String delimiterFirstInput = ""; + String delimiterFirstPattern = ""; + //parallel arrays + List expectedKeys = new ArrayList<>(); + List expectedValues = new ArrayList<>(); + for (int i = 0; i< randomIntBetween(1,100);i++) { + String key = randomAsciiAlphanumOfLengthBetween(1,100); + String value = randomRealisticUnicodeOfCodepointLengthBetween(1, 100); + String delimiter = Integer.toString(randomInt()); //int to ensures values and delimiters don't overlap, else validation can fail + keyFirstPattern += "%{" + key + "}" + delimiter; + valueFirstInput += value + delimiter; + delimiterFirstPattern += delimiter + "%{" + key + "}"; + delimiterFirstInput += delimiter + value; + expectedKeys.add(key); + expectedValues.add(value); + } + assertMatch(keyFirstPattern, valueFirstInput, expectedKeys, expectedValues); + assertMatch(delimiterFirstPattern, delimiterFirstInput, expectedKeys, expectedValues); + } + + public void testMatchUnicode() { + assertMatch("%{a} %{b}", "foo 子", Arrays.asList("a", "b"), Arrays.asList("foo", "子")); + assertMatch("%{a}࿏%{b} %{c}", "⟳༒࿏༒⟲ 子", Arrays.asList("a", "b", "c"), Arrays.asList("⟳༒", "༒⟲", "子")); + assertMatch("%{a}࿏%{+a} %{+a}", "⟳༒࿏༒⟲ 子", Arrays.asList("a"), Arrays.asList("⟳༒༒⟲子")); + assertMatch("%{a}࿏%{+a/2} %{+a/1}", "⟳༒࿏༒⟲ 子", Arrays.asList("a"), Arrays.asList("⟳༒子༒⟲")); + assertMatch("%{a->}࿏%{b}", "⟳༒࿏࿏࿏࿏࿏༒⟲", Arrays.asList("a", "b"), Arrays.asList("⟳༒", "༒⟲")); + assertMatch("%{?a}࿏%{&a}", "⟳༒࿏༒⟲", Arrays.asList("⟳༒"), Arrays.asList("༒⟲")); + assertMatch("%{}࿏%{a}", "⟳༒࿏༒⟲", Arrays.asList("a"), Arrays.asList("༒⟲")); + } + + public void testMatchRemainder() { + assertMatch("%{a}", "foo bar the rest", Arrays.asList("a"), Arrays.asList("foo bar the rest")); + assertMatch("%{a} %{b}", "foo bar the rest", Arrays.asList("a", "b"), Arrays.asList("foo", "bar the rest")); + assertMatch("%{} %{b}", "foo bar the 
rest", Arrays.asList("b"), Arrays.asList("bar the rest")); + assertMatch("%{a} %{b->}", "foo bar the rest", Arrays.asList("a", "b"), Arrays.asList("foo", "bar the rest")); + assertMatch("%{?a} %{&a}", "foo bar the rest", Arrays.asList("foo"), Arrays.asList("bar the rest")); + assertMatch("%{a} %{+a}", "foo bar the rest", Arrays.asList("a"), Arrays.asList("foo bar the rest"), " "); + } + + public void testAppend() { + assertMatch("%{a} %{+a} %{b} %{+b}", "foo bar baz lol", Arrays.asList("a", "b"), Arrays.asList("foobar", "bazlol")); + assertMatch("%{a} %{+a/2} %{+a/1}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foobazbar")); + assertMatch("%{a} %{+a/2} %{+a/1}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foo baz bar"), " "); + } + + public void testAssociate() { + assertMatch("%{?a} %{&a}", "foo bar", Arrays.asList("foo"), Arrays.asList("bar")); + assertMatch("%{&a} %{?a}", "foo bar", Arrays.asList("bar"), Arrays.asList("foo")); + assertMatch("%{?a} %{&a} %{?b} %{&b}", "foo bar baz lol", Arrays.asList("foo", "baz"), Arrays.asList("bar", "lol")); + assertMatch("%{?a} %{&a} %{c} %{?b} %{&b}", "foo bar x baz lol", + Arrays.asList("foo", "baz", "c"), Arrays.asList("bar", "lol", "x")); + assertBadPattern("%{?a} %{a}"); + assertBadPattern("%{a} %{&a}"); + assertMiss("%{?a} %{&a} {a} %{?b} %{&b}", "foo bar x baz lol"); + } + + public void testAppendAndAssociate() { + assertMatch("%{a} %{+a} %{?b} %{&b}", "foo bar baz lol", Arrays.asList("a", "baz"), Arrays.asList("foobar", "lol")); + assertMatch("%{a->} %{+a/2} %{+a/1} %{?b} %{&b}", "foo bar baz lol x", + Arrays.asList("a", "lol"), Arrays.asList("foobazbar", "x")); + } + + public void testEmptyKey() { + assertMatch("%{} %{b}", "foo bar", Arrays.asList("b"), Arrays.asList("bar")); + assertMatch("%{a} %{}", "foo bar", Arrays.asList("a"), Arrays.asList("foo")); + assertMatch("%{->} %{b}", "foo bar", Arrays.asList("b"), Arrays.asList("bar")); + assertMatch("%{->} %{b}", " bar", Arrays.asList("b"), 
Arrays.asList("bar")); + assertMatch("%{a} %{->}", "foo bar ", Arrays.asList("a"), Arrays.asList("foo")); + } + + public void testConsecutiveDelimiters() { + //leading + assertMatch("%{->},%{a}", ",,,,,foo", Arrays.asList("a"), Arrays.asList("foo")); + assertMatch("%{a->},%{b}", ",,,,,foo", Arrays.asList("a", "b"), Arrays.asList("", "foo")); + //trailing + assertMatch("%{a->},", "foo,,,,,", Arrays.asList("a"), Arrays.asList("foo")); + assertMatch("%{a} %{b},", "foo bar,,,,,", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); + assertMatch("%{a} %{b->},", "foo bar,,,,,", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); + //middle + assertMatch("%{a->},%{b}", "foo,,,,,bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); + assertMatch("%{a->} %{b}", "foo bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); + assertMatch("%{a->}x%{b}", "fooxxxxxbar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); + assertMatch("%{a->} xyz%{b}", "foo xyz xyz xyz xyz xyzbar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); + //skipped with empty values + assertMatch("%{a},%{b},%{c},%{d}", "foo,,,", Arrays.asList("a", "b", "c", "d"), Arrays.asList("foo", "", "", "")); + assertMatch("%{a},%{b},%{c},%{d}", "foo,,bar,baz", Arrays.asList("a", "b", "c", "d"), Arrays.asList("foo", "", "bar", "baz")); + assertMatch("%{a},%{b},%{c},%{d}", "foo,,,baz", Arrays.asList("a", "b", "c", "d"), Arrays.asList("foo", "", "", "baz")); + assertMatch("%{a},%{b},%{c},%{d}", ",bar,,baz", Arrays.asList("a", "b", "c", "d"), Arrays.asList("", "bar", "", "baz")); + assertMatch("%{->},%{a->},%{b}", ",,,bar,,baz", Arrays.asList("a", "b"), Arrays.asList("bar", "baz")); + } + + public void testAppendWithConsecutiveDelimiters() { + assertMatch("%{+a/1},%{+a/3}-%{+a/2} %{b}", "foo,bar----baz lol", Arrays.asList("a", "b"), Arrays.asList("foobar", "")); + assertMatch("%{+a/1},%{+a/3->}-%{+a/2} %{b}", "foo,bar----baz lol", Arrays.asList("a", "b"), Arrays.asList("foobazbar", "lol")); 
+ } + + public void testSkipRightPadding() { + assertMatch("%{a->} %{b}", "foo bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); + assertMatch("%{a->} %{b}", "foo bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); + assertMatch("%{->} %{a}", "foo bar", Arrays.asList("a"), Arrays.asList("bar")); + assertMatch("%{a->} %{+a->} %{?b->} %{&b->} %{c}", "foo bar baz lol x", + Arrays.asList("a", "baz", "c"), Arrays.asList("foobar", "lol", "x")); + } + + public void testTrimmedEnd() { + assertMatch("%{a} %{b}", "foo bar ", Arrays.asList("a", "b"), Arrays.asList("foo", "bar ")); + assertMatch("%{a} %{b->}", "foo bar ", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); + //only whitespace is trimmed in the absence of trailing characters + assertMatch("%{a} %{b->}", "foo bar,,,,,,", Arrays.asList("a", "b"), Arrays.asList("foo", "bar,,,,,,")); + //consecutive delimiters + right padding can be used to skip over the trailing delimiters + assertMatch("%{a} %{b->},", "foo bar,,,,,,", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); + } + + /** + * Runtime errors + */ + public void testMiss() { + assertMiss("%{a}%{b}", "foo"); + assertMiss("%{a},%{b}", "foo bar"); + assertMiss("%{a}, %{b}", "foo,bar"); + assertMiss("x%{a},%{b}", "foo,bar"); + assertMiss("x%{},%{b}", "foo,bar"); + } + + /** + * Construction errors + */ + public void testBadPatternOrKey(){ + assertBadPattern(""); + assertBadPattern("{}"); + assertBadPattern("%{?a} %{&b}"); + assertBadKey("%{?}"); + assertBadKey("%{++}"); + } + + private void assertMiss(String pattern, String input) { + DissectException e = expectThrows(DissectException.class, () -> new DissectParser(pattern, null).parse(input)); + assertThat(e.getMessage(), CoreMatchers.containsString("Unable to find match for dissect pattern")); + assertThat(e.getMessage(), CoreMatchers.containsString(pattern)); + assertThat(e.getMessage(), input == null ? 
CoreMatchers.containsString("null") : CoreMatchers.containsString(input)); + + } + + private void assertBadPattern(String pattern) { + DissectException e = expectThrows(DissectException.class, () -> new DissectParser(pattern, null)); + assertThat(e.getMessage(), CoreMatchers.containsString("Unable to parse pattern")); + assertThat(e.getMessage(), CoreMatchers.containsString(pattern)); + } + + private void assertBadKey(String pattern, String key) { + DissectException e = expectThrows(DissectException.class, () -> new DissectParser(pattern, null)); + assertThat(e.getMessage(), CoreMatchers.containsString("Unable to parse key")); + assertThat(e.getMessage(), CoreMatchers.containsString(key)); + } + + private void assertBadKey(String pattern) { + assertBadKey(pattern, pattern.replace("%{", "").replace("}", "")); + } + + private void assertMatch(String pattern, String input, List expectedKeys, List expectedValues) { + assertMatch(pattern, input, expectedKeys, expectedValues, null); + } + + private void assertMatch(String pattern, String input, List expectedKeys, List expectedValues, String appendSeperator) { + List dissectPairs = new DissectParser(pattern, appendSeperator).parse(input); + List foundKeys = dissectPairs.stream().map(d -> d.getKey().getName()).collect(Collectors.toList()); + List foundValues = dissectPairs.stream().map(DissectPair::getValue).collect(Collectors.toList()); + Collections.sort(foundKeys); + Collections.sort(foundValues); + Collections.sort(expectedKeys); + Collections.sort(expectedValues); + assertThat(foundKeys, Matchers.equalTo(expectedKeys)); + assertThat(foundValues, Matchers.equalTo(expectedValues)); + } +} From 79c884384113a2c124d15a00514a1b4c29b03e7d Mon Sep 17 00:00:00 2001 From: Jake Landis Date: Mon, 23 Jul 2018 14:55:29 -0500 Subject: [PATCH 2/9] minor cosmetic fixes --- libs/dissect/src/main/eclipse-build.gradle | 2 +- .../java/org/elasticsearch/dissect/DissectKey.java | 11 +++++------ 
.../java/org/elasticsearch/dissect/DissectParser.java | 2 -- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/libs/dissect/src/main/eclipse-build.gradle b/libs/dissect/src/main/eclipse-build.gradle index 3188c7aff01f7..c2b72bd21e1f1 100644 --- a/libs/dissect/src/main/eclipse-build.gradle +++ b/libs/dissect/src/main/eclipse-build.gradle @@ -1,3 +1,3 @@ -// this is just shell gradle file for eclipse to have separate projects for grok src and tests +// this is just shell gradle file for eclipse to have separate projects for dissect src and tests apply from: '../../build.gradle' diff --git a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectKey.java b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectKey.java index 2772466f70aa1..7bb3bdaae7c8f 100644 --- a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectKey.java +++ b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectKey.java @@ -25,13 +25,14 @@ /** *

A Key of a dissect pattern. This class models the name and modifiers and provides some validation.

- *

For dissect pattern of {@code %{a->} %{b}} the dissect keys are: + *

For dissect pattern of {@code %{a} %{+a} %{b}} the dissect keys are: *

    - *
  • {@code a->}
  • + *
  • {@code a}
  • + *
  • {@code +a}
  • *
  • {@code b}
  • *
* This class represents a single key. - *

A single key is composed of a name and it's modifiers. For the key {@code a->}, {@code a} is the name and {@code ->} is the modifier. + *

A single key is composed of a name and it's modifiers. For the key {@code +a}, {@code a} is the name and {@code +} is the modifier. */ public final class DissectKey { private static final Pattern LEFT_MODIFIER_PATTERN = Pattern.compile("([+?&])(.*?)(->)?$", Pattern.DOTALL); @@ -46,7 +47,7 @@ public final class DissectKey { /** * Constructor - parses the String key into it's name and modifier(s) * - * @param key The key without the leading %{ or trailing }, for example a-> + * @param key The key without the leading %{ or trailing }, for example {@code a->} */ DissectKey(String key) { skip = key == null || key.isEmpty(); @@ -89,7 +90,6 @@ public final class DissectKey { skipRightPadding = matcher.group(4) != null; } break; - } if (name == null || (name.isEmpty() && !skip)) { @@ -128,7 +128,6 @@ public String toString() { '}'; } - public enum Modifier { NONE(""), APPEND_WITH_ORDER("/"), APPEND("+"), FIELD_NAME("?"), FIELD_VALUE("&"); diff --git a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java index be07cea35132f..eee8c3c8ffec6 100644 --- a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java +++ b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java @@ -257,7 +257,6 @@ public List parse(String inputString) { return postProcess(results.stream().filter(dissectPair -> !dissectPair.getKey().skip()).collect(Collectors.toList())); } - /** * Verify that each key has a entry in the result, don't rely only on size since some error cases would result in false positives */ @@ -272,7 +271,6 @@ private boolean isValid(List results) { return valid; } - private List postProcess(List results) { if (needsPostParsing) { if (allModifiers.contains(DissectKey.Modifier.APPEND) || allModifiers.contains(DissectKey.Modifier.APPEND_WITH_ORDER)) { From 923ce5d1b831d8b058a5fe0c2fc50cdde1a416d8 Mon Sep 17 00:00:00 2001 From: Jake Landis Date: Mon, 23 Jul 2018 
16:12:15 -0500 Subject: [PATCH 3/9] Rework the validation and post processing for better performance --- .../org/elasticsearch/dissect/DissectKey.java | 28 ++- .../elasticsearch/dissect/DissectMatch.java | 198 +++++++++++++++++ .../elasticsearch/dissect/DissectPair.java | 82 ------- .../elasticsearch/dissect/DissectParser.java | 209 +++++++----------- .../dissect/DissectKeyTests.java | 16 +- .../dissect/DissectMatchTests.java | 93 ++++++++ .../dissect/DissectPairTests.java | 75 ------- .../dissect/DissectParserTests.java | 16 +- 8 files changed, 417 insertions(+), 300 deletions(-) create mode 100644 libs/dissect/src/main/java/org/elasticsearch/dissect/DissectMatch.java delete mode 100644 libs/dissect/src/main/java/org/elasticsearch/dissect/DissectPair.java create mode 100644 libs/dissect/src/test/java/org/elasticsearch/dissect/DissectMatchTests.java delete mode 100644 libs/dissect/src/test/java/org/elasticsearch/dissect/DissectPairTests.java diff --git a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectKey.java b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectKey.java index 7bb3bdaae7c8f..4ea2b9ebff4da 100644 --- a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectKey.java +++ b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectKey.java @@ -33,6 +33,7 @@ * * This class represents a single key. *

A single key is composed of a name and it's modifiers. For the key {@code +a}, {@code a} is the name and {@code +} is the modifier. + * @see DissectParser */ public final class DissectKey { private static final Pattern LEFT_MODIFIER_PATTERN = Pattern.compile("([+?&])(.*?)(->)?$", Pattern.DOTALL); @@ -41,7 +42,7 @@ public final class DissectKey { private final Modifier modifier; private boolean skip; private boolean skipRightPadding; - private int orderPosition; + private int appendPosition; private String name; /** @@ -86,7 +87,7 @@ public final class DissectKey { matcher = APPEND_WITH_ORDER_PATTERN.matcher(key); while (matcher.find()) { name = matcher.group(1); - orderPosition = Short.valueOf(matcher.group(3)); + appendPosition = Short.valueOf(matcher.group(3)); skipRightPadding = matcher.group(4) != null; } break; @@ -97,6 +98,19 @@ public final class DissectKey { } } + /** + * Copy constructor to explicitly override the modifier. + * @param key The key to copy (except for the modifier) + * @param modifier the modifer to use for this copy + */ + DissectKey(DissectKey key, DissectKey.Modifier modifier){ + this.modifier = modifier; + this.skipRightPadding = key.skipRightPadding; + this.skip = key.skip; + this.name = key.name; + this.appendPosition = key.appendPosition; + } + Modifier getModifier() { return modifier; } @@ -105,15 +119,15 @@ boolean skip() { return skip; } - public boolean skipRightPadding() { + boolean skipRightPadding() { return skipRightPadding; } - int getOrderPosition() { - return orderPosition; + int getAppendPosition() { + return appendPosition; } - public String getName() { + String getName() { return name; } @@ -123,7 +137,7 @@ public String toString() { return "DissectKey{" + "modifier=" + modifier + ", skip=" + skip + - ", orderPosition=" + orderPosition + + ", appendPosition=" + appendPosition + ", name='" + name + '\'' + '}'; } diff --git a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectMatch.java 
b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectMatch.java new file mode 100644 index 0000000000000..451f69e51dd45 --- /dev/null +++ b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectMatch.java @@ -0,0 +1,198 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.dissect; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +/** + * Represents the matches of a {@link DissectParser#parse(String)}. Handles the appending and referencing based on the key instruction. 
+ */ +final class DissectMatch { + + private final String appendSeparator; + private final Map results; + private final Map simpleResults; + private final Map referenceResults; + private final Map appendResults; + private int implicitAppendOrder = -1000; + private final int maxMatches; + private final int maxResults; + private final int appendCount; + private final int referenceCount; + private final int simpleCount; + private int matches = 0; + + DissectMatch(String appendSeparator, int maxMatches, int maxResults, int appendCount, int referenceCount) { + if (maxMatches <= 0 || maxResults <= 0) { + throw new IllegalArgumentException("Expected results are zero, can not construct DissectMatch");//should never happen + } + this.maxMatches = maxMatches; + this.maxResults = maxResults; + this.appendCount = appendCount; + this.referenceCount = referenceCount; + this.appendSeparator = appendSeparator; + results = new HashMap<>(maxResults); + this.simpleCount = maxMatches - referenceCount - appendCount; + simpleResults = simpleCount <= 0 ? null : new HashMap<>(simpleCount); + referenceResults = referenceCount <= 0 ? null : new HashMap<>(referenceCount); + appendResults = appendCount <= 0 ? 
null : new HashMap<>(appendCount); + } + + /** + * Add the key/value that was found as result of the parsing + * @param key the {@link DissectKey} + * @param value the discovered value for the key + */ + void add(DissectKey key, String value) { + matches++; + if (key.skip()) { + return; + } + switch (key.getModifier()) { + case NONE: + simpleResults.put(key.getName(), value); + break; + case APPEND: + appendResults.computeIfAbsent(key.getName(), k -> new AppendResult(appendSeparator)).addValue(value, implicitAppendOrder++); + break; + case APPEND_WITH_ORDER: + appendResults.computeIfAbsent(key.getName(), + k -> new AppendResult(appendSeparator)).addValue(value, key.getAppendPosition()); + break; + case FIELD_NAME: + referenceResults.computeIfAbsent(key.getName(), k -> new ReferenceResult()).setKey(value); + break; + case FIELD_VALUE: + referenceResults.computeIfAbsent(key.getName(), k -> new ReferenceResult()).setValue(value); + break; + } + } + + boolean fullyMatched() { + return matches == maxMatches; + } + + /** + * Checks if results are valid. + * @param results the results to check + * @return true if all dissect keys have been matched and the results are of the expected size. + */ + boolean isValid(Map results) { + return fullyMatched() && results.size() == maxResults; + } + + /** + * Gets all the current matches. Pass the results of this to isValid to determine if a fully successful match has occured. + * + * @return the map of the results. + */ + Map getResults() { + results.clear(); + if (simpleCount > 0) { + results.putAll(simpleResults); + } + if (referenceCount > 0) { + referenceResults.forEach((k, v) -> results.put(v.getKey(), v.getValue())); + } + if (appendCount > 0) { + appendResults.forEach((k, v) -> results.put(k, v.getAppendResult())); + } + + return results; + } + + /** + * a result that will need to be part of an append operation. 
+ */ + final class AppendResult { + private final List values = new ArrayList<>(); + private final String appendSeparator; + + AppendResult(String appendSeparator) { + this.appendSeparator = appendSeparator; + } + + void addValue(String value, int order) { + values.add(new AppendValue(value, order)); + } + + String getAppendResult() { + Collections.sort(values); + return values.stream().map(AppendValue::getValue).collect(Collectors.joining(appendSeparator)); + } + } + + /** + * An appendable value that can be sorted based on the provided order + */ + final class AppendValue implements Comparable { + private final String value; + private final int order; + + AppendValue(String value, int order) { + this.value = value; + this.order = order; + } + + String getValue() { + return value; + } + + int getOrder() { + return order; + } + + @Override + public int compareTo(AppendValue o) { + return Integer.compare(this.order, o.getOrder()); + } + } + + /** + * A result that needs to be converted to a key/value reference + */ + final class ReferenceResult { + + private String key; + + String getKey() { + return key; + } + + String getValue() { + return value; + } + + private String value; + + void setValue(String value) { + this.value = value; + } + + void setKey(String key) { + this.key = key; + } + } +} diff --git a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectPair.java b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectPair.java deleted file mode 100644 index 6b2d9ea7e7f85..0000000000000 --- a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectPair.java +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. 
Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.dissect; - -import java.util.Objects; - -/** - * A tuple class holds a {@link DissectKey} and an associated String. The value associated with the key may have different meanings - * based on the context. For example, the associated value may be the delimiter associated with the key or the parsed value associated with - * the key. - */ -public final class DissectPair implements Comparable { - - private final DissectKey key; - private final String value; - - DissectPair(DissectKey key, String value) { - this.key = key; - this.value = value; - } - - public DissectKey getKey() { - return key; - } - - public String getValue() { - return value; - } - - //generated - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - DissectPair that = (DissectPair) o; - return Objects.equals(key, that.key) && - Objects.equals(value, that.value); - } - - //generated - @Override - public int hashCode() { - return Objects.hash(key, value); - } - - //generated - @Override - public String toString() { - return "DissectPair{" + - "key=" + key + - ", value='" + value + '\'' + - '}'; - } - - @Override - public int compareTo(DissectPair o) { - if(this.getKey().getModifier().equals(DissectKey.Modifier.FIELD_NAME)){ - return -1; - } - 
if(this.getKey().getModifier().equals(DissectKey.Modifier.FIELD_VALUE)){ - return 1; - } - return Integer.compare(this.getKey().getOrderPosition(), o.getKey().getOrderPosition()); - } -} diff --git a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java index eee8c3c8ffec6..54d21a30380f8 100644 --- a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java +++ b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java @@ -22,7 +22,6 @@ import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collection; import java.util.Collections; import java.util.EnumSet; import java.util.Iterator; @@ -40,9 +39,9 @@ * match a string of the form:

foo bar,baz
and will result a key/value pairing of
a=foo, b=bar, and c=baz.
*

Matches are all or nothing. For example, the same pattern will NOT match

foo bar baz
since all of the delimiters did not * match. (the comma did not match) - *

Dissect patterns can optionally have allModifiers. These allModifiers instruct the parser to change it's behavior. For example the + *

Dissect patterns can optionally have modifiers. These modifiers instruct the parser to change its behavior. For example the + * dissect pattern of

%{a},%{b}:%{c}
would not match
foo,bar,baz
since there the colon never matches. - *

Modifiers appear to the left or the right of the key name. The supported allModifiers are: + *

Modifiers appear to the left or the right of the key name. The supported modifiers are: *

    *
  • {@code ->} Instructs the parser to ignore repeating delimiters to the right of the key. Example:
      * pattern: {@code %{a->} %{b} %{c}}
    @@ -88,11 +87,6 @@
     public final class DissectParser {
         private static final Pattern LEADING_DELIMITER_PATTERN = Pattern.compile("^(.*?)%");
         private static final Pattern KEY_DELIMITER_FIELD_PATTERN = Pattern.compile("%\\{([^}]*?)}([^%]*)", Pattern.DOTALL);
    -    private static final EnumSet POST_PROCESSING_MODIFIERS = EnumSet.of(
    -        DissectKey.Modifier.APPEND_WITH_ORDER,
    -        DissectKey.Modifier.APPEND,
    -        DissectKey.Modifier.FIELD_NAME,
    -        DissectKey.Modifier.FIELD_VALUE);
         private static final EnumSet ASSOCIATE_MODIFIERS = EnumSet.of(
             DissectKey.Modifier.FIELD_NAME,
             DissectKey.Modifier.FIELD_VALUE);
    @@ -101,11 +95,13 @@ public final class DissectParser {
             DissectKey.Modifier.APPEND_WITH_ORDER);
         private static final Function KEY_NAME = val -> val.getKey().getName();
         private final List matchPairs;
    -    private final boolean needsPostParsing;
    -    private final EnumSet allModifiers;
    -    private final String appendSeparator;
         private final String pattern;
         private String leadingDelimiter = "";
    +    private final int maxMatches;
    +    private final int maxResults;
    +    private final int appendCount;
    +    private final int referenceCount;
    +    private final String appendSeparator;
     
         public DissectParser(String pattern, String appendSeparator) {
             this.pattern = pattern;
    @@ -124,65 +120,83 @@ public DissectParser(String pattern, String appendSeparator) {
             if (matchPairs.isEmpty()) {
                 throw new DissectException.PatternParse(pattern, "Unable to find any keys or delimiters.");
             }
    +        this.maxMatches = matchPairs.size();
    +        this.maxResults = Long.valueOf(matchPairs.stream()
    +            .filter(dissectPair -> !dissectPair.getKey().skip()).map(KEY_NAME).distinct().count()).intValue();
     
    -        List keys = matchPairs.stream().map(DissectPair::getKey).collect(Collectors.toList());
    -        this.allModifiers = getAllModifiers(keys);
    -
    -        if (allModifiers.contains(DissectKey.Modifier.FIELD_NAME) || allModifiers.contains(DissectKey.Modifier.FIELD_VALUE)) {
    -            Map> keyNameToDissectPairs = getAssociateMap(matchPairs);
    -            for (Map.Entry> entry : keyNameToDissectPairs.entrySet()) {
    -                List sameKeyNameList = entry.getValue();
    -                if (sameKeyNameList.size() != 2) {
    -                    throw new DissectException.PatternParse(pattern, "Found invalid key/reference associations: '"
    -                        + sameKeyNameList.stream().map(KEY_NAME).collect(Collectors.joining(",")) +
    -                        "' Please ensure each '?' is matched with a matching '&");
    +        //append validation - look through all of the keys to see if there are any keys that need to participate in an append operation
    +        // but don't have the '+' defined
    +        Set appendKeyNames = matchPairs.stream()
    +            .filter(dissectPair -> APPEND_MODIFIERS.contains(dissectPair.getKey().getModifier()))
    +            .map(KEY_NAME).distinct().collect(Collectors.toSet());
    +        if (appendKeyNames.size() > 0) {
    +            List modifiedMatchPairs = new ArrayList<>(matchPairs.size());
    +            for (DissectPair p : matchPairs) {
    +                if (p.getKey().getModifier().equals(DissectKey.Modifier.NONE) && appendKeyNames.contains(p.getKey().getName())) {
    +                    modifiedMatchPairs.add(new DissectPair(new DissectKey(p.getKey(), DissectKey.Modifier.APPEND), p.getDelimiter()));
    +                } else {
    +                    modifiedMatchPairs.add(p);
                     }
                 }
    +            matchPairs = modifiedMatchPairs;
             }
    -        needsPostParsing = POST_PROCESSING_MODIFIERS.stream().anyMatch(allModifiers::contains);
    +        appendCount = appendKeyNames.size();
    +
    +        //reference validation - ensure that '?' and '&' come in pairs
    +        Map> referenceGroupings = matchPairs.stream()
    +            .filter(dissectPair -> ASSOCIATE_MODIFIERS.contains(dissectPair.getKey().getModifier()))
    +            .collect(Collectors.groupingBy(KEY_NAME));
    +        for (Map.Entry> entry : referenceGroupings.entrySet()) {
    +            if (entry.getValue().size() != 2) {
    +                throw new DissectException.PatternParse(pattern, "Found invalid key/reference associations: '"
    +                    + entry.getValue().stream().map(KEY_NAME).collect(Collectors.joining(",")) +
    +                    "' Please ensure each '?' is matched with a matching '&");
    +            }
    +        }
    +
    +        referenceCount = referenceGroupings.size() * 2;
             this.matchPairs = Collections.unmodifiableList(matchPairs);
         }
     
    +
         /**
          * 

    Entry point to dissect a string into it's parts.

    - *

    - * This implements a naive string matching algorithm. The string is walked left to right, comparing each byte against - * another string's bytes looking for matches. If the bytes match, then a second cursor looks ahead to see if all the bytes - * of the other string matches. If they all match, record it and advances the primary cursor to the match point. If it can not match - * all of the bytes then progress the main cursor. Repeat till the end of the input string. Since the string being searching for - * (the delimiter) is generally small and rare the naive approach is efficient. - *

    - * In this case the the string that is walked is the input string, and the string being searched for is the current delimiter. - * For example for a dissect pattern of {@code %{a},%{b}:%{c}} the delimiters (comma then colon) are searched for in the - * input string. At class construction the list of keys+delimiters are found, which allows the use of that list to know which delimiter - * to use for the search. That list of delimiters is progressed once the current delimiter is matched. - *

    - * There are two special cases that requires additional parsing beyond the standard naive algorithm. Consecutive delimiters should - * results in a empty matches unless the {@code ->} is provided. For example given the dissect pattern of - * {@code %{a},%{b},%{c},%{d}} and input string of {@code foo,,,} the match should be successful with empty values for b,c and d. - * However, if the key modifier {@code ->}, is present it will simply skip over any delimiters just to the right of the key - * without assigning any values. - *

    - * Once the full string is parsed, it is validated that each key has a corresponding value and sent off for post processing. - * Key allModifiers may instruct the parsing to perform operations where the entire results set is needed. Post processing is used to - * obey those instructions and in doing it post parsing, helps to keep the string parsing logic simple. - * All post processing will occur before this method returns. - *

    * * @param inputString The string to dissect - * @return a List of {@link DissectPair}s that have the matched key/value pairs that results from the parse. + * @return TODO: * @throws DissectException if unable to dissect a pair into it's parts. */ - public List parse(String inputString) { + public Map parse(String inputString) { + /** + * + * This implements a naive string matching algorithm. The string is walked left to right, comparing each byte against + * another string's bytes looking for matches. If the bytes match, then a second cursor looks ahead to see if all the bytes + * of the other string matches. If they all match, record it and advances the primary cursor to the match point. If it can not match + * all of the bytes then progress the main cursor. Repeat till the end of the input string. Since the string being searching for + * (the delimiter) is generally small and rare the naive approach is efficient. + * + * In this case the the string that is walked is the input string, and the string being searched for is the current delimiter. + * For example for a dissect pattern of {@code %{a},%{b}:%{c}} the delimiters (comma then colon) are searched for in the + * input string. At class construction the list of keys+delimiters are found (dissectPairs), which allows the use of that ordered + * list to know which delimiter to use for the search. The delimiters is progressed once the current delimiter is matched. + * + * There are two special cases that requires additional parsing beyond the standard naive algorithm. Consecutive delimiters should + * results in a empty matches unless the {@code ->} is provided. For example given the dissect pattern of + * {@code %{a},%{b},%{c},%{d}} and input string of {@code foo,,,} the match should be successful with empty values for b,c and d. + * However, if the key modifier {@code ->}, is present it will simply skip over any delimiters just to the right of the key + * without assigning any values. 
For example {@code %{a->},{%b}} will match the input string of {@code foo,,,,,,bar} with a=foo and + * b=bar. + * + */ + DissectMatch dissectMatch = new DissectMatch(appendSeparator, maxMatches, maxResults, appendCount, referenceCount); Iterator it = matchPairs.iterator(); - List results = new ArrayList<>(); //ensure leading delimiter matches if (inputString != null && leadingDelimiter.equals(inputString.substring(0, leadingDelimiter.length()))) { byte[] input = inputString.getBytes(StandardCharsets.UTF_8); //grab the first key/delimiter pair DissectPair dissectPair = it.next(); DissectKey key = dissectPair.getKey(); - byte[] delimiter = dissectPair.getValue().getBytes(StandardCharsets.UTF_8); + byte[] delimiter = dissectPair.getDelimiter().getBytes(StandardCharsets.UTF_8); //start dissection after the first delimiter int i = leadingDelimiter.length(); int valueStart = i; @@ -203,7 +217,7 @@ public List parse(String inputString) { if (lookAheadMatches == delimiter.length) { //record the key/value tuple byte[] value = Arrays.copyOfRange(input, valueStart, i); - results.add(new DissectPair(key, new String(value, StandardCharsets.UTF_8))); + dissectMatch.add(key, new String(value, StandardCharsets.UTF_8)); //jump to the end of the match i += lookAheadMatches; //look for consecutive delimiters (e.g. 
a,,,,d,e) @@ -226,112 +240,61 @@ public List parse(String inputString) { dissectPair = it.next(); key = dissectPair.getKey(); //add the key with an empty value for the empty delimiter - results.add(new DissectPair(key, "")); + dissectMatch.add(key, ""); } } else { - break; + break; //the while loop } } //progress the keys/delimiter if possible if (!it.hasNext()) { - break; //the while loop + break; //the for loop } dissectPair = it.next(); key = dissectPair.getKey(); - delimiter = dissectPair.getValue().getBytes(StandardCharsets.UTF_8); + delimiter = dissectPair.getDelimiter().getBytes(StandardCharsets.UTF_8); //i is always one byte after the last found delimiter, aka the start of the next value valueStart = i; } } } //the last key, grab the rest of the input (unless consecutive delimiters already grabbed the last key) - if (results.size() < matchPairs.size()) { + if (!dissectMatch.fullyMatched()) { byte[] value = Arrays.copyOfRange(input, valueStart, input.length); String valueString = new String(value, StandardCharsets.UTF_8); - results.add(new DissectPair(key, key.skipRightPadding() ? valueString.replaceFirst("\\s++$", "") : valueString)); + dissectMatch.add(key, (key.skipRightPadding() ? 
valueString.replaceFirst("\\s++$", "") : valueString)); } } - if (!isValid(results)) { + Map results = dissectMatch.getResults(); + if (!dissectMatch.isValid(results)) { throw new DissectException.FindMatch(pattern, inputString); } - return postProcess(results.stream().filter(dissectPair -> !dissectPair.getKey().skip()).collect(Collectors.toList())); + return results; } /** - * Verify that each key has a entry in the result, don't rely only on size since some error cases would result in false positives + * A tuple class to hold the dissect key and delimiter */ - private boolean isValid(List results) { - boolean valid = false; - if (results.size() == matchPairs.size()) { - Set resultKeys = results.stream().map(DissectPair::getKey).collect(Collectors.toSet()); - Set sourceKeys = matchPairs.stream().map(DissectPair::getKey).collect(Collectors.toSet()); - long intersectionCount = resultKeys.stream().filter(sourceKeys::contains).count(); - valid = intersectionCount == results.size(); - } - return valid; - } + class DissectPair { - private List postProcess(List results) { - if (needsPostParsing) { - if (allModifiers.contains(DissectKey.Modifier.APPEND) || allModifiers.contains(DissectKey.Modifier.APPEND_WITH_ORDER)) { - results = append(results); - } - if (allModifiers.contains(DissectKey.Modifier.FIELD_NAME)) { //FIELD_VALUE is guaranteed to also be present - results = associate(results); - } - } - return results; - } + private final DissectKey key; + private final String delimiter; - private List append(List parserResult) { - List results = new ArrayList<>(parserResult.size() - 1); - Map> keyNameToDissectPairs = parserResult.stream().collect(Collectors.groupingBy(KEY_NAME)); - for (Map.Entry> entry : keyNameToDissectPairs.entrySet()) { - List sameKeyNameList = entry.getValue(); - long appendCount = sameKeyNameList.stream() - .filter(dissectPair -> APPEND_MODIFIERS.contains(dissectPair.getKey().getModifier())).count(); - // grouped by key name may not include append 
modifiers, for example associate pairs...don't - if (appendCount > 0) { - Collections.sort(sameKeyNameList); - String value = sameKeyNameList.stream().map(DissectPair::getValue).collect(Collectors.joining(appendSeparator)); - results.add(new DissectPair(sameKeyNameList.get(0).getKey(), value)); - } else { - sameKeyNameList.forEach(results::add); - } + DissectPair(DissectKey key, String delimiter) { + this.key = key; + this.delimiter = delimiter; } - return results; - } - private List associate(List parserResult) { - List results = new ArrayList<>(parserResult.size() - 1); - Map> keyNameToDissectPairs = getAssociateMap(parserResult); - for (Map.Entry> entry : keyNameToDissectPairs.entrySet()) { - List sameKeyNameList = entry.getValue(); - assert (sameKeyNameList.size() == 2); - Collections.sort(sameKeyNameList); - //based on the sort the key will always be first and value second. - String key = sameKeyNameList.get(0).getValue(); - String value = sameKeyNameList.get(1).getValue(); - results.add(new DissectPair(new DissectKey(key), value)); + DissectKey getKey() { + return key; } - //add non associate modifiers to results - results.addAll(parserResult.stream() - .filter(dissectPair -> !ASSOCIATE_MODIFIERS.contains(dissectPair.getKey().getModifier())) - .collect(Collectors.toList())); - return results; - } + String getDelimiter() { + return delimiter; + } - private Map> getAssociateMap(List dissectPairs) { - return dissectPairs.stream() - .filter(dissectPair -> ASSOCIATE_MODIFIERS.contains(dissectPair.getKey().getModifier())) - .collect(Collectors.groupingBy(KEY_NAME)); } - private EnumSet getAllModifiers(Collection keys) { - Set modifiers = keys.stream().map(DissectKey::getModifier).collect(Collectors.toSet()); - return modifiers.isEmpty() ? 
EnumSet.noneOf(DissectKey.Modifier.class) : EnumSet.copyOf(modifiers); - } } diff --git a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectKeyTests.java b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectKeyTests.java index 6e6dde059d9f0..0088393b1fa48 100644 --- a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectKeyTests.java +++ b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectKeyTests.java @@ -37,7 +37,7 @@ public void testNoModifier() { assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NONE)); assertThat(dissectKey.skip(), is(false)); assertThat(dissectKey.skipRightPadding(), is(false)); - assertThat(dissectKey.getOrderPosition(), equalTo(0)); + assertThat(dissectKey.getAppendPosition(), equalTo(0)); assertThat(dissectKey.getName(), equalTo(keyName)); } @@ -47,7 +47,7 @@ public void testAppendModifier() { assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.APPEND)); assertThat(dissectKey.skip(), is(false)); assertThat(dissectKey.skipRightPadding(), is(false)); - assertThat(dissectKey.getOrderPosition(), equalTo(0)); + assertThat(dissectKey.getAppendPosition(), equalTo(0)); assertThat(dissectKey.getName(), equalTo(keyName)); } @@ -58,7 +58,7 @@ public void testAppendWithOrderModifier() { assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.APPEND_WITH_ORDER)); assertThat(dissectKey.skip(), is(false)); assertThat(dissectKey.skipRightPadding(), is(false)); - assertThat(dissectKey.getOrderPosition(), equalTo(length)); + assertThat(dissectKey.getAppendPosition(), equalTo(length)); assertThat(dissectKey.getName(), equalTo(keyName)); } @@ -81,7 +81,7 @@ public void testFieldNameModifier() { assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.FIELD_NAME)); assertThat(dissectKey.skip(), is(false)); assertThat(dissectKey.skipRightPadding(), is(false)); - assertThat(dissectKey.getOrderPosition(), equalTo(0)); + assertThat(dissectKey.getAppendPosition(), equalTo(0)); 
assertThat(dissectKey.getName(), equalTo(keyName)); } @@ -91,7 +91,7 @@ public void testFieldValueModifiers() { assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.FIELD_VALUE)); assertThat(dissectKey.skip(), is(false)); assertThat(dissectKey.skipRightPadding(), is(false)); - assertThat(dissectKey.getOrderPosition(), equalTo(0)); + assertThat(dissectKey.getAppendPosition(), equalTo(0)); assertThat(dissectKey.getName(), equalTo(keyName)); } @@ -101,7 +101,7 @@ public void testRightPaddingModifiers() { assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NONE)); assertThat(dissectKey.skip(), is(false)); assertThat(dissectKey.skipRightPadding(), is(true)); - assertThat(dissectKey.getOrderPosition(), equalTo(0)); + assertThat(dissectKey.getAppendPosition(), equalTo(0)); assertThat(dissectKey.getName(), equalTo(keyName)); dissectKey = new DissectKey("?" + keyName + "->"); @@ -135,7 +135,7 @@ public void testSkipKey() { assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NONE)); assertThat(dissectKey.skip(), is(true)); assertThat(dissectKey.skipRightPadding(), is(false)); - assertThat(dissectKey.getOrderPosition(), equalTo(0)); + assertThat(dissectKey.getAppendPosition(), equalTo(0)); assertThat(dissectKey.getName(), equalTo(keyName)); } public void testSkipKeyWithPadding() { @@ -144,7 +144,7 @@ public void testSkipKeyWithPadding() { assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NONE)); assertThat(dissectKey.skip(), is(true)); assertThat(dissectKey.skipRightPadding(), is(true)); - assertThat(dissectKey.getOrderPosition(), equalTo(0)); + assertThat(dissectKey.getAppendPosition(), equalTo(0)); assertThat(dissectKey.getName(), equalTo(keyName)); } diff --git a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectMatchTests.java b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectMatchTests.java new file mode 100644 index 0000000000000..0066aafa99098 --- /dev/null +++ 
b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectMatchTests.java @@ -0,0 +1,93 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.dissect; + +import org.elasticsearch.common.collect.MapBuilder; +import org.elasticsearch.test.ESTestCase; + +import java.nio.charset.StandardCharsets; +import java.util.Map; +import java.util.stream.IntStream; + +import static org.hamcrest.Matchers.equalTo; + +public class DissectMatchTests extends ESTestCase { + + public void testIllegalArgs() { + expectThrows(IllegalArgumentException.class, () -> new DissectMatch("", 0, 1, 0, 0)); + expectThrows(IllegalArgumentException.class, () -> new DissectMatch("", 1, 0, 0, 0)); + } + + public void testValidAndFullyMatched() { + int expectedMatches = randomIntBetween(1, 26); + DissectMatch dissectMatch = new DissectMatch("", expectedMatches, expectedMatches, 0, 0); + IntStream.range(97, 97 + expectedMatches) //allow for a-z values + .forEach(i -> dissectMatch.add(new DissectKey(new String(new byte[]{(byte) i}, StandardCharsets.UTF_8)), "")); + assertThat(dissectMatch.fullyMatched(), equalTo(true)); + assertThat(dissectMatch.isValid(dissectMatch.getResults()), equalTo(true)); + } + + public void 
testNotValidAndFullyMatched() { + int expectedMatches = randomIntBetween(1, 26); + DissectMatch dissectMatch = new DissectMatch("", expectedMatches, expectedMatches, 0, 0); + IntStream.range(97, 97 + expectedMatches - 1) //allow for a-z values + .forEach(i -> dissectMatch.add(new DissectKey(new String(new byte[]{(byte) i}, StandardCharsets.UTF_8)), "")); + assertThat(dissectMatch.fullyMatched(), equalTo(false)); + assertThat(dissectMatch.isValid(dissectMatch.getResults()), equalTo(false)); + } + + public void testGetResultsIdempotent(){ + int expectedMatches = randomIntBetween(1, 26); + DissectMatch dissectMatch = new DissectMatch("", expectedMatches, expectedMatches, 0, 0); + IntStream.range(97, 97 + expectedMatches) //allow for a-z values + .forEach(i -> dissectMatch.add(new DissectKey(new String(new byte[]{(byte) i}, StandardCharsets.UTF_8)), "")); + assertThat(dissectMatch.getResults(), equalTo(dissectMatch.getResults())); + } + + public void testAppend(){ + DissectMatch dissectMatch = new DissectMatch("-", 3, 1, 3, 0); + dissectMatch.add(new DissectKey("+a"), "x"); + dissectMatch.add(new DissectKey("+a"), "y"); + dissectMatch.add(new DissectKey("+a"), "z"); + Map results = dissectMatch.getResults(); + assertThat(dissectMatch.isValid(results), equalTo(true)); + assertThat(results, equalTo(MapBuilder.newMapBuilder().put("a", "x-y-z").map())); + } + + public void testAppendWithOrder(){ + DissectMatch dissectMatch = new DissectMatch("-", 3, 1, 3, 0); + dissectMatch.add(new DissectKey("+a/3"), "x"); + dissectMatch.add(new DissectKey("+a"), "y"); + dissectMatch.add(new DissectKey("+a/1"), "z"); + Map results = dissectMatch.getResults(); + assertThat(dissectMatch.isValid(results), equalTo(true)); + assertThat(results, equalTo(MapBuilder.newMapBuilder().put("a", "y-z-x").map())); + } + + public void testReference(){ + DissectMatch dissectMatch = new DissectMatch("-", 2, 1, 0, 1); + dissectMatch.add(new DissectKey("&a"), "x"); + dissectMatch.add(new DissectKey("?a"), 
"y"); + Map results = dissectMatch.getResults(); + assertThat(dissectMatch.isValid(results), equalTo(true)); + assertThat(results, equalTo(MapBuilder.newMapBuilder().put("y", "x").map())); + } + +} diff --git a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectPairTests.java b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectPairTests.java deleted file mode 100644 index 7e2f357f557e5..0000000000000 --- a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectPairTests.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.dissect; - -import org.elasticsearch.test.ESTestCase; - -import static org.hamcrest.Matchers.equalTo; - -public class DissectPairTests extends ESTestCase { - - public void testNoModifierSameOrder(){ - String keyName1 = randomAlphaOfLengthBetween(1, 10); - String keyName2 = randomAlphaOfLengthBetween(1, 10); - String value = randomAlphaOfLengthBetween(1, 10); - DissectPair pair1 = new DissectPair(new DissectKey(keyName1), value); - DissectPair pair2 = new DissectPair(new DissectKey(keyName2), value); - assertThat(pair1.compareTo(pair2), equalTo(0)); - assertThat(pair2.compareTo(pair1), equalTo(0)); - } - - public void testAppendDifferentOrder(){ - String keyName = randomAlphaOfLengthBetween(1, 10); - String value = randomAlphaOfLengthBetween(1, 10); - int length = randomIntBetween(1, 100); - DissectPair pair1 = new DissectPair(new DissectKey("+" + keyName + "/" + length), value); - DissectPair pair2 = new DissectPair(new DissectKey("+" + keyName + "/" + length + 1), value); - assertThat(pair1.compareTo(pair2), equalTo(-1)); - assertThat(pair2.compareTo(pair1), equalTo(1)); - } - public void testAppendWithImplicitZeroOrder(){ - String keyName = randomAlphaOfLengthBetween(1, 10); - String value = randomAlphaOfLengthBetween(1, 10); - int length = randomIntBetween(1, 100); - DissectPair pair1 = new DissectPair(new DissectKey("keyName"), value); - DissectPair pair2 = new DissectPair(new DissectKey("+" + keyName + "/" + length), value); - assertThat(pair1.compareTo(pair2), equalTo(-1)); - assertThat(pair2.compareTo(pair1), equalTo(1)); - } - - public void testAppendSameOrder(){ - String keyName = randomAlphaOfLengthBetween(1, 10); - String value = randomAlphaOfLengthBetween(1, 10); - int length = randomIntBetween(1, 100); - DissectPair pair1 = new DissectPair(new DissectKey("+" + keyName + "/" + length), value); - DissectPair pair2 = new DissectPair(new DissectKey("+" + keyName + "/" + length), value); - assertThat(pair1.compareTo(pair2), 
equalTo(0)); - assertThat(pair2.compareTo(pair1), equalTo(0)); - } - - public void testFieldNameOrder(){ - String keyName = randomAlphaOfLengthBetween(1, 10); - String value = randomAlphaOfLengthBetween(1, 10); - DissectPair pair1 = new DissectPair(new DissectKey("?" + keyName), value); - DissectPair pair2 = new DissectPair(new DissectKey("&" + keyName), value); - assertThat(pair1.compareTo(pair2), equalTo(-1)); - assertThat(pair2.compareTo(pair1), equalTo(1)); - } -} diff --git a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java index a2b86a03abba5..5d6163a9008df 100644 --- a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java +++ b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java @@ -27,7 +27,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; -import java.util.stream.Collectors; +import java.util.Map; import static com.carrotsearch.randomizedtesting.RandomizedTest.randomAsciiAlphanumOfLengthBetween; @@ -42,6 +42,7 @@ public void testJavaDocExamples() { assertMatch("%{?a} %{b} %{&a}", "foo bar baz", Arrays.asList("foo", "b"), Arrays.asList("baz", "bar")); assertMatch("%{a} %{} %{c}", "foo bar baz", Arrays.asList("a", "c"), Arrays.asList("foo", "baz")); assertMatch("%{a},%{b},%{c},%{d}", "foo,,,", Arrays.asList("a", "b", "c", "d"), Arrays.asList("foo", "", "", "")); + assertMatch("%{a->},%{b}", "foo,,,,,,bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); } /** @@ -175,6 +176,7 @@ public void testMatchRemainder() { } public void testAppend() { + assertMatch("%{a} %{+a} %{+a}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foobarbaz")); assertMatch("%{a} %{+a} %{b} %{+b}", "foo bar baz lol", Arrays.asList("a", "b"), Arrays.asList("foobar", "bazlol")); assertMatch("%{a} %{+a/2} %{+a/1}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foobazbar")); 
assertMatch("%{a} %{+a/2} %{+a/1}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foo baz bar"), " "); @@ -248,6 +250,11 @@ public void testTrimmedEnd() { assertMatch("%{a} %{b->},", "foo bar,,,,,,", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); } + public void testLeadingDelimiter(){ + assertMatch(",,,%{a} %{b}", ",,,foo bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); + assertMatch(",%{a} %{b}", ",,foo bar", Arrays.asList("a", "b"), Arrays.asList(",foo", "bar")); + } + /** * Runtime errors */ @@ -275,7 +282,6 @@ private void assertMiss(String pattern, String input) { assertThat(e.getMessage(), CoreMatchers.containsString("Unable to find match for dissect pattern")); assertThat(e.getMessage(), CoreMatchers.containsString(pattern)); assertThat(e.getMessage(), input == null ? CoreMatchers.containsString("null") : CoreMatchers.containsString(input)); - } private void assertBadPattern(String pattern) { @@ -299,9 +305,9 @@ private void assertMatch(String pattern, String input, List expectedKeys } private void assertMatch(String pattern, String input, List expectedKeys, List expectedValues, String appendSeperator) { - List dissectPairs = new DissectParser(pattern, appendSeperator).parse(input); - List foundKeys = dissectPairs.stream().map(d -> d.getKey().getName()).collect(Collectors.toList()); - List foundValues = dissectPairs.stream().map(DissectPair::getValue).collect(Collectors.toList()); + Map results = new DissectParser(pattern, appendSeperator).parse(input); + List foundKeys = new ArrayList<>(results.keySet()); + List foundValues = new ArrayList<>(results.values()); Collections.sort(foundKeys); Collections.sort(foundValues); Collections.sort(expectedKeys); From bb57f2f903f23c7e3a51fb8f0d6f3ffeb9723856 Mon Sep 17 00:00:00 2001 From: Jake Landis Date: Thu, 26 Jul 2018 12:12:24 -0500 Subject: [PATCH 4/9] add a couple unit tests that match grok's tests --- .../dissect/DissectParserTests.java | 18 ++++++++++++++++++ 1 file changed, 18 
insertions(+) diff --git a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java index 5d6163a9008df..d217f0bec0929 100644 --- a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java +++ b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java @@ -277,6 +277,24 @@ public void testBadPatternOrKey(){ assertBadKey("%{++}"); } + public void testSyslog() { + assertMatch("%{timestamp} %{+timestamp} %{+timestamp} %{logsource} %{program}[%{pid}]: %{message}", + "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from camomile.cloud9.net[168.100.1.3]", + Arrays.asList("timestamp", "logsource", "program", "pid", "message"), + Arrays.asList("Mar 16 00:01:25", "evita", "postfix/smtpd", "1713", "connect from camomile.cloud9.net[168.100.1.3]"), " "); + } + + public void testApacheLog(){ + assertMatch("%{clientip} %{ident} %{auth} [%{timestamp}] \"%{verb} %{request} HTTP/%{httpversion}\" %{response} %{bytes} \"%{referrer}\" \"%{agent}\" %{->}", + "31.184.238.164 - - [24/Jul/2014:05:35:37 +0530] \"GET /logs/access.log HTTP/1.0\" 200 69849 " + + "\"http://8rursodiol.enjin.com\" \"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) " + + "Chrome/30.0.1599.12785 YaBrowser/13.12.1599.12785 Safari/537.36\" \"www.dlwindianrailways.com\"", + Arrays.asList("clientip", "ident", "auth", "timestamp", "verb", "request", "httpversion", "response", "bytes", "referrer", "agent"), + Arrays.asList("31.184.238.164", "-", "-", "24/Jul/2014:05:35:37 +0530", "GET", "/logs/access.log", "1.0", "200", "69849", + "http://8rursodiol.enjin.com", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36" + + " (KHTML, like Gecko) Chrome/30.0.1599.12785 YaBrowser/13.12.1599.12785 Safari/537.36")); + } + private void assertMiss(String pattern, String input) { DissectException e = expectThrows(DissectException.class, () -> new 
DissectParser(pattern, null).parse(input)); assertThat(e.getMessage(), CoreMatchers.containsString("Unable to find match for dissect pattern")); From 6862333d51be9a7cc99fa52e7554cd54c6646c21 Mon Sep 17 00:00:00 2001 From: Jake Landis Date: Thu, 26 Jul 2018 12:29:50 -0500 Subject: [PATCH 5/9] change reference operator from ? to * (7.0 behavior) --- .../org/elasticsearch/dissect/DissectKey.java | 6 +-- .../elasticsearch/dissect/DissectParser.java | 14 +++--- .../dissect/DissectKeyTests.java | 4 +- .../dissect/DissectMatchTests.java | 2 +- .../dissect/DissectParserTests.java | 47 +++++++++---------- 5 files changed, 34 insertions(+), 39 deletions(-) diff --git a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectKey.java b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectKey.java index 4ea2b9ebff4da..88ccbdfba3d08 100644 --- a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectKey.java +++ b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectKey.java @@ -36,7 +36,7 @@ * @see DissectParser */ public final class DissectKey { - private static final Pattern LEFT_MODIFIER_PATTERN = Pattern.compile("([+?&])(.*?)(->)?$", Pattern.DOTALL); + private static final Pattern LEFT_MODIFIER_PATTERN = Pattern.compile("([+*&])(.*?)(->)?$", Pattern.DOTALL); private static final Pattern RIGHT_PADDING_PATTERN = Pattern.compile("^(.*?)(->)?$", Pattern.DOTALL); private static final Pattern APPEND_WITH_ORDER_PATTERN = Pattern.compile("[+](.*?)(/)([0-9]+)(->)?$", Pattern.DOTALL); private final Modifier modifier; @@ -143,9 +143,9 @@ public String toString() { } public enum Modifier { - NONE(""), APPEND_WITH_ORDER("/"), APPEND("+"), FIELD_NAME("?"), FIELD_VALUE("&"); + NONE(""), APPEND_WITH_ORDER("/"), APPEND("+"), FIELD_NAME("*"), FIELD_VALUE("&"); - private static final Pattern MODIFIER_PATTERN = Pattern.compile("[/+?&]"); + private static final Pattern MODIFIER_PATTERN = Pattern.compile("[/+*&]"); private final String modifier; diff --git 
a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java index 54d21a30380f8..5cad44daa43a3 100644 --- a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java +++ b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java @@ -61,16 +61,16 @@ * result: {@code a=foobazbar} *
    *
  • - *
  • {@code ?} Instructs the parser to ignore the name of this key, instead use the value of key as the key name. + *
  • {@code *} Instructs the parser to ignore the name of this key, instead use the value of key as the key name. * Requires another key with the same name and the {@code &} modifier to be the value. Example:
    - * pattern: {@code %{?a} %{b} %{&a}}
    + * pattern: {@code %{*a} %{b} %{&a}}
      * string: {@code foo bar baz}
      * result: {@code foo=baz, b=bar}
      * 
  • - *
  • {@code &} Instructs the parser to ignore this key and place the matched value to a key of the same name with the {@code ?} modifier. - * Requires another key with the same name and the {@code ?} modifier. + *
  • {@code &} Instructs the parser to ignore this key and place the matched value to a key of the same name with the {@code *} modifier. + * Requires another key with the same name and the {@code *} modifier. * Example:
    - * pattern: {@code %{?a} %{b} %{&a}}
    + * pattern: {@code %{*a} %{b} %{&a}}
      * string: {@code foo bar baz}
      * result: {@code foo=baz, b=bar}
      * 
  • @@ -142,7 +142,7 @@ public DissectParser(String pattern, String appendSeparator) { } appendCount = appendKeyNames.size(); - //reference validation - ensure that '?' and '&' come in pairs + //reference validation - ensure that '*' and '&' come in pairs Map> referenceGroupings = matchPairs.stream() .filter(dissectPair -> ASSOCIATE_MODIFIERS.contains(dissectPair.getKey().getModifier())) .collect(Collectors.groupingBy(KEY_NAME)); @@ -150,7 +150,7 @@ public DissectParser(String pattern, String appendSeparator) { if (entry.getValue().size() != 2) { throw new DissectException.PatternParse(pattern, "Found invalid key/reference associations: '" + entry.getValue().stream().map(KEY_NAME).collect(Collectors.joining(",")) + - "' Please ensure each '?' is matched with a matching '&"); + "' Please ensure each '*' is matched with a matching '&"); } } diff --git a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectKeyTests.java b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectKeyTests.java index 0088393b1fa48..369d54b6f9ef2 100644 --- a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectKeyTests.java +++ b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectKeyTests.java @@ -77,7 +77,7 @@ public void testOrderModifierWithoutAppend() { public void testFieldNameModifier() { String keyName = randomAlphaOfLengthBetween(1, 10); - DissectKey dissectKey = new DissectKey("?" + keyName); + DissectKey dissectKey = new DissectKey("*" + keyName); assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.FIELD_NAME)); assertThat(dissectKey.skip(), is(false)); assertThat(dissectKey.skipRightPadding(), is(false)); @@ -104,7 +104,7 @@ public void testRightPaddingModifiers() { assertThat(dissectKey.getAppendPosition(), equalTo(0)); assertThat(dissectKey.getName(), equalTo(keyName)); - dissectKey = new DissectKey("?" 
+ keyName + "->"); + dissectKey = new DissectKey("*" + keyName + "->"); assertThat(dissectKey.skipRightPadding(), is(true)); dissectKey = new DissectKey("&" + keyName + "->"); diff --git a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectMatchTests.java b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectMatchTests.java index 0066aafa99098..d562afb636308 100644 --- a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectMatchTests.java +++ b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectMatchTests.java @@ -84,7 +84,7 @@ public void testAppendWithOrder(){ public void testReference(){ DissectMatch dissectMatch = new DissectMatch("-", 2, 1, 0, 1); dissectMatch.add(new DissectKey("&a"), "x"); - dissectMatch.add(new DissectKey("?a"), "y"); + dissectMatch.add(new DissectKey("*a"), "y"); Map results = dissectMatch.getResults(); assertThat(dissectMatch.isValid(results), equalTo(true)); assertThat(results, equalTo(MapBuilder.newMapBuilder().put("y", "x").map())); diff --git a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java index d217f0bec0929..ee33196b9e9d3 100644 --- a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java +++ b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java @@ -39,7 +39,7 @@ public void testJavaDocExamples() { assertMatch("%{a->} %{b} %{c}", "foo bar baz", Arrays.asList("a", "b", "c"), Arrays.asList("foo", "bar", "baz")); assertMatch("%{a} %{+a} %{+a}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foobarbaz")); assertMatch("%{a} %{+a/2} %{+a/1}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foobazbar")); - assertMatch("%{?a} %{b} %{&a}", "foo bar baz", Arrays.asList("foo", "b"), Arrays.asList("baz", "bar")); + assertMatch("%{*a} %{b} %{&a}", "foo bar baz", Arrays.asList("foo", "b"), Arrays.asList("baz", "bar")); assertMatch("%{a} %{} %{c}", "foo 
bar baz", Arrays.asList("a", "c"), Arrays.asList("foo", "baz")); assertMatch("%{a},%{b},%{c},%{d}", "foo,,,", Arrays.asList("a", "b", "c", "d"), Arrays.asList("foo", "", "", "")); assertMatch("%{a->},%{b}", "foo,,,,,,bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); @@ -75,14 +75,6 @@ public void testLogstashSpecs() { assertMatch("%{+a} %{b}", "foo bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); assertMatch("%{+a} %{b} %{+a} %{c}", "foo bar baz quux", Arrays.asList("a", "b", "c"), Arrays.asList("foo baz", "bar", "quux"), " "); - //Logstash allows implicit '?' for association, which allows this dissect patterns without matching '?' and '&' to work. - //For example, "%{k1}=%{&k1}, %{k2}=%{&k2}" will match in Logstash but not here due to the requirement of matching '?' and '&' - //begin: The following tests match in Logstash, but are considered bad patterns here: - assertBadPattern("%{k1}=%{&k1}, %{k2}=%{&k2}"); - assertBadPattern("%{k1}=%{&k3}, %{k2}=%{&k4}"); - assertBadPattern("%{?k1}=%{&k3}, %{?k2}=%{&k4}"); - assertBadPattern("%{&k1}, %{&k2}, %{&k3}"); - //end assertMatch("%{} %{syslog_timestamp} %{hostname} %{rt}: %{reason} %{+reason} %{src_ip}/%{src_port}->%{dst_ip}/%{dst_port} " + "%{polrt} %{+polrt} %{+polrt} %{from_zone} %{to_zone} %{rest}", "42 2016-05-25T14:47:23Z host.name.com RT_FLOW - RT_FLOW_SESSION_DENY: session denied 2.2.2.20/60000->1.1.1.10/8090 None " + @@ -95,10 +87,11 @@ public void testLogstashSpecs() { assertBadKey("%{&+a_field}"); assertMatch("%{a->} %{b->}---%{c}", "foo bar------------baz", Arrays.asList("a", "b", "c"), Arrays.asList("foo", "bar", "baz")); + //TODO: support '?' as a named skip key //Logstash will match "%{?->}-%{a}" to "-----666", however '?' without a corresponding '&' is not allowed here, so the syntax is //the same minus the '?' 
as tested below - assertBadKey("%{?->}-%{a}", "?->"); - assertMatch("%{->}-%{a}", "-----666", Arrays.asList("a"), Arrays.asList("666")); +// assertBadKey("%{?->}-%{a}", "?->"); +// assertMatch("%{->}-%{a}", "-----666", Arrays.asList("a"), Arrays.asList("666")); assertMatch("%{a},%{b},%{c},%{d},%{e},%{f}", "111,,333,,555,666", Arrays.asList("a", "b", "c", "d", "e", "f"), Arrays.asList("111", "", "333", "", "555", "666")); assertMatch("%{a}.࿏.%{b}", "⟳༒.࿏.༒⟲", Arrays.asList("a", "b"), Arrays.asList("⟳༒", "༒⟲")); @@ -162,7 +155,7 @@ public void testMatchUnicode() { assertMatch("%{a}࿏%{+a} %{+a}", "⟳༒࿏༒⟲ 子", Arrays.asList("a"), Arrays.asList("⟳༒༒⟲子")); assertMatch("%{a}࿏%{+a/2} %{+a/1}", "⟳༒࿏༒⟲ 子", Arrays.asList("a"), Arrays.asList("⟳༒子༒⟲")); assertMatch("%{a->}࿏%{b}", "⟳༒࿏࿏࿏࿏࿏༒⟲", Arrays.asList("a", "b"), Arrays.asList("⟳༒", "༒⟲")); - assertMatch("%{?a}࿏%{&a}", "⟳༒࿏༒⟲", Arrays.asList("⟳༒"), Arrays.asList("༒⟲")); + assertMatch("%{*a}࿏%{&a}", "⟳༒࿏༒⟲", Arrays.asList("⟳༒"), Arrays.asList("༒⟲")); assertMatch("%{}࿏%{a}", "⟳༒࿏༒⟲", Arrays.asList("a"), Arrays.asList("༒⟲")); } @@ -171,7 +164,7 @@ public void testMatchRemainder() { assertMatch("%{a} %{b}", "foo bar the rest", Arrays.asList("a", "b"), Arrays.asList("foo", "bar the rest")); assertMatch("%{} %{b}", "foo bar the rest", Arrays.asList("b"), Arrays.asList("bar the rest")); assertMatch("%{a} %{b->}", "foo bar the rest", Arrays.asList("a", "b"), Arrays.asList("foo", "bar the rest")); - assertMatch("%{?a} %{&a}", "foo bar the rest", Arrays.asList("foo"), Arrays.asList("bar the rest")); + assertMatch("%{*a} %{&a}", "foo bar the rest", Arrays.asList("foo"), Arrays.asList("bar the rest")); assertMatch("%{a} %{+a}", "foo bar the rest", Arrays.asList("a"), Arrays.asList("foo bar the rest"), " "); } @@ -183,19 +176,19 @@ public void testAppend() { } public void testAssociate() { - assertMatch("%{?a} %{&a}", "foo bar", Arrays.asList("foo"), Arrays.asList("bar")); - assertMatch("%{&a} %{?a}", "foo bar", Arrays.asList("bar"), 
Arrays.asList("foo")); - assertMatch("%{?a} %{&a} %{?b} %{&b}", "foo bar baz lol", Arrays.asList("foo", "baz"), Arrays.asList("bar", "lol")); - assertMatch("%{?a} %{&a} %{c} %{?b} %{&b}", "foo bar x baz lol", + assertMatch("%{*a} %{&a}", "foo bar", Arrays.asList("foo"), Arrays.asList("bar")); + assertMatch("%{&a} %{*a}", "foo bar", Arrays.asList("bar"), Arrays.asList("foo")); + assertMatch("%{*a} %{&a} %{*b} %{&b}", "foo bar baz lol", Arrays.asList("foo", "baz"), Arrays.asList("bar", "lol")); + assertMatch("%{*a} %{&a} %{c} %{*b} %{&b}", "foo bar x baz lol", Arrays.asList("foo", "baz", "c"), Arrays.asList("bar", "lol", "x")); - assertBadPattern("%{?a} %{a}"); + assertBadPattern("%{*a} %{a}"); assertBadPattern("%{a} %{&a}"); - assertMiss("%{?a} %{&a} {a} %{?b} %{&b}", "foo bar x baz lol"); + assertMiss("%{*a} %{&a} {a} %{*b} %{&b}", "foo bar x baz lol"); } public void testAppendAndAssociate() { - assertMatch("%{a} %{+a} %{?b} %{&b}", "foo bar baz lol", Arrays.asList("a", "baz"), Arrays.asList("foobar", "lol")); - assertMatch("%{a->} %{+a/2} %{+a/1} %{?b} %{&b}", "foo bar baz lol x", + assertMatch("%{a} %{+a} %{*b} %{&b}", "foo bar baz lol", Arrays.asList("a", "baz"), Arrays.asList("foobar", "lol")); + assertMatch("%{a->} %{+a/2} %{+a/1} %{*b} %{&b}", "foo bar baz lol x", Arrays.asList("a", "lol"), Arrays.asList("foobazbar", "x")); } @@ -237,7 +230,7 @@ public void testSkipRightPadding() { assertMatch("%{a->} %{b}", "foo bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); assertMatch("%{a->} %{b}", "foo bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); assertMatch("%{->} %{a}", "foo bar", Arrays.asList("a"), Arrays.asList("bar")); - assertMatch("%{a->} %{+a->} %{?b->} %{&b->} %{c}", "foo bar baz lol x", + assertMatch("%{a->} %{+a->} %{*b->} %{&b->} %{c}", "foo bar baz lol x", Arrays.asList("a", "baz", "c"), Arrays.asList("foobar", "lol", "x")); } @@ -272,8 +265,8 @@ public void testMiss() { public void testBadPatternOrKey(){ assertBadPattern(""); 
assertBadPattern("{}"); - assertBadPattern("%{?a} %{&b}"); - assertBadKey("%{?}"); + assertBadPattern("%{*a} %{&b}"); + assertBadKey("%{*}"); assertBadKey("%{++}"); } @@ -285,11 +278,13 @@ public void testSyslog() { } public void testApacheLog(){ - assertMatch("%{clientip} %{ident} %{auth} [%{timestamp}] \"%{verb} %{request} HTTP/%{httpversion}\" %{response} %{bytes} \"%{referrer}\" \"%{agent}\" %{->}", + assertMatch("%{clientip} %{ident} %{auth} [%{timestamp}] \"%{verb} %{request} HTTP/%{httpversion}\" %{response} %{bytes}" + + " \"%{referrer}\" \"%{agent}\" %{->}", "31.184.238.164 - - [24/Jul/2014:05:35:37 +0530] \"GET /logs/access.log HTTP/1.0\" 200 69849 " + "\"http://8rursodiol.enjin.com\" \"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/30.0.1599.12785 YaBrowser/13.12.1599.12785 Safari/537.36\" \"www.dlwindianrailways.com\"", - Arrays.asList("clientip", "ident", "auth", "timestamp", "verb", "request", "httpversion", "response", "bytes", "referrer", "agent"), + Arrays.asList("clientip", "ident", "auth", "timestamp", "verb", "request", "httpversion", "response", "bytes", + "referrer", "agent"), Arrays.asList("31.184.238.164", "-", "-", "24/Jul/2014:05:35:37 +0530", "GET", "/logs/access.log", "1.0", "200", "69849", "http://8rursodiol.enjin.com", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36" + " (KHTML, like Gecko) Chrome/30.0.1599.12785 YaBrowser/13.12.1599.12785 Safari/537.36")); From f3bd24aa623ada815d13113cc0135452928a050e Mon Sep 17 00:00:00 2001 From: Jake Landis Date: Thu, 26 Jul 2018 14:05:26 -0500 Subject: [PATCH 6/9] Add support for named skiped fields (7.0 support) --- .../org/elasticsearch/dissect/DissectKey.java | 14 +++++++++--- .../elasticsearch/dissect/DissectParser.java | 10 ++++++++- .../dissect/DissectKeyTests.java | 22 +++++++++++++++++++ .../dissect/DissectParserTests.java | 13 +++++++++++ 4 files changed, 55 insertions(+), 4 deletions(-) diff --git 
a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectKey.java b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectKey.java index 88ccbdfba3d08..0547cf4ea7b19 100644 --- a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectKey.java +++ b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectKey.java @@ -36,7 +36,7 @@ * @see DissectParser */ public final class DissectKey { - private static final Pattern LEFT_MODIFIER_PATTERN = Pattern.compile("([+*&])(.*?)(->)?$", Pattern.DOTALL); + private static final Pattern LEFT_MODIFIER_PATTERN = Pattern.compile("([+*&?])(.*?)(->)?$", Pattern.DOTALL); private static final Pattern RIGHT_PADDING_PATTERN = Pattern.compile("^(.*?)(->)?$", Pattern.DOTALL); private static final Pattern APPEND_WITH_ORDER_PATTERN = Pattern.compile("[+](.*?)(/)([0-9]+)(->)?$", Pattern.DOTALL); private final Modifier modifier; @@ -62,6 +62,14 @@ public final class DissectKey { } skip = name.isEmpty(); break; + case NAMED_SKIP: + matcher = LEFT_MODIFIER_PATTERN.matcher(key); + while (matcher.find()) { + name = matcher.group(2); + skipRightPadding = matcher.group(3) != null; + } + skip = true; + break; case APPEND: matcher = LEFT_MODIFIER_PATTERN.matcher(key); while (matcher.find()) { @@ -143,9 +151,9 @@ public String toString() { } public enum Modifier { - NONE(""), APPEND_WITH_ORDER("/"), APPEND("+"), FIELD_NAME("*"), FIELD_VALUE("&"); + NONE(""), APPEND_WITH_ORDER("/"), APPEND("+"), FIELD_NAME("*"), FIELD_VALUE("&"), NAMED_SKIP("?"); - private static final Pattern MODIFIER_PATTERN = Pattern.compile("[/+*&]"); + private static final Pattern MODIFIER_PATTERN = Pattern.compile("[/+*&?]"); private final String modifier; diff --git a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java index 5cad44daa43a3..638139814a729 100644 --- a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java +++ 
b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java @@ -74,13 +74,21 @@ * string: {@code foo bar baz} * result: {@code foo=baz, b=bar} * + *
  • {@code ?} Instructs the parser to ignore this key. The key name exists only for the purpose of human readability. Example:
    + *  pattern: {@code %{a} %{?skipme} %{c}}
    + *  string: {@code foo bar baz}
    + *  result: {@code a=foo, c=baz}
    + * 
    *
- *

Empty key names patterns are also supported. They will simply be ignored in the result. Example + *

Empty key name patterns are also supported. They behave just like the {@code ?} modifier, except the name is not required. + * The result will simply be ignored. Example:

  * pattern: {@code %{a} %{} %{c}}
  * string: {@code foo bar baz}
  * result: {@code a=foo, c=baz}
  * 
+ *

* Inspired by the Logstash Dissect Filter by Guy Boertje */ diff --git a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectKeyTests.java b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectKeyTests.java index 369d54b6f9ef2..0f3f7ed041df5 100644 --- a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectKeyTests.java +++ b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectKeyTests.java @@ -113,6 +113,9 @@ public void testRightPaddingModifiers() { dissectKey = new DissectKey("+" + keyName + "->"); assertThat(dissectKey.skipRightPadding(), is(true)); + dissectKey = new DissectKey("?" + keyName + "->"); + assertThat(dissectKey.skipRightPadding(), is(true)); + dissectKey = new DissectKey("+" + keyName + "/2->"); assertThat(dissectKey.skipRightPadding(), is(true)); } @@ -138,6 +141,16 @@ public void testSkipKey() { assertThat(dissectKey.getAppendPosition(), equalTo(0)); assertThat(dissectKey.getName(), equalTo(keyName)); } + public void testNamedSkipKey() { + String keyName = "myname"; + DissectKey dissectKey = new DissectKey("?" +keyName); + assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NAMED_SKIP)); + assertThat(dissectKey.skip(), is(true)); + assertThat(dissectKey.skipRightPadding(), is(false)); + assertThat(dissectKey.getAppendPosition(), equalTo(0)); + assertThat(dissectKey.getName(), equalTo(keyName)); + } + public void testSkipKeyWithPadding() { String keyName = ""; DissectKey dissectKey = new DissectKey(keyName + "->"); @@ -147,6 +160,15 @@ public void testSkipKeyWithPadding() { assertThat(dissectKey.getAppendPosition(), equalTo(0)); assertThat(dissectKey.getName(), equalTo(keyName)); } + public void testNamedEmptySkipKeyWithPadding() { + String keyName = ""; + DissectKey dissectKey = new DissectKey("?" 
+keyName + "->"); + assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NAMED_SKIP)); + assertThat(dissectKey.skip(), is(true)); + assertThat(dissectKey.skipRightPadding(), is(true)); + assertThat(dissectKey.getAppendPosition(), equalTo(0)); + assertThat(dissectKey.getName(), equalTo(keyName)); + } public void testInvalidModifiers() { //should never happen due to regex diff --git a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java index ee33196b9e9d3..8a0f961146a7f 100644 --- a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java +++ b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java @@ -41,6 +41,7 @@ public void testJavaDocExamples() { assertMatch("%{a} %{+a/2} %{+a/1}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foobazbar")); assertMatch("%{*a} %{b} %{&a}", "foo bar baz", Arrays.asList("foo", "b"), Arrays.asList("baz", "bar")); assertMatch("%{a} %{} %{c}", "foo bar baz", Arrays.asList("a", "c"), Arrays.asList("foo", "baz")); + assertMatch("%{a} %{?skipme} %{c}", "foo bar baz", Arrays.asList("a", "c"), Arrays.asList("foo", "baz")); assertMatch("%{a},%{b},%{c},%{d}", "foo,,,", Arrays.asList("a", "b", "c", "d"), Arrays.asList("foo", "", "", "")); assertMatch("%{a->},%{b}", "foo,,,,,,bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); } @@ -200,6 +201,18 @@ public void testEmptyKey() { assertMatch("%{a} %{->}", "foo bar ", Arrays.asList("a"), Arrays.asList("foo")); } + public void testNamedSkipKey() { + assertMatch("%{?foo} %{b}", "foo bar", Arrays.asList("b"), Arrays.asList("bar")); + assertMatch("%{?} %{b}", "foo bar", Arrays.asList("b"), Arrays.asList("bar")); + assertMatch("%{a} %{?bar}", "foo bar", Arrays.asList("a"), Arrays.asList("foo")); + assertMatch("%{?foo->} %{b}", "foo bar", Arrays.asList("b"), Arrays.asList("bar")); + assertMatch("%{?->} %{b}", "foo bar", 
Arrays.asList("b"), Arrays.asList("bar")); + assertMatch("%{?foo->} %{b}", " bar", Arrays.asList("b"), Arrays.asList("bar")); + assertMatch("%{a} %{->?bar}", "foo bar ", Arrays.asList("a"), Arrays.asList("foo")); + assertMatch("%{a} %{?skipme} %{?skipme}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foo")); + assertMatch("%{a} %{?} %{?}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foo")); + } + public void testConsecutiveDelimiters() { //leading assertMatch("%{->},%{a}", ",,,,,foo", Arrays.asList("a"), Arrays.asList("foo")); From ce6bb75b4ba8adc58564849fba91b7d061d6aa48 Mon Sep 17 00:00:00 2001 From: Jake Landis Date: Tue, 31 Jul 2018 10:07:37 -0500 Subject: [PATCH 7/9] updates to better match and test against specification --- libs/dissect/build.gradle | 3 + .../elasticsearch/dissect/DissectParser.java | 16 +- .../dissect/DissectParserTests.java | 72 +++- .../test/resources/specification/tests.json | 363 ++++++++++++++++++ 4 files changed, 432 insertions(+), 22 deletions(-) create mode 100644 libs/dissect/src/test/resources/specification/tests.json diff --git a/libs/dissect/build.gradle b/libs/dissect/build.gradle index 2ff61c10237d3..c09a2a4ebd1b3 100644 --- a/libs/dissect/build.gradle +++ b/libs/dissect/build.gradle @@ -27,6 +27,9 @@ dependencies { exclude group: 'org.elasticsearch', module: 'dissect' } } + testCompile "com.fasterxml.jackson.core:jackson-core:${versions.jackson}" + testCompile("com.fasterxml.jackson.core:jackson-annotations:${versions.jackson}") + testCompile("com.fasterxml.jackson.core:jackson-databind:${versions.jackson}") } forbiddenApisMain { diff --git a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java index 638139814a729..5160eb9a19932 100644 --- a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java +++ b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java @@ -125,13 +125,12 @@ public 
DissectParser(String pattern, String appendSeparator) { String delimiter = matcher.group(2); matchPairs.add(new DissectPair(key, delimiter)); } - if (matchPairs.isEmpty()) { - throw new DissectException.PatternParse(pattern, "Unable to find any keys or delimiters."); - } this.maxMatches = matchPairs.size(); this.maxResults = Long.valueOf(matchPairs.stream() .filter(dissectPair -> !dissectPair.getKey().skip()).map(KEY_NAME).distinct().count()).intValue(); - + if (this.maxMatches == 0 || maxResults == 0) { + throw new DissectException.PatternParse(pattern, "Unable to find any keys or delimiters."); + } //append validation - look through all of the keys to see if there are any keys that need to participate in an append operation // but don't have the '+' defined Set appendKeyNames = matchPairs.stream() @@ -199,7 +198,8 @@ public Map parse(String inputString) { DissectMatch dissectMatch = new DissectMatch(appendSeparator, maxMatches, maxResults, appendCount, referenceCount); Iterator it = matchPairs.iterator(); //ensure leading delimiter matches - if (inputString != null && leadingDelimiter.equals(inputString.substring(0, leadingDelimiter.length()))) { + if (inputString != null && inputString.length() > leadingDelimiter.length() + && leadingDelimiter.equals(inputString.substring(0, leadingDelimiter.length()))) { byte[] input = inputString.getBytes(StandardCharsets.UTF_8); //grab the first key/delimiter pair DissectPair dissectPair = it.next(); @@ -267,13 +267,15 @@ public Map parse(String inputString) { } } //the last key, grab the rest of the input (unless consecutive delimiters already grabbed the last key) - if (!dissectMatch.fullyMatched()) { + //and there is no trailing delimiter + if (!dissectMatch.fullyMatched() && delimiter.length == 0 ) { byte[] value = Arrays.copyOfRange(input, valueStart, input.length); String valueString = new String(value, StandardCharsets.UTF_8); - dissectMatch.add(key, (key.skipRightPadding() ? 
valueString.replaceFirst("\\s++$", "") : valueString)); + dissectMatch.add(key, valueString); } } Map results = dissectMatch.getResults(); + if (!dissectMatch.isValid(results)) { throw new DissectException.FindMatch(pattern, inputString); } diff --git a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java index 8a0f961146a7f..c22cec98eb79a 100644 --- a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java +++ b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java @@ -19,6 +19,8 @@ package org.elasticsearch.dissect; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; import org.elasticsearch.test.ESTestCase; import org.hamcrest.CoreMatchers; import org.hamcrest.Matchers; @@ -26,6 +28,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.Iterator; import java.util.List; import java.util.Map; @@ -88,11 +91,8 @@ public void testLogstashSpecs() { assertBadKey("%{&+a_field}"); assertMatch("%{a->} %{b->}---%{c}", "foo bar------------baz", Arrays.asList("a", "b", "c"), Arrays.asList("foo", "bar", "baz")); - //TODO: support '?' as a named skip key - //Logstash will match "%{?->}-%{a}" to "-----666", however '?' without a corresponding '&' is not allowed here, so the syntax is - //the same minus the '?' 
as tested below -// assertBadKey("%{?->}-%{a}", "?->"); -// assertMatch("%{->}-%{a}", "-----666", Arrays.asList("a"), Arrays.asList("666")); + assertMatch("%{->}-%{a}", "-----666", Arrays.asList("a"), Arrays.asList("666")); + assertMatch("%{?skipme->}-%{a}", "-----666", Arrays.asList("a"), Arrays.asList("666")); assertMatch("%{a},%{b},%{c},%{d},%{e},%{f}", "111,,333,,555,666", Arrays.asList("a", "b", "c", "d", "e", "f"), Arrays.asList("111", "", "333", "", "555", "666")); assertMatch("%{a}.࿏.%{b}", "⟳༒.࿏.༒⟲", Arrays.asList("a", "b"), Arrays.asList("⟳༒", "༒⟲")); @@ -135,8 +135,8 @@ public void testBasicMatchUnicode() { //parallel arrays List expectedKeys = new ArrayList<>(); List expectedValues = new ArrayList<>(); - for (int i = 0; i< randomIntBetween(1,100);i++) { - String key = randomAsciiAlphanumOfLengthBetween(1,100); + for (int i = 0; i < randomIntBetween(1, 100); i++) { + String key = randomAsciiAlphanumOfLengthBetween(1, 100); String value = randomRealisticUnicodeOfCodepointLengthBetween(1, 100); String delimiter = Integer.toString(randomInt()); //int to ensures values and delimiters don't overlap, else validation can fail keyFirstPattern += "%{" + key + "}" + delimiter; @@ -248,15 +248,15 @@ public void testSkipRightPadding() { } public void testTrimmedEnd() { - assertMatch("%{a} %{b}", "foo bar ", Arrays.asList("a", "b"), Arrays.asList("foo", "bar ")); - assertMatch("%{a} %{b->}", "foo bar ", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); + assertMatch("%{a} %{b}", "foo bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); + assertMatch("%{a} %{b->} ", "foo bar ", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); //only whitespace is trimmed in the absence of trailing characters assertMatch("%{a} %{b->}", "foo bar,,,,,,", Arrays.asList("a", "b"), Arrays.asList("foo", "bar,,,,,,")); //consecutive delimiters + right padding can be used to skip over the trailing delimiters assertMatch("%{a} %{b->},", "foo bar,,,,,,", Arrays.asList("a", 
"b"), Arrays.asList("foo", "bar")); } - public void testLeadingDelimiter(){ + public void testLeadingDelimiter() { assertMatch(",,,%{a} %{b}", ",,,foo bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar")); assertMatch(",%{a} %{b}", ",,foo bar", Arrays.asList("a", "b"), Arrays.asList(",foo", "bar")); } @@ -270,12 +270,17 @@ public void testMiss() { assertMiss("%{a}, %{b}", "foo,bar"); assertMiss("x%{a},%{b}", "foo,bar"); assertMiss("x%{},%{b}", "foo,bar"); + assertMiss("leading_delimiter_long%{a}", "foo"); + assertMiss("%{a}trailing_delimiter_long", "foo"); + assertMiss("leading_delimiter_long%{a}trailing_delimiter_long", "foo"); + assertMiss("%{a}x", "foo"); + assertMiss("%{a},%{b}x", "foo,bar"); } /** * Construction errors */ - public void testBadPatternOrKey(){ + public void testBadPatternOrKey() { assertBadPattern(""); assertBadPattern("{}"); assertBadPattern("%{*a} %{&b}"); @@ -290,7 +295,7 @@ public void testSyslog() { Arrays.asList("Mar 16 00:01:25", "evita", "postfix/smtpd", "1713", "connect from camomile.cloud9.net[168.100.1.3]"), " "); } - public void testApacheLog(){ + public void testApacheLog() { assertMatch("%{clientip} %{ident} %{auth} [%{timestamp}] \"%{verb} %{request} HTTP/%{httpversion}\" %{response} %{bytes}" + " \"%{referrer}\" \"%{agent}\" %{->}", "31.184.238.164 - - [24/Jul/2014:05:35:37 +0530] \"GET /logs/access.log HTTP/1.0\" 200 69849 " + @@ -303,21 +308,58 @@ public void testApacheLog(){ " (KHTML, like Gecko) Chrome/30.0.1599.12785 YaBrowser/13.12.1599.12785 Safari/537.36")); } + /** + * Shared specification between Beats, Logstash, and Ingest node + */ + public void testJsonSpecification() throws Exception { + ObjectMapper mapper = new ObjectMapper(); + JsonNode rootNode = mapper.readTree(this.getClass().getResourceAsStream("/specification/tests.json")); + Iterator tests = rootNode.elements(); + while (tests.hasNext()) { + JsonNode test = tests.next(); + boolean skip = test.path("skip").asBoolean(); + if (!skip) { + String name = 
test.path("name").asText(); + logger.debug("Running Json specification: " + name); + String pattern = test.path("tok").asText(); + String input = test.path("msg").asText(); + String append = test.path("append").asText(); + boolean fail = test.path("fail").asBoolean(); + Iterator> expected = test.path("expected").fields(); + List expectedKeys = new ArrayList<>(); + List expectedValues = new ArrayList<>(); + expected.forEachRemaining(entry -> { + expectedKeys.add(entry.getKey()); + expectedValues.add(entry.getValue().asText()); + }); + if (fail) { + assertFail(pattern, input); + } else { + assertMatch(pattern, input, expectedKeys, expectedValues, append); + } + } + } + } + + private DissectException assertFail(String pattern, String input){ + return expectThrows(DissectException.class, () -> new DissectParser(pattern, null).parse(input)); + } + private void assertMiss(String pattern, String input) { - DissectException e = expectThrows(DissectException.class, () -> new DissectParser(pattern, null).parse(input)); + DissectException e = assertFail(pattern, input); assertThat(e.getMessage(), CoreMatchers.containsString("Unable to find match for dissect pattern")); assertThat(e.getMessage(), CoreMatchers.containsString(pattern)); assertThat(e.getMessage(), input == null ? 
CoreMatchers.containsString("null") : CoreMatchers.containsString(input)); } private void assertBadPattern(String pattern) { - DissectException e = expectThrows(DissectException.class, () -> new DissectParser(pattern, null)); + DissectException e = assertFail(pattern, null); assertThat(e.getMessage(), CoreMatchers.containsString("Unable to parse pattern")); assertThat(e.getMessage(), CoreMatchers.containsString(pattern)); } private void assertBadKey(String pattern, String key) { - DissectException e = expectThrows(DissectException.class, () -> new DissectParser(pattern, null)); + DissectException e = assertFail(pattern, null); assertThat(e.getMessage(), CoreMatchers.containsString("Unable to parse key")); assertThat(e.getMessage(), CoreMatchers.containsString(key)); } diff --git a/libs/dissect/src/test/resources/specification/tests.json b/libs/dissect/src/test/resources/specification/tests.json new file mode 100644 index 0000000000000..1cb85ce651940 --- /dev/null +++ b/libs/dissect/src/test/resources/specification/tests.json @@ -0,0 +1,363 @@ +[ + { + "name": "When all the defined fields are captured by we have remaining data", + "tok": "level=%{level} ts=%{timestamp} caller=%{caller} msg=\"%{message}\"", + "msg": "level=info ts=2018-06-27T17:19:13.036579993Z caller=main.go:222 msg=\"Starting OK\" version=\"(version=2.3.1, branch=HEAD, revision=188ca45bd85ce843071e768d855722a9d9dabe03)\"}", + "expected": { + "caller": "main.go:222", + "level": "info", + "message": "Starting OK", + "timestamp": "2018-06-27T17:19:13.036579993Z" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "Complex stack trace", + "tok": "%{day}-%{month}-%{year} %{hour} %{severity} [%{thread_id}] %{origin} %{message}", + "msg": "18-Apr-2018 06:53:20.411 INFO [http-nio-8080-exec-1] org.apache.coyote.http11.Http11Processor.service Error parsing HTTP request header\n Note: further occurrences of HTTP header parsing errors will be logged at DEBUG level.\n 
java.lang.IllegalArgumentException: Invalid character found in method name. HTTP method names must be tokens\n at org.apache.coyote.http11.Http11InputBuffer.parseRequestLine(Http11InputBuffer.java:426)\n at org.apache.coyote.http11.Http11Processor.service(Http11Processor.java:687)\n at org.apache.coyote.AbstractProcessorLight.process(AbstractProcessorLight.java:66)\n at org.apache.coyote.AbstractProtocol$ConnectionHandler.process(AbstractProtocol.java:790)\n at org.apache.tomcat.util.net.NioEndpoint$SocketProcessor.doRun(NioEndpoint.java:1459)\n at org.apache.tomcat.util.net.SocketProcessorBase.run(SocketProcessorBase.java:49)\n at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n at org.apache.tomcat.util.threads.TaskThread$WrappingRunnable.run(TaskThread.java:61)\n at java.lang.Thread.run(Thread.java:748)", + "expected": { + "day": "18", + "hour": "06:53:20.411", + "message": "Error parsing HTTP request header\n Note: further occurrences of HTTP header parsing errors will be logged at DEBUG level.\n java.lang.IllegalArgumentException: Invalid character found in method name. 
HTTP method names must be tokens\n at org.apache.coyote.http11.Http11InputBuffer.parseRequestLine(Http11InputBuffer.java:426)\n at org.apache.coyote.http11.Http11Processor.service(Http11Processor.java:687)\n at org.apache.coyote.AbstractProcessorLight.process(AbstractProcessorLight.java:66)\n at org.apache.coyote.AbstractProtocol$ConnectionHandler.process(AbstractProtocol.java:790)\n at org.apache.tomcat.util.net.NioEndpoint$SocketProcessor.doRun(NioEndpoint.java:1459)\n at org.apache.tomcat.util.net.SocketProcessorBase.run(SocketProcessorBase.java:49)\n at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n at org.apache.tomcat.util.threads.TaskThread$WrappingRunnable.run(TaskThread.java:61)\n at java.lang.Thread.run(Thread.java:748)", + "month": "Apr", + "origin": "org.apache.coyote.http11.Http11Processor.service", + "severity": "INFO", + "thread_id": "http-nio-8080-exec-1", + "year": "2018" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "success when delimiter found at the beginning and end of the string", + "tok": "/var/log/%{key}.log", + "msg": "/var/log/foobar.log", + "expected": { + "key": "foobar" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "fails when delimiter is not found at the beginning of the string", + "tok": "/var/log/%{key}.log", + "msg": "foobar", + "expected": null, + "skip": false, + "fail": true, + "append": "" + }, + { + "name": "fails when delimiter is not found after the key", + "tok": "/var/log/%{key}.log", + "msg": "/var/log/foobar", + "expected": null, + "skip": false, + "fail": true, + "append": "" + }, + { + "name": "simple dissect", + "tok": "%{key}", + "msg": "foobar", + "expected": { + "key": "foobar" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "dissect two replacement", + "tok": "%{key1} %{key2}", + "msg": "foo bar", + "expected": { + 
"key1": "foo", + "key2": "bar" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "fail on partial match", + "tok": "%{key1} %{key2} %{key3}", + "msg": "foo bar", + "expected": null, + "skip": false, + "fail": true, + "append": "" + }, + { + "name": "one level dissect not end of string", + "tok": "/var/%{key}/log", + "msg": "/var/foobar/log", + "expected": { + "key": "foobar" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "one level dissect", + "tok": "/var/%{key}", + "msg": "/var/foobar/log", + "expected": { + "key": "foobar/log" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "multiple keys dissect end of string", + "tok": "/var/%{key}/log/%{key1}", + "msg": "/var/foobar/log/apache", + "expected": { + "key": "foobar", + "key1": "apache" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "multiple keys not end of string", + "tok": "/var/%{key}/log/%{key1}.log", + "msg": "/var/foobar/log/apache.log", + "expected": { + "key": "foobar", + "key1": "apache" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "append with order", + "tok": "%{+key/3} %{+key/1} %{+key/2}", + "msg": "1 2 3", + "expected": { + "key": "231" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "append with order and separator", + "tok": "%{+key/3} %{+key/1} %{+key/2}", + "msg": "1 2 3", + "expected": { + "key": "2::3::1" + }, + "skip": false, + "fail": false, + "append": "::" + }, + { + "name": "append with order and right padding", + "tok": "%{+key/3} %{+key/1-\u003e} %{+key/2}", + "msg": "1 2 3", + "expected": { + "key": "231" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "simple append", + "tok": "%{key}-%{+key}-%{+key}", + "msg": "1-2-3", + "expected": { + "key": "123" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "simple append with separator", + "tok": "%{key}-%{+key}-%{+key}", + "msg": "1-2-3", + 
"expected": { + "key": "1,2,3" + }, + "skip": false, + "fail": false, + "append": "," + }, + { + "name": "reference field", + "tok": "%{*key} %{\u0026key}", + "msg": "hello world", + "expected": { + "hello": "world" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "reference field alt order", + "tok": "%{\u0026key} %{*key}", + "msg": "hello world", + "expected": { + "world": "hello" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "nameless skip field", + "tok": "%{} %{key}", + "msg": "hello world", + "expected": { + "key": "world" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "named skip field", + "tok": "%{?skipme} %{key}", + "msg": "hello world", + "expected": { + "key": "world" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "reference without pairing", + "tok": "%{key} %{\u0026key}", + "msg": "hello world", + "expected": null, + "skip": false, + "fail": true, + "append": "" + }, + { + "name": "missing fields (consecutive delimiters)", + "tok": "%{name},%{addr1},%{addr2},%{addr3},%{city},%{zip}", + "msg": "Jane Doe,4321 Fifth Avenue,,,New York,87432", + "expected": { + "addr1": "4321 Fifth Avenue", + "addr2": "", + "addr3": "", + "city": "New York", + "name": "Jane Doe", + "zip": "87432" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "missing fields with right padding (consecutive delimiters)", + "tok": "%{name},%{addr1-\u003e},%{city},%{zip}", + "msg": "Jane Doe,4321 Fifth Avenue,,,New York,87432", + "expected": { + "addr1": "4321 Fifth Avenue", + "city": "New York", + "name": "Jane Doe", + "zip": "87432" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "ignore right padding", + "tok": "%{id} %{function-\u003e} %{server}", + "msg": "00000043 ViewReceive machine-321", + "expected": { + "function": "ViewReceive", + "id": "00000043", + "server": "machine-321" + }, + "skip": false, + "fail": false, + "append": "" + 
}, + { + "name": "padding on the last key need a delimiter", + "tok": "%{id} %{function} %{server-\u003e} ", + "msg": "00000043 ViewReceive machine-321 ", + "expected": { + "function": "ViewReceive", + "id": "00000043", + "server": "machine-321" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "ignore left padding", + "tok": "%{id-\u003e} %{function} %{server}", + "msg": "00000043 ViewReceive machine-321", + "expected": { + "function": "ViewReceive", + "id": "00000043", + "server": "machine-321" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "when the delimiters contains `{` and `}`", + "tok": "{%{a}}{%{b}} %{rest}", + "msg": "{c}{d} anything", + "expected": { + "a": "c", + "b": "d", + "rest": "anything" + }, + "skip": false, + "fail": false, + "append": "" + }, + { + "name": "no keys defined", + "tok": "anything", + "msg": "anything", + "expected": null, + "skip": false, + "fail": true, + "append": "" + }, + { + "name": "invalid key", + "tok": "%{some?thing}", + "msg": "anything", + "expected": null, + "skip": false, + "fail": true, + "append": "" + }, + { + "name": "matches non-ascii", + "tok": "%{a}࿏%{b} %{c}", + "msg": "⟳༒࿏༒⟲ 子", + "expected": { + "a": "⟳༒", + "b": "༒⟲", + "c": "子" + }, + "skip": false, + "fail": false, + "append": "" + } + +] \ No newline at end of file From c7a54a3ed8a6e5a8f5d45ec6371ea52f56b6ee35 Mon Sep 17 00:00:00 2001 From: Jake Landis Date: Mon, 13 Aug 2018 09:49:04 -0500 Subject: [PATCH 8/9] fix TODO in doc --- .../src/main/java/org/elasticsearch/dissect/DissectParser.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java index 5160eb9a19932..035d428e2a578 100644 --- a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java +++ b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java @@ -170,7 
+170,7 @@ public DissectParser(String pattern, String appendSeparator) { *

Entry point to dissect a string into its parts.

* * @param inputString The string to dissect - * @return TODO: + * @return the key/value Map of the results * @throws DissectException if unable to dissect a pair into it's parts. */ public Map parse(String inputString) { From 9e43ba86006b77535f3fb8d104f1010176670b3d Mon Sep 17 00:00:00 2001 From: Jake Landis Date: Tue, 14 Aug 2018 11:30:12 -0500 Subject: [PATCH 9/9] reduce scope where possible --- .../org/elasticsearch/dissect/DissectKey.java | 3 ++- .../elasticsearch/dissect/DissectMatch.java | 26 +++++++++---------- .../elasticsearch/dissect/DissectParser.java | 9 +++---- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectKey.java b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectKey.java index 0547cf4ea7b19..67a6842182da7 100644 --- a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectKey.java +++ b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectKey.java @@ -166,12 +166,13 @@ public String toString() { this.modifier = modifier; } + //package private for testing static Modifier fromString(String modifier) { return EnumSet.allOf(Modifier.class).stream().filter(km -> km.modifier.equals(modifier)) .findFirst().orElseThrow(() -> new IllegalArgumentException("Found invalid modifier.")); //throw should never happen } - static Modifier findModifier(String key) { + private static Modifier findModifier(String key) { Modifier modifier = Modifier.NONE; if (key != null && !key.isEmpty()) { Matcher matcher = MODIFIER_PATTERN.matcher(key); diff --git a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectMatch.java b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectMatch.java index 451f69e51dd45..9217413e07557 100644 --- a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectMatch.java +++ b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectMatch.java @@ -126,19 +126,19 @@ Map getResults() { /** * a result that will need to be part of 
an append operation. */ - final class AppendResult { + private final class AppendResult { private final List values = new ArrayList<>(); private final String appendSeparator; - AppendResult(String appendSeparator) { + private AppendResult(String appendSeparator) { this.appendSeparator = appendSeparator; } - void addValue(String value, int order) { + private void addValue(String value, int order) { values.add(new AppendValue(value, order)); } - String getAppendResult() { + private String getAppendResult() { Collections.sort(values); return values.stream().map(AppendValue::getValue).collect(Collectors.joining(appendSeparator)); } @@ -147,20 +147,20 @@ String getAppendResult() { /** * An appendable value that can be sorted based on the provided order */ - final class AppendValue implements Comparable { + private final class AppendValue implements Comparable { private final String value; private final int order; - AppendValue(String value, int order) { + private AppendValue(String value, int order) { this.value = value; this.order = order; } - String getValue() { + private String getValue() { return value; } - int getOrder() { + private int getOrder() { return order; } @@ -173,25 +173,25 @@ public int compareTo(AppendValue o) { /** * A result that needs to be converted to a key/value reference */ - final class ReferenceResult { + private final class ReferenceResult { private String key; - String getKey() { + private String getKey() { return key; } - String getValue() { + private String getValue() { return value; } private String value; - void setValue(String value) { + private void setValue(String value) { this.value = value; } - void setKey(String key) { + private void setKey(String key) { this.key = key; } } diff --git a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java index 035d428e2a578..407d73134b611 100644 --- 
a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java +++ b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java @@ -285,24 +285,23 @@ public Map parse(String inputString) { /** * A tuple class to hold the dissect key and delimiter */ - class DissectPair { + private class DissectPair { private final DissectKey key; private final String delimiter; - DissectPair(DissectKey key, String delimiter) { + private DissectPair(DissectKey key, String delimiter) { this.key = key; this.delimiter = delimiter; } - DissectKey getKey() { + private DissectKey getKey() { return key; } - String getDelimiter() { + private String getDelimiter() { return delimiter; } - } }