diff --git a/libs/dissect/build.gradle b/libs/dissect/build.gradle
new file mode 100644
index 0000000000000..c09a2a4ebd1b3
--- /dev/null
+++ b/libs/dissect/build.gradle
@@ -0,0 +1,50 @@
+import org.elasticsearch.gradle.precommit.PrecommitTasks
+
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+archivesBaseName = 'elasticsearch-dissect'
+
+dependencies {
+ if (isEclipse == false || project.path == ":libs:dissect-tests") {
+ testCompile("org.elasticsearch.test:framework:${version}") {
+ exclude group: 'org.elasticsearch', module: 'dissect'
+ }
+ }
+ testCompile "com.fasterxml.jackson.core:jackson-core:${versions.jackson}"
+ testCompile("com.fasterxml.jackson.core:jackson-annotations:${versions.jackson}")
+ testCompile("com.fasterxml.jackson.core:jackson-databind:${versions.jackson}")
+}
+
+forbiddenApisMain {
+ signaturesURLs = [PrecommitTasks.getResource('/forbidden/jdk-signatures.txt')]
+}
+
+if (isEclipse) {
+ // in eclipse the project is under a fake root: ':libs:dissect' re-points only its main source sets, any other path only its test ones
+ sourceSets {
+ if (project.path == ":libs:dissect") {
+ main.java.srcDirs = ['java']
+ main.resources.srcDirs = ['resources']
+ } else {
+ test.java.srcDirs = ['java']
+ test.resources.srcDirs = ['resources']
+ }
+ }
+}
diff --git a/libs/dissect/src/main/eclipse-build.gradle b/libs/dissect/src/main/eclipse-build.gradle
new file mode 100644
index 0000000000000..c2b72bd21e1f1
--- /dev/null
+++ b/libs/dissect/src/main/eclipse-build.gradle
@@ -0,0 +1,3 @@
+
+// this is just a shell gradle file for eclipse to have separate projects for dissect src and tests
+apply from: '../../build.gradle'
diff --git a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectException.java b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectException.java
new file mode 100644
index 0000000000000..a2f1ab336401b
--- /dev/null
+++ b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectException.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.dissect;
+
+/**
+ * Parent class for all dissect related exceptions. Consumers may catch this exception or more specific child exceptions.
+ * This is an unchecked exception (it extends {@link RuntimeException}).
+ * NOTE(review): the nested child exceptions below are declared package-private, so code outside this package can only
+ * catch this parent type - confirm that is intended.
+ */
+public abstract class DissectException extends RuntimeException {
+ DissectException(String message) {
+ super(message);
+ }
+
+ /**
+ * Error while parsing a dissect pattern.
+ * The message is composed of the offending pattern followed by the reason.
+ */
+ static class PatternParse extends DissectException {
+ PatternParse(String pattern, String reason) {
+ super("Unable to parse pattern: " + pattern + " Reason: " + reason);
+ }
+ }
+
+ /**
+ * Error while parsing a dissect key.
+ * The message is composed of the offending key followed by the reason.
+ */
+ static class KeyParse extends DissectException {
+ KeyParse(String key, String reason) {
+ super("Unable to parse key: " + key + " Reason: " + reason);
+ }
+ }
+
+ /**
+ * Unable to find a match between pattern and source string.
+ * The message contains both the pattern and the source string that was being dissected.
+ */
+ static class FindMatch extends DissectException {
+ FindMatch(String pattern, String source) {
+ super("Unable to find match for dissect pattern: " + pattern + " against source: " + source);
+
+ }
+ }
+}
diff --git a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectKey.java b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectKey.java
new file mode 100644
index 0000000000000..67a6842182da7
--- /dev/null
+++ b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectKey.java
@@ -0,0 +1,191 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.dissect;
+
+import java.util.EnumSet;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ *
A Key of a dissect pattern. This class models the name and modifiers and provides some validation.
+ * For dissect pattern of {@code %{a} %{+a} %{b}} the dissect keys are:
+ *
+ * - {@code a}
+ * - {@code +a}
+ * - {@code b}
+ *
+ * This class represents a single key.
+ * A single key is composed of a name and it's modifiers. For the key {@code +a}, {@code a} is the name and {@code +} is the modifier.
+ * @see DissectParser
+ */
+public final class DissectKey {
+ private static final Pattern LEFT_MODIFIER_PATTERN = Pattern.compile("([+*&?])(.*?)(->)?$", Pattern.DOTALL);
+ private static final Pattern RIGHT_PADDING_PATTERN = Pattern.compile("^(.*?)(->)?$", Pattern.DOTALL);
+ private static final Pattern APPEND_WITH_ORDER_PATTERN = Pattern.compile("[+](.*?)(/)([0-9]+)(->)?$", Pattern.DOTALL);
+ private final Modifier modifier;
+ private boolean skip;
+ private boolean skipRightPadding;
+ private int appendPosition;
+ private String name;
+
+ /**
+ * Constructor - parses the String key into it's name and modifier(s)
+ *
+ * @param key The key without the leading %{ or trailing }, for example {@code a->}
+ */
+ DissectKey(String key) {
+ skip = key == null || key.isEmpty();
+ modifier = Modifier.findModifier(key);
+ switch (modifier) {
+ case NONE:
+ Matcher matcher = RIGHT_PADDING_PATTERN.matcher(key);
+ while (matcher.find()) {
+ name = matcher.group(1);
+ skipRightPadding = matcher.group(2) != null;
+ }
+ skip = name.isEmpty();
+ break;
+ case NAMED_SKIP:
+ matcher = LEFT_MODIFIER_PATTERN.matcher(key);
+ while (matcher.find()) {
+ name = matcher.group(2);
+ skipRightPadding = matcher.group(3) != null;
+ }
+ skip = true;
+ break;
+ case APPEND:
+ matcher = LEFT_MODIFIER_PATTERN.matcher(key);
+ while (matcher.find()) {
+ name = matcher.group(2);
+ skipRightPadding = matcher.group(3) != null;
+ }
+ break;
+ case FIELD_NAME:
+ matcher = LEFT_MODIFIER_PATTERN.matcher(key);
+ while (matcher.find()) {
+ name = matcher.group(2);
+ skipRightPadding = matcher.group(3) != null;
+ }
+ break;
+ case FIELD_VALUE:
+ matcher = LEFT_MODIFIER_PATTERN.matcher(key);
+ while (matcher.find()) {
+ name = matcher.group(2);
+ skipRightPadding = matcher.group(3) != null;
+ }
+ break;
+ case APPEND_WITH_ORDER:
+ matcher = APPEND_WITH_ORDER_PATTERN.matcher(key);
+ while (matcher.find()) {
+ name = matcher.group(1);
+ appendPosition = Short.valueOf(matcher.group(3));
+ skipRightPadding = matcher.group(4) != null;
+ }
+ break;
+ }
+
+ if (name == null || (name.isEmpty() && !skip)) {
+ throw new DissectException.KeyParse(key, "The key name could be determined");
+ }
+ }
+
+ /**
+ * Copy constructor to explicitly override the modifier.
+ * @param key The key to copy (except for the modifier)
+ * @param modifier the modifer to use for this copy
+ */
+ DissectKey(DissectKey key, DissectKey.Modifier modifier){
+ this.modifier = modifier;
+ this.skipRightPadding = key.skipRightPadding;
+ this.skip = key.skip;
+ this.name = key.name;
+ this.appendPosition = key.appendPosition;
+ }
+
+ Modifier getModifier() {
+ return modifier;
+ }
+
+ boolean skip() {
+ return skip;
+ }
+
+ boolean skipRightPadding() {
+ return skipRightPadding;
+ }
+
+ int getAppendPosition() {
+ return appendPosition;
+ }
+
+ String getName() {
+ return name;
+ }
+
+ //generated
+ @Override
+ public String toString() {
+ return "DissectKey{" +
+ "modifier=" + modifier +
+ ", skip=" + skip +
+ ", appendPosition=" + appendPosition +
+ ", name='" + name + '\'' +
+ '}';
+ }
+
+ public enum Modifier {
+ NONE(""), APPEND_WITH_ORDER("/"), APPEND("+"), FIELD_NAME("*"), FIELD_VALUE("&"), NAMED_SKIP("?");
+
+ private static final Pattern MODIFIER_PATTERN = Pattern.compile("[/+*&?]");
+
+ private final String modifier;
+
+ @Override
+ public String toString() {
+ return modifier;
+ }
+
+ Modifier(final String modifier) {
+ this.modifier = modifier;
+ }
+
+ //package private for testing
+ static Modifier fromString(String modifier) {
+ return EnumSet.allOf(Modifier.class).stream().filter(km -> km.modifier.equals(modifier))
+ .findFirst().orElseThrow(() -> new IllegalArgumentException("Found invalid modifier.")); //throw should never happen
+ }
+
+ private static Modifier findModifier(String key) {
+ Modifier modifier = Modifier.NONE;
+ if (key != null && !key.isEmpty()) {
+ Matcher matcher = MODIFIER_PATTERN.matcher(key);
+ int matches = 0;
+ while (matcher.find()) {
+ Modifier priorModifier = modifier;
+ modifier = Modifier.fromString(matcher.group());
+ if (++matches > 1 && !(APPEND.equals(priorModifier) && APPEND_WITH_ORDER.equals(modifier))) {
+ throw new DissectException.KeyParse(key, "multiple modifiers are not allowed.");
+ }
+ }
+ }
+ return modifier;
+ }
+ }
+}
diff --git a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectMatch.java b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectMatch.java
new file mode 100644
index 0000000000000..9217413e07557
--- /dev/null
+++ b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectMatch.java
@@ -0,0 +1,198 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.dissect;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * Represents the matches of a {@link DissectParser#parse(String)}. Handles the appending and referencing based on the key instruction.
+ */
+final class DissectMatch {
+
+ private final String appendSeparator;
+ private final Map results;
+ private final Map simpleResults;
+ private final Map referenceResults;
+ private final Map appendResults;
+ private int implicitAppendOrder = -1000;
+ private final int maxMatches;
+ private final int maxResults;
+ private final int appendCount;
+ private final int referenceCount;
+ private final int simpleCount;
+ private int matches = 0;
+
+ DissectMatch(String appendSeparator, int maxMatches, int maxResults, int appendCount, int referenceCount) {
+ if (maxMatches <= 0 || maxResults <= 0) {
+ throw new IllegalArgumentException("Expected results are zero, can not construct DissectMatch");//should never happen
+ }
+ this.maxMatches = maxMatches;
+ this.maxResults = maxResults;
+ this.appendCount = appendCount;
+ this.referenceCount = referenceCount;
+ this.appendSeparator = appendSeparator;
+ results = new HashMap<>(maxResults);
+ this.simpleCount = maxMatches - referenceCount - appendCount;
+ simpleResults = simpleCount <= 0 ? null : new HashMap<>(simpleCount);
+ referenceResults = referenceCount <= 0 ? null : new HashMap<>(referenceCount);
+ appendResults = appendCount <= 0 ? null : new HashMap<>(appendCount);
+ }
+
+ /**
+ * Add the key/value that was found as result of the parsing
+ * @param key the {@link DissectKey}
+ * @param value the discovered value for the key
+ */
+ void add(DissectKey key, String value) {
+ matches++;
+ if (key.skip()) {
+ return;
+ }
+ switch (key.getModifier()) {
+ case NONE:
+ simpleResults.put(key.getName(), value);
+ break;
+ case APPEND:
+ appendResults.computeIfAbsent(key.getName(), k -> new AppendResult(appendSeparator)).addValue(value, implicitAppendOrder++);
+ break;
+ case APPEND_WITH_ORDER:
+ appendResults.computeIfAbsent(key.getName(),
+ k -> new AppendResult(appendSeparator)).addValue(value, key.getAppendPosition());
+ break;
+ case FIELD_NAME:
+ referenceResults.computeIfAbsent(key.getName(), k -> new ReferenceResult()).setKey(value);
+ break;
+ case FIELD_VALUE:
+ referenceResults.computeIfAbsent(key.getName(), k -> new ReferenceResult()).setValue(value);
+ break;
+ }
+ }
+
+ boolean fullyMatched() {
+ return matches == maxMatches;
+ }
+
+ /**
+ * Checks if results are valid.
+ * @param results the results to check
+ * @return true if all dissect keys have been matched and the results are of the expected size.
+ */
+ boolean isValid(Map results) {
+ return fullyMatched() && results.size() == maxResults;
+ }
+
+ /**
+ * Gets all the current matches. Pass the results of this to isValid to determine if a fully successful match has occured.
+ *
+ * @return the map of the results.
+ */
+ Map getResults() {
+ results.clear();
+ if (simpleCount > 0) {
+ results.putAll(simpleResults);
+ }
+ if (referenceCount > 0) {
+ referenceResults.forEach((k, v) -> results.put(v.getKey(), v.getValue()));
+ }
+ if (appendCount > 0) {
+ appendResults.forEach((k, v) -> results.put(k, v.getAppendResult()));
+ }
+
+ return results;
+ }
+
+ /**
+ * a result that will need to be part of an append operation.
+ */
+ private final class AppendResult {
+ private final List values = new ArrayList<>();
+ private final String appendSeparator;
+
+ private AppendResult(String appendSeparator) {
+ this.appendSeparator = appendSeparator;
+ }
+
+ private void addValue(String value, int order) {
+ values.add(new AppendValue(value, order));
+ }
+
+ private String getAppendResult() {
+ Collections.sort(values);
+ return values.stream().map(AppendValue::getValue).collect(Collectors.joining(appendSeparator));
+ }
+ }
+
+ /**
+ * An appendable value that can be sorted based on the provided order
+ */
+ private final class AppendValue implements Comparable {
+ private final String value;
+ private final int order;
+
+ private AppendValue(String value, int order) {
+ this.value = value;
+ this.order = order;
+ }
+
+ private String getValue() {
+ return value;
+ }
+
+ private int getOrder() {
+ return order;
+ }
+
+ @Override
+ public int compareTo(AppendValue o) {
+ return Integer.compare(this.order, o.getOrder());
+ }
+ }
+
+ /**
+ * A result that needs to be converted to a key/value reference
+ */
+ private final class ReferenceResult {
+
+ private String key;
+
+ private String getKey() {
+ return key;
+ }
+
+ private String getValue() {
+ return value;
+ }
+
+ private String value;
+
+ private void setValue(String value) {
+ this.value = value;
+ }
+
+ private void setKey(String key) {
+ this.key = key;
+ }
+ }
+}
diff --git a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java
new file mode 100644
index 0000000000000..407d73134b611
--- /dev/null
+++ b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java
@@ -0,0 +1,310 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.dissect;
+
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.EnumSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.function.Function;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+/**
+ * <p>Splits (dissects) a string into its parts based on a pattern.</p><p>A dissect pattern is composed of a set of keys and delimiters.
+ * For example the dissect pattern: {@code %{a} %{b},%{c}} has 3 keys (a,b,c) and two delimiters (space and comma). This pattern will
+ * match a string of the form: {@code foo bar,baz} and will result a key/value pairing of {@code a=foo, b=bar, and c=baz.}
+ * <p>Matches are all or nothing. For example, the same pattern will NOT match {@code foo bar baz} since all of the delimiters did not
+ * match. (the comma did not match)
+ * <p>Dissect patterns can optionally have modifiers. These modifiers instruct the parser to change its behavior. For example the
+ * dissect pattern of {@code %{a},%{b}:%{c}} would not match {@code foo,bar,baz} since there the colon never matches.
+ * <p>Modifiers appear to the left or the right of the key name. The supported modifiers are:
+ * <ul>
+ * <li>{@code ->} Instructs the parser to ignore repeating delimiters to the right of the key. Example: <pre>
+ * pattern: {@code %{a->} %{b} %{c}}
+ * string: {@code foo         bar baz}
+ * result: {@code a=foo, b=bar, c=baz}
+ * </pre></li>
+ * <li>{@code +} Instructs the parser to append this key's value to the value of the prior key with the same name.
+ * Example: <pre>
+ * pattern: {@code %{a} %{+a} %{+a}}
+ * string: {@code foo bar baz}
+ * result: {@code a=foobarbaz}
+ * </pre></li>
+ * <li>{@code /} Instructs the parser to append this key's value to the value of a key based on the order specified after the
+ * {@code /}. Requires the {@code +} modifier to also be present in the key. Example: <pre>
+ * pattern: {@code %{a} %{+a/2} %{+a/1}}
+ * string: {@code foo bar baz}
+ * result: {@code a=foobazbar}
+ * </pre>
+ * </li>
+ * <li>{@code *} Instructs the parser to ignore the name of this key, instead use the value of key as the key name.
+ * Requires another key with the same name and the {@code &} modifier to be the value. Example: <pre>
+ * pattern: {@code %{*a} %{b} %{&a}}
+ * string: {@code foo bar baz}
+ * result: {@code foo=baz, b=bar}
+ * </pre></li>
+ * <li>{@code &} Instructs the parser to ignore this key and place the matched value to a key of the same name with the {@code *} modifier.
+ * Requires another key with the same name and the {@code *} modifier.
+ * Example: <pre>
+ * pattern: {@code %{*a} %{b} %{&a}}
+ * string: {@code foo bar baz}
+ * result: {@code foo=baz, b=bar}
+ * </pre></li>
+ * <li>{@code ?} Instructs the parser to ignore this key. The key name exists only for the purpose of human readability. Example
+ * <pre>
+ * pattern: {@code %{a} %{?skipme} %{c}}
+ * string: {@code foo bar baz}
+ * result: {@code a=foo, c=baz}
+ * </pre>
+ * </ul>
+ * <p>Empty key names patterns are also supported. They behave just like the {@code ?} modifier, except the name is not required.
+ * The result will simply be ignored. Example
+ * <pre>
+ * pattern: {@code %{a} %{} %{c}}
+ * string: {@code foo bar baz}
+ * result: {@code a=foo, c=baz}
+ * </pre>
+ *
+ * <p>
+ * Inspired by the Logstash Dissect Filter by Guy Boertje
+ */
+public final class DissectParser {
+    private static final Pattern LEADING_DELIMITER_PATTERN = Pattern.compile("^(.*?)%");
+    private static final Pattern KEY_DELIMITER_FIELD_PATTERN = Pattern.compile("%\\{([^}]*?)}([^%]*)", Pattern.DOTALL);
+    private static final EnumSet<DissectKey.Modifier> ASSOCIATE_MODIFIERS = EnumSet.of(
+        DissectKey.Modifier.FIELD_NAME,
+        DissectKey.Modifier.FIELD_VALUE);
+    private static final EnumSet<DissectKey.Modifier> APPEND_MODIFIERS = EnumSet.of(
+        DissectKey.Modifier.APPEND,
+        DissectKey.Modifier.APPEND_WITH_ORDER);
+    private static final Function<DissectPair, String> KEY_NAME = val -> val.getKey().getName();
+    private final List<DissectPair> matchPairs;
+    private final String pattern;
+    private final String leadingDelimiter;
+    private final int maxMatches;
+    private final int maxResults;
+    private final int appendCount;
+    private final int referenceCount;
+    private final String appendSeparator;
+
+    /**
+     * Parses and validates the dissect pattern.
+     *
+     * @param pattern the dissect pattern, for example {@code %{a} %{b},%{c}}
+     * @param appendSeparator the separator to place between appended values, null is treated as the empty string
+     * @throws DissectException if the pattern or one of its keys can not be parsed
+     */
+    public DissectParser(String pattern, String appendSeparator) {
+        this.pattern = pattern;
+        this.appendSeparator = appendSeparator == null ? "" : appendSeparator;
+        //everything before the first '%' is a delimiter that is not preceded by a key
+        Matcher matcher = LEADING_DELIMITER_PATTERN.matcher(pattern);
+        this.leadingDelimiter = matcher.find() ? matcher.group(1) : "";
+        List<DissectPair> parsedPairs = new ArrayList<>();
+        matcher = KEY_DELIMITER_FIELD_PATTERN.matcher(pattern.substring(leadingDelimiter.length()));
+        while (matcher.find()) {
+            DissectKey key = new DissectKey(matcher.group(1));
+            String delimiter = matcher.group(2);
+            parsedPairs.add(new DissectPair(key, delimiter));
+        }
+        this.maxMatches = parsedPairs.size();
+        this.maxResults = (int) parsedPairs.stream()
+            .filter(dissectPair -> !dissectPair.getKey().skip()).map(KEY_NAME).distinct().count();
+        if (this.maxMatches == 0 || maxResults == 0) {
+            throw new DissectException.PatternParse(pattern, "Unable to find any keys or delimiters.");
+        }
+        //append validation - look through all of the keys to see if there are any keys that need to participate in an append operation
+        // but don't have the '+' defined
+        Set<String> appendKeyNames = parsedPairs.stream()
+            .filter(dissectPair -> APPEND_MODIFIERS.contains(dissectPair.getKey().getModifier()))
+            .map(KEY_NAME).collect(Collectors.toSet());
+        List<DissectPair> effectivePairs = parsedPairs;
+        if (!appendKeyNames.isEmpty()) {
+            effectivePairs = new ArrayList<>(parsedPairs.size());
+            for (DissectPair p : parsedPairs) {
+                if (p.getKey().getModifier() == DissectKey.Modifier.NONE && appendKeyNames.contains(p.getKey().getName())) {
+                    effectivePairs.add(new DissectPair(new DissectKey(p.getKey(), DissectKey.Modifier.APPEND), p.getDelimiter()));
+                } else {
+                    effectivePairs.add(p);
+                }
+            }
+        }
+        appendCount = appendKeyNames.size();
+
+        //reference validation - ensure that '*' and '&' come in pairs
+        //NOTE(review): only the number of keys per name is checked, two '*' keys with the same name would also pass
+        Map<String, List<DissectPair>> referenceGroupings = effectivePairs.stream()
+            .filter(dissectPair -> ASSOCIATE_MODIFIERS.contains(dissectPair.getKey().getModifier()))
+            .collect(Collectors.groupingBy(KEY_NAME));
+        for (Map.Entry<String, List<DissectPair>> entry : referenceGroupings.entrySet()) {
+            if (entry.getValue().size() != 2) {
+                throw new DissectException.PatternParse(pattern, "Found invalid key/reference associations: '"
+                    + entry.getValue().stream().map(KEY_NAME).collect(Collectors.joining(",")) +
+                    "' Please ensure each '*' is matched with a matching '&'");
+            }
+        }
+
+        referenceCount = referenceGroupings.size() * 2;
+        this.matchPairs = Collections.unmodifiableList(effectivePairs);
+    }
+
+
+    /**
+     * Entry point to dissect a string into its parts.
+     *
+     * @param inputString The string to dissect
+     * @return the key/value Map of the results
+     * @throws DissectException if unable to dissect a pair into its parts.
+     */
+    public Map<String, String> parse(String inputString) {
+        /*
+         * This implements a naive string matching algorithm. The string is walked left to right, comparing each byte against
+         * another string's bytes looking for matches. If the bytes match, then a second cursor looks ahead to see if all the bytes
+         * of the other string matches. If they all match, record it and advances the primary cursor to the match point. If it can not match
+         * all of the bytes then progress the main cursor. Repeat till the end of the input string. Since the string being searching for
+         * (the delimiter) is generally small and rare the naive approach is efficient.
+         *
+         * In this case the string that is walked is the input string, and the string being searched for is the current delimiter.
+         * For example for a dissect pattern of {@code %{a},%{b}:%{c}} the delimiters (comma then colon) are searched for in the
+         * input string. At class construction the list of keys+delimiters are found (dissectPairs), which allows the use of that ordered
+         * list to know which delimiter to use for the search. The delimiters is progressed once the current delimiter is matched.
+         *
+         * There are two special cases that requires additional parsing beyond the standard naive algorithm. Consecutive delimiters should
+         * results in a empty matches unless the {@code ->} is provided. For example given the dissect pattern of
+         * {@code %{a},%{b},%{c},%{d}} and input string of {@code foo,,,} the match should be successful with empty values for b,c and d.
+         * However, if the key modifier {@code ->}, is present it will simply skip over any delimiters just to the right of the key
+         * without assigning any values. For example {@code %{a->},%{b}} will match the input string of {@code foo,,,,,,bar} with a=foo and
+         * b=bar.
+         */
+        DissectMatch dissectMatch = new DissectMatch(appendSeparator, maxMatches, maxResults, appendCount, referenceCount);
+        Iterator<DissectPair> it = matchPairs.iterator();
+        //ensure leading delimiter matches
+        if (inputString != null && inputString.length() > leadingDelimiter.length()
+            && inputString.startsWith(leadingDelimiter)) {
+            byte[] input = inputString.getBytes(StandardCharsets.UTF_8);
+            //grab the first key/delimiter pair
+            DissectPair dissectPair = it.next();
+            DissectKey key = dissectPair.getKey();
+            byte[] delimiter = dissectPair.getDelimiter().getBytes(StandardCharsets.UTF_8);
+            //start dissection after the first delimiter; the input is walked as UTF-8 bytes, so the offset has to be the
+            //encoded length of the leading delimiter, not its number of chars
+            int i = leadingDelimiter.getBytes(StandardCharsets.UTF_8).length;
+            int valueStart = i;
+            //start walking the input string byte by byte, look ahead for matches where needed
+            //if a match is found jump forward to the end of the match
+            for (; i < input.length; i++) {
+                //found a full delimiter match
+                if (delimiter.length > 0 && delimiterAt(input, i, delimiter)) {
+                    //record the key/value tuple
+                    byte[] value = Arrays.copyOfRange(input, valueStart, i);
+                    dissectMatch.add(key, new String(value, StandardCharsets.UTF_8));
+                    //jump to the end of the match
+                    i += delimiter.length;
+                    //look for consecutive delimiters (e.g. a,,,,d,e)
+                    while (i < input.length && delimiterAt(input, i, delimiter)) {
+                        //jump to the end of the match
+                        i += delimiter.length;
+                        if (!key.skipRightPadding()) {
+                            //progress the keys/delimiter if possible
+                            if (!it.hasNext()) {
+                                break; //the while loop
+                            }
+                            dissectPair = it.next();
+                            key = dissectPair.getKey();
+                            //add the key with an empty value for the empty delimiter
+                            dissectMatch.add(key, "");
+                        }
+                    }
+                    //progress the keys/delimiter if possible
+                    if (!it.hasNext()) {
+                        break; //the for loop
+                    }
+                    dissectPair = it.next();
+                    key = dissectPair.getKey();
+                    delimiter = dissectPair.getDelimiter().getBytes(StandardCharsets.UTF_8);
+                    //i is always one byte after the last found delimiter, aka the start of the next value
+                    valueStart = i;
+                }
+            }
+            //the last key, grab the rest of the input (unless consecutive delimiters already grabbed the last key)
+            //and there is no trailing delimiter
+            if (!dissectMatch.fullyMatched() && delimiter.length == 0 ) {
+                byte[] value = Arrays.copyOfRange(input, valueStart, input.length);
+                String valueString = new String(value, StandardCharsets.UTF_8);
+                dissectMatch.add(key, valueString);
+            }
+        }
+        Map<String, String> results = dissectMatch.getResults();
+
+        if (!dissectMatch.isValid(results)) {
+            throw new DissectException.FindMatch(pattern, inputString);
+        }
+        return results;
+    }
+
+    /**
+     * Checks if all bytes of the delimiter are found in the input starting at the given offset.
+     */
+    private static boolean delimiterAt(byte[] input, int offset, byte[] delimiter) {
+        if (offset + delimiter.length > input.length) {
+            return false;
+        }
+        for (int j = 0; j < delimiter.length; j++) {
+            if (input[offset + j] != delimiter[j]) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * A tuple class to hold the dissect key and delimiter
+     */
+    private static class DissectPair {
+
+        private final DissectKey key;
+        private final String delimiter;
+
+        private DissectPair(DissectKey key, String delimiter) {
+            this.key = key;
+            this.delimiter = delimiter;
+        }
+
+        private DissectKey getKey() {
+            return key;
+        }
+
+        private String getDelimiter() {
+            return delimiter;
+        }
+    }
+
+}
+
+
+
diff --git a/libs/dissect/src/test/eclipse-build.gradle b/libs/dissect/src/test/eclipse-build.gradle
new file mode 100644
index 0000000000000..56d632f23b129
--- /dev/null
+++ b/libs/dissect/src/test/eclipse-build.gradle
@@ -0,0 +1,7 @@
+
+// this is just a shell gradle file for eclipse to have separate projects for dissect src and tests
+apply from: '../../build.gradle'
+
+dependencies {
+ testCompile project(':libs:dissect')
+}
diff --git a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectKeyTests.java b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectKeyTests.java
new file mode 100644
index 0000000000000..0f3f7ed041df5
--- /dev/null
+++ b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectKeyTests.java
@@ -0,0 +1,178 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.dissect;
+
+import org.elasticsearch.test.ESTestCase;
+import org.hamcrest.CoreMatchers;
+
+import java.util.EnumSet;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.is;
+
+public class DissectKeyTests extends ESTestCase {
+
+    public void testNoModifier() {
+        String keyName = randomAlphaOfLengthBetween(1, 10); //a bare name, no modifier characters
+        DissectKey dissectKey = new DissectKey(keyName);
+        assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NONE));
+        assertThat(dissectKey.skip(), is(false));
+        assertThat(dissectKey.skipRightPadding(), is(false));
+        assertThat(dissectKey.getAppendPosition(), equalTo(0));
+        assertThat(dissectKey.getName(), equalTo(keyName));
+    }
+
+    public void testAppendModifier() {
+        String keyName = randomAlphaOfLengthBetween(1, 10);
+        DissectKey dissectKey = new DissectKey("+" + keyName); //leading '+' is the append modifier
+        assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.APPEND));
+        assertThat(dissectKey.skip(), is(false));
+        assertThat(dissectKey.skipRightPadding(), is(false));
+        assertThat(dissectKey.getAppendPosition(), equalTo(0));
+        assertThat(dissectKey.getName(), equalTo(keyName));
+    }
+
+    public void testAppendWithOrderModifier() {
+        String keyName = randomAlphaOfLengthBetween(1, 10);
+        int length = randomIntBetween(1, 100);
+        DissectKey dissectKey = new DissectKey("+" + keyName + "/" + length); //the '/N' suffix is reported as the append position
+        assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.APPEND_WITH_ORDER));
+        assertThat(dissectKey.skip(), is(false));
+        assertThat(dissectKey.skipRightPadding(), is(false));
+        assertThat(dissectKey.getAppendPosition(), equalTo(length));
+        assertThat(dissectKey.getName(), equalTo(keyName));
+    }
+
+    public void testAppendWithOrderModifierNoName() {
+        int length = randomIntBetween(1, 100);
+        DissectException e = expectThrows(DissectException.class, () -> new DissectKey("+/" + length));
+        assertThat(e.getMessage(), CoreMatchers.containsString("Unable to parse key"));
+    }
+
+    public void testOrderModifierWithoutAppend() {
+        String keyName = randomAlphaOfLengthBetween(1, 10);
+        int length = randomIntBetween(1, 100);
+        DissectException e = expectThrows(DissectException.class, () -> new DissectKey(keyName + "/" + length));
+        assertThat(e.getMessage(), CoreMatchers.containsString("Unable to parse key"));
+    }
+
+    public void testFieldNameModifier() {
+        String keyName = randomAlphaOfLengthBetween(1, 10);
+        DissectKey dissectKey = new DissectKey("*" + keyName);
+        assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.FIELD_NAME));
+        assertThat(dissectKey.skip(), is(false));
+        assertThat(dissectKey.skipRightPadding(), is(false));
+        assertThat(dissectKey.getAppendPosition(), equalTo(0));
+        assertThat(dissectKey.getName(), equalTo(keyName));
+    }
+
+    public void testFieldValueModifiers() {
+        String keyName = randomAlphaOfLengthBetween(1, 10);
+        DissectKey dissectKey = new DissectKey("&" + keyName);
+        assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.FIELD_VALUE));
+        assertThat(dissectKey.skip(), is(false));
+        assertThat(dissectKey.skipRightPadding(), is(false));
+        assertThat(dissectKey.getAppendPosition(), equalTo(0));
+        assertThat(dissectKey.getName(), equalTo(keyName));
+    }
+
+    public void testRightPaddingModifiers() {
+        String keyName = randomAlphaOfLengthBetween(1, 10);
+        DissectKey dissectKey = new DissectKey(keyName + "->"); //trailing '->' requests right padding to be skipped
+        assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NONE));
+        assertThat(dissectKey.skip(), is(false));
+        assertThat(dissectKey.skipRightPadding(), is(true));
+        assertThat(dissectKey.getAppendPosition(), equalTo(0));
+        assertThat(dissectKey.getName(), equalTo(keyName));
+
+        dissectKey = new DissectKey("*" + keyName + "->");
+        assertThat(dissectKey.skipRightPadding(), is(true));
+
+        dissectKey = new DissectKey("&" + keyName + "->");
+        assertThat(dissectKey.skipRightPadding(), is(true));
+
+        dissectKey = new DissectKey("+" + keyName + "->");
+        assertThat(dissectKey.skipRightPadding(), is(true));
+
+        dissectKey = new DissectKey("?" + keyName + "->");
+        assertThat(dissectKey.skipRightPadding(), is(true));
+
+        dissectKey = new DissectKey("+" + keyName + "/2->");
+        assertThat(dissectKey.skipRightPadding(), is(true));
+    }
+
+    public void testMultipleLeftModifiers() {
+        String keyName = randomAlphaOfLengthBetween(1, 10);
+        List<String> validModifiers = EnumSet.allOf(DissectKey.Modifier.class).stream()
+            .filter(m -> !m.equals(DissectKey.Modifier.NONE))
+            .map(DissectKey.Modifier::toString)
+            .collect(Collectors.toList());
+        String modifier1 = randomFrom(validModifiers);
+        String modifier2 = randomFrom(validModifiers);
+        DissectException e = expectThrows(DissectException.class, () -> new DissectKey(modifier1 + modifier2 + keyName));
+        assertThat(e.getMessage(), CoreMatchers.containsString("Unable to parse key"));
+    }
+
+    public void testSkipKey() {
+        String keyName = ""; //an empty key name is reported as a skip key
+        DissectKey dissectKey = new DissectKey(keyName);
+        assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NONE));
+        assertThat(dissectKey.skip(), is(true));
+        assertThat(dissectKey.skipRightPadding(), is(false));
+        assertThat(dissectKey.getAppendPosition(), equalTo(0));
+        assertThat(dissectKey.getName(), equalTo(keyName));
+    }
+    public void testNamedSkipKey() {
+        String keyName = "myname";
+        DissectKey dissectKey = new DissectKey("?" + keyName);
+        assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NAMED_SKIP));
+        assertThat(dissectKey.skip(), is(true));
+        assertThat(dissectKey.skipRightPadding(), is(false));
+        assertThat(dissectKey.getAppendPosition(), equalTo(0));
+        assertThat(dissectKey.getName(), equalTo(keyName));
+    }
+
+    public void testSkipKeyWithPadding() {
+        String keyName = "";
+        DissectKey dissectKey = new DissectKey(keyName + "->");
+        assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NONE));
+        assertThat(dissectKey.skip(), is(true));
+        assertThat(dissectKey.skipRightPadding(), is(true));
+        assertThat(dissectKey.getAppendPosition(), equalTo(0));
+        assertThat(dissectKey.getName(), equalTo(keyName));
+    }
+    public void testNamedEmptySkipKeyWithPadding() {
+        String keyName = "";
+        DissectKey dissectKey = new DissectKey("?" + keyName + "->");
+        assertThat(dissectKey.getModifier(), equalTo(DissectKey.Modifier.NAMED_SKIP));
+        assertThat(dissectKey.skip(), is(true));
+        assertThat(dissectKey.skipRightPadding(), is(true));
+        assertThat(dissectKey.getAppendPosition(), equalTo(0));
+        assertThat(dissectKey.getName(), equalTo(keyName));
+    }
+
+    public void testInvalidModifiers() {
+        //should never happen due to regex
+        IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> DissectKey.Modifier.fromString("x"));
+        assertThat(e.getMessage(), CoreMatchers.containsString("invalid modifier"));
+    }
+}
diff --git a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectMatchTests.java b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectMatchTests.java
new file mode 100644
index 0000000000000..d562afb636308
--- /dev/null
+++ b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectMatchTests.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.dissect;
+
+import org.elasticsearch.common.collect.MapBuilder;
+import org.elasticsearch.test.ESTestCase;
+
+import java.nio.charset.StandardCharsets;
+import java.util.Map;
+import java.util.stream.IntStream;
+
+import static org.hamcrest.Matchers.equalTo;
+
+public class DissectMatchTests extends ESTestCase {
+
+    public void testIllegalArgs() {
+        expectThrows(IllegalArgumentException.class, () -> new DissectMatch("", 0, 1, 0, 0));
+        expectThrows(IllegalArgumentException.class, () -> new DissectMatch("", 1, 0, 0, 0));
+    }
+
+    public void testValidAndFullyMatched() {
+        int expectedMatches = randomIntBetween(1, 26);
+        DissectMatch dissectMatch = new DissectMatch("", expectedMatches, expectedMatches, 0, 0);
+        IntStream.range(97, 97 + expectedMatches)  //97 is 'a': one distinct single-letter key (a-z) per expected match
+            .forEach(i -> dissectMatch.add(new DissectKey(new String(new byte[]{(byte) i}, StandardCharsets.UTF_8)), ""));
+        assertThat(dissectMatch.fullyMatched(), equalTo(true));
+        assertThat(dissectMatch.isValid(dissectMatch.getResults()), equalTo(true));
+    }
+
+    public void testNotValidAndFullyMatched() {
+        int expectedMatches = randomIntBetween(1, 26);
+        DissectMatch dissectMatch = new DissectMatch("", expectedMatches, expectedMatches, 0, 0);
+        IntStream.range(97, 97 + expectedMatches - 1)  //one key short of the expected count, using a-z key names
+            .forEach(i -> dissectMatch.add(new DissectKey(new String(new byte[]{(byte) i}, StandardCharsets.UTF_8)), ""));
+        assertThat(dissectMatch.fullyMatched(), equalTo(false));
+        assertThat(dissectMatch.isValid(dissectMatch.getResults()), equalTo(false));
+    }
+
+    public void testGetResultsIdempotent() {
+        int expectedMatches = randomIntBetween(1, 26);
+        DissectMatch dissectMatch = new DissectMatch("", expectedMatches, expectedMatches, 0, 0);
+        IntStream.range(97, 97 + expectedMatches)  //allow for a-z values
+            .forEach(i -> dissectMatch.add(new DissectKey(new String(new byte[]{(byte) i}, StandardCharsets.UTF_8)), ""));
+        assertThat(dissectMatch.getResults(), equalTo(dissectMatch.getResults()));
+    }
+
+    public void testAppend() {
+        DissectMatch dissectMatch = new DissectMatch("-", 3, 1, 3, 0); //"-" is expected between the appended values
+        dissectMatch.add(new DissectKey("+a"), "x");
+        dissectMatch.add(new DissectKey("+a"), "y");
+        dissectMatch.add(new DissectKey("+a"), "z");
+        Map<String, String> results = dissectMatch.getResults();
+        assertThat(dissectMatch.isValid(results), equalTo(true));
+        assertThat(results, equalTo(MapBuilder.newMapBuilder().put("a", "x-y-z").map()));
+    }
+
+    public void testAppendWithOrder() {
+        DissectMatch dissectMatch = new DissectMatch("-", 3, 1, 3, 0);
+        dissectMatch.add(new DissectKey("+a/3"), "x");
+        dissectMatch.add(new DissectKey("+a"), "y");
+        dissectMatch.add(new DissectKey("+a/1"), "z");
+        Map<String, String> results = dissectMatch.getResults();
+        assertThat(dissectMatch.isValid(results), equalTo(true));
+        assertThat(results, equalTo(MapBuilder.newMapBuilder().put("a", "y-z-x").map()));
+    }
+
+    public void testReference() {
+        DissectMatch dissectMatch = new DissectMatch("-", 2, 1, 0, 1);
+        dissectMatch.add(new DissectKey("&a"), "x"); //the '&' match is expected as the value
+        dissectMatch.add(new DissectKey("*a"), "y"); //the '*' match is expected as the field name
+        Map<String, String> results = dissectMatch.getResults();
+        assertThat(dissectMatch.isValid(results), equalTo(true));
+        assertThat(results, equalTo(MapBuilder.newMapBuilder().put("y", "x").map()));
+    }
+
+}
diff --git a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java
new file mode 100644
index 0000000000000..c22cec98eb79a
--- /dev/null
+++ b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java
@@ -0,0 +1,386 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.dissect;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import org.elasticsearch.test.ESTestCase;
+import org.hamcrest.CoreMatchers;
+import org.hamcrest.Matchers;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import static com.carrotsearch.randomizedtesting.RandomizedTest.randomAsciiAlphanumOfLengthBetween;
+
+public class DissectParserTests extends ESTestCase {
+
+    public void testJavaDocExamples() {
+        assertMatch("%{a} %{b},%{c}", "foo bar,baz", Arrays.asList("a", "b", "c"), Arrays.asList("foo", "bar", "baz"));
+        assertMiss("%{a},%{b}:%{c}", "foo,bar,baz");
+        assertMatch("%{a->} %{b} %{c}", "foo bar baz", Arrays.asList("a", "b", "c"), Arrays.asList("foo", "bar", "baz"));
+        assertMatch("%{a} %{+a} %{+a}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foobarbaz"));
+        assertMatch("%{a} %{+a/2} %{+a/1}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foobazbar"));
+        assertMatch("%{*a} %{b} %{&a}", "foo bar baz", Arrays.asList("foo", "b"), Arrays.asList("baz", "bar"));
+        assertMatch("%{a} %{} %{c}", "foo bar baz", Arrays.asList("a", "c"), Arrays.asList("foo", "baz"));
+        assertMatch("%{a} %{?skipme} %{c}", "foo bar baz", Arrays.asList("a", "c"), Arrays.asList("foo", "baz"));
+        assertMatch("%{a},%{b},%{c},%{d}", "foo,,,", Arrays.asList("a", "b", "c", "d"), Arrays.asList("foo", "", "", ""));
+        assertMatch("%{a->},%{b}", "foo,,,,,,bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
+    }
+
+    /**
+     * Borrowed from Logstash's test cases:
+     * https://github.com/logstash-plugins/logstash-filter-dissect/blob/master/src/test/java/org/logstash/dissect/DissectorTest.java
+     * Append Note - Logstash appends with the delimiter as the separator between values, this uses a user defined separator
+     */
+    public void testLogstashSpecs() {
+        assertMatch("%{a} %{b->} %{c}", "foo bar baz", Arrays.asList("a", "b", "c"), Arrays.asList("foo", "bar", "baz"));
+        assertMiss("%{a}%{b} %{c}", null);
+        assertMiss("%{a} %{b}%{c} %{d}", "foo bar baz");
+        assertMiss("%{a} %{b} %{c}%{d}", "foo bar baz quux");
+        assertMatch("%{a} %{b->} %{c}", "foo bar baz", Arrays.asList("a", "b", "c"), Arrays.asList("foo", "bar", "baz"));
+        assertMatch("%{a} %{} %{c}", "foo bar baz", Arrays.asList("a", "c"), Arrays.asList("foo", "baz"));
+        assertMatch("%{a} %{b} %{+b} %{z}", "foo bar baz quux", Arrays.asList("a", "b", "z"), Arrays.asList("foo", "bar baz", "quux"), " ");
+        assertMatch("%{a}------->%{b}", "foo------->bar baz quux", Arrays.asList("a", "b"), Arrays.asList("foo", "bar baz quux"));
+        assertMatch("%{a}------->%{}", "foo------->bar baz quux", Arrays.asList("a"), Arrays.asList("foo"));
+        assertMatch("%{a} » %{b}»%{c}€%{d}", "foo » bar»baz€quux",
+            Arrays.asList("a", "b", "c", "d"), Arrays.asList("foo", "bar", "baz", "quux"));
+        assertMatch("%{a} %{b} %{+a}", "foo bar baz quux", Arrays.asList("a", "b"), Arrays.asList("foo baz quux", "bar"), " ");
+        //Logstash supports implicit ordering anchored by the key without the '+'
+        //This implementation will only honor implicit ordering for appending right to left else explicit order (/N) is required.
+        //The results of this test differ from Logstash.
+        assertMatch("%{+a} %{a} %{+a} %{b}", "December 31 1999 quux",
+            Arrays.asList("a", "b"), Arrays.asList("December 31 1999", "quux"), " ");
+        //Same test as above, but with same result as Logstash using explicit ordering in the pattern
+        assertMatch("%{+a/1} %{a} %{+a/2} %{b}", "December 31 1999 quux",
+            Arrays.asList("a", "b"), Arrays.asList("31 December 1999", "quux"), " ");
+        assertMatch("%{+a/2} %{+a/4} %{+a/1} %{+a/3}", "bar quux foo baz", Arrays.asList("a"), Arrays.asList("foo bar baz quux"), " ");
+        assertMatch("%{+a} %{b}", "foo bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
+        assertMatch("%{+a} %{b} %{+a} %{c}", "foo bar baz quux",
+            Arrays.asList("a", "b", "c"), Arrays.asList("foo baz", "bar", "quux"), " ");
+        assertMatch("%{} %{syslog_timestamp} %{hostname} %{rt}: %{reason} %{+reason} %{src_ip}/%{src_port}->%{dst_ip}/%{dst_port} " +
+                "%{polrt} %{+polrt} %{+polrt} %{from_zone} %{to_zone} %{rest}",
+            "42 2016-05-25T14:47:23Z host.name.com RT_FLOW - RT_FLOW_SESSION_DENY: session denied 2.2.2.20/60000->1.1.1.10/8090 None " +
+                "6(0) DEFAULT-DENY ZONE-UNTRUST ZONE-DMZ UNKNOWN UNKNOWN N/A(N/A) ge-0/0/0.0",
+            Arrays.asList("syslog_timestamp", "hostname", "rt", "reason", "src_ip", "src_port", "dst_ip", "dst_port", "polrt"
+                , "from_zone", "to_zone", "rest"),
+            Arrays.asList("2016-05-25T14:47:23Z", "host.name.com", "RT_FLOW - RT_FLOW_SESSION_DENY", "session denied", "2.2.2.20", "60000"
+                , "1.1.1.10", "8090", "None 6(0) DEFAULT-DENY", "ZONE-UNTRUST", "ZONE-DMZ", "UNKNOWN UNKNOWN N/A(N/A) ge-0/0/0.0"), " ");
+        assertBadKey("%{+/2}");
+        assertBadKey("%{&+a_field}");
+        assertMatch("%{a->} %{b->}---%{c}", "foo bar------------baz",
+            Arrays.asList("a", "b", "c"), Arrays.asList("foo", "bar", "baz"));
+        assertMatch("%{->}-%{a}", "-----666", Arrays.asList("a"), Arrays.asList("666"));
+        assertMatch("%{?skipme->}-%{a}", "-----666", Arrays.asList("a"), Arrays.asList("666"));
+        assertMatch("%{a},%{b},%{c},%{d},%{e},%{f}", "111,,333,,555,666",
+            Arrays.asList("a", "b", "c", "d", "e", "f"), Arrays.asList("111", "", "333", "", "555", "666"));
+        assertMatch("%{a}.࿏.%{b}", "⟳༒.࿏.༒⟲", Arrays.asList("a", "b"), Arrays.asList("⟳༒", "༒⟲"));
+        assertMatch("%{a}", "子", Arrays.asList("a"), Arrays.asList("子"));
+        assertMatch("%{a}{\n}%{b}", "aaa{\n}bbb", Arrays.asList("a", "b"), Arrays.asList("aaa", "bbb"));
+        assertMiss("MACHINE[%{a}] %{b}", "1234567890 MACHINE[foo] bar");
+        assertMiss("%{a} %{b} %{c}", "foo:bar:baz");
+        assertMatch("/var/%{key1}/log/%{key2}.log", "/var/foo/log/bar.log", Arrays.asList("key1", "key2"), Arrays.asList("foo", "bar"));
+        assertMatch("%{a->} %{b}-.-%{c}-%{d}-..-%{e}-%{f}-%{g}-%{h}", "foo bar-.-baz-1111-..-22-333-4444-55555",
+            Arrays.asList("a", "b", "c", "d", "e", "f", "g", "h"),
+            Arrays.asList("foo", "bar", "baz", "1111", "22", "333", "4444", "55555"));
+    }
+
+    public void testBasicMatch() {
+        String valueFirstInput = "";
+        String keyFirstPattern = "";
+        String delimiterFirstInput = "";
+        String delimiterFirstPattern = "";
+        //parallel arrays
+        List<String> expectedKeys = Arrays.asList(generateRandomStringArray(100, 10, false, false));
+        List<String> expectedValues = new ArrayList<>(expectedKeys.size());
+        for (String key : expectedKeys) {
+            String value = randomAsciiAlphanumOfLengthBetween(1, 100);
+            String delimiter = Integer.toString(randomInt()); //int to ensure values and delimiters don't overlap, else validation can fail
+            keyFirstPattern += "%{" + key + "}" + delimiter;
+            valueFirstInput += value + delimiter;
+            delimiterFirstPattern += delimiter + "%{" + key + "}";
+            delimiterFirstInput += delimiter + value;
+            expectedValues.add(value);
+        }
+        assertMatch(keyFirstPattern, valueFirstInput, expectedKeys, expectedValues);
+        assertMatch(delimiterFirstPattern, delimiterFirstInput, expectedKeys, expectedValues);
+    }
+
+    public void testBasicMatchUnicode() {
+        String valueFirstInput = "";
+        String keyFirstPattern = "";
+        String delimiterFirstInput = "";
+        String delimiterFirstPattern = "";
+        //parallel arrays
+        List<String> expectedKeys = new ArrayList<>();
+        List<String> expectedValues = new ArrayList<>();
+        for (int i = 0, count = randomIntBetween(1, 100); i < count; i++) { //pick the bound once, not on every iteration
+            String key = randomAsciiAlphanumOfLengthBetween(1, 100);
+            String value = randomRealisticUnicodeOfCodepointLengthBetween(1, 100);
+            String delimiter = Integer.toString(randomInt()); //int to ensure values and delimiters don't overlap, else validation can fail
+            keyFirstPattern += "%{" + key + "}" + delimiter;
+            valueFirstInput += value + delimiter;
+            delimiterFirstPattern += delimiter + "%{" + key + "}";
+            delimiterFirstInput += delimiter + value;
+            expectedKeys.add(key);
+            expectedValues.add(value);
+        }
+        assertMatch(keyFirstPattern, valueFirstInput, expectedKeys, expectedValues);
+        assertMatch(delimiterFirstPattern, delimiterFirstInput, expectedKeys, expectedValues);
+    }
+
+    public void testMatchUnicode() {
+        assertMatch("%{a} %{b}", "foo 子", Arrays.asList("a", "b"), Arrays.asList("foo", "子"));
+        assertMatch("%{a}࿏%{b} %{c}", "⟳༒࿏༒⟲ 子", Arrays.asList("a", "b", "c"), Arrays.asList("⟳༒", "༒⟲", "子"));
+        assertMatch("%{a}࿏%{+a} %{+a}", "⟳༒࿏༒⟲ 子", Arrays.asList("a"), Arrays.asList("⟳༒༒⟲子"));
+        assertMatch("%{a}࿏%{+a/2} %{+a/1}", "⟳༒࿏༒⟲ 子", Arrays.asList("a"), Arrays.asList("⟳༒子༒⟲"));
+        assertMatch("%{a->}࿏%{b}", "⟳༒࿏࿏࿏࿏࿏༒⟲", Arrays.asList("a", "b"), Arrays.asList("⟳༒", "༒⟲"));
+        assertMatch("%{*a}࿏%{&a}", "⟳༒࿏༒⟲", Arrays.asList("⟳༒"), Arrays.asList("༒⟲"));
+        assertMatch("%{}࿏%{a}", "⟳༒࿏༒⟲", Arrays.asList("a"), Arrays.asList("༒⟲"));
+    }
+
+    public void testMatchRemainder() {
+        assertMatch("%{a}", "foo bar the rest", Arrays.asList("a"), Arrays.asList("foo bar the rest"));
+        assertMatch("%{a} %{b}", "foo bar the rest", Arrays.asList("a", "b"), Arrays.asList("foo", "bar the rest"));
+        assertMatch("%{} %{b}", "foo bar the rest", Arrays.asList("b"), Arrays.asList("bar the rest"));
+        assertMatch("%{a} %{b->}", "foo bar the rest", Arrays.asList("a", "b"), Arrays.asList("foo", "bar the rest"));
+        assertMatch("%{*a} %{&a}", "foo bar the rest", Arrays.asList("foo"), Arrays.asList("bar the rest"));
+        assertMatch("%{a} %{+a}", "foo bar the rest", Arrays.asList("a"), Arrays.asList("foo bar the rest"), " ");
+    }
+
+    public void testAppend() {
+        assertMatch("%{a} %{+a} %{+a}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foobarbaz"));
+        assertMatch("%{a} %{+a} %{b} %{+b}", "foo bar baz lol", Arrays.asList("a", "b"), Arrays.asList("foobar", "bazlol"));
+        assertMatch("%{a} %{+a/2} %{+a/1}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foobazbar"));
+        assertMatch("%{a} %{+a/2} %{+a/1}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foo baz bar"), " ");
+    }
+
+    public void testAssociate() {
+        assertMatch("%{*a} %{&a}", "foo bar", Arrays.asList("foo"), Arrays.asList("bar"));
+        assertMatch("%{&a} %{*a}", "foo bar", Arrays.asList("bar"), Arrays.asList("foo"));
+        assertMatch("%{*a} %{&a} %{*b} %{&b}", "foo bar baz lol", Arrays.asList("foo", "baz"), Arrays.asList("bar", "lol"));
+        assertMatch("%{*a} %{&a} %{c} %{*b} %{&b}", "foo bar x baz lol",
+            Arrays.asList("foo", "baz", "c"), Arrays.asList("bar", "lol", "x"));
+        assertBadPattern("%{*a} %{a}");
+        assertBadPattern("%{a} %{&a}");
+        assertMiss("%{*a} %{&a} {a} %{*b} %{&b}", "foo bar x baz lol");
+    }
+
+    public void testAppendAndAssociate() {
+        assertMatch("%{a} %{+a} %{*b} %{&b}", "foo bar baz lol", Arrays.asList("a", "baz"), Arrays.asList("foobar", "lol"));
+        assertMatch("%{a->} %{+a/2} %{+a/1} %{*b} %{&b}", "foo bar baz lol x",
+            Arrays.asList("a", "lol"), Arrays.asList("foobazbar", "x"));
+    }
+
+    public void testEmptyKey() {
+        assertMatch("%{} %{b}", "foo bar", Arrays.asList("b"), Arrays.asList("bar"));
+        assertMatch("%{a} %{}", "foo bar", Arrays.asList("a"), Arrays.asList("foo"));
+        assertMatch("%{->} %{b}", "foo bar", Arrays.asList("b"), Arrays.asList("bar"));
+        assertMatch("%{->} %{b}", " bar", Arrays.asList("b"), Arrays.asList("bar"));
+        assertMatch("%{a} %{->}", "foo bar ", Arrays.asList("a"), Arrays.asList("foo"));
+    }
+
+    public void testNamedSkipKey() {
+        assertMatch("%{?foo} %{b}", "foo bar", Arrays.asList("b"), Arrays.asList("bar"));
+        assertMatch("%{?} %{b}", "foo bar", Arrays.asList("b"), Arrays.asList("bar"));
+        assertMatch("%{a} %{?bar}", "foo bar", Arrays.asList("a"), Arrays.asList("foo"));
+        assertMatch("%{?foo->} %{b}", "foo bar", Arrays.asList("b"), Arrays.asList("bar"));
+        assertMatch("%{?->} %{b}", "foo bar", Arrays.asList("b"), Arrays.asList("bar"));
+        assertMatch("%{?foo->} %{b}", " bar", Arrays.asList("b"), Arrays.asList("bar"));
+        assertMatch("%{a} %{->?bar}", "foo bar ", Arrays.asList("a"), Arrays.asList("foo"));
+        assertMatch("%{a} %{?skipme} %{?skipme}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foo"));
+        assertMatch("%{a} %{?} %{?}", "foo bar baz", Arrays.asList("a"), Arrays.asList("foo"));
+    }
+
+    public void testConsecutiveDelimiters() {
+        //leading
+        assertMatch("%{->},%{a}", ",,,,,foo", Arrays.asList("a"), Arrays.asList("foo"));
+        assertMatch("%{a->},%{b}", ",,,,,foo", Arrays.asList("a", "b"), Arrays.asList("", "foo"));
+        //trailing
+        assertMatch("%{a->},", "foo,,,,,", Arrays.asList("a"), Arrays.asList("foo"));
+        assertMatch("%{a} %{b},", "foo bar,,,,,", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
+        assertMatch("%{a} %{b->},", "foo bar,,,,,", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
+        //middle
+        assertMatch("%{a->},%{b}", "foo,,,,,bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
+        assertMatch("%{a->} %{b}", "foo bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
+        assertMatch("%{a->}x%{b}", "fooxxxxxbar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
+        assertMatch("%{a->} xyz%{b}", "foo xyz xyz xyz xyz xyzbar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
+        //skipped with empty values
+        assertMatch("%{a},%{b},%{c},%{d}", "foo,,,", Arrays.asList("a", "b", "c", "d"), Arrays.asList("foo", "", "", ""));
+        assertMatch("%{a},%{b},%{c},%{d}", "foo,,bar,baz", Arrays.asList("a", "b", "c", "d"), Arrays.asList("foo", "", "bar", "baz"));
+        assertMatch("%{a},%{b},%{c},%{d}", "foo,,,baz", Arrays.asList("a", "b", "c", "d"), Arrays.asList("foo", "", "", "baz"));
+        assertMatch("%{a},%{b},%{c},%{d}", ",bar,,baz", Arrays.asList("a", "b", "c", "d"), Arrays.asList("", "bar", "", "baz"));
+        assertMatch("%{->},%{a->},%{b}", ",,,bar,,baz", Arrays.asList("a", "b"), Arrays.asList("bar", "baz"));
+    }
+
+    public void testAppendWithConsecutiveDelimiters() {
+        assertMatch("%{+a/1},%{+a/3}-%{+a/2} %{b}", "foo,bar----baz lol", Arrays.asList("a", "b"), Arrays.asList("foobar", ""));
+        assertMatch("%{+a/1},%{+a/3->}-%{+a/2} %{b}", "foo,bar----baz lol", Arrays.asList("a", "b"), Arrays.asList("foobazbar", "lol"));
+    }
+
+    public void testSkipRightPadding() {
+        assertMatch("%{a->} %{b}", "foo bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
+        assertMatch("%{a->} %{b}", "foo bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
+        assertMatch("%{->} %{a}", "foo bar", Arrays.asList("a"), Arrays.asList("bar"));
+        assertMatch("%{a->} %{+a->} %{*b->} %{&b->} %{c}", "foo bar baz lol x",
+            Arrays.asList("a", "baz", "c"), Arrays.asList("foobar", "lol", "x"));
+    }
+
+    public void testTrimmedEnd() {
+        assertMatch("%{a} %{b}", "foo bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
+        assertMatch("%{a} %{b->} ", "foo bar ", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
+        //only whitespace is trimmed in the absence of trailing characters
+        assertMatch("%{a} %{b->}", "foo bar,,,,,,", Arrays.asList("a", "b"), Arrays.asList("foo", "bar,,,,,,"));
+        //consecutive delimiters + right padding can be used to skip over the trailing delimiters
+        assertMatch("%{a} %{b->},", "foo bar,,,,,,", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
+    }
+
+    public void testLeadingDelimiter() {
+        assertMatch(",,,%{a} %{b}", ",,,foo bar", Arrays.asList("a", "b"), Arrays.asList("foo", "bar"));
+        assertMatch(",%{a} %{b}", ",,foo bar", Arrays.asList("a", "b"), Arrays.asList(",foo", "bar"));
+    }
+
+    /**
+     * Runtime errors
+     */
+    public void testMiss() {
+        assertMiss("%{a}%{b}", "foo");
+        assertMiss("%{a},%{b}", "foo bar");
+        assertMiss("%{a}, %{b}", "foo,bar");
+        assertMiss("x%{a},%{b}", "foo,bar");
+        assertMiss("x%{},%{b}", "foo,bar");
+        assertMiss("leading_delimiter_long%{a}", "foo");
+        assertMiss("%{a}trailing_delimiter_long", "foo");
+        assertMiss("leading_delimiter_long%{a}trailing_delimiter_long", "foo");
+        assertMiss("%{a}x", "foo");
+        assertMiss("%{a},%{b}x", "foo,bar");
+    }
+
+    /**
+     * Construction errors
+     */
+    public void testBadPatternOrKey() {
+        assertBadPattern("");
+        assertBadPattern("{}");
+        assertBadPattern("%{*a} %{&b}");
+        assertBadKey("%{*}");
+        assertBadKey("%{++}");
+    }
+
+    public void testSyslog() {
+        assertMatch("%{timestamp} %{+timestamp} %{+timestamp} %{logsource} %{program}[%{pid}]: %{message}",
+            "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from camomile.cloud9.net[168.100.1.3]",
+            Arrays.asList("timestamp", "logsource", "program", "pid", "message"),
+            Arrays.asList("Mar 16 00:01:25", "evita", "postfix/smtpd", "1713", "connect from camomile.cloud9.net[168.100.1.3]"), " ");
+    }
+
+    public void testApacheLog() {
+        assertMatch("%{clientip} %{ident} %{auth} [%{timestamp}] \"%{verb} %{request} HTTP/%{httpversion}\" %{response} %{bytes}" +
+                " \"%{referrer}\" \"%{agent}\" %{->}",
+            "31.184.238.164 - - [24/Jul/2014:05:35:37 +0530] \"GET /logs/access.log HTTP/1.0\" 200 69849 " +
+                "\"http://8rursodiol.enjin.com\" \"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) " +
+                "Chrome/30.0.1599.12785 YaBrowser/13.12.1599.12785 Safari/537.36\" \"www.dlwindianrailways.com\"",
+            Arrays.asList("clientip", "ident", "auth", "timestamp", "verb", "request", "httpversion", "response", "bytes",
+                "referrer", "agent"),
+            Arrays.asList("31.184.238.164", "-", "-", "24/Jul/2014:05:35:37 +0530", "GET", "/logs/access.log", "1.0", "200", "69849",
+                "http://8rursodiol.enjin.com", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36" +
+                    " (KHTML, like Gecko) Chrome/30.0.1599.12785 YaBrowser/13.12.1599.12785 Safari/537.36"));
+    }
+
+    /**
+     * Shared specification between Beats, Logstash, and Ingest node
+     */
+    public void testJsonSpecification() throws Exception {
+        ObjectMapper mapper = new ObjectMapper();
+        JsonNode rootNode = mapper.readTree(this.getClass().getResourceAsStream("/specification/tests.json"));
+        Iterator<JsonNode> tests = rootNode.elements();
+        while (tests.hasNext()) {
+            JsonNode test = tests.next();
+            boolean skip = test.path("skip").asBoolean(); //entries flagged "skip" are not executed
+            if (!skip) {
+                String name = test.path("name").asText();
+                logger.debug("Running Json specification: " + name);
+                String pattern = test.path("tok").asText();
+                String input = test.path("msg").asText();
+                String append = test.path("append").asText();
+                boolean fail = test.path("fail").asBoolean();
+                Iterator<Map.Entry<String, JsonNode>> expected = test.path("expected").fields();
+                List<String> expectedKeys = new ArrayList<>();
+                List<String> expectedValues = new ArrayList<>();
+                expected.forEachRemaining(entry -> {
+                    expectedKeys.add(entry.getKey());
+                    expectedValues.add(entry.getValue().asText());
+                });
+                if (fail) {
+                    assertFail(pattern, input);
+                } else {
+                    assertMatch(pattern, input, expectedKeys, expectedValues, append);
+                }
+            }
+        }
+    }
+
+    private DissectException assertFail(String pattern, String input) { //covers both constructing the parser and parsing the input
+        return expectThrows(DissectException.class, () -> new DissectParser(pattern, null).parse(input));
+    }
+
+    private void assertMiss(String pattern, String input) {
+        DissectException e = assertFail(pattern, input);
+        assertThat(e.getMessage(), CoreMatchers.containsString("Unable to find match for dissect pattern"));
+        assertThat(e.getMessage(), CoreMatchers.containsString(pattern));
+        assertThat(e.getMessage(), input == null ? CoreMatchers.containsString("null") : CoreMatchers.containsString(input));
+    }
+
+    private void assertBadPattern(String pattern) {
+        DissectException e = assertFail(pattern, null);
+        assertThat(e.getMessage(), CoreMatchers.containsString("Unable to parse pattern"));
+        assertThat(e.getMessage(), CoreMatchers.containsString(pattern));
+    }
+
+    private void assertBadKey(String pattern, String key) {
+        DissectException e = assertFail(pattern, null);
+        assertThat(e.getMessage(), CoreMatchers.containsString("Unable to parse key"));
+        assertThat(e.getMessage(), CoreMatchers.containsString(key));
+    }
+
+    private void assertBadKey(String pattern) {
+        assertBadKey(pattern, pattern.replace("%{", "").replace("}", "")); //strip the %{ } wrapper to get the bare key text
+    }
+
+    private void assertMatch(String pattern, String input, List<String> expectedKeys, List<String> expectedValues) {
+        assertMatch(pattern, input, expectedKeys, expectedValues, null);
+    }
+
+    private void assertMatch(String pattern, String input, List<String> expectedKeys, List<String> expectedValues, String appendSeparator) {
+        Map<String, String> results = new DissectParser(pattern, appendSeparator).parse(input);
+        List<String> foundKeys = new ArrayList<>(results.keySet());
+        List<String> foundValues = new ArrayList<>(results.values());
+        Collections.sort(foundKeys); //keys and values are compared as independently sorted lists, not as pairs
+        Collections.sort(foundValues);
+        Collections.sort(expectedKeys); //sorts the caller's lists in place
+        Collections.sort(expectedValues);
+        assertThat(foundKeys, Matchers.equalTo(expectedKeys));
+        assertThat(foundValues, Matchers.equalTo(expectedValues));
+    }
+}
diff --git a/libs/dissect/src/test/resources/specification/tests.json b/libs/dissect/src/test/resources/specification/tests.json
new file mode 100644
index 0000000000000..1cb85ce651940
--- /dev/null
+++ b/libs/dissect/src/test/resources/specification/tests.json
@@ -0,0 +1,363 @@
+[
+ {
+ "name": "When all the defined fields are captured but we have remaining data",
+ "tok": "level=%{level} ts=%{timestamp} caller=%{caller} msg=\"%{message}\"",
+ "msg": "level=info ts=2018-06-27T17:19:13.036579993Z caller=main.go:222 msg=\"Starting OK\" version=\"(version=2.3.1, branch=HEAD, revision=188ca45bd85ce843071e768d855722a9d9dabe03)\"}",
+ "expected": {
+ "caller": "main.go:222",
+ "level": "info",
+ "message": "Starting OK",
+ "timestamp": "2018-06-27T17:19:13.036579993Z"
+ },
+ "skip": false,
+ "fail": false,
+ "append": ""
+ },
+ {
+ "name": "Complex stack trace",
+ "tok": "%{day}-%{month}-%{year} %{hour} %{severity} [%{thread_id}] %{origin} %{message}",
+ "msg": "18-Apr-2018 06:53:20.411 INFO [http-nio-8080-exec-1] org.apache.coyote.http11.Http11Processor.service Error parsing HTTP request header\n Note: further occurrences of HTTP header parsing errors will be logged at DEBUG level.\n java.lang.IllegalArgumentException: Invalid character found in method name. HTTP method names must be tokens\n at org.apache.coyote.http11.Http11InputBuffer.parseRequestLine(Http11InputBuffer.java:426)\n at org.apache.coyote.http11.Http11Processor.service(Http11Processor.java:687)\n at org.apache.coyote.AbstractProcessorLight.process(AbstractProcessorLight.java:66)\n at org.apache.coyote.AbstractProtocol$ConnectionHandler.process(AbstractProtocol.java:790)\n at org.apache.tomcat.util.net.NioEndpoint$SocketProcessor.doRun(NioEndpoint.java:1459)\n at org.apache.tomcat.util.net.SocketProcessorBase.run(SocketProcessorBase.java:49)\n at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n at org.apache.tomcat.util.threads.TaskThread$WrappingRunnable.run(TaskThread.java:61)\n at java.lang.Thread.run(Thread.java:748)",
+ "expected": {
+ "day": "18",
+ "hour": "06:53:20.411",
+ "message": "Error parsing HTTP request header\n Note: further occurrences of HTTP header parsing errors will be logged at DEBUG level.\n java.lang.IllegalArgumentException: Invalid character found in method name. HTTP method names must be tokens\n at org.apache.coyote.http11.Http11InputBuffer.parseRequestLine(Http11InputBuffer.java:426)\n at org.apache.coyote.http11.Http11Processor.service(Http11Processor.java:687)\n at org.apache.coyote.AbstractProcessorLight.process(AbstractProcessorLight.java:66)\n at org.apache.coyote.AbstractProtocol$ConnectionHandler.process(AbstractProtocol.java:790)\n at org.apache.tomcat.util.net.NioEndpoint$SocketProcessor.doRun(NioEndpoint.java:1459)\n at org.apache.tomcat.util.net.SocketProcessorBase.run(SocketProcessorBase.java:49)\n at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n at org.apache.tomcat.util.threads.TaskThread$WrappingRunnable.run(TaskThread.java:61)\n at java.lang.Thread.run(Thread.java:748)",
+ "month": "Apr",
+ "origin": "org.apache.coyote.http11.Http11Processor.service",
+ "severity": "INFO",
+ "thread_id": "http-nio-8080-exec-1",
+ "year": "2018"
+ },
+ "skip": false,
+ "fail": false,
+ "append": ""
+ },
+ {
+ "name": "success when delimiter found at the beginning and end of the string",
+ "tok": "/var/log/%{key}.log",
+ "msg": "/var/log/foobar.log",
+ "expected": {
+ "key": "foobar"
+ },
+ "skip": false,
+ "fail": false,
+ "append": ""
+ },
+ {
+ "name": "fails when delimiter is not found at the beginning of the string",
+ "tok": "/var/log/%{key}.log",
+ "msg": "foobar",
+ "expected": null,
+ "skip": false,
+ "fail": true,
+ "append": ""
+ },
+ {
+ "name": "fails when delimiter is not found after the key",
+ "tok": "/var/log/%{key}.log",
+ "msg": "/var/log/foobar",
+ "expected": null,
+ "skip": false,
+ "fail": true,
+ "append": ""
+ },
+ {
+ "name": "simple dissect",
+ "tok": "%{key}",
+ "msg": "foobar",
+ "expected": {
+ "key": "foobar"
+ },
+ "skip": false,
+ "fail": false,
+ "append": ""
+ },
+ {
+ "name": "dissect two replacement",
+ "tok": "%{key1} %{key2}",
+ "msg": "foo bar",
+ "expected": {
+ "key1": "foo",
+ "key2": "bar"
+ },
+ "skip": false,
+ "fail": false,
+ "append": ""
+ },
+ {
+ "name": "fail on partial match",
+ "tok": "%{key1} %{key2} %{key3}",
+ "msg": "foo bar",
+ "expected": null,
+ "skip": false,
+ "fail": true,
+ "append": ""
+ },
+ {
+ "name": "one level dissect not end of string",
+ "tok": "/var/%{key}/log",
+ "msg": "/var/foobar/log",
+ "expected": {
+ "key": "foobar"
+ },
+ "skip": false,
+ "fail": false,
+ "append": ""
+ },
+ {
+ "name": "one level dissect",
+ "tok": "/var/%{key}",
+ "msg": "/var/foobar/log",
+ "expected": {
+ "key": "foobar/log"
+ },
+ "skip": false,
+ "fail": false,
+ "append": ""
+ },
+ {
+ "name": "multiple keys dissect end of string",
+ "tok": "/var/%{key}/log/%{key1}",
+ "msg": "/var/foobar/log/apache",
+ "expected": {
+ "key": "foobar",
+ "key1": "apache"
+ },
+ "skip": false,
+ "fail": false,
+ "append": ""
+ },
+ {
+ "name": "multiple keys not end of string",
+ "tok": "/var/%{key}/log/%{key1}.log",
+ "msg": "/var/foobar/log/apache.log",
+ "expected": {
+ "key": "foobar",
+ "key1": "apache"
+ },
+ "skip": false,
+ "fail": false,
+ "append": ""
+ },
+ {
+ "name": "append with order",
+ "tok": "%{+key/3} %{+key/1} %{+key/2}",
+ "msg": "1 2 3",
+ "expected": {
+ "key": "231"
+ },
+ "skip": false,
+ "fail": false,
+ "append": ""
+ },
+ {
+ "name": "append with order and separator",
+ "tok": "%{+key/3} %{+key/1} %{+key/2}",
+ "msg": "1 2 3",
+ "expected": {
+ "key": "2::3::1"
+ },
+ "skip": false,
+ "fail": false,
+ "append": "::"
+ },
+ {
+ "name": "append with order and right padding",
+ "tok": "%{+key/3} %{+key/1-\u003e} %{+key/2}",
+ "msg": "1 2 3",
+ "expected": {
+ "key": "231"
+ },
+ "skip": false,
+ "fail": false,
+ "append": ""
+ },
+ {
+ "name": "simple append",
+ "tok": "%{key}-%{+key}-%{+key}",
+ "msg": "1-2-3",
+ "expected": {
+ "key": "123"
+ },
+ "skip": false,
+ "fail": false,
+ "append": ""
+ },
+ {
+ "name": "simple append with separator",
+ "tok": "%{key}-%{+key}-%{+key}",
+ "msg": "1-2-3",
+ "expected": {
+ "key": "1,2,3"
+ },
+ "skip": false,
+ "fail": false,
+ "append": ","
+ },
+ {
+ "name": "reference field",
+ "tok": "%{*key} %{\u0026key}",
+ "msg": "hello world",
+ "expected": {
+ "hello": "world"
+ },
+ "skip": false,
+ "fail": false,
+ "append": ""
+ },
+ {
+ "name": "reference field alt order",
+ "tok": "%{\u0026key} %{*key}",
+ "msg": "hello world",
+ "expected": {
+ "world": "hello"
+ },
+ "skip": false,
+ "fail": false,
+ "append": ""
+ },
+ {
+ "name": "nameless skip field",
+ "tok": "%{} %{key}",
+ "msg": "hello world",
+ "expected": {
+ "key": "world"
+ },
+ "skip": false,
+ "fail": false,
+ "append": ""
+ },
+ {
+ "name": "named skip field",
+ "tok": "%{?skipme} %{key}",
+ "msg": "hello world",
+ "expected": {
+ "key": "world"
+ },
+ "skip": false,
+ "fail": false,
+ "append": ""
+ },
+ {
+ "name": "reference without pairing",
+ "tok": "%{key} %{\u0026key}",
+ "msg": "hello world",
+ "expected": null,
+ "skip": false,
+ "fail": true,
+ "append": ""
+ },
+ {
+ "name": "missing fields (consecutive delimiters)",
+ "tok": "%{name},%{addr1},%{addr2},%{addr3},%{city},%{zip}",
+ "msg": "Jane Doe,4321 Fifth Avenue,,,New York,87432",
+ "expected": {
+ "addr1": "4321 Fifth Avenue",
+ "addr2": "",
+ "addr3": "",
+ "city": "New York",
+ "name": "Jane Doe",
+ "zip": "87432"
+ },
+ "skip": false,
+ "fail": false,
+ "append": ""
+ },
+ {
+ "name": "missing fields with right padding (consecutive delimiters)",
+ "tok": "%{name},%{addr1-\u003e},%{city},%{zip}",
+ "msg": "Jane Doe,4321 Fifth Avenue,,,New York,87432",
+ "expected": {
+ "addr1": "4321 Fifth Avenue",
+ "city": "New York",
+ "name": "Jane Doe",
+ "zip": "87432"
+ },
+ "skip": false,
+ "fail": false,
+ "append": ""
+ },
+ {
+ "name": "ignore right padding",
+ "tok": "%{id} %{function-\u003e} %{server}",
+ "msg": "00000043 ViewReceive machine-321",
+ "expected": {
+ "function": "ViewReceive",
+ "id": "00000043",
+ "server": "machine-321"
+ },
+ "skip": false,
+ "fail": false,
+ "append": ""
+ },
+ {
+ "name": "padding on the last key needs a delimiter",
+ "tok": "%{id} %{function} %{server-\u003e} ",
+ "msg": "00000043 ViewReceive machine-321 ",
+ "expected": {
+ "function": "ViewReceive",
+ "id": "00000043",
+ "server": "machine-321"
+ },
+ "skip": false,
+ "fail": false,
+ "append": ""
+ },
+ {
+ "name": "ignore left padding",
+ "tok": "%{id-\u003e} %{function} %{server}",
+ "msg": "00000043 ViewReceive machine-321",
+ "expected": {
+ "function": "ViewReceive",
+ "id": "00000043",
+ "server": "machine-321"
+ },
+ "skip": false,
+ "fail": false,
+ "append": ""
+ },
+ {
+ "name": "when the delimiters contain `{` and `}`",
+ "tok": "{%{a}}{%{b}} %{rest}",
+ "msg": "{c}{d} anything",
+ "expected": {
+ "a": "c",
+ "b": "d",
+ "rest": "anything"
+ },
+ "skip": false,
+ "fail": false,
+ "append": ""
+ },
+ {
+ "name": "no keys defined",
+ "tok": "anything",
+ "msg": "anything",
+ "expected": null,
+ "skip": false,
+ "fail": true,
+ "append": ""
+ },
+ {
+ "name": "invalid key",
+ "tok": "%{some?thing}",
+ "msg": "anything",
+ "expected": null,
+ "skip": false,
+ "fail": true,
+ "append": ""
+ },
+ {
+ "name": "matches non-ascii",
+ "tok": "%{a}࿏%{b} %{c}",
+ "msg": "⟳༒࿏༒⟲ 子",
+ "expected": {
+ "a": "⟳༒",
+ "b": "༒⟲",
+ "c": "子"
+ },
+ "skip": false,
+ "fail": false,
+ "append": ""
+ }
+
+]
\ No newline at end of file