Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,15 @@
*/
public final class GrokPatternCreator {

private static String PREFACE = "preface";
private static String EPILOGUE = "epilogue";
private static final String PREFACE = "preface";
private static final String EPILOGUE = "epilogue";

/**
* The first match in this list will be chosen, so it needs to be ordered
* such that more generic patterns come after more specific patterns.
*/
private static final List<GrokPatternCandidate> ORDERED_CANDIDATE_GROK_PATTERNS = Arrays.asList(
new GrokPatternCandidate("TOMCAT_DATESTAMP", "timestamp"),
new GrokPatternCandidate("TIMESTAMP_ISO8601", "timestamp"),
new GrokPatternCandidate("DATESTAMP_RFC822", "timestamp"),
new GrokPatternCandidate("DATESTAMP_RFC2822", "timestamp"),
Expand All @@ -41,7 +42,6 @@ public final class GrokPatternCreator {
new GrokPatternCandidate("SYSLOGTIMESTAMP", "timestamp"),
new GrokPatternCandidate("HTTPDATE", "timestamp"),
new GrokPatternCandidate("CATALINA_DATESTAMP", "timestamp"),
new GrokPatternCandidate("TOMCAT_DATESTAMP", "timestamp"),
new GrokPatternCandidate("CISCOTIMESTAMP", "timestamp"),
new GrokPatternCandidate("DATE", "date"),
new GrokPatternCandidate("TIME", "time"),
Expand All @@ -56,12 +56,10 @@ public final class GrokPatternCreator {
new GrokPatternCandidate("IP", "ipaddress"),
// This already includes pre/post break conditions
new GrokPatternCandidate("QUOTEDSTRING", "field", "", ""),
// Can't use \b as the break before, because it doesn't work for negative numbers (the
// minus sign is not a "word" character)
new GrokPatternCandidate("NUMBER", "field", "(?<!\\w)"),
// Disallow +, - and . before hex numbers, otherwise this pattern will pick up base 10
// numbers that NUMBER rejected due to preceding characters
new GrokPatternCandidate("BASE16NUM", "field", "(?<![\\w.+-])")
// Disallow +, - and . before numbers, as well as "word" characters, otherwise we'll pick
// up numeric suffixes too eagerly
new GrokPatternCandidate("NUMBER", "field", "(?<![\\w.+-])", "(?![\\w+-]|\\.\\d)"),
new GrokPatternCandidate("BASE16NUM", "field", "(?<![\\w.+-])", "(?![\\w+-]|\\.\\w)")
// TODO: also unfortunately can't have USERNAME in the list as it matches too broadly
// Fixing these problems with overly broad matches would require some extra intelligence
// to be added to remove inappropriate matches. One idea would be to use a dictionary,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,40 @@ public void testAppendBestGrokMatchForStringsGivenTimestampsAndLogLevels() {
assertEquals(".+?%{TIMESTAMP_ISO8601:timestamp}.+?%{LOGLEVEL:loglevel}.+?", overallGrokPatternBuilder.toString());
}

public void testAppendBestGrokMatchForStringsGivenTomcatDatestamps() {
    // Guards the candidate ordering: if the patterns are tried in the wrong
    // order, the leading portion of a Tomcat datestamp can be matched as an
    // ISO8601 timestamp instead.
    Collection<String> tomcatSamples = Arrays.asList(
        "2018-09-03 17:03:28,269 +0100 | ERROR | ",
        "2018-09-03 17:04:27,279 +0100 | DEBUG | ",
        "2018-09-03 17:05:26,289 +0100 | ERROR | ");

    StringBuilder grokPattern = new StringBuilder();
    Map<String, Integer> fieldNameCounts = new HashMap<>();

    GrokPatternCreator.appendBestGrokMatchForStrings(fieldNameCounts, grokPattern, false, false, tomcatSamples);

    // The whole datestamp must be captured as TOMCAT_DATESTAMP, followed by the log level.
    String expectedPattern = ".*?%{TOMCAT_DATESTAMP:timestamp}.+?%{LOGLEVEL:loglevel}.+?";
    assertEquals(expectedPattern, grokPattern.toString());
}

public void testAppendBestGrokMatchForStringsGivenTrappyFloatCandidates() {
    // Each sample begins with something that looks like a number (e.g. 1.2 in
    // the first one), but treating it as such is inappropriate because it is
    // immediately followed by another dot and digit.
    Collection<String> dottedSamples = Arrays.asList(
        "1.2.3",
        "-2.3.4",
        "4.5.6.7",
        "-9.8.7.6.5");

    StringBuilder grokPattern = new StringBuilder();
    Map<String, Integer> fieldNameCounts = new HashMap<>();

    GrokPatternCreator.appendBestGrokMatchForStrings(fieldNameCounts, grokPattern, false, false, dottedSamples);

    // No field should be extracted; only the generic wildcard is expected.
    String expectedPattern = ".+?";
    assertEquals(expectedPattern, grokPattern.toString());
}

public void testAppendBestGrokMatchForStringsGivenNumbersInBrackets() {

Collection<String> mustMatchStrings = Arrays.asList("(-2)",
Expand Down