From 42771a654a944ea8dfb191110546dc3ccd349f7c Mon Sep 17 00:00:00 2001 From: David Roberts Date: Mon, 3 Sep 2018 14:37:40 +0100 Subject: [PATCH 1/2] [ML] Add field stats to log structure finder The log structure endpoint will return these in addition to pure structure information so that it can be used to drive pre-import data visualizer functionality. The statistics for every field are count, cardinality (distinct count) and top hits (most common values). Extra statistics are calculated if the field is numeric: min, max, mean and median. --- .../DelimitedLogStructureFinder.java | 9 +- .../ml/logstructurefinder/FieldStats.java | 147 ++++++++++++ .../FieldStatsCalculator.java | 182 +++++++++++++++ .../GrokPatternCreator.java | 42 ++-- .../JsonLogStructureFinder.java | 9 +- .../ml/logstructurefinder/LogStructure.java | 35 ++- .../logstructurefinder/LogStructureUtils.java | 58 +++-- .../TextLogStructureFinder.java | 5 +- .../XmlLogStructureFinder.java | 9 +- .../FieldStatsCalculatorTests.java | 218 ++++++++++++++++++ .../logstructurefinder/FieldStatsTests.java | 61 +++++ .../GrokPatternCreatorTests.java | 34 +-- .../logstructurefinder/LogStructureTests.java | 8 + .../LogStructureUtilsTests.java | 83 ++++--- 14 files changed, 815 insertions(+), 85 deletions(-) create mode 100644 x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/FieldStats.java create mode 100644 x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/FieldStatsCalculator.java create mode 100644 x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/FieldStatsCalculatorTests.java create mode 100644 x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/FieldStatsTests.java diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/DelimitedLogStructureFinder.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/DelimitedLogStructureFinder.java index 2f7bb41d0bae7..de010196808d4 100644 --- a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/DelimitedLogStructureFinder.java +++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/DelimitedLogStructureFinder.java @@ -123,9 +123,16 @@ static DelimitedLogStructureFinder makeDelimitedLogStructureFinder(List .setMultilineStartPattern(timeLineRegex); } - SortedMap mappings = LogStructureUtils.guessMappings(explanation, sampleRecords); + Tuple, SortedMap> mappingsAndFieldStats = + LogStructureUtils.guessMappingsAndCalculateFieldStats(explanation, sampleRecords); + + SortedMap mappings = mappingsAndFieldStats.v1(); mappings.put(LogStructureUtils.DEFAULT_TIMESTAMP_FIELD, Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "date")); + if (mappingsAndFieldStats.v2() != null) { + structureBuilder.setFieldStats(mappingsAndFieldStats.v2()); + } + LogStructure structure = structureBuilder .setMappings(mappings) .setExplanation(explanation) diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/FieldStats.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/FieldStats.java new file mode 100644 index 0000000000000..08ae2b0c60205 --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/FieldStats.java @@ -0,0 +1,147 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.xcontent.ConstructingObjectParser; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +public class FieldStats implements ToXContentObject { + + static final ParseField COUNT = new ParseField("count"); + static final ParseField CARDINALITY = new ParseField("cardinality"); + static final ParseField MIN_VALUE = new ParseField("min_value"); + static final ParseField MAX_VALUE = new ParseField("max_value"); + static final ParseField MEAN_VALUE = new ParseField("mean_value"); + static final ParseField MEDIAN_VALUE = new ParseField("median_value"); + static final ParseField TOP_HITS = new ParseField("top_hits"); + + @SuppressWarnings("unchecked") + public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>("field_stats", false, + a -> new FieldStats((int) a[0], (int) a[1], (Double) a[2], (Double) a[3], (Double) a[4], (Double) a[5], + (List>) a[6])); + + static { + PARSER.declareInt(ConstructingObjectParser.constructorArg(), COUNT); + PARSER.declareInt(ConstructingObjectParser.constructorArg(), CARDINALITY); + PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MIN_VALUE); + PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MAX_VALUE); + PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MEAN_VALUE); + PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MEDIAN_VALUE); + PARSER.declareObjectArray(ConstructingObjectParser.optionalConstructorArg(), (p, c) -> p.mapOrdered(), TOP_HITS); + } + + private final int count; + private final int cardinality; + private final Double minValue; + private final Double maxValue; + private final Double meanValue; + private final Double medianValue; + private final List> topHits; + + FieldStats(int count, int cardinality, List> topHits) { + this(count, cardinality, null, null, null, null, topHits); + } + + FieldStats(int count, int cardinality, Double minValue, Double maxValue, Double meanValue, Double medianValue, + List> topHits) { + this.count = count; + this.cardinality = cardinality; + this.minValue = minValue; + this.maxValue = maxValue; + this.meanValue = meanValue; + this.medianValue = medianValue; + this.topHits = (topHits == null) ? Collections.emptyList() : Collections.unmodifiableList(topHits); + } + + public int getCount() { + return count; + } + + public int getCardinality() { + return cardinality; + } + + public Double getMinValue() { + return minValue; + } + + public Double getMaxValue() { + return maxValue; + } + + public Double getMeanValue() { + return meanValue; + } + + public Double getMedianValue() { + return medianValue; + } + + public List> getTopHits() { + return topHits; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + + builder.startObject(); + builder.field(COUNT.getPreferredName(), count); + builder.field(CARDINALITY.getPreferredName(), cardinality); + if (minValue != null) { + builder.field(MIN_VALUE.getPreferredName(), minValue); + } + if (maxValue != null) { + builder.field(MAX_VALUE.getPreferredName(), maxValue); + } + if (meanValue != null) { + builder.field(MEAN_VALUE.getPreferredName(), meanValue); + } + if (medianValue != null) { + builder.field(MEDIAN_VALUE.getPreferredName(), medianValue); + } + if (topHits.isEmpty() == false) { + builder.field(TOP_HITS.getPreferredName(), topHits); + } + builder.endObject(); + + return builder; + } + + @Override + public int hashCode() { + + return Objects.hash(count, cardinality, minValue, maxValue, meanValue, medianValue, topHits); + } + + @Override + public boolean equals(Object other) { + + if (this == other) { + return true; + } + + if (other == null || getClass() != other.getClass()) { + return false; + } + + FieldStats that = (FieldStats) other; + return this.count == that.count && + this.cardinality == that.cardinality && + Objects.equals(this.minValue, that.minValue) && + Objects.equals(this.maxValue, that.maxValue) && + Objects.equals(this.meanValue, that.meanValue) && + Objects.equals(this.medianValue, that.medianValue) && + Objects.equals(this.topHits, that.topHits); + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/FieldStatsCalculator.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/FieldStatsCalculator.java new file mode 100644 index 0000000000000..6e2ea103fbac5 --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/FieldStatsCalculator.java @@ -0,0 +1,182 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Comparator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.stream.Collectors; + +/** + * Calculate statistics for a set of scalar field values. + * Count, cardinality (distinct count) and top hits (most common values) are always calculated. + * Extra statistics are calculated if the field is numeric: min, max, mean and median. + */ +public class FieldStatsCalculator { + + private int count; + private SortedMap countsByStringValue = new TreeMap<>(); + private SortedMap countsByNumericValue = new TreeMap<>(); + + /** + * Add a collection of values to the calculator. + * The values to be added can be combined by the caller and added in a + * single call to this method or added in multiple calls to this method. + * @param fieldValues Zero or more values to add. May not be null. + */ + public void accept(Collection fieldValues) { + + count += fieldValues.size(); + + for (String fieldValue : fieldValues) { + + countsByStringValue.compute(fieldValue, (k, v) -> (v == null) ? 1 : (1 + v)); + + if (countsByNumericValue != null) { + + try { + countsByNumericValue.compute(Double.valueOf(fieldValue), (k, v) -> (v == null) ? 1 : (1 + v)); + } catch (NumberFormatException e) { + countsByNumericValue = null; + } + } + } + } + + /** + * Calculate field statistics based on the previously accepted values. + * @param numTopHits The maximum number of entries to include in the top hits. + * @return The calculated field statistics. + */ + public FieldStats calculate(int numTopHits) { + + if (countsByNumericValue != null && countsByNumericValue.isEmpty() == false) { + return new FieldStats(count, countsByNumericValue.size(), countsByNumericValue.firstKey(), countsByNumericValue.lastKey(), + calculateMean(), calculateMedian(), findNumericTopHits(numTopHits)); + } else { + return new FieldStats(count, countsByStringValue.size(), findStringTopHits(numTopHits)); + } + } + + Double calculateMean() { + + assert countsByNumericValue != null; + + if (countsByNumericValue.isEmpty()) { + return null; + } + + double runningCount = 0.0; + double runningMean = Double.NaN; + + for (Map.Entry entry : countsByNumericValue.entrySet()) { + + double entryCount = (double) entry.getValue(); + double newRunningCount = runningCount + entryCount; + + // Updating a running mean like this is more numerically stable than using (sum / count) + if (runningCount > 0.0) { + runningMean = runningMean * (runningCount / newRunningCount) + entry.getKey() * (entryCount / newRunningCount); + } else if (entryCount > 0.0) { + runningMean = entry.getKey(); + } + + runningCount = newRunningCount; + } + + return runningMean; + } + + Double calculateMedian() { + + assert countsByNumericValue != null; + + if (count % 2 == 1) { + + // Simple case - median is middle value + int targetCount = count / 2 + 1; + int currentUpperBound = 0; + + for (Map.Entry entry : countsByNumericValue.entrySet()) { + + currentUpperBound += entry.getValue(); + + if (currentUpperBound >= targetCount) { + return entry.getKey(); + } + } + + } else { + + // More complicated case - median is average of two middle values + int target1Count = count / 2; + int target2Count = target1Count + 1; + double target1Value = Double.NaN; + int prevUpperBound = -1; + int currentUpperBound = 0; + + for (Map.Entry entry : countsByNumericValue.entrySet()) { + + currentUpperBound += entry.getValue(); + + if (currentUpperBound >= target2Count) { + + if (prevUpperBound < target1Count) { + // Both target values are the same + return entry.getKey(); + } else { + return (target1Value + entry.getKey()) / 2.0; + } + } + + if (currentUpperBound >= target1Count) { + target1Value = entry.getKey(); + } + + prevUpperBound = currentUpperBound; + } + } + + return null; + } + + List> findNumericTopHits(int numTopHits) { + assert countsByNumericValue != null; + return findTopHits(numTopHits, countsByNumericValue, Comparator.comparing(Map.Entry::getKey)); + } + + List> findStringTopHits(int numTopHits) { + return findTopHits(numTopHits, countsByStringValue, Comparator.comparing(Map.Entry::getKey)); + } + + /** + * Order by descending count, with a secondary sort to ensure reproducibility of results. + */ + private static List> findTopHits(int numTopHits, Map countsByValue, + Comparator> secondarySort) { + + List> sortedByCount = countsByValue.entrySet().stream() + .sorted(Comparator.comparing(Map.Entry::getValue, Comparator.reverseOrder()).thenComparing(secondarySort)) + .limit(numTopHits).collect(Collectors.toList()); + + List> topHits = new ArrayList<>(sortedByCount.size()); + + for (Map.Entry entry : sortedByCount) { + + Map topHit = new LinkedHashMap<>(); + topHit.put("value", entry.getKey()); + topHit.put("count", entry.getValue()); + topHits.add(topHit); + } + + return topHits; + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/GrokPatternCreator.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/GrokPatternCreator.java index 186477507acce..b24e067b59d4b 100644 --- a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/GrokPatternCreator.java +++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/GrokPatternCreator.java @@ -119,6 +119,7 @@ public final class GrokPatternCreator { * Both this class and other classes will update it. */ private final Map mappings; + private final Map fieldStats; private final Map fieldNameCountStore = new HashMap<>(); private final StringBuilder overallGrokPatternBuilder = new StringBuilder(); @@ -128,22 +129,26 @@ public final class GrokPatternCreator { * can be appended by the methods of this class. * @param sampleMessages Sample messages that any Grok pattern found must match. * @param mappings Will be updated with mappings appropriate for the returned pattern, if non-null. + * @param fieldStats Will be updated with field stats for the fields in the returned pattern, if non-null. */ - public GrokPatternCreator(List explanation, Collection sampleMessages, Map mappings) { + public GrokPatternCreator(List explanation, Collection sampleMessages, Map mappings, + Map fieldStats) { this.explanation = explanation; this.sampleMessages = Collections.unmodifiableCollection(sampleMessages); this.mappings = mappings; + this.fieldStats = fieldStats; } /** * This method attempts to find a Grok pattern that will match all of the sample messages in their entirety. + * It will also update mappings and field stats if they are non-null. * @return A tuple of (time field name, Grok string), or null if no suitable Grok pattern was found. */ public Tuple findFullLineGrokPattern() { for (FullMatchGrokPatternCandidate candidate : FULL_MATCH_GROK_PATTERNS) { if (candidate.matchesAll(sampleMessages)) { - return candidate.processMatch(explanation, sampleMessages, mappings); + return candidate.processMatch(explanation, sampleMessages, mappings, fieldStats); } } @@ -186,7 +191,8 @@ private void processCandidateAndSplit(GrokPatternCandidate chosenPattern, boolea Collection prefaces = new ArrayList<>(); Collection epilogues = new ArrayList<>(); - String patternBuilderContent = chosenPattern.processCaptures(fieldNameCountStore, snippets, prefaces, epilogues, mappings); + String patternBuilderContent = + chosenPattern.processCaptures(fieldNameCountStore, snippets, prefaces, epilogues, mappings, fieldStats); appendBestGrokMatchForStrings(false, prefaces, ignoreKeyValueCandidateLeft, ignoreValueOnlyCandidatesLeft); overallGrokPatternBuilder.append(patternBuilderContent); appendBestGrokMatchForStrings(isLast, epilogues, ignoreKeyValueCandidateRight, ignoreValueOnlyCandidatesRight); @@ -375,11 +381,12 @@ interface GrokPatternCandidate { /** * After it has been determined that this Grok pattern candidate matches a collection of strings, * return collections of the bits that come before (prefaces) and after (epilogues) the bit - * that matches. Also update mappings with the most appropriate field name and type. + * that matches. Also update mappings with the most appropriate field name and type, and + * calculate field stats. * @return The string that needs to be incorporated into the overall Grok pattern for the line. */ String processCaptures(Map fieldNameCountStore, Collection snippets, Collection prefaces, - Collection epilogues, Map mappings); + Collection epilogues, Map mappings, Map fieldStats); } /** @@ -436,7 +443,7 @@ public boolean matchesAll(Collection snippets) { */ @Override public String processCaptures(Map fieldNameCountStore, Collection snippets, Collection prefaces, - Collection epilogues, Map mappings) { + Collection epilogues, Map mappings, Map fieldStats) { String sampleValue = null; for (String snippet : snippets) { Map captures = grok.captures(snippet); @@ -505,7 +512,7 @@ public boolean matchesAll(Collection snippets) { @Override public String processCaptures(Map fieldNameCountStore, Collection snippets, Collection prefaces, - Collection epilogues, Map mappings) { + Collection epilogues, Map mappings, Map fieldStats) { if (fieldName == null) { throw new IllegalStateException("Cannot process KV matches until a field name has been determined"); } @@ -526,6 +533,9 @@ public String processCaptures(Map fieldNameCountStore, Collecti if (mappings != null) { mappings.put(adjustedFieldName, LogStructureUtils.guessScalarMapping(explanation, adjustedFieldName, values)); } + if (fieldStats != null) { + fieldStats.put(adjustedFieldName, LogStructureUtils.calculateFieldStats(values)); + } return "\\b" + fieldName + "=%{USER:" + adjustedFieldName + "}"; } } @@ -541,8 +551,8 @@ static class NoMappingGrokPatternCandidate extends ValueOnlyGrokPatternCandidate @Override public String processCaptures(Map fieldNameCountStore, Collection snippets, Collection prefaces, - Collection epilogues, Map mappings) { - return super.processCaptures(fieldNameCountStore, snippets, prefaces, epilogues, null); + Collection epilogues, Map mappings, Map fieldStats) { + return super.processCaptures(fieldNameCountStore, snippets, prefaces, epilogues, null, fieldStats); } } @@ -570,11 +580,11 @@ public boolean matchesAll(Collection sampleMessages) { * @return A tuple of (time field name, Grok string). */ public Tuple processMatch(List explanation, Collection sampleMessages, - Map mappings) { + Map mappings, Map fieldStats) { explanation.add("A full message Grok pattern [" + grokString.substring(2, grokString.length() - 1) + "] looks appropriate"); - if (mappings != null) { + if (mappings != null || fieldStats != null) { Map> valuesPerField = new HashMap<>(); for (String sampleMessage : sampleMessages) { @@ -604,8 +614,14 @@ public Tuple processMatch(List explanation, Collection> valuesForField : valuesPerField.entrySet()) { String fieldName = valuesForField.getKey(); - mappings.put(fieldName, - LogStructureUtils.guessScalarMapping(explanation, fieldName, valuesForField.getValue())); + if (mappings != null) { + mappings.put(fieldName, + LogStructureUtils.guessScalarMapping(explanation, fieldName, valuesForField.getValue())); + } + if (fieldStats != null) { + fieldStats.put(fieldName, + LogStructureUtils.calculateFieldStats(valuesForField.getValue())); + } } } diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/JsonLogStructureFinder.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/JsonLogStructureFinder.java index 98e8a0213fbef..7f3cee3f0bd7b 100644 --- a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/JsonLogStructureFinder.java +++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/JsonLogStructureFinder.java @@ -56,9 +56,16 @@ static JsonLogStructureFinder makeJsonLogStructureFinder(List explanatio .setNeedClientTimezone(timeField.v2().hasTimezoneDependentParsing()); } - SortedMap mappings = LogStructureUtils.guessMappings(explanation, sampleRecords); + Tuple, SortedMap> mappingsAndFieldStats = + LogStructureUtils.guessMappingsAndCalculateFieldStats(explanation, sampleRecords); + + SortedMap mappings = mappingsAndFieldStats.v1(); mappings.put(LogStructureUtils.DEFAULT_TIMESTAMP_FIELD, Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "date")); + if (mappingsAndFieldStats.v2() != null) { + structureBuilder.setFieldStats(mappingsAndFieldStats.v2()); + } + LogStructure structure = structureBuilder .setMappings(mappings) .setExplanation(explanation) diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructure.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructure.java index ea8fe37e62f9f..6d36da1180220 100644 --- a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructure.java +++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructure.java @@ -9,6 +9,7 @@ import org.elasticsearch.common.xcontent.ObjectParser; import org.elasticsearch.common.xcontent.ToXContentObject; import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; import java.io.IOException; import java.util.ArrayList; @@ -95,6 +96,7 @@ public String toString() { static final ParseField TIMESTAMP_FORMATS = new ParseField("timestamp_formats"); static final ParseField NEED_CLIENT_TIMEZONE = new ParseField("need_client_timezone"); static final ParseField MAPPINGS = new ParseField("mappings"); + static final ParseField FIELD_STATS = new ParseField("field_stats"); static final ParseField EXPLANATION = new ParseField("explanation"); public static final ObjectParser PARSER = new ObjectParser<>("log_file_structure", false, Builder::new); @@ -117,6 +119,13 @@ public String toString() { PARSER.declareStringArray(Builder::setTimestampFormats, TIMESTAMP_FORMATS); PARSER.declareBoolean(Builder::setNeedClientTimezone, NEED_CLIENT_TIMEZONE); PARSER.declareObject(Builder::setMappings, (p, c) -> new TreeMap<>(p.map()), MAPPINGS); + PARSER.declareObject(Builder::setFieldStats, (p, c) -> { + Map fieldStats = new TreeMap<>(); + while (p.nextToken() == XContentParser.Token.FIELD_NAME) { + fieldStats.put(p.currentName(), FieldStats.PARSER.apply(p, c)); + } + return fieldStats; + }, FIELD_STATS); PARSER.declareStringArray(Builder::setExplanation, EXPLANATION); } @@ -137,13 +146,14 @@ public String toString() { private final String timestampField; private final boolean needClientTimezone; private final SortedMap mappings; + private final SortedMap fieldStats; private final List explanation; public LogStructure(int numLinesAnalyzed, int numMessagesAnalyzed, String sampleStart, String charset, Boolean hasByteOrderMarker, Format format, String multilineStartPattern, String excludeLinesPattern, List inputFields, Boolean hasHeaderRow, Character delimiter, Boolean shouldTrimFields, String grokPattern, String timestampField, List timestampFormats, boolean needClientTimezone, Map mappings, - List explanation) { + Map fieldStats, List explanation) { this.numLinesAnalyzed = numLinesAnalyzed; this.numMessagesAnalyzed = numMessagesAnalyzed; @@ -162,6 +172,7 @@ public LogStructure(int numLinesAnalyzed, int numMessagesAnalyzed, String sample this.timestampFormats = (timestampFormats == null) ? null : Collections.unmodifiableList(new ArrayList<>(timestampFormats)); this.needClientTimezone = needClientTimezone; this.mappings = Collections.unmodifiableSortedMap(new TreeMap<>(mappings)); + this.fieldStats = Collections.unmodifiableSortedMap(new TreeMap<>(fieldStats)); this.explanation = Collections.unmodifiableList(new ArrayList<>(explanation)); } @@ -233,6 +244,10 @@ public SortedMap getMappings() { return mappings; } + public SortedMap getFieldStats() { + return fieldStats; + } + public List getExplanation() { return explanation; } @@ -278,6 +293,13 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } builder.field(NEED_CLIENT_TIMEZONE.getPreferredName(), needClientTimezone); builder.field(MAPPINGS.getPreferredName(), mappings); + if (fieldStats.isEmpty() == false) { + builder.startObject(FIELD_STATS.getPreferredName()); + for (Map.Entry entry : fieldStats.entrySet()) { + builder.field(entry.getKey(), entry.getValue()); + } + builder.endObject(); + } builder.field(EXPLANATION.getPreferredName(), explanation); builder.endObject(); @@ -289,7 +311,7 @@ public int hashCode() { return Objects.hash(numLinesAnalyzed, numMessagesAnalyzed, sampleStart, charset, hasByteOrderMarker, format, multilineStartPattern, excludeLinesPattern, inputFields, hasHeaderRow, delimiter, shouldTrimFields, grokPattern, timestampField, - timestampFormats, needClientTimezone, mappings, explanation); + timestampFormats, needClientTimezone, mappings, fieldStats, explanation); } @Override @@ -321,6 +343,7 @@ public boolean equals(Object other) { Objects.equals(this.timestampField, that.timestampField) && Objects.equals(this.timestampFormats, that.timestampFormats) && Objects.equals(this.mappings, that.mappings) && + Objects.equals(this.fieldStats, that.fieldStats) && Objects.equals(this.explanation, that.explanation); } @@ -343,6 +366,7 @@ public static class Builder { private List timestampFormats; private boolean needClientTimezone; private Map mappings; + private Map fieldStats = Collections.emptyMap(); private List explanation; public Builder() { @@ -438,6 +462,11 @@ public Builder setMappings(Map mappings) { return this; } + public Builder setFieldStats(Map fieldStats) { + this.fieldStats = Objects.requireNonNull(fieldStats); + return this; + } + public Builder setExplanation(List explanation) { this.explanation = Objects.requireNonNull(explanation); return this; @@ -540,7 +569,7 @@ public LogStructure build() { return new LogStructure(numLinesAnalyzed, numMessagesAnalyzed, sampleStart, charset, hasByteOrderMarker, format, multilineStartPattern, excludeLinesPattern, inputFields, hasHeaderRow, delimiter, shouldTrimFields, grokPattern, - timestampField, timestampFormats, needClientTimezone, mappings, explanation); + timestampField, timestampFormats, needClientTimezone, mappings, fieldStats, explanation); } } } diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureUtils.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureUtils.java index 71a68c399910b..69214a746ed79 100644 --- a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureUtils.java +++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureUtils.java @@ -16,6 +16,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; import java.util.stream.Collectors; @@ -28,6 +29,7 @@ public final class LogStructureUtils { public static final String MAPPING_FORMAT_SETTING = "format"; public static final String MAPPING_PROPERTIES_SETTING = "properties"; + private static final int NUM_TOP_HITS = 10; // NUMBER Grok pattern doesn't support scientific notation, so we extend it private static final Grok NUMBER_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{NUMBER}(?:[eE][+-]?[0-3]?[0-9]{1,2})?$"); private static final Grok IP_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{IP}$"); @@ -112,26 +114,39 @@ private static List> findCandidates(List e * @param sampleRecords The sampled records. * @return A map of field name to mapping settings. */ - static SortedMap guessMappings(List explanation, List> sampleRecords) { + static Tuple, SortedMap> + guessMappingsAndCalculateFieldStats(List explanation, List> sampleRecords) { SortedMap mappings = new TreeMap<>(); + SortedMap fieldStats = new TreeMap<>(); - for (Map sampleRecord : sampleRecords) { - for (String fieldName : sampleRecord.keySet()) { - mappings.computeIfAbsent(fieldName, key -> guessMapping(explanation, fieldName, - sampleRecords.stream().flatMap(record -> { - Object fieldValue = record.get(fieldName); - return (fieldValue == null) ? Stream.empty() : Stream.of(fieldValue); - } - ).collect(Collectors.toList()))); + Set uniqueFieldNames = sampleRecords.stream().flatMap(record -> record.keySet().stream()).collect(Collectors.toSet()); + + for (String fieldName : uniqueFieldNames) { + + List fieldValues = sampleRecords.stream().flatMap(record -> { + Object fieldValue = record.get(fieldName); + return (fieldValue == null) ? Stream.empty() : Stream.of(fieldValue); + } + ).collect(Collectors.toList()); + + Tuple, FieldStats> mappingAndFieldStats = + guessMappingAndCalculateFieldStats(explanation, fieldName, fieldValues); + if (mappingAndFieldStats != null) { + if (mappingAndFieldStats.v1() != null) { + mappings.put(fieldName, mappingAndFieldStats.v1()); + } + if (mappingAndFieldStats.v2() != null) { + fieldStats.put(fieldName, mappingAndFieldStats.v2()); + } } } - return mappings; + return new Tuple<>(mappings, fieldStats); } - static Map guessMapping(List explanation, String fieldName, List fieldValues) { - + static Tuple, FieldStats> guessMappingAndCalculateFieldStats(List explanation, + String fieldName, List fieldValues) { if (fieldValues == null || fieldValues.isEmpty()) { // We can get here if all the records that contained a given field had a null value for it. // In this case it's best not to make any statement about what the mapping type should be. @@ -140,7 +155,7 @@ static Map guessMapping(List explanation, String fieldNa if (fieldValues.stream().anyMatch(value -> value instanceof Map)) { if (fieldValues.stream().allMatch(value -> value instanceof Map)) { - return Collections.singletonMap(MAPPING_TYPE_SETTING, "object"); + return new Tuple<>(Collections.singletonMap(MAPPING_TYPE_SETTING, "object"), null); } throw new IllegalArgumentException("Field [" + fieldName + "] has both object and non-object values - this is not supported by Elasticsearch"); @@ -148,11 +163,12 @@ static Map guessMapping(List explanation, String fieldNa if (fieldValues.stream().anyMatch(value -> value instanceof List || value instanceof Object[])) { // Elasticsearch fields can be either arrays or single values, but array values must all have the same type - return guessMapping(explanation, fieldName, + return guessMappingAndCalculateFieldStats(explanation, fieldName, fieldValues.stream().flatMap(LogStructureUtils::flatten).collect(Collectors.toList())); } - return guessScalarMapping(explanation, fieldName, fieldValues.stream().map(Object::toString).collect(Collectors.toList())); + Collection fieldValuesAsStrings = fieldValues.stream().map(Object::toString).collect(Collectors.toList()); + return new Tuple<>(guessScalarMapping(explanation, fieldName, fieldValuesAsStrings), calculateFieldStats(fieldValuesAsStrings)); } private static Stream flatten(Object value) { @@ -227,6 +243,18 @@ else if (fieldValues.stream().allMatch(IP_GROK::match)) { return Collections.singletonMap(MAPPING_TYPE_SETTING, "keyword"); } + /** + * Calculate stats for a set of field values. + * @param fieldValues Values of the field for which field stats are to be calculated. + * @return The stats calculated from the field values. + */ + static FieldStats calculateFieldStats(Collection fieldValues) { + + FieldStatsCalculator calculator = new FieldStatsCalculator(); + calculator.accept(fieldValues); + return calculator.calculate(NUM_TOP_HITS); + } + /** * The thinking is that the longer the field value and the more spaces it contains, * the more likely it is that it should be indexed as text rather than keyword. diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/TextLogStructureFinder.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/TextLogStructureFinder.java index 722751a4cf49e..e830aa30a1e87 100644 --- a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/TextLogStructureFinder.java +++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/TextLogStructureFinder.java @@ -82,10 +82,12 @@ static TextLogStructureFinder makeTextLogStructureFinder(List explanatio mappings.put("message", Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "text")); mappings.put(LogStructureUtils.DEFAULT_TIMESTAMP_FIELD, Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "date")); + SortedMap fieldStats = new TreeMap<>(); + // We can't parse directly into @timestamp using Grok, so parse to some other time field, which the date filter will then remove String interimTimestampField; String grokPattern; - GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, sampleMessages, mappings); + GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, sampleMessages, mappings, fieldStats); Tuple timestampFieldAndFullMatchGrokPattern = grokPatternCreator.findFullLineGrokPattern(); if (timestampFieldAndFullMatchGrokPattern != null) { interimTimestampField = timestampFieldAndFullMatchGrokPattern.v1(); @@ -101,6 +103,7 @@ static TextLogStructureFinder makeTextLogStructureFinder(List explanatio .setNeedClientTimezone(bestTimestamp.v1().hasTimezoneDependentParsing()) .setGrokPattern(grokPattern) .setMappings(mappings) + .setFieldStats(fieldStats) .setExplanation(explanation) .build(); diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/XmlLogStructureFinder.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/XmlLogStructureFinder.java index d664a9ccb8213..6c81032c05baf 100644 --- a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/XmlLogStructureFinder.java +++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/XmlLogStructureFinder.java @@ -95,7 +95,14 @@ static XmlLogStructureFinder makeXmlLogStructureFinder(List explanation, .setNeedClientTimezone(timeField.v2().hasTimezoneDependentParsing()); } - SortedMap innerMappings = LogStructureUtils.guessMappings(explanation, sampleRecords); + Tuple, SortedMap> mappingsAndFieldStats = + LogStructureUtils.guessMappingsAndCalculateFieldStats(explanation, sampleRecords); + + if (mappingsAndFieldStats.v2() != null) { + structureBuilder.setFieldStats(mappingsAndFieldStats.v2()); + } + + SortedMap innerMappings = mappingsAndFieldStats.v1(); Map secondLevelProperties = new LinkedHashMap<>(); secondLevelProperties.put(LogStructureUtils.MAPPING_TYPE_SETTING, "object"); secondLevelProperties.put(LogStructureUtils.MAPPING_PROPERTIES_SETTING, innerMappings); diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/FieldStatsCalculatorTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/FieldStatsCalculatorTests.java new file mode 100644 index 0000000000000..91e4cbaef82f5 --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/FieldStatsCalculatorTests.java @@ -0,0 +1,218 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +import java.util.Arrays; +import java.util.Collections; +import java.util.DoubleSummaryStatistics; +import java.util.List; +import java.util.Map; + +public class FieldStatsCalculatorTests extends LogStructureTestCase { + + public void testMean() { + + FieldStatsCalculator calculator = new FieldStatsCalculator(); + + calculator.accept(Arrays.asList("1", "3.5", "2.5", "9")); + + assertEquals(4.0, calculator.calculateMean(), 1e-10); + } + + public void testMedianGivenOddCount() { + + FieldStatsCalculator calculator = new FieldStatsCalculator(); + + calculator.accept(Arrays.asList("3", "23", "-1", "5", "1000")); + + assertEquals(5.0, calculator.calculateMedian(), 1e-10); + } + + public void testMedianGivenOddCountMinimal() { + + FieldStatsCalculator calculator = new FieldStatsCalculator(); + + calculator.accept(Collections.singletonList("3")); + + assertEquals(3.0, calculator.calculateMedian(), 1e-10); + } + + public void testMedianGivenEvenCountMiddleValuesDifferent() { + + FieldStatsCalculator calculator = new FieldStatsCalculator(); + + calculator.accept(Arrays.asList("3", "23", "-1", "5", "1000", "6")); + + assertEquals(5.5, calculator.calculateMedian(), 1e-10); + } + + public void testMedianGivenEvenCountMiddleValuesSame() { + + FieldStatsCalculator calculator = new FieldStatsCalculator(); + + calculator.accept(Arrays.asList("3", "23", "-1", "5", "1000", "5")); + + assertEquals(5.0, calculator.calculateMedian(), 1e-10); + } + + public void testMedianGivenEvenCountMinimal() { + + FieldStatsCalculator calculator = new FieldStatsCalculator(); + + calculator.accept(Arrays.asList("4", "4")); + + assertEquals(4.0, calculator.calculateMedian(), 1e-10); + } + + public void testTopHitsNumeric() { + + FieldStatsCalculator calculator = new FieldStatsCalculator(); + + calculator.accept(Arrays.asList("4", "4", "7", "4", "6", "5", "6", "5", "16", "4", "5")); + + List> topHits = calculator.findNumericTopHits(3); + + assertEquals(3, topHits.size()); + assertEquals(4.0, topHits.get(0).get("value")); + assertEquals(4, topHits.get(0).get("count")); + assertEquals(5.0, topHits.get(1).get("value")); + assertEquals(3, topHits.get(1).get("count")); + assertEquals(6.0, topHits.get(2).get("value")); + assertEquals(2, topHits.get(2).get("count")); + } + + public void testTopHitsString() { + + FieldStatsCalculator calculator = new FieldStatsCalculator(); + + calculator.accept(Arrays.asList("s", "s", "d", "s", "f", "x", "f", "x", "n", "s", "x")); + + List> topHits = calculator.findStringTopHits(3); + + assertEquals(3, topHits.size()); + assertEquals("s", topHits.get(0).get("value")); + assertEquals(4, topHits.get(0).get("count")); + assertEquals("x", topHits.get(1).get("value")); + assertEquals(3, topHits.get(1).get("count")); + assertEquals("f", topHits.get(2).get("value")); + assertEquals(2, topHits.get(2).get("count")); + } + + public void testCalculateGivenEmpty() { + + FieldStatsCalculator calculator = new FieldStatsCalculator(); + + calculator.accept(Collections.emptyList()); + + FieldStats stats = calculator.calculate(3); + + assertEquals(0, stats.getCount()); + assertEquals(0, stats.getCardinality()); + assertNull(stats.getMinValue()); + assertNull(stats.getMaxValue()); + assertNull(stats.getMeanValue()); + assertNull(stats.getMedianValue()); + assertEquals(0, stats.getTopHits().size()); + } + + public void testCalculateGivenNumericField() { + + FieldStatsCalculator calculator = new FieldStatsCalculator(); + + calculator.accept(Arrays.asList("4", "4", "7", "4", "6", "5", "6", "5", "16", "4", "5")); + + FieldStats stats = calculator.calculate(3); + + assertEquals(11, stats.getCount()); + assertEquals(5, stats.getCardinality()); + assertEquals(4.0, stats.getMinValue(), 1e-10); + assertEquals(16.0, stats.getMaxValue(), 1e-10); + assertEquals(6.0, stats.getMeanValue(), 1e-10); + assertEquals(5.0, stats.getMedianValue(), 1e-10); + + List> topHits = stats.getTopHits(); + + assertEquals(3, topHits.size()); + assertEquals(4.0, topHits.get(0).get("value")); + assertEquals(4, topHits.get(0).get("count")); + assertEquals(5.0, topHits.get(1).get("value")); + assertEquals(3, topHits.get(1).get("count")); + assertEquals(6.0, topHits.get(2).get("value")); + assertEquals(2, topHits.get(2).get("count")); + } + + public void testCalculateGivenStringField() { + + FieldStatsCalculator calculator = new FieldStatsCalculator(); + + calculator.accept(Arrays.asList("s", "s", "d", "s", "f", "x", "f", "x", "n", "s", "x")); + + FieldStats stats = calculator.calculate(3); + + assertEquals(11, stats.getCount()); + assertEquals(5, stats.getCardinality()); + assertNull(stats.getMinValue()); + assertNull(stats.getMaxValue()); + assertNull(stats.getMeanValue()); + assertNull(stats.getMedianValue()); + + List> topHits = stats.getTopHits(); + + assertEquals(3, topHits.size()); + assertEquals("s", topHits.get(0).get("value")); + assertEquals(4, topHits.get(0).get("count")); + assertEquals("x", topHits.get(1).get("value")); + assertEquals(3, topHits.get(1).get("count")); + assertEquals("f", topHits.get(2).get("value")); + assertEquals(2, topHits.get(2).get("count")); + } + + public void testCalculateGivenMixedField() { + + FieldStatsCalculator calculator = new FieldStatsCalculator(); + + calculator.accept(Arrays.asList("4", "4", "d", "4", "f", "x", "f", "x", "16", "4", "x")); + + FieldStats stats = calculator.calculate(3); + + assertEquals(11, stats.getCount()); + assertEquals(5, stats.getCardinality()); + assertNull(stats.getMinValue()); + assertNull(stats.getMaxValue()); + assertNull(stats.getMeanValue()); + assertNull(stats.getMedianValue()); + + List> topHits = stats.getTopHits(); + + assertEquals(3, topHits.size()); + assertEquals("4", topHits.get(0).get("value")); + assertEquals(4, topHits.get(0).get("count")); + assertEquals("x", topHits.get(1).get("value")); + assertEquals(3, topHits.get(1).get("count")); + assertEquals("f", topHits.get(2).get("value")); + assertEquals(2, topHits.get(2).get("count")); + } + + public void testJavaStatsEquivalence() { + + DoubleSummaryStatistics summaryStatistics = new DoubleSummaryStatistics(); + FieldStatsCalculator calculator = new FieldStatsCalculator(); + + for (int numValues = randomIntBetween(1000, 10000); numValues > 0; --numValues) { + + double value = randomDouble(); + summaryStatistics.accept(value); + calculator.accept(Collections.singletonList(Double.toString(value))); + } + + FieldStats stats = calculator.calculate(1); + + assertEquals(summaryStatistics.getCount(), stats.getCount()); + assertEquals(summaryStatistics.getMin(), stats.getMinValue(), 1e-10); + assertEquals(summaryStatistics.getMax(), stats.getMaxValue(), 1e-10); + assertEquals(summaryStatistics.getAverage(), stats.getMeanValue(), 1e-10); + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/FieldStatsTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/FieldStatsTests.java new file mode 100644 index 0000000000000..a69ef70d4b2de --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/FieldStatsTests.java @@ -0,0 +1,61 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.test.AbstractXContentTestCase; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +public class FieldStatsTests extends AbstractXContentTestCase { + + protected FieldStats createTestInstance() { + return createTestFieldStats(); + } + + static FieldStats createTestFieldStats() { + + int count = randomIntBetween(1, 100000); + int cardinality = randomIntBetween(1, count); + + Double minValue = null; + Double maxValue = null; + Double meanValue = null; + Double medianValue = null; + boolean isMetric = randomBoolean(); + if (isMetric) { + minValue = randomDouble(); + maxValue = randomDouble(); + meanValue = randomDouble(); + medianValue = randomDouble(); + } + + List> topHits = new ArrayList<>(); + for (int i = 0; i < Math.min(10, cardinality); ++i) { + Map topHit = new LinkedHashMap<>(); + if (isMetric) { + topHit.put("value", randomDouble()); + } else { + topHit.put("value", randomAlphaOfLength(20)); + } + topHit.put("count", randomIntBetween(1, cardinality)); + topHits.add(topHit); + } + + return new FieldStats(count, cardinality, minValue, maxValue, meanValue, medianValue, topHits); + } + + protected FieldStats doParseInstance(XContentParser parser) { + return FieldStats.PARSER.apply(parser, null); + } + + protected boolean supportsUnknownFields() { + return false; + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/GrokPatternCreatorTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/GrokPatternCreatorTests.java index 87f9f662698ef..9853efd41de84 100644 --- a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/GrokPatternCreatorTests.java +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/GrokPatternCreatorTests.java @@ -43,7 +43,7 @@ public void testPopulatePrefacesAndEpiloguesGivenTimestamp() { Collection prefaces = new ArrayList<>(); Collection epilogues = new ArrayList<>(); - candidate.processCaptures(fieldNameCountStore, matchingStrings, prefaces, epilogues, null); + candidate.processCaptures(fieldNameCountStore, matchingStrings, prefaces, epilogues, null, null); assertThat(prefaces, containsInAnyOrder("[", "[", "junk [", "[")); assertThat(epilogues, containsInAnyOrder("] DEBUG ", "] ERROR ", "] INFO ", "] DEBUG ")); @@ -60,7 +60,7 @@ public void testPopulatePrefacesAndEpiloguesGivenEmailAddress() { Collection prefaces = new ArrayList<>(); Collection epilogues = new ArrayList<>(); - candidate.processCaptures(fieldNameCountStore, matchingStrings, prefaces, epilogues, null); + candidate.processCaptures(fieldNameCountStore, matchingStrings, prefaces, epilogues, null, null); assertThat(prefaces, containsInAnyOrder("before ", "abc ", "")); assertThat(epilogues, containsInAnyOrder(" after", " xyz", "")); @@ -73,7 +73,7 @@ public void testAppendBestGrokMatchForStringsGivenTimestampsAndLogLevels() { "junk [2018-01-22T07:33:23] INFO ", "[2018-01-21T03:33:23] DEBUG "); - GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, snippets, null); + GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, snippets, null, null); grokPatternCreator.appendBestGrokMatchForStrings(false, snippets, false, 0); assertEquals(".*?\\[%{TIMESTAMP_ISO8601:extra_timestamp}\\] %{LOGLEVEL:loglevel} ", @@ -87,7 +87,7 @@ public void testAppendBestGrokMatchForStringsGivenNumbersInBrackets() { " (4)", " (-5) "); - GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, snippets, null); + GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, snippets, null, null); grokPatternCreator.appendBestGrokMatchForStrings(false, snippets, false, 0); assertEquals(".*?\\(%{INT:field}\\).*?", grokPatternCreator.getOverallGrokPatternBuilder().toString()); @@ -99,7 +99,7 @@ public void testAppendBestGrokMatchForStringsGivenNegativeNumbersWithoutBreak() "prior to-3", "-4"); - GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, snippets, null); + GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, snippets, null, null); grokPatternCreator.appendBestGrokMatchForStrings(false, snippets, false, 0); // It seems sensible that we don't detect these suffices as either base 10 or base 16 numbers @@ -113,7 +113,7 @@ public void testAppendBestGrokMatchForStringsGivenHexNumbers() { " -123", "1f is hex"); - GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, snippets, null); + GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, snippets, null, null); grokPatternCreator.appendBestGrokMatchForStrings(false, snippets, false, 0); assertEquals(".*?%{BASE16NUM:field}.*?", grokPatternCreator.getOverallGrokPatternBuilder().toString()); @@ -124,7 +124,7 @@ public void testAppendBestGrokMatchForStringsGivenHostnamesWithNumbers() { Collection snippets = Arrays.asList(" mappings = new HashMap<>(); - GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, sampleMessages, mappings); + GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, sampleMessages, mappings, null); assertEquals("%{SYSLOGTIMESTAMP:timestamp} .*? .*?\\[%{INT:field}\\]: %{LOGLEVEL:loglevel} \\(.*? .*? .*?\\) .*? " + "%{QUOTEDSTRING:field2}: %{IP:ipaddress}#%{INT:field3}", @@ -215,7 +215,7 @@ public void testCreateGrokPatternFromExamplesGivenCatalinaLogs() { "Invalid chunk ignored."); Map mappings = new HashMap<>(); - GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, sampleMessages, mappings); + GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, sampleMessages, mappings, null); assertEquals("%{CATALINA_DATESTAMP:timestamp} .*? .*?\\n%{LOGLEVEL:loglevel}: .*", grokPatternCreator.createGrokPatternFromExamples("CATALINA_DATESTAMP", "timestamp")); @@ -237,7 +237,7 @@ public void testCreateGrokPatternFromExamplesGivenMultiTimestampLogs() { "Info\tsshd\tsubsystem request for sftp"); Map mappings = new HashMap<>(); - GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, sampleMessages, mappings); + GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, sampleMessages, mappings, null); assertEquals("%{INT:field}\\t%{TIMESTAMP_ISO8601:timestamp}\\t%{TIMESTAMP_ISO8601:extra_timestamp}\\t%{INT:field2}\\t.*?\\t" + "%{IP:ipaddress}\\t.*?\\t%{LOGLEVEL:loglevel}\\t.*", @@ -271,7 +271,7 @@ public void testFindFullLineGrokPatternGivenApacheCombinedLogs() { "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36\""); Map mappings = new HashMap<>(); - GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, sampleMessages, mappings); + GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, sampleMessages, mappings, null); assertEquals(new Tuple<>("timestamp", "%{COMBINEDAPACHELOG}"), grokPatternCreator.findFullLineGrokPattern()); assertEquals(10, mappings.size()); @@ -300,7 +300,7 @@ public void testAdjustForPunctuationGivenCommonPrefix() { ",\"rule1\",\"Accept\",\"\",\"\",\"\",\"0000000000000000\"" ); - GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, snippets, null); + GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, snippets, null, null); Collection adjustedSnippets = grokPatternCreator.adjustForPunctuation(snippets); assertEquals("\",", grokPatternCreator.getOverallGrokPatternBuilder().toString()); @@ -317,7 +317,7 @@ public void testAdjustForPunctuationGivenNoCommonPrefix() { "was added by 'User1'(id:2) to servergroup 'GAME'(id:9)" ); - GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, snippets, null); + GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, snippets, null, null); Collection adjustedSnippets = grokPatternCreator.adjustForPunctuation(snippets); assertEquals("", grokPatternCreator.getOverallGrokPatternBuilder().toString()); diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureTests.java index 302946dcaa86c..2a10e11164f6e 100644 --- a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureTests.java +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureTests.java @@ -66,6 +66,14 @@ protected LogStructure createTestInstance() { } builder.setMappings(mappings); + //if (randomBoolean()) { + Map fieldStats = new TreeMap<>(); + for (String field : generateRandomStringArray(5, 20, false, false)) { + fieldStats.put(field, FieldStatsTests.createTestFieldStats()); + } + builder.setFieldStats(fieldStats); + //} + builder.setExplanation(Arrays.asList(generateRandomStringArray(10, 150, false, false))); return builder.build(); diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureUtilsTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureUtilsTests.java index 7e92728f01aa0..8ebfe520d6621 100644 --- a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureUtilsTests.java +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureUtilsTests.java @@ -12,7 +12,9 @@ import java.util.Collections; import java.util.HashMap; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; +import java.util.SortedMap; import static org.hamcrest.Matchers.contains; @@ -178,96 +180,83 @@ public void testSamplesWithManyFieldsInconsistentAndConsistentTimeFields() { } public void testGuessMappingGivenNothing() { - assertNull(LogStructureUtils.guessMapping(explanation, "foo", Collections.emptyList())); + assertNull(guessMapping(explanation, "foo", Collections.emptyList())); } public void testGuessMappingGivenKeyword() { Map expected = Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "keyword"); - assertEquals(expected, - LogStructureUtils.guessMapping(explanation, "foo", Arrays.asList("ERROR", "INFO", "DEBUG"))); - assertEquals(expected, - LogStructureUtils.guessMapping(explanation, "foo", Arrays.asList("2018-06-11T13:26:47Z", "not a date"))); + assertEquals(expected, guessMapping(explanation, "foo", Arrays.asList("ERROR", "INFO", "DEBUG"))); + assertEquals(expected, guessMapping(explanation, "foo", Arrays.asList("2018-06-11T13:26:47Z", "not a date"))); } public void testGuessMappingGivenText() { Map expected = Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "text"); - assertEquals(expected, LogStructureUtils.guessMapping(explanation, "foo", - Arrays.asList("a", "the quick brown fox jumped over the lazy dog"))); + assertEquals(expected, guessMapping(explanation, "foo", Arrays.asList("a", "the quick brown fox jumped over the lazy dog"))); } public void testGuessMappingGivenIp() { Map expected = Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "ip"); - assertEquals(expected, - LogStructureUtils.guessMapping(explanation, "foo", Arrays.asList("10.0.0.1", "172.16.0.1", "192.168.0.1"))); + assertEquals(expected, guessMapping(explanation, "foo", Arrays.asList("10.0.0.1", "172.16.0.1", "192.168.0.1"))); } public void testGuessMappingGivenDouble() { Map expected = Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "double"); - assertEquals(expected, - LogStructureUtils.guessMapping(explanation, "foo", Arrays.asList("3.14159265359", "0", "-8"))); + assertEquals(expected, guessMapping(explanation, "foo", Arrays.asList("3.14159265359", "0", "-8"))); // 12345678901234567890 is too long for long - assertEquals(expected, - LogStructureUtils.guessMapping(explanation, "foo", Arrays.asList("1", "2", "12345678901234567890"))); - assertEquals(expected, - LogStructureUtils.guessMapping(explanation, "foo", Arrays.asList(3.14159265359, 0.0, 1e-308))); - assertEquals(expected, - LogStructureUtils.guessMapping(explanation, "foo", Arrays.asList("-1e-1", "-1e308", "1e-308"))); + assertEquals(expected, guessMapping(explanation, "foo", Arrays.asList("1", "2", "12345678901234567890"))); + assertEquals(expected, guessMapping(explanation, "foo", Arrays.asList(3.14159265359, 0.0, 1e-308))); + assertEquals(expected, guessMapping(explanation, "foo", Arrays.asList("-1e-1", "-1e308", "1e-308"))); } public void testGuessMappingGivenLong() { Map expected = Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "long"); - assertEquals(expected, - LogStructureUtils.guessMapping(explanation, "foo", Arrays.asList("500", "3", "-3"))); - assertEquals(expected, - LogStructureUtils.guessMapping(explanation, "foo", Arrays.asList(500, 6, 0))); + assertEquals(expected, guessMapping(explanation, "foo", Arrays.asList("500", "3", "-3"))); + assertEquals(expected, guessMapping(explanation, "foo", Arrays.asList(500, 6, 0))); } public void testGuessMappingGivenDate() { Map expected = Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "date"); - assertEquals(expected, LogStructureUtils.guessMapping(explanation, "foo", - Arrays.asList("2018-06-11T13:26:47Z", "2018-06-11T13:27:12Z"))); + assertEquals(expected, guessMapping(explanation, "foo", Arrays.asList("2018-06-11T13:26:47Z", "2018-06-11T13:27:12Z"))); } public void testGuessMappingGivenBoolean() { Map expected = Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "boolean"); - assertEquals(expected, LogStructureUtils.guessMapping(explanation, "foo", Arrays.asList("false", "true"))); - assertEquals(expected, LogStructureUtils.guessMapping(explanation, "foo", Arrays.asList(true, false))); + assertEquals(expected, guessMapping(explanation, "foo", Arrays.asList("false", "true"))); + assertEquals(expected, guessMapping(explanation, "foo", Arrays.asList(true, false))); } public void testGuessMappingGivenArray() { Map expected = Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "long"); - assertEquals(expected, - LogStructureUtils.guessMapping(explanation, "foo", Arrays.asList(42, Arrays.asList(1, -99)))); + assertEquals(expected, guessMapping(explanation, "foo", Arrays.asList(42, Arrays.asList(1, -99)))); expected = Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "keyword"); - assertEquals(expected, - LogStructureUtils.guessMapping(explanation, "foo", Arrays.asList(new String[]{ "x", "y" }, "z"))); + assertEquals(expected, guessMapping(explanation, "foo", Arrays.asList(new String[]{ "x", "y" }, "z"))); } public void testGuessMappingGivenObject() { Map expected = Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "object"); - assertEquals(expected, LogStructureUtils.guessMapping(explanation, "foo", + assertEquals(expected, guessMapping(explanation, "foo", Arrays.asList(Collections.singletonMap("name", "value1"), Collections.singletonMap("name", "value2")))); } public void testGuessMappingGivenObjectAndNonObject() { - RuntimeException e = expectThrows(RuntimeException.class, () -> LogStructureUtils.guessMapping(explanation, + RuntimeException e = expectThrows(RuntimeException.class, () -> guessMapping(explanation, "foo", Arrays.asList(Collections.singletonMap("name", "value1"), "value2"))); assertEquals("Field [foo] has both object and non-object values - this is not supported by Elasticsearch", e.getMessage()); } - public void testGuessMappings() { + public void testGuessMappingsAndCalculateFieldStats() { Map sample1 = new LinkedHashMap<>(); sample1.put("foo", "not a time"); sample1.put("time", "2018-05-24 17:28:31,735"); @@ -279,7 +268,11 @@ public void testGuessMappings() { sample2.put("bar", 17); sample2.put("nothing", null); - Map mappings = LogStructureUtils.guessMappings(explanation, Arrays.asList(sample1, sample2)); + Tuple, SortedMap> mappingsAndFieldStats = + LogStructureUtils.guessMappingsAndCalculateFieldStats(explanation, Arrays.asList(sample1, sample2)); + assertNotNull(mappingsAndFieldStats); + + Map mappings = mappingsAndFieldStats.v1(); assertNotNull(mappings); assertEquals(Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "keyword"), mappings.get("foo")); Map expectedTimeMapping = new HashMap<>(); @@ -288,5 +281,29 @@ public void testGuessMappings() { assertEquals(expectedTimeMapping, mappings.get("time")); assertEquals(Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "long"), mappings.get("bar")); assertNull(mappings.get("nothing")); + + Map fieldStats = mappingsAndFieldStats.v2(); + assertNotNull(fieldStats); + assertEquals(3, fieldStats.size()); + assertEquals(new FieldStats(2, 2, makeTopHits("not a time", 1, "whatever", 1)), fieldStats.get("foo")); + assertEquals(new FieldStats(2, 2, makeTopHits("2018-05-24 17:28:31,735", 1, "2018-05-29 11:53:02,837", 1)), fieldStats.get("time")); + assertEquals(new FieldStats(2, 2, 17.0, 42.0, 29.5, 29.5, makeTopHits(17.0, 1, 42.0, 1)), fieldStats.get("bar")); + assertNull(fieldStats.get("nothing")); + } + + private Map guessMapping(List explanation, String fieldName, List fieldValues) { + Tuple, FieldStats> mappingAndFieldStats = + LogStructureUtils.guessMappingAndCalculateFieldStats(explanation, fieldName, fieldValues); + return (mappingAndFieldStats == null) ? null : mappingAndFieldStats.v1(); + } + + private List> makeTopHits(Object value1, int count1, Object value2, int count2) { + Map topHit1 = new LinkedHashMap<>(); + topHit1.put("value", value1); + topHit1.put("count", count1); + Map topHit2 = new LinkedHashMap<>(); + topHit2.put("value", value2); + topHit2.put("count", count2); + return Arrays.asList(topHit1, topHit2); } } From 1b025d83f0ec5815d324478529a8f8af5d959376 Mon Sep 17 00:00:00 2001 From: David Roberts Date: Wed, 5 Sep 2018 09:38:30 +0100 Subject: [PATCH 2/2] Address review comments --- .../xpack/ml/logstructurefinder/FieldStats.java | 12 ++++++------ .../logstructurefinder/FieldStatsCalculator.java | 16 ++++++++-------- .../FieldStatsCalculatorTests.java | 8 ++++---- .../ml/logstructurefinder/FieldStatsTests.java | 4 ++-- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/FieldStats.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/FieldStats.java index 08ae2b0c60205..8e8401123aa9f 100644 --- a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/FieldStats.java +++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/FieldStats.java @@ -28,11 +28,11 @@ public class FieldStats implements ToXContentObject { @SuppressWarnings("unchecked") public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>("field_stats", false, - a -> new FieldStats((int) a[0], (int) a[1], (Double) a[2], (Double) a[3], (Double) a[4], (Double) a[5], + a -> new FieldStats((long) a[0], (int) a[1], (Double) a[2], (Double) a[3], (Double) a[4], (Double) a[5], (List>) a[6])); static { - PARSER.declareInt(ConstructingObjectParser.constructorArg(), COUNT); + PARSER.declareLong(ConstructingObjectParser.constructorArg(), COUNT); PARSER.declareInt(ConstructingObjectParser.constructorArg(), CARDINALITY); PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MIN_VALUE); PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), MAX_VALUE); @@ -41,7 +41,7 @@ public class FieldStats implements ToXContentObject { PARSER.declareObjectArray(ConstructingObjectParser.optionalConstructorArg(), (p, c) -> p.mapOrdered(), TOP_HITS); } - private final int count; + private final long count; private final int cardinality; private final Double minValue; private final Double maxValue; @@ -49,11 +49,11 @@ public class FieldStats implements ToXContentObject { private final Double medianValue; private final List> topHits; - FieldStats(int count, int cardinality, List> topHits) { + FieldStats(long count, int cardinality, List> topHits) { this(count, cardinality, null, null, null, null, topHits); } - FieldStats(int count, int cardinality, Double minValue, Double maxValue, Double meanValue, Double medianValue, + FieldStats(long count, int cardinality, Double minValue, Double maxValue, Double meanValue, Double medianValue, List> topHits) { this.count = count; this.cardinality = cardinality; @@ -64,7 +64,7 @@ public class FieldStats implements ToXContentObject { this.topHits = (topHits == null) ? Collections.emptyList() : Collections.unmodifiableList(topHits); } - public int getCount() { + public long getCount() { return count; } diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/FieldStatsCalculator.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/FieldStatsCalculator.java index 6e2ea103fbac5..5f76e48f0c8b1 100644 --- a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/FieldStatsCalculator.java +++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/FieldStatsCalculator.java @@ -22,7 +22,7 @@ */ public class FieldStatsCalculator { - private int count; + private long count; private SortedMap countsByStringValue = new TreeMap<>(); private SortedMap countsByNumericValue = new TreeMap<>(); @@ -102,8 +102,8 @@ Double calculateMedian() { if (count % 2 == 1) { // Simple case - median is middle value - int targetCount = count / 2 + 1; - int currentUpperBound = 0; + long targetCount = count / 2 + 1; + long currentUpperBound = 0; for (Map.Entry entry : countsByNumericValue.entrySet()) { @@ -117,11 +117,11 @@ Double calculateMedian() { } else { // More complicated case - median is average of two middle values - int target1Count = count / 2; - int target2Count = target1Count + 1; + long target1Count = count / 2; + long target2Count = target1Count + 1; double target1Value = Double.NaN; - int prevUpperBound = -1; - int currentUpperBound = 0; + long prevUpperBound = -1; + long currentUpperBound = 0; for (Map.Entry entry : countsByNumericValue.entrySet()) { @@ -171,7 +171,7 @@ private static List> findTopHits(int numTopHits, Map entry : sortedByCount) { - Map topHit = new LinkedHashMap<>(); + Map topHit = new LinkedHashMap<>(3); topHit.put("value", entry.getKey()); topHit.put("count", entry.getValue()); topHits.add(topHit); diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/FieldStatsCalculatorTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/FieldStatsCalculatorTests.java index 91e4cbaef82f5..6d8927c1c2b3a 100644 --- a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/FieldStatsCalculatorTests.java +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/FieldStatsCalculatorTests.java @@ -109,7 +109,7 @@ public void testCalculateGivenEmpty() { FieldStats stats = calculator.calculate(3); - assertEquals(0, stats.getCount()); + assertEquals(0L, stats.getCount()); assertEquals(0, stats.getCardinality()); assertNull(stats.getMinValue()); assertNull(stats.getMaxValue()); @@ -126,7 +126,7 @@ public void testCalculateGivenNumericField() { FieldStats stats = calculator.calculate(3); - assertEquals(11, stats.getCount()); + assertEquals(11L, stats.getCount()); assertEquals(5, stats.getCardinality()); assertEquals(4.0, stats.getMinValue(), 1e-10); assertEquals(16.0, stats.getMaxValue(), 1e-10); @@ -152,7 +152,7 @@ public void testCalculateGivenStringField() { FieldStats stats = calculator.calculate(3); - assertEquals(11, stats.getCount()); + assertEquals(11L, stats.getCount()); assertEquals(5, stats.getCardinality()); assertNull(stats.getMinValue()); assertNull(stats.getMaxValue()); @@ -178,7 +178,7 @@ public void testCalculateGivenMixedField() { FieldStats stats = calculator.calculate(3); - assertEquals(11, stats.getCount()); + assertEquals(11L, stats.getCount()); assertEquals(5, stats.getCardinality()); assertNull(stats.getMinValue()); assertNull(stats.getMaxValue()); diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/FieldStatsTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/FieldStatsTests.java index a69ef70d4b2de..4a95e6631c96a 100644 --- a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/FieldStatsTests.java +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/FieldStatsTests.java @@ -21,8 +21,8 @@ protected FieldStats createTestInstance() { static FieldStats createTestFieldStats() { - int count = randomIntBetween(1, 100000); - int cardinality = randomIntBetween(1, count); + long count = randomIntBetween(1, 100000); + int cardinality = randomIntBetween(1, (int) count); Double minValue = null; Double maxValue = null;