Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 0 additions & 20 deletions core/src/main/java/org/elasticsearch/common/Strings.java
Original file line number Diff line number Diff line change
Expand Up @@ -276,26 +276,6 @@ public static boolean substringMatch(CharSequence str, int index, CharSequence s
return true;
}

/**
* Count the occurrences of the substring in string s.
*
* @param str string to search in. Return 0 if this is null.
* @param sub string to search for. Return 0 if this is null.
*/
public static int countOccurrencesOf(String str, String sub) {
if (str == null || sub == null || str.length() == 0 || sub.length() == 0) {
return 0;
}
int count = 0;
int pos = 0;
int idx;
while ((idx = str.indexOf(sub, pos)) != -1) {
++count;
pos = idx + sub.length();
}
return count;
}

/**
* Replace all occurrences of a substring within a string with
* another string.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexableField;
import org.elasticsearch.Version;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.joda.FormatDateTimeFormatter;
import org.elasticsearch.common.xcontent.XContentHelper;
Expand Down Expand Up @@ -650,47 +649,55 @@ private static Mapper.Builder<?,?> createBuilderFromFieldType(final ParseContext

private static Mapper.Builder<?,?> createBuilderFromDynamicValue(final ParseContext context, XContentParser.Token token, String currentFieldName) throws IOException {
if (token == XContentParser.Token.VALUE_STRING) {
if (context.root().dateDetection()) {
String text = context.parser().text();
// a safe check since "1" gets parsed as well
if (Strings.countOccurrencesOf(text, ":") > 1 || Strings.countOccurrencesOf(text, "-") > 1 || Strings.countOccurrencesOf(text, "/") > 1) {
for (FormatDateTimeFormatter dateTimeFormatter : context.root().dynamicDateTimeFormatters()) {
try {
dateTimeFormatter.parser().parseMillis(text);
Mapper.Builder builder = context.root().findTemplateBuilder(context, currentFieldName, XContentFieldType.DATE);
if (builder == null) {
builder = newDateBuilder(currentFieldName, dateTimeFormatter, Version.indexCreated(context.indexSettings()));
}
return builder;
} catch (Exception e) {
// failure to parse this, continue
}
}
}
String text = context.parser().text();

boolean parseableAsLong = false;
try {
Long.parseLong(text);
parseableAsLong = true;
} catch (NumberFormatException e) {
// not a long number
}
if (context.root().numericDetection()) {
String text = context.parser().text();
try {
Long.parseLong(text);
Mapper.Builder builder = context.root().findTemplateBuilder(context, currentFieldName, XContentFieldType.LONG);
if (builder == null) {
builder = newLongBuilder(currentFieldName, Version.indexCreated(context.indexSettings()));
}
return builder;
} catch (NumberFormatException e) {
// not a long number

boolean parseableAsDouble = false;
try {
Double.parseDouble(text);
parseableAsDouble = true;
} catch (NumberFormatException e) {
// not a double number
}

if (parseableAsLong && context.root().numericDetection()) {
Mapper.Builder builder = context.root().findTemplateBuilder(context, currentFieldName, XContentFieldType.LONG);
if (builder == null) {
builder = newLongBuilder(currentFieldName, Version.indexCreated(context.indexSettings()));
}
try {
Double.parseDouble(text);
Mapper.Builder builder = context.root().findTemplateBuilder(context, currentFieldName, XContentFieldType.DOUBLE);
return builder;
} else if (parseableAsDouble && context.root().numericDetection()) {
Mapper.Builder builder = context.root().findTemplateBuilder(context, currentFieldName, XContentFieldType.DOUBLE);
if (builder == null) {
builder = newFloatBuilder(currentFieldName, Version.indexCreated(context.indexSettings()));
}
return builder;
} else if (parseableAsLong == false && parseableAsDouble == false && context.root().dateDetection()) {
// We refuse to match pure numbers, which are too likely to be
// false positives with date formats that include eg.
// `epoch_millis` or `YYYY`
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess it's difficult to detect ? If the pattern is epoch_millis or composed of number symbol only ? If it's not detectable then maybe add a small warning in the docs.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I don't think we can detect this confidently. I'll work on your test suggestions.

for (FormatDateTimeFormatter dateTimeFormatter : context.root().dynamicDateTimeFormatters()) {
try {
dateTimeFormatter.parser().parseMillis(text);
} catch (IllegalArgumentException e) {
// failure to parse this, continue
continue;
}
Mapper.Builder builder = context.root().findTemplateBuilder(context, currentFieldName, XContentFieldType.DATE);
if (builder == null) {
builder = newFloatBuilder(currentFieldName, Version.indexCreated(context.indexSettings()));
builder = newDateBuilder(currentFieldName, dateTimeFormatter, Version.indexCreated(context.indexSettings()));
}
return builder;
} catch (NumberFormatException e) {
// not a long number
}
}

Mapper.Builder builder = context.root().findTemplateBuilder(context, currentFieldName, XContentFieldType.STRING);
if (builder == null) {
builder = new TextFieldMapper.Builder(currentFieldName)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
import static org.elasticsearch.test.StreamsUtils.copyToStringFromClasspath;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.not;

// TODO: make this a real unit test
public class DocumentParserTests extends ESSingleNodeTestCase {
Expand Down Expand Up @@ -1260,4 +1261,49 @@ public void testIncludeInAllPropagation() throws IOException {
}
assertEquals(new HashSet<>(Arrays.asList("b", "d")), values);
}

public void testDynamicDateDetectionDisabledOnNumbers() throws IOException {
DocumentMapperParser mapperParser = createIndex("test").mapperService().documentMapperParser();
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startArray("dynamic_date_formats")
.value("yyyy")
.endArray().endObject().endObject().string();
DocumentMapper mapper = mapperParser.parse("type", new CompressedXContent(mapping));

BytesReference bytes = XContentFactory.jsonBuilder()
.startObject()
.field("foo", "2016")
.endObject().bytes();

// Even though we matched the dynamic format, we do not match on numbers,
// which are too likely to be false positives
ParsedDocument doc = mapper.parse("test", "type", "1", bytes);
Mapping update = doc.dynamicMappingsUpdate();
assertNotNull(update);
Mapper dateMapper = update.root().getMapper("foo");
assertNotNull(dateMapper);
assertThat(dateMapper, not(instanceOf(DateFieldMapper.class)));
}

public void testDynamicDateDetectionEnabledWithNoSpecialCharacters() throws IOException {
DocumentMapperParser mapperParser = createIndex("test").mapperService().documentMapperParser();
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startArray("dynamic_date_formats")
.value("yyyy MM")
.endArray().endObject().endObject().string();
DocumentMapper mapper = mapperParser.parse("type", new CompressedXContent(mapping));

BytesReference bytes = XContentFactory.jsonBuilder()
.startObject()
.field("foo", "2016 12")
.endObject().bytes();

// We should have generated a date field
ParsedDocument doc = mapper.parse("test", "type", "1", bytes);
Mapping update = doc.dynamicMappingsUpdate();
assertNotNull(update);
Mapper dateMapper = update.root().getMapper("foo");
assertNotNull(dateMapper);
assertThat(dateMapper, instanceOf(DateFieldMapper.class));
}
}