diff --git a/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/FingerprintProcessor.java b/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/FingerprintProcessor.java new file mode 100644 index 0000000000000..65f8d94296fc8 --- /dev/null +++ b/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/FingerprintProcessor.java @@ -0,0 +1,312 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.ingest; + +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.util.ByteUtils; +import org.elasticsearch.ingest.AbstractProcessor; +import org.elasticsearch.ingest.ConfigurationUtils; +import org.elasticsearch.ingest.IngestDocument; +import org.elasticsearch.ingest.Processor; + +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.time.ZonedDateTime; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Base64; +import java.util.Comparator; +import java.util.Date; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import java.util.Stack; + +import static org.elasticsearch.ingest.ConfigurationUtils.newConfigurationException; +import static org.elasticsearch.ingest.ConfigurationUtils.readBooleanProperty; + +/** + * Computes hash based on the content of selected fields in a document. + */ +public final class FingerprintProcessor extends AbstractProcessor { + + public static final String TYPE = "fingerprint"; + + static final byte[] DELIMITER = new byte[] { 0 }; + static final byte[] TRUE_BYTES = new byte[] { 1 }; + static final byte[] FALSE_BYTES = new byte[] { 2 }; + + private final List fields; + private final String targetField; + private final ThreadLocal threadLocalHasher; + private final byte[] salt; + private final boolean ignoreMissing; + + FingerprintProcessor( + String tag, + String description, + List fields, + String targetField, + byte[] salt, + ThreadLocal threadLocalHasher, + boolean ignoreMissing + ) { + super(tag, description); + this.fields = new ArrayList<>(fields); + this.fields.sort(Comparator.naturalOrder()); + this.targetField = targetField; + this.threadLocalHasher = threadLocalHasher; + this.salt = salt; + this.ignoreMissing = ignoreMissing; + } + + @Override + @SuppressWarnings("unchecked") + public IngestDocument execute(IngestDocument ingestDocument) throws Exception { + Hasher hasher = threadLocalHasher.get(); + hasher.reset(); + hasher.update(salt); + + Stack values = new Stack<>(); + for (int k = fields.size() - 1; k >= 0; k--) { + String field = fields.get(k); + Object value = ingestDocument.getFieldValue(field, Object.class, true); + if (value == null) { + if (ignoreMissing) { + continue; + } else { + throw new IllegalArgumentException("missing field [" + field + "] when calculating fingerprint"); + } + } + values.push(value); + } + + if (values.size() > 0) { + // iteratively traverse document fields + while (values.isEmpty() == false) { + Object value = values.pop(); + if (value instanceof List) { + List list = (List) value; + for (int k = list.size() - 1; k >= 0; k--) { + values.push(list.get(k)); + } + } else if (value instanceof Set) { + @SuppressWarnings("rawtypes") + Set set = (Set) value; + // process set entries in consistent order + @SuppressWarnings("rawtypes") + List setList = new ArrayList<>(set); + setList.sort(Comparator.naturalOrder()); + for (int k = setList.size() - 1; k >= 0; k--) { + values.push(setList.get(k)); + } + } else if (value instanceof Map) { + Map map = (Map) value; + // process map entries in consistent order + List> entryList = new ArrayList<>(map.entrySet()); + entryList.sort(Map.Entry.comparingByKey(Comparator.naturalOrder())); + for (int k = entryList.size() - 1; k >= 0; k--) { + values.push(entryList.get(k)); + } + } else if (value instanceof Map.Entry) { + Map.Entry entry = (Map.Entry) value; + hasher.update(DELIMITER); + hasher.update(toBytes(entry.getKey())); + values.push(entry.getValue()); + } else { + // feed them through digest.update + hasher.update(DELIMITER); + hasher.update(toBytes(value)); + } + } + + ingestDocument.setFieldValue(targetField, Base64.getEncoder().encodeToString(hasher.digest())); + } + + return ingestDocument; + } + + static byte[] toBytes(Object value) { + if (value instanceof String) { + return ((String) value).getBytes(StandardCharsets.UTF_8); + } + if (value instanceof byte[]) { + return (byte[]) value; + } + if (value instanceof Integer) { + byte[] intBytes = new byte[4]; + ByteUtils.writeIntLE((Integer) value, intBytes, 0); + return intBytes; + } + if (value instanceof Long) { + byte[] longBytes = new byte[8]; + ByteUtils.writeLongLE((Long) value, longBytes, 0); + return longBytes; + } + if (value instanceof Float) { + byte[] floatBytes = new byte[4]; + ByteUtils.writeFloatLE((Float) value, floatBytes, 0); + return floatBytes; + } + if (value instanceof Double) { + byte[] doubleBytes = new byte[8]; + ByteUtils.writeDoubleLE((Double) value, doubleBytes, 0); + return doubleBytes; + } + if (value instanceof Boolean) { + return (Boolean) value ? TRUE_BYTES : FALSE_BYTES; + } + if (value instanceof ZonedDateTime) { + ZonedDateTime zdt = (ZonedDateTime) value; + byte[] zoneIdBytes = zdt.getZone().getId().getBytes(StandardCharsets.UTF_8); + byte[] zdtBytes = new byte[32 + zoneIdBytes.length]; + ByteUtils.writeIntLE(zdt.getYear(), zdtBytes, 0); + ByteUtils.writeIntLE(zdt.getMonthValue(), zdtBytes, 4); + ByteUtils.writeIntLE(zdt.getDayOfMonth(), zdtBytes, 8); + ByteUtils.writeIntLE(zdt.getHour(), zdtBytes, 12); + ByteUtils.writeIntLE(zdt.getMinute(), zdtBytes, 16); + ByteUtils.writeIntLE(zdt.getSecond(), zdtBytes, 20); + ByteUtils.writeIntLE(zdt.getNano(), zdtBytes, 24); + ByteUtils.writeIntLE(zdt.getOffset().getTotalSeconds(), zdtBytes, 28); + System.arraycopy(zoneIdBytes, 0, zdtBytes, 32, zoneIdBytes.length); + return zdtBytes; + } + if (value instanceof Date) { + byte[] dateBytes = new byte[8]; + ByteUtils.writeLongLE(((Date) value).getTime(), dateBytes, 0); + return dateBytes; + } + if (value == null) { + return new byte[0]; + } + throw new IllegalArgumentException("cannot convert object of type [" + value.getClass().getName() + "] to bytes"); + } + + public List getFields() { + return fields; + } + + public String getTargetField() { + return targetField; + } + + public ThreadLocal getThreadLocalHasher() { + return threadLocalHasher; + } + + public byte[] getSalt() { + return salt; + } + + public boolean isIgnoreMissing() { + return ignoreMissing; + } + + @Override + public String getType() { + return TYPE; + } + + public static final class Factory implements Processor.Factory { + + public static final String[] SUPPORTED_DIGESTS = { "MD5", "SHA-1", "SHA-256", "SHA-512" }; + + static final String DEFAULT_TARGET = "fingerprint"; + static final String DEFAULT_SALT = ""; + static final String DEFAULT_METHOD = "SHA-1"; + + @Override + public FingerprintProcessor create( + Map registry, + String processorTag, + String description, + Map config + ) throws Exception { + List fields = ConfigurationUtils.readList(TYPE, processorTag, config, "fields"); + if (fields.size() < 1) { + throw newConfigurationException(TYPE, processorTag, "fields", "must specify at least one field"); + } + + String targetField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "target_field", DEFAULT_TARGET); + String salt = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "salt", DEFAULT_SALT); + byte[] saltBytes = Strings.hasText(salt) ? toBytes(salt) : new byte[0]; + String method = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "method", DEFAULT_METHOD); + if (Arrays.asList(SUPPORTED_DIGESTS).contains(method) == false) { + throw newConfigurationException( + TYPE, + processorTag, + "method", + String.format( + Locale.ROOT, + "[%s] must be one of the supported hash methods [%s]", + method, + Strings.arrayToCommaDelimitedString(SUPPORTED_DIGESTS) + ) + ); + } + ThreadLocal threadLocalHasher = ThreadLocal.withInitial(() -> { + try { + return MessageDigestHasher.getInstance(method); + } catch (NoSuchAlgorithmException e) { + throw new IllegalStateException("unexpected exception creating MessageDigest instance for [" + method + "]", e); + } + }); + boolean ignoreMissing = readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false); + + return new FingerprintProcessor(processorTag, description, fields, targetField, saltBytes, threadLocalHasher, ignoreMissing); + } + } + + // simple interface around MessageDigest to facilitate testing + public interface Hasher { + + void reset(); + + void update(byte[] input); + + byte[] digest(); + + String getAlgorithm(); + } + + static class MessageDigestHasher implements Hasher { + + private final MessageDigest md; + + private MessageDigestHasher(MessageDigest md) { + this.md = md; + } + + static MessageDigestHasher getInstance(String method) throws NoSuchAlgorithmException { + MessageDigest md = MessageDigest.getInstance(method); + return new MessageDigestHasher(md); + } + + @Override + public void reset() { + md.reset(); + } + + @Override + public void update(byte[] input) { + md.update(input); + } + + @Override + public byte[] digest() { + return md.digest(); + } + + @Override + public String getAlgorithm() { + return md.getAlgorithm(); + } + } +} diff --git a/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/IngestPlugin.java b/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/IngestPlugin.java index 74bfdad8f31d4..4e4b282018ead 100644 --- a/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/IngestPlugin.java +++ b/x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/IngestPlugin.java @@ -20,7 +20,9 @@ public Map getProcessors(Processor.Parameters paramet UriPartsProcessor.TYPE, new UriPartsProcessor.Factory(), CommunityIdProcessor.TYPE, - new CommunityIdProcessor.Factory() + new CommunityIdProcessor.Factory(), + FingerprintProcessor.TYPE, + new FingerprintProcessor.Factory() ); } } diff --git a/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/FingerprintProcessorFactoryTests.java b/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/FingerprintProcessorFactoryTests.java new file mode 100644 index 0000000000000..e9b55335a3eeb --- /dev/null +++ b/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/FingerprintProcessorFactoryTests.java @@ -0,0 +1,125 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.ingest; + +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.test.ESTestCase; +import org.junit.Before; + +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; + +public class FingerprintProcessorFactoryTests extends ESTestCase { + + private FingerprintProcessor.Factory factory; + + @Before + public void init() { + factory = new FingerprintProcessor.Factory(); + } + + public void testCreate() throws Exception { + Map config = new HashMap<>(); + List fieldList = randomList(1, 10, () -> randomAlphaOfLength(8)); + List sortedFieldList = new ArrayList<>(fieldList); + sortedFieldList.sort(Comparator.naturalOrder()); + config.put("fields", fieldList); + String targetField = randomAlphaOfLength(6); + config.put("target_field", targetField); + String salt = randomAlphaOfLength(6); + config.put("salt", salt); + String method = randomFrom(FingerprintProcessor.Factory.SUPPORTED_DIGESTS); + config.put("method", method); + boolean ignoreMissing = randomBoolean(); + config.put("ignore_missing", ignoreMissing); + + String processorTag = randomAlphaOfLength(10); + FingerprintProcessor fingerprintProcessor = factory.create(null, processorTag, null, config); + assertThat(fingerprintProcessor.getTag(), equalTo(processorTag)); + assertThat(fingerprintProcessor.getFields(), equalTo(sortedFieldList)); + assertThat(fingerprintProcessor.getTargetField(), equalTo(targetField)); + assertThat(fingerprintProcessor.getSalt(), equalTo(salt.getBytes(StandardCharsets.UTF_8))); + assertThat(fingerprintProcessor.getThreadLocalHasher().get().getAlgorithm(), equalTo(method)); + assertThat(fingerprintProcessor.isIgnoreMissing(), equalTo(ignoreMissing)); + } + + public void testMethod() throws Exception { + // valid method + Map config = new HashMap<>(); + List fieldList = randomList(1, 10, () -> randomAlphaOfLength(8)); + List sortedFieldList = new ArrayList<>(fieldList); + sortedFieldList.sort(Comparator.naturalOrder()); + config.put("fields", fieldList); + String method = randomFrom(FingerprintProcessor.Factory.SUPPORTED_DIGESTS); + config.put("method", method); + + String processorTag = randomAlphaOfLength(10); + FingerprintProcessor fingerprintProcessor = factory.create(null, processorTag, null, config); + assertThat(fingerprintProcessor.getTag(), equalTo(processorTag)); + assertThat(fingerprintProcessor.getFields(), equalTo(sortedFieldList)); + assertThat(fingerprintProcessor.getThreadLocalHasher().get().getAlgorithm(), equalTo(method)); + + // invalid method + String invalidMethod = randomValueOtherThanMany( + m -> Arrays.asList(FingerprintProcessor.Factory.SUPPORTED_DIGESTS).contains(m), + () -> randomAlphaOfLengthBetween(5, 9) + ); + config.put("fields", fieldList); + config.put("method", invalidMethod); + ElasticsearchException e = expectThrows(ElasticsearchException.class, () -> factory.create(null, processorTag, null, config)); + assertThat(e.getMessage(), containsString("[" + invalidMethod + "] must be one of the supported hash methods [")); + } + + public void testFields() throws Exception { + // valid fields + Map config = new HashMap<>(); + List fieldList = randomList(1, 10, () -> randomAlphaOfLength(8)); + List sortedFieldList = new ArrayList<>(fieldList); + sortedFieldList.sort(Comparator.naturalOrder()); + config.put("fields", fieldList); + + String processorTag = randomAlphaOfLength(10); + FingerprintProcessor fingerprintProcessor = factory.create(null, processorTag, null, config); + assertThat(fingerprintProcessor.getTag(), equalTo(processorTag)); + assertThat(fingerprintProcessor.getFields(), equalTo(sortedFieldList)); + + // fields is a list of length zero + config.put("fields", org.elasticsearch.common.collect.List.of()); + ElasticsearchException e = expectThrows(ElasticsearchException.class, () -> factory.create(null, processorTag, null, config)); + assertThat(e.getMessage(), containsString("must specify at least one field")); + + // fields is missing + e = expectThrows(ElasticsearchException.class, () -> factory.create(null, processorTag, null, config)); + assertThat(e.getMessage(), containsString("[fields] required property is missing")); + } + + public void testDefaults() throws Exception { + String processorTag = randomAlphaOfLength(10); + List fieldList = randomList(1, 10, () -> randomAlphaOfLength(8)); + List sortedFieldList = new ArrayList<>(fieldList); + sortedFieldList.sort(Comparator.naturalOrder()); + HashMap config = new HashMap<>(); + config.put("fields", fieldList); + + FingerprintProcessor fingerprintProcessor = factory.create(null, processorTag, null, config); + assertThat(fingerprintProcessor.getTag(), equalTo(processorTag)); + assertThat(fingerprintProcessor.getFields(), equalTo(sortedFieldList)); + assertThat(fingerprintProcessor.getTargetField(), equalTo(FingerprintProcessor.Factory.DEFAULT_TARGET)); + assertThat(fingerprintProcessor.getSalt(), equalTo(new byte[0])); + assertThat(fingerprintProcessor.getThreadLocalHasher().get().getAlgorithm(), equalTo(FingerprintProcessor.Factory.DEFAULT_METHOD)); + assertThat(fingerprintProcessor.isIgnoreMissing(), equalTo(false)); + } +} diff --git a/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/FingerprintProcessorTests.java b/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/FingerprintProcessorTests.java new file mode 100644 index 0000000000000..1bf82e6fda6e5 --- /dev/null +++ b/x-pack/plugin/ingest/src/test/java/org/elasticsearch/xpack/ingest/FingerprintProcessorTests.java @@ -0,0 +1,502 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.ingest; + +import org.elasticsearch.ingest.IngestDocument; +import org.elasticsearch.test.ESTestCase; + +import java.security.MessageDigest; +import java.time.Instant; +import java.time.ZonedDateTime; +import java.util.ArrayList; +import java.util.Base64; +import java.util.Comparator; +import java.util.Date; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static org.elasticsearch.xpack.ingest.FingerprintProcessor.DELIMITER; +import static org.elasticsearch.xpack.ingest.FingerprintProcessor.toBytes; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; + +public class FingerprintProcessorTests extends ESTestCase { + + public void testBasic() throws Exception { + List fields = new ArrayList<>(); + fields.add("foo"); + fields.add("bar"); + + Map inputMap = new LinkedHashMap<>(); + inputMap.put("foo", "fooValue"); + inputMap.put("bar", "barValue"); + + List expectedValues = org.elasticsearch.common.collect.List.of("barValue", "fooValue"); + + doTestFingerprint(inputMap, fields, expectedValues, "IgxzmZVknx4+Og/eUpvIlqH9PdI="); + } + + public void testFieldsAreConsistentlyOrdered() throws Exception { + List fieldList = randomList(1, 10, () -> randomAlphaOfLength(8)); + List sortedFieldList = new ArrayList<>(fieldList); + sortedFieldList.sort(Comparator.naturalOrder()); + + Map sortedInputMap = new LinkedHashMap<>(); + List expectedValues = new ArrayList<>(); + for (String s : sortedFieldList) { + sortedInputMap.put(s, s); + expectedValues.add(s); + } + String sortedFingerprint = doTestFingerprint(sortedInputMap, sortedFieldList, expectedValues, null); + + Map shuffledInputMap = new LinkedHashMap<>(); + for (String s : fieldList) { + shuffledInputMap.put(s, s); + } + String shuffledFingerprint = doTestFingerprint(shuffledInputMap, fieldList, expectedValues, null); + + assertThat(sortedFingerprint, equalTo(shuffledFingerprint)); + } + + public void testMapEntriesAreConsistentlyOrdered() throws Exception { + List keyList = randomList(1, 10, () -> randomAlphaOfLength(8)); + List sortedKeyList = new ArrayList<>(keyList); + sortedKeyList.sort(Comparator.naturalOrder()); + + Map sortedInputMap = new LinkedHashMap<>(); + List expectedValues = new ArrayList<>(); + for (String s : sortedKeyList) { + sortedInputMap.put(s, s); + expectedValues.add(s); + expectedValues.add(s); + } + Map docMap = new HashMap<>(); + docMap.put("map", sortedInputMap); + String sortedFingerprint = doTestFingerprint(docMap, org.elasticsearch.common.collect.List.of("map"), expectedValues, null); + + Map shuffledInputMap = new LinkedHashMap<>(); + for (String s : keyList) { + shuffledInputMap.put(s, s); + } + docMap = new HashMap<>(); + docMap.put("map", shuffledInputMap); + String shuffledFingerprint = doTestFingerprint(docMap, org.elasticsearch.common.collect.List.of("map"), expectedValues, null); + + assertThat(sortedFingerprint, equalTo(shuffledFingerprint)); + } + + public void testIgnoreMissing() throws Exception { + // only one value contributes to fingerprint + Map docMap = new HashMap<>(); + docMap.put("foo", "foo"); + doTestFingerprint( + docMap, + org.elasticsearch.common.collect.List.of("foo", "bar", "baz"), + org.elasticsearch.common.collect.List.of("foo"), + "WoyqQDn9vALAGmScjA9Z2yg7sos=", + true + ); + + // two values contribute to fingerprint + docMap = new HashMap<>(); + docMap.put("foo", "foo"); + docMap.put("bar", "foo"); + doTestFingerprint( + docMap, + org.elasticsearch.common.collect.List.of("foo", "bar", "baz"), + org.elasticsearch.common.collect.List.of("foo", "foo"), + "vjq2RyU5UA8vzeM5gIbfrOGir7w=", + true + ); + + // three values contribute to fingerprint + docMap = new HashMap<>(); + docMap.put("foo", "foo"); + docMap.put("bar", "foo"); + docMap.put("baz", "foo"); + doTestFingerprint( + docMap, + org.elasticsearch.common.collect.List.of("foo", "bar", "baz"), + org.elasticsearch.common.collect.List.of("foo", "foo", "foo"), + "2Ozd89kaee2AnbrjU8zB6QGn9Wo=", + true + ); + + // error when ignore_missing is false + final Map docMap2 = new HashMap<>(); + docMap2.put("foo", "foo"); + docMap2.put("bar", "foo"); + IllegalArgumentException e = expectThrows( + IllegalArgumentException.class, + () -> doTestFingerprint( + docMap2, + org.elasticsearch.common.collect.List.of("foo", "bar", "baz"), + org.elasticsearch.common.collect.List.of("foo"), + null, + false, + null + ) + ); + assertThat(e.getMessage(), containsString("missing field [baz] when calculating fingerprint")); + } + + public void testDataTypes() throws Exception { + Map typesMap = new HashMap<>(); + typesMap.put("0string", "foo"); + typesMap.put("1byte[]", new byte[] { 0, 1, 2 }); + typesMap.put("2integer", 42); + typesMap.put("3long", 43L); + typesMap.put("4float", 3.14F); + typesMap.put("5double", 3.15D); + typesMap.put("6boolean", true); + typesMap.put("7ZonedDateTime", ZonedDateTime.now()); + typesMap.put("8date", Date.from(Instant.now())); + typesMap.put("9null", null); + + List expectedValues = new ArrayList<>(); + expectedValues.add("0string"); + expectedValues.add("foo"); + expectedValues.add("1byte[]"); + expectedValues.add(new byte[] { 0, 1, 2 }); + expectedValues.add("2integer"); + expectedValues.add(42); + expectedValues.add("3long"); + expectedValues.add(43L); + expectedValues.add("4float"); + expectedValues.add(3.14F); + expectedValues.add("5double"); + expectedValues.add(3.15D); + expectedValues.add("6boolean"); + expectedValues.add(true); + expectedValues.add("7ZonedDateTime"); + expectedValues.add(typesMap.get("7ZonedDateTime")); + expectedValues.add("8date"); + expectedValues.add(typesMap.get("8date")); + expectedValues.add("9null"); + expectedValues.add(null); + + Map docMap = new HashMap<>(); + docMap.put("types", typesMap); + doTestFingerprint(docMap, org.elasticsearch.common.collect.List.of("types"), expectedValues, null); + } + + public void testSalt() throws Exception { + Map inputMap = new LinkedHashMap<>(); + inputMap.put("foo", "foo"); + doTestFingerprint( + inputMap, + org.elasticsearch.common.collect.List.of("foo"), + org.elasticsearch.common.collect.List.of("foo"), + "rWTTCYvRPQAzKXydmKwyC+//dmM=", + "salt" + ); + } + + private String doTestFingerprint( + Map inputMap, + List fields, + List expectedValues, + String expectedFingerprint, + String salt + ) throws Exception { + return doTestFingerprint(inputMap, fields, expectedValues, expectedFingerprint, false, salt); + } + + private String doTestFingerprint( + Map inputMap, + List fields, + List expectedValues, + String expectedFingerprint + ) throws Exception { + return doTestFingerprint(inputMap, fields, expectedValues, expectedFingerprint, false, null); + } + + private String doTestFingerprint( + Map inputMap, + List fields, + List expectedValues, + String expectedFingerprint, + boolean ignoreMissing + ) throws Exception { + return doTestFingerprint(inputMap, fields, expectedValues, expectedFingerprint, ignoreMissing, null); + } + + private String doTestFingerprint( + Map inputMap, + List fields, + List expectedValues, + String expectedFingerprint, + boolean ignoreMissing, + String salt + ) throws Exception { + FingerprintProcessor.Factory factory = new FingerprintProcessor.Factory(); + Map config = new HashMap<>(); + config.put("fields", fields); + config.put("ignore_missing", ignoreMissing); + if (salt != null) { + config.put("salt", salt); + } + FingerprintProcessor fp = factory.create(null, randomAlphaOfLength(10), null, config); + + byte[] expectedBytes = new byte[0]; + if (salt != null) { + expectedBytes = toBytes(salt); + } + for (Object value : expectedValues) { + expectedBytes = concatBytes(expectedBytes, DELIMITER); + expectedBytes = concatBytes(expectedBytes, toBytes(value)); + } + MessageDigest md = MessageDigest.getInstance(FingerprintProcessor.Factory.DEFAULT_METHOD); + expectedBytes = md.digest(expectedBytes); + + IngestDocument input = new IngestDocument(inputMap, org.elasticsearch.common.collect.Map.of()); + IngestDocument output = fp.execute(input); + assertTrue(output.hasField("fingerprint")); + String fingerprint = output.getFieldValue("fingerprint", String.class); + assertThat(fingerprint, equalTo(Base64.getEncoder().encodeToString(expectedBytes))); + if (expectedFingerprint != null) { + assertThat(fingerprint, equalTo(expectedFingerprint)); + } + return fingerprint; + } + + public void testMethod() throws Exception { + List expectedFingerprints = org.elasticsearch.common.collect.List.of( + "b+3QyaPYdnUF1lb5IKE+1g==", + "SX/93t223OurJvgMUOCtSl9hcpg=", + "zDQYTy34tBlmNedlDdn++N7NN+wBY15mCoPDINmUxXc=", + "xNIpYyJzRmg5R0T44ZORC2tgh8N4tVtTFzD5AdBqxmdOuRUjibQQ64lgefkbuZFl8Hv9ze9U6PAmrlgJPcRPGA==" + ); + + Map inputMap = new LinkedHashMap<>(); + inputMap.put("foo", "foo"); + inputMap.put("bar", "bar"); + FingerprintProcessor.Factory factory = new FingerprintProcessor.Factory(); + for (int k = 0; k < FingerprintProcessor.Factory.SUPPORTED_DIGESTS.length; k++) { + Map config = new HashMap<>(); + config.put("fields", org.elasticsearch.common.collect.List.of("foo", "bar")); + config.put("method", FingerprintProcessor.Factory.SUPPORTED_DIGESTS[k]); + + FingerprintProcessor fp = factory.create(null, randomAlphaOfLength(10), null, config); + IngestDocument input = new IngestDocument(inputMap, org.elasticsearch.common.collect.Map.of()); + IngestDocument output = fp.execute(input); + assertTrue(output.hasField("fingerprint")); + String fingerprint = output.getFieldValue("fingerprint", String.class); + assertThat(fingerprint, equalTo(expectedFingerprints.get(k))); + } + } + + public void testBasicObjectTraversal() throws Exception { + List fields = new ArrayList<>(); + fields.add("foo"); + fields.add("bar"); + + Map inputMap = new HashMap<>(); + inputMap.put("foo", "foo1"); + inputMap.put("bar", "bar1"); + doTestObjectTraversal(inputMap, fields, org.elasticsearch.common.collect.List.of("bar1", "foo1")); + } + + public void testObjectTraversalWithLists() throws Exception { + List fields = new ArrayList<>(); + fields.add("foo"); + fields.add("bar"); + + List listInList = new ArrayList<>(); + listInList.add("rat"); + listInList.add("tiger"); + listInList.add("bear"); + + Set setInList = new LinkedHashSet<>(); + setInList.add("dog"); + setInList.add("cat"); + setInList.add("eel"); + + List list = new ArrayList<>(); + list.add("zoo"); + list.add("yak"); + list.add(listInList); + list.add(setInList); + list.add("xor"); + + Map inputMap = new LinkedHashMap<>(); + inputMap.put("foo", list); + inputMap.put("bar", "barValue"); + + List expectedValues = org.elasticsearch.common.collect.List.of( + "barValue", + "zoo", + "yak", + "rat", + "tiger", + "bear", + "cat", + "dog", + "eel", + "xor" + ); + + doTestObjectTraversal(inputMap, fields, expectedValues); + } + + public void testObjectTraversalWithMaps() throws Exception { + List fields = new ArrayList<>(); + fields.add("foo"); + fields.add("bar"); + + Map fooSubMap = new LinkedHashMap<>(); + fooSubMap.put("foo-sub1", "foo3"); + fooSubMap.put("foo-sub2", "foo2"); + Map barSubMap = new LinkedHashMap<>(); + barSubMap.put("bar-sub1", "bar3"); + barSubMap.put("bar-sub2", "bar2"); + Map inputMap = new LinkedHashMap<>(); + inputMap.put("foo", fooSubMap); + inputMap.put("bar", barSubMap); + + List expectedValues = org.elasticsearch.common.collect.List.of( + "bar-sub1", + "bar3", + "bar-sub2", + "bar2", + "foo-sub1", + "foo3", + "foo-sub2", + "foo2" + ); + + doTestObjectTraversal(inputMap, fields, expectedValues); + } + + public void testObjectTraversalWithSets() throws Exception { + List fields = new ArrayList<>(); + fields.add("foo"); + fields.add("bar"); + + Set fooSet = new LinkedHashSet<>(); + fooSet.add("foo3"); + fooSet.add("foo2"); + Set barSet = new LinkedHashSet<>(); + barSet.add("bar3"); + barSet.add("bar2"); + Map inputMap = new LinkedHashMap<>(); + inputMap.put("foo", fooSet); + inputMap.put("bar", barSet); + + List expectedValues = org.elasticsearch.common.collect.List.of("bar2", "bar3", "foo2", "foo3"); + + doTestObjectTraversal(inputMap, fields, expectedValues); + } + + public void testObjectTraversalWithNestedStructures() throws Exception { + List fields = new ArrayList<>(); + fields.add("foo"); + fields.add("bar"); + + Map mapInList = new LinkedHashMap<>(); + mapInList.put("abc", "def"); + mapInList.put("ghi", "jkl"); + + ZonedDateTime now = ZonedDateTime.now(); + List listInMap = new ArrayList<>(); + listInMap.add(now); + listInMap.add("foo"); + listInMap.add(mapInList); + listInMap.add(3.14D); + + Map fooMap = new LinkedHashMap<>(); + fooMap.put("list", listInMap); + fooMap.put("alpha", "beta"); + + Map inputMap = new LinkedHashMap<>(); + inputMap.put("foo", fooMap); + inputMap.put("bar", "barValue"); + + List expectedValues = org.elasticsearch.common.collect.List.of( + "barValue", + "alpha", + "beta", + "list", + now, + "foo", + "abc", + "def", + "ghi", + "jkl", + 3.14D + ); + + doTestObjectTraversal(inputMap, fields, expectedValues); + } + + private void doTestObjectTraversal(Map inputMap, List fields, List expectedValues) throws Exception { + ThreadLocal threadLocalHasher = ThreadLocal.withInitial(TestHasher::new); + FingerprintProcessor fp = new FingerprintProcessor( + FingerprintProcessor.TYPE, + "", + fields, + "fingerprint", + new byte[0], + threadLocalHasher, + false + ); + + byte[] expectedBytes = new byte[0]; + for (Object value : expectedValues) { + expectedBytes = concatBytes(expectedBytes, DELIMITER); + expectedBytes = concatBytes(expectedBytes, toBytes(value)); + } + + IngestDocument input = new IngestDocument(inputMap, org.elasticsearch.common.collect.Map.of()); + IngestDocument output = fp.execute(input); + TestHasher hasher = (TestHasher) threadLocalHasher.get(); + assertThat(hasher.getBytesSeen(), equalTo(expectedBytes)); + assertTrue(output.hasField("fingerprint")); + assertThat(output.getFieldValue("fingerprint", String.class), equalTo(Base64.getEncoder().encodeToString(expectedBytes))); + } + + static byte[] concatBytes(byte[] bytes1, byte[] bytes2) { + byte[] newBytes = new byte[bytes1.length + bytes2.length]; + System.arraycopy(bytes1, 0, newBytes, 0, bytes1.length); + System.arraycopy(bytes2, 0, newBytes, bytes1.length, bytes2.length); + return newBytes; + } + + static class TestHasher implements FingerprintProcessor.Hasher { + + private byte[] bytesSeen = new byte[0]; + + @Override + public void reset() { + bytesSeen = new byte[0]; + } + + @Override + public void update(byte[] input) { + this.bytesSeen = concatBytes(bytesSeen, input); + } + + @Override + public byte[] digest() { + // doesn't reset so that the bytes seen can be verified + return bytesSeen; + } + + public byte[] getBytesSeen() { + return bytesSeen; + } + + public String getAlgorithm() { + return "test"; + } + } +}