Skip to content

Commit 0083e9c

Browse files
authored
Fingerprint ingest processor (#68415)
1 parent 4660fae commit 0083e9c

File tree

4 files changed

+877
-1
lines changed

4 files changed

+877
-1
lines changed
Lines changed: 311 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,311 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.ingest;
9+
10+
import org.elasticsearch.common.Strings;
11+
import org.elasticsearch.common.util.ByteUtils;
12+
import org.elasticsearch.ingest.AbstractProcessor;
13+
import org.elasticsearch.ingest.ConfigurationUtils;
14+
import org.elasticsearch.ingest.IngestDocument;
15+
import org.elasticsearch.ingest.Processor;
16+
17+
import java.nio.charset.StandardCharsets;
18+
import java.security.MessageDigest;
19+
import java.security.NoSuchAlgorithmException;
20+
import java.time.ZonedDateTime;
21+
import java.util.ArrayList;
22+
import java.util.Arrays;
23+
import java.util.Base64;
24+
import java.util.Comparator;
25+
import java.util.Date;
26+
import java.util.List;
27+
import java.util.Locale;
28+
import java.util.Map;
29+
import java.util.Set;
30+
import java.util.Stack;
31+
32+
import static org.elasticsearch.ingest.ConfigurationUtils.newConfigurationException;
33+
import static org.elasticsearch.ingest.ConfigurationUtils.readBooleanProperty;
34+
35+
/**
36+
* Computes hash based on the content of selected fields in a document.
37+
*/
38+
public final class FingerprintProcessor extends AbstractProcessor {
39+
40+
public static final String TYPE = "fingerprint";
41+
42+
static final byte[] DELIMITER = new byte[] { 0 };
43+
static final byte[] TRUE_BYTES = new byte[] { 1 };
44+
static final byte[] FALSE_BYTES = new byte[] { 2 };
45+
46+
private final List<String> fields;
47+
private final String targetField;
48+
private final ThreadLocal<Hasher> threadLocalHasher;
49+
private final byte[] salt;
50+
private final boolean ignoreMissing;
51+
52+
FingerprintProcessor(
53+
String tag,
54+
String description,
55+
List<String> fields,
56+
String targetField,
57+
byte[] salt,
58+
ThreadLocal<Hasher> threadLocalHasher,
59+
boolean ignoreMissing
60+
) {
61+
super(tag, description);
62+
this.fields = new ArrayList<>(fields);
63+
this.fields.sort(Comparator.naturalOrder());
64+
this.targetField = targetField;
65+
this.threadLocalHasher = threadLocalHasher;
66+
this.salt = salt;
67+
this.ignoreMissing = ignoreMissing;
68+
}
69+
70+
@Override
71+
@SuppressWarnings("unchecked")
72+
public IngestDocument execute(IngestDocument ingestDocument) throws Exception {
73+
Hasher hasher = threadLocalHasher.get();
74+
hasher.reset();
75+
hasher.update(salt);
76+
77+
var values = new Stack<>();
78+
for (int k = fields.size() - 1; k >= 0; k--) {
79+
String field = fields.get(k);
80+
Object value = ingestDocument.getFieldValue(field, Object.class, true);
81+
if (value == null) {
82+
if (ignoreMissing) {
83+
continue;
84+
} else {
85+
throw new IllegalArgumentException("missing field [" + field + "] when calculating fingerprint");
86+
}
87+
}
88+
values.push(value);
89+
}
90+
91+
if (values.size() > 0) {
92+
// iteratively traverse document fields
93+
while (values.isEmpty() == false) {
94+
var value = values.pop();
95+
if (value instanceof List) {
96+
var list = (List<?>) value;
97+
for (int k = list.size() - 1; k >= 0; k--) {
98+
values.push(list.get(k));
99+
}
100+
} else if (value instanceof Set) {
101+
@SuppressWarnings("rawtypes")
102+
var set = (Set<Comparable>) value;
103+
// process set entries in consistent order
104+
var setList = new ArrayList<>(set);
105+
setList.sort(Comparator.naturalOrder());
106+
for (int k = setList.size() - 1; k >= 0; k--) {
107+
values.push(setList.get(k));
108+
}
109+
} else if (value instanceof Map) {
110+
var map = (Map<String, Object>) value;
111+
// process map entries in consistent order
112+
var entryList = new ArrayList<>(map.entrySet());
113+
entryList.sort(Map.Entry.comparingByKey(Comparator.naturalOrder()));
114+
for (int k = entryList.size() - 1; k >= 0; k--) {
115+
values.push(entryList.get(k));
116+
}
117+
} else if (value instanceof Map.Entry) {
118+
var entry = (Map.Entry<?, ?>) value;
119+
hasher.update(DELIMITER);
120+
hasher.update(toBytes(entry.getKey()));
121+
values.push(entry.getValue());
122+
} else {
123+
// feed them through digest.update
124+
hasher.update(DELIMITER);
125+
hasher.update(toBytes(value));
126+
}
127+
}
128+
129+
ingestDocument.setFieldValue(targetField, Base64.getEncoder().encodeToString(hasher.digest()));
130+
}
131+
132+
return ingestDocument;
133+
}
134+
135+
static byte[] toBytes(Object value) {
136+
if (value instanceof String) {
137+
return ((String) value).getBytes(StandardCharsets.UTF_8);
138+
}
139+
if (value instanceof byte[]) {
140+
return (byte[]) value;
141+
}
142+
if (value instanceof Integer) {
143+
byte[] intBytes = new byte[4];
144+
ByteUtils.writeIntLE((Integer) value, intBytes, 0);
145+
return intBytes;
146+
}
147+
if (value instanceof Long) {
148+
byte[] longBytes = new byte[8];
149+
ByteUtils.writeLongLE((Long) value, longBytes, 0);
150+
return longBytes;
151+
}
152+
if (value instanceof Float) {
153+
byte[] floatBytes = new byte[4];
154+
ByteUtils.writeFloatLE((Float) value, floatBytes, 0);
155+
return floatBytes;
156+
}
157+
if (value instanceof Double) {
158+
byte[] doubleBytes = new byte[8];
159+
ByteUtils.writeDoubleLE((Double) value, doubleBytes, 0);
160+
return doubleBytes;
161+
}
162+
if (value instanceof Boolean) {
163+
return (Boolean) value ? TRUE_BYTES : FALSE_BYTES;
164+
}
165+
if (value instanceof ZonedDateTime) {
166+
ZonedDateTime zdt = (ZonedDateTime) value;
167+
byte[] zoneIdBytes = zdt.getZone().getId().getBytes(StandardCharsets.UTF_8);
168+
byte[] zdtBytes = new byte[32 + zoneIdBytes.length];
169+
ByteUtils.writeIntLE(zdt.getYear(), zdtBytes, 0);
170+
ByteUtils.writeIntLE(zdt.getMonthValue(), zdtBytes, 4);
171+
ByteUtils.writeIntLE(zdt.getDayOfMonth(), zdtBytes, 8);
172+
ByteUtils.writeIntLE(zdt.getHour(), zdtBytes, 12);
173+
ByteUtils.writeIntLE(zdt.getMinute(), zdtBytes, 16);
174+
ByteUtils.writeIntLE(zdt.getSecond(), zdtBytes, 20);
175+
ByteUtils.writeIntLE(zdt.getNano(), zdtBytes, 24);
176+
ByteUtils.writeIntLE(zdt.getOffset().getTotalSeconds(), zdtBytes, 28);
177+
System.arraycopy(zoneIdBytes, 0, zdtBytes, 32, zoneIdBytes.length);
178+
return zdtBytes;
179+
}
180+
if (value instanceof Date) {
181+
byte[] dateBytes = new byte[8];
182+
ByteUtils.writeLongLE(((Date) value).getTime(), dateBytes, 0);
183+
return dateBytes;
184+
}
185+
if (value == null) {
186+
return new byte[0];
187+
}
188+
throw new IllegalArgumentException("cannot convert object of type [" + value.getClass().getName() + "] to bytes");
189+
}
190+
191+
public List<String> getFields() {
192+
return fields;
193+
}
194+
195+
public String getTargetField() {
196+
return targetField;
197+
}
198+
199+
public ThreadLocal<Hasher> getThreadLocalHasher() {
200+
return threadLocalHasher;
201+
}
202+
203+
public byte[] getSalt() {
204+
return salt;
205+
}
206+
207+
public boolean isIgnoreMissing() {
208+
return ignoreMissing;
209+
}
210+
211+
@Override
212+
public String getType() {
213+
return TYPE;
214+
}
215+
216+
public static final class Factory implements Processor.Factory {
217+
218+
public static final String[] SUPPORTED_DIGESTS = { "MD5", "SHA-1", "SHA-256", "SHA-512" };
219+
220+
static final String DEFAULT_TARGET = "fingerprint";
221+
static final String DEFAULT_SALT = "";
222+
static final String DEFAULT_METHOD = "SHA-1";
223+
224+
@Override
225+
public FingerprintProcessor create(
226+
Map<String, Processor.Factory> registry,
227+
String processorTag,
228+
String description,
229+
Map<String, Object> config
230+
) throws Exception {
231+
List<String> fields = ConfigurationUtils.readList(TYPE, processorTag, config, "fields");
232+
if (fields.size() < 1) {
233+
throw newConfigurationException(TYPE, processorTag, "fields", "must specify at least one field");
234+
}
235+
236+
String targetField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "target_field", DEFAULT_TARGET);
237+
String salt = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "salt", DEFAULT_SALT);
238+
byte[] saltBytes = Strings.hasText(salt) ? toBytes(salt) : new byte[0];
239+
String method = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "method", DEFAULT_METHOD);
240+
if (Arrays.asList(SUPPORTED_DIGESTS).contains(method) == false) {
241+
throw newConfigurationException(
242+
TYPE,
243+
processorTag,
244+
"method",
245+
String.format(
246+
Locale.ROOT,
247+
"[%s] must be one of the supported hash methods [%s]",
248+
method,
249+
Strings.arrayToCommaDelimitedString(SUPPORTED_DIGESTS)
250+
)
251+
);
252+
}
253+
ThreadLocal<Hasher> threadLocalHasher = ThreadLocal.withInitial(() -> {
254+
try {
255+
return MessageDigestHasher.getInstance(method);
256+
} catch (NoSuchAlgorithmException e) {
257+
throw new IllegalStateException("unexpected exception creating MessageDigest instance for [" + method + "]", e);
258+
}
259+
});
260+
boolean ignoreMissing = readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false);
261+
262+
return new FingerprintProcessor(processorTag, description, fields, targetField, saltBytes, threadLocalHasher, ignoreMissing);
263+
}
264+
}
265+
266+
// simple interface around MessageDigest to facilitate testing
267+
public interface Hasher {
268+
269+
void reset();
270+
271+
void update(byte[] input);
272+
273+
byte[] digest();
274+
275+
String getAlgorithm();
276+
}
277+
278+
static class MessageDigestHasher implements Hasher {
279+
280+
private final MessageDigest md;
281+
282+
private MessageDigestHasher(MessageDigest md) {
283+
this.md = md;
284+
}
285+
286+
static MessageDigestHasher getInstance(String method) throws NoSuchAlgorithmException {
287+
MessageDigest md = MessageDigest.getInstance(method);
288+
return new MessageDigestHasher(md);
289+
}
290+
291+
@Override
292+
public void reset() {
293+
md.reset();
294+
}
295+
296+
@Override
297+
public void update(byte[] input) {
298+
md.update(input);
299+
}
300+
301+
@Override
302+
public byte[] digest() {
303+
return md.digest();
304+
}
305+
306+
@Override
307+
public String getAlgorithm() {
308+
return md.getAlgorithm();
309+
}
310+
}
311+
}

x-pack/plugin/ingest/src/main/java/org/elasticsearch/xpack/ingest/IngestPlugin.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,9 @@ public Map<String, Processor.Factory> getProcessors(Processor.Parameters paramet
2222
NetworkDirectionProcessor.TYPE,
2323
new NetworkDirectionProcessor.Factory(),
2424
CommunityIdProcessor.TYPE,
25-
new CommunityIdProcessor.Factory()
25+
new CommunityIdProcessor.Factory(),
26+
FingerprintProcessor.TYPE,
27+
new FingerprintProcessor.Factory()
2628
);
2729
}
2830
}

0 commit comments

Comments
 (0)