Skip to content

Commit a5a5eae

Browse files
Cache results of geoip lookups (#22231)
With this commit, we introduce a cache to the geoip ingest processor. The cache is enabled by default and caches the 1000 most recent items. The cache size is controlled by the setting `ingest.geoip.cache_size`. Closes #22074
1 parent 80d048a commit a5a5eae

File tree

5 files changed

+137
-4
lines changed

5 files changed

+137
-4
lines changed

docs/plugins/ingest-geoip.asciidoc

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,3 +203,14 @@ Which returns:
203203
}
204204
--------------------------------------------------
205205
// TESTRESPONSE
206+
207+
[[ingest-geoip-settings]]
208+
===== Node Settings
209+
210+
The geoip processor supports the following setting:
211+
212+
`ingest.geoip.cache_size`::
213+
214+
The maximum number of results that should be cached. Defaults to `1000`.
215+
216+
Note that these settings are node settings and apply to all geoip processors, i.e. there is one cache for all defined geoip processors.
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.elasticsearch.ingest.geoip;
20+
21+
import com.fasterxml.jackson.databind.JsonNode;
22+
import com.maxmind.db.NodeCache;
23+
import org.elasticsearch.ElasticsearchException;
24+
import org.elasticsearch.common.cache.Cache;
25+
import org.elasticsearch.common.cache.CacheBuilder;
26+
27+
import java.io.IOException;
28+
import java.util.concurrent.ExecutionException;
29+
30+
final class GeoIpCache implements NodeCache {
31+
private final Cache<Integer, JsonNode> cache;
32+
33+
GeoIpCache(long maxSize) {
34+
this.cache = CacheBuilder.<Integer, JsonNode>builder().setMaximumWeight(maxSize).build();
35+
}
36+
37+
@Override
38+
public JsonNode get(int key, Loader loader) throws IOException {
39+
try {
40+
return cache.computeIfAbsent(key, loader::load);
41+
} catch (ExecutionException e) {
42+
Throwable cause = e.getCause() != null ? e.getCause() : e;
43+
throw new ElasticsearchException(cause);
44+
}
45+
}
46+
}

plugins/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IngestGeoIpPlugin.java

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,38 +26,57 @@
2626
import java.nio.file.Path;
2727
import java.nio.file.PathMatcher;
2828
import java.nio.file.StandardOpenOption;
29+
import java.util.Arrays;
2930
import java.util.Collections;
3031
import java.util.HashMap;
3132
import java.util.Iterator;
33+
import java.util.List;
3234
import java.util.Map;
3335
import java.util.stream.Stream;
3436
import java.util.zip.GZIPInputStream;
3537

38+
import com.maxmind.db.NoCache;
39+
import com.maxmind.db.NodeCache;
3640
import com.maxmind.geoip2.DatabaseReader;
3741
import org.apache.lucene.util.IOUtils;
42+
import org.elasticsearch.common.settings.Setting;
3843
import org.elasticsearch.ingest.Processor;
3944
import org.elasticsearch.plugins.IngestPlugin;
4045
import org.elasticsearch.plugins.Plugin;
4146

4247
public class IngestGeoIpPlugin extends Plugin implements IngestPlugin, Closeable {
48+
public static final Setting<Long> CACHE_SIZE =
49+
Setting.longSetting("ingest.geoip.cache_size", 1000, 0, Setting.Property.NodeScope);
4350

4451
private Map<String, DatabaseReader> databaseReaders;
4552

53+
@Override
54+
public List<Setting<?>> getSettings() {
55+
return Arrays.asList(CACHE_SIZE);
56+
}
57+
4658
@Override
4759
public Map<String, Processor.Factory> getProcessors(Processor.Parameters parameters) {
4860
if (databaseReaders != null) {
4961
throw new IllegalStateException("getProcessors called twice for geoip plugin!!");
5062
}
5163
Path geoIpConfigDirectory = parameters.env.configFile().resolve("ingest-geoip");
64+
NodeCache cache;
65+
long cacheSize = CACHE_SIZE.get(parameters.env.settings());
66+
if (cacheSize > 0) {
67+
cache = new GeoIpCache(cacheSize);
68+
} else {
69+
cache = NoCache.getInstance();
70+
}
5271
try {
53-
databaseReaders = loadDatabaseReaders(geoIpConfigDirectory);
72+
databaseReaders = loadDatabaseReaders(geoIpConfigDirectory, cache);
5473
} catch (IOException e) {
5574
throw new RuntimeException(e);
5675
}
5776
return Collections.singletonMap(GeoIpProcessor.TYPE, new GeoIpProcessor.Factory(databaseReaders));
5877
}
5978

60-
static Map<String, DatabaseReader> loadDatabaseReaders(Path geoIpConfigDirectory) throws IOException {
79+
static Map<String, DatabaseReader> loadDatabaseReaders(Path geoIpConfigDirectory, NodeCache cache) throws IOException {
6180
if (Files.exists(geoIpConfigDirectory) == false && Files.isDirectory(geoIpConfigDirectory)) {
6281
throw new IllegalStateException("the geoip directory [" + geoIpConfigDirectory + "] containing databases doesn't exist");
6382
}
@@ -71,7 +90,8 @@ static Map<String, DatabaseReader> loadDatabaseReaders(Path geoIpConfigDirectory
7190
Path databasePath = iterator.next();
7291
if (Files.isRegularFile(databasePath) && pathMatcher.matches(databasePath)) {
7392
try (InputStream inputStream = new GZIPInputStream(Files.newInputStream(databasePath, StandardOpenOption.READ))) {
74-
databaseReaders.put(databasePath.getFileName().toString(), new DatabaseReader.Builder(inputStream).build());
93+
databaseReaders.put(databasePath.getFileName().toString(),
94+
new DatabaseReader.Builder(inputStream).withCache(cache).build());
7595
}
7696
}
7797
}
@@ -85,4 +105,5 @@ public void close() throws IOException {
85105
IOUtils.close(databaseReaders.values());
86106
}
87107
}
108+
88109
}
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.elasticsearch.ingest.geoip;
20+
21+
import com.fasterxml.jackson.databind.JsonNode;
22+
import com.fasterxml.jackson.databind.node.IntNode;
23+
import com.maxmind.db.NodeCache;
24+
import org.elasticsearch.ElasticsearchException;
25+
import org.elasticsearch.test.ESTestCase;
26+
27+
public class GeoIpCacheTests extends ESTestCase {
28+
public void testCachesAndEvictsResults() throws Exception {
29+
GeoIpCache cache = new GeoIpCache(1);
30+
final NodeCache.Loader loader = key -> new IntNode(key);
31+
32+
JsonNode jsonNode1 = cache.get(1, loader);
33+
assertSame(jsonNode1, cache.get(1, loader));
34+
35+
// evict old key by adding another value
36+
cache.get(2, loader);
37+
38+
assertNotSame(jsonNode1, cache.get(1, loader));
39+
}
40+
41+
public void testThrowsElasticsearchException() throws Exception {
42+
GeoIpCache cache = new GeoIpCache(1);
43+
NodeCache.Loader loader = (int key) -> {
44+
throw new IllegalArgumentException("Illegal key");
45+
};
46+
ElasticsearchException ex = expectThrows(ElasticsearchException.class, () -> cache.get(1, loader));
47+
assertTrue("Expected cause to be of type IllegalArgumentException but was [" + ex.getCause().getClass() + "]",
48+
ex.getCause() instanceof IllegalArgumentException);
49+
assertEquals("Illegal key", ex.getCause().getMessage());
50+
}
51+
}

plugins/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorFactoryTests.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
package org.elasticsearch.ingest.geoip;
2121

2222
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
23+
import com.maxmind.db.NoCache;
24+
import com.maxmind.db.NodeCache;
2325
import com.maxmind.geoip2.DatabaseReader;
2426
import org.elasticsearch.ElasticsearchParseException;
2527
import org.elasticsearch.common.Randomness;
@@ -57,7 +59,9 @@ public static void loadDatabaseReaders() throws IOException {
5759
geoIpConfigDir.resolve("GeoLite2-City.mmdb.gz"));
5860
Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-Country.mmdb.gz")),
5961
geoIpConfigDir.resolve("GeoLite2-Country.mmdb.gz"));
60-
databaseReaders = IngestGeoIpPlugin.loadDatabaseReaders(geoIpConfigDir);
62+
63+
NodeCache cache = randomFrom(NoCache.getInstance(), new GeoIpCache(randomPositiveLong()));
64+
databaseReaders = IngestGeoIpPlugin.loadDatabaseReaders(geoIpConfigDir, cache);
6165
}
6266

6367
@AfterClass

0 commit comments

Comments
 (0)