Skip to content

Commit 9350d8e

Browse files
Reduce heap-memory usage of ingest-geoip plugin (#28963)
With this commit we reduce heap usage of the ingest-geoip plugin by memory-mapping the database files. Previously, we have stored these files gzip-compressed but this has resulted that data are loaded on the heap. Closes #28782
1 parent 8fe5cb0 commit 9350d8e

File tree

7 files changed

+66
-60
lines changed

7 files changed

+66
-60
lines changed

docs/plugins/ingest-geoip.asciidoc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@ The ingest-geoip plugin ships by default with the GeoLite2 City, GeoLite2 Countr
99
under the CCA-ShareAlike 4.0 license. For more details see, http://dev.maxmind.com/geoip/geoip2/geolite2/
1010

1111
The GeoIP processor can run with other geoip2 databases from Maxmind. The files must be copied into the geoip config directory,
12-
and the `database_file` option should be used to specify the filename of the custom database. Custom database files must be compressed
13-
with gzip. The geoip config directory is located at `$ES_HOME/config/ingest-geoip` and holds the shipped databases too.
12+
and the `database_file` option should be used to specify the filename of the custom database. Custom database files must be stored
13+
uncompressed. The geoip config directory is located at `$ES_HOME/config/ingest-geoip` and holds the shipped databases too.
1414

1515
:plugin_name: ingest-geoip
1616
include::install_remove.asciidoc[]
@@ -25,7 +25,7 @@ include::install_remove.asciidoc[]
2525
| Name | Required | Default | Description
2626
| `field` | yes | - | The field to get the ip address from for the geographical lookup.
2727
| `target_field` | no | geoip | The field that will hold the geographical information looked up from the Maxmind database.
28-
| `database_file` | no | GeoLite2-City.mmdb | The database filename in the geoip config directory. The ingest-geoip plugin ships with the GeoLite2-City.mmdb.gz and GeoLite2-Country.mmdb.gz files.
28+
| `database_file` | no | GeoLite2-City.mmdb | The database filename in the geoip config directory. The ingest-geoip plugin ships with the GeoLite2-City.mmdb, GeoLite2-Country.mmdb and GeoLite2-ASN.mmdb files.
2929
| `properties` | no | [`continent_name`, `country_iso_code`, `region_name`, `city_name`, `location`] * | Controls what properties are added to the `target_field` based on the geoip lookup.
3030
| `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document
3131
|======
@@ -101,7 +101,7 @@ PUT _ingest/pipeline/geoip
101101
"geoip" : {
102102
"field" : "ip",
103103
"target_field" : "geo",
104-
"database_file" : "GeoLite2-Country.mmdb.gz"
104+
"database_file" : "GeoLite2-Country.mmdb"
105105
}
106106
}
107107
]

plugins/ingest-geoip/build.gradle

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,13 @@ dependencies {
3030
compile("com.fasterxml.jackson.core:jackson-databind:${versions.jackson}")
3131
compile('com.maxmind.db:maxmind-db:1.2.2')
3232

33-
testCompile 'org.elasticsearch:geolite2-databases:20171206'
33+
testCompile 'org.elasticsearch:geolite2-databases:20180303'
3434
}
3535

3636
task copyDefaultGeoIp2DatabaseFiles(type: Copy) {
3737
from { zipTree(configurations.testCompile.files.find { it.name.contains('geolite2-databases')}) }
3838
into "${project.buildDir}/ingest-geoip"
39-
include "*.mmdb.gz"
39+
include "*.mmdb"
4040
}
4141

4242
project.bundlePlugin.dependsOn(copyDefaultGeoIp2DatabaseFiles)

plugins/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpProcessor.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@
3737
import org.elasticsearch.ingest.IngestDocument;
3838
import org.elasticsearch.ingest.Processor;
3939

40-
import java.io.IOException;
4140
import java.net.InetAddress;
4241
import java.security.AccessController;
4342
import java.security.PrivilegedAction;
@@ -68,8 +67,7 @@ public final class GeoIpProcessor extends AbstractProcessor {
6867
private final Set<Property> properties;
6968
private final boolean ignoreMissing;
7069

71-
GeoIpProcessor(String tag, String field, DatabaseReader dbReader, String targetField, Set<Property> properties,
72-
boolean ignoreMissing) throws IOException {
70+
GeoIpProcessor(String tag, String field, DatabaseReader dbReader, String targetField, Set<Property> properties, boolean ignoreMissing) {
7371
super(tag);
7472
this.field = field;
7573
this.targetField = targetField;
@@ -323,7 +321,7 @@ public GeoIpProcessor create(Map<String, Processor.Factory> registry, String pro
323321
Map<String, Object> config) throws Exception {
324322
String ipField = readStringProperty(TYPE, processorTag, config, "field");
325323
String targetField = readStringProperty(TYPE, processorTag, config, "target_field", "geoip");
326-
String databaseFile = readStringProperty(TYPE, processorTag, config, "database_file", "GeoLite2-City.mmdb.gz");
324+
String databaseFile = readStringProperty(TYPE, processorTag, config, "database_file", "GeoLite2-City.mmdb");
327325
List<String> propertyNames = readOptionalList(TYPE, processorTag, config, "properties");
328326
boolean ignoreMissing = readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false);
329327

plugins/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IngestGeoIpPlugin.java

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,28 +21,28 @@
2121

2222
import com.maxmind.db.NoCache;
2323
import com.maxmind.db.NodeCache;
24+
import com.maxmind.db.Reader;
2425
import com.maxmind.geoip2.DatabaseReader;
2526
import org.apache.lucene.util.IOUtils;
27+
import org.elasticsearch.common.Booleans;
28+
import org.elasticsearch.common.SuppressForbidden;
2629
import org.elasticsearch.common.settings.Setting;
2730
import org.elasticsearch.ingest.Processor;
2831
import org.elasticsearch.plugins.IngestPlugin;
2932
import org.elasticsearch.plugins.Plugin;
3033

3134
import java.io.Closeable;
3235
import java.io.IOException;
33-
import java.io.InputStream;
3436
import java.nio.file.Files;
3537
import java.nio.file.Path;
3638
import java.nio.file.PathMatcher;
37-
import java.nio.file.StandardOpenOption;
3839
import java.util.Arrays;
3940
import java.util.Collections;
4041
import java.util.HashMap;
4142
import java.util.Iterator;
4243
import java.util.List;
4344
import java.util.Map;
4445
import java.util.stream.Stream;
45-
import java.util.zip.GZIPInputStream;
4646

4747
public class IngestGeoIpPlugin extends Plugin implements IngestPlugin, Closeable {
4848
public static final Setting<Long> CACHE_SIZE =
@@ -80,28 +80,38 @@ static Map<String, DatabaseReaderLazyLoader> loadDatabaseReaders(Path geoIpConfi
8080
if (Files.exists(geoIpConfigDirectory) == false && Files.isDirectory(geoIpConfigDirectory)) {
8181
throw new IllegalStateException("the geoip directory [" + geoIpConfigDirectory + "] containing databases doesn't exist");
8282
}
83-
83+
boolean loadDatabaseOnHeap = Booleans.parseBoolean(System.getProperty("es.geoip.load_db_on_heap", "false"));
8484
Map<String, DatabaseReaderLazyLoader> databaseReaders = new HashMap<>();
8585
try (Stream<Path> databaseFiles = Files.list(geoIpConfigDirectory)) {
86-
PathMatcher pathMatcher = geoIpConfigDirectory.getFileSystem().getPathMatcher("glob:**.mmdb.gz");
86+
PathMatcher pathMatcher = geoIpConfigDirectory.getFileSystem().getPathMatcher("glob:**.mmdb");
8787
// Use iterator instead of forEach otherwise IOException needs to be caught twice...
8888
Iterator<Path> iterator = databaseFiles.iterator();
8989
while (iterator.hasNext()) {
9090
Path databasePath = iterator.next();
9191
if (Files.isRegularFile(databasePath) && pathMatcher.matches(databasePath)) {
9292
String databaseFileName = databasePath.getFileName().toString();
93-
DatabaseReaderLazyLoader holder = new DatabaseReaderLazyLoader(databaseFileName, () -> {
94-
try (InputStream inputStream = new GZIPInputStream(Files.newInputStream(databasePath, StandardOpenOption.READ))) {
95-
return new DatabaseReader.Builder(inputStream).withCache(cache).build();
96-
}
97-
});
93+
DatabaseReaderLazyLoader holder = new DatabaseReaderLazyLoader(databaseFileName,
94+
() -> {
95+
DatabaseReader.Builder builder = createDatabaseBuilder(databasePath).withCache(cache);
96+
if (loadDatabaseOnHeap) {
97+
builder.fileMode(Reader.FileMode.MEMORY);
98+
} else {
99+
builder.fileMode(Reader.FileMode.MEMORY_MAPPED);
100+
}
101+
return builder.build();
102+
});
98103
databaseReaders.put(databaseFileName, holder);
99104
}
100105
}
101106
}
102107
return Collections.unmodifiableMap(databaseReaders);
103108
}
104109

110+
@SuppressForbidden(reason = "Maxmind API requires java.io.File")
111+
private static DatabaseReader.Builder createDatabaseBuilder(Path databasePath) {
112+
return new DatabaseReader.Builder(databasePath.toFile());
113+
}
114+
105115
@Override
106116
public void close() throws IOException {
107117
if (databaseReaders != null) {

plugins/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorFactoryTests.java

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -54,12 +54,12 @@ public static void loadDatabaseReaders() throws IOException {
5454
Path configDir = createTempDir();
5555
Path geoIpConfigDir = configDir.resolve("ingest-geoip");
5656
Files.createDirectories(geoIpConfigDir);
57-
Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-City.mmdb.gz")),
58-
geoIpConfigDir.resolve("GeoLite2-City.mmdb.gz"));
59-
Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-Country.mmdb.gz")),
60-
geoIpConfigDir.resolve("GeoLite2-Country.mmdb.gz"));
61-
Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-ASN.mmdb.gz")),
62-
geoIpConfigDir.resolve("GeoLite2-ASN.mmdb.gz"));
57+
Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-City.mmdb")),
58+
geoIpConfigDir.resolve("GeoLite2-City.mmdb"));
59+
Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-Country.mmdb")),
60+
geoIpConfigDir.resolve("GeoLite2-Country.mmdb"));
61+
Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-ASN.mmdb")),
62+
geoIpConfigDir.resolve("GeoLite2-ASN.mmdb"));
6363

6464
NodeCache cache = randomFrom(NoCache.getInstance(), new GeoIpCache(randomNonNegativeLong()));
6565
databaseReaders = IngestGeoIpPlugin.loadDatabaseReaders(geoIpConfigDir, cache);
@@ -111,7 +111,7 @@ public void testCountryBuildDefaults() throws Exception {
111111

112112
Map<String, Object> config = new HashMap<>();
113113
config.put("field", "_field");
114-
config.put("database_file", "GeoLite2-Country.mmdb.gz");
114+
config.put("database_file", "GeoLite2-Country.mmdb");
115115
String processorTag = randomAlphaOfLength(10);
116116

117117
GeoIpProcessor processor = factory.create(null, processorTag, config);
@@ -129,7 +129,7 @@ public void testAsnBuildDefaults() throws Exception {
129129

130130
Map<String, Object> config = new HashMap<>();
131131
config.put("field", "_field");
132-
config.put("database_file", "GeoLite2-ASN.mmdb.gz");
132+
config.put("database_file", "GeoLite2-ASN.mmdb");
133133
String processorTag = randomAlphaOfLength(10);
134134

135135
GeoIpProcessor processor = factory.create(null, processorTag, config);
@@ -157,7 +157,7 @@ public void testBuildDbFile() throws Exception {
157157
GeoIpProcessor.Factory factory = new GeoIpProcessor.Factory(databaseReaders);
158158
Map<String, Object> config = new HashMap<>();
159159
config.put("field", "_field");
160-
config.put("database_file", "GeoLite2-Country.mmdb.gz");
160+
config.put("database_file", "GeoLite2-Country.mmdb");
161161
GeoIpProcessor processor = factory.create(null, null, config);
162162
assertThat(processor.getField(), equalTo("_field"));
163163
assertThat(processor.getTargetField(), equalTo("geoip"));
@@ -170,7 +170,7 @@ public void testBuildWithCountryDbAndAsnFields() throws Exception {
170170
GeoIpProcessor.Factory factory = new GeoIpProcessor.Factory(databaseReaders);
171171
Map<String, Object> config = new HashMap<>();
172172
config.put("field", "_field");
173-
config.put("database_file", "GeoLite2-Country.mmdb.gz");
173+
config.put("database_file", "GeoLite2-Country.mmdb");
174174
EnumSet<GeoIpProcessor.Property> asnOnlyProperties = EnumSet.copyOf(GeoIpProcessor.Property.ALL_ASN_PROPERTIES);
175175
asnOnlyProperties.remove(GeoIpProcessor.Property.IP);
176176
String asnProperty = RandomPicks.randomFrom(Randomness.get(), asnOnlyProperties).toString();
@@ -184,7 +184,7 @@ public void testBuildWithAsnDbAndCityFields() throws Exception {
184184
GeoIpProcessor.Factory factory = new GeoIpProcessor.Factory(databaseReaders);
185185
Map<String, Object> config = new HashMap<>();
186186
config.put("field", "_field");
187-
config.put("database_file", "GeoLite2-ASN.mmdb.gz");
187+
config.put("database_file", "GeoLite2-ASN.mmdb");
188188
EnumSet<GeoIpProcessor.Property> cityOnlyProperties = EnumSet.copyOf(GeoIpProcessor.Property.ALL_CITY_PROPERTIES);
189189
cityOnlyProperties.remove(GeoIpProcessor.Property.IP);
190190
String cityProperty = RandomPicks.randomFrom(Randomness.get(), cityOnlyProperties).toString();
@@ -199,9 +199,9 @@ public void testBuildNonExistingDbFile() throws Exception {
199199

200200
Map<String, Object> config = new HashMap<>();
201201
config.put("field", "_field");
202-
config.put("database_file", "does-not-exist.mmdb.gz");
202+
config.put("database_file", "does-not-exist.mmdb");
203203
Exception e = expectThrows(ElasticsearchParseException.class, () -> factory.create(null, null, config));
204-
assertThat(e.getMessage(), equalTo("[database_file] database file [does-not-exist.mmdb.gz] doesn't exist"));
204+
assertThat(e.getMessage(), equalTo("[database_file] database file [does-not-exist.mmdb] doesn't exist"));
205205
}
206206

207207
public void testBuildFields() throws Exception {
@@ -249,12 +249,12 @@ public void testLazyLoading() throws Exception {
249249
Path configDir = createTempDir();
250250
Path geoIpConfigDir = configDir.resolve("ingest-geoip");
251251
Files.createDirectories(geoIpConfigDir);
252-
Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-City.mmdb.gz")),
253-
geoIpConfigDir.resolve("GeoLite2-City.mmdb.gz"));
254-
Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-Country.mmdb.gz")),
255-
geoIpConfigDir.resolve("GeoLite2-Country.mmdb.gz"));
256-
Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-ASN.mmdb.gz")),
257-
geoIpConfigDir.resolve("GeoLite2-ASN.mmdb.gz"));
252+
Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-City.mmdb")),
253+
geoIpConfigDir.resolve("GeoLite2-City.mmdb"));
254+
Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-Country.mmdb")),
255+
geoIpConfigDir.resolve("GeoLite2-Country.mmdb"));
256+
Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-ASN.mmdb")),
257+
geoIpConfigDir.resolve("GeoLite2-ASN.mmdb"));
258258

259259
// Loading another database reader instances, because otherwise we can't test lazy loading as the
260260
// database readers used at class level are reused between tests. (we want to keep that otherwise running this
@@ -268,15 +268,15 @@ public void testLazyLoading() throws Exception {
268268

269269
Map<String, Object> config = new HashMap<>();
270270
config.put("field", "_field");
271-
config.put("database_file", "GeoLite2-City.mmdb.gz");
271+
config.put("database_file", "GeoLite2-City.mmdb");
272272
factory.create(null, "_tag", config);
273273
config = new HashMap<>();
274274
config.put("field", "_field");
275-
config.put("database_file", "GeoLite2-Country.mmdb.gz");
275+
config.put("database_file", "GeoLite2-Country.mmdb");
276276
factory.create(null, "_tag", config);
277277
config = new HashMap<>();
278278
config.put("field", "_field");
279-
config.put("database_file", "GeoLite2-ASN.mmdb.gz");
279+
config.put("database_file", "GeoLite2-ASN.mmdb");
280280
factory.create(null, "_tag", config);
281281

282282
for (DatabaseReaderLazyLoader lazyLoader : databaseReaders.values()) {

0 commit comments

Comments
 (0)