From 93d34779d2fa8180d4305774c208a03ffbb03666 Mon Sep 17 00:00:00 2001 From: Tim Vernum Date: Tue, 31 Dec 2019 14:35:58 +1100 Subject: [PATCH 1/4] Increase Size and lower TTL on DLS BitSet Cache The Document Level Security BitSet Cache (see #43669) had a default configuration of "small size, long lifetime". However, this is not a very useful default as the cache is most valuable for BitSets that take a long time to construct, which is (generally speaking) the same ones that operate over a large number of documents and contain many bytes. This commit changes the cache to be "large size, short lifetime" so that it can hold bitsets representing billions of documents, but releases memory quickly. The new defaults are 10% of heap, and 2 hours. This also adds some logging when a single BitSet exceeds the size of the cache and when the cache is full. --- .../DocumentSubsetBitsetCache.java | 72 ++++++++++++--- .../DocumentSubsetBitsetCacheTests.java | 92 +++++++++++++++++++ 2 files changed, 152 insertions(+), 12 deletions(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCache.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCache.java index 6b47c3da2fb58..37c4c0c8be2f8 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCache.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCache.java @@ -38,44 +38,69 @@ import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; /** * This is a cache for {@link BitSet} instances that are used with the {@link DocumentSubsetReader}. * It is bounded by memory size and access time. * + * DLS uses {@link BitSet} instances to track which documents should be visible to the user ("live") and which should not ("dead"). + * This means that there is a bit for each document in a Lucene index (ES shard). + * Consequently, an index with 10 million document will use more than 1Mb of bitset memory for every unique DLS query, and an index + * with 1 billion documents will use more than 100Mb of memory per DLS query. + * Because DLS supports templating queries based on user metadata, there may be many distinct queries in use for each index, even if + * there is only a single active role. + * + * The primary benefit of the cache is to avoid recalculating the "live docs" (visible documents) when a user performs multiple + * consecutive queries across one or more large indices. Given the memory examples above, the cache is only useful if it can hold at + * least 1 large (>100Mb) {@code BitSet} during a user's active session, and ideally should be capable of support multiple simultaneous + * users with distinct DLS queries. + * + * For this reason the default memory usage (weight) for the cache set to 10% of JVM heap ({@link #CACHE_SIZE_SETTING}), so that it + * automatically scales with the size of the Elasticsearch deployment, and can provide benefit to most use cases without needing + * customisation. On a 32Gb heap, a 10% cache would be 3.2Gb which is large enough to store BitSets representing 25 billion docs. + * + * However, because queries can be templated by user metadata and that metadata can change frequently, it is common for the + * effetively lifetime of a single DLS query to be relatively short. We do not want to sacrifice 10% of heap to a cache that is storing + * BitSets that are not longer needed, so we set the TTL on this cache to be 2 hours ({@link #CACHE_TTL_SETTING}). This time has been + * chosen so that it will retain BitSets that are in active use during a user's session, but not be an ongoing drain on memory. + * * @see org.elasticsearch.index.cache.bitset.BitsetFilterCache */ public final class DocumentSubsetBitsetCache implements IndexReader.ClosedListener, Closeable, Accountable { /** - * The TTL defaults to 1 week. We depend on the {@code max_bytes} setting to keep the cache to a sensible size, by evicting LRU - * entries, however there is benefit in reclaiming memory by expiring bitsets that have not be used for some period of time. - * Because {@link org.elasticsearch.xpack.core.security.authz.permission.IndicesPermission.Group#query} can be templated, it is - * not uncommon for a query to only be used for a relatively short period of time (e.g. because a user's metadata changed, or because - * that user is an infrequent user of Elasticsearch). This access time expiry helps free up memory in those circumstances even if the - * cache is never filled. + * The TTL defaults to 2 hours. We default to a large cache size ({@link #CACHE_SIZE_SETTING}), and aggressively + * expire unused entries so that the cache does not hold on to memory unnecessarily. */ static final Setting CACHE_TTL_SETTING = - Setting.timeSetting("xpack.security.dls.bitset.cache.ttl", TimeValue.timeValueHours(24 * 7), Property.NodeScope); + Setting.timeSetting("xpack.security.dls.bitset.cache.ttl", TimeValue.timeValueHours(2), Property.NodeScope); - static final Setting CACHE_SIZE_SETTING = Setting.byteSizeSetting("xpack.security.dls.bitset.cache.size", - new ByteSizeValue(50, ByteSizeUnit.MB), Property.NodeScope); + /** + * The size defaults to 10% of heap so that it automatically scales up with larger node size + */ + static final Setting CACHE_SIZE_SETTING = Setting.memorySizeSetting("xpack.security.dls.bitset.cache.size", + "10%", Property.NodeScope); private static final BitSet NULL_MARKER = new FixedBitSet(0); private final Logger logger; + private final long maxWeightBytes; private final Cache bitsetCache; private final Map> keysByIndex; + private final AtomicLong cacheFullWarningTime; public DocumentSubsetBitsetCache(Settings settings) { this.logger = LogManager.getLogger(getClass()); final TimeValue ttl = CACHE_TTL_SETTING.get(settings); - final ByteSizeValue size = CACHE_SIZE_SETTING.get(settings); + this.maxWeightBytes = CACHE_SIZE_SETTING.get(settings).getBytes(); this.bitsetCache = CacheBuilder.builder() .setExpireAfterAccess(ttl) - .setMaximumWeight(size.getBytes()) + .setMaximumWeight(maxWeightBytes) .weigher((key, bitSet) -> bitSet == NULL_MARKER ? 0 : bitSet.ramBytesUsed()).build(); this.keysByIndex = new ConcurrentHashMap<>(); + this.cacheFullWarningTime = new AtomicLong(0); } @Override @@ -148,7 +173,16 @@ public BitSet getBitSet(final Query query, final LeafReaderContext context) thro // A cache loader is not allowed to return null, return a marker object instead. return NULL_MARKER; } else { - return BitSet.of(s.iterator(), context.reader().maxDoc()); + final BitSet bs = BitSet.of(s.iterator(), context.reader().maxDoc()); + final long bitSetBytes = bs.ramBytesUsed(); + if (bitSetBytes > this.maxWeightBytes) { + logger.warn("built a DLS BitSet that uses [{}] bytes; the BitSet cache has a maximum size of [{}] bytes;" + + " this object cannot be cached and will need to be rebuilt for each use; consider increasing the value of [{}]", + bitSetBytes, maxWeightBytes, CACHE_SIZE_SETTING.getKey()); + } else if (bitSetBytes + bitsetCache.weight() > maxWeightBytes) { + maybeLogCacheFullWarning(); + } + return bs; } }); if (bitSet == NULL_MARKER) { @@ -158,6 +192,20 @@ public BitSet getBitSet(final Query query, final LeafReaderContext context) thro } } + private void maybeLogCacheFullWarning() { + final long nextLogTime = cacheFullWarningTime.get(); + final long now = System.currentTimeMillis(); + if (nextLogTime > now) { + return; + } + final long nextCheck = now + TimeUnit.MINUTES.toMillis(30); + if (cacheFullWarningTime.compareAndSet(nextLogTime, nextCheck)) { + logger.info( + "the Document Level Security BitSet cache is full which may impact performance; consider increasing the value of [{}]", + CACHE_SIZE_SETTING.getKey()); + } + } + public static List> getSettings() { return List.of(CACHE_TTL_SETTING, CACHE_SIZE_SETTING); } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCacheTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCacheTests.java index a8bf5272e94fb..42af9e226ed65 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCacheTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCacheTests.java @@ -6,6 +6,9 @@ package org.elasticsearch.xpack.core.security.authz.accesscontrol; +import org.apache.logging.log4j.Level; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -21,6 +24,7 @@ import org.apache.lucene.util.BitSet; import org.elasticsearch.client.Client; import org.elasticsearch.common.CheckedBiConsumer; +import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.index.IndexSettings; @@ -31,6 +35,7 @@ import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.IndexSettingsModule; +import org.elasticsearch.test.MockLogAppender; import org.hamcrest.Matchers; import java.util.concurrent.TimeUnit; @@ -138,6 +143,93 @@ public void testCacheRespectsMemoryLimit() throws Exception { }); } + public void testLogWarningIfBitSetExceedsCacheSize() throws Exception { + // This value is based on the internal implementation details of lucene's FixedBitSet + // If the implementation changes, this can be safely updated to match the new ram usage for a single bitset + final long expectedBytesPerBitSet = 56; + + // Enough to hold less than 1 bit-sets in the cache + final long maxCacheBytes = expectedBytesPerBitSet - expectedBytesPerBitSet/3; + final Settings settings = Settings.builder() + .put(DocumentSubsetBitsetCache.CACHE_SIZE_SETTING.getKey(), maxCacheBytes + "b") + .build(); + final DocumentSubsetBitsetCache cache = new DocumentSubsetBitsetCache(settings); + assertThat(cache.entryCount(), equalTo(0)); + assertThat(cache.ramBytesUsed(), equalTo(0L)); + + final Logger cacheLogger = LogManager.getLogger(cache.getClass()); + final MockLogAppender mockAppender = new MockLogAppender(); + mockAppender.start(); + try { + Loggers.addAppender(cacheLogger, mockAppender); + mockAppender.addExpectation(new MockLogAppender.SeenEventExpectation( + "[bitset too big]", + cache.getClass().getName(), + Level.WARN, + "built a DLS BitSet that uses [" + expectedBytesPerBitSet + "] bytes; the BitSet cache has a maximum size of [" + + maxCacheBytes + "] bytes; this object cannot be cached and will need to be rebuilt for each use;" + + " consider increasing the value of [xpack.security.dls.bitset.cache.size]" + )); + + runTestOnIndex((shardContext, leafContext) -> { + final TermQueryBuilder queryBuilder = QueryBuilders.termQuery("field-1", "value-1"); + final Query query = queryBuilder.toQuery(shardContext); + final BitSet bitSet = cache.getBitSet(query, leafContext); + assertThat(bitSet, notNullValue()); + assertThat(bitSet.ramBytesUsed(), equalTo(expectedBytesPerBitSet)); + }); + + mockAppender.assertAllExpectationsMatched(); + } finally { + Loggers.removeAppender(cacheLogger, mockAppender); + mockAppender.stop(); + } + } + + public void testLogMessageIfCacheFull() throws Exception { + // This value is based on the internal implementation details of lucene's FixedBitSet + // If the implementation changes, this can be safely updated to match the new ram usage for a single bitset + final long expectedBytesPerBitSet = 56; + + // Enough to hold slightly more than 1 bit-sets in the cache + final long maxCacheBytes = expectedBytesPerBitSet + expectedBytesPerBitSet/3; + final Settings settings = Settings.builder() + .put(DocumentSubsetBitsetCache.CACHE_SIZE_SETTING.getKey(), maxCacheBytes + "b") + .build(); + final DocumentSubsetBitsetCache cache = new DocumentSubsetBitsetCache(settings); + assertThat(cache.entryCount(), equalTo(0)); + assertThat(cache.ramBytesUsed(), equalTo(0L)); + + final Logger cacheLogger = LogManager.getLogger(cache.getClass()); + final MockLogAppender mockAppender = new MockLogAppender(); + mockAppender.start(); + try { + Loggers.addAppender(cacheLogger, mockAppender); + mockAppender.addExpectation(new MockLogAppender.SeenEventExpectation( + "[cache full]", + cache.getClass().getName(), + Level.INFO, + "the Document Level Security BitSet cache is full which may impact performance;" + + " consider increasing the value of [xpack.security.dls.bitset.cache.size]" + )); + + runTestOnIndex((shardContext, leafContext) -> { + for (int i = 1; i <= 3; i++) { + final TermQueryBuilder queryBuilder = QueryBuilders.termQuery("field-" + i, "value-" + i); + final Query query = queryBuilder.toQuery(shardContext); + final BitSet bitSet = cache.getBitSet(query, leafContext); + assertThat(bitSet, notNullValue()); + assertThat(bitSet.ramBytesUsed(), equalTo(expectedBytesPerBitSet)); + } + }); + + mockAppender.assertAllExpectationsMatched(); + } finally { + Loggers.removeAppender(cacheLogger, mockAppender); + mockAppender.stop(); + } + } + public void testCacheRespectsAccessTimeExpiry() throws Exception { final Settings settings = Settings.builder() .put(DocumentSubsetBitsetCache.CACHE_TTL_SETTING.getKey(), "10ms") From e09102bc462b1e0be91affbe3e17d4b50f5b6047 Mon Sep 17 00:00:00 2001 From: Tim Vernum Date: Tue, 31 Dec 2019 15:25:53 +1100 Subject: [PATCH 2/4] Fix javadoc --- .../authz/accesscontrol/DocumentSubsetBitsetCache.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCache.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCache.java index 37c4c0c8be2f8..aacd45389dd09 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCache.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCache.java @@ -54,8 +54,8 @@ * * The primary benefit of the cache is to avoid recalculating the "live docs" (visible documents) when a user performs multiple * consecutive queries across one or more large indices. Given the memory examples above, the cache is only useful if it can hold at - * least 1 large (>100Mb) {@code BitSet} during a user's active session, and ideally should be capable of support multiple simultaneous - * users with distinct DLS queries. + * least 1 large (100Mb or more ) {@code BitSet} during a user's active session, and ideally should be capable of support multiple + * simultaneous users with distinct DLS queries. * * For this reason the default memory usage (weight) for the cache set to 10% of JVM heap ({@link #CACHE_SIZE_SETTING}), so that it * automatically scales with the size of the Elasticsearch deployment, and can provide benefit to most use cases without needing From 1a81f90ef1510899b738510bf52b7733513fa05d Mon Sep 17 00:00:00 2001 From: Tim Vernum Date: Mon, 13 Jan 2020 15:09:54 +1100 Subject: [PATCH 3/4] Update x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCache.java Co-Authored-By: Adrien Grand --- .../security/authz/accesscontrol/DocumentSubsetBitsetCache.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCache.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCache.java index aacd45389dd09..8d695a8802c15 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCache.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCache.java @@ -176,7 +176,7 @@ public BitSet getBitSet(final Query query, final LeafReaderContext context) thro final BitSet bs = BitSet.of(s.iterator(), context.reader().maxDoc()); final long bitSetBytes = bs.ramBytesUsed(); if (bitSetBytes > this.maxWeightBytes) { - logger.warn("built a DLS BitSet that uses [{}] bytes; the BitSet cache has a maximum size of [{}] bytes;" + + logger.warn("built a DLS BitSet that uses [{}] bytes; the DLS BitSet cache has a maximum size of [{}] bytes;" + " this object cannot be cached and will need to be rebuilt for each use; consider increasing the value of [{}]", bitSetBytes, maxWeightBytes, CACHE_SIZE_SETTING.getKey()); } else if (bitSetBytes + bitsetCache.weight() > maxWeightBytes) { From c5de039e2332b233423828809c8308742ed64d38 Mon Sep 17 00:00:00 2001 From: Tim Vernum Date: Mon, 13 Jan 2020 15:36:07 +1100 Subject: [PATCH 4/4] Update test to reflect new log msg --- .../authz/accesscontrol/DocumentSubsetBitsetCacheTests.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCacheTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCacheTests.java index 42af9e226ed65..731d422d46fb2 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCacheTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCacheTests.java @@ -166,7 +166,7 @@ public void testLogWarningIfBitSetExceedsCacheSize() throws Exception { "[bitset too big]", cache.getClass().getName(), Level.WARN, - "built a DLS BitSet that uses [" + expectedBytesPerBitSet + "] bytes; the BitSet cache has a maximum size of [" + + "built a DLS BitSet that uses [" + expectedBytesPerBitSet + "] bytes; the DLS BitSet cache has a maximum size of [" + maxCacheBytes + "] bytes; this object cannot be cached and will need to be rebuilt for each use;" + " consider increasing the value of [xpack.security.dls.bitset.cache.size]" ));