4545import java .util .concurrent .ConcurrentHashMap ;
4646import java .util .concurrent .ExecutionException ;
4747import java .util .concurrent .ExecutorService ;
48+ import java .util .concurrent .TimeUnit ;
49+ import java .util .concurrent .atomic .AtomicLong ;
4850import java .util .concurrent .locks .ReentrantReadWriteLock ;
4951
5052/**
5153 * This is a cache for {@link BitSet} instances that are used with the {@link DocumentSubsetReader}.
5254 * It is bounded by memory size and access time.
5355 *
56+ * DLS uses {@link BitSet} instances to track which documents should be visible to the user ("live") and which should not ("dead").
57+ * This means that there is a bit for each document in a Lucene index (ES shard).
58+ Consequently, an index with 10 million documents will use more than 1Mb of bitset memory for every unique DLS query, and an index
59+ * with 1 billion documents will use more than 100Mb of memory per DLS query.
60+ * Because DLS supports templating queries based on user metadata, there may be many distinct queries in use for each index, even if
61+ * there is only a single active role.
62+ *
63+ * The primary benefit of the cache is to avoid recalculating the "live docs" (visible documents) when a user performs multiple
64+ * consecutive queries across one or more large indices. Given the memory examples above, the cache is only useful if it can hold at
65+ least 1 large (100Mb or more) {@code BitSet} during a user's active session, and ideally should be capable of supporting multiple
66+ * simultaneous users with distinct DLS queries.
67+ *
68+ For this reason the default memory usage (weight) for the cache is set to 10% of JVM heap ({@link #CACHE_SIZE_SETTING}), so that it
69+ * automatically scales with the size of the Elasticsearch deployment, and can provide benefit to most use cases without needing
70+ * customisation. On a 32Gb heap, a 10% cache would be 3.2Gb which is large enough to store BitSets representing 25 billion docs.
71+ *
72+ * However, because queries can be templated by user metadata and that metadata can change frequently, it is common for the
73+ effective lifetime of a single DLS query to be relatively short. We do not want to sacrifice 10% of heap to a cache that is storing
74+ BitSets that are no longer needed, so we set the TTL on this cache to be 2 hours ({@link #CACHE_TTL_SETTING}). This time has been
75+ * chosen so that it will retain BitSets that are in active use during a user's session, but not be an ongoing drain on memory.
76+ *
5477 * @see org.elasticsearch.index.cache.bitset.BitsetFilterCache
5578 */
5679public final class DocumentSubsetBitsetCache implements IndexReader .ClosedListener , Closeable , Accountable {
5780
5881 /**
59- * The TTL defaults to 1 week. We depend on the {@code max_bytes} setting to keep the cache to a sensible size, by evicting LRU
60- entries, however there is benefit in reclaiming memory by expiring bitsets that have not been used for some period of time.
61- * Because {@link org.elasticsearch.xpack.core.security.authz.permission.IndicesPermission.Group#query} can be templated, it is
62- * not uncommon for a query to only be used for a relatively short period of time (e.g. because a user's metadata changed, or because
63- * that user is an infrequent user of Elasticsearch). This access time expiry helps free up memory in those circumstances even if the
64- * cache is never filled.
82+ * The TTL defaults to 2 hours. We default to a large cache size ({@link #CACHE_SIZE_SETTING}), and aggressively
83+ * expire unused entries so that the cache does not hold on to memory unnecessarily.
6584 */
6685 static final Setting <TimeValue > CACHE_TTL_SETTING =
67- Setting .timeSetting ("xpack.security.dls.bitset.cache.ttl" , TimeValue .timeValueHours (24 * 7 ), Property .NodeScope );
86+ Setting .timeSetting ("xpack.security.dls.bitset.cache.ttl" , TimeValue .timeValueHours (2 ), Property .NodeScope );
6887
69- static final Setting <ByteSizeValue > CACHE_SIZE_SETTING = Setting .byteSizeSetting ("xpack.security.dls.bitset.cache.size" ,
70- new ByteSizeValue (50 , ByteSizeUnit .MB ), Property .NodeScope );
88+ /**
89+ * The size defaults to 10% of heap so that it automatically scales up with larger node size
90+ */
91+ static final Setting <ByteSizeValue > CACHE_SIZE_SETTING = Setting .memorySizeSetting ("xpack.security.dls.bitset.cache.size" ,
92+ "10%" , Property .NodeScope );
7193
7294 private static final BitSet NULL_MARKER = new FixedBitSet (0 );
7395
@@ -85,8 +107,10 @@ public final class DocumentSubsetBitsetCache implements IndexReader.ClosedListen
85107 private final ReleasableLock cacheModificationLock ;
86108 private final ExecutorService cleanupExecutor ;
87109
110+ private final long maxWeightBytes ;
88111 private final Cache <BitsetCacheKey , BitSet > bitsetCache ;
89112 private final Map <IndexReader .CacheKey , Set <BitsetCacheKey >> keysByIndex ;
113+ private final AtomicLong cacheFullWarningTime ;
90114
91115 public DocumentSubsetBitsetCache (Settings settings , ThreadPool threadPool ) {
92116 this (settings , threadPool .executor (ThreadPool .Names .GENERIC ));
@@ -106,15 +130,16 @@ protected DocumentSubsetBitsetCache(Settings settings, ExecutorService cleanupEx
106130 this .cleanupExecutor = cleanupExecutor ;
107131
108132 final TimeValue ttl = CACHE_TTL_SETTING .get (settings );
109- final ByteSizeValue size = CACHE_SIZE_SETTING .get (settings );
133+ this . maxWeightBytes = CACHE_SIZE_SETTING .get (settings ). getBytes ( );
110134 this .bitsetCache = CacheBuilder .<BitsetCacheKey , BitSet >builder ()
111135 .setExpireAfterAccess (ttl )
112- .setMaximumWeight (size . getBytes () )
136+ .setMaximumWeight (maxWeightBytes )
113137 .weigher ((key , bitSet ) -> bitSet == NULL_MARKER ? 0 : bitSet .ramBytesUsed ())
114138 .removalListener (this ::onCacheEviction )
115139 .build ();
116140
117141 this .keysByIndex = new ConcurrentHashMap <>();
142+ this .cacheFullWarningTime = new AtomicLong (0 );
118143 }
119144
120145 @ Override
@@ -214,7 +239,17 @@ public BitSet getBitSet(final Query query, final LeafReaderContext context) thro
214239 // A cache loader is not allowed to return null, return a marker object instead.
215240 return NULL_MARKER ;
216241 } else {
217- return BitSet .of (s .iterator (), context .reader ().maxDoc ());
242+ final BitSet bs = BitSet .of (s .iterator (), context .reader ().maxDoc ());
243+ final long bitSetBytes = bs .ramBytesUsed ();
244+ if (bitSetBytes > this .maxWeightBytes ) {
245+ logger .warn ("built a DLS BitSet that uses [{}] bytes; the DLS BitSet cache has a maximum size of [{}] bytes;" +
246+ " this object cannot be cached and will need to be rebuilt for each use;" +
247+ " consider increasing the value of [{}]" ,
248+ bitSetBytes , maxWeightBytes , CACHE_SIZE_SETTING .getKey ());
249+ } else if (bitSetBytes + bitsetCache .weight () > maxWeightBytes ) {
250+ maybeLogCacheFullWarning ();
251+ }
252+ return bs ;
218253 }
219254 });
220255 if (bitSet == NULL_MARKER ) {
@@ -225,6 +260,20 @@ public BitSet getBitSet(final Query query, final LeafReaderContext context) thro
225260 }
226261 }
227262
263+ private void maybeLogCacheFullWarning () {
264+ final long nextLogTime = cacheFullWarningTime .get ();
265+ final long now = System .currentTimeMillis ();
266+ if (nextLogTime > now ) {
267+ return ;
268+ }
269+ final long nextCheck = now + TimeUnit .MINUTES .toMillis (30 );
270+ if (cacheFullWarningTime .compareAndSet (nextLogTime , nextCheck )) {
271+ logger .info (
272+ "the Document Level Security BitSet cache is full which may impact performance; consider increasing the value of [{}]" ,
273+ CACHE_SIZE_SETTING .getKey ());
274+ }
275+ }
276+
228277 public static List <Setting <?>> getSettings () {
229278 return Arrays .asList (CACHE_TTL_SETTING , CACHE_SIZE_SETTING );
230279 }
0 commit comments