77
88package org .elasticsearch .xpack .ml .aggs .frequentitemsets ;
99
10+ import org .apache .logging .log4j .LogManager ;
11+ import org .apache .logging .log4j .Logger ;
1012import org .apache .lucene .util .BitSet ;
1113import org .apache .lucene .util .FixedBitSet ;
12- import org .apache .lucene .util .LongsRef ;
1314import org .elasticsearch .core .Releasable ;
1415import org .elasticsearch .core .Releasables ;
16+ import org .elasticsearch .xpack .ml .aggs .frequentitemsets .TransactionStore .TopItemIds ;
1517
1618import java .io .IOException ;
1719import java .util .Arrays ;
3032 * if [a, b] is not in T, [a, b, c] cannot be in T either
3133 */
3234class CountingItemSetTraverser implements Releasable {
35+ private static final Logger logger = LogManager .getLogger (CountingItemSetTraverser .class );
3336
3437 // start size and size increment for the occurrences stack
3538 private static final int OCCURENCES_SIZE_INCREMENT = 10 ;
@@ -48,13 +51,19 @@ class CountingItemSetTraverser implements Releasable {
4851 // growable bit set from java util
4952 private java .util .BitSet visited ;
5053
51- CountingItemSetTraverser (TransactionStore transactionStore , int cacheTraversalDepth , int cacheNumberOfTransactions , long minCount ) {
54+ CountingItemSetTraverser (
55+ TransactionStore transactionStore ,
56+ TopItemIds topItemIds ,
57+ int cacheTraversalDepth ,
58+ int cacheNumberOfTransactions ,
59+ long minCount
60+ ) {
5261 this .transactionStore = transactionStore ;
5362
5463 boolean success = false ;
5564 try {
5665 // we allocate 2 big arrays, if the 2nd allocation fails, ensure we clean up
57- this .topItemSetTraverser = transactionStore . getTopItemIdTraverser ( );
66+ this .topItemSetTraverser = new ItemSetTraverser ( topItemIds );
5867 this .topTransactionIds = transactionStore .getTopTransactionIds ();
5968 success = true ;
6069 } finally {
@@ -80,11 +89,15 @@ public boolean next(long earlyStopMinCount) throws IOException {
8089 final long totalTransactionCount = transactionStore .getTotalTransactionCount ();
8190
8291 int depth = topItemSetTraverser .getNumberOfItems ();
92+ long occurencesOfSingleItem = transactionStore .getItemCount (topItemSetTraverser .getItemId ());
93+
8394 if (depth == 1 ) {
8495 // at the 1st level, we can take the count directly from the transaction store
85- occurencesStack [0 ] = transactionStore .getItemCount (topItemSetTraverser .getItemId ());
96+ occurencesStack [0 ] = occurencesOfSingleItem ;
97+ return true ;
98+ } else if (occurencesOfSingleItem < earlyStopMinCount ) {
99+ rememberCountInStack (depth , occurencesOfSingleItem );
86100 return true ;
87-
88101 // up to a certain depth, store results in a cache matrix
89102 } else if (depth < cacheTraversalDepth ) {
90103 // get the cached skip count
@@ -187,7 +200,7 @@ public long getCount() {
187200 /**
188201 * Get the count of the item set without the last item
189202 */
190- public long getPreviousCount () {
203+ public long getParentCount () {
191204 if (topItemSetTraverser .getNumberOfItems () > 1 ) {
192205 return occurencesStack [topItemSetTraverser .getNumberOfItems () - 2 ];
193206 }
@@ -201,7 +214,7 @@ public boolean hasBeenVisited() {
201214 return true ;
202215 }
203216
204- public boolean hasPredecessorBeenVisited () {
217+ public boolean hasParentBeenVisited () {
205218 if (topItemSetTraverser .getNumberOfItems () > 1 ) {
206219 return visited .get (topItemSetTraverser .getNumberOfItems () - 2 );
207220 }
@@ -214,7 +227,7 @@ public void setVisited() {
214227 }
215228 }
216229
217- public void setPredecessorVisited () {
230+ public void setParentVisited () {
218231 if (topItemSetTraverser .getNumberOfItems () > 1 ) {
219232 visited .set (topItemSetTraverser .getNumberOfItems () - 2 );
220233 }
@@ -228,10 +241,15 @@ public int getNumberOfItems() {
228241 }
229242
230243 /**
231- * Get the current item set
244+ *
245+ * Get a bitset representation of the current item set
232246 */
233- public LongsRef getItemSet () {
234- return topItemSetTraverser .getItemSet ();
247+ public ItemSetBitSet getItemSetBitSet () {
248+ return topItemSetTraverser .getItemSetBitSet ();
249+ }
250+
251+ public ItemSetBitSet getParentItemSetBitSet () {
252+ return topItemSetTraverser .getParentItemSetBitSet ();
235253 }
236254
237255 /**
@@ -250,7 +268,7 @@ public boolean atLeaf() {
250268
251269 @ Override
252270 public void close () {
253- Releasables .close (topItemSetTraverser , topTransactionIds );
271+ Releasables .close (topTransactionIds );
254272 }
255273
256274 // remember the count in the stack without tracking push and pop
0 commit comments