2121
2222import org .apache .lucene .index .IndexCommit ;
2323import org .apache .lucene .index .IndexDeletionPolicy ;
24- import org .apache . lucene . index .SnapshotDeletionPolicy ;
24+ import org .elasticsearch . index .seqno . SequenceNumbers ;
2525import org .elasticsearch .index .translog .Translog ;
2626import org .elasticsearch .index .translog .TranslogDeletionPolicy ;
2727
2828import java .io .IOException ;
2929import java .util .List ;
30+ import java .util .Map ;
31+ import java .util .function .LongSupplier ;
3032
3133/**
3234 * An {@link IndexDeletionPolicy} that coordinates between Lucene's commits and the retention of translog generation files,
3335 * making sure that all translog files that are needed to recover from the Lucene commit are not deleted.
36+ * <p>
37+ * In particular, this policy deletes the index commits whose max sequence number is at most the current global
38+ * checkpoint, except for the one among them that has the highest max sequence number.
3439 */
35- class CombinedDeletionPolicy extends IndexDeletionPolicy {
36-
40+ final class CombinedDeletionPolicy extends IndexDeletionPolicy {
3741 private final TranslogDeletionPolicy translogDeletionPolicy ;
3842 private final EngineConfig .OpenMode openMode ;
43+ private final LongSupplier globalCheckpointSupplier ;
3944
40- private final SnapshotDeletionPolicy indexDeletionPolicy ;
41-
42- CombinedDeletionPolicy (SnapshotDeletionPolicy indexDeletionPolicy , TranslogDeletionPolicy translogDeletionPolicy ,
43- EngineConfig .OpenMode openMode ) {
44- this .indexDeletionPolicy = indexDeletionPolicy ;
45- this .translogDeletionPolicy = translogDeletionPolicy ;
/**
 * Creates the policy by storing its three collaborators.
 *
 * @param openMode                 the open mode; {@code onInit} switches on it to decide how existing commits are handled
 * @param translogDeletionPolicy   the translog policy that receives the translog generations read from index commits
 * @param globalCheckpointSupplier queried whenever the commit to keep is computed, to obtain the current global checkpoint
 */
45+ CombinedDeletionPolicy (EngineConfig .OpenMode openMode , TranslogDeletionPolicy translogDeletionPolicy ,
46+ LongSupplier globalCheckpointSupplier ) {
4647 this .openMode = openMode ;
48+ this .translogDeletionPolicy = translogDeletionPolicy ;
49+ this .globalCheckpointSupplier = globalCheckpointSupplier ;
4750 }
4851
4952 @ Override
5053 public void onInit (List <? extends IndexCommit > commits ) throws IOException {
51- indexDeletionPolicy .onInit (commits );
5254 switch (openMode ) {
5355 case CREATE_INDEX_AND_TRANSLOG :
56+ assert commits .isEmpty () : "index is created, but we have commits" ;
5457 break ;
5558 case OPEN_INDEX_CREATE_TRANSLOG :
5659 assert commits .isEmpty () == false : "index is opened, but we have no commits" ;
60+ // When an engine starts with OPEN_INDEX_CREATE_TRANSLOG, a new fresh index commit will be created immediately.
61+ // We therefore can simply skip processing here as `onCommit` will be called right after with a new commit.
5762 break ;
5863 case OPEN_INDEX_AND_TRANSLOG :
5964 assert commits .isEmpty () == false : "index is opened, but we have no commits" ;
60- setLastCommittedTranslogGeneration (commits );
65+ onCommit (commits );
6166 break ;
6267 default :
6368 throw new IllegalArgumentException ("unknown openMode [" + openMode + "]" );
@@ -66,24 +71,56 @@ public void onInit(List<? extends IndexCommit> commits) throws IOException {
6671
/**
 * Deletes every commit that precedes the position returned by {@code indexOfKeptCommits}, then hands the commit at
 * that position and the last commit of the list to {@code updateTranslogDeletionPolicy}.
 *
 * @param commits the index commits; index 0 is treated as the oldest and the last element as the newest
 * @throws IOException propagated from the helper methods, which declare it
 */
6772 @ Override
6873 public void onCommit (List <? extends IndexCommit > commits ) throws IOException {
69- indexDeletionPolicy .onCommit (commits );
70- setLastCommittedTranslogGeneration (commits );
74+ final int keptPosition = indexOfKeptCommits (commits );
// Only commits strictly before keptPosition are deleted; the kept commit and everything after it stay.
75+ for (int i = 0 ; i < keptPosition ; i ++) {
76+ commits .get (i ).delete ();
77+ }
// The kept commit supplies the minimum translog generation; the last commit supplies the last-commit generation.
78+ updateTranslogDeletionPolicy (commits .get (keptPosition ), commits .get (commits .size () - 1 ));
7179 }
7280
/**
 * Reads the translog generation stored in the user data of the last commit in the list and sets it on the translog
 * deletion policy as the minimum translog generation for recovery.
 *
 * @param commits the index commits; only the last element is read
 * @throws IOException declared by this method; presumably raised when the commit user data cannot be read (confirm
 *                     against {@code IndexCommit#getUserData})
 */
73- private void setLastCommittedTranslogGeneration (List <? extends IndexCommit > commits ) throws IOException {
74- // When opening an existing Lucene index, we currently always open the last commit.
75- // We therefore use its translog generation as the one that will be required for recovery.
76- final IndexCommit indexCommit = commits .get (commits .size () - 1 );
77- assert indexCommit .isDeleted () == false : "last commit is deleted" ;
78- long minGen = Long .parseLong (indexCommit .getUserData ().get (Translog .TRANSLOG_GENERATION_KEY ));
79- translogDeletionPolicy .setMinTranslogGenerationForRecovery (minGen );
80- }
/**
 * Reads the translog generation stored under {@code Translog.TRANSLOG_GENERATION_KEY} in the user data of both
 * commits and pushes the two values to the translog deletion policy.
 *
 * @param minRequiredCommit the commit whose translog generation becomes the minimum translog generation for recovery
 * @param lastCommit        the commit whose translog generation becomes the translog generation of the last commit
 * @throws IOException declared by this method; presumably raised when the commit user data cannot be read (confirm
 *                     against {@code IndexCommit#getUserData})
 */
81+ private void updateTranslogDeletionPolicy (final IndexCommit minRequiredCommit , final IndexCommit lastCommit ) throws IOException {
82+ assert minRequiredCommit .isDeleted () == false : "The minimum required commit must not be deleted" ;
83+ final long minRequiredGen = Long .parseLong (minRequiredCommit .getUserData ().get (Translog .TRANSLOG_GENERATION_KEY ));
84+
85+ assert lastCommit .isDeleted () == false : "The last commit must not be deleted" ;
86+ final long lastGen = Long .parseLong (lastCommit .getUserData ().get (Translog .TRANSLOG_GENERATION_KEY ));
8187
82- public SnapshotDeletionPolicy getIndexDeletionPolicy () {
83- return indexDeletionPolicy ;
// Sanity check (assertions only): the minimum required generation must not exceed the last commit's generation.
88+ assert minRequiredGen <= lastGen : "minRequiredGen must not be greater than lastGen" ;
89+ translogDeletionPolicy .setTranslogGenerationOfLastCommit (lastGen );
90+ translogDeletionPolicy .setMinTranslogGenerationForRecovery (minRequiredGen );
8491 }
8592
86- public TranslogDeletionPolicy getTranslogDeletionPolicy () {
87- return translogDeletionPolicy ;
93+ /**
94+ * Finds the highest index position of a safe index commit, i.e. one whose max sequence number is not greater than the global checkpoint.
95+ * Index commits with a different translog UUID are filtered out because they do not belong to this engine.
 * <p>
 * The translog UUID of the last commit in the list is the reference UUID. The list is scanned from the last element
 * towards the first, and the scan stops at the first commit that has a different translog UUID, lacks
 * {@code SequenceNumbers.MAX_SEQ_NO}, or has a max sequence number at or below the global checkpoint.
 *
 * @param commits the index commits, sorted by age with the oldest at index 0
 * @return the position of the commit to keep: the position after a commit with a foreign translog UUID, the position
 *         of a commit without a max sequence number, the position of the newest commit whose max sequence number is
 *         at most the global checkpoint, or 0 when the scan finds none of these
 * @throws IOException declared by this method; presumably raised when the commit user data cannot be read (confirm
 *                     against {@code IndexCommit#getUserData})
96+ */
97+ private int indexOfKeptCommits (List <? extends IndexCommit > commits ) throws IOException {
98+ final long currentGlobalCheckpoint = globalCheckpointSupplier .getAsLong ();
99+ final String expectedTranslogUUID = commits .get (commits .size () - 1 ).getUserData ().get (Translog .TRANSLOG_UUID_KEY );
100+
101+ // Commits are sorted by age (the 0th one is the oldest commit), so this loop walks from the newest to the oldest.
102+ for (int i = commits .size () - 1 ; i >= 0 ; i --) {
103+ final Map <String , String > commitUserData = commits .get (i ).getUserData ();
104+ // A commit with a different translog UUID is ignored: stop here and keep only the commits after it.
105+ if (expectedTranslogUUID .equals (commitUserData .get (Translog .TRANSLOG_UUID_KEY )) == false ) {
106+ return i + 1 ;
107+ }
108+ // 5.x commits do not contain MAX_SEQ_NO; such a commit is returned as the kept position without a checkpoint comparison.
109+ if (commitUserData .containsKey (SequenceNumbers .MAX_SEQ_NO ) == false ) {
110+ return i ;
111+ }
112+ final long maxSeqNoFromCommit = Long .parseLong (commitUserData .get (SequenceNumbers .MAX_SEQ_NO ));
// The first match found while scanning backwards is the newest commit at or below the global checkpoint.
113+ if (maxSeqNoFromCommit <= currentGlobalCheckpoint ) {
114+ return i ;
115+ }
116+ }
117+ /*
118+ * We may reach this point in these cases:
119+ * 1. In previous 6.x versions, only the last commit was kept - which is likely not a safe commit if writes are in progress.
120+ * Thus, after upgrading, we may not find a safe commit until we can retain one.
121+ * 2. In peer recovery, if a file-based recovery happens, a replica will receive the latest commit from the primary.
122+ * However, that commit may not be a safe commit if writes are in progress on the primary.
 *
 * Returning 0 means that no commit precedes the kept position, so the caller's deletion loop deletes nothing.
123+ */
124+ return 0 ;
88125 }
89126}
0 commit comments