Skip to content

Commit aa6c88f

Browse files
authored
Write translog operation bytes to byte stream (#63298)
Currently we add translog operation bytes to an array list and flush them on the next write. Unfortunately, this does not currently play well with our byte pooling which means each operation is backed, at minimum, by a 16KB array. This commit improves memory efficiency for small operations by serializing the operations to an output stream.
1 parent 266ac76 commit aa6c88f

File tree

5 files changed

+58
-59
lines changed

5 files changed

+58
-59
lines changed

server/src/main/java/org/elasticsearch/index/translog/Translog.java

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
import org.elasticsearch.common.UUIDs;
2828
import org.elasticsearch.common.bytes.BytesArray;
2929
import org.elasticsearch.common.bytes.BytesReference;
30-
import org.elasticsearch.common.bytes.ReleasableBytesReference;
3130
import org.elasticsearch.common.io.stream.ReleasableBytesStreamOutput;
3231
import org.elasticsearch.common.io.stream.StreamInput;
3332
import org.elasticsearch.common.io.stream.StreamOutput;
@@ -74,6 +73,8 @@
7473
import java.util.stream.Collectors;
7574
import java.util.stream.Stream;
7675

76+
import static org.elasticsearch.index.translog.TranslogConfig.EMPTY_TRANSLOG_BUFFER_SIZE;
77+
7778
/**
7879
* A Translog is a per index shard component that records all non-committed index operations in a durable manner.
7980
* In Elasticsearch there is one Translog instance per {@link org.elasticsearch.index.engine.InternalEngine}.
@@ -118,7 +119,7 @@ public class Translog extends AbstractIndexShardComponent implements IndexShardC
118119

119120
// the list of translog readers is guaranteed to be in order of translog generation
120121
private final List<TranslogReader> readers = new ArrayList<>();
121-
private BigArrays bigArrays;
122+
private final BigArrays bigArrays;
122123
protected final ReleasableLock readLock;
123124
protected final ReleasableLock writeLock;
124125
private final Path location;
@@ -509,7 +510,8 @@ TranslogWriter createWriter(long fileGeneration, long initialMinTranslogGen, lon
509510
config.getBufferSize(),
510511
initialMinTranslogGen, initialGlobalCheckpoint,
511512
globalCheckpointSupplier, this::getMinFileGeneration, primaryTermSupplier.getAsLong(), tragedy,
512-
persistedSequenceNumberConsumer);
513+
persistedSequenceNumberConsumer,
514+
bigArrays);
513515
} catch (final IOException e) {
514516
throw new TranslogException(shardId, "failed to create new translog file", e);
515517
}
@@ -525,7 +527,6 @@ TranslogWriter createWriter(long fileGeneration, long initialMinTranslogGen, lon
525527
*/
526528
public Location add(final Operation operation) throws IOException {
527529
final ReleasableBytesStreamOutput out = new ReleasableBytesStreamOutput(bigArrays);
528-
boolean successfullySerialized = false;
529530
try {
530531
final long start = out.position();
531532
out.skip(Integer.BYTES);
@@ -535,9 +536,8 @@ public Location add(final Operation operation) throws IOException {
535536
out.seek(start);
536537
out.writeInt(operationSize);
537538
out.seek(end);
538-
successfullySerialized = true;
539-
try (ReleasableBytesReference bytes = new ReleasableBytesReference(out.bytes(), out);
540-
ReleasableLock ignored = readLock.acquire()) {
539+
final BytesReference bytes = out.bytes();
540+
try (ReleasableLock ignored = readLock.acquire()) {
541541
ensureOpen();
542542
if (operation.primaryTerm() > current.getPrimaryTerm()) {
543543
assert false :
@@ -555,9 +555,7 @@ public Location add(final Operation operation) throws IOException {
555555
closeOnTragicEvent(ex);
556556
throw new TranslogException(shardId, "Failed to write operation [" + operation + "]", ex);
557557
} finally {
558-
if (successfullySerialized == false) {
559-
Releasables.close(out);
560-
}
558+
Releasables.close(out);
561559
}
562560
}
563561

@@ -1893,7 +1891,7 @@ public static String createEmptyTranslog(final Path location,
18931891
Checkpoint.write(channelFactory, checkpointFile, checkpoint, StandardOpenOption.WRITE, StandardOpenOption.CREATE_NEW);
18941892
IOUtils.fsync(checkpointFile, false);
18951893
final TranslogWriter writer = TranslogWriter.create(shardId, uuid, generation, translogFile, channelFactory,
1896-
TranslogConfig.DEFAULT_BUFFER_SIZE, minTranslogGeneration, initialGlobalCheckpoint,
1894+
EMPTY_TRANSLOG_BUFFER_SIZE, minTranslogGeneration, initialGlobalCheckpoint,
18971895
() -> {
18981896
throw new UnsupportedOperationException();
18991897
}, () -> {
@@ -1903,7 +1901,7 @@ public static String createEmptyTranslog(final Path location,
19031901
new TragicExceptionHolder(),
19041902
seqNo -> {
19051903
throw new UnsupportedOperationException();
1906-
});
1904+
}, BigArrays.NON_RECYCLING_INSTANCE);
19071905
writer.close();
19081906
return uuid;
19091907
}

server/src/main/java/org/elasticsearch/index/translog/TranslogConfig.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
public final class TranslogConfig {
3636

3737
public static final ByteSizeValue DEFAULT_BUFFER_SIZE = new ByteSizeValue(1, ByteSizeUnit.MB);
38+
public static final ByteSizeValue EMPTY_TRANSLOG_BUFFER_SIZE = new ByteSizeValue(10, ByteSizeUnit.BYTES);
3839
private final BigArrays bigArrays;
3940
private final IndexSettings indexSettings;
4041
private final ShardId shardId;

server/src/main/java/org/elasticsearch/index/translog/TranslogWriter.java

Lines changed: 44 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,10 @@
3232
import org.elasticsearch.common.collect.Tuple;
3333
import org.elasticsearch.common.io.Channels;
3434
import org.elasticsearch.common.io.DiskIoBufferPool;
35-
import org.elasticsearch.common.lease.Releasable;
35+
import org.elasticsearch.common.io.stream.ReleasableBytesStreamOutput;
3636
import org.elasticsearch.common.lease.Releasables;
3737
import org.elasticsearch.common.unit.ByteSizeValue;
38+
import org.elasticsearch.common.util.BigArrays;
3839
import org.elasticsearch.common.util.concurrent.ReleasableLock;
3940
import org.elasticsearch.core.internal.io.IOUtils;
4041
import org.elasticsearch.index.seqno.SequenceNumbers;
@@ -46,8 +47,6 @@
4647
import java.nio.channels.FileChannel;
4748
import java.nio.file.Path;
4849
import java.nio.file.StandardOpenOption;
49-
import java.util.ArrayDeque;
50-
import java.util.ArrayList;
5150
import java.util.HashMap;
5251
import java.util.Map;
5352
import java.util.Objects;
@@ -61,6 +60,7 @@ public class TranslogWriter extends BaseTranslogReader implements Closeable {
6160
private final ShardId shardId;
6261
private final FileChannel checkpointChannel;
6362
private final Path checkpointPath;
63+
private final BigArrays bigArrays;
6464
// the last checkpoint that was written when the translog was last synced
6565
private volatile Checkpoint lastSyncedCheckpoint;
6666
/* the number of translog operations written to this file */
@@ -87,8 +87,7 @@ public class TranslogWriter extends BaseTranslogReader implements Closeable {
8787

8888
private LongArrayList nonFsyncedSequenceNumbers = new LongArrayList(64);
8989
private final int forceWriteThreshold;
90-
private final ArrayList<ReleasableBytesReference> bufferedOps = new ArrayList<>();
91-
private long bufferedBytes = 0L;
90+
private ReleasableBytesStreamOutput buffer;
9291

9392
private final Map<Long, Tuple<BytesReference, Exception>> seenSequenceNumbers;
9493

@@ -101,8 +100,9 @@ private TranslogWriter(
101100
final Path checkpointPath,
102101
final ByteSizeValue bufferSize,
103102
final LongSupplier globalCheckpointSupplier, LongSupplier minTranslogGenerationSupplier, TranslogHeader header,
104-
TragicExceptionHolder tragedy,
105-
final LongConsumer persistedSequenceNumberConsumer)
103+
final TragicExceptionHolder tragedy,
104+
final LongConsumer persistedSequenceNumberConsumer,
105+
final BigArrays bigArrays)
106106
throws
107107
IOException {
108108
super(initialCheckpoint.generation, channel, path, header);
@@ -123,14 +123,16 @@ private TranslogWriter(
123123
assert initialCheckpoint.trimmedAboveSeqNo == SequenceNumbers.UNASSIGNED_SEQ_NO : initialCheckpoint.trimmedAboveSeqNo;
124124
this.globalCheckpointSupplier = globalCheckpointSupplier;
125125
this.persistedSequenceNumberConsumer = persistedSequenceNumberConsumer;
126+
this.bigArrays = bigArrays;
126127
this.seenSequenceNumbers = Assertions.ENABLED ? new HashMap<>() : null;
127128
this.tragedy = tragedy;
128129
}
129130

130131
public static TranslogWriter create(ShardId shardId, String translogUUID, long fileGeneration, Path file, ChannelFactory channelFactory,
131132
ByteSizeValue bufferSize, final long initialMinTranslogGen, long initialGlobalCheckpoint,
132133
final LongSupplier globalCheckpointSupplier, final LongSupplier minTranslogGenerationSupplier,
133-
final long primaryTerm, TragicExceptionHolder tragedy, LongConsumer persistedSequenceNumberConsumer)
134+
final long primaryTerm, TragicExceptionHolder tragedy,
135+
final LongConsumer persistedSequenceNumberConsumer, final BigArrays bigArrays)
134136
throws IOException {
135137
final Path checkpointFile = file.getParent().resolve(Translog.CHECKPOINT_FILE_NAME);
136138

@@ -155,7 +157,7 @@ public static TranslogWriter create(ShardId shardId, String translogUUID, long f
155157
writerGlobalCheckpointSupplier = globalCheckpointSupplier;
156158
}
157159
return new TranslogWriter(shardId, checkpoint, channel, checkpointChannel, file, checkpointFile, bufferSize,
158-
writerGlobalCheckpointSupplier, minTranslogGenerationSupplier, header, tragedy, persistedSequenceNumberConsumer);
160+
writerGlobalCheckpointSupplier, minTranslogGenerationSupplier, header, tragedy, persistedSequenceNumberConsumer, bigArrays);
159161
} catch (Exception exception) {
160162
// if we fail to bake the file-generation into the checkpoint we stick with the file and once we recover and that
161163
// file exists we remove it. We only apply this logic to the checkpoint.generation+1 any other file with a higher generation
@@ -182,15 +184,17 @@ private synchronized void closeWithTragicEvent(final Exception ex) {
182184
* @return the location the bytes were written to
183185
* @throws IOException if writing to the translog resulted in an I/O exception
184186
*/
185-
public Translog.Location add(final ReleasableBytesReference data, final long seqNo) throws IOException {
187+
public Translog.Location add(final BytesReference data, final long seqNo) throws IOException {
186188
final Translog.Location location;
187189
final long bytesBufferedAfterAdd;
188190
synchronized (this) {
189191
ensureOpen();
192+
if (buffer == null) {
193+
buffer = new ReleasableBytesStreamOutput(bigArrays);
194+
}
190195
final long offset = totalOffset;
191196
totalOffset += data.length();
192-
bufferedBytes += data.length();
193-
bufferedOps.add(data.retain());
197+
data.writeTo(buffer);
194198

195199
assert minSeqNo != SequenceNumbers.NO_OPS_PERFORMED || operationCounter == 0;
196200
assert maxSeqNo != SequenceNumbers.NO_OPS_PERFORMED || operationCounter == 0;
@@ -205,7 +209,7 @@ public Translog.Location add(final ReleasableBytesReference data, final long seq
205209
assert assertNoSeqNumberConflict(seqNo, data);
206210

207211
location = new Translog.Location(generation, offset, data.length());
208-
bytesBufferedAfterAdd = bufferedBytes;
212+
bytesBufferedAfterAdd = buffer.size();
209213
}
210214

211215
if (bytesBufferedAfterAdd >= forceWriteThreshold) {
@@ -335,7 +339,7 @@ public TranslogReader closeIntoReader() throws IOException {
335339
throw ex;
336340
}
337341
// If we reached this point, all of the buffered ops should have been flushed successfully.
338-
assert bufferedOps.size() == 0;
342+
assert buffer == null;
339343
assert checkChannelPositionWhileHandlingException(totalOffset);
340344
assert totalOffset == lastSyncedCheckpoint.offset;
341345
if (closed.compareAndSet(false, true)) {
@@ -372,7 +376,7 @@ public TranslogSnapshot newSnapshot() {
372376
throw new TranslogException(shardId, "exception while syncing before creating a snapshot", e);
373377
}
374378
// If we reached this point, all of the buffered ops should have been flushed successfully.
375-
assert bufferedOps.size() == 0;
379+
assert buffer == null;
376380
assert checkChannelPositionWhileHandlingException(totalOffset);
377381
assert totalOffset == lastSyncedCheckpoint.offset;
378382
return super.newSnapshot();
@@ -398,7 +402,7 @@ final boolean syncUpTo(long offset) throws IOException {
398402
// the lock we should check again since if this code is busy we might have fsynced enough already
399403
final Checkpoint checkpointToSync;
400404
final LongArrayList flushedSequenceNumbers;
401-
final ArrayDeque<ReleasableBytesReference> toWrite;
405+
final ReleasableBytesReference toWrite;
402406
try (ReleasableLock toClose = writeLock.acquire()) {
403407
synchronized (this) {
404408
ensureOpen();
@@ -449,44 +453,39 @@ private void writeBufferedOps(long offset, boolean blockOnExistingWriter) throws
449453
}
450454
}
451455

452-
private synchronized ArrayDeque<ReleasableBytesReference> pollOpsToWrite() {
456+
private synchronized ReleasableBytesReference pollOpsToWrite() {
453457
ensureOpen();
454-
final ArrayDeque<ReleasableBytesReference> operationsToWrite = new ArrayDeque<>(bufferedOps.size());
455-
operationsToWrite.addAll(bufferedOps);
456-
bufferedOps.clear();
457-
bufferedBytes = 0;
458-
return operationsToWrite;
458+
if (this.buffer != null) {
459+
ReleasableBytesStreamOutput toWrite = this.buffer;
460+
this.buffer = null;
461+
return new ReleasableBytesReference(toWrite.bytes(), toWrite);
462+
} else {
463+
return ReleasableBytesReference.wrap(BytesArray.EMPTY);
464+
}
459465
}
460466

461-
private void writeAndReleaseOps(final ArrayDeque<ReleasableBytesReference> operationsToWrite) throws IOException {
462-
try {
467+
private void writeAndReleaseOps(ReleasableBytesReference toWrite) throws IOException {
468+
try (ReleasableBytesReference toClose = toWrite) {
463469
assert writeLock.isHeldByCurrentThread();
464470
ByteBuffer ioBuffer = DiskIoBufferPool.getIoBuffer();
465471

466-
ReleasableBytesReference operation;
467-
while ((operation = operationsToWrite.pollFirst()) != null) {
468-
try (Releasable toClose = operation) {
469-
BytesRefIterator iterator = operation.iterator();
470-
BytesRef current;
471-
while ((current = iterator.next()) != null) {
472-
int currentBytesConsumed = 0;
473-
while (currentBytesConsumed != current.length) {
474-
int nBytesToWrite = Math.min(current.length - currentBytesConsumed, ioBuffer.remaining());
475-
ioBuffer.put(current.bytes, current.offset + currentBytesConsumed, nBytesToWrite);
476-
currentBytesConsumed += nBytesToWrite;
477-
if (ioBuffer.hasRemaining() == false) {
478-
ioBuffer.flip();
479-
writeToFile(ioBuffer);
480-
ioBuffer.clear();
481-
}
482-
}
472+
BytesRefIterator iterator = toWrite.iterator();
473+
BytesRef current;
474+
while ((current = iterator.next()) != null) {
475+
int currentBytesConsumed = 0;
476+
while (currentBytesConsumed != current.length) {
477+
int nBytesToWrite = Math.min(current.length - currentBytesConsumed, ioBuffer.remaining());
478+
ioBuffer.put(current.bytes, current.offset + currentBytesConsumed, nBytesToWrite);
479+
currentBytesConsumed += nBytesToWrite;
480+
if (ioBuffer.hasRemaining() == false) {
481+
ioBuffer.flip();
482+
writeToFile(ioBuffer);
483+
ioBuffer.clear();
483484
}
484485
}
485486
}
486487
ioBuffer.flip();
487488
writeToFile(ioBuffer);
488-
} finally {
489-
Releasables.close(operationsToWrite);
490489
}
491490
}
492491

@@ -550,8 +549,8 @@ private boolean checkChannelPositionWhileHandlingException(long expectedOffset)
550549
public final void close() throws IOException {
551550
if (closed.compareAndSet(false, true)) {
552551
synchronized (this) {
553-
Releasables.closeWhileHandlingException(bufferedOps);
554-
bufferedOps.clear();
552+
Releasables.closeWhileHandlingException(buffer);
553+
buffer = null;
555554
}
556555
IOUtils.close(checkpointChannel, channel);
557556
}

server/src/test/java/org/elasticsearch/index/translog/TranslogDeletionPolicyTests.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import org.elasticsearch.common.bytes.ReleasableBytesReference;
2626
import org.elasticsearch.common.collect.Tuple;
2727
import org.elasticsearch.common.lease.Releasable;
28+
import org.elasticsearch.common.util.BigArrays;
2829
import org.elasticsearch.core.internal.io.IOUtils;
2930
import org.elasticsearch.index.shard.ShardId;
3031
import org.elasticsearch.test.ESTestCase;
@@ -89,7 +90,7 @@ private Tuple<List<TranslogReader>, TranslogWriter> createReadersAndWriter() thr
8990
}
9091
writer = TranslogWriter.create(new ShardId("index", "uuid", 0), translogUUID, gen,
9192
tempDir.resolve(Translog.getFilename(gen)), FileChannel::open, TranslogConfig.DEFAULT_BUFFER_SIZE, 1L, 1L, () -> 1L,
92-
() -> 1L, randomNonNegativeLong(), new TragicExceptionHolder(), seqNo -> {});
93+
() -> 1L, randomNonNegativeLong(), new TragicExceptionHolder(), seqNo -> {}, BigArrays.NON_RECYCLING_INSTANCE);
9394
writer = Mockito.spy(writer);
9495
byte[] bytes = new byte[4];
9596
ByteArrayDataOutput out = new ByteArrayDataOutput(bytes);

server/src/test/java/org/elasticsearch/index/translog/TranslogTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1253,7 +1253,7 @@ public void testTranslogWriter() throws IOException {
12531253
final TranslogWriter writer = translog.createWriter(translog.currentFileGeneration() + 1);
12541254
final Set<Long> persistedSeqNos = new HashSet<>();
12551255
persistedSeqNoConsumer.set(persistedSeqNos::add);
1256-
final int numOps = randomIntBetween(8, 128);
1256+
final int numOps = scaledRandomIntBetween(8, 250000);
12571257
final Set<Long> seenSeqNos = new HashSet<>();
12581258
boolean opsHaveValidSequenceNumbers = randomBoolean();
12591259
for (int i = 0; i < numOps; i++) {

0 commit comments

Comments
 (0)