Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ public class CSMMetrics {
private @Metric MutableCounterLong numStartTransactionVerifyFailures;
private @Metric MutableCounterLong numContainerNotOpenVerifyFailures;

// Rate metric for applyTransaction latency; samples are added through
// recordApplyTransactionCompletion(long).
private @Metric MutableRate applyTransaction;
// Rate metric for writeStateMachineData latency; samples are added through
// recordWriteStateMachineCompletion(long).
private @Metric MutableRate writeStateMachineData;

public CSMMetrics() {
int numCmdTypes = ContainerProtos.Type.values().length;
this.opsLatency = new MutableRate[numCmdTypes];
Expand Down Expand Up @@ -186,6 +189,10 @@ public long getNumBytesCommittedCount() {
return numBytesCommittedCount.value();
}

/**
 * Exposes the rate metric backing the applyTransaction latency measurement.
 *
 * @return the {@code applyTransaction} rate metric held by this instance
 */
public MutableRate getApplyTransactionLatency() {
  return this.applyTransaction;
}

public void incPipelineLatency(ContainerProtos.Type type, long latencyNanos) {
opsLatency[type.ordinal()].add(latencyNanos);
transactionLatency.add(latencyNanos);
Expand All @@ -199,6 +206,13 @@ public void incNumContainerNotOpenVerifyFailures() {
numContainerNotOpenVerifyFailures.incr();
}

/**
 * Adds one latency sample to the {@code applyTransaction} rate metric.
 *
 * @param latencyNanos the elapsed time to record, in nanoseconds
 */
public void recordApplyTransactionCompletion(long latencyNanos) {
  this.applyTransaction.add(latencyNanos);
}

/**
 * Adds one latency sample to the {@code writeStateMachineData} rate metric.
 *
 * @param latencyNanos the elapsed time to record, in nanoseconds
 */
public void recordWriteStateMachineCompletion(long latencyNanos) {
  this.writeStateMachineData.add(latencyNanos);
}

public void unRegister() {
MetricsSystem ms = DefaultMetricsSystem.instance();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,8 @@ private ExecutorService getCommandExecutor(
}

private CompletableFuture<Message> handleWriteChunk(
ContainerCommandRequestProto requestProto, long entryIndex, long term) {
ContainerCommandRequestProto requestProto, long entryIndex, long term,
long startTime) {
final WriteChunkRequestProto write = requestProto.getWriteChunk();
RaftServer server = ratisServer.getServer();
Preconditions.checkState(server instanceof RaftServerProxy);
Expand Down Expand Up @@ -461,6 +462,8 @@ private CompletableFuture<Message> handleWriteChunk(
write.getBlockID() + " logIndex " + entryIndex + " chunkName " +
write.getChunkData().getChunkName());
raftFuture.complete(r::toByteString);
metrics.recordWriteStateMachineCompletion(
Time.monotonicNowNanos() - startTime);
}

writeChunkFutureMap.remove(entryIndex);
Expand All @@ -477,6 +480,7 @@ private CompletableFuture<Message> handleWriteChunk(
public CompletableFuture<Message> writeStateMachineData(LogEntryProto entry) {
try {
metrics.incNumWriteStateMachineOps();
long writeStateMachineStartTime = Time.monotonicNowNanos();
ContainerCommandRequestProto requestProto =
getContainerCommandRequestProto(
entry.getStateMachineLogEntry().getLogData());
Expand All @@ -493,7 +497,7 @@ public CompletableFuture<Message> writeStateMachineData(LogEntryProto entry) {
switch (cmdType) {
case WriteChunk:
return handleWriteChunk(requestProto, entry.getIndex(),
entry.getTerm());
entry.getTerm(), writeStateMachineStartTime);
default:
throw new IllegalStateException("Cmd Type:" + cmdType
+ " should not have state machine data");
Expand Down Expand Up @@ -673,6 +677,7 @@ public CompletableFuture<Message> applyTransaction(TransactionContext trx) {
.setTerm(trx.getLogEntry().getTerm())
.setLogIndex(index);

long applyTxnStartTime = Time.monotonicNowNanos();
try {
applyTransactionSemaphore.acquire();
metrics.incNumApplyTransactionsOps();
Expand Down Expand Up @@ -740,7 +745,11 @@ public CompletableFuture<Message> applyTransaction(TransactionContext trx) {
}
}
return applyTransactionFuture;
}).whenComplete((r, t) -> applyTransactionSemaphore.release());
}).whenComplete((r, t) -> {
applyTransactionSemaphore.release();
metrics.recordApplyTransactionCompletion(
Time.monotonicNowNanos() - applyTxnStartTime);
});
return applyTransactionFuture;
} catch (IOException | InterruptedException e) {
metrics.incNumApplyTransactionsFails();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ HDFS-SITE.XML_dfs.datanode.address=0.0.0.0:1019
HDFS-SITE.XML_dfs.datanode.http.address=0.0.0.0:1012
CORE-SITE.XML_dfs.data.transfer.protection=authentication
CORE-SITE.XML_hadoop.security.authentication=kerberos
COER-SITE.XML_hadoop.security.auth_to_local=RULE:[2:$1@$0](.*@EXAMPLE.COM)s/@.*///L
CORE-SITE.XML_hadoop.security.auth_to_local=RULE:[2:$1@$0](.*@EXAMPLE.COM)s/@.*///L
CORE-SITE.XML_hadoop.security.key.provider.path=kms://http@kms:9600/kms

#temporary disable authorization as org.apache.hadoop.yarn.server.api.ResourceTrackerPB is not properly annotated to support it
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.hadoop.ozone.container.common.transport.server.ratis;

import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
import static org.apache.hadoop.test.MetricsAsserts.getDoubleGauge;
import static org.apache.hadoop.test.MetricsAsserts.getMetrics;

import java.io.File;
Expand Down Expand Up @@ -49,6 +50,8 @@
import org.apache.hadoop.hdds.conf.OzoneConfiguration;

import static org.apache.ratis.rpc.SupportedRpcType.GRPC;
import static org.junit.Assert.assertTrue;

import org.apache.ratis.protocol.RaftGroupId;
import org.apache.ratis.util.function.CheckedBiConsumer;

Expand Down Expand Up @@ -118,6 +121,12 @@ static void runContainerStateMachineMetrics(
assertCounter("NumStartTransactionVerifyFailures", 0L, metric);
assertCounter("NumContainerNotOpenVerifyFailures", 0L, metric);
assertCounter("WriteChunkNumOps", 0L, metric);
double applyTransactionLatency = getDoubleGauge(
"ApplyTransactionAvgTime", metric);
assertTrue(applyTransactionLatency == 0.0);
double writeStateMachineLatency = getDoubleGauge(
"WriteStateMachineDataAvgTime", metric);
assertTrue(writeStateMachineLatency == 0.0);

// Write Chunk
BlockID blockID = ContainerTestHelper.getTestBlockID(ContainerTestHelper.
Expand Down Expand Up @@ -152,6 +161,13 @@ static void runContainerStateMachineMetrics(
RaftGroupId.valueOf(pipeline.getId().getId()).toString());
assertCounter("NumQueryStateMachineOps", 1L, metric);
assertCounter("NumApplyTransactionOps", 1L, metric);
applyTransactionLatency = getDoubleGauge(
"ApplyTransactionAvgTime", metric);
assertTrue(applyTransactionLatency > 0.0);
writeStateMachineLatency = getDoubleGauge(
"WriteStateMachineDataAvgTime", metric);
assertTrue(writeStateMachineLatency > 0.0);

} finally {
if (client != null) {
client.close();
Expand Down