diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/protocolPB/ProtocolMessageMetrics.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/protocolPB/ProtocolMessageMetrics.java new file mode 100644 index 0000000000000..96725f269a124 --- /dev/null +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/protocolPB/ProtocolMessageMetrics.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.protocolPB; + +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicLong; + +import org.apache.hadoop.metrics2.MetricsCollector; +import org.apache.hadoop.metrics2.MetricsInfo; +import org.apache.hadoop.metrics2.MetricsRecordBuilder; +import org.apache.hadoop.metrics2.MetricsSource; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; + +import com.google.protobuf.ProtocolMessageEnum; + +/** + * Metrics to count all the subtypes of a specific message. + */ +public class ProtocolMessageMetrics implements MetricsSource { + + private String name; + + private String description; + + private Map counters = + new ConcurrentHashMap<>(); + + public static ProtocolMessageMetrics create(String name, + String description, ProtocolMessageEnum[] types) { + ProtocolMessageMetrics protocolMessageMetrics = + new ProtocolMessageMetrics(name, description, + types); + return protocolMessageMetrics; + } + + public ProtocolMessageMetrics(String name, String description, + ProtocolMessageEnum[] values) { + this.name = name; + this.description = description; + for (ProtocolMessageEnum value : values) { + counters.put(value, new AtomicLong(0)); + } + } + + public void increment(ProtocolMessageEnum key) { + counters.get(key).incrementAndGet(); + } + + public void register() { + DefaultMetricsSystem.instance() + .register(name, description, this); + } + + public void unregister() { + DefaultMetricsSystem.instance().unregisterSource(name); + } + + @Override + public void getMetrics(MetricsCollector collector, boolean all) { + MetricsRecordBuilder builder = collector.addRecord(name); + counters.forEach((key, value) -> { + builder.addCounter(new MetricName(key.toString(), ""), value.longValue()); + }); + builder.endRecord(); + } + + /** + * Simple metrics info implementation. + */ + public static class MetricName implements MetricsInfo { + private String name; + private String description; + + public MetricName(String name, String description) { + this.name = name; + this.description = description; + } + + @Override + public String name() { + return name; + } + + @Override + public String description() { + return description; + } + } +} diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/protocolPB/ScmBlockLocationProtocolServerSideTranslatorPB.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/protocolPB/ScmBlockLocationProtocolServerSideTranslatorPB.java index 5c3648e6d347f..bad24cffe58f6 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/protocolPB/ScmBlockLocationProtocolServerSideTranslatorPB.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/protocolPB/ScmBlockLocationProtocolServerSideTranslatorPB.java @@ -17,15 +17,25 @@ */ package org.apache.hadoop.ozone.protocolPB; -import com.google.protobuf.RpcController; -import com.google.protobuf.ServiceException; -import io.opentracing.Scope; +import java.io.IOException; +import java.util.List; +import java.util.stream.Collectors; import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos; import org.apache.hadoop.hdds.protocol.DatanodeDetails; -import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos - .AllocateBlockResponse; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos; +import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.AllocateBlockResponse; +import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.AllocateScmBlockRequestProto; +import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.AllocateScmBlockResponseProto; +import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.DeleteKeyBlocksResultProto; +import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.DeleteScmKeyBlocksRequestProto; +import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.DeleteScmKeyBlocksResponseProto; +import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.SCMBlockLocationRequest; +import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.SCMBlockLocationResponse; +import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.SortDatanodesRequestProto; +import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.SortDatanodesResponseProto; +import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.Status; import org.apache.hadoop.hdds.scm.ScmInfo; import org.apache.hadoop.hdds.scm.container.common.helpers.AllocatedBlock; import org.apache.hadoop.hdds.scm.container.common.helpers.ExcludeList; @@ -34,34 +44,15 @@ import org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocol; import org.apache.hadoop.hdds.scm.protocolPB.ScmBlockLocationProtocolPB; import org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolPB; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos; -import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos - .AllocateScmBlockRequestProto; -import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos - .AllocateScmBlockResponseProto; -import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos - .DeleteKeyBlocksResultProto; -import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos - .DeleteScmKeyBlocksRequestProto; -import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos - .DeleteScmKeyBlocksResponseProto; -import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos - .SCMBlockLocationResponse; -import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos - .SCMBlockLocationRequest; -import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos - .Status; -import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos - .SortDatanodesRequestProto; -import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos - .SortDatanodesResponseProto; import org.apache.hadoop.hdds.tracing.TracingUtil; import org.apache.hadoop.ozone.common.BlockGroup; import org.apache.hadoop.ozone.common.DeleteBlockGroupResult; -import java.io.IOException; -import java.util.List; -import java.util.stream.Collectors; +import com.google.protobuf.RpcController; +import com.google.protobuf.ServiceException; +import io.opentracing.Scope; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * This class is the server-side translator that forwards requests received on @@ -74,14 +65,22 @@ public final class ScmBlockLocationProtocolServerSideTranslatorPB private final ScmBlockLocationProtocol impl; + private static final Logger LOG = LoggerFactory + .getLogger(ScmBlockLocationProtocolServerSideTranslatorPB.class); + + private final ProtocolMessageMetrics + protocolMessageMetrics; + /** * Creates a new ScmBlockLocationProtocolServerSideTranslatorPB. * * @param impl {@link ScmBlockLocationProtocol} server implementation */ public ScmBlockLocationProtocolServerSideTranslatorPB( - ScmBlockLocationProtocol impl) throws IOException { + ScmBlockLocationProtocol impl, + ProtocolMessageMetrics metrics) throws IOException { this.impl = impl; + this.protocolMessageMetrics = metrics; } private SCMBlockLocationResponse.Builder createSCMBlockResponse( @@ -97,15 +96,45 @@ public SCMBlockLocationResponse send(RpcController controller, SCMBlockLocationRequest request) throws ServiceException { String traceId = request.getTraceID(); + if (LOG.isTraceEnabled()) { + LOG.trace("BlockLocationProtocol {} request is received: {}", + request.getCmdType().toString(), + request.toString().replaceAll("\n", "\\\\n")); + + } else if (LOG.isDebugEnabled()) { + LOG.debug("BlockLocationProtocol {} request is received", + request.getCmdType().toString()); + } + + protocolMessageMetrics.increment(request.getCmdType()); + + try (Scope scope = TracingUtil + .importAndCreateScope( + "ScmBlockLocationProtocol." + request.getCmdType(), + request.getTraceID())) { + SCMBlockLocationResponse response = + processMessage(request, traceId); + + if (LOG.isTraceEnabled()) { + LOG.trace( + "BlockLocationProtocol {} request is processed. Response: " + + "{}", + request.getCmdType().toString(), + response.toString().replaceAll("\n", "\\\\n")); + } + return response; + } + } + + private SCMBlockLocationResponse processMessage( + SCMBlockLocationRequest request, String traceId) throws ServiceException { SCMBlockLocationResponse.Builder response = createSCMBlockResponse( request.getCmdType(), traceId); response.setSuccess(true); response.setStatus(Status.OK); - try(Scope scope = TracingUtil - .importAndCreateScope("ScmBlockLocationProtocol."+request.getCmdType(), - request.getTraceID())) { + try { switch (request.getCmdType()) { case AllocateScmBlock: response.setAllocateScmBlockResponse( @@ -125,7 +154,7 @@ public SCMBlockLocationResponse send(RpcController controller, break; default: // Should never happen - throw new IOException("Unknown Operation "+request.getCmdType()+ + throw new IOException("Unknown Operation " + request.getCmdType() + " in ScmBlockLocationProtocol"); } } catch (IOException e) { @@ -135,6 +164,7 @@ public SCMBlockLocationResponse send(RpcController controller, response.setMessage(e.getMessage()); } } + return response.build(); } @@ -182,12 +212,12 @@ public DeleteScmKeyBlocksResponseProto deleteScmKeyBlocks( .map(BlockGroup::getFromProto).collect(Collectors.toList()); final List results = impl.deleteKeyBlocks(infoList); - for (DeleteBlockGroupResult result: results) { + for (DeleteBlockGroupResult result : results) { DeleteKeyBlocksResultProto.Builder deleteResult = DeleteKeyBlocksResultProto - .newBuilder() - .setObjectKey(result.getObjectKey()) - .addAllBlockResults(result.getBlockResultProtoList()); + .newBuilder() + .setObjectKey(result.getObjectKey()) + .addAllBlockResults(result.getBlockResultProtoList()); resp.addResults(deleteResult.build()); } return resp.build(); diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index 9987415a1c557..22f1a3c727983 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -331,7 +331,7 @@ hdds.prometheus.endpoint.enabled - false + true OZONE, MANAGEMENT Enable prometheus compatible metric page on the HTTP servers. diff --git a/hadoop-hdds/config/src/main/java/org/apache/hadoop/hdds/conf/ConfigFileGenerator.java b/hadoop-hdds/config/src/main/java/org/apache/hadoop/hdds/conf/ConfigFileGenerator.java index e9e88a0898805..64c20acf23f7e 100644 --- a/hadoop-hdds/config/src/main/java/org/apache/hadoop/hdds/conf/ConfigFileGenerator.java +++ b/hadoop-hdds/config/src/main/java/org/apache/hadoop/hdds/conf/ConfigFileGenerator.java @@ -93,15 +93,16 @@ public boolean process(Set annotations, } } - FileObject resource = filer - .createResource(StandardLocation.CLASS_OUTPUT, "", - OUTPUT_FILE_NAME); + } + FileObject resource = filer + .createResource(StandardLocation.CLASS_OUTPUT, "", + OUTPUT_FILE_NAME); - try (Writer writer = new OutputStreamWriter( - resource.openOutputStream(), StandardCharsets.UTF_8)) { - appender.write(writer); - } + try (Writer writer = new OutputStreamWriter( + resource.openOutputStream(), StandardCharsets.UTF_8)) { + appender.write(writer); } + } catch (IOException e) { processingEnv.getMessager().printMessage(Kind.ERROR, "Can't generate the config file from annotation: " + e.getMessage()); @@ -109,5 +110,4 @@ public boolean process(Set annotations, return false; } - } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/BaseHttpServer.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/BaseHttpServer.java index 906790f8dea6c..990d89dc0cb5c 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/BaseHttpServer.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/BaseHttpServer.java @@ -26,6 +26,7 @@ import org.apache.hadoop.http.HttpServer2; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.net.NetUtils; + import org.eclipse.jetty.webapp.WebAppContext; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -92,8 +93,9 @@ public BaseHttpServer(Configuration conf, String name) throws IOException { httpServer = builder.build(); httpServer.addServlet("conf", "/conf", HddsConfServlet.class); + httpServer.addServlet("logstream", "/logstream", LogStreamServlet.class); prometheusSupport = - conf.getBoolean(HddsConfigKeys.HDDS_PROMETHEUS_ENABLED, false); + conf.getBoolean(HddsConfigKeys.HDDS_PROMETHEUS_ENABLED, true); profilerSupport = conf.getBoolean(HddsConfigKeys.HDDS_PROFILER_ENABLED, false); diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/LogStreamServlet.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/LogStreamServlet.java new file mode 100644 index 0000000000000..1869c8b19ec5e --- /dev/null +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/LogStreamServlet.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.server; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; +import java.io.IOException; + +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.log4j.PatternLayout; +import org.apache.log4j.WriterAppender; + +/** + * Servlet to stream the current logs to the response. + */ +public class LogStreamServlet extends HttpServlet { + + private static final String PATTERN = "%d [%p|%c|%C{1}] %m%n"; + + @Override + protected void doGet(HttpServletRequest req, HttpServletResponse resp) + throws ServletException, IOException { + + WriterAppender appender = + new WriterAppender(new PatternLayout(PATTERN), resp.getWriter()); + appender.setThreshold(Level.TRACE); + + try { + Logger.getRootLogger().addAppender(appender); + try { + Thread.sleep(Integer.MAX_VALUE); + } catch (InterruptedException e) { + //interrupted + } + } finally { + Logger.getRootLogger().removeAppender(appender); + } + } + +} diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/PrometheusMetricsSink.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/PrometheusMetricsSink.java index 94fa9b83845b4..14ced45d572a0 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/PrometheusMetricsSink.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/PrometheusMetricsSink.java @@ -112,6 +112,10 @@ public String prometheusName(String recordName, String baseName = StringUtils.capitalize(recordName) + StringUtils.capitalize(metricName); + return normalizeName(baseName); + } + + public static String normalizeName(String baseName) { String[] parts = SPLIT_PATTERN.split(baseName); String result = String.join("_", parts).toLowerCase(); return REPLACE_PATTERN.matcher(result).replaceAll("_"); diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/EventQueue.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/EventQueue.java index 1a6555c140930..91e0153e357c3 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/EventQueue.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/EventQueue.java @@ -23,6 +23,8 @@ import org.apache.hadoop.util.Time; import com.google.common.base.Preconditions; +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -57,6 +59,8 @@ public class EventQueue implements EventPublisher, AutoCloseable { private boolean isRunning = true; + private static final Gson TRACING_SERIALIZER = new GsonBuilder().create(); + public > void addHandler( EVENT_TYPE event, EventHandler handler) { this.addHandler(event, handler, generateHandlerName(handler)); @@ -129,8 +133,6 @@ public > void addHandler( executors.get(event).get(executor).add(handler); } - - /** * Route an event with payload to the right listener(s). * @@ -159,11 +161,17 @@ public > void fireEvent( for (EventHandler handler : executorAndHandlers.getValue()) { queuedCount.incrementAndGet(); - if (LOG.isDebugEnabled()) { + if (LOG.isTraceEnabled()) { + LOG.debug( + "Delivering event {} to executor/handler {}: {}", + event.getName(), + executorAndHandlers.getKey().getName(), + TRACING_SERIALIZER.toJson(payload).replaceAll("\n", "\\\\n")); + } else if (LOG.isDebugEnabled()) { LOG.debug("Delivering event {} to executor/handler {}: {}", event.getName(), executorAndHandlers.getKey().getName(), - payload); + payload.getClass().getSimpleName()); } executorAndHandlers.getKey() .onMessage(handler, payload, this); @@ -232,6 +240,7 @@ public void processAll(long timeout) { } } } + @Override public void close() { @@ -250,5 +259,4 @@ public void close() { }); } - } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java index cb34f8d4fedc0..6ae1e0206c44c 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java @@ -18,21 +18,34 @@ package org.apache.hadoop.hdds.scm.container; -import com.google.common.annotations.VisibleForTesting; -import com.google.protobuf.GeneratedMessage; -import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.StringJoiner; +import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; +import java.util.function.Consumer; +import java.util.function.Predicate; +import java.util.stream.Collectors; -import org.apache.hadoop.hdds.conf.ConfigType; -import org.apache.hadoop.hdds.conf.ConfigGroup; import org.apache.hadoop.hdds.conf.Config; +import org.apache.hadoop.hdds.conf.ConfigGroup; +import org.apache.hadoop.hdds.conf.ConfigType; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState; -import org.apache.hadoop.hdds.protocol.proto - .StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State; -import org.apache.hadoop.hdds.scm.container.placement.algorithms - .ContainerPlacementPolicy; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State; +import org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementPolicy; import org.apache.hadoop.hdds.scm.events.SCMEvents; import org.apache.hadoop.hdds.server.events.EventPublisher; +import org.apache.hadoop.metrics2.MetricsCollector; +import org.apache.hadoop.metrics2.MetricsInfo; +import org.apache.hadoop.metrics2.MetricsSource; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.ozone.lock.LockManager; import org.apache.hadoop.ozone.protocol.commands.CloseContainerCommand; import org.apache.hadoop.ozone.protocol.commands.CommandForDatanode; @@ -42,35 +55,27 @@ import org.apache.hadoop.util.ExitUtil; import org.apache.hadoop.util.Time; -import static org.apache.hadoop.hdds.conf.ConfigTag.*; +import com.google.common.annotations.VisibleForTesting; +import com.google.protobuf.GeneratedMessage; +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; +import static org.apache.hadoop.hdds.conf.ConfigTag.OZONE; +import static org.apache.hadoop.hdds.conf.ConfigTag.SCM; import org.apache.ratis.util.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.UUID; -import java.util.function.Consumer; -import java.util.function.Predicate; -import java.util.stream.Collectors; - /** * Replication Manager (RM) is the one which is responsible for making sure * that the containers are properly replicated. Replication Manager deals only * with Quasi Closed / Closed container. */ -public class ReplicationManager { +public class ReplicationManager implements MetricsSource { private static final Logger LOG = LoggerFactory.getLogger(ReplicationManager.class); + public static final String METRICS_SOURCE_NAME = "SCMReplicationManager"; + /** * Reference to the ContainerManager. */ @@ -140,15 +145,20 @@ public ReplicationManager(final ReplicationManagerConfiguration conf, this.lockManager = lockManager; this.conf = conf; this.running = false; - this.inflightReplication = new HashMap<>(); - this.inflightDeletion = new HashMap<>(); + this.inflightReplication = new ConcurrentHashMap<>(); + this.inflightDeletion = new ConcurrentHashMap<>(); } /** * Starts Replication Monitor thread. */ public synchronized void start() { + if (!isRunning()) { + DefaultMetricsSystem.instance().register(METRICS_SOURCE_NAME, + "SCM Replication manager (closed container replication) related " + + "metrics", + this); LOG.info("Starting Replication Monitor Thread."); running = true; replicationMonitor = new Thread(this::run); @@ -472,6 +482,8 @@ private void forceCloseContainer(final ContainerInfo container, */ private void handleUnderReplicatedContainer(final ContainerInfo container, final Set replicas) { + LOG.debug("Handling underreplicated container: {}", + container.getContainerID()); try { final ContainerID id = container.containerID(); final List deletionInFlight = inflightDeletion @@ -748,6 +760,16 @@ private static boolean compareState(final LifeCycleState containerState, } } + @Override + public void getMetrics(MetricsCollector collector, boolean all) { + collector.addRecord(ReplicationManager.class.getSimpleName()) + .addGauge(ReplicationManagerMetrics.INFLIGHT_REPLICATION, + inflightReplication.size()) + .addGauge(ReplicationManagerMetrics.INFLIGHT_DELETION, + inflightDeletion.size()) + .endRecord(); + } + /** * Wrapper class to hold the InflightAction with its start time. */ @@ -822,4 +844,32 @@ public long getEventTimeout() { return eventTimeout; } } + + /** + * Metric name definitions for Replication manager. + */ + public enum ReplicationManagerMetrics implements MetricsInfo { + + INFLIGHT_REPLICATION("Tracked inflight container replication requests."), + INFLIGHT_DELETION("Tracked inflight container deletion requests."); + + private final String desc; + + ReplicationManagerMetrics(String desc) { + this.desc = desc; + } + + @Override + public String description() { + return desc; + } + + @Override + public String toString() { + return new StringJoiner(", ", this.getClass().getSimpleName() + "{", "}") + .add("name=" + name()) + .add("description=" + desc) + .toString(); + } + } } \ No newline at end of file diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeReportHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeReportHandler.java index f419764a07fb8..71e1b0777138a 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeReportHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeReportHandler.java @@ -48,7 +48,6 @@ public void onMessage(NodeReportFromDatanode nodeReportFromDatanode, DatanodeDetails dn = nodeReportFromDatanode.getDatanodeDetails(); Preconditions.checkNotNull(dn, "NodeReport is " + "missing DatanodeDetails."); - LOGGER.trace("Processing node report for dn: {}", dn); nodeManager .processNodeReport(dn, nodeReportFromDatanode.getReport()); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java index 7b1edd2241fc0..d3df858e6e6e1 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java @@ -17,36 +17,39 @@ */ package org.apache.hadoop.hdds.scm.node; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.base.Strings; -import org.apache.hadoop.hdds.protocol.proto - .StorageContainerDatanodeProtocolProtos; -import org.apache.hadoop.hdds.protocol.proto - .StorageContainerDatanodeProtocolProtos.PipelineReportsProto; +import javax.management.ObjectName; +import java.io.IOException; +import java.net.InetAddress; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ScheduledFuture; +import java.util.stream.Collectors; + +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.NodeReportProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReportsProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMRegisteredResponseProto.ErrorCode; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMVersionRequestProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.StorageReportProto; +import org.apache.hadoop.hdds.scm.VersionInfo; import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeMetric; +import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeStat; import org.apache.hadoop.hdds.scm.net.NetworkTopology; -import org.apache.hadoop.hdds.scm.pipeline.Pipeline; -import org.apache.hadoop.hdds.scm.pipeline.PipelineID; import org.apache.hadoop.hdds.scm.node.states.NodeAlreadyExistsException; import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException; +import org.apache.hadoop.hdds.scm.pipeline.Pipeline; +import org.apache.hadoop.hdds.scm.pipeline.PipelineID; import org.apache.hadoop.hdds.scm.server.SCMStorageConfig; -import org.apache.hadoop.hdds.scm.VersionInfo; -import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeMetric; -import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeStat; import org.apache.hadoop.hdds.server.events.EventPublisher; -import org.apache.hadoop.hdds.conf.OzoneConfiguration; -import org.apache.hadoop.hdds.protocol.DatanodeDetails; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState; -import org.apache.hadoop.hdds.protocol.proto - .StorageContainerDatanodeProtocolProtos.NodeReportProto; -import org.apache.hadoop.hdds.protocol.proto - .StorageContainerDatanodeProtocolProtos.SCMRegisteredResponseProto - .ErrorCode; -import org.apache.hadoop.hdds.protocol.proto - .StorageContainerDatanodeProtocolProtos.StorageReportProto; -import org.apache.hadoop.hdds.protocol.proto - .StorageContainerDatanodeProtocolProtos.SCMVersionRequestProto; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.ipc.Server; import org.apache.hadoop.metrics2.util.MBeans; @@ -58,24 +61,14 @@ import org.apache.hadoop.ozone.protocol.commands.CommandForDatanode; import org.apache.hadoop.ozone.protocol.commands.RegisteredCommand; import org.apache.hadoop.ozone.protocol.commands.SCMCommand; - import org.apache.hadoop.util.ReflectionUtils; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.base.Strings; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import javax.management.ObjectName; -import java.io.IOException; -import java.net.InetAddress; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.UUID; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ScheduledFuture; -import java.util.stream.Collectors; - /** * Maintains information about the Datanodes on SCM side. *

@@ -322,6 +315,15 @@ public Boolean isNodeRegistered(DatanodeDetails datanodeDetails) { @Override public void processNodeReport(DatanodeDetails datanodeDetails, NodeReportProto nodeReport) { + if (LOG.isDebugEnabled()) { + LOG.debug("Processing node report from [datanode={}]", + datanodeDetails.getHostName()); + } + if (LOG.isTraceEnabled()) { + LOG.trace("HB is received from [datanode={}]: {}", + datanodeDetails.getHostName(), + nodeReport.toString().replaceAll("\n", "\\\\n")); + } try { DatanodeInfo datanodeInfo = nodeStateManager.getNode(datanodeDetails); if (nodeReport != null) { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java index 35ec2954ce17e..500a8cd8aab02 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java @@ -21,9 +21,14 @@ */ package org.apache.hadoop.hdds.scm.server; -import com.google.common.collect.Maps; -import com.google.protobuf.BlockingService; +import java.io.IOException; +import java.net.InetSocketAddress; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.hdds.client.BlockID; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; @@ -50,28 +55,19 @@ import org.apache.hadoop.ozone.audit.Auditor; import org.apache.hadoop.ozone.audit.SCMAction; import org.apache.hadoop.ozone.common.BlockGroup; -import org.apache.hadoop.hdds.client.BlockID; import org.apache.hadoop.ozone.common.DeleteBlockGroupResult; -import org.apache.hadoop.ozone.protocolPB - .ScmBlockLocationProtocolServerSideTranslatorPB; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.hadoop.ozone.protocolPB.ProtocolMessageMetrics; +import org.apache.hadoop.ozone.protocolPB.ScmBlockLocationProtocolServerSideTranslatorPB; -import java.io.IOException; -import java.net.InetSocketAddress; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -import static org.apache.hadoop.hdds.scm.ScmConfigKeys - .OZONE_SCM_BLOCK_CLIENT_ADDRESS_KEY; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys - .OZONE_SCM_HANDLER_COUNT_DEFAULT; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys - .OZONE_SCM_HANDLER_COUNT_KEY; +import com.google.common.collect.Maps; +import com.google.protobuf.BlockingService; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_BLOCK_CLIENT_ADDRESS_KEY; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HANDLER_COUNT_DEFAULT; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HANDLER_COUNT_KEY; +import static org.apache.hadoop.hdds.scm.server.StorageContainerManager.startRpcServer; import static org.apache.hadoop.hdds.server.ServerUtils.updateRPCListenAddress; -import static org.apache.hadoop.hdds.scm.server.StorageContainerManager - .startRpcServer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * SCM block protocol is the protocol used by Namenode and OzoneManager to get @@ -89,6 +85,8 @@ public class SCMBlockProtocolServer implements private final OzoneConfiguration conf; private final RPC.Server blockRpcServer; private final InetSocketAddress blockRpcAddress; + private final ProtocolMessageMetrics + protocolMessageMetrics; /** * The RPC server that listens to requests from block service clients. @@ -103,11 +101,18 @@ public SCMBlockProtocolServer(OzoneConfiguration conf, RPC.setProtocolEngine(conf, ScmBlockLocationProtocolPB.class, ProtobufRpcEngine.class); + + protocolMessageMetrics = + ProtocolMessageMetrics.create("ScmBlockLocationProtocol", + "SCM Block location protocol counters", + ScmBlockLocationProtocolProtos.Type.values()); + // SCM Block Service RPC. BlockingService blockProtoPbService = ScmBlockLocationProtocolProtos.ScmBlockLocationProtocolService .newReflectiveBlockingService( - new ScmBlockLocationProtocolServerSideTranslatorPB(this)); + new ScmBlockLocationProtocolServerSideTranslatorPB(this, + protocolMessageMetrics)); final InetSocketAddress scmBlockAddress = HddsServerUtil .getScmBlockClientBindAddress(conf); @@ -137,6 +142,7 @@ public InetSocketAddress getBlockRpcAddress() { } public void start() { + protocolMessageMetrics.register(); LOG.info( StorageContainerManager.buildRpcServerStartMessage( "RPC server for Block Protocol", getBlockRpcAddress())); @@ -145,6 +151,7 @@ public void start() { public void stop() { try { + protocolMessageMetrics.unregister(); LOG.info("Stopping the RPC server for Block Protocol"); getBlockRpcServer().stop(); } catch (Exception ex) { diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMBlockProtocolServer.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMBlockProtocolServer.java index 25b5b9bfc75c7..e08fdc17ff3f2 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMBlockProtocolServer.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/server/TestSCMBlockProtocolServer.java @@ -24,13 +24,16 @@ import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos; import org.apache.hadoop.hdds.scm.TestUtils; import org.apache.hadoop.hdds.scm.node.NodeManager; +import org.apache.hadoop.ozone.protocolPB.ProtocolMessageMetrics; import org.apache.hadoop.ozone.protocolPB .ScmBlockLocationProtocolServerSideTranslatorPB; import org.apache.hadoop.test.GenericTestUtils; + import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import org.mockito.Mockito; import java.io.File; import java.util.ArrayList; @@ -39,7 +42,7 @@ /** * Test class for @{@link SCMBlockProtocolServer}. - * */ + */ public class TestSCMBlockProtocolServer { private OzoneConfiguration config; private SCMBlockProtocolServer server; @@ -64,7 +67,8 @@ public void setUp() throws Exception { } server = scm.getBlockProtocolServer(); - service = new ScmBlockLocationProtocolServerSideTranslatorPB(server); + service = new ScmBlockLocationProtocolServerSideTranslatorPB(server, + Mockito.mock(ProtocolMessageMetrics.class)); } @After diff --git a/hadoop-ozone/common/src/main/bin/ozone b/hadoop-ozone/common/src/main/bin/ozone index 838651c21b19a..e8cda82b9d108 100755 --- a/hadoop-ozone/common/src/main/bin/ozone +++ b/hadoop-ozone/common/src/main/bin/ozone @@ -51,6 +51,7 @@ function hadoop_usage hadoop_add_subcommand "scmcli" client "run the CLI of the Storage Container Manager" hadoop_add_subcommand "sh" client "command line interface for object store operations" hadoop_add_subcommand "s3" client "command line interface for s3 related operations" + hadoop_add_subcommand "insight" client "tool to get runtime opeartion information" hadoop_add_subcommand "version" client "print the version" hadoop_add_subcommand "dtutil" client "operations related to delegation tokens" hadoop_add_subcommand "upgrade" client "HDFS to Ozone in-place upgrade tool" @@ -175,6 +176,11 @@ function ozonecmd_case HADOOP_OPTS="${HADOOP_OPTS} ${HDFS_SCM_CLI_OPTS}" OZONE_RUN_ARTIFACT_NAME="hadoop-hdds-tools" ;; + insight) + HADOOP_CLASSNAME=org.apache.hadoop.ozone.insight.Insight + HADOOP_OPTS="${HADOOP_OPTS} ${HDFS_SCM_CLI_OPTS}" + OZONE_RUN_ARTIFACT_NAME="hadoop-ozone-insight" + ;; version) HADOOP_CLASSNAME=org.apache.hadoop.ozone.util.OzoneVersionInfo OZONE_RUN_ARTIFACT_NAME="hadoop-ozone-common" diff --git a/hadoop-ozone/dev-support/intellij/ozone-site.xml b/hadoop-ozone/dev-support/intellij/ozone-site.xml index d3209c15cb27f..2eb79aa5219ed 100644 --- a/hadoop-ozone/dev-support/intellij/ozone-site.xml +++ b/hadoop-ozone/dev-support/intellij/ozone-site.xml @@ -63,4 +63,8 @@ hdds.datanode.storage.utilization.critical.threshold 0.99 + + hdds.prometheus.endpoint.enabled + true + diff --git a/hadoop-ozone/dist/pom.xml b/hadoop-ozone/dist/pom.xml index a95c1c7ee6716..57b9a0cbd080d 100644 --- a/hadoop-ozone/dist/pom.xml +++ b/hadoop-ozone/dist/pom.xml @@ -136,6 +136,14 @@ cp hadoop-ozone-upgrade.classpath + + org.apache.hadoop + hadoop-ozone-insight + ${ozone.version} + classpath + cp + hadoop-ozone-insight.classpath + @@ -326,6 +334,10 @@ org.apache.hadoop hadoop-ozone-upgrade + + org.apache.hadoop + hadoop-ozone-insight + diff --git a/hadoop-ozone/insight/dev-support/findbugsExcludeFile.xml b/hadoop-ozone/insight/dev-support/findbugsExcludeFile.xml new file mode 100644 index 0000000000000..55abc26301783 --- /dev/null +++ b/hadoop-ozone/insight/dev-support/findbugsExcludeFile.xml @@ -0,0 +1,19 @@ + + + + diff --git a/hadoop-ozone/insight/pom.xml b/hadoop-ozone/insight/pom.xml new file mode 100644 index 0000000000000..80b2f8fad5842 --- /dev/null +++ b/hadoop-ozone/insight/pom.xml @@ -0,0 +1,132 @@ + + + + 4.0.0 + + org.apache.hadoop + hadoop-ozone + 0.5.0-SNAPSHOT + + hadoop-ozone-insight + 0.5.0-SNAPSHOT + Apache Hadoop Ozone Insight Tool + Apache Hadoop Ozone Insight Tool + jar + + + + org.apache.hadoop + hadoop-ozone-ozone-manager + + + org.apache.hadoop + hadoop-ozone-common + + + + org.apache.hadoop + hadoop-hdds-server-scm + + + org.apache.hadoop + hadoop-ozone-client + + + org.apache.hadoop + hadoop-ozone-filesystem + + + org.apache.hadoop + hadoop-hdds-server-framework + + + org.apache.hadoop + hadoop-common + compile + + + org.apache.hadoop + hadoop-hdfs + compile + + + com.sun.xml.bind + jaxb-core + + + javax.xml.bind + jaxb-api + + + javax.activation + activation + + + io.dropwizard.metrics + metrics-core + 3.2.4 + + + org.openjdk.jmh + jmh-core + 1.19 + + + org.openjdk.jmh + jmh-generator-annprocess + 1.19 + + + com.google.code.findbugs + findbugs + 3.0.1 + provided + + + junit + junit + test + + + org.apache.hadoop + hadoop-common + test + test-jar + + + org.apache.hadoop + hadoop-ozone-integration-test + test + test-jar + + + + + + org.codehaus.mojo + findbugs-maven-plugin + + ${basedir}/dev-support/findbugsExcludeFile.xml + + true + 2048 + + + + + diff --git a/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/BaseInsightPoint.java b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/BaseInsightPoint.java new file mode 100644 index 0000000000000..a23b876b53d7c --- /dev/null +++ b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/BaseInsightPoint.java @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.insight; + +import java.io.IOException; +import java.net.InetSocketAddress; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.conf.StorageUnit; +import org.apache.hadoop.hdds.HddsUtils; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; +import org.apache.hadoop.hdds.scm.XceiverClientManager; +import org.apache.hadoop.hdds.scm.client.ContainerOperationClient; +import org.apache.hadoop.hdds.scm.client.ScmClient; +import org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocol; +import org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolClientSideTranslatorPB; +import org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolPB; +import org.apache.hadoop.hdds.server.PrometheusMetricsSink; +import org.apache.hadoop.hdds.tracing.TracingUtil; +import org.apache.hadoop.ipc.Client; +import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.RPC; +import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.insight.LoggerSource.Level; +import org.apache.hadoop.security.UserGroupInformation; + +import com.google.protobuf.ProtocolMessageEnum; +import static org.apache.hadoop.hdds.HddsUtils.getScmAddressForClients; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT; + +/** + * Default implementation of Insight point logic. + */ +public abstract class BaseInsightPoint implements InsightPoint { + + /** + * List the related metrics. + */ + @Override + public List getMetrics() { + return new ArrayList<>(); + } + + /** + * List the related configuration. + */ + @Override + public List getConfigurationClasses() { + return new ArrayList<>(); + } + + /** + * List the related loggers. + * + * @param verbose true if verbose logging is requested. + */ + @Override + public List getRelatedLoggers(boolean verbose) { + List loggers = new ArrayList<>(); + return loggers; + } + + /** + * Create scm client. + */ + public ScmClient createScmClient(OzoneConfiguration ozoneConf) + throws IOException { + + if (!HddsUtils.getHostNameFromConfigKeys(ozoneConf, + ScmConfigKeys.OZONE_SCM_CLIENT_ADDRESS_KEY).isPresent()) { + + throw new IllegalArgumentException( + ScmConfigKeys.OZONE_SCM_CLIENT_ADDRESS_KEY + + " should be set in ozone-site.xml"); + } + + long version = RPC.getProtocolVersion( + StorageContainerLocationProtocolPB.class); + InetSocketAddress scmAddress = + getScmAddressForClients(ozoneConf); + int containerSizeGB = (int) ozoneConf.getStorageSize( + OZONE_SCM_CONTAINER_SIZE, OZONE_SCM_CONTAINER_SIZE_DEFAULT, + StorageUnit.GB); + ContainerOperationClient + .setContainerSizeB(containerSizeGB * OzoneConsts.GB); + + RPC.setProtocolEngine(ozoneConf, StorageContainerLocationProtocolPB.class, + ProtobufRpcEngine.class); + StorageContainerLocationProtocol client = + TracingUtil.createProxy( + new StorageContainerLocationProtocolClientSideTranslatorPB( + RPC.getProxy(StorageContainerLocationProtocolPB.class, version, + scmAddress, UserGroupInformation.getCurrentUser(), + ozoneConf, + NetUtils.getDefaultSocketFactory(ozoneConf), + Client.getRpcTimeout(ozoneConf))), + StorageContainerLocationProtocol.class, ozoneConf); + return new ContainerOperationClient( + client, new XceiverClientManager(ozoneConf)); + } + + /** + * Convenient method to define default log levels. + */ + public Level defaultLevel(boolean verbose) { + return verbose ? Level.TRACE : Level.DEBUG; + } + + /** + * Default metrics for any message type based RPC ServerSide translators. + */ + public void addProtocolMessageMetrics(List metrics, + String prefix, + Component.Type component, + ProtocolMessageEnum[] types) { + + MetricGroupDisplay messageTypeCounters = + new MetricGroupDisplay(component, "Message type counters"); + for (ProtocolMessageEnum type : types) { + String typeName = type.toString(); + MetricDisplay metricDisplay = new MetricDisplay("Number of " + typeName, + prefix + "_" + PrometheusMetricsSink + .normalizeName(typeName)); + messageTypeCounters.addMetrics(metricDisplay); + } + metrics.add(messageTypeCounters); + } + + /** + * Rpc metrics for any hadoop rpc endpoint. + */ + public void addRpcMetrics(List metrics, + Component.Type component, + Map filter) { + MetricGroupDisplay connection = + new MetricGroupDisplay(component, "RPC connections"); + connection.addMetrics(new MetricDisplay("Open connections", + "rpc_num_open_connections", filter)); + connection.addMetrics( + new MetricDisplay("Dropped connections", "rpc_num_dropped_connections", + filter)); + connection.addMetrics( + new MetricDisplay("Received bytes", "rpc_received_bytes", + filter)); + connection.addMetrics( + new MetricDisplay("Sent bytes", "rpc_sent_bytes", + filter)); + metrics.add(connection); + + MetricGroupDisplay queue = new MetricGroupDisplay(component, "RPC queue"); + queue.addMetrics(new MetricDisplay("RPC average queue time", + "rpc_rpc_queue_time_avg_time", filter)); + queue.addMetrics( + new MetricDisplay("RPC call queue length", "rpc_call_queue_length", + filter)); + metrics.add(queue); + + MetricGroupDisplay performance = + new MetricGroupDisplay(component, "RPC performance"); + performance.addMetrics(new MetricDisplay("RPC processing time average", + "rpc_rpc_processing_time_avg_time", filter)); + performance.addMetrics( + new MetricDisplay("Number of slow calls", "rpc_rpc_slow_calls", + filter)); + metrics.add(performance); + } + +} diff --git a/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/BaseInsightSubCommand.java b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/BaseInsightSubCommand.java new file mode 100644 index 0000000000000..95cda4168c5c3 --- /dev/null +++ b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/BaseInsightSubCommand.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.insight; + +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Optional; + +import org.apache.hadoop.hdds.HddsUtils; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.scm.ScmConfigKeys; +import org.apache.hadoop.ozone.insight.Component.Type; +import org.apache.hadoop.ozone.insight.om.KeyManagerInsight; +import org.apache.hadoop.ozone.insight.om.OmProtocolInsight; +import org.apache.hadoop.ozone.insight.scm.EventQueueInsight; +import org.apache.hadoop.ozone.insight.scm.NodeManagerInsight; +import org.apache.hadoop.ozone.insight.scm.ReplicaManagerInsight; +import org.apache.hadoop.ozone.insight.scm.ScmProtocolBlockLocationInsight; +import org.apache.hadoop.ozone.om.OMConfigKeys; + +import picocli.CommandLine; + +/** + * Parent class for all the insight subcommands. + */ +public class BaseInsightSubCommand { + + @CommandLine.ParentCommand + private Insight insightCommand; + + public InsightPoint getInsight(OzoneConfiguration configuration, + String selection) { + Map insights = createInsightPoints(configuration); + + if (!insights.containsKey(selection)) { + throw new RuntimeException(String + .format("No such component; %s. Available components: %s", selection, + insights.keySet())); + } + return insights.get(selection); + } + + /** + * Utility to get the host base on a component. + */ + public String getHost(OzoneConfiguration conf, Component component) { + if (component.getHostname() != null) { + return "http://" + component.getHostname() + ":" + component.getPort(); + } else if (component.getName() == Type.SCM) { + Optional scmHost = + HddsUtils.getHostNameFromConfigKeys(conf, + ScmConfigKeys.OZONE_SCM_BLOCK_CLIENT_ADDRESS_KEY, + ScmConfigKeys.OZONE_SCM_CLIENT_ADDRESS_KEY); + + return "http://" + scmHost.get() + ":9876"; + } else if (component.getName() == Type.OM) { + Optional omHost = + HddsUtils.getHostNameFromConfigKeys(conf, + OMConfigKeys.OZONE_OM_ADDRESS_KEY); + return "http://" + omHost.get() + ":9874"; + } else { + throw new IllegalArgumentException( + "Component type is not supported: " + component.getName()); + } + + } + + public Map createInsightPoints( + OzoneConfiguration configuration) { + Map insights = new LinkedHashMap<>(); + insights.put("scm.node-manager", new NodeManagerInsight()); + insights.put("scm.replica-manager", new ReplicaManagerInsight()); + insights.put("scm.event-queue", new EventQueueInsight()); + insights.put("scm.protocol.block-location", + new ScmProtocolBlockLocationInsight()); + + insights.put("om.key-manager", new KeyManagerInsight()); + insights.put("om.protocol.client", new OmProtocolInsight()); + + return insights; + } + + public Insight getInsightCommand() { + return insightCommand; + } +} diff --git a/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/Component.java b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/Component.java new file mode 100644 index 0000000000000..261ae49fb8b00 --- /dev/null +++ b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/Component.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.insight; + +import java.util.Objects; + +/** + * Identifier an ozone component. + */ +public class Component { + + /** + * The type of the component (eg. scm, s3g...) + */ + private Type name; + + /** + * Unique identifier of the instance (uuid or index). Can be null for + * non-HA server component. + */ + private String id; + + /** + * Hostname of the component. Optional, may help to find the right host + * name. + */ + private String hostname; + + /** + * HTTP service port. Optional. + */ + private int port; + + public Component(Type name) { + this.name = name; + } + + public Component(Type name, String id) { + this.name = name; + this.id = id; + } + + public Component(Type name, String id, String hostname) { + this.name = name; + this.id = id; + this.hostname = hostname; + } + + public Component(Type name, String id, String hostname, int port) { + this.name = name; + this.id = id; + this.hostname = hostname; + this.port = port; + } + + public Type getName() { + return name; + } + + public String getId() { + return id; + } + + public String getHostname() { + return hostname; + } + + public int getPort() { + return port; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + Component that = (Component) o; + return Objects.equals(name, that.name) && + Objects.equals(id, that.id); + } + + public String prefix() { + return name + (id != null && id.length() > 0 ? "-" + id : ""); + } + + @Override + public int hashCode() { + return Objects.hash(name, id); + } + + /** + * Ozone component types. + */ + public enum Type { + SCM, OM, DATANODE, S3G, RECON; + } + +} diff --git a/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/ConfigurationSubCommand.java b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/ConfigurationSubCommand.java new file mode 100644 index 0000000000000..e32ecd7cde9fb --- /dev/null +++ b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/ConfigurationSubCommand.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.insight; + +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.hdds.conf.Config; +import org.apache.hadoop.hdds.conf.ConfigGroup; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.ozone.insight.Component.Type; + +import picocli.CommandLine; + +import java.lang.reflect.Method; +import java.util.concurrent.Callable; + +/** + * Subcommand to show configuration values/documentation. + */ +@CommandLine.Command( + name = "config", + description = "Show configuration for a specific subcomponents", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class) +public class ConfigurationSubCommand extends BaseInsightSubCommand + implements Callable { + + @CommandLine.Parameters(description = "Name of the insight point (use list " + + "to check the available options)") + private String insightName; + + @Override + public Void call() throws Exception { + InsightPoint insight = + getInsight(getInsightCommand().createOzoneConfiguration(), insightName); + System.out.println( + "Configuration for `" + insightName + "` (" + insight.getDescription() + + ")"); + System.out.println(); + for (Class clazz : insight.getConfigurationClasses()) { + showConfig(clazz); + + } + return null; + } + + private void showConfig(Class clazz) { + OzoneConfiguration conf = new OzoneConfiguration(); + conf.addResource(getHost(conf, new Component(Type.SCM)) + "/conf"); + ConfigGroup configGroup = + (ConfigGroup) clazz.getAnnotation(ConfigGroup.class); + if (configGroup == null) { + return; + } + + String prefix = configGroup.prefix(); + + for (Method method : clazz.getMethods()) { + if (method.isAnnotationPresent(Config.class)) { + Config config = method.getAnnotation(Config.class); + String key = prefix + "." + config.key(); + System.out.println(">>> " + key); + System.out.println(" default: " + config.defaultValue()); + System.out.println(" current: " + conf.get(key)); + System.out.println(); + System.out.println(config.description()); + System.out.println(); + System.out.println(); + + } + } + + } + +} diff --git a/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/Insight.java b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/Insight.java new file mode 100644 index 0000000000000..690783ee411be --- /dev/null +++ b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/Insight.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.insight; + +import org.apache.hadoop.hdds.cli.GenericCli; +import org.apache.hadoop.hdds.cli.HddsVersionProvider; + +import picocli.CommandLine; + +/** + * Command line utility to check logs/metrics of internal ozone components. + */ +@CommandLine.Command(name = "ozone insight", + hidden = true, description = "Show debug information about a selected " + + "Ozone component", + versionProvider = HddsVersionProvider.class, + subcommands = {ListSubCommand.class, LogSubcommand.class, + MetricsSubCommand.class, ConfigurationSubCommand.class}, + mixinStandardHelpOptions = true) +public class Insight extends GenericCli { + + public static void main(String[] args) throws Exception { + new Insight().run(args); + } + +} diff --git a/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/InsightPoint.java b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/InsightPoint.java new file mode 100644 index 0000000000000..1284cfa958436 --- /dev/null +++ b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/InsightPoint.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.insight; + +import java.util.List; + +/** + * Definition of a specific insight points. + */ +public interface InsightPoint { + + /** + * Human readdable description. + */ + String getDescription(); + + /** + * List of the related loggers. + */ + List getRelatedLoggers(boolean verbose); + + /** + * List of the related metrics. + */ + List getMetrics(); + + /** + * List of the configuration classes. + */ + List getConfigurationClasses(); + + + +} diff --git a/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/ListSubCommand.java b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/ListSubCommand.java new file mode 100644 index 0000000000000..8f91398364720 --- /dev/null +++ b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/ListSubCommand.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.insight; + +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; + +import picocli.CommandLine; + +import java.util.Map; +import java.util.Map.Entry; +import java.util.concurrent.Callable; + +/** + * Subcommand to list of the available insight points. + */ +@CommandLine.Command( + name = "list", + description = "Show available insight points.", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class) +public class ListSubCommand extends BaseInsightSubCommand + implements Callable { + + @CommandLine.Parameters(defaultValue = "") + private String insightPrefix; + + @Override + public Void call() throws Exception { + + System.out.println("Available insight points:\n\n"); + + Map insightPoints = + createInsightPoints(new OzoneConfiguration()); + for (Entry entry : insightPoints.entrySet()) { + if (insightPrefix == null || entry.getKey().startsWith(insightPrefix)) { + System.out.println(String.format(" %-33s %s", entry.getKey(), + entry.getValue().getDescription())); + } + } + return null; + } + +} diff --git a/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/LogSubcommand.java b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/LogSubcommand.java new file mode 100644 index 0000000000000..2e8787f2b2629 --- /dev/null +++ b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/LogSubcommand.java @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.insight; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.ozone.insight.LoggerSource.Level; + +import org.apache.http.HttpResponse; +import org.apache.http.client.HttpClient; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.impl.client.HttpClientBuilder; +import picocli.CommandLine; + +/** + * Subcommand to display log. + */ +@CommandLine.Command( + name = "log", + aliases = "logs", + description = "Show log4j events related to the insight point", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class) +public class LogSubcommand extends BaseInsightSubCommand + implements Callable { + + @CommandLine.Parameters(description = "Name of the insight point (use list " + + "to check the available options)") + private String insightName; + + @CommandLine.Option(names = "-v", description = "Enable verbose mode to " + + "show more information / detailed message") + private boolean verbose; + + @Override + public Void call() throws Exception { + OzoneConfiguration conf = + getInsightCommand().createOzoneConfiguration(); + InsightPoint insight = + getInsight(conf, insightName); + + List loggers = insight.getRelatedLoggers(verbose); + + for (LoggerSource logger : loggers) { + setLogLevel(conf, logger.getLoggerName(), logger.getComponent(), + logger.getLevel()); + } + + Set sources = loggers.stream().map(LoggerSource::getComponent) + .collect(Collectors.toSet()); + try { + streamLog(conf, sources, loggers); + } finally { + for (LoggerSource logger : loggers) { + setLogLevel(conf, logger.getLoggerName(), logger.getComponent(), + Level.INFO); + } + } + return null; + } + + private void streamLog(OzoneConfiguration conf, Set sources, + List relatedLoggers) { + List loggers = new ArrayList<>(); + for (Component sourceComponent : sources) { + loggers.add(new Thread( + () -> streamLog(conf, sourceComponent, relatedLoggers))); + } + for (Thread thread : loggers) { + thread.start(); + } + for (Thread thread : loggers) { + try { + thread.join(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + } + + private void streamLog(OzoneConfiguration conf, Component logComponent, + List loggers) { + HttpClient client = HttpClientBuilder.create().build(); + + HttpGet get = new HttpGet(getHost(conf, logComponent) + "/logstream"); + try { + HttpResponse execute = client.execute(get); + try (BufferedReader bufferedReader = new BufferedReader( + new InputStreamReader(execute.getEntity().getContent(), + StandardCharsets.UTF_8))) { + bufferedReader.lines() + .filter(line -> { + for (LoggerSource logger : loggers) { + if (line.contains(logger.getLoggerName())) { + return true; + } + } + return false; + }) + .map(this::processLogLine) + .map(l -> "[" + logComponent.prefix() + "] " + l) + .forEach(System.out::println); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + public String processLogLine(String line) { + Pattern p = Pattern.compile("(.*)"); + Matcher m = p.matcher(line); + StringBuffer sb = new StringBuffer(); + while (m.find()) { + m.appendReplacement(sb, "\n" + m.group(1).replaceAll("\\\\n", "\n")); + } + m.appendTail(sb); + return sb.toString(); + } + + private void setLogLevel(OzoneConfiguration conf, String name, + Component component, LoggerSource.Level level) { + HttpClient client = HttpClientBuilder.create().build(); + + String request = String + .format("/logLevel?log=%s&level=%s", name, + level); + String hostName = getHost(conf, component); + HttpGet get = new HttpGet(hostName + request); + try { + HttpResponse execute = client.execute(get); + if (execute.getStatusLine().getStatusCode() != 200) { + throw new RuntimeException( + "Can't set the log level: " + hostName + " -> HTTP " + execute + .getStatusLine().getStatusCode()); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } +} diff --git a/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/LoggerSource.java b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/LoggerSource.java new file mode 100644 index 0000000000000..180b3e818b5fd --- /dev/null +++ b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/LoggerSource.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.insight; + +import org.apache.hadoop.ozone.insight.Component.Type; + +/** + * Definition of a log source. + */ +public class LoggerSource { + + /** + * Id of the component where the log is generated. + */ + private Component component; + + /** + * Log4j/slf4j logger name. + */ + private String loggerName; + + /** + * Log level. + */ + private Level level; + + public LoggerSource(Component component, String loggerName, Level level) { + this.component = component; + this.loggerName = loggerName; + this.level = level; + } + + public LoggerSource(Type componentType, Class loggerClass, + Level level) { + this(new Component(componentType), loggerClass.getCanonicalName(), level); + } + + public Component getComponent() { + return component; + } + + public String getLoggerName() { + return loggerName; + } + + public Level getLevel() { + return level; + } + + /** + * Log level definition. + */ + public enum Level { + TRACE, DEBUG, INFO, WARN, ERROR + } + +} diff --git a/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/MetricDisplay.java b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/MetricDisplay.java new file mode 100644 index 0000000000000..395c14cca6911 --- /dev/null +++ b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/MetricDisplay.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.insight; + +import java.util.HashMap; +import java.util.Map; + +/** + * Definition of one displayable hadoop metrics. + */ +public class MetricDisplay { + + /** + * Prometheus metrics name. + */ + private String id; + + /** + * Human readable definition of the metrhics. + */ + private String description; + + /** + * Prometheus metrics tag to filter out the right metrics. + */ + private Map filter; + + public MetricDisplay(String description, String id) { + this(description, id, new HashMap<>()); + } + + public MetricDisplay(String description, String id, + Map filter) { + this.id = id; + this.description = description; + this.filter = filter; + } + + public String getId() { + return id; + } + + public String getDescription() { + return description; + } + + public Map getFilter() { + return filter; + } + + public boolean checkLine(String line) { + return false; + } +} diff --git a/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/MetricGroupDisplay.java b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/MetricGroupDisplay.java new file mode 100644 index 0000000000000..08fd60c48bbc6 --- /dev/null +++ b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/MetricGroupDisplay.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.insight; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.ozone.insight.Component.Type; + +/** + * Definition of a group of metrics which can be displayed. + */ +public class MetricGroupDisplay { + + /** + * List fhe included metrics. + */ + private List metrics = new ArrayList<>(); + + /** + * Name of the component which includes the metrics (scm, om,...). + */ + private Component component; + + /** + * Human readable description. + */ + private String description; + + public MetricGroupDisplay(Component component, String description) { + this.component = component; + this.description = description; + } + + public MetricGroupDisplay(Type componentType, String metricName) { + this(new Component(componentType), metricName); + } + + public List getMetrics() { + return metrics; + } + + public void addMetrics(MetricDisplay item) { + this.metrics.add(item); + } + + public String getDescription() { + return description; + } + + public Component getComponent() { + return component; + } +} diff --git a/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/MetricsSubCommand.java b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/MetricsSubCommand.java new file mode 100644 index 0000000000000..d320c82b18776 --- /dev/null +++ b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/MetricsSubCommand.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.insight; + +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; + +import org.apache.http.HttpResponse; +import org.apache.http.client.HttpClient; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.impl.client.HttpClientBuilder; +import picocli.CommandLine; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.*; +import java.util.Map.Entry; +import java.util.concurrent.Callable; +import java.util.stream.Collectors; + +/** + * Command line interface to show metrics for a specific component. + */ +@CommandLine.Command( + name = "metrics", + aliases = "metric", + description = "Show available metrics.", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class) +public class MetricsSubCommand extends BaseInsightSubCommand + implements Callable { + + @CommandLine.Parameters(description = "Name of the insight point (use list " + + "to check the available options)") + private String insightName; + + @Override + public Void call() throws Exception { + OzoneConfiguration conf = + getInsightCommand().createOzoneConfiguration(); + InsightPoint insight = + getInsight(conf, insightName); + Set sources = + insight.getMetrics().stream().map(MetricGroupDisplay::getComponent) + .collect(Collectors.toSet()); + Map> metrics = getMetrics(conf, sources); + System.out.println( + "Metrics for `" + insightName + "` (" + insight.getDescription() + ")"); + System.out.println(); + for (MetricGroupDisplay group : insight.getMetrics()) { + System.out.println(group.getDescription()); + System.out.println(); + for (MetricDisplay display : group.getMetrics()) { + System.out.println(" " + display.getDescription() + ": " + selectValue( + metrics.get(group.getComponent()), display)); + } + System.out.println(); + System.out.println(); + + } + return null; + } + + private Map> getMetrics(OzoneConfiguration conf, + Collection sources) { + Map> result = new HashMap<>(); + for (Component source : sources) { + result.put(source, getMetrics(conf, source)); + } + return result; + } + + private String selectValue(List metrics, + MetricDisplay metricDisplay) { + for (String line : metrics) { + if (line.startsWith(metricDisplay.getId())) { + boolean filtered = false; + for (Entry filter : metricDisplay.getFilter() + .entrySet()) { + if (!line + .contains(filter.getKey() + "=\"" + filter.getValue() + "\"")) { + filtered = true; + } + } + if (!filtered) { + return line.split(" ")[1]; + } + } + } + return "???"; + } + + private List getMetrics(OzoneConfiguration conf, + Component component) { + HttpClient client = HttpClientBuilder.create().build(); + HttpGet get = new HttpGet(getHost(conf, component) + "/prom"); + try { + HttpResponse execute = client.execute(get); + if (execute.getStatusLine().getStatusCode() != 200) { + throw new RuntimeException( + "Can't read prometheus metrics endpoint" + execute.getStatusLine() + .getStatusCode()); + } + try (BufferedReader bufferedReader = new BufferedReader( + new InputStreamReader(execute.getEntity().getContent(), + StandardCharsets.UTF_8))) { + return bufferedReader.lines().collect(Collectors.toList()); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + +} diff --git a/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/datanode/RatisInsight.java b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/datanode/RatisInsight.java new file mode 100644 index 0000000000000..b87955e8aafa6 --- /dev/null +++ b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/datanode/RatisInsight.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.insight.datanode; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.scm.client.ScmClient; +import org.apache.hadoop.hdds.scm.pipeline.Pipeline; +import org.apache.hadoop.ozone.insight.BaseInsightPoint; +import org.apache.hadoop.ozone.insight.Component; +import org.apache.hadoop.ozone.insight.Component.Type; +import org.apache.hadoop.ozone.insight.InsightPoint; +import org.apache.hadoop.ozone.insight.LoggerSource; + +/** + * Insight definition for datanode/pipline metrics. + */ +public class RatisInsight extends BaseInsightPoint implements InsightPoint { + + private OzoneConfiguration conf; + + public RatisInsight(OzoneConfiguration conf) { + this.conf = conf; + } + + @Override + public List getRelatedLoggers(boolean verbose) { + List result = new ArrayList<>(); + try { + ScmClient scmClient = createScmClient(conf); + Pipeline pipeline = scmClient.listPipelines() + .stream() + .filter(d -> d.getNodes().size() > 1) + .findFirst() + .get(); + for (DatanodeDetails datanode : pipeline.getNodes()) { + Component dn = + new Component(Type.DATANODE, datanode.getUuid().toString(), + datanode.getHostName(), 9882); + result + .add(new LoggerSource(dn, "org.apache.ratis.server.impl", + defaultLevel(verbose))); + } + } catch (IOException e) { + throw new RuntimeException("Can't enumerate required logs", e); + } + + return result; + } + + @Override + public String getDescription() { + return "More information about one ratis datanode ring."; + } + +} diff --git a/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/datanode/package-info.java b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/datanode/package-info.java new file mode 100644 index 0000000000000..97dd495478902 --- /dev/null +++ b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/datanode/package-info.java @@ -0,0 +1,23 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.insight.datanode; + +/** + * Insight points for the ozone datanodes. + */ \ No newline at end of file diff --git a/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/om/KeyManagerInsight.java b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/om/KeyManagerInsight.java new file mode 100644 index 0000000000000..515cf3886403c --- /dev/null +++ b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/om/KeyManagerInsight.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.insight.om; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.ozone.insight.BaseInsightPoint; +import org.apache.hadoop.ozone.insight.Component.Type; +import org.apache.hadoop.ozone.insight.LoggerSource; +import org.apache.hadoop.ozone.insight.MetricDisplay; +import org.apache.hadoop.ozone.insight.MetricGroupDisplay; +import org.apache.hadoop.ozone.om.KeyManagerImpl; + +/** + * Insight implementation for the key management related operations. + */ +public class KeyManagerInsight extends BaseInsightPoint { + + @Override + public List getMetrics() { + List display = new ArrayList<>(); + + MetricGroupDisplay state = + new MetricGroupDisplay(Type.OM, "Key related metrics"); + state + .addMetrics(new MetricDisplay("Number of keys", "om_metrics_num_keys")); + state.addMetrics(new MetricDisplay("Number of key operations", + "om_metrics_num_key_ops")); + + display.add(state); + + MetricGroupDisplay key = + new MetricGroupDisplay(Type.OM, "Key operation stats"); + for (String operation : new String[] {"allocate", "commit", "lookup", + "list", "delete"}) { + key.addMetrics(new MetricDisplay( + "Number of key " + operation + "s (failure + success)", + "om_metrics_num_key_" + operation)); + key.addMetrics( + new MetricDisplay("Number of failed key " + operation + "s", + "om_metrics_num_key_" + operation + "_fails")); + } + display.add(key); + + return display; + } + + @Override + public List getRelatedLoggers(boolean verbose) { + List loggers = new ArrayList<>(); + loggers.add( + new LoggerSource(Type.OM, KeyManagerImpl.class, + defaultLevel(verbose))); + return loggers; + } + + @Override + public String getDescription() { + return "OM Key Manager"; + } + +} diff --git a/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/om/OmProtocolInsight.java b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/om/OmProtocolInsight.java new file mode 100644 index 0000000000000..502ba60dc864c --- /dev/null +++ b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/om/OmProtocolInsight.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.insight.om; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.ozone.insight.BaseInsightPoint; +import org.apache.hadoop.ozone.insight.Component.Type; +import org.apache.hadoop.ozone.insight.LoggerSource; +import org.apache.hadoop.ozone.insight.MetricGroupDisplay; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; +import org.apache.hadoop.ozone.protocolPB.OzoneManagerProtocolServerSideTranslatorPB; + +/** + * Insight definition for the OM RPC server. + */ +public class OmProtocolInsight extends BaseInsightPoint { + + @Override + public List getRelatedLoggers(boolean verbose) { + List loggers = new ArrayList<>(); + loggers.add( + new LoggerSource(Type.OM, + OzoneManagerProtocolServerSideTranslatorPB.class, + defaultLevel(verbose))); + return loggers; + } + + @Override + public List getMetrics() { + List metrics = new ArrayList<>(); + + Map filter = new HashMap<>(); + filter.put("servername", "OzoneManagerService"); + + addRpcMetrics(metrics, Type.OM, filter); + + addProtocolMessageMetrics(metrics, "om_client_protocol", Type.OM, + OzoneManagerProtocolProtos.Type.values()); + + return metrics; + } + + @Override + public String getDescription() { + return "Ozone Manager RPC endpoint"; + } + +} diff --git a/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/om/package-info.java b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/om/package-info.java new file mode 100644 index 0000000000000..c0dfc4d00e863 --- /dev/null +++ b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/om/package-info.java @@ -0,0 +1,23 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.insight.om; + +/** + * Insight points for the Ozone Manager. + */ \ No newline at end of file diff --git a/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/package-info.java b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/package-info.java new file mode 100644 index 0000000000000..a77524d7e1872 --- /dev/null +++ b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/package-info.java @@ -0,0 +1,24 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.insight; + +/** + * Framework to collect log/metrics and configuration for specified ozone + * components. + */ \ No newline at end of file diff --git a/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/scm/EventQueueInsight.java b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/scm/EventQueueInsight.java new file mode 100644 index 0000000000000..5a88cd2994241 --- /dev/null +++ b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/scm/EventQueueInsight.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.insight.scm; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hdds.server.events.EventQueue; +import org.apache.hadoop.ozone.insight.BaseInsightPoint; +import org.apache.hadoop.ozone.insight.Component.Type; +import org.apache.hadoop.ozone.insight.LoggerSource; + +/** + * Insight definition to check internal events. + */ +public class EventQueueInsight extends BaseInsightPoint { + + @Override + public List getRelatedLoggers(boolean verbose) { + List loggers = new ArrayList<>(); + loggers + .add(new LoggerSource(Type.SCM, EventQueue.class, + defaultLevel(verbose))); + return loggers; + } + + @Override + public String getDescription() { + return "Information about the internal async event delivery"; + } + +} diff --git a/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/scm/NodeManagerInsight.java b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/scm/NodeManagerInsight.java new file mode 100644 index 0000000000000..c4fb0258d8a62 --- /dev/null +++ b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/scm/NodeManagerInsight.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.insight.scm; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hdds.scm.node.SCMNodeManager; +import org.apache.hadoop.ozone.insight.BaseInsightPoint; +import org.apache.hadoop.ozone.insight.Component.Type; +import org.apache.hadoop.ozone.insight.LoggerSource; +import org.apache.hadoop.ozone.insight.MetricDisplay; +import org.apache.hadoop.ozone.insight.MetricGroupDisplay; + +/** + * Insight definition to check node manager / node report events. + */ +public class NodeManagerInsight extends BaseInsightPoint { + + @Override + public List getRelatedLoggers(boolean verbose) { + List loggers = new ArrayList<>(); + loggers.add( + new LoggerSource(Type.SCM, SCMNodeManager.class, + defaultLevel(verbose))); + return loggers; + } + + @Override + public List getMetrics() { + List display = new ArrayList<>(); + + MetricGroupDisplay nodes = + new MetricGroupDisplay(Type.SCM, "Node counters"); + + nodes.addMetrics( + new MetricDisplay("Healthy Nodes", "scm_node_manager_healthy_nodes")); + nodes.addMetrics( + new MetricDisplay("Dead Nodes", "scm_node_manager_dead_nodes")); + + display.add(nodes); + + MetricGroupDisplay hb = + new MetricGroupDisplay(Type.SCM, "HB processing stats"); + hb.addMetrics( + new MetricDisplay("HB processed", "scm_node_manager_num_hb_processed")); + hb.addMetrics(new MetricDisplay("HB processing failed", + "scm_node_manager_num_hb_processing_failed")); + display.add(hb); + + return display; + } + + @Override + public String getDescription() { + return "SCM Datanode management related information."; + } + +} diff --git a/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/scm/ReplicaManagerInsight.java b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/scm/ReplicaManagerInsight.java new file mode 100644 index 0000000000000..ec87f3f7727c5 --- /dev/null +++ b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/scm/ReplicaManagerInsight.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.insight.scm; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hdds.scm.container.ReplicationManager; +import org.apache.hadoop.ozone.insight.BaseInsightPoint; +import org.apache.hadoop.ozone.insight.Component.Type; +import org.apache.hadoop.ozone.insight.LoggerSource; +import org.apache.hadoop.ozone.insight.MetricGroupDisplay; + +/** + * Insight definition to chech the replication manager internal state. + */ +public class ReplicaManagerInsight extends BaseInsightPoint { + + @Override + public List getRelatedLoggers(boolean verbose) { + List loggers = new ArrayList<>(); + loggers.add(new LoggerSource(Type.SCM, ReplicationManager.class, + defaultLevel(verbose))); + return loggers; + } + + @Override + public List getMetrics() { + List display = new ArrayList<>(); + return display; + } + + @Override + public List getConfigurationClasses() { + List result = new ArrayList<>(); + result.add(ReplicationManager.ReplicationManagerConfiguration.class); + return result; + } + + @Override + public String getDescription() { + return "SCM closed container replication manager"; + } + +} diff --git a/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/scm/ScmProtocolBlockLocationInsight.java b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/scm/ScmProtocolBlockLocationInsight.java new file mode 100644 index 0000000000000..73f151228f8f2 --- /dev/null +++ b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/scm/ScmProtocolBlockLocationInsight.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.insight.scm; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos; +import org.apache.hadoop.hdds.scm.server.SCMBlockProtocolServer; +import org.apache.hadoop.ozone.insight.BaseInsightPoint; +import org.apache.hadoop.ozone.insight.Component.Type; +import org.apache.hadoop.ozone.insight.LoggerSource; +import org.apache.hadoop.ozone.insight.MetricGroupDisplay; +import org.apache.hadoop.ozone.protocolPB.ScmBlockLocationProtocolServerSideTranslatorPB; + +/** + * Insight metric to check the SCM block location protocol behaviour. + */ +public class ScmProtocolBlockLocationInsight extends BaseInsightPoint { + + @Override + public List getRelatedLoggers(boolean verbose) { + List loggers = new ArrayList<>(); + loggers.add( + new LoggerSource(Type.SCM, + ScmBlockLocationProtocolServerSideTranslatorPB.class, + defaultLevel(verbose))); + new LoggerSource(Type.SCM, + SCMBlockProtocolServer.class, + defaultLevel(verbose)); + return loggers; + } + + @Override + public List getMetrics() { + List metrics = new ArrayList<>(); + + Map filter = new HashMap<>(); + filter.put("servername", "StorageContainerLocationProtocolService"); + + addRpcMetrics(metrics, Type.SCM, filter); + + addProtocolMessageMetrics(metrics, "scm_block_location_protocol", + Type.SCM, ScmBlockLocationProtocolProtos.Type.values()); + + return metrics; + } + + @Override + public String getDescription() { + return "SCM Block location protocol endpoint"; + } + +} diff --git a/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/scm/package-info.java b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/scm/package-info.java new file mode 100644 index 0000000000000..0966fbda401d1 --- /dev/null +++ b/hadoop-ozone/insight/src/main/java/org/apache/hadoop/ozone/insight/scm/package-info.java @@ -0,0 +1,23 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.insight.scm; + +/** + * Insight points for the Storage Container Manager. + */ \ No newline at end of file diff --git a/hadoop-ozone/insight/src/test/java/org/apache/hadoop/ozone/insight/LogSubcommandTest.java b/hadoop-ozone/insight/src/test/java/org/apache/hadoop/ozone/insight/LogSubcommandTest.java new file mode 100644 index 0000000000000..67c2f70e70428 --- /dev/null +++ b/hadoop-ozone/insight/src/test/java/org/apache/hadoop/ozone/insight/LogSubcommandTest.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.insight; + +import org.junit.Assert; +import org.junit.Test; + +/** + * Testing utility methods of the log subcommand test. + */ +public class LogSubcommandTest { + + @Test + public void filterLog() { + LogSubcommand logSubcommand = new LogSubcommand(); + String result = logSubcommand.processLogLine( + "2019-08-04 12:27:08,648 [TRACE|org.apache.hadoop.hdds.scm.node" + + ".SCMNodeManager|SCMNodeManager] HB is received from " + + "[datanode=localhost]: storageReport {\\n storageUuid: " + + "\"DS-29204db6-a615-4106-9dd4-ce294c2f4cf6\"\\n " + + "storageLocation: \"/tmp/hadoop-elek/dfs/data\"\\n capacity: " + + "8348086272\\n scmUsed: 4096\\n remaining: 8246956032n " + + "storageType: DISK\\n failed: falsen}\\n\n"); + Assert.assertEquals(3, result.split("\n").length); + } +} \ No newline at end of file diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java index 121b6f635baf8..a2e958f791c7d 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java @@ -35,6 +35,7 @@ import java.util.Objects; import org.apache.commons.codec.digest.DigestUtils; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.StorageUnit; import org.apache.hadoop.crypto.key.KeyProvider; @@ -82,9 +83,11 @@ import org.apache.hadoop.ozone.om.protocol.OzoneManagerServerProtocol; import org.apache.hadoop.ozone.om.ratis.OMRatisSnapshotInfo; import org.apache.hadoop.ozone.om.snapshot.OzoneManagerSnapshotProvider; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.DBUpdatesRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos .KeyArgs; +import org.apache.hadoop.ozone.protocolPB.ProtocolMessageMetrics; import org.apache.hadoop.ozone.security.OzoneSecurityException; import org.apache.hadoop.ozone.security.OzoneTokenIdentifier; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; @@ -150,6 +153,7 @@ import org.apache.hadoop.utils.db.SequenceNumberNotFoundException; import org.apache.hadoop.utils.db.DBCheckpoint; import org.apache.hadoop.utils.db.DBStore; + import org.apache.ratis.server.protocol.TermIndex; import org.apache.ratis.util.FileUtils; import org.apache.ratis.util.LifeCycle; @@ -253,6 +257,7 @@ public final class OzoneManager extends ServiceRuntimeInfoImpl private S3BucketManager s3BucketManager; private final OMMetrics metrics; + private final ProtocolMessageMetrics omClientProtocolMetrics; private OzoneManagerHttpServer httpServer; private final OMStorage omStorage; private final ScmBlockLocationProtocol scmBlockClient; @@ -294,14 +299,12 @@ public final class OzoneManager extends ServiceRuntimeInfoImpl // execution, we can get from ozoneManager. private long maxUserVolumeCount; - private final ScmClient scmClient; private final long scmBlockSize; private final int preallocateBlocksMax; private final boolean grpcBlockTokenEnabled; private final boolean useRatisForReplication; - private OzoneManager(OzoneConfiguration conf) throws IOException, AuthenticationException { super(OzoneVersionInfo.OZONE_VERSION_INFO); @@ -412,6 +415,10 @@ private OzoneManager(OzoneConfiguration conf) throws IOException, metrics = OMMetrics.create(); + omClientProtocolMetrics = ProtocolMessageMetrics + .create("OmClientProtocol", "Ozone Manager RPC endpoint", + OzoneManagerProtocolProtos.Type.values()); + // Start Om Rpc Server. omRpcServer = getRpcServer(conf); omRpcAddress = updateRPCListenAddress(configuration, @@ -984,10 +991,11 @@ private static StorageContainerLocationProtocol getScmContainerClient( StorageContainerLocationProtocol scmContainerClient = TracingUtil.createProxy( new StorageContainerLocationProtocolClientSideTranslatorPB( - RPC.getProxy(StorageContainerLocationProtocolPB.class, scmVersion, - scmAddr, UserGroupInformation.getCurrentUser(), conf, - NetUtils.getDefaultSocketFactory(conf), - Client.getRpcTimeout(conf))), + RPC.getProxy(StorageContainerLocationProtocolPB.class, + scmVersion, + scmAddr, UserGroupInformation.getCurrentUser(), conf, + NetUtils.getDefaultSocketFactory(conf), + Client.getRpcTimeout(conf))), StorageContainerLocationProtocol.class, conf); return scmContainerClient; } @@ -1253,6 +1261,8 @@ public OMMetrics getMetrics() { */ public void start() throws IOException { + omClientProtocolMetrics.register(); + LOG.info(buildRpcServerStartMessage("OzoneManager RPC server", omRpcAddress)); @@ -1377,7 +1387,7 @@ private RPC.Server getRpcServer(OzoneConfiguration conf) throws IOException { RPC.setProtocolEngine(configuration, OzoneManagerProtocolPB.class, ProtobufRpcEngine.class); this.omServerProtocol = new OzoneManagerProtocolServerSideTranslatorPB( - this, omRatisServer, isRatisEnabled); + this, omRatisServer, omClientProtocolMetrics, isRatisEnabled); BlockingService omService = newReflectiveBlockingService(omServerProtocol); @@ -1471,6 +1481,7 @@ public void stop() { } metadataManager.stop(); metrics.unRegister(); + omClientProtocolMetrics.unregister(); unregisterMXBean(); if (jvmPauseMonitor != null) { jvmPauseMonitor.stop(); diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java index 303c994a42491..2452c1de805dc 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java @@ -16,7 +16,6 @@ */ package org.apache.hadoop.ozone.protocolPB; - import org.apache.hadoop.hdds.tracing.TracingUtil; import org.apache.hadoop.ozone.OmUtils; import org.apache.hadoop.ozone.om.OzoneManager; @@ -54,6 +53,7 @@ public class OzoneManagerProtocolServerSideTranslatorPB implements private final boolean isRatisEnabled; private final OzoneManager ozoneManager; private final OzoneManagerDoubleBuffer ozoneManagerDoubleBuffer; + private final ProtocolMessageMetrics protocolMessageMetrics; /** * Constructs an instance of the server handler. @@ -61,12 +61,15 @@ public class OzoneManagerProtocolServerSideTranslatorPB implements * @param impl OzoneManagerProtocolPB */ public OzoneManagerProtocolServerSideTranslatorPB( - OzoneManager impl, OzoneManagerRatisServer ratisServer, + OzoneManager impl, + OzoneManagerRatisServer ratisServer, + ProtocolMessageMetrics metrics, boolean enableRatis) { this.ozoneManager = impl; handler = new OzoneManagerRequestHandler(impl); this.omRatisServer = ratisServer; this.isRatisEnabled = enableRatis; + this.protocolMessageMetrics = metrics; this.ozoneManagerDoubleBuffer = new OzoneManagerDoubleBuffer(ozoneManager.getMetadataManager(), (i) -> { // Do nothing. @@ -82,48 +85,77 @@ public OzoneManagerProtocolServerSideTranslatorPB( * translator for OM protocol. */ @Override - public OMResponse submitRequest(RpcController controller, + public OMResponse submitRequest(RpcController controller, OMRequest request) throws ServiceException { Scope scope = TracingUtil .importAndCreateScope(request.getCmdType().name(), request.getTraceID()); try { - if (isRatisEnabled) { - // Check if the request is a read only request - if (OmUtils.isReadOnly(request)) { - return submitReadRequestToOM(request); - } else { - if (omRatisServer.isLeader()) { - try { - OMClientRequest omClientRequest = - OzoneManagerRatisUtils.createClientRequest(request); - if (omClientRequest != null) { - request = omClientRequest.preExecute(ozoneManager); - } - } catch(IOException ex) { - // As some of the preExecute returns error. So handle here. - return createErrorResponse(request, ex); + if (LOG.isTraceEnabled()) { + LOG.trace( + "OzoneManagerProtocol {} request is received: {}", + request.getCmdType().toString(), + request.toString().replaceAll("\n", "\\\\n")); + } else if (LOG.isDebugEnabled()) { + LOG.debug("OzoneManagerProtocol {} request is received", + request.getCmdType().toString()); + } + protocolMessageMetrics.increment(request.getCmdType()); + + OMResponse omResponse = processRequest(request); + + if (LOG.isTraceEnabled()) { + LOG.trace( + "OzoneManagerProtocol {} request is processed. Response: " + + "{}", + request.getCmdType().toString(), + omResponse.toString().replaceAll("\n", "\\\\n")); + } + return omResponse; + + } finally { + scope.close(); + } + } + + private OMResponse processRequest(OMRequest request) throws + ServiceException { + + if (isRatisEnabled) { + // Check if the request is a read only request + if (OmUtils.isReadOnly(request)) { + return submitReadRequestToOM(request); + } else { + if (omRatisServer.isLeader()) { + try { + OMClientRequest omClientRequest = + OzoneManagerRatisUtils.createClientRequest(request); + if (omClientRequest != null) { + request = omClientRequest.preExecute(ozoneManager); } - return submitRequestToRatis(request); - } else { - // throw not leader exception. This is being done, so to avoid - // unnecessary execution of preExecute on follower OM's. This - // will be helpful in the case like where we we reduce the - // chance of allocate blocks on follower OM's. Right now our - // leader status is updated every 1 second. - throw createNotLeaderException(); + } catch (IOException ex) { + // As some of the preExecute returns error. So handle here. + return createErrorResponse(request, ex); } + return submitRequestToRatis(request); + } else { + // throw not leader exception. This is being done, so to avoid + // unnecessary execution of preExecute on follower OM's. This + // will be helpful in the case like where we we reduce the + // chance of allocate blocks on follower OM's. Right now our + // leader status is updated every 1 second. + throw createNotLeaderException(); } - } else { - return submitRequestDirectlyToOM(request); } - } finally { - scope.close(); + } else { + return submitRequestDirectlyToOM(request); } + } /** * Create OMResponse from the specified OMRequest and exception. + * * @param omRequest * @param exception * @return OMResponse @@ -153,6 +185,7 @@ private OMResponse createErrorResponse( return null; } } + /** * Submits request to OM's Ratis server. */ diff --git a/hadoop-ozone/pom.xml b/hadoop-ozone/pom.xml index 9d00bc4d36e98..fd1a3a85b4209 100644 --- a/hadoop-ozone/pom.xml +++ b/hadoop-ozone/pom.xml @@ -54,6 +54,7 @@ upgrade csi fault-injection-test + insight @@ -168,6 +169,11 @@ hadoop-hdds-tools ${hdds.version} + + org.apache.hadoop + hadoop-ozone-insight + ${hdds.version} + org.apache.hadoop hadoop-ozone-recon