From 11148227ea2cf2cbd66fa1412b52084ba7fb63c2 Mon Sep 17 00:00:00 2001
From: "zengqiang.xu"
Date: Sat, 30 Jul 2022 11:08:45 +0800
Subject: [PATCH] HDFS-16703. Enable RPC Timeout for some protocols of NameNode.

---
 .../hadoop/hdfs/NameNodeProxiesClient.java    |  3 +-
 .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 21 ++++++
 .../apache/hadoop/hdfs/NameNodeProxies.java   | 54 +++++++++++--
 .../src/main/resources/hdfs-default.xml       | 75 +++++++++++++++++++
 4 files changed, 144 insertions(+), 9 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/NameNodeProxiesClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/NameNodeProxiesClient.java
index 2e5532381978d..cf1efabf39e8b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/NameNodeProxiesClient.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/NameNodeProxiesClient.java
@@ -32,6 +32,7 @@
 import org.apache.hadoop.hdfs.server.namenode.ha.ClientHAProxyFactory;
 import org.apache.hadoop.hdfs.server.namenode.ha.HAProxyFactory;
 import org.apache.hadoop.ipc.AlignmentContext;
+import org.apache.hadoop.ipc.Client;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -371,7 +372,7 @@ public static ClientProtocol createProxyWithAlignmentContext(
     ClientNamenodeProtocolPB proxy = RPC.getProtocolProxy(
         ClientNamenodeProtocolPB.class, version, address, ugi, conf,
         NetUtils.getDefaultSocketFactory(conf),
-        org.apache.hadoop.ipc.Client.getTimeout(conf), defaultPolicy,
+        Client.getRpcTimeout(conf), defaultPolicy,
         fallbackToSimpleAuth, alignmentContext).getProxy();
 
     if (withRetries) { // create the proxy with retries
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
index f92a2ad56581b..80e240fac05a7 100755
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
@@ -2072,4 +2072,25 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final long DFS_LEASE_HARDLIMIT_DEFAULT =
       HdfsClientConfigKeys.DFS_LEASE_HARDLIMIT_DEFAULT;
 
+  public static final String IPC_RPC_TIMEOUT_FOR_ALIASMAP_PROTOCOL =
+      "ipc.rpc-timeout.for.alias-map.ms";
+  public static final long IPC_RPC_TIMEOUT_FOR_ALIASMAP_PROTOCOL_DEFAULT = 30000;
+  public static final String IPC_RPC_TIMEOUT_FOR_JOURNAL_PROTOCOL =
+      "ipc.rpc-timeout.for.journal.ms";
+  public static final long IPC_RPC_TIMEOUT_FOR_JOURNAL_PROTOCOL_DEFAULT = 30000;
+  public static final String IPC_RPC_TIMEOUT_FOR_REFRESH_AUTHORIZATION_PROTOCOL =
+      "ipc.rpc-timeout.for.refresh-authorization.ms";
+  public static final long IPC_RPC_TIMEOUT_FOR_REFRESH_AUTHORIZATION_PROTOCOL_DEFAULT = 0;
+  public static final String IPC_RPC_TIMEOUT_FOR_REFRESH_USER_MAPPING_PROTOCOL =
+      "ipc.rpc-timeout.for.refresh-user-mappings.ms";
+  public static final long IPC_RPC_TIMEOUT_FOR_REFRESH_USER_MAPPING_PROTOCOL_DEFAULT = 0;
+  public static final String IPC_RPC_TIMEOUT_FOR_REFRESH_CALL_QUEUE_PROTOCOL =
+      "ipc.rpc-timeout.for.refresh-call-queue.ms";
+  public static final long IPC_RPC_TIMEOUT_FOR_REFRESH_CALL_QUEUE_PROTOCOL_DEFAULT = 0;
+  public static final String IPC_RPC_TIMEOUT_FOR_GET_USER_MAPPING_PROTOCOL =
+      "ipc.rpc-timeout.for.get-user-mappings.ms";
+  public static final long IPC_RPC_TIMEOUT_FOR_GET_USER_MAPPING_PROTOCOL_DEFAULT = 0;
+  public static final String IPC_RPC_TIMEOUT_FOR_NAMENODE_PROTOCOL =
+      "ipc.rpc-timeout.for.namenode.ms";
+  public static final long IPC_RPC_TIMEOUT_FOR_NAMENODE_PROTOCOL_DEFAULT = 0;
 }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java
index 2a56ef3e1868b..79ebdd86ebcc3 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java
@@ -217,16 +217,22 @@ public static <T> ProxyAndInfo<T> createNonHAProxy(
   private static InMemoryAliasMapProtocol createNNProxyWithInMemoryAliasMapProtocol(
       InetSocketAddress address, Configuration conf, UserGroupInformation ugi,
       AlignmentContext alignmentContext) throws IOException {
+    int timeout = getRPCTimeout(conf,
+        DFSConfigKeys.IPC_RPC_TIMEOUT_FOR_ALIASMAP_PROTOCOL,
+        DFSConfigKeys.IPC_RPC_TIMEOUT_FOR_ALIASMAP_PROTOCOL_DEFAULT);
     AliasMapProtocolPB proxy = createNameNodeProxy(
-        address, conf, ugi, AliasMapProtocolPB.class, 30000, alignmentContext);
+        address, conf, ugi, AliasMapProtocolPB.class, timeout, alignmentContext);
     return new InMemoryAliasMapProtocolClientSideTranslatorPB(proxy);
   }
 
   private static JournalProtocol createNNProxyWithJournalProtocol(
       InetSocketAddress address, Configuration conf, UserGroupInformation ugi,
       AlignmentContext alignmentContext) throws IOException {
+    int timeout = getRPCTimeout(conf,
+        DFSConfigKeys.IPC_RPC_TIMEOUT_FOR_JOURNAL_PROTOCOL,
+        DFSConfigKeys.IPC_RPC_TIMEOUT_FOR_JOURNAL_PROTOCOL_DEFAULT);
     JournalProtocolPB proxy = createNameNodeProxy(address,
-        conf, ugi, JournalProtocolPB.class, 30000, alignmentContext);
+        conf, ugi, JournalProtocolPB.class, timeout, alignmentContext);
     return new JournalProtocolTranslatorPB(proxy);
   }
 
@@ -234,8 +240,11 @@ private static JournalProtocol createNNProxyWithJournalProtocol(
       createNNProxyWithRefreshAuthorizationPolicyProtocol(InetSocketAddress address,
           Configuration conf, UserGroupInformation ugi,
           AlignmentContext alignmentContext) throws IOException {
+    int timeout = getRPCTimeout(conf,
+        DFSConfigKeys.IPC_RPC_TIMEOUT_FOR_REFRESH_AUTHORIZATION_PROTOCOL,
+        DFSConfigKeys.IPC_RPC_TIMEOUT_FOR_REFRESH_AUTHORIZATION_PROTOCOL_DEFAULT);
     RefreshAuthorizationPolicyProtocolPB proxy = createNameNodeProxy(address,
-        conf, ugi, RefreshAuthorizationPolicyProtocolPB.class, 0,
+        conf, ugi, RefreshAuthorizationPolicyProtocolPB.class, timeout,
         alignmentContext);
     return new RefreshAuthorizationPolicyProtocolClientSideTranslatorPB(proxy);
   }
@@ -244,8 +253,11 @@ private static JournalProtocol createNNProxyWithJournalProtocol(
       createNNProxyWithRefreshUserMappingsProtocol(InetSocketAddress address,
           Configuration conf, UserGroupInformation ugi,
           AlignmentContext alignmentContext) throws IOException {
+    int timeout = getRPCTimeout(conf,
+        DFSConfigKeys.IPC_RPC_TIMEOUT_FOR_REFRESH_USER_MAPPING_PROTOCOL,
+        DFSConfigKeys.IPC_RPC_TIMEOUT_FOR_REFRESH_USER_MAPPING_PROTOCOL_DEFAULT);
     RefreshUserMappingsProtocolPB proxy = createNameNodeProxy(address, conf,
-        ugi, RefreshUserMappingsProtocolPB.class, 0, alignmentContext);
+        ugi, RefreshUserMappingsProtocolPB.class, timeout, alignmentContext);
     return new RefreshUserMappingsProtocolClientSideTranslatorPB(proxy);
   }
 
@@ -253,16 +265,22 @@ private static JournalProtocol createNNProxyWithJournalProtocol(
       createNNProxyWithRefreshCallQueueProtocol(InetSocketAddress address,
           Configuration conf, UserGroupInformation ugi,
           AlignmentContext alignmentContext) throws IOException {
+    int timeout = getRPCTimeout(conf,
+        DFSConfigKeys.IPC_RPC_TIMEOUT_FOR_REFRESH_CALL_QUEUE_PROTOCOL,
+        DFSConfigKeys.IPC_RPC_TIMEOUT_FOR_REFRESH_CALL_QUEUE_PROTOCOL_DEFAULT);
     RefreshCallQueueProtocolPB proxy = createNameNodeProxy(address, conf, ugi,
-        RefreshCallQueueProtocolPB.class, 0, alignmentContext);
+        RefreshCallQueueProtocolPB.class, timeout, alignmentContext);
     return new RefreshCallQueueProtocolClientSideTranslatorPB(proxy);
   }
 
   private static GetUserMappingsProtocol createNNProxyWithGetUserMappingsProtocol(
       InetSocketAddress address, Configuration conf, UserGroupInformation ugi,
       AlignmentContext alignmentContext) throws IOException {
+    int timeout = getRPCTimeout(conf,
+        DFSConfigKeys.IPC_RPC_TIMEOUT_FOR_GET_USER_MAPPING_PROTOCOL,
+        DFSConfigKeys.IPC_RPC_TIMEOUT_FOR_GET_USER_MAPPING_PROTOCOL_DEFAULT);
     GetUserMappingsProtocolPB proxy = createNameNodeProxy(address, conf, ugi,
-        GetUserMappingsProtocolPB.class, 0, alignmentContext);
+        GetUserMappingsProtocolPB.class, timeout, alignmentContext);
     return new GetUserMappingsProtocolClientSideTranslatorPB(proxy);
   }
 
@@ -270,8 +288,11 @@ private static NamenodeProtocol createNNProxyWithNamenodeProtocol(
       InetSocketAddress address, Configuration conf, UserGroupInformation ugi,
       boolean withRetries, AlignmentContext alignmentContext)
       throws IOException {
-    NamenodeProtocolPB proxy = createNameNodeProxy(
-        address, conf, ugi, NamenodeProtocolPB.class, 0, alignmentContext);
+    int timeout = getRPCTimeout(conf,
+        DFSConfigKeys.IPC_RPC_TIMEOUT_FOR_NAMENODE_PROTOCOL,
+        DFSConfigKeys.IPC_RPC_TIMEOUT_FOR_NAMENODE_PROTOCOL_DEFAULT);
+    NamenodeProtocolPB proxy = createNameNodeProxy(address, conf, ugi,
+        NamenodeProtocolPB.class, timeout, alignmentContext);
     if (withRetries) { // create the proxy with retries
       RetryPolicy timeoutPolicy = RetryPolicies.exponentialBackoffRetry(5, 200,
               TimeUnit.MILLISECONDS);
@@ -312,4 +333,21 @@ private static <T> T createNameNodeProxy(InetSocketAddress address,
         alignmentContext).getProxy();
   }
 
+  /**
+   * Read the RPC timeout, in milliseconds, stored under confKey in conf.
+   * A value outside [0, Integer.MAX_VALUE] is logged and replaced by defaultValue.
+   * @param conf input Configuration.
+   * @param confKey input conf key.
+   * @param defaultValue value used when confKey is unset or its value is invalid.
+   * @return the configured timeout, or defaultValue when the configured one is invalid.
+   */
+  private static int getRPCTimeout(Configuration conf, String confKey, long defaultValue) {
+    long tmpTimeout = conf.getLong(confKey, defaultValue);
+    if (tmpTimeout < 0 || tmpTimeout > Integer.MAX_VALUE) {
+      LOG.warn("Invalid value {} configured for {} should be in [0, Integer.MAX_VALUE]. "
+          + "Using default value of : {}ms instead.", tmpTimeout, confKey, defaultValue);
+      tmpTimeout = defaultValue;
+    }
+    return (int) tmpTimeout;
+  }
 }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
index e6dc8c5ba1ac4..2cb1a34e11265 100755
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
@@ -6591,4 +6591,79 @@
       Enables observer reads for clients. This should only be enabled when clients are using routers.
     </description>
   </property>
+  <property>
+    <name>ipc.rpc-timeout.for.alias-map.ms</name>
+    <value>30000</value>
+
+
+    <description>
+      The amount of time the aliasMapProtocol client will wait to read from the namenode
+      before timing out. If the namenode does not report progress more
+      frequently than this time, the client will give up waiting.
+    </description>
+  </property>
+  <property>
+    <name>ipc.rpc-timeout.for.journal.ms</name>
+    <value>30000</value>
+    <description>
+      The amount of time the journalProtocol client will wait to read from the journalnode
+      before timing out. If the journalnode does not report progress more
+      frequently than this time, the client will give up waiting.
+    </description>
+  </property>
+  <property>
+    <name>ipc.rpc-timeout.for.refresh-authorization.ms</name>
+    <value>0</value>
+    <description>
+      The amount of time the refreshAuthorizationPolicyProtocol client will wait
+      to read from the namenode before timing out. If the namenode does not
+      report progress more frequently than this time, the client will give up waiting.
+      The default value of 0 indicates that timeout is disabled,
+      which can be set to the same as ipc.client.rpc-timeout.ms, such as 120s.
+    </description>
+  </property>
+  <property>
+    <name>ipc.rpc-timeout.for.refresh-user-mappings.ms</name>
+    <value>0</value>
+    <description>
+      The amount of time the refreshUserMappingsProtocol client will wait
+      to read from the namenode before timing out. If the namenode does not
+      report progress more frequently than this time, the client will give up waiting.
+      The default value of 0 indicates that timeout is disabled,
+      which can be set to the same as ipc.client.rpc-timeout.ms, such as 120s.
+    </description>
+  </property>
+  <property>
+    <name>ipc.rpc-timeout.for.refresh-call-queue.ms</name>
+    <value>0</value>
+    <description>
+      The amount of time the refreshCallQueueProtocol client will wait
+      to read from the namenode before timing out. If the namenode does not
+      report progress more frequently than this time, the client will give up waiting.
+      The default value of 0 indicates that timeout is disabled,
+      which can be set to the same as ipc.client.rpc-timeout.ms, such as 120s.
+    </description>
+  </property>
+  <property>
+    <name>ipc.rpc-timeout.for.get-user-mappings.ms</name>
+    <value>0</value>
+    <description>
+      The amount of time the getUserMappingsProtocol client will wait
+      to read from the namenode before timing out. If the namenode does not
+      report progress more frequently than this time, the client will give up waiting.
+      The default value of 0 indicates that timeout is disabled,
+      which can be set to the same as ipc.client.rpc-timeout.ms, such as 120s.
+    </description>
+  </property>
+  <property>
+    <name>ipc.rpc-timeout.for.namenode.ms</name>
+    <value>0</value>
+    <description>
+      The amount of time the namenodeProtocol client will wait
+      to read from the namenode before timing out. If the namenode does not
+      report progress more frequently than this time, the client will give up waiting.
+      The default value of 0 indicates that timeout is disabled,
+      which can be set to the same as ipc.client.rpc-timeout.ms, such as 120s.
+    </description>
+  </property>
 </configuration>