From 51074d2ec190d4980e0dbf50d4bff935d9c569fb Mon Sep 17 00:00:00 2001 From: Fengnan Li Date: Fri, 17 Apr 2020 23:01:26 -0700 Subject: [PATCH 1/2] Add support for router delegation token without watch Summary: This patch is targeting improving router's performance for delegation tokens related operations. It achieves the goal by removing watchers from router on tokens since based on our experience. The huge number of watches inside Zookeeper is degrading Zookeeper's performance pretty hard. The current limit is about 1.2-1.5 million. Specific changes: 1. Explicitly disable the watcher to tokens by not using PathChildrenCache or any curator provided cache at all. 2. Schedule a sync task between router and Zookeeper at a configurable interval to make routers sync with their token information through Zookeeper. 3. For token's change, always make sure to change local cache first instead of depending on callbacks of the watch event when using PathChildrenCache. The above three points are trying to make router token cache behaves as close as possible to the case when the PathChildrenCache is used. The below point handles one corner case. 4. Before token remover(a background thread) removes token from Zookeeper, one router will first make sure this token hasn't been renewed by other peers. This happens only when somehow the sync failed for this token that router local cache doesn't have the corret renewal date (expiry date) Test Plan: 1. Add several unit tests covering all common use cases. 2. Deployed on two machines and performing all tests. 3. Pressure testing: create production scale number of tokens (100k) and monitor the sync latency. --- .../AbstractDelegationTokenSecretManager.java | 6 +- .../ZKDelegationTokenSecretManager.java | 137 ++++++----- .../TestZKDelegationTokenSecretManager.java | 12 +- .../ZKDelegationTokenSecretManagerImpl.java | 177 +++++++++++++- ...estZKDelegationTokenSecretManagerImpl.java | 216 ++++++++++++++++++ 5 files changed, 480 insertions(+), 68 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/security/token/TestZKDelegationTokenSecretManagerImpl.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java index f329accec7553..3a22cee881070 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java @@ -23,11 +23,11 @@ import java.io.IOException; import java.security.MessageDigest; import java.util.Collection; -import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.Map; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; import javax.crypto.SecretKey; @@ -63,7 +63,7 @@ private String formatTokenId(TokenIdent id) { * to DelegationTokenInformation. Protected by this object lock. */ protected final Map currentTokens - = new HashMap(); + = new ConcurrentHashMap<>(); /** * Sequence number to create DelegationTokenIdentifier. @@ -75,7 +75,7 @@ private String formatTokenId(TokenIdent id) { * Access to allKeys is protected by this object lock */ protected final Map allKeys - = new HashMap(); + = new ConcurrentHashMap<>(); /** * Access to currentId is protected by this object lock. diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/ZKDelegationTokenSecretManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/ZKDelegationTokenSecretManager.java index cd3b8c0c0f279..4837da92f4843 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/ZKDelegationTokenSecretManager.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/ZKDelegationTokenSecretManager.java @@ -55,6 +55,7 @@ import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.delegation.web.DelegationTokenManager; +import static org.apache.hadoop.util.Time.now; import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException.NoNodeException; @@ -100,6 +101,9 @@ public abstract class ZKDelegationTokenSecretManager 0) { int keyId = Integer.parseInt(tokSeg.substring(j + 1)); - synchronized (this) { - allKeys.remove(keyId); - } + allKeys.remove(keyId); } } } - private void processTokenAddOrUpdate(ChildData data) throws IOException { - ByteArrayInputStream bin = new ByteArrayInputStream(data.getData()); + protected void processTokenAddOrUpdate(byte[] data) throws IOException { + ByteArrayInputStream bin = new ByteArrayInputStream(data); DataInputStream din = new DataInputStream(bin); TokenIdent ident = createIdentifier(); ident.readFields(din); @@ -488,11 +495,7 @@ private void processTokenAddOrUpdate(ChildData data) throws IOException { if (numRead > -1) { DelegationTokenInformation tokenInfo = new DelegationTokenInformation(renewDate, password); - synchronized (this) { - currentTokens.put(ident, tokenInfo); - // The cancel task might be waiting - notifyAll(); - } + currentTokens.put(ident, tokenInfo); } } @@ -501,11 +504,7 @@ private void processTokenRemoved(ChildData data) throws IOException { DataInputStream din = new DataInputStream(bin); TokenIdent ident = createIdentifier(); ident.readFields(din); - synchronized (this) { - currentTokens.remove(ident); - // The cancel task might be waiting - notifyAll(); - } + currentTokens.remove(ident); } @Override @@ -706,7 +705,7 @@ protected DelegationTokenInformation getTokenInfo(TokenIdent ident) { * * @param ident Identifier of the token */ - private synchronized void syncLocalCacheWithZk(TokenIdent ident) { + protected void syncLocalCacheWithZk(TokenIdent ident) { try { DelegationTokenInformation tokenInfo = getTokenInfoFromZK(ident); if (tokenInfo != null && !currentTokens.containsKey(ident)) { @@ -720,16 +719,21 @@ private synchronized void syncLocalCacheWithZk(TokenIdent ident) { } } - private DelegationTokenInformation getTokenInfoFromZK(TokenIdent ident) + protected DelegationTokenInformation getTokenInfoFromZK(TokenIdent ident) throws IOException { return getTokenInfoFromZK(ident, false); } - private DelegationTokenInformation getTokenInfoFromZK(TokenIdent ident, + protected DelegationTokenInformation getTokenInfoFromZK(TokenIdent ident, boolean quiet) throws IOException { String nodePath = getNodePath(ZK_DTSM_TOKENS_ROOT, DELEGATION_TOKEN_PREFIX + ident.getSequenceNumber()); + return getTokenInfoFromZK(nodePath, quiet); + } + + protected DelegationTokenInformation getTokenInfoFromZK(String nodePath, + boolean quiet) throws IOException { try { byte[] data = zkClient.getData().forPath(nodePath); if ((data == null) || (data.length == 0)) { @@ -864,15 +868,30 @@ protected void updateToken(TokenIdent ident, @Override protected void removeStoredToken(TokenIdent ident) throws IOException { + removeStoredToken(ident, false); + } + + protected void removeStoredToken(TokenIdent ident, + boolean checkAgainstZkBeforeDeletion) throws IOException { String nodeRemovePath = getNodePath(ZK_DTSM_TOKENS_ROOT, DELEGATION_TOKEN_PREFIX + ident.getSequenceNumber()); - if (LOG.isDebugEnabled()) { - LOG.debug("Removing ZKDTSMDelegationToken_" - + ident.getSequenceNumber()); - } try { - if (zkClient.checkExists().forPath(nodeRemovePath) != null) { + DelegationTokenInformation dtInfo = getTokenInfoFromZK(ident, true); + if (dtInfo != null) { + // For the case there is no sync or watch miss, it is possible that the + // local storage has expired tokens which have been renewed by peer + // so double check again to avoid accidental delete + if (checkAgainstZkBeforeDeletion + && dtInfo.getRenewDate() > now()) { + LOG.info("Node already renewed by peer " + nodeRemovePath + + " so this token should not be deleted"); + return; + } + if (LOG.isDebugEnabled()) { + LOG.debug("Removing ZKDTSMDelegationToken_" + + ident.getSequenceNumber()); + } while(zkClient.checkExists().forPath(nodeRemovePath) != null){ try { zkClient.delete().guaranteed().forPath(nodeRemovePath); @@ -895,7 +914,7 @@ protected void removeStoredToken(TokenIdent ident) } @Override - public synchronized TokenIdent cancelToken(Token token, + public TokenIdent cancelToken(Token token, String canceller) throws IOException { ByteArrayInputStream buf = new ByteArrayInputStream(token.getIdentifier()); DataInputStream in = new DataInputStream(buf); @@ -906,7 +925,7 @@ public synchronized TokenIdent cancelToken(Token token, return super.cancelToken(token, canceller); } - private void addOrUpdateToken(TokenIdent ident, + protected void addOrUpdateToken(TokenIdent ident, DelegationTokenInformation info, boolean isUpdate) throws Exception { String nodeCreatePath = getNodePath(ZK_DTSM_TOKENS_ROOT, DELEGATION_TOKEN_PREFIX @@ -933,6 +952,10 @@ private void addOrUpdateToken(TokenIdent ident, } } + public boolean isTokenWatcherEnabled() { + return isTokenWatcherEnabled; + } + /** * Simple implementation of an {@link ACLProvider} that simply returns an ACL * that gives all permissions only to a single principal. diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/token/delegation/TestZKDelegationTokenSecretManager.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/token/delegation/TestZKDelegationTokenSecretManager.java index b2e177976b6d5..643da6a368b64 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/token/delegation/TestZKDelegationTokenSecretManager.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/token/delegation/TestZKDelegationTokenSecretManager.java @@ -59,15 +59,15 @@ public class TestZKDelegationTokenSecretManager { private static final Logger LOG = LoggerFactory.getLogger(TestZKDelegationTokenSecretManager.class); - private static final int TEST_RETRIES = 2; + protected static final int TEST_RETRIES = 2; - private static final int RETRY_COUNT = 5; + protected static final int RETRY_COUNT = 5; - private static final int RETRY_WAIT = 1000; + protected static final int RETRY_WAIT = 1000; - private static final long DAY_IN_SECS = 86400; + protected static final long DAY_IN_SECS = 86400; - private TestingServer zkServer; + protected TestingServer zkServer; @Rule public Timeout globalTimeout = new Timeout(300000); @@ -425,7 +425,7 @@ private void verifyACL(CuratorFramework curatorFramework, // cancelled but.. that would mean having to make an RPC call for every // verification request. // Thus, the eventual consistency tradef-off should be acceptable here... - private void verifyTokenFail(DelegationTokenManager tm, + protected void verifyTokenFail(DelegationTokenManager tm, Token token) throws IOException, InterruptedException { verifyTokenFailWithRetry(tm, token, RETRY_COUNT); diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/token/ZKDelegationTokenSecretManagerImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/token/ZKDelegationTokenSecretManagerImpl.java index 4a111187ac46a..e29e70593c114 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/token/ZKDelegationTokenSecretManagerImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/token/ZKDelegationTokenSecretManagerImpl.java @@ -19,13 +19,26 @@ package org.apache.hadoop.hdfs.server.federation.router.security.token; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; +import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier; import org.apache.hadoop.security.token.delegation.ZKDelegationTokenSecretManager; +import org.apache.hadoop.util.Time; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.ZooKeeper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; import java.io.IOException; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; /** * Zookeeper based router delegation token store implementation. @@ -33,24 +46,184 @@ public class ZKDelegationTokenSecretManagerImpl extends ZKDelegationTokenSecretManager { + public static final String ZK_DTSM_ROUTER_TOKEN_SYNC_INTERVAL = + "zk-dt-secret-manager.router.token.sync.interval"; + public static final int ZK_DTSM_ROUTER_TOKEN_SYNC_INTERVAL_DEFAULT = 5; + private static final Logger LOG = LoggerFactory.getLogger(ZKDelegationTokenSecretManagerImpl.class); - private Configuration conf = null; + private Configuration conf; + + private final ScheduledExecutorService scheduler = + Executors.newSingleThreadScheduledExecutor(); + + // Local cache of delegation tokens, used for depracating tokens from + // currentTokenMap + private final Set localTokenCache = + new HashSet<>(); + // Native zk client for getting all tokens + private ZooKeeper zookeeper; + private final String TOKEN_PATH = "/" + zkClient.getNamespace() + + ZK_DTSM_TOKENS_ROOT; + // The flag used to issue an extra check before deletion + // Since cancel token and token remover thread use the same + // API here and one router could have a token that is renewed + // by another router, thus token remover should always check ZK + // to confirm whether it has been renewed or not + private ThreadLocal checkAgainstZkBeforeDeletion = + new ThreadLocal() { + @Override + protected Boolean initialValue() { + return true; + } + }; public ZKDelegationTokenSecretManagerImpl(Configuration conf) { super(conf); this.conf = conf; try { - super.startThreads(); + startThreads(); } catch (IOException e) { LOG.error("Error starting threads for zkDelegationTokens", e); } LOG.info("Zookeeper delegation token secret manager instantiated"); } + @Override + public void startThreads() throws IOException { + super.startThreads(); + // start token cache related work when watcher is disabled + if (!isTokenWatcherEnabled()) { + LOG.info("Watcher for tokens is disabled in this secret manager"); + try { + // By default set this variable + checkAgainstZkBeforeDeletion.set(true); + // Ensure the token root path exists + if (zkClient.checkExists().forPath(ZK_DTSM_TOKENS_ROOT) == null) { + zkClient.create().creatingParentsIfNeeded() + .withMode(CreateMode.PERSISTENT) + .forPath(ZK_DTSM_TOKENS_ROOT); + } + // Set up zookeeper client + try { + zookeeper = zkClient.getZookeeperClient().getZooKeeper(); + } catch (Exception e) { + LOG.info("Cannot get zookeeper client ", e); + } finally { + if (zookeeper == null) { + throw new IOException("Zookeeper client is null"); + } + } + + LOG.info("Start loading token cache"); + long start = Time.now(); + rebuildTokenCache(true); + LOG.info("Loaded token cache in {} milliseconds", Time.now() - start); + + int syncInterval = conf.getInt(ZK_DTSM_ROUTER_TOKEN_SYNC_INTERVAL, + ZK_DTSM_ROUTER_TOKEN_SYNC_INTERVAL_DEFAULT); + scheduler.scheduleAtFixedRate(new Runnable() { + @Override + public void run() { + try { + rebuildTokenCache(false); + } catch (Exception e) { + // ignore + } + } + }, syncInterval, syncInterval, TimeUnit.SECONDS); + } catch (Exception e) { + LOG.error("Error rebuilding local cache for zkDelegationTokens ", e); + } + } + } + + @Override + public void stopThreads() { + super.stopThreads(); + scheduler.shutdown(); + } + @Override public DelegationTokenIdentifier createIdentifier() { return new DelegationTokenIdentifier(); } + + /** + * This function will rebuild local token cache from zk storage. + * It is first called when the secret manager is initialized and + * then regularly at a configured interval. + * + * @param initial whether this is called during initialization + * @throws IOException + */ + private void rebuildTokenCache(boolean initial) throws IOException { + localTokenCache.clear(); + // Use bare zookeeper client to get all children since curator will + // wrap the same API with a sorting process. This is time consuming given + // millions of tokens + List zkTokens; + try { + zkTokens = zookeeper.getChildren(TOKEN_PATH, false); + } catch (KeeperException | InterruptedException e) { + throw new IOException("Tokens cannot be fetched from path " + + TOKEN_PATH, e); + } + byte[] data; + for (String tokenPath : zkTokens) { + try { + data = zkClient.getData().forPath( + ZK_DTSM_TOKENS_ROOT + "/" + tokenPath); + } catch (KeeperException.NoNodeException e) { + LOG.debug("No node in path [" + tokenPath + "]"); + continue; + } catch (Exception ex) { + throw new IOException(ex); + } + // Store data to currentTokenMap + processTokenAddOrUpdate(data); + // Store data to localTokenCache for sync + AbstractDelegationTokenIdentifier ident = createIdentifier(); + DataInputStream din = new DataInputStream(new ByteArrayInputStream(data)); + ident.readFields(din); + localTokenCache.add(ident); + } + if (!initial) { + // Sync zkTokens with local cache, specifically + // 1) add/update tokens to local cache from zk, which is done through + // processTokenAddOrUpdate above + // 2) remove tokens in local cache but not in zk anymore + for (AbstractDelegationTokenIdentifier ident : currentTokens.keySet()) { + if (!localTokenCache.contains(ident)) { + currentTokens.remove(ident); + } + } + } + } + + @Override + public AbstractDelegationTokenIdentifier cancelToken( + Token token, String canceller) + throws IOException { + checkAgainstZkBeforeDeletion.set(false); + AbstractDelegationTokenIdentifier ident = super.cancelToken(token, + canceller); + checkAgainstZkBeforeDeletion.set(true); + return ident; + } + + @Override + protected void removeStoredToken(AbstractDelegationTokenIdentifier ident) + throws IOException { + super.removeStoredToken(ident, checkAgainstZkBeforeDeletion.get()); + } + + @Override + protected void addOrUpdateToken(AbstractDelegationTokenIdentifier ident, + DelegationTokenInformation info, boolean isUpdate) throws Exception { + // Store the data in local memory first + currentTokens.put(ident, info); + super.addOrUpdateToken(ident, info, isUpdate); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/security/token/TestZKDelegationTokenSecretManagerImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/security/token/TestZKDelegationTokenSecretManagerImpl.java new file mode 100644 index 0000000000000..8e91aec2b4b6f --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/security/token/TestZKDelegationTokenSecretManagerImpl.java @@ -0,0 +1,216 @@ +package org.apache.hadoop.hdfs.server.federation.security.token; + +import static org.apache.hadoop.hdfs.server.federation.router.security.token.ZKDelegationTokenSecretManagerImpl.ZK_DTSM_ROUTER_TOKEN_SYNC_INTERVAL; +import static org.apache.hadoop.security.token.delegation.ZKDelegationTokenSecretManager.ZK_DTSM_TOKEN_WATCHER_ENABLED; +import static org.apache.hadoop.security.token.delegation.web.DelegationTokenManager.REMOVAL_SCAN_INTERVAL; +import static org.apache.hadoop.security.token.delegation.web.DelegationTokenManager.RENEW_INTERVAL; +import static org.junit.Assert.fail; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.server.federation.router.security.token.ZKDelegationTokenSecretManagerImpl; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.token.SecretManager; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.security.token.delegation.TestZKDelegationTokenSecretManager; +import org.apache.hadoop.security.token.delegation.web.DelegationTokenIdentifier; +import org.apache.hadoop.security.token.delegation.web.DelegationTokenManager; +import org.apache.hadoop.util.Time; +import org.junit.Assert; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class TestZKDelegationTokenSecretManagerImpl + extends TestZKDelegationTokenSecretManager { + private static final Logger LOG = + LoggerFactory.getLogger(TestZKDelegationTokenSecretManagerImpl.class); + + @SuppressWarnings("unchecked") + @Test + public void testMultiNodeOperationWithoutWatch() throws Exception { + String connectString = zkServer.getConnectString(); + Configuration conf = getSecretConf(connectString); + // disable watch + conf.setBoolean(ZK_DTSM_TOKEN_WATCHER_ENABLED, false); + conf.setInt(ZK_DTSM_ROUTER_TOKEN_SYNC_INTERVAL, 3); + + for (int i = 0; i < TEST_RETRIES; i++) { + ZKDelegationTokenSecretManagerImpl dtsm1 = + new ZKDelegationTokenSecretManagerImpl(conf); + ZKDelegationTokenSecretManagerImpl dtsm2 = + new ZKDelegationTokenSecretManagerImpl(conf); + DelegationTokenManager tm1, tm2; + tm1 = new DelegationTokenManager(conf, new Text("bla")); + tm1.setExternalDelegationTokenSecretManager(dtsm1); + tm2 = new DelegationTokenManager(conf, new Text("bla")); + tm2.setExternalDelegationTokenSecretManager(dtsm2); + + // common token operation without watchers should still be working + Token token = + (Token) tm1.createToken( + UserGroupInformation.getCurrentUser(), "foo"); + Assert.assertNotNull(token); + tm2.verifyToken(token); + tm2.renewToken(token, "foo"); + tm1.verifyToken(token); + tm1.cancelToken(token, "foo"); + try { + verifyTokenFail(tm2, token); + fail("Expected InvalidToken"); + } catch (SecretManager.InvalidToken it) { + // Ignore + } + + token = (Token) tm2.createToken( + UserGroupInformation.getCurrentUser(), "bar"); + Assert.assertNotNull(token); + tm1.verifyToken(token); + tm1.renewToken(token, "bar"); + tm2.verifyToken(token); + tm2.cancelToken(token, "bar"); + try { + verifyTokenFail(tm1, token); + fail("Expected InvalidToken"); + } catch (SecretManager.InvalidToken it) { + // Ignore + } + + dtsm1.stopThreads(); + dtsm2.stopThreads(); + verifyDestroy(tm1, conf); + verifyDestroy(tm2, conf); + } + } + + @Test + public void testMultiNodeTokenRemovalShortSyncWithoutWatch() + throws Exception { + String connectString = zkServer.getConnectString(); + Configuration conf = getSecretConf(connectString); + // disable watch + conf.setBoolean(ZK_DTSM_TOKEN_WATCHER_ENABLED, false); + // make sync quick + conf.setInt(ZK_DTSM_ROUTER_TOKEN_SYNC_INTERVAL, 3); + // set the renew window and removal interval to be a + // short time to trigger the background cleanup + conf.setInt(RENEW_INTERVAL, 10); + conf.setInt(REMOVAL_SCAN_INTERVAL, 10); + + for (int i = 0; i < TEST_RETRIES; i++) { + ZKDelegationTokenSecretManagerImpl dtsm1 = + new ZKDelegationTokenSecretManagerImpl(conf); + ZKDelegationTokenSecretManagerImpl dtsm2 = + new ZKDelegationTokenSecretManagerImpl(conf); + DelegationTokenManager tm1, tm2; + tm1 = new DelegationTokenManager(conf, new Text("bla")); + tm1.setExternalDelegationTokenSecretManager(dtsm1); + tm2 = new DelegationTokenManager(conf, new Text("bla")); + tm2.setExternalDelegationTokenSecretManager(dtsm2); + + // time: X + // token expiry time: + // tm1: X + 10 + // tm2: X + 10 + Token token = + (Token) tm1.createToken( + UserGroupInformation.getCurrentUser(), "foo"); + Assert.assertNotNull(token); + tm2.verifyToken(token); + + // time: X + 9 + // token expiry time: + // tm1: X + 10 + // tm2: X + 19 + Thread.sleep(9 * 1000); + tm2.renewToken(token, "foo"); + tm1.verifyToken(token); + + // time: X + 13 + // token expiry time: (sync happened) + // tm1: X + 19 + // tm2: X + 19 + Thread.sleep(4 * 1000); + tm1.verifyToken(token); + tm2.verifyToken(token); + + dtsm1.stopThreads(); + dtsm2.stopThreads(); + verifyDestroy(tm1, conf); + verifyDestroy(tm2, conf); + } + } + + // This is very unlikely to happen in real case, but worth putting + // the case out + @Test + public void testMultiNodeTokenRemovalLongSyncWithoutWatch() + throws Exception { + String connectString = zkServer.getConnectString(); + Configuration conf = getSecretConf(connectString); + // disable watch + conf.setBoolean(ZK_DTSM_TOKEN_WATCHER_ENABLED, false); + // make sync quick + conf.setInt(ZK_DTSM_ROUTER_TOKEN_SYNC_INTERVAL, 20); + // set the renew window and removal interval to be a + // short time to trigger the background cleanup + conf.setInt(RENEW_INTERVAL, 10); + conf.setInt(REMOVAL_SCAN_INTERVAL, 10); + + for (int i = 0; i < TEST_RETRIES; i++) { + ZKDelegationTokenSecretManagerImpl dtsm1 = + new ZKDelegationTokenSecretManagerImpl(conf); + ZKDelegationTokenSecretManagerImpl dtsm2 = + new ZKDelegationTokenSecretManagerImpl(conf); + ZKDelegationTokenSecretManagerImpl dtsm3 = + new ZKDelegationTokenSecretManagerImpl(conf); + DelegationTokenManager tm1, tm2, tm3; + tm1 = new DelegationTokenManager(conf, new Text("bla")); + tm1.setExternalDelegationTokenSecretManager(dtsm1); + tm2 = new DelegationTokenManager(conf, new Text("bla")); + tm2.setExternalDelegationTokenSecretManager(dtsm2); + tm3 = new DelegationTokenManager(conf, new Text("bla")); + tm3.setExternalDelegationTokenSecretManager(dtsm3); + + // time: X + // token expiry time: + // tm1: X + 10 + // tm2: X + 10 + // tm3: No token due to no sync + Token token = + (Token) tm1.createToken( + UserGroupInformation.getCurrentUser(), "foo"); + Assert.assertNotNull(token); + tm2.verifyToken(token); + + // time: X + 9 + // token expiry time: + // tm1: X + 10 + // tm2: X + 19 + // tm3: No token due to no sync + Thread.sleep(9 * 1000); + long renewalTime = tm2.renewToken(token, "foo"); + LOG.info("Renew for token {} at current time {} renewal time {}", + token.getIdentifier(), Time.formatTime(Time.now()), + Time.formatTime(renewalTime)); + tm1.verifyToken(token); + + // time: X + 13 + // token expiry time: (sync din't happen) + // tm1: X + 10 + // tm2: X + 19 + // tm3: X + 19 due to fetch from zk + Thread.sleep(4 * 1000); + tm2.verifyToken(token); + tm3.verifyToken(token); + + dtsm1.stopThreads(); + dtsm2.stopThreads(); + dtsm3.stopThreads(); + verifyDestroy(tm1, conf); + verifyDestroy(tm2, conf); + verifyDestroy(tm3, conf); + } + } + +} From 38dc294ecc480b29c0d7096b4788c1501866460a Mon Sep 17 00:00:00 2001 From: Fengnan Li Date: Thu, 18 Jun 2020 22:14:34 -0700 Subject: [PATCH 2/2] Address comments --- .../ZKDelegationTokenSecretManager.java | 6 ++++-- .../ZKDelegationTokenSecretManagerImpl.java | 9 +++------ ...TestZKDelegationTokenSecretManagerImpl.java | 18 ++++++++++++++++++ 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/ZKDelegationTokenSecretManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/ZKDelegationTokenSecretManager.java index 4837da92f4843..f50035d03773e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/ZKDelegationTokenSecretManager.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/ZKDelegationTokenSecretManager.java @@ -80,7 +80,7 @@ public abstract class ZKDelegationTokenSecretManager extends AbstractDelegationTokenSecretManager { - private static final String ZK_CONF_PREFIX = "zk-dt-secret-manager."; + public static final String ZK_CONF_PREFIX = "zk-dt-secret-manager."; public static final String ZK_DTSM_ZK_NUM_RETRIES = ZK_CONF_PREFIX + "zkNumRetries"; public static final String ZK_DTSM_ZK_SESSION_TIMEOUT = ZK_CONF_PREFIX @@ -483,7 +483,7 @@ private void processKeyRemoved(String path) { } } - protected void processTokenAddOrUpdate(byte[] data) throws IOException { + protected TokenIdent processTokenAddOrUpdate(byte[] data) throws IOException { ByteArrayInputStream bin = new ByteArrayInputStream(data); DataInputStream din = new DataInputStream(bin); TokenIdent ident = createIdentifier(); @@ -496,7 +496,9 @@ protected void processTokenAddOrUpdate(byte[] data) throws IOException { DelegationTokenInformation tokenInfo = new DelegationTokenInformation(renewDate, password); currentTokens.put(ident, tokenInfo); + return ident; } + return null; } private void processTokenRemoved(ChildData data) throws IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/token/ZKDelegationTokenSecretManagerImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/token/ZKDelegationTokenSecretManagerImpl.java index e29e70593c114..2d55026c807af 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/token/ZKDelegationTokenSecretManagerImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/security/token/ZKDelegationTokenSecretManagerImpl.java @@ -47,7 +47,7 @@ public class ZKDelegationTokenSecretManagerImpl extends ZKDelegationTokenSecretManager { public static final String ZK_DTSM_ROUTER_TOKEN_SYNC_INTERVAL = - "zk-dt-secret-manager.router.token.sync.interval"; + ZK_CONF_PREFIX + "router.token.sync.interval"; public static final int ZK_DTSM_ROUTER_TOKEN_SYNC_INTERVAL_DEFAULT = 5; private static final Logger LOG = @@ -58,7 +58,7 @@ public class ZKDelegationTokenSecretManagerImpl extends private final ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor(); - // Local cache of delegation tokens, used for depracating tokens from + // Local cache of delegation tokens, used for deprecating tokens from // currentTokenMap private final Set localTokenCache = new HashSet<>(); @@ -182,11 +182,8 @@ private void rebuildTokenCache(boolean initial) throws IOException { throw new IOException(ex); } // Store data to currentTokenMap - processTokenAddOrUpdate(data); + AbstractDelegationTokenIdentifier ident = processTokenAddOrUpdate(data); // Store data to localTokenCache for sync - AbstractDelegationTokenIdentifier ident = createIdentifier(); - DataInputStream din = new DataInputStream(new ByteArrayInputStream(data)); - ident.readFields(din); localTokenCache.add(ident); } if (!initial) { diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/security/token/TestZKDelegationTokenSecretManagerImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/security/token/TestZKDelegationTokenSecretManagerImpl.java index 8e91aec2b4b6f..3c7f8e88a91d1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/security/token/TestZKDelegationTokenSecretManagerImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/security/token/TestZKDelegationTokenSecretManagerImpl.java @@ -1,3 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.hadoop.hdfs.server.federation.security.token; import static org.apache.hadoop.hdfs.server.federation.router.security.token.ZKDelegationTokenSecretManagerImpl.ZK_DTSM_ROUTER_TOKEN_SYNC_INTERVAL;