@@ -72,14 +72,14 @@ public static void recoverFileLease(FileSystem fs, Path p, Configuration conf,
    * file's primary node. If all is well, it should return near immediately. But, as is common, it
    * is the very primary node that has crashed and so the namenode will be stuck waiting on a socket
    * timeout before it will ask another datanode to start the recovery. It does not help if we call
-   * recoverLease in the meantime and in particular, subsequent to the socket timeout, a
-   * recoverLease invocation will cause us to start over from square one (possibly waiting on socket
-   * timeout against primary node). So, in the below, we do the following: 1. Call recoverLease. 2.
-   * If it returns true, break. 3. If it returns false, wait a few seconds and then call it again.
-   * 4. If it returns true, break. 5. If it returns false, wait for what we think the datanode
-   * socket timeout is (configurable) and then try again. 6. If it returns true, break. 7. If it
-   * returns false, repeat starting at step 5. above. If HDFS-4525 is available, call it every
-   * second and we might be able to exit early.
+   * recoverLease in the meantime and in particular, after the socket timeout, a recoverLease
+   * invocation will cause us to start over from square one (possibly waiting on socket timeout
+   * against primary node). So, in the below, we do the following: 1. Call recoverLease. 2. If it
+   * returns true, break. 3. If it returns false, wait a few seconds and then call it again. 4. If
+   * it returns true, break. 5. If it returns false, wait for what we think the datanode socket
+   * timeout is (configurable) and then try again. 6. If it returns true, break. 7. If it returns
+   * false, repeat starting at step 5. above. If HDFS-4525 is available, call it every second, and
+   * we might be able to exit early.
    */
   private static boolean recoverDFSFileLease(final DistributedFileSystem dfs, final Path p,
     final Configuration conf, final CancelableProgressable reporter) throws IOException {
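
The numbered steps in the Javadoc above boil down to a bounded retry loop around DistributedFileSystem.recoverLease(). For orientation only, here is a minimal standalone sketch of that flow; the class name, constants, and overall-deadline handling are illustrative assumptions, not the body of recoverDFSFileLease.

```java
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;

// Minimal sketch of the retry sequence described in the Javadoc above.
// Names and constants are illustrative; this is not the HBase implementation.
public final class LeaseRecoverySketch {
  private static final long FIRST_PAUSE_MS = 4_000L;        // short wait after the first failure
  private static final long DATANODE_TIMEOUT_MS = 64_000L;  // assumed datanode socket timeout
  private static final long OVERALL_DEADLINE_MS = 900_000L; // give up after roughly 15 minutes

  public static boolean recoverLeaseWithRetries(DistributedFileSystem dfs, Path p)
    throws IOException, InterruptedException {
    long start = System.currentTimeMillis();
    for (int attempt = 0;; attempt++) {
      // Steps 1/2: ask the namenode to recover the lease; true means the file is now closed.
      if (dfs.recoverLease(p)) {
        return true;
      }
      if (System.currentTimeMillis() - start > OVERALL_DEADLINE_MS) {
        return false; // overall deadline exceeded; let the caller decide what to do next
      }
      // Step 3 waits only briefly; from step 5 on, wait out the presumed socket timeout,
      // since repeated failures usually mean the primary datanode is dead.
      Thread.sleep(attempt == 0 ? FIRST_PAUSE_MS : DATANODE_TIMEOUT_MS);
    }
  }
}
```

The real method additionally reports progress through the CancelableProgressable and, as the later hunks show, polls isFileClosed while waiting so it can exit early.
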
@@ -89,10 +89,10 @@ private static boolean recoverDFSFileLease(final DistributedFileSystem dfs, fina
     // usually needs 10 minutes before marking the nodes as dead. So we're putting ourselves
     // beyond that limit 'to be safe'.
     long recoveryTimeout = conf.getInt("hbase.lease.recovery.timeout", 900000) + startWaiting;
-    // This setting should be a little bit above what the cluster dfs heartbeat is set to.
+    // This setting should be a little above what the cluster dfs heartbeat is set to.
     long firstPause = conf.getInt("hbase.lease.recovery.first.pause", 4000);
     // This should be set to how long it'll take for us to timeout against primary datanode if it
-    // is dead. We set it to 64 seconds, 4 second than the default READ_TIMEOUT in HDFS, the
+    // is dead. We set it to 64 seconds, 4 seconds more than the default READ_TIMEOUT in HDFS, the
     // default value for DFS_CLIENT_SOCKET_TIMEOUT_KEY. If recovery is still failing after this
     // timeout, then further recovery will take linear backoff with this base, to avoid endless
     // preemptions when this value is not properly configured.
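
To make the backoff described in these comments concrete, the small sketch below prints the wait schedule implied by the defaults mentioned here (4 s first pause, a 64 s base that then grows linearly, all inside a 15 minute budget). It is a worked example under those assumed defaults, not output from the real code path.

```java
// Worked example of the linear backoff described above, using the default values
// from the comments (assumed; not produced by the actual recovery code).
public final class BackoffScheduleSketch {
  public static void main(String[] args) {
    final long firstPauseMs = 4_000L;        // hbase.lease.recovery.first.pause default
    final long subsequentBaseMs = 64_000L;   // base pause once the first short wait fails
    final long recoveryTimeoutMs = 900_000L; // hbase.lease.recovery.timeout default

    long elapsedMs = 0;
    for (int attempt = 0; elapsedMs <= recoveryTimeoutMs; attempt++) {
      long pauseMs = (attempt == 0) ? firstPauseMs : subsequentBaseMs * attempt;
      System.out.printf("attempt %d: pause %d ms (elapsed %d ms)%n", attempt, pauseMs, elapsedMs);
      elapsedMs += pauseMs;
    }
  }
}
```

With these defaults the pauses run 4 s, 64 s, 128 s, 192 s, and so on, so only a handful of attempts fit into the 15 minute window, which is the "endless preemptions" scenario the comment is guarding against.
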
@@ -118,7 +118,7 @@ private static boolean recoverDFSFileLease(final DistributedFileSystem dfs, fina
           Thread.sleep(firstPause);
         } else {
           // Cycle here until (subsequentPause * nbAttempt) elapses. While spinning, check
-          // isFileClosed if available (should be in hadoop 2.0.5... not in hadoop 1 though.
+          // isFileClosed if available (should be in hadoop 2.0.5... not in hadoop 1 though).
           long localStartWaiting = EnvironmentEdgeManager.currentTime();
           while (
             (EnvironmentEdgeManager.currentTime() - localStartWaiting)
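
The loop in this hunk spins for subsequentPause * nbAttempt, and the comment notes that isFileClosed (HDFS-4525) can be polled meanwhile for an early exit, since the method is missing from Hadoop 1. One cautious way to express that is to look the method up reflectively and poll it while waiting, as in the sketch below; the class name, helper name, and one-second poll interval are assumptions for illustration.

```java
import java.lang.reflect.Method;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;

// Sketch of the "check isFileClosed while waiting" idea from the comment above.
// Reflection is used because isFileClosed (HDFS-4525) is absent from older Hadoop versions.
public final class IsFileClosedProbe {
  private static final Method IS_FILE_CLOSED;

  static {
    Method m = null;
    try {
      m = DistributedFileSystem.class.getMethod("isFileClosed", Path.class);
    } catch (NoSuchMethodException e) {
      // Older Hadoop: fall back to plain waiting, no early exit.
    }
    IS_FILE_CLOSED = m;
  }

  /** Waits up to waitMs, polling isFileClosed every second; returns true on early close. */
  public static boolean waitWithEarlyExit(DistributedFileSystem dfs, Path p, long waitMs)
    throws InterruptedException {
    long start = System.currentTimeMillis();
    while (System.currentTimeMillis() - start < waitMs) {
      Thread.sleep(1_000L); // assumed poll interval
      if (IS_FILE_CLOSED != null) {
        try {
          if ((Boolean) IS_FILE_CLOSED.invoke(dfs, p)) {
            return true; // lease recovery already completed; no need to keep waiting
          }
        } catch (ReflectiveOperationException e) {
          // Treat reflection failures as "not closed yet" and keep waiting.
        }
      }
    }
    return false;
  }
}
```

If the method is absent, the sketch simply waits out the full interval, which matches the fallback behaviour the comment implies.
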