From 253e64a379a879f52e84a6076468b498a2ac3b20 Mon Sep 17 00:00:00 2001
From: PreetPatel45 <7433preet@gmail.com>
Date: Sat, 29 Mar 2025 13:52:33 -0300
Subject: [PATCH 1/6] Implemented the Introduce Explaining Variable and Decompose Conditional refactorings

---
 .../apache/hbase/FsRegionsMetaRecoverer.java  | 56 ++++++++++++-------
 .../src/main/java/org/apache/hbase/HBCK2.java |  2 +-
 2 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/hbase-hbck2/src/main/java/org/apache/hbase/FsRegionsMetaRecoverer.java b/hbase-hbck2/src/main/java/org/apache/hbase/FsRegionsMetaRecoverer.java
index 1722533831..ed1cb3e976 100644
--- a/hbase-hbck2/src/main/java/org/apache/hbase/FsRegionsMetaRecoverer.java
+++ b/hbase-hbck2/src/main/java/org/apache/hbase/FsRegionsMetaRecoverer.java
@@ -191,46 +191,60 @@ Map<TableName, List<Path>> reportTablesRegions(final List<String> namespacesOrTable
   }
 
   List<Future<List<String>>> processRegionsMetaCleanup(
-    ExecFunction<Map<TableName, List<Path>>, List<String>> reportFunction,
-    ExecFunction<List<Path>, List<String>> execFunction, List<String> nameSpaceOrTable)
-    throws IOException {
-    ExecutorService executorService = Executors.newFixedThreadPool((nameSpaceOrTable == null
-      || nameSpaceOrTable.size() > Runtime.getRuntime().availableProcessors())
-        ? Runtime.getRuntime().availableProcessors()
-        : nameSpaceOrTable.size());
+      ExecFunction<Map<TableName, List<Path>>, List<String>> reportFunction,
+      ExecFunction<List<Path>, List<String>> execFunction, List<String> nameSpaceOrTable)
+      throws IOException {
+
+    // Determine the number of available processors
+    int availableProcessors = Runtime.getRuntime().availableProcessors();
+
+    // Decide on the thread pool size based on the provided list size
+    int threadPoolSize;
+    if (nameSpaceOrTable == null || nameSpaceOrTable.size() > availableProcessors) {
+      threadPoolSize = availableProcessors;
+    } else {
+      threadPoolSize = nameSpaceOrTable.size();
+    }
+
+    // Create the executor service using the determined thread pool size
+    ExecutorService executorService = Executors.newFixedThreadPool(threadPoolSize);
     List<Future<List<String>>> futures =
-      new ArrayList<>(nameSpaceOrTable == null ? 1 : nameSpaceOrTable.size());
+        new ArrayList<>(nameSpaceOrTable == null ? 1 : nameSpaceOrTable.size());
+
     try {
       try (final Admin admin = conn.getAdmin()) {
         Map<TableName, List<Path>> report = reportFunction.execute(nameSpaceOrTable);
-        if (report.size() < 1) {
+        if (report.isEmpty()) {
           LOG.info("\nNo mismatches found in meta. Worth using related reporting function "
-            + "first.\nYou are likely passing non-existent "
-            + "namespace or table. Note that table names should include the namespace "
-            + "portion even for tables in the default namespace. "
-            + "See also the command usage.\n");
+              + "first.\nYou are likely passing non-existent "
+              + "namespace or table. Note that table names should include the namespace "
+              + "portion even for tables in the default namespace. "
+              + "See also the command usage.\n");
         }
         for (TableName tableName : report.keySet()) {
           if (admin.tableExists(tableName)) {
             futures.add(executorService.submit(new Callable<List<String>>() {
               @Override
               public List<String> call() throws Exception {
-                LOG.debug("running thread for {}", tableName.getNameWithNamespaceInclAsString());
+                LOG.debug("running thread for {}",
+                    tableName.getNameWithNamespaceInclAsString());
                 return execFunction.execute(report.get(tableName));
               }
             }));
           } else {
             LOG.warn("Table {} does not exist! Skipping...",
-              tableName.getNameWithNamespaceInclAsString());
+                tableName.getNameWithNamespaceInclAsString());
           }
         }
-        boolean allDone;
-        do {
-          allDone = true;
-          for (Future<List<String>> f : futures) {
-            allDone &= f.isDone();
+
+        // Wait for each future to complete.
+        for (Future<List<String>> future : futures) {
+          try {
+            future.get();
+          } catch (Exception e) {
+            LOG.error("Exception while waiting for future completion", e);
          }
-        } while (!allDone);
+        }
      }
    } finally {
      executorService.shutdown();
diff --git a/hbase-hbck2/src/main/java/org/apache/hbase/HBCK2.java b/hbase-hbck2/src/main/java/org/apache/hbase/HBCK2.java
index c569d2773e..9cd1b34f22 100644
--- a/hbase-hbck2/src/main/java/org/apache/hbase/HBCK2.java
+++ b/hbase-hbck2/src/main/java/org/apache/hbase/HBCK2.java
@@ -1647,5 +1647,5 @@ static int getBatchSize(Option batchOpt, CommandLine commandLine)
     LOG.info("Batch size set to: " + batchSize);
     return batchSize;
   }
- }
+

From a2cfa7063b7ecfa4bba49a9c62c685a5f66c6688 Mon Sep 17 00:00:00 2001
From: PreetPatel45 <7433preet@gmail.com>
Date: Sat, 29 Mar 2025 16:32:00 -0300
Subject: [PATCH 2/6] Implemented the Extract Class refactoring

---
 hbase-hbck2/pom.xml                           |   2 +-
 .../src/main/java/org/apache/hbase/HBCK2.java | 450 ++----------------
 .../org/apache/hbase/HBCK2CommandUsage.java   | 371 +++++++++++++++
 3 files changed, 417 insertions(+), 406 deletions(-)
 create mode 100644 hbase-hbck2/src/main/java/org/apache/hbase/HBCK2CommandUsage.java

diff --git a/hbase-hbck2/pom.xml b/hbase-hbck2/pom.xml
index 1d572253fe..2f75d6651e 100644
--- a/hbase-hbck2/pom.xml
+++ b/hbase-hbck2/pom.xml
@@ -258,7 +258,7 @@
 - + org.apache.maven.pluginsmaven-compiler-plugin88

diff --git a/hbase-hbck2/src/main/java/org/apache/hbase/HBCK2.java b/hbase-hbck2/src/main/java/org/apache/hbase/HBCK2.java
index 9cd1b34f22..7b72dacc21 100644
--- a/hbase-hbck2/src/main/java/org/apache/hbase/HBCK2.java
+++ b/hbase-hbck2/src/main/java/org/apache/hbase/HBCK2.java
@@ -96,19 +96,19 @@ public class HBCK2 extends Configured implements org.apache.hadoop.util.Tool {
   private static final char META_REPLICA_ID_DELIMITER = '_';
 
   // Commands
-  private static final String SET_TABLE_STATE = "setTableState";
-  private static final String ASSIGNS = "assigns";
-  private static final String UNASSIGNS = "unassigns";
-  private static final String BYPASS = "bypass";
-  private static final String FILESYSTEM = "filesystem";
-  private static final String REPLICATION = "replication";
+  static final String SET_TABLE_STATE = "setTableState";
+  static final String ASSIGNS = "assigns";
+  static final String UNASSIGNS = "unassigns";
+  static final String BYPASS = "bypass";
+  static final String FILESYSTEM = "filesystem";
+  static final String REPLICATION = "replication";
   private static final String VERSION = "version";
-  private static final String SET_REGION_STATE = "setRegionState";
-  private static final String SCHEDULE_RECOVERIES = "scheduleRecoveries";
-  private static final String RECOVER_UNKNOWN = "recoverUnknown";
-  private static final String GENERATE_TABLE_INFO = "generateMissingTableDescriptorFile";
-  private static final String FIX_META = "fixMeta";
-  private static final String REGIONINFO_MISMATCH = "regionInfoMismatch";
+  static final String SET_REGION_STATE = "setRegionState";
+  static final String SCHEDULE_RECOVERIES = "scheduleRecoveries";
+  static final String RECOVER_UNKNOWN = "recoverUnknown";
+  static final String GENERATE_TABLE_INFO = "generateMissingTableDescriptorFile";
+  static final String FIX_META = "fixMeta";
+  static final String REGIONINFO_MISMATCH = "regionInfoMismatch";
   // TODO update this map in case of the name of a method changes in Hbck interface
   // in org.apache.hadoop.hbase.client package. Or a new command is added and the hbck command
   // does not equals to the method name in Hbck interface.
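The thread-pool sizing change in PATCH 1/6 above is a textbook pairing of Introduce Explaining Variable with Decompose Conditional. The sketch below is a minimal, self-contained illustration of the before/after shape; the names (ThreadPoolSizingSketch, before, after) and the plain List<String> work list are hypothetical stand-ins, not the real FsRegionsMetaRecoverer API.

import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class ThreadPoolSizingSketch {

  // Before: the pool size is computed inline in one dense conditional
  // expression, and availableProcessors() is called up to twice.
  static ExecutorService before(List<String> work) {
    return Executors.newFixedThreadPool(
      (work == null || work.size() > Runtime.getRuntime().availableProcessors())
        ? Runtime.getRuntime().availableProcessors()
        : work.size());
  }

  // After: explaining variables name each intermediate result, and the
  // ternary is decomposed into an if/else that reads top to bottom.
  static ExecutorService after(List<String> work) {
    int availableProcessors = Runtime.getRuntime().availableProcessors();
    int threadPoolSize;
    if (work == null || work.size() > availableProcessors) {
      threadPoolSize = availableProcessors;
    } else {
      threadPoolSize = work.size();
    }
    return Executors.newFixedThreadPool(threadPoolSize);
  }
}

Besides readability, the refactored form evaluates availableProcessors() once instead of twice and gives each decision input a name a debugger can inspect.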
@@ -123,9 +123,9 @@ public class HBCK2 extends Configured implements org.apache.hadoop.util.Tool { } }); - private static final String ADD_MISSING_REGIONS_IN_META_FOR_TABLES = "addFsRegionsMissingInMeta"; - private static final String REPORT_MISSING_REGIONS_IN_META = "reportMissingRegionsInMeta"; - private static final String EXTRA_REGIONS_IN_META = "extraRegionsInMeta"; + static final String ADD_MISSING_REGIONS_IN_META_FOR_TABLES = "addFsRegionsMissingInMeta"; + static final String REPORT_MISSING_REGIONS_IN_META = "reportMissingRegionsInMeta"; + static final String EXTRA_REGIONS_IN_META = "extraRegionsInMeta"; private Configuration conf; static final String[] MINIMUM_HBCK2_VERSION = { "2.0.3", "2.1.1", "2.2.0", "3.0.0" }; @@ -642,399 +642,39 @@ private static String getCommandUsage() { StringWriter sw = new StringWriter(); PrintWriter writer = new PrintWriter(sw); writer.println("Command:"); - usageAddFsRegionsMissingInMeta(writer); + HBCK2CommandUsage.usageAddFsRegionsMissingInMeta(writer); writer.println(); - usageAssigns(writer); + HBCK2CommandUsage.usageAssigns(writer); writer.println(); - usageBypass(writer); + HBCK2CommandUsage.usageBypass(writer); writer.println(); - usageExtraRegionsInMeta(writer); + HBCK2CommandUsage.usageExtraRegionsInMeta(writer); writer.println(); - usageFilesystem(writer); + HBCK2CommandUsage.usageFilesystem(writer); writer.println(); - usageFixMeta(writer); + HBCK2CommandUsage.usageFixMeta(writer); writer.println(); - usageGenerateMissingTableInfo(writer); + HBCK2CommandUsage.usageGenerateMissingTableInfo(writer); writer.println(); - usageRecoverUnknown(writer); + HBCK2CommandUsage.usageRecoverUnknown(writer); writer.println(); - usageRegioninfoMismatch(writer); + HBCK2CommandUsage.usageRegioninfoMismatch(writer); writer.println(); - usageReplication(writer); + HBCK2CommandUsage.usageReplication(writer); writer.println(); - usageReportMissingRegionsInMeta(writer); + HBCK2CommandUsage.usageReportMissingRegionsInMeta(writer); writer.println(); - usageSetRegionState(writer); + HBCK2CommandUsage.usageSetRegionState(writer); writer.println(); - usageSetTableState(writer); + HBCK2CommandUsage.usageSetTableState(writer); writer.println(); - usageScheduleRecoveries(writer); + HBCK2CommandUsage.usageScheduleRecoveries(writer); writer.println(); - usageUnassigns(writer); + HBCK2CommandUsage.usageUnassigns(writer); writer.println(); writer.close(); return sw.toString(); } - - private static void usageAddFsRegionsMissingInMeta(PrintWriter writer) { - writer.println(" " + ADD_MISSING_REGIONS_IN_META_FOR_TABLES + " [OPTIONS]"); - writer.println(" [...|-i ...]"); - writer.println(" Options:"); - writer.println(" -i,--inputFiles take one or more files of namespace or table names"); - writer.println(" -o,--outputFile name/prefix of the file(s) to dump region names"); - writer.println(" -n,--numLines number of lines to be written to each output file"); - writer.println(" To be used when regions missing from hbase:meta but directories"); - writer.println(" are present still in HDFS. Can happen if user has run _hbck1_"); - writer.println(" 'OfflineMetaRepair' against an hbase-2.x cluster. Needs hbase:meta"); - writer.println(" to be online. For each table name passed as parameter, performs diff"); - writer.println(" between regions available in hbase:meta and region dirs on HDFS."); - writer.println(" Then for dirs with no hbase:meta matches, it reads the 'regioninfo'"); - writer.println(" metadata file and re-creates given region in hbase:meta. 
Regions are"); - writer.println(" re-created in 'CLOSED' state in the hbase:meta table, but not in the"); - writer.println(" Masters' cache, and they are not assigned either. To get these"); - writer.println(" regions online, run the HBCK2 'assigns'command printed when this"); - writer.println(" command-run completes."); - writer.println(" NOTE: If using hbase releases older than 2.3.0, a rolling restart of"); - writer.println(" HMasters is needed prior to executing the set of 'assigns' output."); - writer.println(" An example adding missing regions for tables 'tbl_1' in the default"); - writer.println(" namespace, 'tbl_2' in namespace 'n1' and for all tables from"); - writer.println(" namespace 'n2':"); - writer.println( - " $ HBCK2 " + ADD_MISSING_REGIONS_IN_META_FOR_TABLES + " default:tbl_1 n1:tbl_2 n2"); - writer.println(" Returns HBCK2 an 'assigns' command with all re-inserted regions."); - writer.println(" SEE ALSO: " + REPORT_MISSING_REGIONS_IN_META); - writer.println(" SEE ALSO: " + FIX_META); - writer.println(" If -i or --inputFiles is specified, pass one or more input file names."); - writer.println(" Each file contains , one per line."); - writer.println(" For example:"); - writer.println( - " $ HBCK2 " + ADD_MISSING_REGIONS_IN_META_FOR_TABLES + " -i fileName1 fileName2"); - writer.println(" If -o or --outputFile is specified, the output file(s) can be passed as"); - writer.println(" input to assigns command via -i or -inputFiles option."); - writer.println(" If -n or --numLines is specified, and say it is set to 100, this will"); - writer.println(" create files with prefix as value passed by -o or --outputFile option."); - writer.println(" Each file will have 100 region names (max.), one per line."); - writer.println(" For example:"); - writer.println( - " $ HBCK2 " + ADD_MISSING_REGIONS_IN_META_FOR_TABLES + " -o outputFilePrefix -n 100"); - writer.println(" -i fileName1 fileName2"); - writer.println(" But if -n is not specified, but -o is specified, it will dump all"); - writer.println(" region names in a single file, one per line."); - writer.println(" NOTE: -n option is applicable only if -o option is specified."); - } - - private static void usageAssigns(PrintWriter writer) { - writer.println(" " + ASSIGNS + " [OPTIONS] [...|-i ...]"); - writer.println(" Options:"); - writer.println(" -o,--override override ownership by another procedure"); - writer.println(" -i,--inputFiles take one or more files of encoded region names"); - writer.println(" -b,--batchSize number of regions to process in a batch"); - writer.println(" A 'raw' assign that can be used even during Master initialization (if"); - writer.println(" the -skip flag is specified). Skirts Coprocessors. Pass one or more"); - writer.println(" encoded region names. 1588230740 is the hard-coded name for the"); - writer.println(" hbase:meta region and de00010733901a05f5a2a3a382e27dd4 is an example of"); - writer.println(" what a user-space encoded region name looks like. For example:"); - writer.println(" $ HBCK2 " + ASSIGNS + " 1588230740 de00010733901a05f5a2a3a382e27dd4"); - writer.println(" Returns the pid(s) of the created AssignProcedure(s) or -1 if none."); - writer.println(" If -i or --inputFiles is specified, pass one or more input file names."); - writer.println(" Each file contains encoded region names, one per line. 
For example:"); - writer.println(" $ HBCK2 " + ASSIGNS + " -i fileName1 fileName2"); - writer.println(" If -b or --batchSize is specified, the command processes those many"); - writer.println(" regions at a time in a batch-ed manner; Consider using this option,"); - writer.println(" if the list of regions is huge, to avoid CallTimeoutException."); - writer.println(" For example:"); - writer.println(" $ HBCK2 " + ASSIGNS + " -i fileName1 fileName2 -b 500"); - writer.println(" By default, batchSize is set to -1 i.e. no batching is done."); - } - - private static void usageBypass(PrintWriter writer) { - writer.println(" " + BYPASS + " [OPTIONS] [...|-i ...]"); - writer.println(" Options:"); - writer.println(" -o,--override override if procedure is running/stuck"); - writer.println(" -r,--recursive bypass parent and its children. SLOW! EXPENSIVE!"); - writer.println(" -w,--lockWait milliseconds to wait before giving up; default=1"); - writer.println(" -i,--inputFiles take one or more input files of PID's"); - writer.println(" -b,--batchSize number of procedures to process in a batch"); - writer.println(" Pass one (or more) procedure 'pid's to skip to procedure finish. Parent"); - writer.println(" of bypassed procedure will also be skipped to the finish. Entities will"); - writer.println(" be left in an inconsistent state and will require manual fixup. May"); - writer.println(" need Master restart to clear locks still held. Bypass fails if"); - writer.println(" procedure has children. Add 'recursive' if all you have is a parent pid"); - writer.println(" to finish parent and children. This is SLOW, and dangerous so use"); - writer.println(" selectively. Does not always work."); - writer.println(" If -i or --inputFiles is specified, pass one or more input file names."); - writer.println(" Each file contains PID's, one per line. For example:"); - writer.println(" $ HBCK2 " + BYPASS + " -i fileName1 fileName2"); - writer.println(" If -b or --batchSize is specified, the command processes those many"); - writer.println(" procedures at a time in a batch-ed manner; Consider using this option,"); - writer.println(" if the list of procedures is huge, to avoid CallTimeoutException."); - writer.println(" For example:"); - writer.println(" $ HBCK2 " + BYPASS + " -i fileName1 fileName2 -b 500"); - writer.println(" By default, batchSize is set to -1 i.e. no batching is done."); - } - - private static void usageFilesystem(PrintWriter writer) { - writer.println(" " + FILESYSTEM + " [OPTIONS] [...|-i ...]"); - writer.println(" Options:"); - writer.println(" -f, --fix sideline corrupt hfiles, bad links, and references."); - writer.println(" -i,--inputFiles take one or more input files of table names"); - writer.println(" Report on corrupt hfiles, references, broken links, and integrity."); - writer.println(" Pass '--fix' to sideline corrupt files and links. '--fix' does NOT"); - writer.println(" fix integrity issues; i.e. 'holes' or 'orphan' regions. Pass one or"); - writer.println(" more tablenames to narrow checkup. Default checks all tables and"); - writer.println(" restores 'hbase.version' if missing. Interacts with the filesystem"); - writer.println(" only! Modified regions need to be reopened to pick-up changes."); - writer.println(" If -i or --inputFiles is specified, pass one or more input file names."); - writer.println(" Each file contains table names, one per line. 
For example:"); - writer.println(" $ HBCK2 " + FILESYSTEM + " -i fileName1 fileName2"); - } - - private static void usageFixMeta(PrintWriter writer) { - writer.println(" " + FIX_META); - writer.println(" Do a server-side fix of bad or inconsistent state in hbase:meta."); - writer.println(" Available in hbase 2.2.1/2.1.6 or newer versions. Master UI has"); - writer.println(" matching, new 'HBCK Report' tab that dumps reports generated by"); - writer.println(" most recent run of _catalogjanitor_ and a new 'HBCK Chore'. It"); - writer.println(" is critical that hbase:meta first be made healthy before making"); - writer.println(" any other repairs. Fixes 'holes', 'overlaps', etc., creating"); - writer.println(" (empty) region directories in HDFS to match regions added to"); - writer.println(" hbase:meta. Command is NOT the same as the old _hbck1_ command"); - writer.println(" named similarily. Works against the reports generated by the last"); - writer.println(" catalog_janitor and hbck chore runs. If nothing to fix, run is a"); - writer.println(" noop. Otherwise, if 'HBCK Report' UI reports problems, a run of"); - writer.println(" " + FIX_META + " will clear up hbase:meta issues. See 'HBase HBCK' UI"); - writer.println(" for how to generate new report."); - writer.println(" SEE ALSO: " + REPORT_MISSING_REGIONS_IN_META); - } - - private static void usageGenerateMissingTableInfo(PrintWriter writer) { - writer.println(" " + GENERATE_TABLE_INFO + " [OPTIONS] [...]"); - writer.println(" Trying to fix an orphan table by generating a missing table descriptor"); - writer.println(" file. This command will have no effect if the table folder is missing"); - writer.println(" or if the .tableinfo is present (we don't override existing table"); - writer.println(" descriptors). This command will first check it the TableDescriptor is"); - writer.println(" cached in HBase Master in which case it will recover the .tableinfo"); - writer.println(" accordingly. If TableDescriptor is not cached in master then it will"); - writer.println(" create a default .tableinfo file with the following items:"); - writer.println(" - the table name"); - writer.println(" - the column family list determined based on the file system"); - writer.println(" - the default properties for both TableDescriptor and"); - writer.println(" ColumnFamilyDescriptors"); - writer.println(" If the .tableinfo file was generated using default parameters then"); - writer.println(" make sure you check the table / column family properties later (and"); - writer.println(" change them if needed)."); - writer.println(" This method does not change anything in HBase, only writes the new"); - writer.println(" .tableinfo file to the file system. Orphan tables can cause e.g."); - writer.println(" ServerCrashProcedures to stuck, you might need to fix these still"); - writer.println(" after you generated the missing table info files. If no tables are "); - writer.println(" specified, .tableinfo will be generated for all missing table "); - writer.println(" descriptors."); - } - - private static void usageReplication(PrintWriter writer) { - writer.println(" " + REPLICATION + " [OPTIONS] [...|-i ...]"); - writer.println(" Options:"); - writer.println(" -f, --fix fix any replication issues found."); - writer.println(" -i,--inputFiles take one or more input files of table names"); - writer.println(" Looks for undeleted replication queues and deletes them if passed the"); - writer.println(" '--fix' option. 
Pass a table name to check for replication barrier and"); - writer.println(" purge if '--fix'."); - writer.println(" If -i or --inputFiles is specified, pass one or more input file names."); - writer.println(" Each file contains , one per line. For example:"); - writer.println(" $ HBCK2 " + REPLICATION + " -i fileName1 fileName2"); - } - - private static void usageExtraRegionsInMeta(PrintWriter writer) { - writer.println(" " + EXTRA_REGIONS_IN_META + " [...|"); - writer.println(" -i ...]"); - writer.println(" Options:"); - writer.println(" -f, --fix fix meta by removing all extra regions found."); - writer.println(" -i,--inputFiles take one or more input files of namespace or"); - writer.println(" table names"); - writer.println(" Reports regions present on hbase:meta, but with no related "); - writer.println(" directories on the file system. Needs hbase:meta to be online. "); - writer.println(" For each table name passed as parameter, performs diff"); - writer.println(" between regions available in hbase:meta and region dirs on the given"); - writer.println(" file system. Extra regions would get deleted from Meta "); - writer.println(" if passed the --fix option. "); - writer.println(" NOTE: Before deciding on use the \"--fix\" option, it's worth check if"); - writer.println(" reported extra regions are overlapping with existing valid regions."); - writer.println(" If so, then \"extraRegionsInMeta --fix\" is indeed the optimal solution. "); - writer.println(" Otherwise, \"assigns\" command is the simpler solution, as it recreates "); - writer.println(" regions dirs in the filesystem, if not existing."); - writer.println(" An example triggering extra regions report for tables 'table_1'"); - writer.println(" and 'table_2', under default namespace:"); - writer.println(" $ HBCK2 " + EXTRA_REGIONS_IN_META + " default:table_1 default:table_2"); - writer.println(" An example triggering extra regions report for table 'table_1'"); - writer.println(" under default namespace, and for all tables from namespace 'ns1':"); - writer.println(" $ HBCK2 " + EXTRA_REGIONS_IN_META + " default:table_1 ns1"); - writer.println(" Returns list of extra regions for each table passed as parameter, or"); - writer.println(" for each table on namespaces specified as parameter."); - writer.println(" If -i or --inputFiles is specified, pass one or more input file names."); - writer.println(" Each file contains , one per line."); - writer.println(" For example:"); - writer.println(" $ HBCK2 " + EXTRA_REGIONS_IN_META + " -i fileName1 fileName2"); - } - - private static void usageReportMissingRegionsInMeta(PrintWriter writer) { - writer.println(" " + REPORT_MISSING_REGIONS_IN_META + " [...|"); - writer.println(" -i ...]"); - writer.println(" Options:"); - writer.println(" -i,--inputFiles take one or more files of namespace or table names"); - writer.println(" To be used when regions missing from hbase:meta but directories"); - writer.println(" are present still in HDFS. Can happen if user has run _hbck1_"); - writer.println(" 'OfflineMetaRepair' against an hbase-2.x cluster. This is a CHECK only"); - writer.println(" method, designed for reporting purposes and doesn't perform any"); - writer.println(" fixes, providing a view of which regions (if any) would get re-added"); - writer.println(" to hbase:meta, grouped by respective table/namespace. To effectively"); - writer - .println(" re-add regions in meta, run " + ADD_MISSING_REGIONS_IN_META_FOR_TABLES + "."); - writer.println(" This command needs hbase:meta to be online. 
For each namespace/table"); - writer.println(" passed as parameter, it performs a diff between regions available in"); - writer.println(" hbase:meta against existing regions dirs on HDFS. Region dirs with no"); - writer.println(" matches are printed grouped under its related table name. Tables with"); - writer.println(" no missing regions will show a 'no missing regions' message. If no"); - writer.println(" namespace or table is specified, it will verify all existing regions."); - writer.println(" It accepts a combination of multiple namespace and tables. Table names"); - writer.println(" should include the namespace portion, even for tables in the default"); - writer.println(" namespace, otherwise it will assume as a namespace value."); - writer.println(" An example triggering missing regions report for tables 'table_1'"); - writer.println(" and 'table_2', under default namespace:"); - writer.println(" $ HBCK2 reportMissingRegionsInMeta default:table_1 default:table_2"); - writer.println(" An example triggering missing regions report for table 'table_1'"); - writer.println(" under default namespace, and for all tables from namespace 'ns1':"); - writer.println(" $ HBCK2 reportMissingRegionsInMeta default:table_1 ns1"); - writer.println(" Returns list of missing regions for each table passed as parameter, or"); - writer.println(" for each table on namespaces specified as parameter."); - writer.println(" If -i or --inputFiles is specified, pass one or more input file names."); - writer.println(" Each file contains , one per line."); - writer.println(" For example:"); - writer.println(" $ HBCK2 " + REPORT_MISSING_REGIONS_IN_META + " -i fileName1 fileName2"); - } - - private static void usageSetRegionState(PrintWriter writer) { - writer.println(" " + SET_REGION_STATE + " [ |-i ...]"); - writer.println(" Options:"); - writer.println(" -i,--inputFiles take one or more input files of encoded region names "); - writer.println(" and states."); - writer.println(" To set the replica region's state, it needs the primary region's "); - writer.println(" encoded regionname and replica id. The command will be "); - writer.println(" " + SET_REGION_STATE + " , "); - writer.println(" Possible region states:"); - writer.println(" OFFLINE, OPENING, OPEN, CLOSING, CLOSED, SPLITTING, SPLIT,"); - writer.println(" FAILED_OPEN, FAILED_CLOSE, MERGING, MERGED, SPLITTING_NEW,"); - writer.println(" MERGING_NEW, ABNORMALLY_CLOSED"); - writer.println(" WARNING: This is a very risky option intended for use as last resort."); - writer.println(" Example scenarios include unassigns/assigns that can't move forward"); - writer.println(" because region is in an inconsistent state in 'hbase:meta'. For"); - writer.println(" example, the 'unassigns' command can only proceed if passed a region"); - writer.println(" in one of the following states: SPLITTING|SPLIT|MERGING|OPEN|CLOSING"); - writer.println(" Before manually setting a region state with this command, please"); - writer.println(" certify that this region is not being handled by a running procedure,"); - writer.println(" such as 'assign' or 'split'. You can get a view of running procedures"); - writer.println(" in the hbase shell using the 'list_procedures' command. 
An example"); - writer.println(" setting region 'de00010733901a05f5a2a3a382e27dd4' to CLOSING:"); - writer.println(" $ HBCK2 setRegionState de00010733901a05f5a2a3a382e27dd4 CLOSING"); - writer.println(" Returns \"0\" if region state changed and \"1\" otherwise."); - writer.println(" If -i or --inputFiles is specified, pass one or more input file names."); - writer.println(" Each file contains , one pair per line."); - writer.println(" For example:"); - writer.println(" $ HBCK2 " + SET_REGION_STATE + " -i fileName1 fileName2"); - } - - private static void usageSetTableState(PrintWriter writer) { - writer.println(" " + SET_TABLE_STATE + " [ |-i ...]"); - writer.println(" Options:"); - writer.println(" -i,--inputFiles take one or more files of table names and states"); - writer.println(" Possible table states: " + Arrays.stream(TableState.State.values()) - .map(Enum::toString).collect(Collectors.joining(", "))); - writer.println(" To read current table state, in the hbase shell run:"); - writer.println(" hbase> get 'hbase:meta', '', 'table:state'"); - writer.println(" A value of \\x08\\x00 == ENABLED, \\x08\\x01 == DISABLED, etc."); - writer.println(" Can also run a 'describe \"\"' at the shell prompt."); - writer.println(" An example making table name 'user' ENABLED:"); - writer.println(" $ HBCK2 setTableState users ENABLED"); - writer.println(" Returns whatever the previous table state was."); - writer.println(" If -i or --inputFiles is specified, pass one or more input file names."); - writer.println(" Each file contains , one pair per line."); - writer.println(" For example:"); - writer.println(" $ HBCK2 " + SET_TABLE_STATE + " -i fileName1 fileName2"); - } - - private static void usageScheduleRecoveries(PrintWriter writer) { - writer.println(" " + SCHEDULE_RECOVERIES + " [...|-i ...]"); - writer.println(" Options:"); - writer.println(" -i,--inputFiles take one or more input files of server names"); - writer.println(" Schedule ServerCrashProcedure(SCP) for list of RegionServers. Format"); - writer.println(" server name as ',,' (See HBase UI/logs)."); - writer.println(" Example using RegionServer 'a.example.org,29100,1540348649479':"); - writer.println(" $ HBCK2 scheduleRecoveries a.example.org,29100,1540348649479"); - writer.println(" Returns the pid(s) of the created ServerCrashProcedure(s) or -1 if"); - writer.println(" no procedure created (see master logs for why not)."); - writer.println(" Command support added in hbase versions 2.0.3, 2.1.2, 2.2.0 or newer."); - writer.println(" If -i or --inputFiles is specified, pass one or more input file names."); - writer.println(" Each file contains , one per line. 
For example:"); - writer.println(" $ HBCK2 " + SCHEDULE_RECOVERIES + " -i fileName1 fileName2"); - } - - private static void usageRecoverUnknown(PrintWriter writer) { - writer.println(" " + RECOVER_UNKNOWN); - writer.println(" Schedule ServerCrashProcedure(SCP) for RegionServers that are reported"); - writer.println(" as unknown."); - writer.println(" Returns the pid(s) of the created ServerCrashProcedure(s) or -1 if"); - writer.println(" no procedure created (see master logs for why not)."); - writer.println(" Command support added in hbase versions 2.2.7, 2.3.5, 2.4.3,"); - writer.println(" 2.5.0 or newer."); - } - - private static void usageUnassigns(PrintWriter writer) { - writer.println(" " + UNASSIGNS + " [OPTIONS] [...|-i ...]"); - writer.println(" Options:"); - writer.println(" -o,--override override ownership by another procedure"); - writer.println(" -i,--inputFiles take one or more input files of encoded region names"); - writer.println(" -b,--batchSize number of regions to process in a batch"); - writer.println(" A 'raw' unassign that can be used even during Master initialization"); - writer.println(" (if the -skip flag is specified). Skirts Coprocessors. Pass one or"); - writer.println(" more encoded region names. 1588230740 is the hard-coded name for the"); - writer.println(" hbase:meta region and de00010733901a05f5a2a3a382e27dd4 is an example"); - writer.println(" of what a userspace encoded region name looks like. For example:"); - writer.println(" $ HBCK2 " + UNASSIGNS + " 1588230740 de00010733901a05f5a2a3a382e27dd4"); - writer.println(" Returns the pid(s) of the created UnassignProcedure(s) or -1 if none."); - writer.println(); - writer.println(" SEE ALSO, org.apache.hbase.hbck1.OfflineMetaRepair, the offline"); - writer.println(" hbase:meta tool. See the HBCK2 README for how to use."); - writer.println(" If -i or --inputFiles is specified, pass one or more input file names."); - writer.println(" Each file contains encoded region names, one per line. For example:"); - writer.println(" $ HBCK2 " + UNASSIGNS + " -i fileName1 fileName2"); - writer.println(" If -b or --batchSize is specified, the tool processes those many"); - writer.println(" regions at a time in a batch-ed manner; Consider using this option,"); - writer.println(" if the list of regions is huge, to avoid CallTimeoutException."); - writer.println(" For example:"); - writer.println(" $ HBCK2 " + UNASSIGNS + " -i fileName1 fileName2 -b 500"); - writer.println(" By default, batchSize is set to -1 i.e. no batching is done."); - } - - private static void usageRegioninfoMismatch(PrintWriter writer) { - writer.println(" " + REGIONINFO_MISMATCH); - writer.println(" Options:"); - writer.println(" -f,--fix Update hbase:meta with the corrections"); - writer.println(" It is recommended to first run this utility without the fix"); - writer.println(" option to ensure that the utility is generating the correct"); - writer.println(" serialized RegionInfo data structures. Inspect the output to"); - writer.println(" confirm that the hbase:meta rowkey matches the new RegionInfo."); - writer.println(); - writer.println(" This tool will read hbase:meta and report any regions whose rowkey"); - writer.println(" and cell value differ in their encoded region name. HBASE-23328 "); - writer.println(" illustrates a problem for read-replica enabled tables in which "); - writer.println(" the encoded region name (the MD5 hash) does not match between "); - writer.println(" the rowkey and the value. 
This problem is generally harmless "); - writer.println(" for normal operation, but can break other HBCK2 tools."); - writer.println(); - writer.println(" Run this command to determine if any regions are affected by "); - writer.println(" this bug and use the -f/--fix option to then correct any"); - writer.println(" affected regions."); - } - static void showErrorMessage(String error) { if (error != null) { System.out.println("ERROR: " + error); @@ -1356,49 +996,49 @@ static int showUsagePerCommand(String command, Options options) throws IOExcepti writer.println("Command:"); switch (command) { case ADD_MISSING_REGIONS_IN_META_FOR_TABLES: - usageAddFsRegionsMissingInMeta(writer); + HBCK2CommandUsage.usageAddFsRegionsMissingInMeta(writer); break; case ASSIGNS: - usageAssigns(writer); + HBCK2CommandUsage.usageAssigns(writer); break; case BYPASS: - usageBypass(writer); + HBCK2CommandUsage.usageBypass(writer); break; case FILESYSTEM: - usageFilesystem(writer); + HBCK2CommandUsage.usageFilesystem(writer); break; case FIX_META: - usageFixMeta(writer); + HBCK2CommandUsage.usageFixMeta(writer); break; case GENERATE_TABLE_INFO: - usageGenerateMissingTableInfo(writer); + HBCK2CommandUsage.usageGenerateMissingTableInfo(writer); break; case REPLICATION: - usageReplication(writer); + HBCK2CommandUsage.usageReplication(writer); break; case EXTRA_REGIONS_IN_META: - usageExtraRegionsInMeta(writer); + HBCK2CommandUsage.usageExtraRegionsInMeta(writer); break; case REPORT_MISSING_REGIONS_IN_META: - usageReportMissingRegionsInMeta(writer); + HBCK2CommandUsage.usageReportMissingRegionsInMeta(writer); break; case SET_REGION_STATE: - usageSetRegionState(writer); + HBCK2CommandUsage.usageSetRegionState(writer); break; case SET_TABLE_STATE: - usageSetTableState(writer); + HBCK2CommandUsage.usageSetTableState(writer); break; case SCHEDULE_RECOVERIES: - usageScheduleRecoveries(writer); + HBCK2CommandUsage.usageScheduleRecoveries(writer); break; case RECOVER_UNKNOWN: - usageRecoverUnknown(writer); + HBCK2CommandUsage.usageRecoverUnknown(writer); break; case UNASSIGNS: - usageUnassigns(writer); + HBCK2CommandUsage.usageUnassigns(writer); break; case REGIONINFO_MISMATCH: - usageRegioninfoMismatch(writer); + HBCK2CommandUsage.usageRegioninfoMismatch(writer); break; default: showErrorMessage("Invalid arg: " + command); diff --git a/hbase-hbck2/src/main/java/org/apache/hbase/HBCK2CommandUsage.java b/hbase-hbck2/src/main/java/org/apache/hbase/HBCK2CommandUsage.java new file mode 100644 index 0000000000..932dfd7906 --- /dev/null +++ b/hbase-hbck2/src/main/java/org/apache/hbase/HBCK2CommandUsage.java @@ -0,0 +1,371 @@ +package org.apache.hbase; + +import org.apache.hadoop.hbase.client.TableState; + +import java.io.PrintWriter; +import java.util.Arrays; +import java.util.stream.Collectors; + +import static org.apache.hbase.HBCK2.*; + +public class HBCK2CommandUsage { + + public static void usageAddFsRegionsMissingInMeta(PrintWriter writer) { + writer.println(" " + ADD_MISSING_REGIONS_IN_META_FOR_TABLES + " [OPTIONS]"); + writer.println(" [...|-i ...]"); + writer.println(" Options:"); + writer.println(" -i,--inputFiles take one or more files of namespace or table names"); + writer.println(" -o,--outputFile name/prefix of the file(s) to dump region names"); + writer.println(" -n,--numLines number of lines to be written to each output file"); + writer.println(" To be used when regions missing from hbase:meta but directories"); + writer.println(" are present still in HDFS. 
Can happen if user has run _hbck1_"); + writer.println(" 'OfflineMetaRepair' against an hbase-2.x cluster. Needs hbase:meta"); + writer.println(" to be online. For each table name passed as parameter, performs diff"); + writer.println(" between regions available in hbase:meta and region dirs on HDFS."); + writer.println(" Then for dirs with no hbase:meta matches, it reads the 'regioninfo'"); + writer.println(" metadata file and re-creates given region in hbase:meta. Regions are"); + writer.println(" re-created in 'CLOSED' state in the hbase:meta table, but not in the"); + writer.println(" Masters' cache, and they are not assigned either. To get these"); + writer.println(" regions online, run the HBCK2 'assigns'command printed when this"); + writer.println(" command-run completes."); + writer.println(" NOTE: If using hbase releases older than 2.3.0, a rolling restart of"); + writer.println(" HMasters is needed prior to executing the set of 'assigns' output."); + writer.println(" An example adding missing regions for tables 'tbl_1' in the default"); + writer.println(" namespace, 'tbl_2' in namespace 'n1' and for all tables from"); + writer.println(" namespace 'n2':"); + writer.println( + " $ HBCK2 " + ADD_MISSING_REGIONS_IN_META_FOR_TABLES + " default:tbl_1 n1:tbl_2 n2"); + writer.println(" Returns HBCK2 an 'assigns' command with all re-inserted regions."); + writer.println(" SEE ALSO: " + REPORT_MISSING_REGIONS_IN_META); + writer.println(" SEE ALSO: " + FIX_META); + writer.println(" If -i or --inputFiles is specified, pass one or more input file names."); + writer.println(" Each file contains , one per line."); + writer.println(" For example:"); + writer.println( + " $ HBCK2 " + ADD_MISSING_REGIONS_IN_META_FOR_TABLES + " -i fileName1 fileName2"); + writer.println(" If -o or --outputFile is specified, the output file(s) can be passed as"); + writer.println(" input to assigns command via -i or -inputFiles option."); + writer.println(" If -n or --numLines is specified, and say it is set to 100, this will"); + writer.println(" create files with prefix as value passed by -o or --outputFile option."); + writer.println(" Each file will have 100 region names (max.), one per line."); + writer.println(" For example:"); + writer.println( + " $ HBCK2 " + ADD_MISSING_REGIONS_IN_META_FOR_TABLES + " -o outputFilePrefix -n 100"); + writer.println(" -i fileName1 fileName2"); + writer.println(" But if -n is not specified, but -o is specified, it will dump all"); + writer.println(" region names in a single file, one per line."); + writer.println(" NOTE: -n option is applicable only if -o option is specified."); + } + + public static void usageAssigns(PrintWriter writer) { + writer.println(" " + ASSIGNS + " [OPTIONS] [...|-i ...]"); + writer.println(" Options:"); + writer.println(" -o,--override override ownership by another procedure"); + writer.println(" -i,--inputFiles take one or more files of encoded region names"); + writer.println(" -b,--batchSize number of regions to process in a batch"); + writer.println(" A 'raw' assign that can be used even during Master initialization (if"); + writer.println(" the -skip flag is specified). Skirts Coprocessors. Pass one or more"); + writer.println(" encoded region names. 1588230740 is the hard-coded name for the"); + writer.println(" hbase:meta region and de00010733901a05f5a2a3a382e27dd4 is an example of"); + writer.println(" what a user-space encoded region name looks like. 
For example:"); + writer.println(" $ HBCK2 " + ASSIGNS + " 1588230740 de00010733901a05f5a2a3a382e27dd4"); + writer.println(" Returns the pid(s) of the created AssignProcedure(s) or -1 if none."); + writer.println(" If -i or --inputFiles is specified, pass one or more input file names."); + writer.println(" Each file contains encoded region names, one per line. For example:"); + writer.println(" $ HBCK2 " + ASSIGNS + " -i fileName1 fileName2"); + writer.println(" If -b or --batchSize is specified, the command processes those many"); + writer.println(" regions at a time in a batch-ed manner; Consider using this option,"); + writer.println(" if the list of regions is huge, to avoid CallTimeoutException."); + writer.println(" For example:"); + writer.println(" $ HBCK2 " + ASSIGNS + " -i fileName1 fileName2 -b 500"); + writer.println(" By default, batchSize is set to -1 i.e. no batching is done."); + } + + public static void usageBypass(PrintWriter writer) { + writer.println(" " + BYPASS + " [OPTIONS] [...|-i ...]"); + writer.println(" Options:"); + writer.println(" -o,--override override if procedure is running/stuck"); + writer.println(" -r,--recursive bypass parent and its children. SLOW! EXPENSIVE!"); + writer.println(" -w,--lockWait milliseconds to wait before giving up; default=1"); + writer.println(" -i,--inputFiles take one or more input files of PID's"); + writer.println(" -b,--batchSize number of procedures to process in a batch"); + writer.println(" Pass one (or more) procedure 'pid's to skip to procedure finish. Parent"); + writer.println(" of bypassed procedure will also be skipped to the finish. Entities will"); + writer.println(" be left in an inconsistent state and will require manual fixup. May"); + writer.println(" need Master restart to clear locks still held. Bypass fails if"); + writer.println(" procedure has children. Add 'recursive' if all you have is a parent pid"); + writer.println(" to finish parent and children. This is SLOW, and dangerous so use"); + writer.println(" selectively. Does not always work."); + writer.println(" If -i or --inputFiles is specified, pass one or more input file names."); + writer.println(" Each file contains PID's, one per line. For example:"); + writer.println(" $ HBCK2 " + BYPASS + " -i fileName1 fileName2"); + writer.println(" If -b or --batchSize is specified, the command processes those many"); + writer.println(" procedures at a time in a batch-ed manner; Consider using this option,"); + writer.println(" if the list of procedures is huge, to avoid CallTimeoutException."); + writer.println(" For example:"); + writer.println(" $ HBCK2 " + BYPASS + " -i fileName1 fileName2 -b 500"); + writer.println(" By default, batchSize is set to -1 i.e. no batching is done."); + } + + public static void usageFilesystem(PrintWriter writer) { + writer.println(" " + FILESYSTEM + " [OPTIONS] [...|-i ...]"); + writer.println(" Options:"); + writer.println(" -f, --fix sideline corrupt hfiles, bad links, and references."); + writer.println(" -i,--inputFiles take one or more input files of table names"); + writer.println(" Report on corrupt hfiles, references, broken links, and integrity."); + writer.println(" Pass '--fix' to sideline corrupt files and links. '--fix' does NOT"); + writer.println(" fix integrity issues; i.e. 'holes' or 'orphan' regions. Pass one or"); + writer.println(" more tablenames to narrow checkup. Default checks all tables and"); + writer.println(" restores 'hbase.version' if missing. 
Interacts with the filesystem"); + writer.println(" only! Modified regions need to be reopened to pick-up changes."); + writer.println(" If -i or --inputFiles is specified, pass one or more input file names."); + writer.println(" Each file contains table names, one per line. For example:"); + writer.println(" $ HBCK2 " + FILESYSTEM + " -i fileName1 fileName2"); + } + + public static void usageFixMeta(PrintWriter writer) { + writer.println(" " + FIX_META); + writer.println(" Do a server-side fix of bad or inconsistent state in hbase:meta."); + writer.println(" Available in hbase 2.2.1/2.1.6 or newer versions. Master UI has"); + writer.println(" matching, new 'HBCK Report' tab that dumps reports generated by"); + writer.println(" most recent run of _catalogjanitor_ and a new 'HBCK Chore'. It"); + writer.println(" is critical that hbase:meta first be made healthy before making"); + writer.println(" any other repairs. Fixes 'holes', 'overlaps', etc., creating"); + writer.println(" (empty) region directories in HDFS to match regions added to"); + writer.println(" hbase:meta. Command is NOT the same as the old _hbck1_ command"); + writer.println(" named similarily. Works against the reports generated by the last"); + writer.println(" catalog_janitor and hbck chore runs. If nothing to fix, run is a"); + writer.println(" noop. Otherwise, if 'HBCK Report' UI reports problems, a run of"); + writer.println(" " + FIX_META + " will clear up hbase:meta issues. See 'HBase HBCK' UI"); + writer.println(" for how to generate new report."); + writer.println(" SEE ALSO: " + REPORT_MISSING_REGIONS_IN_META); + } + + public static void usageGenerateMissingTableInfo(PrintWriter writer) { + writer.println(" " + GENERATE_TABLE_INFO + " [OPTIONS] [...]"); + writer.println(" Trying to fix an orphan table by generating a missing table descriptor"); + writer.println(" file. This command will have no effect if the table folder is missing"); + writer.println(" or if the .tableinfo is present (we don't override existing table"); + writer.println(" descriptors). This command will first check it the TableDescriptor is"); + writer.println(" cached in HBase Master in which case it will recover the .tableinfo"); + writer.println(" accordingly. If TableDescriptor is not cached in master then it will"); + writer.println(" create a default .tableinfo file with the following items:"); + writer.println(" - the table name"); + writer.println(" - the column family list determined based on the file system"); + writer.println(" - the default properties for both TableDescriptor and"); + writer.println(" ColumnFamilyDescriptors"); + writer.println(" If the .tableinfo file was generated using default parameters then"); + writer.println(" make sure you check the table / column family properties later (and"); + writer.println(" change them if needed)."); + writer.println(" This method does not change anything in HBase, only writes the new"); + writer.println(" .tableinfo file to the file system. Orphan tables can cause e.g."); + writer.println(" ServerCrashProcedures to stuck, you might need to fix these still"); + writer.println(" after you generated the missing table info files. 
If no tables are "); + writer.println(" specified, .tableinfo will be generated for all missing table "); + writer.println(" descriptors."); + } + + public static void usageReplication(PrintWriter writer) { + writer.println(" " + REPLICATION + " [OPTIONS] [...|-i ...]"); + writer.println(" Options:"); + writer.println(" -f, --fix fix any replication issues found."); + writer.println(" -i,--inputFiles take one or more input files of table names"); + writer.println(" Looks for undeleted replication queues and deletes them if passed the"); + writer.println(" '--fix' option. Pass a table name to check for replication barrier and"); + writer.println(" purge if '--fix'."); + writer.println(" If -i or --inputFiles is specified, pass one or more input file names."); + writer.println(" Each file contains , one per line. For example:"); + writer.println(" $ HBCK2 " + REPLICATION + " -i fileName1 fileName2"); + } + + public static void usageExtraRegionsInMeta(PrintWriter writer) { + writer.println(" " + EXTRA_REGIONS_IN_META + " [...|"); + writer.println(" -i ...]"); + writer.println(" Options:"); + writer.println(" -f, --fix fix meta by removing all extra regions found."); + writer.println(" -i,--inputFiles take one or more input files of namespace or"); + writer.println(" table names"); + writer.println(" Reports regions present on hbase:meta, but with no related "); + writer.println(" directories on the file system. Needs hbase:meta to be online. "); + writer.println(" For each table name passed as parameter, performs diff"); + writer.println(" between regions available in hbase:meta and region dirs on the given"); + writer.println(" file system. Extra regions would get deleted from Meta "); + writer.println(" if passed the --fix option. "); + writer.println(" NOTE: Before deciding on use the \"--fix\" option, it's worth check if"); + writer.println(" reported extra regions are overlapping with existing valid regions."); + writer.println(" If so, then \"extraRegionsInMeta --fix\" is indeed the optimal solution. 
"); + writer.println(" Otherwise, \"assigns\" command is the simpler solution, as it recreates "); + writer.println(" regions dirs in the filesystem, if not existing."); + writer.println(" An example triggering extra regions report for tables 'table_1'"); + writer.println(" and 'table_2', under default namespace:"); + writer.println(" $ HBCK2 " + EXTRA_REGIONS_IN_META + " default:table_1 default:table_2"); + writer.println(" An example triggering extra regions report for table 'table_1'"); + writer.println(" under default namespace, and for all tables from namespace 'ns1':"); + writer.println(" $ HBCK2 " + EXTRA_REGIONS_IN_META + " default:table_1 ns1"); + writer.println(" Returns list of extra regions for each table passed as parameter, or"); + writer.println(" for each table on namespaces specified as parameter."); + writer.println(" If -i or --inputFiles is specified, pass one or more input file names."); + writer.println(" Each file contains , one per line."); + writer.println(" For example:"); + writer.println(" $ HBCK2 " + EXTRA_REGIONS_IN_META + " -i fileName1 fileName2"); + } + + public static void usageReportMissingRegionsInMeta(PrintWriter writer) { + writer.println(" " + REPORT_MISSING_REGIONS_IN_META + " [...|"); + writer.println(" -i ...]"); + writer.println(" Options:"); + writer.println(" -i,--inputFiles take one or more files of namespace or table names"); + writer.println(" To be used when regions missing from hbase:meta but directories"); + writer.println(" are present still in HDFS. Can happen if user has run _hbck1_"); + writer.println(" 'OfflineMetaRepair' against an hbase-2.x cluster. This is a CHECK only"); + writer.println(" method, designed for reporting purposes and doesn't perform any"); + writer.println(" fixes, providing a view of which regions (if any) would get re-added"); + writer.println(" to hbase:meta, grouped by respective table/namespace. To effectively"); + writer + .println(" re-add regions in meta, run " + ADD_MISSING_REGIONS_IN_META_FOR_TABLES + "."); + writer.println(" This command needs hbase:meta to be online. For each namespace/table"); + writer.println(" passed as parameter, it performs a diff between regions available in"); + writer.println(" hbase:meta against existing regions dirs on HDFS. Region dirs with no"); + writer.println(" matches are printed grouped under its related table name. Tables with"); + writer.println(" no missing regions will show a 'no missing regions' message. If no"); + writer.println(" namespace or table is specified, it will verify all existing regions."); + writer.println(" It accepts a combination of multiple namespace and tables. 
Table names"); + writer.println(" should include the namespace portion, even for tables in the default"); + writer.println(" namespace, otherwise it will assume as a namespace value."); + writer.println(" An example triggering missing regions report for tables 'table_1'"); + writer.println(" and 'table_2', under default namespace:"); + writer.println(" $ HBCK2 reportMissingRegionsInMeta default:table_1 default:table_2"); + writer.println(" An example triggering missing regions report for table 'table_1'"); + writer.println(" under default namespace, and for all tables from namespace 'ns1':"); + writer.println(" $ HBCK2 reportMissingRegionsInMeta default:table_1 ns1"); + writer.println(" Returns list of missing regions for each table passed as parameter, or"); + writer.println(" for each table on namespaces specified as parameter."); + writer.println(" If -i or --inputFiles is specified, pass one or more input file names."); + writer.println(" Each file contains , one per line."); + writer.println(" For example:"); + writer.println(" $ HBCK2 " + REPORT_MISSING_REGIONS_IN_META + " -i fileName1 fileName2"); + } + + public static void usageSetRegionState(PrintWriter writer) { + writer.println(" " + SET_REGION_STATE + " [ |-i ...]"); + writer.println(" Options:"); + writer.println(" -i,--inputFiles take one or more input files of encoded region names "); + writer.println(" and states."); + writer.println(" To set the replica region's state, it needs the primary region's "); + writer.println(" encoded regionname and replica id. The command will be "); + writer.println(" " + SET_REGION_STATE + " , "); + writer.println(" Possible region states:"); + writer.println(" OFFLINE, OPENING, OPEN, CLOSING, CLOSED, SPLITTING, SPLIT,"); + writer.println(" FAILED_OPEN, FAILED_CLOSE, MERGING, MERGED, SPLITTING_NEW,"); + writer.println(" MERGING_NEW, ABNORMALLY_CLOSED"); + writer.println(" WARNING: This is a very risky option intended for use as last resort."); + writer.println(" Example scenarios include unassigns/assigns that can't move forward"); + writer.println(" because region is in an inconsistent state in 'hbase:meta'. For"); + writer.println(" example, the 'unassigns' command can only proceed if passed a region"); + writer.println(" in one of the following states: SPLITTING|SPLIT|MERGING|OPEN|CLOSING"); + writer.println(" Before manually setting a region state with this command, please"); + writer.println(" certify that this region is not being handled by a running procedure,"); + writer.println(" such as 'assign' or 'split'. You can get a view of running procedures"); + writer.println(" in the hbase shell using the 'list_procedures' command. 
An example"); + writer.println(" setting region 'de00010733901a05f5a2a3a382e27dd4' to CLOSING:"); + writer.println(" $ HBCK2 setRegionState de00010733901a05f5a2a3a382e27dd4 CLOSING"); + writer.println(" Returns \"0\" if region state changed and \"1\" otherwise."); + writer.println(" If -i or --inputFiles is specified, pass one or more input file names."); + writer.println(" Each file contains , one pair per line."); + writer.println(" For example:"); + writer.println(" $ HBCK2 " + SET_REGION_STATE + " -i fileName1 fileName2"); + } + + public static void usageSetTableState(PrintWriter writer) { + writer.println(" " + SET_TABLE_STATE + " [ |-i ...]"); + writer.println(" Options:"); + writer.println(" -i,--inputFiles take one or more files of table names and states"); + writer.println(" Possible table states: " + Arrays.stream(TableState.State.values()) + .map(Enum::toString).collect(Collectors.joining(", "))); + writer.println(" To read current table state, in the hbase shell run:"); + writer.println(" hbase> get 'hbase:meta', '', 'table:state'"); + writer.println(" A value of \\x08\\x00 == ENABLED, \\x08\\x01 == DISABLED, etc."); + writer.println(" Can also run a 'describe \"\"' at the shell prompt."); + writer.println(" An example making table name 'user' ENABLED:"); + writer.println(" $ HBCK2 setTableState users ENABLED"); + writer.println(" Returns whatever the previous table state was."); + writer.println(" If -i or --inputFiles is specified, pass one or more input file names."); + writer.println(" Each file contains , one pair per line."); + writer.println(" For example:"); + writer.println(" $ HBCK2 " + SET_TABLE_STATE + " -i fileName1 fileName2"); + } + + public static void usageScheduleRecoveries(PrintWriter writer) { + writer.println(" " + SCHEDULE_RECOVERIES + " [...|-i ...]"); + writer.println(" Options:"); + writer.println(" -i,--inputFiles take one or more input files of server names"); + writer.println(" Schedule ServerCrashProcedure(SCP) for list of RegionServers. Format"); + writer.println(" server name as ',,' (See HBase UI/logs)."); + writer.println(" Example using RegionServer 'a.example.org,29100,1540348649479':"); + writer.println(" $ HBCK2 scheduleRecoveries a.example.org,29100,1540348649479"); + writer.println(" Returns the pid(s) of the created ServerCrashProcedure(s) or -1 if"); + writer.println(" no procedure created (see master logs for why not)."); + writer.println(" Command support added in hbase versions 2.0.3, 2.1.2, 2.2.0 or newer."); + writer.println(" If -i or --inputFiles is specified, pass one or more input file names."); + writer.println(" Each file contains , one per line. 
For example:"); + writer.println(" $ HBCK2 " + SCHEDULE_RECOVERIES + " -i fileName1 fileName2"); + } + + public static void usageRecoverUnknown(PrintWriter writer) { + writer.println(" " + RECOVER_UNKNOWN); + writer.println(" Schedule ServerCrashProcedure(SCP) for RegionServers that are reported"); + writer.println(" as unknown."); + writer.println(" Returns the pid(s) of the created ServerCrashProcedure(s) or -1 if"); + writer.println(" no procedure created (see master logs for why not)."); + writer.println(" Command support added in hbase versions 2.2.7, 2.3.5, 2.4.3,"); + writer.println(" 2.5.0 or newer."); + } + + public static void usageUnassigns(PrintWriter writer) { + writer.println(" " + UNASSIGNS + " [OPTIONS] [...|-i ...]"); + writer.println(" Options:"); + writer.println(" -o,--override override ownership by another procedure"); + writer.println(" -i,--inputFiles take one or more input files of encoded region names"); + writer.println(" -b,--batchSize number of regions to process in a batch"); + writer.println(" A 'raw' unassign that can be used even during Master initialization"); + writer.println(" (if the -skip flag is specified). Skirts Coprocessors. Pass one or"); + writer.println(" more encoded region names. 1588230740 is the hard-coded name for the"); + writer.println(" hbase:meta region and de00010733901a05f5a2a3a382e27dd4 is an example"); + writer.println(" of what a userspace encoded region name looks like. For example:"); + writer.println(" $ HBCK2 " + UNASSIGNS + " 1588230740 de00010733901a05f5a2a3a382e27dd4"); + writer.println(" Returns the pid(s) of the created UnassignProcedure(s) or -1 if none."); + writer.println(); + writer.println(" SEE ALSO, org.apache.hbase.hbck1.OfflineMetaRepair, the offline"); + writer.println(" hbase:meta tool. See the HBCK2 README for how to use."); + writer.println(" If -i or --inputFiles is specified, pass one or more input file names."); + writer.println(" Each file contains encoded region names, one per line. For example:"); + writer.println(" $ HBCK2 " + UNASSIGNS + " -i fileName1 fileName2"); + writer.println(" If -b or --batchSize is specified, the tool processes those many"); + writer.println(" regions at a time in a batch-ed manner; Consider using this option,"); + writer.println(" if the list of regions is huge, to avoid CallTimeoutException."); + writer.println(" For example:"); + writer.println(" $ HBCK2 " + UNASSIGNS + " -i fileName1 fileName2 -b 500"); + writer.println(" By default, batchSize is set to -1 i.e. no batching is done."); + } + + public static void usageRegioninfoMismatch(PrintWriter writer) { + writer.println(" " + REGIONINFO_MISMATCH); + writer.println(" Options:"); + writer.println(" -f,--fix Update hbase:meta with the corrections"); + writer.println(" It is recommended to first run this utility without the fix"); + writer.println(" option to ensure that the utility is generating the correct"); + writer.println(" serialized RegionInfo data structures. Inspect the output to"); + writer.println(" confirm that the hbase:meta rowkey matches the new RegionInfo."); + writer.println(); + writer.println(" This tool will read hbase:meta and report any regions whose rowkey"); + writer.println(" and cell value differ in their encoded region name. HBASE-23328 "); + writer.println(" illustrates a problem for read-replica enabled tables in which "); + writer.println(" the encoded region name (the MD5 hash) does not match between "); + writer.println(" the rowkey and the value. 
This problem is generally harmless "); + writer.println(" for normal operation, but can break other HBCK2 tools."); + writer.println(); + writer.println(" Run this command to determine if any regions are affected by "); + writer.println(" this bug and use the -f/--fix option to then correct any"); + writer.println(" affected regions."); + } +} From 09e550f0332f499b4ef7289d6608dfdcfb5c900f Mon Sep 17 00:00:00 2001 From: PreetPatel45 <7433preet@gmail.com> Date: Sat, 29 Mar 2025 17:13:02 -0300 Subject: [PATCH 3/6] removed long method cpde smell --- .../src/main/java/org/apache/hbase/HBCK2.java | 384 ++++++++++-------- 1 file changed, 214 insertions(+), 170 deletions(-) diff --git a/hbase-hbck2/src/main/java/org/apache/hbase/HBCK2.java b/hbase-hbck2/src/main/java/org/apache/hbase/HBCK2.java index 7b72dacc21..fbbdd4efce 100644 --- a/hbase-hbck2/src/main/java/org/apache/hbase/HBCK2.java +++ b/hbase-hbck2/src/main/java/org/apache/hbase/HBCK2.java @@ -798,198 +798,242 @@ private int doCommandLine(CommandLine commandLine, Options options) throws IOExc } switch (command) { - // Case handlers all have same format. Check first that the server supports - // the feature FIRST, then move to process the command. case SET_TABLE_STATE: - if (commands.length < 2) { - showErrorMessage(command - + " takes tablename and state arguments: e.g. user ENABLED, or a list of input files"); - return EXIT_FAILURE; - } - try (ClusterConnection connection = connect(); Hbck hbck = connection.getHbck()) { - checkFunctionSupported(connection, command); - setTableState(hbck, purgeFirst(commands)); - } - break; - + return handleSetTableState(commands); case ASSIGNS: - if (commands.length < 2) { - showErrorMessage(command + " takes one or more encoded region names"); - return EXIT_FAILURE; - } - try (ClusterConnection connection = connect(); Hbck hbck = connection.getHbck()) { - checkFunctionSupported(connection, command); - System.out.println(assigns(hbck, purgeFirst(commands))); - } - break; - + return handleAssigns(commands); case BYPASS: - if (commands.length < 2) { - showErrorMessage(command + " takes one or more pids"); - return EXIT_FAILURE; - } - // bypass does the connection setup and the checkFunctionSupported down - // inside in the bypass method delaying connection setup until last - // moment. It does this because it has another set of command options - // to process and wants to do that before setting up connection. - // This is why it is not like the other command processings. - List bs = bypass(purgeFirst(commands)); - if (bs == null) { - // Something went wrong w/ the parse and command didn't run. - return EXIT_FAILURE; - } - System.out.println(toString(bs)); - break; - + return handleBypass(commands); case UNASSIGNS: - if (commands.length < 2) { - showErrorMessage(command + " takes one or more encoded region names"); - return EXIT_FAILURE; - } - try (ClusterConnection connection = connect(); Hbck hbck = connection.getHbck()) { - checkFunctionSupported(connection, command); - System.out.println(toString(unassigns(hbck, purgeFirst(commands)))); - } - break; - + return handleUnassigns(commands); case SET_REGION_STATE: - if (commands.length < 2) { - showErrorMessage(command + " takes region encoded name and state arguments: e.g. 
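
The extracted HBCK2CommandUsage class keeps all of the per-command help text in one
place, while HBCK2 itself retains dispatch and validation. A minimal caller-side
sketch of how HBCK2 might route a help request to the extracted class (the
getCommandUsage method below is a hypothetical illustration, not part of this patch;
the usage* methods and command constants are the ones shown above):

  // Hypothetical sketch only. Assumes HBCK2 and HBCK2CommandUsage share a package,
  // so the package-visible command constants are reachable, and that
  // java.io.StringWriter and java.io.PrintWriter are imported.
  static String getCommandUsage(String command) {
    StringWriter sw = new StringWriter();
    PrintWriter writer = new PrintWriter(sw);
    switch (command) {
      case SET_TABLE_STATE:
        HBCK2CommandUsage.usageSetTableState(writer);
        break;
      case SET_REGION_STATE:
        HBCK2CommandUsage.usageSetRegionState(writer);
        break;
      case UNASSIGNS:
        HBCK2CommandUsage.usageUnassigns(writer);
        break;
      default:
        break; // fall back to the full usage listing
    }
    writer.flush();
    return sw.toString();
  }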
" - + "35f30b0ce922c34bf5c284eff33ba8b3 CLOSING, or a list of input files"); - return EXIT_FAILURE; - } - - try (ClusterConnection connection = connect()) { - checkHBCKSupport(connection, command); - return setRegionState(connection, purgeFirst(commands)); - } - + return handleSetRegionState(commands); case FILESYSTEM: - try (ClusterConnection connection = connect()) { - checkHBCKSupport(connection, command); - try (FileSystemFsck fsfsck = new FileSystemFsck(getConf())) { - Pair> pair = - parseCommandWithFixAndInputOptions(purgeFirst(commands)); - return fsfsck.fsck(pair.getSecond(), pair.getFirst().hasOption("f")) != 0 - ? EXIT_FAILURE - : EXIT_SUCCESS; - } - } - + return handleFileSystem(commands); case REPLICATION: - try (ClusterConnection connection = connect()) { - checkHBCKSupport(connection, command, "2.1.1", "2.2.0", "3.0.0"); - try (ReplicationFsck replicationFsck = new ReplicationFsck(getConf())) { - Pair> pair = - parseCommandWithFixAndInputOptions(purgeFirst(commands)); - return replicationFsck.fsck(pair.getSecond(), pair.getFirst().hasOption("f")) != 0 - ? EXIT_FAILURE - : EXIT_SUCCESS; - } - } - + return handleReplication(commands); case SCHEDULE_RECOVERIES: - if (commands.length < 2) { - showErrorMessage(command + " takes one or more serverNames"); - return EXIT_FAILURE; - } - try (ClusterConnection connection = connect(); Hbck hbck = connection.getHbck()) { - checkFunctionSupported(connection, command); - System.out.println(toString(scheduleRecoveries(hbck, purgeFirst(commands)))); - } - break; - + return handleScheduleRecoveries(commands); case RECOVER_UNKNOWN: - if (commands.length > 1) { - showErrorMessage(command + " doesn't take any arguments"); - return EXIT_FAILURE; - } - try (ClusterConnection connection = connect(); Hbck hbck = connection.getHbck()) { - checkFunctionSupported(connection, command); - System.out.println(toString(recoverUnknown(hbck))); - } - break; - + return handleRecoverUnknown(commands); case FIX_META: - if (commands.length > 1) { - showErrorMessage(command + " doesn't take any arguments"); - return EXIT_FAILURE; - } - try (ClusterConnection connection = connect(); Hbck hbck = connection.getHbck()) { - checkFunctionSupported(connection, command); - hbck.fixMeta(); - System.out.println("Server-side processing of fixMeta triggered."); - } - break; - + return handleFixMeta(commands); case ADD_MISSING_REGIONS_IN_META_FOR_TABLES: - if (commands.length < 2) { - showErrorMessage(command + " takes one or more table names."); - return EXIT_FAILURE; - } - - try { - Pair, List> result = - addMissingRegionsInMetaForTablesWrapper(purgeFirst(commands)); - System.out.println(formatReAddedRegionsMessage(result.getFirst(), result.getSecond())); - } catch (Exception e) { - return EXIT_FAILURE; - } - break; - + return handleAddMissingRegionsInMetaForTables(commands); case REPORT_MISSING_REGIONS_IN_META: - try { - Map> report = - reportTablesWithMissingRegionsInMeta(purgeFirst(commands)); - System.out.println(formatMissingRegionsInMetaReport(report)); - } catch (Exception e) { - return EXIT_FAILURE; - } - break; - + return handleReportMissingRegionsInMeta(commands); case EXTRA_REGIONS_IN_META: - try { - Map> report = extraRegionsInMeta(purgeFirst(commands)); - System.out.println(formatExtraRegionsReport(report)); - } catch (Exception e) { - return EXIT_FAILURE; - } - break; - + return handleExtraRegionsInMeta(commands); case GENERATE_TABLE_INFO: - List tableNames = Arrays.asList(purgeFirst(commands)); - MissingTableDescriptorGenerator tableInfoGenerator = - new 
MissingTableDescriptorGenerator(getConf()); - try (ClusterConnection connection = connect()) { - tableInfoGenerator.generateTableDescriptorFileIfMissing(connection.getAdmin(), - tableNames); - } catch (IOException e) { - showErrorMessage(e.getMessage()); - return EXIT_FAILURE; - } - break; - + return handleGenerateTableInfo(commands); case REGIONINFO_MISMATCH: - // `commands` includes the `regionInfoMismatch` argument. - if (commands.length > 2) { - showErrorMessage(command + " takes one optional argument, got more than one."); - return EXIT_FAILURE; - } - try { - regionInfoMismatch(commands); - } catch (Exception e) { - e.printStackTrace(); - return EXIT_FAILURE; - } - break; - + return handleRegionInfoMismatch(commands); default: showErrorMessage("Unsupported command: " + command); return EXIT_FAILURE; } + } + + private int handleSetTableState(String[] commands) throws IOException { + if (commands.length < 2) { + showErrorMessage(commands[0] + + " takes tablename and state arguments: e.g. user ENABLED, or a list of input files"); + return EXIT_FAILURE; + } + try (ClusterConnection connection = connect(); Hbck hbck = connection.getHbck()) { + checkFunctionSupported(connection, commands[0]); + setTableState(hbck, purgeFirst(commands)); + } + return EXIT_SUCCESS; + } + + private int handleAssigns(String[] commands) throws IOException { + if (commands.length < 2) { + showErrorMessage(commands[0] + " takes one or more encoded region names"); + return EXIT_FAILURE; + } + try (ClusterConnection connection = connect(); Hbck hbck = connection.getHbck()) { + checkFunctionSupported(connection, commands[0]); + System.out.println(assigns(hbck, purgeFirst(commands))); + } + return EXIT_SUCCESS; + } + + private int handleBypass(String[] commands) throws IOException { + if (commands.length < 2) { + showErrorMessage(commands[0] + " takes one or more pids"); + return EXIT_FAILURE; + } + // bypass does the connection setup and the checkFunctionSupported down + // inside in the bypass method delaying connection setup until last + // moment. It does this because it has another set of command options + // to process and wants to do that before setting up connection. + // This is why it is not like the other command processings. + List bs = bypass(purgeFirst(commands)); + if (bs == null) { + // Something went wrong w/ the parse and command didn't run. + return EXIT_FAILURE; + } + System.out.println(toString(bs)); return EXIT_SUCCESS; } + private int handleUnassigns(String[] commands) throws IOException { + if (commands.length < 2) { + showErrorMessage(commands[0] + " takes one or more encoded region names"); + return EXIT_FAILURE; + } + try (ClusterConnection connection = connect(); Hbck hbck = connection.getHbck()) { + checkFunctionSupported(connection, commands[0]); + System.out.println(toString(unassigns(hbck, purgeFirst(commands)))); + } + return EXIT_SUCCESS; + } + + private int handleSetRegionState(String[] commands) throws IOException { + if (commands.length < 2) { + showErrorMessage(commands[0] + " takes region encoded name and state arguments: e.g. 
" + + "35f30b0ce922c34bf5c284eff33ba8b3 CLOSING, or a list of input files"); + return EXIT_FAILURE; + } + + try (ClusterConnection connection = connect()) { + checkHBCKSupport(connection, commands[0]); + return setRegionState(connection, purgeFirst(commands)); + } + } + + private int handleFileSystem(String[] commands) throws IOException { + try (ClusterConnection connection = connect()) { + checkHBCKSupport(connection, commands[0]); + try (FileSystemFsck fsfsck = new FileSystemFsck(getConf())) { + Pair> pair = + parseCommandWithFixAndInputOptions(purgeFirst(commands)); + return fsfsck.fsck(pair.getSecond(), pair.getFirst().hasOption("f")) != 0 + ? EXIT_FAILURE + : EXIT_SUCCESS; + } + } + } + + private int handleReplication(String[] commands) throws IOException { + try (ClusterConnection connection = connect()) { + checkHBCKSupport(connection, commands[0], "2.1.1", "2.2.0", "3.0.0"); + try (ReplicationFsck replicationFsck = new ReplicationFsck(getConf())) { + Pair> pair = + parseCommandWithFixAndInputOptions(purgeFirst(commands)); + return replicationFsck.fsck(pair.getSecond(), pair.getFirst().hasOption("f")) != 0 + ? EXIT_FAILURE + : EXIT_SUCCESS; + } + } + } + + private int handleScheduleRecoveries(String[] commands) throws IOException { + if (commands.length < 2) { + showErrorMessage(commands[0] + " takes one or more serverNames"); + return EXIT_FAILURE; + } + try (ClusterConnection connection = connect(); Hbck hbck = connection.getHbck()) { + checkFunctionSupported(connection, commands[0]); + System.out.println(toString(scheduleRecoveries(hbck, purgeFirst(commands)))); + } + return EXIT_SUCCESS; + } + + private int handleRecoverUnknown(String[] commands) throws IOException { + if (commands.length > 1) { + showErrorMessage(commands[0] + " doesn't take any arguments"); + return EXIT_FAILURE; + } + try (ClusterConnection connection = connect(); Hbck hbck = connection.getHbck()) { + checkFunctionSupported(connection, commands[0]); + System.out.println(toString(recoverUnknown(hbck))); + } + return EXIT_SUCCESS; + } + + private int handleFixMeta(String[] commands) throws IOException { + if (commands.length > 1) { + showErrorMessage(commands[0] + " doesn't take any arguments"); + return EXIT_FAILURE; + } + try (ClusterConnection connection = connect(); Hbck hbck = connection.getHbck()) { + checkFunctionSupported(connection, commands[0]); + hbck.fixMeta(); + System.out.println("Server-side processing of fixMeta triggered."); + } + return EXIT_SUCCESS; + } + + private int handleAddMissingRegionsInMetaForTables(String[] commands) { + if (commands.length < 2) { + showErrorMessage(commands[0] + " takes one or more table names."); + return EXIT_FAILURE; + } + + try { + Pair, List> result = + addMissingRegionsInMetaForTablesWrapper(purgeFirst(commands)); + System.out.println(formatReAddedRegionsMessage(result.getFirst(), result.getSecond())); + return EXIT_SUCCESS; + } catch (Exception e) { + return EXIT_FAILURE; + } + } + + private int handleReportMissingRegionsInMeta(String[] commands) { + try { + Map> report = + reportTablesWithMissingRegionsInMeta(purgeFirst(commands)); + System.out.println(formatMissingRegionsInMetaReport(report)); + return EXIT_SUCCESS; + } catch (Exception e) { + return EXIT_FAILURE; + } + } + + private int handleExtraRegionsInMeta(String[] commands) { + try { + Map> report = extraRegionsInMeta(purgeFirst(commands)); + System.out.println(formatExtraRegionsReport(report)); + return EXIT_SUCCESS; + } catch (Exception e) { + return EXIT_FAILURE; + } + } + + private int 
handleGenerateTableInfo(String[] commands) { + try { + List tableNames = Arrays.asList(purgeFirst(commands)); + MissingTableDescriptorGenerator tableInfoGenerator = + new MissingTableDescriptorGenerator(getConf()); + try (ClusterConnection connection = connect()) { + tableInfoGenerator.generateTableDescriptorFileIfMissing(connection.getAdmin(), + tableNames); + return EXIT_SUCCESS; + } + } catch (IOException e) { + showErrorMessage(e.getMessage()); + return EXIT_FAILURE; + } + } + + private int handleRegionInfoMismatch(String[] commands) { + // `commands` includes the `regionInfoMismatch` argument. + if (commands.length > 2) { + showErrorMessage(commands[0] + " takes one optional argument, got more than one."); + return EXIT_FAILURE; + } + try { + regionInfoMismatch(commands); + return EXIT_SUCCESS; + } catch (Exception e) { + e.printStackTrace(); + return EXIT_FAILURE; + } + } + static int showUsagePerCommand(String command, Options options) throws IOException { boolean invalidCommand = false; try (StringWriter sw = new StringWriter(); PrintWriter writer = new PrintWriter(sw)) { From 0a895e747d77455a8b23839eeb991cd80a8c036a Mon Sep 17 00:00:00 2001 From: PreetPatel45 <7433preet@gmail.com> Date: Sat, 29 Mar 2025 19:15:43 -0300 Subject: [PATCH 4/6] remove deficient modularization code smell using Replace conditional with polymorphism refactoring strategy --- .../apache/hbase/DefaultMergeStrategy.java | 27 +++ .../java/org/apache/hbase/RegionsMerger.java | 159 ++++++++---------- .../apache/hbase/SkipSplitRegionStrategy.java | 14 ++ 3 files changed, 115 insertions(+), 85 deletions(-) create mode 100644 hbase-tools/src/main/java/org/apache/hbase/DefaultMergeStrategy.java create mode 100644 hbase-tools/src/main/java/org/apache/hbase/SkipSplitRegionStrategy.java diff --git a/hbase-tools/src/main/java/org/apache/hbase/DefaultMergeStrategy.java b/hbase-tools/src/main/java/org/apache/hbase/DefaultMergeStrategy.java new file mode 100644 index 0000000000..88efe60bff --- /dev/null +++ b/hbase-tools/src/main/java/org/apache/hbase/DefaultMergeStrategy.java @@ -0,0 +1,27 @@ +package org.apache.hbase; + +import java.util.Set; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.client.RegionInfo; + +/** + * Default strategy for merging regions. + * Merges if neither region is split and if the combined region size is acceptable. + */ +public class DefaultMergeStrategy implements RegionMergeStrategy { + private final RegionsMerger merger; + private final Path tableDir; + private final Set mergingRegions; + + public DefaultMergeStrategy(RegionsMerger merger, Path tableDir, Set mergingRegions) { + this.merger = merger; + this.tableDir = tableDir; + this.mergingRegions = mergingRegions; + } + + @Override + public boolean canMerge(RegionInfo region1, RegionInfo region2) throws Exception { + // Only merge if both regions are not split and the RegionsMerger's logic approves. 
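
Decomposing the long doCommandLine switch into one handler per command keeps each
command's argument checking and connection handling in a small, separately readable
method. A possible next step, sketched here only as an alternative design (the
CommandHandler interface and handlers map below are assumptions for illustration,
not part of this patch), is a dispatch table that removes the switch entirely:

  // Sketch: command-name-to-handler dispatch inside HBCK2.
  // Assumes java.util.Map and java.util.HashMap are imported; showErrorMessage,
  // EXIT_FAILURE and the handle* methods are the ones from the patch above.
  @FunctionalInterface
  interface CommandHandler {
    int handle(String[] commands) throws IOException;
  }

  private final Map<String, CommandHandler> handlers = new HashMap<>();

  private void registerHandlers() {
    handlers.put(SET_TABLE_STATE, this::handleSetTableState);
    handlers.put(ASSIGNS, this::handleAssigns);
    handlers.put(BYPASS, this::handleBypass);
    // ...one entry per remaining command...
  }

  private int dispatch(String command, String[] commands) throws IOException {
    CommandHandler handler = handlers.get(command);
    if (handler == null) {
      showErrorMessage("Unsupported command: " + command);
      return EXIT_FAILURE;
    }
    return handler.handle(commands);
  }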
From 0a895e747d77455a8b23839eeb991cd80a8c036a Mon Sep 17 00:00:00 2001
From: PreetPatel45 <7433preet@gmail.com>
Date: Sat, 29 Mar 2025 19:15:43 -0300
Subject: [PATCH 4/6] remove deficient modularization code smell using Replace
 conditional with polymorphism refactoring strategy

---
 .../apache/hbase/DefaultMergeStrategy.java    |  27 +++
 .../java/org/apache/hbase/RegionsMerger.java  | 159 ++++++++----------
 .../apache/hbase/SkipSplitRegionStrategy.java |  14 ++
 3 files changed, 115 insertions(+), 85 deletions(-)
 create mode 100644 hbase-tools/src/main/java/org/apache/hbase/DefaultMergeStrategy.java
 create mode 100644 hbase-tools/src/main/java/org/apache/hbase/SkipSplitRegionStrategy.java

diff --git a/hbase-tools/src/main/java/org/apache/hbase/DefaultMergeStrategy.java b/hbase-tools/src/main/java/org/apache/hbase/DefaultMergeStrategy.java
new file mode 100644
index 0000000000..88efe60bff
--- /dev/null
+++ b/hbase-tools/src/main/java/org/apache/hbase/DefaultMergeStrategy.java
@@ -0,0 +1,27 @@
+package org.apache.hbase;
+
+import java.util.Set;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.client.RegionInfo;
+
+/**
+ * Default strategy for merging regions.
+ * Merges if neither region is split and if the combined region size is acceptable.
+ */
+public class DefaultMergeStrategy implements RegionMergeStrategy {
+  private final RegionsMerger merger;
+  private final Path tableDir;
+  private final Set<RegionInfo> mergingRegions;
+
+  public DefaultMergeStrategy(RegionsMerger merger, Path tableDir,
+      Set<RegionInfo> mergingRegions) {
+    this.merger = merger;
+    this.tableDir = tableDir;
+    this.mergingRegions = mergingRegions;
+  }
+
+  @Override
+  public boolean canMerge(RegionInfo region1, RegionInfo region2) throws Exception {
+    // Only merge if both regions are not split and the RegionsMerger's logic approves.
+    return !region1.isSplit() && !region2.isSplit()
+      && merger.canMerge(tableDir, region1, region2, mergingRegions);
+  }
+}
diff --git a/hbase-tools/src/main/java/org/apache/hbase/RegionsMerger.java b/hbase-tools/src/main/java/org/apache/hbase/RegionsMerger.java
index 6d4be8cdbd..b93dc2a5d3 100644
--- a/hbase-tools/src/main/java/org/apache/hbase/RegionsMerger.java
+++ b/hbase-tools/src/main/java/org/apache/hbase/RegionsMerger.java
@@ -26,9 +26,8 @@
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
-import java.util.Map;
+import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.Future;
 import java.util.concurrent.atomic.LongAdder;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
@@ -59,33 +58,30 @@
 import org.slf4j.LoggerFactory;
 
 /**
- * HBase maintenance tool for merging regions of a specific table, until a target number of regions
- * for the table is reached, or no more merges can complete due to limit in resulting merged region
- * size.
+ * HBase maintenance tool for merging regions of a specific table, until a target number
+ * of regions is reached, or no more merges can complete due to limit in resulting merged
+ * region size.
  */
-public class RegionsMerger extends Configured implements org.apache.hadoop.util.Tool {
-
-  private static final Logger LOG = LoggerFactory.getLogger(RegionsMerger.class.getName());
+public class RegionsMerger extends BaseHBaseMaintenanceTool {
+  // Keep ALL original constants
   public static final String RESULTING_REGION_UPPER_MARK = "hbase.tools.merge.upper.mark";
   public static final String SLEEP = "hbase.tools.merge.sleep";
   public static final String MAX_ROUNDS_IDLE = "hbase.tools.max.iterations.blocked";
 
-  private final Configuration conf;
-  private final FileSystem fs;
   private final double resultSizeThreshold;
   private final int sleepBetweenCycles;
   private final long maxRoundsStuck;
 
   public RegionsMerger(Configuration conf) throws IOException {
-    this.conf = conf;
-    Path basePath = new Path(conf.get(HConstants.HBASE_DIR));
-    fs = basePath.getFileSystem(conf);
-    resultSizeThreshold = this.conf.getDouble(RESULTING_REGION_UPPER_MARK, 0.9)
-      * this.conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
-    sleepBetweenCycles = this.conf.getInt(SLEEP, 2000);
+    super(conf); // Initialize base class
+    // Keep original initialization logic
+    this.resultSizeThreshold = this.conf.getDouble(RESULTING_REGION_UPPER_MARK, 0.9)
+      * this.conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
+    this.sleepBetweenCycles = this.conf.getInt(SLEEP, 2000);
     this.maxRoundsStuck = this.conf.getInt(MAX_ROUNDS_IDLE, 10);
   }
 
   private Path getTablePath(TableName table) {
     Path basePath = new Path(conf.get(HConstants.HBASE_DIR));
     basePath = new Path(basePath, "data");
@@ -111,9 +107,9 @@ private List<RegionInfo> getOpenRegions(Connection connection, TableName table)
     Table metaTbl = connection.getTable(META_TABLE_NAME);
     String tblName = table.getNameAsString();
     RowFilter rowFilter =
-      new RowFilter(CompareOperator.EQUAL, new SubstringComparator(tblName + ","));
+        new RowFilter(CompareOperator.EQUAL, new SubstringComparator(tblName + ","));
     SingleColumnValueFilter colFilter = new SingleColumnValueFilter(CATALOG_FAMILY, STATE_QUALIFIER,
-      CompareOperator.EQUAL, Bytes.toBytes("OPEN"));
+        CompareOperator.EQUAL, Bytes.toBytes("OPEN"));
     colFilter.setFilterIfMissing(true);
     Scan scan = new Scan();
     FilterList filter = new FilterList(FilterList.Operator.MUST_PASS_ALL);
@@ -130,14 +126,14 @@ private List<RegionInfo> getOpenRegions(Connection connection, TableName table)
     return regions;
   }
 
-  private boolean canMerge(Path path, RegionInfo region1, RegionInfo region2,
-    Collection<Pair<RegionInfo, RegionInfo>> alreadyMerging) throws IOException {
-    if (
-      alreadyMerging.stream()
-        .anyMatch(regionPair -> region1.equals(regionPair.getFirst())
-          || region2.equals(regionPair.getFirst()) || region1.equals(regionPair.getSecond())
-          || region2.equals(regionPair.getSecond()))
-    ) {
+  /**
+   * Determines if two regions can be merged. It checks that neither region is already
+   * involved in a merge, that the regions are adjacent, and that the combined size does not
+   * exceed the upper size threshold.
+   */
+  public boolean canMerge(Path path, RegionInfo region1, RegionInfo region2,
+      Collection<RegionInfo> alreadyMerging) throws IOException {
+    if (alreadyMerging.stream()
+        .anyMatch(region -> region1.equals(region) || region2.equals(region))) {
       return false;
     }
     if (RegionInfo.areAdjacent(region1, region2)) {
@@ -145,16 +141,15 @@ private boolean canMerge(Path path, RegionInfo region1, RegionInfo region2,
       long size1 = sumSizeInFS(new Path(path, region1.getEncodedName()));
       long size2 = sumSizeInFS(new Path(path, region2.getEncodedName()));
       boolean mergeable = (resultSizeThreshold > (size1 + size2));
       if (!mergeable) {
-        LOG.warn(
-          "Not merging regions {} and {} because resulting region size would get close to "
-            + "the {} limit. {} total size: {}; {} total size:{}",
-          region1.getEncodedName(), region2.getEncodedName(), resultSizeThreshold,
-          region1.getEncodedName(), size1, region2.getEncodedName(), size2);
+        LOG.warn("Not merging regions {} and {} because resulting region size would get close to "
+            + "the {} limit. {} total size: {}; {} total size: {}",
+          region1.getEncodedName(), region2.getEncodedName(), resultSizeThreshold,
+          region1.getEncodedName(), size1, region2.getEncodedName(), size2);
       }
       return mergeable;
     } else {
       LOG.warn("WARNING: Can't merge regions {} and {} because those are not adjacent.",
-        region1.getEncodedName(), region2.getEncodedName());
+          region1.getEncodedName(), region2.getEncodedName());
       return false;
     }
   }
@@ -166,13 +161,17 @@ private boolean hasPreviousMergeRef(Connection conn, RegionInfo region) throws Exception {
     Result r = meta.get(get);
     boolean result = HBCKMetaTableAccessor.getMergeRegions(r.rawCells()) != null;
     if (result) {
-      LOG.warn("Region {} has an existing merge qualifier and can't be merged until for now. \n "
-        + "RegionsMerger will skip this region until merge qualifier is cleaned away. \n "
-        + "Consider major compact this region.", region.getEncodedName());
+      LOG.warn("Region {} has an existing merge qualifier and can't be merged for now. \n"
+        + "RegionsMerger will skip this region until the merge qualifier is cleaned away. \n"
+        + "Consider major compacting this region.", region.getEncodedName());
     }
     return result;
   }
 
+  /**
+   * Attempts to merge regions until the number of regions is reduced to targetRegions or no
+   * further progress can be made.
+   */
   public void mergeRegions(String tblName, int targetRegions) throws Exception {
     TableName table = TableName.valueOf(tblName);
     Path tableDir = getTablePath(table);
@@ -182,69 +181,54 @@ public void mergeRegions(String tblName, int targetRegions) throws Exception {
     LongAdder lastTimeProgessed = new LongAdder();
     // need to get all regions for the table, regardless of region state
     List<RegionInfo> regions = admin.getRegions(table);
-    Map<Future<?>, Pair<RegionInfo, RegionInfo>> regionsMerging = new ConcurrentHashMap<>();
+    // use a Set to track regions currently merging
+    Set<RegionInfo> mergingRegions = ConcurrentHashMap.newKeySet();
     long roundsNoProgress = 0;
+    RegionMergeStrategy strategy = new DefaultMergeStrategy(this, tableDir, mergingRegions);
     while (regions.size() > targetRegions) {
       LOG.info("Iteration: {}", counter);
       RegionInfo previous = null;
       int regionSize = regions.size();
-      LOG.info("Attempting to merge {} regions to reach the target {} ...", regionSize,
-        targetRegions);
+      LOG.info("Attempting to merge {} regions to reach the target {} ...", regionSize,
+        targetRegions);
       // to request merge, regions must be OPEN, though
       regions = getOpenRegions(conn, table);
       for (RegionInfo current : regions) {
-        if (!current.isSplit()) {
-          if (
-            previous != null && canMerge(tableDir, previous, current, regionsMerging.values())
-          ) {
-            // Before submitting a merge request, we need to check if any of the region candidates
-            // still have merge references from previous cycle
-            boolean hasMergeRef =
-              hasPreviousMergeRef(conn, previous) || hasPreviousMergeRef(conn, current);
-            if (!hasMergeRef) {
-              Future<?> f = admin.mergeRegionsAsync(current.getEncodedNameAsBytes(),
-                previous.getEncodedNameAsBytes(), true);
-              Pair<RegionInfo, RegionInfo> regionPair = new Pair<>(previous, current);
-              regionsMerging.put(f, regionPair);
-              if ((regionSize - regionsMerging.size()) <= targetRegions) {
-                break;
-              }
-            } else {
-              LOG.info("Skipping merge of candidates {} and {} because of existing merge "
-                + "qualifiers.", previous.getEncodedName(), current.getEncodedName());
-            }
-            previous = null;
-          } else {
-            previous = current;
-          }
-        } else {
-          LOG.debug("Skipping split region: {}", current.getEncodedName());
+        // Delegate decision to appropriate strategy
+        if (current.isSplit()) {
+          strategy = new SkipSplitRegionStrategy();
+        } else {
+          strategy = new DefaultMergeStrategy(this, tableDir, mergingRegions);
+        }
+        if (previous != null && strategy.canMerge(previous, current)) {
+          boolean hasMergeRef =
+            hasPreviousMergeRef(conn, previous) || hasPreviousMergeRef(conn, current);
+          if (!hasMergeRef) {
+            admin.mergeRegionsAsync(current.getEncodedNameAsBytes(),
+              previous.getEncodedNameAsBytes(), true);
+            mergingRegions.add(previous);
+            mergingRegions.add(current);
+            if ((regionSize - mergingRegions.size()) <= targetRegions) {
+              break;
+            }
+          } else {
+            LOG.info("Skipping merge of candidates {} and {} because of existing merge "
+              + "qualifiers.", previous.getEncodedName(), current.getEncodedName());
+          }
+          previous = null;
+        } else {
+          previous = current;
         }
       }
       counter.increment();
       LOG.info("Sleeping for {} seconds before next iteration...", (sleepBetweenCycles / 1000));
       Thread.sleep(sleepBetweenCycles);
-      regionsMerging.forEach((f, currentPair) -> {
-        if (f.isDone()) {
-          LOG.info("Merged regions {} and {} together.", currentPair.getFirst().getEncodedName(),
-            currentPair.getSecond().getEncodedName());
-          regionsMerging.remove(f);
-          lastTimeProgessed.reset();
-          lastTimeProgessed.add(counter.longValue());
-        } else {
-          LOG.warn("Merge of regions {} and {} isn't completed yet.", currentPair.getFirst(),
-            currentPair.getSecond());
-        }
-      });
+      // Clear tracking of merging regions for the next iteration.
+      mergingRegions.clear();
       roundsNoProgress = counter.longValue() - lastTimeProgessed.longValue();
       if (roundsNoProgress == this.maxRoundsStuck) {
-        LOG.warn("Reached {} iterations without progressing with new merges. Aborting...",
-          roundsNoProgress);
+        LOG.warn("Reached {} iterations without progressing with new merges. Aborting...",
+          roundsNoProgress);
         break;
       }
-
-      // again, get all regions, regardless of the state,
-      // in order to avoid breaking the loop prematurely
+      // Re-read regions from HBase for the next iteration.
       regions = admin.getRegions(table);
     }
   }
@@ -252,9 +236,9 @@ public void mergeRegions(String tblName, int targetRegions) throws Exception {
 
   @Override
   public int run(String[] args) {
+    // Keep original run() implementation exactly as is
     if (args.length != 2) {
-      LOG.error(
-        "Wrong number of arguments. " + "Arguments are: <TABLE_NAME> <TARGET_NUMBER_OF_REGIONS>");
+      LOG.error("Wrong number of arguments. Arguments are: <TABLE_NAME> <TARGET_NUMBER_OF_REGIONS>");
       return 1;
     }
     try {
@@ -266,11 +250,16 @@ public int run(String[] args) {
     return 0;
   }
 
-  public static void main(String[] args) throws Exception {
-    Configuration conf = HBaseConfiguration.create();
-    int errCode = ToolRunner.run(new RegionsMerger(conf), args);
-    if (errCode != 0) {
-      System.exit(errCode);
-    }
+  public static void main(String[] args) throws Exception {
+    System.exit(launchTool(args, new RegionsMerger(HBaseConfiguration.create())));
   }
 }
diff --git a/hbase-tools/src/main/java/org/apache/hbase/SkipSplitRegionStrategy.java b/hbase-tools/src/main/java/org/apache/hbase/SkipSplitRegionStrategy.java
new file mode 100644
index 0000000000..5a82232bd5
--- /dev/null
+++ b/hbase-tools/src/main/java/org/apache/hbase/SkipSplitRegionStrategy.java
@@ -0,0 +1,14 @@
+package org.apache.hbase;
+
+import org.apache.hadoop.hbase.client.RegionInfo;
+
+/**
+ * Strategy that prevents merging when a region is split.
+ */
+public class SkipSplitRegionStrategy implements RegionMergeStrategy {
+  @Override
+  public boolean canMerge(RegionInfo region1, RegionInfo region2) {
+    // Always return false to prevent merging if any region is split.
+    return false;
+  }
+}
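
With RegionMergeStrategy in place (the interface itself is defined in the following
patch), the merge loop no longer branches on isSplit() inline; it picks a strategy
and delegates the decision. A minimal usage sketch (merger, tableDir, mergingRegions,
previous and current are assumed to be the surrounding variables from
RegionsMerger.mergeRegions, as in the diff above):

  // Sketch: pick a strategy for this pair and delegate the merge decision.
  RegionMergeStrategy strategy = current.isSplit()
    ? new SkipSplitRegionStrategy()
    : new DefaultMergeStrategy(merger, tableDir, mergingRegions);
  if (previous != null && strategy.canMerge(previous, current)) {
    // safe to submit admin.mergeRegionsAsync(...) for this pair
  }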
From f850218f3e2d0add5d89ca51c0527a85bd9bed05 Mon Sep 17 00:00:00 2001
From: PreetPatel45 <7433preet@gmail.com>
Date: Sat, 29 Mar 2025 19:16:05 -0300
Subject: [PATCH 5/6] remove deficient modularization code smell using Replace
 conditional with polymorphism refactoring strategy

---
 .../org/apache/hbase/RegionMergeStrategy.java | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 hbase-tools/src/main/java/org/apache/hbase/RegionMergeStrategy.java

diff --git a/hbase-tools/src/main/java/org/apache/hbase/RegionMergeStrategy.java b/hbase-tools/src/main/java/org/apache/hbase/RegionMergeStrategy.java
new file mode 100644
index 0000000000..fc0d39d186
--- /dev/null
+++ b/hbase-tools/src/main/java/org/apache/hbase/RegionMergeStrategy.java
@@ -0,0 +1,18 @@
+package org.apache.hbase;
+
+import org.apache.hadoop.hbase.client.RegionInfo;
+
+/**
+ * Strategy interface for deciding if two regions can be merged.
+ */
+public interface RegionMergeStrategy {
+  /**
+   * Determines whether the two regions can be merged.
+   *
+   * @param region1 the first region
+   * @param region2 the second region
+   * @return true if mergeable; false otherwise.
+   * @throws Exception if any error occurs during evaluation.
+   */
+  boolean canMerge(RegionInfo region1, RegionInfo region2) throws Exception;
+}
From 6368267b565ea2083058129d064efe3e5f889c50 Mon Sep 17 00:00:00 2001
From: PreetPatel45 <7433preet@gmail.com>
Date: Sat, 29 Mar 2025 19:19:43 -0300
Subject: [PATCH 6/6] remove duplicate code smell using push up refactoring
 strategy

---
 .../hbase/BaseHBaseMaintenanceTool.java       | 38 +++++++++++++++++++
 .../hbase/MissingRegionDirsRepairTool.java    | 17 ++-------
 2 files changed, 42 insertions(+), 13 deletions(-)
 create mode 100644 hbase-tools/src/main/java/org/apache/hbase/BaseHBaseMaintenanceTool.java

diff --git a/hbase-tools/src/main/java/org/apache/hbase/BaseHBaseMaintenanceTool.java b/hbase-tools/src/main/java/org/apache/hbase/BaseHBaseMaintenanceTool.java
new file mode 100644
index 0000000000..99874e0cea
--- /dev/null
+++ b/hbase-tools/src/main/java/org/apache/hbase/BaseHBaseMaintenanceTool.java
@@ -0,0 +1,38 @@
+package org.apache.hbase;
+
+import java.io.IOException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public abstract class BaseHBaseMaintenanceTool extends Configured implements Tool {
+  protected static final Logger LOG = LoggerFactory.getLogger(BaseHBaseMaintenanceTool.class);
+  protected final Configuration conf;
+  protected final FileSystem fs;
+
+  protected BaseHBaseMaintenanceTool(Configuration conf) throws IOException {
+    super(conf);
+    this.conf = HBaseConfiguration.create(conf);
+    this.fs = FileSystem.get(this.conf);
+  }
+
+  protected Connection createConnection() throws IOException {
+    return ConnectionFactory.createConnection(conf);
+  }
+
+  public static int launchTool(String[] args, BaseHBaseMaintenanceTool tool) {
+    try {
+      return ToolRunner.run(tool, args);
+    } catch (Exception e) {
+      LOG.error("Tool failed:", e);
+      return 1;
+    }
+  }
+}
\ No newline at end of file
diff --git a/hbase-tools/src/main/java/org/apache/hbase/MissingRegionDirsRepairTool.java b/hbase-tools/src/main/java/org/apache/hbase/MissingRegionDirsRepairTool.java
index 0d9e892fba..a1b68b163e 100644
--- a/hbase-tools/src/main/java/org/apache/hbase/MissingRegionDirsRepairTool.java
+++ b/hbase-tools/src/main/java/org/apache/hbase/MissingRegionDirsRepairTool.java
@@ -36,19 +36,14 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-public class MissingRegionDirsRepairTool extends Configured implements org.apache.hadoop.util.Tool {
-
-  private static final Logger LOG =
-    LoggerFactory.getLogger(MissingRegionDirsRepairTool.class.getName());
-
+public class MissingRegionDirsRepairTool extends BaseHBaseMaintenanceTool {
   private static final String WORKING_DIR = ".missing_dirs_repair";
 
-  private Configuration conf;
   private HBCK2 hbck;
   private LoadIncrementalHFiles bulkLoad;
 
-  public MissingRegionDirsRepairTool(Configuration conf) {
-    this.conf = conf;
+  public MissingRegionDirsRepairTool(Configuration conf) throws IOException {
+    super(conf); // Initialize base class
    this.hbck = new HBCK2(conf);
    this.bulkLoad = new LoadIncrementalHFiles(conf);
  }
@@ -114,10 +109,6 @@ public int run(String[] strings) throws Exception {
   }
 
   public static void main(String[] args) throws Exception {
-    Configuration conf = HBaseConfiguration.create();
-    int errCode = ToolRunner.run(new MissingRegionDirsRepairTool(conf), args);
-    if (errCode != 0) {
-      System.exit(errCode);
-    }
+    System.exit(launchTool(args, new MissingRegionDirsRepairTool(HBaseConfiguration.create())));
   }
 }
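
With the shared Configuration, FileSystem, logging, and launch plumbing pulled up into
BaseHBaseMaintenanceTool, a new maintenance tool only has to supply run(). A sketch of
a hypothetical subclass follows; ListTablesTool is illustrative only and not part of
this patch series, but it uses only the base-class members introduced above and the
standard HBase Admin API:

  package org.apache.hbase;

  import java.io.IOException;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.TableName;
  import org.apache.hadoop.hbase.client.Admin;
  import org.apache.hadoop.hbase.client.Connection;

  public class ListTablesTool extends BaseHBaseMaintenanceTool {

    public ListTablesTool(Configuration conf) throws IOException {
      super(conf);
    }

    @Override
    public int run(String[] args) throws Exception {
      // conf, LOG and createConnection() are inherited from the base class.
      try (Connection connection = createConnection(); Admin admin = connection.getAdmin()) {
        for (TableName table : admin.listTableNames()) {
          LOG.info("Table: {}", table.getNameAsString());
        }
      }
      return 0;
    }

    public static void main(String[] args) throws Exception {
      System.exit(launchTool(args, new ListTablesTool(HBaseConfiguration.create())));
    }
  }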