Commit 4fb8a7f

HBASE-26446 CellCounter should report serialized cell size counts too (#3841)
Add the following stats for a given table:

7. Total size of serialized cells of each CF.
8. Total size of serialized cells of each qualifier.
9. Total size of serialized cells across all rows.

Signed-off-by: Viraj Jasani <[email protected]>
1 parent e65f28c commit 4fb8a7f
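
As a usage sketch (not part of this commit; the class name RunCellCounter, the table name and the output path below are placeholder assumptions), the job can be driven through ToolRunner, and with this change its text output additionally carries LongWritable sums under the "Total SIZE", "<family>_Size" and "<family><separator><qualifier>_Size" keys:

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.CellCounter;
import org.apache.hadoop.util.ToolRunner;

public class RunCellCounter {
  public static void main(String[] args) throws Exception {
    // Placeholder arguments: source table and output directory for the job's text output.
    String[] jobArgs = { "my_table", "/tmp/cellcounter-out" };
    // CellCounter extends Configured and implements Tool, so ToolRunner can drive it.
    int exitCode = ToolRunner.run(HBaseConfiguration.create(), new CellCounter(), jobArgs);
    System.exit(exitCode);
  }
}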

1 file changed: +34 −24 lines
hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/CellCounter.java

Lines changed: 34 additions & 24 deletions
@@ -39,7 +39,7 @@
 import org.apache.hadoop.hbase.filter.RowFilter;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.Reducer;
@@ -50,7 +50,6 @@

 import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;

-
 /**
  * A job with a a map and reduce phase to count cells in a table.
  * The counter lists the following stats for a given table:
@@ -59,8 +58,11 @@
  * 2. Total number of CFs across all rows
  * 3. Total qualifiers across all rows
  * 4. Total occurrence of each CF
- * 5. Total occurrence of each qualifier
+ * 5. Total occurrence of each qualifier
  * 6. Total number of versions of each qualifier.
+ * 7. Total size of serialized cells of each CF.
+ * 8. Total size of serialized cells of each qualifier.
+ * 9. Total size of serialized cells across all rows.
  * </pre>
  *
  * The cellcounter can take optional parameters to use a user
@@ -86,13 +88,14 @@ public class CellCounter extends Configured implements Tool {
    * Mapper that runs the count.
    */
   static class CellCounterMapper
-  extends TableMapper<Text, IntWritable> {
+  extends TableMapper<Text, LongWritable> {
     /**
      * Counter enumeration to count the actual rows.
      */
     public static enum Counters {
       ROWS,
-      CELLS
+      CELLS,
+      SIZE
     }

     private Configuration conf;
@@ -143,34 +146,41 @@ public void map(ImmutableBytesWritable row, Result values,
           currentFamily = null;
           currentQualifier = null;
           context.getCounter(Counters.ROWS).increment(1);
-          context.write(new Text("Total ROWS"), new IntWritable(1));
+          context.write(new Text("Total ROWS"), new LongWritable(1));
         }
         if (!values.isEmpty()) {
           int cellCount = 0;
           for (Cell value : values.listCells()) {
             cellCount++;
+            long size = value.getSerializedSize();
             if (currentFamily == null || !CellUtil.matchingFamily(value, currentFamily)) {
               currentFamily = CellUtil.cloneFamily(value);
               currentFamilyName = Bytes.toStringBinary(currentFamily);
               currentQualifier = null;
               context.getCounter("CF", currentFamilyName).increment(1);
               if (1 == context.getCounter("CF", currentFamilyName).getValue()) {
-                context.write(new Text("Total Families Across all Rows"), new IntWritable(1));
-                context.write(new Text(currentFamily), new IntWritable(1));
+                context.write(new Text("Total Families Across all Rows"), new LongWritable(1));
+                context.write(new Text(currentFamily), new LongWritable(1));
               }
+              context.getCounter(Counters.SIZE).increment(size);
+              context.write(new Text("Total SIZE"), new LongWritable(size));
+              context.getCounter("CF", currentFamilyName + "_Size").increment(size);
+              context.write(new Text(currentFamilyName + "_Size"), new LongWritable(size));
             }
-            if (currentQualifier == null || !CellUtil.matchingQualifier(value, currentQualifier)) {
+            if (currentQualifier == null || !CellUtil.matchingQualifier(value, currentQualifier)){
              currentQualifier = CellUtil.cloneQualifier(value);
              currentQualifierName = currentFamilyName + separator +
                  Bytes.toStringBinary(currentQualifier);
              currentRowQualifierName = currentRowKey + separator + currentQualifierName;

              context.write(new Text("Total Qualifiers across all Rows"),
-                  new IntWritable(1));
-              context.write(new Text(currentQualifierName), new IntWritable(1));
+                  new LongWritable(1));
+              context.write(new Text(currentQualifierName), new LongWritable(1));
+              context.getCounter("Q", currentQualifierName + "_Size").increment(size);
+              context.write(new Text(currentQualifierName + "_Size"), new LongWritable(size));
             }
             // Increment versions
-            context.write(new Text(currentRowQualifierName + "_Versions"), new IntWritable(1));
+            context.write(new Text(currentRowQualifierName + "_Versions"), new LongWritable(1));
           }
           context.getCounter(Counters.CELLS).increment(cellCount);
         }
@@ -180,20 +190,20 @@ public void map(ImmutableBytesWritable row, Result values,
     }
   }

-  static class IntSumReducer<Key> extends Reducer<Key, IntWritable,
-      Key, IntWritable> {
+  static class LongSumReducer<Key> extends Reducer<Key, LongWritable, Key, LongWritable> {
+
+    private LongWritable result = new LongWritable();

-    private IntWritable result = new IntWritable();
-    public void reduce(Key key, Iterable<IntWritable> values,
-        Context context)
-        throws IOException, InterruptedException {
-      int sum = 0;
-      for (IntWritable val : values) {
+    public void reduce(Key key, Iterable<LongWritable> values, Context context)
+        throws IOException, InterruptedException {
+      long sum = 0;
+      for (LongWritable val : values) {
         sum += val.get();
       }
       result.set(sum);
       context.write(key, result);
     }
+
   }

   /**
@@ -216,13 +226,13 @@ public static Job createSubmittableJob(Configuration conf, String[] args)
     TableMapReduceUtil.initTableMapperJob(tableName, scan,
       CellCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
     job.setMapOutputKeyClass(Text.class);
-    job.setMapOutputValueClass(IntWritable.class);
+    job.setMapOutputValueClass(LongWritable.class);
     job.setOutputFormatClass(TextOutputFormat.class);
     job.setOutputKeyClass(Text.class);
-    job.setOutputValueClass(IntWritable.class);
+    job.setOutputValueClass(LongWritable.class);
     FileOutputFormat.setOutputPath(job, outputDir);
-    job.setReducerClass(IntSumReducer.class);
-    job.setCombinerClass(IntSumReducer.class);
+    job.setReducerClass(LongSumReducer.class);
+    job.setCombinerClass(LongSumReducer.class);
     return job;
   }
