3939import  org .apache .hadoop .hbase .filter .RowFilter ;
4040import  org .apache .hadoop .hbase .io .ImmutableBytesWritable ;
4141import  org .apache .hadoop .hbase .util .Bytes ;
42- import  org .apache .hadoop .io .IntWritable ;
42+ import  org .apache .hadoop .io .LongWritable ;
4343import  org .apache .hadoop .io .Text ;
4444import  org .apache .hadoop .mapreduce .Job ;
4545import  org .apache .hadoop .mapreduce .Reducer ;
5050
5151import  org .apache .hbase .thirdparty .com .google .common .base .Preconditions ;
5252
53- 
5453/** 
5554 * A job with a map and reduce phase to count cells in a table. 
5655 * The counter lists the following stats for a given table: 
5958 * 2. Total number of CFs across all rows 
6059 * 3. Total qualifiers across all rows 
6160 * 4. Total occurrence of each CF 
62-  * 5. Total occurrence   of each qualifier 
61+  * 5. Total occurrence of each qualifier 
6362 * 6. Total number of versions of each qualifier. 
63+  * 7. Total size of serialized cells of each CF. 
64+  * 8. Total size of serialized cells of each qualifier. 
65+  * 9. Total size of serialized cells across all rows. 
6466 * </pre> 
6567 * 
6668 * The cellcounter can take optional parameters to use a user 
@@ -86,13 +88,14 @@ public class CellCounter extends Configured implements Tool {
8688   * Mapper that runs the count. 
8789   */ 
8890  static  class  CellCounterMapper 
89-   extends  TableMapper <Text , IntWritable > {
91+   extends  TableMapper <Text , LongWritable > {
9092    /** 
9193     * Counter enumeration to count the actual rows. 
9294     */ 
9395    public  static  enum  Counters  {
9496      ROWS ,
95-       CELLS 
97+       CELLS ,
98+       SIZE 
9699    }
97100
98101    private  Configuration  conf ;
@@ -143,34 +146,41 @@ public void map(ImmutableBytesWritable row, Result values,
143146          currentFamily  = null ;
144147          currentQualifier  = null ;
145148          context .getCounter (Counters .ROWS ).increment (1 );
146-           context .write (new  Text ("Total ROWS" ), new  IntWritable (1 ));
149+           context .write (new  Text ("Total ROWS" ), new  LongWritable (1 ));
147150        }
148151        if  (!values .isEmpty ()) {
149152          int  cellCount  = 0 ;
150153          for  (Cell  value  : values .listCells ()) {
151154            cellCount ++;
155+             long  size  = value .getSerializedSize ();
152156            if  (currentFamily  == null  || !CellUtil .matchingFamily (value , currentFamily )) {
153157              currentFamily  = CellUtil .cloneFamily (value );
154158              currentFamilyName  = Bytes .toStringBinary (currentFamily );
155159              currentQualifier  = null ;
156160              context .getCounter ("CF" , currentFamilyName ).increment (1 );
157161              if  (1  == context .getCounter ("CF" , currentFamilyName ).getValue ()) {
158-                 context .write (new  Text ("Total Families Across all Rows" ), new  IntWritable (1 ));
159-                 context .write (new  Text (currentFamily ), new  IntWritable (1 ));
162+                 context .write (new  Text ("Total Families Across all Rows" ), new  LongWritable (1 ));
163+                 context .write (new  Text (currentFamily ), new  LongWritable (1 ));
160164              }
165+               context .getCounter (Counters .SIZE ).increment (size );
166+               context .write (new  Text ("Total SIZE" ), new  LongWritable (size ));
167+               context .getCounter ("CF" , currentFamilyName  + "_Size" ).increment (size );
168+               context .write (new  Text (currentFamilyName  + "_Size" ), new  LongWritable (size ));
161169            }
162-             if  (currentQualifier  == null  || !CellUtil .matchingQualifier (value , currentQualifier ))  {
170+             if  (currentQualifier  == null  || !CellUtil .matchingQualifier (value , currentQualifier )){
163171              currentQualifier  = CellUtil .cloneQualifier (value );
164172              currentQualifierName  = currentFamilyName  + separator  +
165173                  Bytes .toStringBinary (currentQualifier );
166174              currentRowQualifierName  = currentRowKey  + separator  + currentQualifierName ;
167175
168176              context .write (new  Text ("Total Qualifiers across all Rows" ),
169-                   new  IntWritable (1 ));
170-               context .write (new  Text (currentQualifierName ), new  IntWritable (1 ));
177+                   new  LongWritable (1 ));
178+               context .write (new  Text (currentQualifierName ), new  LongWritable (1 ));
179+               context .getCounter ("Q" , currentQualifierName  + "_Size" ).increment (size );
180+               context .write (new  Text (currentQualifierName  + "_Size" ), new  LongWritable (size ));
171181            }
172182            // Increment versions 
173-             context .write (new  Text (currentRowQualifierName  + "_Versions" ), new  IntWritable (1 ));
183+             context .write (new  Text (currentRowQualifierName  + "_Versions" ), new  LongWritable (1 ));
174184          }
175185          context .getCounter (Counters .CELLS ).increment (cellCount );
176186        }
@@ -180,20 +190,20 @@ public void map(ImmutableBytesWritable row, Result values,
180190    }
181191  }
182192
183-   static  class  IntSumReducer <Key > extends  Reducer <Key , IntWritable ,
184-       Key , IntWritable > {
193+   static  class  LongSumReducer <Key > extends  Reducer <Key , LongWritable , Key , LongWritable > {
194+ 
195+     private  LongWritable  result  = new  LongWritable ();
185196
186-     private  IntWritable  result  = new  IntWritable ();
187-     public  void  reduce (Key  key , Iterable <IntWritable > values ,
188-       Context  context )
189-     throws  IOException , InterruptedException  {
190-       int  sum  = 0 ;
191-       for  (IntWritable  val  : values ) {
197+     public  void  reduce (Key  key , Iterable <LongWritable > values , Context  context )
198+         throws  IOException , InterruptedException  {
199+       long  sum  = 0 ;
200+       for  (LongWritable  val  : values ) {
192201        sum  += val .get ();
193202      }
194203      result .set (sum );
195204      context .write (key , result );
196205    }
206+ 
197207  }
198208
199209  /** 
@@ -216,13 +226,13 @@ public static Job createSubmittableJob(Configuration conf, String[] args)
216226    TableMapReduceUtil .initTableMapperJob (tableName , scan ,
217227        CellCounterMapper .class , ImmutableBytesWritable .class , Result .class , job );
218228    job .setMapOutputKeyClass (Text .class );
219-     job .setMapOutputValueClass (IntWritable .class );
229+     job .setMapOutputValueClass (LongWritable .class );
220230    job .setOutputFormatClass (TextOutputFormat .class );
221231    job .setOutputKeyClass (Text .class );
222-     job .setOutputValueClass (IntWritable .class );
232+     job .setOutputValueClass (LongWritable .class );
223233    FileOutputFormat .setOutputPath (job , outputDir );
224-     job .setReducerClass (IntSumReducer .class );
225-     job .setCombinerClass (IntSumReducer .class );
234+     job .setReducerClass (LongSumReducer .class );
235+     job .setCombinerClass (LongSumReducer .class );
226236    return  job ;
227237  }
228238
0 commit comments