3333import java .util .Objects ;
3434
/**
 * Provides access to the C++ model size stats for the Java process.
 */
3838public class ModelSizeStats implements ToXContentObject {
3939
@@ -54,6 +54,12 @@ public class ModelSizeStats implements ToXContentObject {
5454 public static final ParseField TOTAL_PARTITION_FIELD_COUNT_FIELD = new ParseField ("total_partition_field_count" );
5555 public static final ParseField BUCKET_ALLOCATION_FAILURES_COUNT_FIELD = new ParseField ("bucket_allocation_failures_count" );
5656 public static final ParseField MEMORY_STATUS_FIELD = new ParseField ("memory_status" );
57+ public static final ParseField CATEGORIZED_DOC_COUNT_FIELD = new ParseField ("categorized_doc_count" );
58+ public static final ParseField TOTAL_CATEGORY_COUNT_FIELD = new ParseField ("total_category_count" );
59+ public static final ParseField FREQUENT_CATEGORY_COUNT_FIELD = new ParseField ("frequent_category_count" );
60+ public static final ParseField RARE_CATEGORY_COUNT_FIELD = new ParseField ("rare_category_count" );
61+ public static final ParseField DEAD_CATEGORY_COUNT_FIELD = new ParseField ("dead_category_count" );
62+ public static final ParseField CATEGORIZATION_STATUS_FIELD = new ParseField ("categorization_status" );
5763 public static final ParseField LOG_TIME_FIELD = new ParseField ("log_time" );
5864 public static final ParseField TIMESTAMP_FIELD = new ParseField ("timestamp" );
5965
@@ -69,6 +75,14 @@ public class ModelSizeStats implements ToXContentObject {
6975 PARSER .declareLong (Builder ::setTotalByFieldCount , TOTAL_BY_FIELD_COUNT_FIELD );
7076 PARSER .declareLong (Builder ::setTotalOverFieldCount , TOTAL_OVER_FIELD_COUNT_FIELD );
7177 PARSER .declareLong (Builder ::setTotalPartitionFieldCount , TOTAL_PARTITION_FIELD_COUNT_FIELD );
78+ PARSER .declareField (Builder ::setMemoryStatus , p -> MemoryStatus .fromString (p .text ()), MEMORY_STATUS_FIELD , ValueType .STRING );
79+ PARSER .declareLong (Builder ::setCategorizedDocCount , CATEGORIZED_DOC_COUNT_FIELD );
80+ PARSER .declareLong (Builder ::setTotalCategoryCount , TOTAL_CATEGORY_COUNT_FIELD );
81+ PARSER .declareLong (Builder ::setFrequentCategoryCount , FREQUENT_CATEGORY_COUNT_FIELD );
82+ PARSER .declareLong (Builder ::setRareCategoryCount , RARE_CATEGORY_COUNT_FIELD );
83+ PARSER .declareLong (Builder ::setDeadCategoryCount , DEAD_CATEGORY_COUNT_FIELD );
84+ PARSER .declareField (Builder ::setCategorizationStatus ,
85+ p -> CategorizationStatus .fromString (p .text ()), CATEGORIZATION_STATUS_FIELD , ValueType .STRING );
7286 PARSER .declareField (Builder ::setLogTime ,
7387 (p ) -> TimeUtil .parseTimeField (p , LOG_TIME_FIELD .getPreferredName ()),
7488 LOG_TIME_FIELD ,
@@ -77,7 +91,6 @@ public class ModelSizeStats implements ToXContentObject {
7791 (p ) -> TimeUtil .parseTimeField (p , TIMESTAMP_FIELD .getPreferredName ()),
7892 TIMESTAMP_FIELD ,
7993 ValueType .VALUE );
80- PARSER .declareField (Builder ::setMemoryStatus , p -> MemoryStatus .fromString (p .text ()), MEMORY_STATUS_FIELD , ValueType .STRING );
8194 }
8295
8396 /**
@@ -99,6 +112,23 @@ public String toString() {
99112 }
100113 }
101114
/**
 * The status of categorization for a job. {@code OK} is the default;
 * {@code WARN} means that inappropriate numbers of categories are being found.
 */
public enum CategorizationStatus {
    OK, WARN;

    /**
     * Parses a status name, ignoring surrounding whitespace and letter case.
     *
     * @param statusName the textual status, for example {@code "ok"} or {@code " WARN "}
     * @return the constant whose name matches the trimmed, upper-cased input
     * @throws NullPointerException if {@code statusName} is {@code null}
     * @throws IllegalArgumentException if the input matches no constant
     */
    public static CategorizationStatus fromString(String statusName) {
        // Fail with a descriptive message instead of a bare NPE from trim().
        Objects.requireNonNull(statusName, "statusName must not be null");
        // Locale.ROOT keeps the case conversion independent of the default locale.
        return valueOf(statusName.trim().toUpperCase(Locale.ROOT));
    }

    /**
     * Returns the constant name in lower case.
     */
    @Override
    public String toString() {
        return name().toLowerCase(Locale.ROOT);
    }
}
131+
102132 private final String jobId ;
103133 private final long modelBytes ;
104134 private final Long modelBytesExceeded ;
@@ -108,12 +138,20 @@ public String toString() {
108138 private final long totalPartitionFieldCount ;
109139 private final long bucketAllocationFailuresCount ;
110140 private final MemoryStatus memoryStatus ;
141+ private final long categorizedDocCount ;
142+ private final long totalCategoryCount ;
143+ private final long frequentCategoryCount ;
144+ private final long rareCategoryCount ;
145+ private final long deadCategoryCount ;
146+ private final CategorizationStatus categorizationStatus ;
111147 private final Date timestamp ;
112148 private final Date logTime ;
113149
114150 private ModelSizeStats (String jobId , long modelBytes , Long modelBytesExceeded , Long modelBytesMemoryLimit , long totalByFieldCount ,
115151 long totalOverFieldCount , long totalPartitionFieldCount , long bucketAllocationFailuresCount ,
116- MemoryStatus memoryStatus , Date timestamp , Date logTime ) {
152+ MemoryStatus memoryStatus , long categorizedDocCount , long totalCategoryCount , long frequentCategoryCount ,
153+ long rareCategoryCount , long deadCategoryCount , CategorizationStatus categorizationStatus ,
154+ Date timestamp , Date logTime ) {
117155 this .jobId = jobId ;
118156 this .modelBytes = modelBytes ;
119157 this .modelBytesExceeded = modelBytesExceeded ;
@@ -123,6 +161,12 @@ private ModelSizeStats(String jobId, long modelBytes, Long modelBytesExceeded, L
123161 this .totalPartitionFieldCount = totalPartitionFieldCount ;
124162 this .bucketAllocationFailuresCount = bucketAllocationFailuresCount ;
125163 this .memoryStatus = memoryStatus ;
164+ this .categorizedDocCount = categorizedDocCount ;
165+ this .totalCategoryCount = totalCategoryCount ;
166+ this .frequentCategoryCount = frequentCategoryCount ;
167+ this .rareCategoryCount = rareCategoryCount ;
168+ this .deadCategoryCount = deadCategoryCount ;
169+ this .categorizationStatus = categorizationStatus ;
126170 this .timestamp = timestamp ;
127171 this .logTime = logTime ;
128172 }
@@ -145,6 +189,12 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
145189 builder .field (TOTAL_PARTITION_FIELD_COUNT_FIELD .getPreferredName (), totalPartitionFieldCount );
146190 builder .field (BUCKET_ALLOCATION_FAILURES_COUNT_FIELD .getPreferredName (), bucketAllocationFailuresCount );
147191 builder .field (MEMORY_STATUS_FIELD .getPreferredName (), memoryStatus );
192+ builder .field (CATEGORIZED_DOC_COUNT_FIELD .getPreferredName (), categorizedDocCount );
193+ builder .field (TOTAL_CATEGORY_COUNT_FIELD .getPreferredName (), totalCategoryCount );
194+ builder .field (FREQUENT_CATEGORY_COUNT_FIELD .getPreferredName (), frequentCategoryCount );
195+ builder .field (RARE_CATEGORY_COUNT_FIELD .getPreferredName (), rareCategoryCount );
196+ builder .field (DEAD_CATEGORY_COUNT_FIELD .getPreferredName (), deadCategoryCount );
197+ builder .field (CATEGORIZATION_STATUS_FIELD .getPreferredName (), categorizationStatus );
148198 builder .timeField (LOG_TIME_FIELD .getPreferredName (), LOG_TIME_FIELD .getPreferredName () + "_string" , logTime .getTime ());
149199 if (timestamp != null ) {
150200 builder .timeField (TIMESTAMP_FIELD .getPreferredName (), TIMESTAMP_FIELD .getPreferredName () + "_string" , timestamp .getTime ());
@@ -190,6 +240,30 @@ public MemoryStatus getMemoryStatus() {
190240 return memoryStatus ;
191241 }
192242
243+ public long getCategorizedDocCount () {
244+ return categorizedDocCount ;
245+ }
246+
247+ public long getTotalCategoryCount () {
248+ return totalCategoryCount ;
249+ }
250+
251+ public long getFrequentCategoryCount () {
252+ return frequentCategoryCount ;
253+ }
254+
255+ public long getRareCategoryCount () {
256+ return rareCategoryCount ;
257+ }
258+
259+ public long getDeadCategoryCount () {
260+ return deadCategoryCount ;
261+ }
262+
263+ public CategorizationStatus getCategorizationStatus () {
264+ return categorizationStatus ;
265+ }
266+
193267 /**
194268 * The timestamp of the last processed record when this instance was created.
195269 *
@@ -211,7 +285,8 @@ public Date getLogTime() {
211285 @ Override
212286 public int hashCode () {
213287 return Objects .hash (jobId , modelBytes , modelBytesExceeded , modelBytesMemoryLimit , totalByFieldCount , totalOverFieldCount ,
214- totalPartitionFieldCount , this .bucketAllocationFailuresCount , memoryStatus , timestamp , logTime );
288+ totalPartitionFieldCount , this .bucketAllocationFailuresCount , memoryStatus , categorizedDocCount , totalCategoryCount ,
289+ frequentCategoryCount , rareCategoryCount , deadCategoryCount , categorizationStatus , timestamp , logTime );
215290 }
216291
217292 /**
@@ -233,7 +308,14 @@ public boolean equals(Object other) {
233308 && Objects .equals (this .modelBytesMemoryLimit , that .modelBytesMemoryLimit ) && this .totalByFieldCount == that .totalByFieldCount
234309 && this .totalOverFieldCount == that .totalOverFieldCount && this .totalPartitionFieldCount == that .totalPartitionFieldCount
235310 && this .bucketAllocationFailuresCount == that .bucketAllocationFailuresCount
236- && Objects .equals (this .memoryStatus , that .memoryStatus ) && Objects .equals (this .timestamp , that .timestamp )
311+ && Objects .equals (this .memoryStatus , that .memoryStatus )
312+ && this .categorizedDocCount == that .categorizedDocCount
313+ && this .totalCategoryCount == that .totalCategoryCount
314+ && this .frequentCategoryCount == that .frequentCategoryCount
315+ && this .rareCategoryCount == that .rareCategoryCount
316+ && this .deadCategoryCount == that .deadCategoryCount
317+ && Objects .equals (this .categorizationStatus , that .categorizationStatus )
318+ && Objects .equals (this .timestamp , that .timestamp )
237319 && Objects .equals (this .logTime , that .logTime )
238320 && Objects .equals (this .jobId , that .jobId );
239321 }
@@ -249,12 +331,19 @@ public static class Builder {
249331 private long totalPartitionFieldCount ;
250332 private long bucketAllocationFailuresCount ;
251333 private MemoryStatus memoryStatus ;
334+ private long categorizedDocCount ;
335+ private long totalCategoryCount ;
336+ private long frequentCategoryCount ;
337+ private long rareCategoryCount ;
338+ private long deadCategoryCount ;
339+ private CategorizationStatus categorizationStatus ;
252340 private Date timestamp ;
253341 private Date logTime ;
254342
/**
 * Creates a builder for the given job. The memory status and the
 * categorization status both start as {@code OK}, and the log time is
 * initialised to a freshly constructed {@link Date}.
 *
 * @param jobId the job identifier stored in the builder
 */
public Builder(String jobId) {
    this.jobId = jobId;
    memoryStatus = MemoryStatus.OK;
    categorizationStatus = CategorizationStatus.OK;
    logTime = new Date();
}
260349
@@ -268,6 +357,12 @@ public Builder(ModelSizeStats modelSizeStats) {
268357 this .totalPartitionFieldCount = modelSizeStats .totalPartitionFieldCount ;
269358 this .bucketAllocationFailuresCount = modelSizeStats .bucketAllocationFailuresCount ;
270359 this .memoryStatus = modelSizeStats .memoryStatus ;
360+ this .categorizedDocCount = modelSizeStats .categorizedDocCount ;
361+ this .totalCategoryCount = modelSizeStats .totalCategoryCount ;
362+ this .frequentCategoryCount = modelSizeStats .frequentCategoryCount ;
363+ this .rareCategoryCount = modelSizeStats .rareCategoryCount ;
364+ this .deadCategoryCount = modelSizeStats .deadCategoryCount ;
365+ this .categorizationStatus = modelSizeStats .categorizationStatus ;
271366 this .timestamp = modelSizeStats .timestamp ;
272367 this .logTime = modelSizeStats .logTime ;
273368 }
@@ -313,6 +408,37 @@ public Builder setMemoryStatus(MemoryStatus memoryStatus) {
313408 return this ;
314409 }
315410
411+ public Builder setCategorizedDocCount (long categorizedDocCount ) {
412+ this .categorizedDocCount = categorizedDocCount ;
413+ return this ;
414+ }
415+
416+ public Builder setTotalCategoryCount (long totalCategoryCount ) {
417+ this .totalCategoryCount = totalCategoryCount ;
418+ return this ;
419+ }
420+
421+ public Builder setFrequentCategoryCount (long frequentCategoryCount ) {
422+ this .frequentCategoryCount = frequentCategoryCount ;
423+ return this ;
424+ }
425+
426+ public Builder setRareCategoryCount (long rareCategoryCount ) {
427+ this .rareCategoryCount = rareCategoryCount ;
428+ return this ;
429+ }
430+
431+ public Builder setDeadCategoryCount (long deadCategoryCount ) {
432+ this .deadCategoryCount = deadCategoryCount ;
433+ return this ;
434+ }
435+
436+ public Builder setCategorizationStatus (CategorizationStatus categorizationStatus ) {
437+ Objects .requireNonNull (categorizationStatus , "[" + CATEGORIZATION_STATUS_FIELD .getPreferredName () + "] must not be null" );
438+ this .categorizationStatus = categorizationStatus ;
439+ return this ;
440+ }
441+
316442 public Builder setTimestamp (Date timestamp ) {
317443 this .timestamp = timestamp ;
318444 return this ;
@@ -325,7 +451,8 @@ public Builder setLogTime(Date logTime) {
325451
326452 public ModelSizeStats build () {
327453 return new ModelSizeStats (jobId , modelBytes , modelBytesExceeded , modelBytesMemoryLimit , totalByFieldCount , totalOverFieldCount ,
328- totalPartitionFieldCount , bucketAllocationFailuresCount , memoryStatus , timestamp , logTime );
454+ totalPartitionFieldCount , bucketAllocationFailuresCount , memoryStatus , categorizedDocCount , totalCategoryCount ,
455+ frequentCategoryCount , rareCategoryCount , deadCategoryCount , categorizationStatus , timestamp , logTime );
329456 }
330457 }
331458}
0 commit comments