33 * or more contributor license agreements. Licensed under the Elastic License;
44 * you may not use this file except in compliance with the Elastic License.
55 */
6- package org .elasticsearch .xpack .ml .datafeed .extractor .scroll ;
6+ package org .elasticsearch .xpack .ml .datafeed .extractor .fields ;
77
88import org .elasticsearch .action .fieldcaps .FieldCapabilities ;
99import org .elasticsearch .action .fieldcaps .FieldCapabilitiesResponse ;
10- import org .elasticsearch .search .SearchHit ;
11- import org .elasticsearch .xpack .core .ml .datafeed .DatafeedConfig ;
12- import org .elasticsearch .xpack .core .ml .job .config .Job ;
13- import org .elasticsearch .xpack .core .ml .utils .ExceptionsHelper ;
1410import org .elasticsearch .xpack .core .ml .utils .MlStrings ;
1511
16- import java .util .ArrayList ;
17- import java .util .Arrays ;
12+ import java .util .Collection ;
1813import java .util .Collections ;
1914import java .util .List ;
2015import java .util .Map ;
2520/**
2621 * The fields the datafeed has to extract
2722 */
28- class ExtractedFields {
23+ public class ExtractedFields {
2924
3025 private static final String TEXT = "text" ;
3126
32- private final ExtractedField timeField ;
3327 private final List <ExtractedField > allFields ;
3428 private final List <ExtractedField > docValueFields ;
3529 private final String [] sourceFields ;
3630
37- ExtractedFields (ExtractedField timeField , List <ExtractedField > allFields ) {
38- if (!allFields .contains (timeField )) {
39- throw new IllegalArgumentException ("timeField should also be contained in allFields" );
40- }
41- this .timeField = Objects .requireNonNull (timeField );
31+ public ExtractedFields (List <ExtractedField > allFields ) {
4232 this .allFields = Collections .unmodifiableList (allFields );
4333 this .docValueFields = filterFields (ExtractedField .ExtractionMethod .DOC_VALUE , allFields );
4434 this .sourceFields = filterFields (ExtractedField .ExtractionMethod .SOURCE , allFields ).stream ().map (ExtractedField ::getName )
@@ -61,60 +51,33 @@ private static List<ExtractedField> filterFields(ExtractedField.ExtractionMethod
6151 return fields .stream ().filter (field -> field .getExtractionMethod () == method ).collect (Collectors .toList ());
6252 }
6353
64- public String timeField () {
65- return timeField .getName ();
54+ public static ExtractedFields build (Collection <String > allFields , Set <String > scriptFields ,
55+ FieldCapabilitiesResponse fieldsCapabilities ) {
56+ ExtractionMethodDetector extractionMethodDetector = new ExtractionMethodDetector (scriptFields , fieldsCapabilities );
57+ return new ExtractedFields (allFields .stream ().map (field -> extractionMethodDetector .detect (field )).collect (Collectors .toList ()));
6658 }
6759
68- public Long timeFieldValue (SearchHit hit ) {
69- Object [] value = timeField .value (hit );
70- if (value .length != 1 ) {
71- throw new RuntimeException ("Time field [" + timeField .getAlias () + "] expected a single value; actual was: "
72- + Arrays .toString (value ));
73- }
74- if (value [0 ] instanceof Long ) {
75- return (Long ) value [0 ];
76- }
77- throw new RuntimeException ("Time field [" + timeField .getAlias () + "] expected a long value; actual was: " + value [0 ]);
78- }
60+ protected static class ExtractionMethodDetector {
7961
80- public static ExtractedFields build (Job job , DatafeedConfig datafeed , FieldCapabilitiesResponse fieldsCapabilities ) {
81- Set <String > scriptFields = datafeed .getScriptFields ().stream ().map (sf -> sf .fieldName ()).collect (Collectors .toSet ());
82- ExtractionMethodDetector extractionMethodDetector = new ExtractionMethodDetector (datafeed .getId (), scriptFields ,
83- fieldsCapabilities );
84- String timeField = job .getDataDescription ().getTimeField ();
85- if (scriptFields .contains (timeField ) == false && extractionMethodDetector .isAggregatable (timeField ) == false ) {
86- throw ExceptionsHelper .badRequestException ("datafeed [" + datafeed .getId () + "] cannot retrieve time field [" + timeField
87- + "] because it is not aggregatable" );
88- }
89- ExtractedField timeExtractedField = ExtractedField .newTimeField (timeField , scriptFields .contains (timeField ) ?
90- ExtractedField .ExtractionMethod .SCRIPT_FIELD : ExtractedField .ExtractionMethod .DOC_VALUE );
91- List <String > remainingFields = job .allInputFields ().stream ().filter (f -> !f .equals (timeField )).collect (Collectors .toList ());
92- List <ExtractedField > allExtractedFields = new ArrayList <>(remainingFields .size () + 1 );
93- allExtractedFields .add (timeExtractedField );
94- remainingFields .stream ().forEach (field -> allExtractedFields .add (extractionMethodDetector .detect (field )));
95- return new ExtractedFields (timeExtractedField , allExtractedFields );
96- }
97-
98- private static class ExtractionMethodDetector {
99-
100- private final String datafeedId ;
10162 private final Set <String > scriptFields ;
10263 private final FieldCapabilitiesResponse fieldsCapabilities ;
10364
/**
 * Creates a detector that decides how a field should be extracted.
 *
 * @param scriptFields the names of the fields that are script fields
 * @param fieldsCapabilities the field capabilities used to look up each field
 */
protected ExtractionMethodDetector(Set<String> scriptFields, FieldCapabilitiesResponse fieldsCapabilities) {
    this.scriptFields = scriptFields;
    this.fieldsCapabilities = fieldsCapabilities;
}
10969
110- private ExtractedField detect (String field ) {
70+ protected ExtractedField detect (String field ) {
11171 String internalField = field ;
11272 ExtractedField .ExtractionMethod method = ExtractedField .ExtractionMethod .SOURCE ;
11373 if (scriptFields .contains (field )) {
11474 method = ExtractedField .ExtractionMethod .SCRIPT_FIELD ;
11575 } else if (isAggregatable (field )) {
11676 method = ExtractedField .ExtractionMethod .DOC_VALUE ;
117- } else if (isText (field )) {
77+ if (isFieldOfType (field , "date" )) {
78+ return ExtractedField .newTimeField (field , method );
79+ }
80+ } else if (isFieldOfType (field , TEXT )) {
11881 String parentField = MlStrings .getParentField (field );
11982 // Field is text so check if it is a multi-field
12083 if (Objects .equals (parentField , field ) == false && fieldsCapabilities .getField (parentField ) != null ) {
@@ -127,11 +90,10 @@ private ExtractedField detect(String field) {
12790 return ExtractedField .newField (field , internalField , method );
12891 }
12992
130- private boolean isAggregatable (String field ) {
93+ protected boolean isAggregatable (String field ) {
13194 Map <String , FieldCapabilities > fieldCaps = fieldsCapabilities .getField (field );
13295 if (fieldCaps == null || fieldCaps .isEmpty ()) {
133- throw ExceptionsHelper .badRequestException ("datafeed [" + datafeedId + "] cannot retrieve field [" + field
134- + "] because it has no mappings" );
96+ throw new IllegalArgumentException ("cannot retrieve field [" + field + "] because it has no mappings" );
13597 }
13698 for (FieldCapabilities capsPerIndex : fieldCaps .values ()) {
13799 if (!capsPerIndex .isAggregatable ()) {
@@ -141,10 +103,10 @@ private boolean isAggregatable(String field) {
141103 return true ;
142104 }
143105
144- private boolean isText (String field ) {
106+ private boolean isFieldOfType (String field , String type ) {
145107 Map <String , FieldCapabilities > fieldCaps = fieldsCapabilities .getField (field );
146108 if (fieldCaps != null && fieldCaps .size () == 1 ) {
147- return fieldCaps .containsKey (TEXT );
109+ return fieldCaps .containsKey (type );
148110 }
149111 return false ;
150112 }
0 commit comments