5151import org .elasticsearch .rest .RestHandler ;
5252import org .elasticsearch .script .ScriptService ;
5353import org .elasticsearch .threadpool .ExecutorBuilder ;
54- import org .elasticsearch .threadpool .FixedExecutorBuilder ;
54+ import org .elasticsearch .threadpool .ScalingExecutorBuilder ;
5555import org .elasticsearch .threadpool .ThreadPool ;
5656import org .elasticsearch .watcher .ResourceWatcherService ;
5757import org .elasticsearch .xpack .core .XPackPlugin ;
@@ -256,7 +256,7 @@ public class MachineLearning extends Plugin implements ActionPlugin, AnalysisPlu
256256 public static final String BASE_PATH = "/_ml/" ;
257257 public static final String PRE_V7_BASE_PATH = "/_xpack/ml/" ;
258258 public static final String DATAFEED_THREAD_POOL_NAME = NAME + "_datafeed" ;
259- public static final String AUTODETECT_THREAD_POOL_NAME = NAME + "_autodetect" ;
259+ public static final String JOB_COMMS_THREAD_POOL_NAME = NAME + "_job_comms" ;
260260 public static final String UTILITY_THREAD_POOL_NAME = NAME + "_utility" ;
261261
262262 // This is for performance testing. It's not exposed to the end user.
@@ -276,6 +276,17 @@ public class MachineLearning extends Plugin implements ActionPlugin, AnalysisPlu
276276 public static final Setting <Integer > MAX_LAZY_ML_NODES =
277277 Setting .intSetting ("xpack.ml.max_lazy_ml_nodes" , 0 , 0 , 3 , Property .Dynamic , Property .NodeScope );
278278
279+ // Before 8.0.0 this needs to match the max allowed value for xpack.ml.max_open_jobs,
280+ // as the current node could be running in a cluster where some nodes are still using
281+ // that setting. From 8.0.0 onwards we have the flexibility to increase it...
282+ private static final int MAX_MAX_OPEN_JOBS_PER_NODE = 512 ;
283+ // This setting is cluster-wide and can be set dynamically. However, prior to version 7.1 it was
284+ // a non-dynamic per-node setting. In a mixed version cluster containing 6.7 or 7.0 nodes those
285+ // older nodes will not react to the dynamic changes. Therefore, in such mixed version clusters
286+ // allocation will be based on the value first read at node startup rather than the current value.
287+ public static final Setting <Integer > MAX_OPEN_JOBS_PER_NODE =
288+ Setting .intSetting ("xpack.ml.max_open_jobs" , 20 , 1 , MAX_MAX_OPEN_JOBS_PER_NODE , Property .Dynamic , Property .NodeScope );
289+
279290 private static final Logger logger = LogManager .getLogger (XPackPlugin .class );
280291
281292 private final Settings settings ;
@@ -315,7 +326,7 @@ public List<Setting<?>> getSettings() {
315326 MAX_MACHINE_MEMORY_PERCENT ,
316327 AutodetectBuilder .DONT_PERSIST_MODEL_STATE_SETTING ,
317328 AutodetectBuilder .MAX_ANOMALY_RECORDS_SETTING_DYNAMIC ,
318- AutodetectProcessManager . MAX_OPEN_JOBS_PER_NODE ,
329+ MAX_OPEN_JOBS_PER_NODE ,
319330 AutodetectProcessManager .MIN_DISK_SPACE_OFF_HEAP ,
320331 MlConfigMigrationEligibilityCheck .ENABLE_CONFIG_MIGRATION ));
321332 }
@@ -333,8 +344,10 @@ public Settings additionalSettings() {
333344 Settings .Builder additionalSettings = Settings .builder ();
334345 Boolean allocationEnabled = ML_ENABLED .get (settings );
335346 if (allocationEnabled != null && allocationEnabled ) {
347+ // TODO: stop setting this attribute in 8.0.0 but disallow it (like mlEnabledNodeAttrName below)
348+ // The ML UI will need to be changed to check machineMemoryAttrName instead before this is done
336349 addMlNodeAttribute (additionalSettings , maxOpenJobsPerNodeNodeAttrName ,
337- String .valueOf (AutodetectProcessManager . MAX_OPEN_JOBS_PER_NODE .get (settings )));
350+ String .valueOf (MAX_OPEN_JOBS_PER_NODE .get (settings )));
338351 addMlNodeAttribute (additionalSettings , machineMemoryAttrName ,
339352 Long .toString (machineMemoryFromStats (OsProbe .getInstance ().osStats ())));
340353 // This is not used in v7 and higher, but users are still prevented from setting it directly to avoid confusion
@@ -608,35 +621,37 @@ public List<RestHandler> getRestHandlers(Settings settings, RestController restC
608621 new ActionHandler <>(SetUpgradeModeAction .INSTANCE , TransportSetUpgradeModeAction .class )
609622 );
610623 }
624+
611625 @ Override
612626 public List <ExecutorBuilder <?>> getExecutorBuilders (Settings settings ) {
613627 if (false == enabled || transportClientMode ) {
614628 return emptyList ();
615629 }
616- int maxNumberOfJobs = AutodetectProcessManager . MAX_OPEN_JOBS_PER_NODE . get ( settings );
617- // 4 threads per job: for cpp logging, result processing, state processing and
618- // AutodetectProcessManager worker thread:
619- FixedExecutorBuilder autoDetect = new FixedExecutorBuilder ( settings , AUTODETECT_THREAD_POOL_NAME ,
620- maxNumberOfJobs * 4 , maxNumberOfJobs * 4 , "xpack.ml.autodetect_thread_pool" );
621-
622- // 4 threads per job: processing logging, result and state of the renormalization process.
623- // Renormalization doesn't run for the entire lifetime of a job, so additionally autodetect process
624- // based operation (open, close, flush, post data), datafeed based operations (start and stop)
625- // and deleting expired data use this threadpool too and queue up if all threads are busy.
626- FixedExecutorBuilder renormalizer = new FixedExecutorBuilder ( settings , UTILITY_THREAD_POOL_NAME ,
627- maxNumberOfJobs * 4 , 500 , "xpack.ml.utility_thread_pool" );
628-
629- // TODO: if datafeed and non datafeed jobs are considered more equal and the datafeed and
630- // autodetect process are created at the same time then these two different TPs can merge.
631- FixedExecutorBuilder datafeed = new FixedExecutorBuilder ( settings , DATAFEED_THREAD_POOL_NAME ,
632- maxNumberOfJobs , 200 , "xpack.ml.datafeed_thread_pool" );
633- return Arrays .asList (autoDetect , renormalizer , datafeed );
630+
631+ // These thread pools scale such that they can accommodate the maximum number of jobs per node
632+ // that is permitted to be configured. It is up to other code to enforce the configured maximum
633+ // number of jobs per node.
634+
635+ // 4 threads per job process: for input, c++ logger output, result processing and state processing.
636+ ScalingExecutorBuilder jobComms = new ScalingExecutorBuilder ( JOB_COMMS_THREAD_POOL_NAME ,
637+ 4 , MAX_MAX_OPEN_JOBS_PER_NODE * 4 , TimeValue . timeValueMinutes ( 1 ), "xpack.ml.job_comms_thread_pool" );
638+
639+ // This pool is used by renormalization, plus some other parts of ML that
640+ // need to kick off non-trivial activities that mustn't block other threads.
641+ ScalingExecutorBuilder utility = new ScalingExecutorBuilder ( UTILITY_THREAD_POOL_NAME ,
642+ 1 , MAX_MAX_OPEN_JOBS_PER_NODE * 4 , TimeValue . timeValueMinutes ( 10 ), "xpack.ml.utility_thread_pool" );
643+
644+ ScalingExecutorBuilder datafeed = new ScalingExecutorBuilder ( DATAFEED_THREAD_POOL_NAME ,
645+ 1 , MAX_MAX_OPEN_JOBS_PER_NODE , TimeValue . timeValueMinutes ( 1 ), "xpack.ml.datafeed_thread_pool" );
646+
647+ return Arrays .asList (jobComms , utility , datafeed );
634648 }
635649
636650 @ Override
637651 public Map <String , AnalysisProvider <TokenizerFactory >> getTokenizers () {
638652 return Collections .singletonMap (MlClassicTokenizer .NAME , MlClassicTokenizerFactory ::new );
639653 }
654+
640655 @ Override
641656 public UnaryOperator <Map <String , IndexTemplateMetaData >> getIndexTemplateMetaDataUpgrader () {
642657 return templates -> {
0 commit comments