@@ -486,11 +486,14 @@ def _load_best_individual_model(self) -> SingleBest:
486
486
487
487
return ensemble
488
488
489
- def _do_dummy_prediction (self , num_run : int ) -> None :
489
+ def _do_dummy_prediction (self ) -> None :
490
490
491
491
assert self ._metric is not None
492
492
assert self ._logger is not None
493
493
494
+ # For dummy estimator, we always expect the num_run to be 1
495
+ num_run = 1
496
+
494
497
self ._logger .info ("Starting to create dummy predictions." )
495
498
496
499
memory_limit = self ._memory_limit
@@ -551,29 +554,20 @@ def _do_dummy_prediction(self, num_run: int) -> None:
551
554
% (str (status ), str (additional_info ))
552
555
)
553
556
554
- def _do_traditional_prediction (self , num_run : int , time_left : int , func_eval_time_limit_secs : int
555
- ) -> int :
557
+ def _do_traditional_prediction (self , time_left : int , func_eval_time_limit_secs : int ) -> None :
556
558
"""
557
559
Fits traditional machine learning algorithms to the provided dataset, while
558
560
complying with time resource allocation.
559
561
560
562
This method currently only supports classification.
561
563
562
564
Args:
563
- num_run: (int)
564
- An identifier to indicate the current machine learning algorithm
565
- being processed
566
565
time_left: (int)
567
566
Hard limit on how many machine learning algorithms can be fit. Depending on how
568
567
fast a traditional machine learning algorithm trains, it will allow multiple
569
568
models to be fitted.
570
569
func_eval_time_limit_secs: (int)
571
570
Maximum training time each algorithm is allowed to take, during training
572
-
573
- Returns:
574
- num_run: (int)
575
- The incremented identifier index. This depends on how many machine learning
576
- models were fitted.
577
571
"""
578
572
579
573
# Mypy Checkings -- Traditional prediction is only called for search
@@ -588,8 +582,8 @@ def _do_traditional_prediction(self, num_run: int, time_left: int, func_eval_tim
588
582
available_classifiers = get_available_classifiers ()
589
583
dask_futures = []
590
584
591
- total_number_classifiers = len (available_classifiers ) + num_run
592
- for n_r , classifier in enumerate (available_classifiers , start = num_run ):
585
+ total_number_classifiers = len (available_classifiers )
586
+ for n_r , classifier in enumerate (available_classifiers ):
593
587
594
588
# Only launch a task if there is time
595
589
start_time = time .time ()
@@ -608,7 +602,7 @@ def _do_traditional_prediction(self, num_run: int, time_left: int, func_eval_tim
608
602
logger_port = self ._logger_port ,
609
603
cost_for_crash = get_cost_of_crash (self ._metric ),
610
604
abort_on_first_run_crash = False ,
611
- initial_num_run = n_r ,
605
+ initial_num_run = self . _backend . get_next_num_run () ,
612
606
stats = stats ,
613
607
memory_limit = memory_limit ,
614
608
disable_file_output = True if len (self ._disable_file_output ) > 0 else False ,
@@ -622,9 +616,6 @@ def _do_traditional_prediction(self, num_run: int, time_left: int, func_eval_tim
622
616
)
623
617
])
624
618
625
- # Increment the launched job index
626
- num_run = n_r
627
-
628
619
# When managing time, we need to take into account the allocated time resources,
629
620
# which are dependent on the number of cores. 'dask_futures' is a proxy to the number
630
621
# of workers /n_jobs that we have, in that if there are 4 cores allocated, we can run at most
@@ -677,7 +668,7 @@ def _do_traditional_prediction(self, num_run: int, time_left: int, func_eval_tim
677
668
"Please consider increasing the run time to further improve performance." )
678
669
break
679
670
680
- return num_run
671
+ return
681
672
682
673
def _search (
683
674
self ,
@@ -847,10 +838,9 @@ def _search(
847
838
)
848
839
849
840
# ============> Run dummy predictions
850
- num_run = 1
851
841
dummy_task_name = 'runDummy'
852
842
self ._stopwatch .start_task (dummy_task_name )
853
- self ._do_dummy_prediction (num_run )
843
+ self ._do_dummy_prediction ()
854
844
self ._stopwatch .stop_task (dummy_task_name )
855
845
856
846
# ============> Run traditional ml
@@ -866,8 +856,8 @@ def _search(
866
856
time_for_traditional = int (
867
857
self ._time_for_task - elapsed_time - func_eval_time_limit_secs
868
858
)
869
- num_run = self ._do_traditional_prediction (
870
- num_run = num_run + 1 , func_eval_time_limit_secs = func_eval_time_limit_secs ,
859
+ self ._do_traditional_prediction (
860
+ func_eval_time_limit_secs = func_eval_time_limit_secs ,
871
861
time_left = time_for_traditional ,
872
862
)
873
863
self ._stopwatch .stop_task (traditional_task_name )
@@ -943,7 +933,9 @@ def _search(
943
933
pipeline_config = {** self .pipeline_options , ** budget_config },
944
934
ensemble_callback = proc_ensemble ,
945
935
logger_port = self ._logger_port ,
946
- start_num_run = num_run ,
936
+ # We do not increase the num_run here, this is something
937
+ # smac does internally
938
+ start_num_run = self ._backend .get_next_num_run (peek = True ),
947
939
search_space_updates = self .search_space_updates
948
940
)
949
941
try :
@@ -1048,7 +1040,7 @@ def refit(
1048
1040
'train_indices' : dataset .splits [split_id ][0 ],
1049
1041
'val_indices' : dataset .splits [split_id ][1 ],
1050
1042
'split_id' : split_id ,
1051
- 'num_run' : 0
1043
+ 'num_run' : self . _backend . get_next_num_run (),
1052
1044
})
1053
1045
X .update ({** self .pipeline_options , ** budget_config })
1054
1046
if self .models_ is None or len (self .models_ ) == 0 or self .ensemble_ is None :
@@ -1125,7 +1117,7 @@ def fit(self,
1125
1117
'train_indices' : dataset .splits [split_id ][0 ],
1126
1118
'val_indices' : dataset .splits [split_id ][1 ],
1127
1119
'split_id' : split_id ,
1128
- 'num_run' : 0
1120
+ 'num_run' : self . _backend . get_next_num_run (),
1129
1121
})
1130
1122
X .update ({** self .pipeline_options , ** budget_config })
1131
1123
0 commit comments