
Commit 99515db

Addressed comments from arlind; changed the T_mult and T_0 calculations.

- Added debug information for API
- Fix flake
- Fix import
- Made test deterministic for feature preprocessing
- Fix bug in parsing log; convert to int
- Fix bug in testing

1 parent daaf23a commit 99515db

File tree

8 files changed: +55 -81 lines changed


autoPyTorch/evaluation/abstract_evaluator.py

Lines changed: 1 addition & 2 deletions

@@ -879,8 +879,7 @@ def file_output(
             pipeline = None
         else:
             pipeline = None
-
-        self.logger.debug("Saving directory {}, {}, {}".format(self.seed, self.num_run, self.budget))
+        self.logger.debug("Saving model {}_{}_{} to disk".format(self.seed, self.num_run, self.budget))
         self.backend.save_numrun_to_dir(
             seed=int(self.seed),
             idx=int(self.num_run),

autoPyTorch/pipeline/components/setup/lr_scheduler/CosineAnnealingWarmRestarts.py

Lines changed: 5 additions & 4 deletions

@@ -53,13 +53,14 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> BaseLRComponent:
         self.check_requirements(X, y)

         # initialise required attributes for the scheduler
-        T_mult: int = 1
-        T_0: int = max(X['epochs'] // self.n_restarts, 1)
+        T_mult: int = 2
+        # using Epochs = T_0 * (T_mult ** n_restarts - 1) / (T_mult - 1) (sum of a geometric progression)
+        T_0: int = max((X['epochs'] * (T_mult - 1)) // (T_mult ** self.n_restarts - 1), 1)

         self.scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
             optimizer=X['optimizer'],
-            T_0=T_0,
-            T_mult=T_mult,
+            T_0=int(T_0),
+            T_mult=int(T_mult),
         )
         return self
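With T_mult = 2 each cycle doubles in length, so n_restarts cycles of lengths T_0, 2*T_0, 4*T_0, ... sum to T_0 * (T_mult ** n_restarts - 1) / (T_mult - 1); solving that for T_0 gives the new expression. A minimal sanity-check sketch of the arithmetic (the helper name and example values are illustrative, not from the codebase):

# Sanity check for the warm-restart budget: cycle lengths form a
# geometric progression T_0, T_0*T_mult, T_0*T_mult**2, ..., so
# epochs ~= T_0 * (T_mult ** n_restarts - 1) / (T_mult - 1).
def warm_restart_t0(epochs, n_restarts, t_mult=2):
    return max((epochs * (t_mult - 1)) // (t_mult ** n_restarts - 1), 1)

epochs, n_restarts, t_mult = 20, 3, 2
t_0 = warm_restart_t0(epochs, n_restarts, t_mult)        # (20 * 1) // 7 = 2
cycles = [t_0 * t_mult ** i for i in range(n_restarts)]  # [2, 4, 8]
print(t_0, cycles, sum(cycles))                          # 2 [2, 4, 8] 14 <= 20

The integer division means the cycles slightly undershoot the epoch budget rather than overrun it.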

autoPyTorch/pipeline/components/setup/lr_scheduler/ReduceLROnPlateau.py

Lines changed: 0 additions & 1 deletion

@@ -99,7 +99,6 @@ def get_hyperparameter_search_space(
             default_value=0.1,
         )
     ) -> ConfigurationSpace:
-
         cs = ConfigurationSpace()

         add_hyperparameter(cs, mode, CategoricalHyperparameter)

test/test_api/api_utils.py

Lines changed: 42 additions & 0 deletions (new file)

@@ -0,0 +1,42 @@
+import glob
+import os
+
+
+def print_debug_information(automl):
+
+    # Log file path
+    log_file = glob.glob(os.path.join(
+        automl._backend.temporary_directory, 'AutoPyTorch*.log'))[0]
+
+    include_messages = ['INFO', 'DEBUG', 'WARN',
+                        'CRITICAL', 'ERROR', 'FATAL']
+
+    # There is a lot of content in the log files. Only parse
+    # the main messages and ignore the metalearning messages.
+    try:
+        with open(log_file) as logfile:
+            content = logfile.readlines()
+
+        # Keep only the messages that make debugging easier
+        content = [line for line in content if any(
+            msg in line for msg in include_messages
+        ) and 'metalearning' not in line]
+
+    except Exception as e:
+        return str(e)
+
+    # Also add the run history, if any
+    if hasattr(automl, 'runhistory_') and hasattr(automl.runhistory_, 'data'):
+        for k, v in automl.runhistory_.data.items():
+            content += ["{}->{}".format(k, v)]
+    else:
+        content += ['No RunHistory']
+
+    # Also add the ensemble history, if any
+    if len(automl.ensemble_performance_history) > 0:
+        content += [str(h) for h in automl.ensemble_performance_history]
+    else:
+        content += ['No Ensemble History']
+
+    return os.linesep.join(content)

test/test_api/test_api.py

Lines changed: 4 additions & 1 deletion

@@ -35,6 +35,8 @@
 from autoPyTorch.pipeline.components.setup.traditional_ml.traditional_learner import _traditional_learners
 from autoPyTorch.pipeline.components.training.metrics.metrics import accuracy

+from test.test_api.api_utils import print_debug_information
+

 CV_NUM_SPLITS = 2
 HOLDOUT_NUM_SPLITS = 1

@@ -148,7 +150,8 @@ def test_tabular_classification(openml_id, resampling_strategy, backend, resampl
         run_key_model_run_dir,
         f"{estimator.seed}.{successful_num_run}.{run_key.budget}.cv_model"
     )
-    assert os.path.exists(model_file), model_file
+    time.sleep(5)
+    assert os.path.exists(model_file), print_debug_information(estimator)

     model = estimator._backend.load_cv_model_by_seed_and_id_and_budget(
         estimator.seed, successful_num_run, run_key.budget)
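A side note on the new assertion: Python evaluates the message operand of an assert only when the condition fails, so the relatively expensive log scraping in print_debug_information runs only for a failing test. A minimal sketch of that lazy evaluation (collect_debug is a hypothetical stand-in):

# The message operand of assert is evaluated lazily: it runs only
# when the condition is False.
def collect_debug():
    print("scraping logs...")  # side effect shows when it actually runs
    return "debug details"

assert True, collect_debug()       # passes; collect_debug() is never called
try:
    assert False, collect_debug()  # fails; only now does collect_debug() run
except AssertionError as e:
    print(e)                       # -> scraping logs... then "debug details"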

test/test_pipeline/components/preprocessing/test_feature_preprocessor.py

Lines changed: 1 addition & 1 deletion

@@ -73,7 +73,7 @@ def test_pipeline_fit_include(self, fit_dictionary_tabular, preprocessor):
             dataset_properties=fit_dictionary_tabular['dataset_properties'],
             include={'feature_preprocessor': [preprocessor]})
         cs = pipeline.get_hyperparameter_search_space()
-        config = cs.sample_configuration()
+        config = cs.get_default_configuration()
         pipeline.set_hyperparameters(config)
         try:
             pipeline.fit(fit_dictionary_tabular)
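The switch from sample_configuration() to get_default_configuration() is what makes the test deterministic: sampling draws a random point from the hyperparameter space on every run, while the default configuration is fixed. A small ConfigSpace sketch showing the difference (the 'lr' hyperparameter is illustrative, not taken from the pipeline):

# Why get_default_configuration() is deterministic while
# sample_configuration() is not.
from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter

cs = ConfigurationSpace()
cs.add_hyperparameter(
    UniformFloatHyperparameter('lr', lower=1e-4, upper=1e-1, default_value=1e-2))

print(cs.sample_configuration())       # random draw; differs run to run unless seeded
print(cs.get_default_configuration())  # always lr=0.01, the declared default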

test/test_pipeline/test_tabular_classification.py

Lines changed: 2 additions & 1 deletion

@@ -361,7 +361,8 @@ def test_set_choices_updates(self, fit_dictionary_tabular):
     @pytest.mark.parametrize('lr_scheduler', ['CosineAnnealingWarmRestarts',
                                               'ReduceLROnPlateau'])
     def test_trainer_cocktails(self, fit_dictionary_tabular, mocker, lr_scheduler, trainer):  # noqa F811
-        fit_dictionary_tabular['epochs'] = 10
+        fit_dictionary_tabular['epochs'] = 20
+        fit_dictionary_tabular['early_stopping'] = 20
         pipeline = TabularClassificationPipeline(
             dataset_properties=fit_dictionary_tabular['dataset_properties'],
             include={'lr_scheduler': [lr_scheduler], 'trainer': [trainer]})
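Raising epochs to 20 and setting early_stopping to the same value plausibly keeps patience-based early stopping from ending the run before the scheduler completes its restart cycles; that reading of the fit-dictionary key is an assumption, and the sketch below is a generic patience loop, not autoPyTorch's trainer:

# Generic patience-based early stopping, for illustration only
# (treating 'early_stopping' as a patience value is an assumption).
def run(epochs, patience, losses):
    best, since_best = float('inf'), 0
    for epoch in range(epochs):
        if losses[epoch] < best:
            best, since_best = losses[epoch], 0
        else:
            since_best += 1
        if since_best >= patience:
            return epoch + 1  # stopped early
    return epochs

flat = [1.0] * 20        # no improvement after the first epoch
print(run(20, 5, flat))  # 6: a patience of 5 stops the run early
print(run(20, 20, flat)) # 20: patience == epochs runs the full budget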

test/utils.py

Lines changed: 0 additions & 71 deletions
This file was deleted.
