Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion autosklearn/automl.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@
)
from autosklearn.util.parallel import preload_modules
from autosklearn.util.single_thread_client import SingleThreadedClient
from autosklearn.util.smac_wrap import SMACCallback, SmacRunCallback
from autosklearn.util.stopwatch import StopWatch

import unittest.mock
Expand Down Expand Up @@ -218,7 +219,7 @@ def __init__(
logging_config: Optional[Mapping] = None,
metrics: Sequence[Scorer] | None = None,
scoring_functions: Optional[list[Scorer]] = None,
get_trials_callback: Optional[IncorporateRunResultCallback] = None,
get_trials_callback: SMACCallback | None = None,
dataset_compression: bool | Mapping[str, Any] = True,
allow_string_features: bool = True,
):
Expand Down Expand Up @@ -247,6 +248,15 @@ def __init__(
memory_limit=memory_limit,
)

# If we got something callable for `get_trials_callback`, wrap it so SMAC
# will accept it.
if (
get_trials_callback is not None
and callable(get_trials_callback)
and not isinstance(get_trials_callback, IncorporateRunResultCallback)
):
get_trials_callback = SmacRunCallback(get_trials_callback)

self._delete_tmp_folder_after_terminate = delete_tmp_folder_after_terminate
self._time_for_task = time_left_for_this_task
self._per_run_time_limit = per_run_time_limit
Expand Down
20 changes: 15 additions & 5 deletions autosklearn/estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
)
from autosklearn.metrics import Scorer
from autosklearn.pipeline.base import BasePipeline
from autosklearn.util.smac_wrap import SMACCallback


class AutoSklearnEstimator(BaseEstimator):
Expand Down Expand Up @@ -51,7 +52,7 @@ def __init__(
metric: Scorer | Sequence[Scorer] | None = None,
scoring_functions: Optional[List[Scorer]] = None,
load_models: bool = True,
get_trials_callback=None,
get_trials_callback: SMACCallback | None = None,
dataset_compression: Union[bool, Mapping[str, Any]] = True,
allow_string_features: bool = True,
):
Expand Down Expand Up @@ -266,10 +267,19 @@ def __init__(
Whether to load the models after fitting Auto-sklearn.

get_trials_callback: callable
Callback function to create an object of subclass defined in module
`smac.callbacks <https://automl.github.io/SMAC3/master/apidoc/smac.callbacks.html>`_.
This is an advanced feature. Use only if you are familiar with
`SMAC <https://automl.github.io/SMAC3/master/index.html>`_.
A callable with the following definition.

* (smac.SMBO, smac.RunInfo, smac.RunValue, time_left: float) -> bool | None

This will be called after SMAC, the underlying optimizer for autosklearn,
finishes training each run.

You can use this to record your own information about the optimization
process. You can also use this to enable early stopping based on some
criteria.

See the example:
:ref:`Early Stopping And Callbacks <sphx_glr_examples_40_advanced_example_early_stopping_and_callbacks.py>`.

dataset_compression: Union[bool, Mapping[str, Any]] = True
We compress datasets so that they fit into some predefined amount of memory.
Expand Down
43 changes: 43 additions & 0 deletions autosklearn/util/smac_wrap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from __future__ import annotations

from typing import Callable, Union

from smac.callbacks import IncorporateRunResultCallback
from smac.optimizer.smbo import SMBO
from smac.runhistory.runhistory import RunInfo, RunValue

# Signature of a user-supplied trial callback:
#   (smbo, run_info, result, time_left) -> bool | None
# NOTE: this alias is an ordinary runtime expression, so the
# ``from __future__ import annotations`` at the top of the module does not defer
# it; ``typing.Union`` keeps it valid where ``bool | None`` would not evaluate.
SMACCallback = Callable[[SMBO, RunInfo, RunValue, float], Union[bool, None]]


class SmacRunCallback(IncorporateRunResultCallback):
    """Adapter that exposes a plain callable as a SMAC run-result callback.

    The wrapped callable is stored on ``self.f`` and every call to the instance
    is forwarded to it unchanged, so a simple function can be used wherever an
    ``IncorporateRunResultCallback`` instance is expected.
    """

    def __init__(self, f: SMACCallback):
        """
        Parameters
        ----------
        f: SMACCallback
            Callable invoked as ``f(smbo, run_info, result, time_left)``.

        Raises
        ------
        TypeError
            If ``f`` is not callable.
        """
        # Fail at construction time rather than at the first callback
        # invocation, where the error would be much harder to trace back.
        if not callable(f):
            raise TypeError(
                f"`f` must be callable, got object of type {type(f).__name__}"
            )

        super().__init__()
        self.f = f

    def __call__(
        self,
        smbo: SMBO,
        run_info: RunInfo,
        result: RunValue,
        time_left: float,
    ) -> bool | None:
        """Forward the finished run to the wrapped callable.

        Parameters
        ----------
        smbo: SMBO
            The SMAC SMBO object

        run_info: RunInfo
            Information about the run completed

        result: RunValue
            The results of the run

        time_left: float
            How much time is left for the remaining runs

        Returns
        -------
        bool | None
            Whatever the wrapped callable returned.
            If False is returned, the optimization loop will stop
        """
        return self.f(smbo, run_info, result, time_left)
5 changes: 5 additions & 0 deletions doc/manual.rst
Original file line number Diff line number Diff line change
Expand Up @@ -376,3 +376,8 @@ Other
according to its performance on the validation set. Setting the initial
configurations found by meta-learning to zero makes *auto-sklearn* use the
regular SMAC algorithm for suggesting new hyperparameter configurations.

.. collapse:: <b>Early stopping and Callbacks</b>

By using the parameter ``get_trials_callback``, we can get access to the results
of runs as they occur. See this example :ref:`Early Stopping And Callbacks <sphx_glr_examples_40_advanced_example_early_stopping_and_callbacks.py>` for more!
81 changes: 81 additions & 0 deletions examples/40_advanced/example_early_stopping_and_callbacks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
"""
============================
Early stopping and Callbacks
============================

The example below shows how we can use the ``get_trials_callback`` parameter of
auto-sklearn to implement an early-stopping mechanism through a callback.

These callbacks give access to the result of each model + hyperparameter configuration
optimized by SMAC, the underlying optimizer for autosklearn. By checking the cost of
a result, we can implement a simple yet effective early stopping mechanism!

Do note however, this does not provide any access to the ensembles that autosklearn
produces, only the individual models. You may wish to perform a more sophisticated
early stopping mechanism such that there are enough good models for autosklearn to build
an ensemble with. This is here to provide a simple example.
"""
from __future__ import annotations

from pprint import pprint

import sklearn.datasets
import sklearn.metrics

import autosklearn.classification

from smac.optimizer.smbo import SMBO
from smac.runhistory.runhistory import RunInfo, RunValue


############################################################################
# Build and fit a classifier
# ==========================
def callback(
    smbo: SMBO,
    run_info: RunInfo,
    result: RunValue,
    time_left: float,
) -> bool | None:
    """Request an early stop once a single run reaches a very low cost.

    SMAC reads the return value to decide whether to keep going: ``False``
    ends the search process, every other value lets it continue.
    """
    # The arguments passed in are described in the SMAC documentation
    # https://automl.github.io/SMAC3/main/
    good_enough = result.cost <= 0.02
    if not good_enough:
        return None

    for item in ("Stopping!", run_info, result):
        print(item)
    return False


# ``train_test_split`` lives in the ``sklearn.model_selection`` submodule. Import
# it explicitly rather than relying on another package having loaded it already.
import sklearn.model_selection

X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, random_state=1
)

# ``callback`` is handed to auto-sklearn through ``get_trials_callback``.
automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=120, per_run_time_limit=30, get_trials_callback=callback
)
automl.fit(X_train, y_train, dataset_name="breast_cancer")

############################################################################
# View the models found by auto-sklearn
# =====================================

print(automl.leaderboard())

############################################################################
# Print the final ensemble constructed by auto-sklearn
# ====================================================

pprint(automl.show_models(), indent=4)

###########################################################################
# Get the Score of the final ensemble
# ===================================

predictions = automl.predict(X_test)
print("Accuracy score:", sklearn.metrics.accuracy_score(y_test, predictions))
37 changes: 37 additions & 0 deletions test/test_automl/test_early_stopping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Callable

if TYPE_CHECKING:
import numpy as np
from smac.optimizer.smbo import SMBO
from smac.runhistory.runhistory import RunInfo, RunValue

from autosklearn.automl import AutoMLClassifier


def test_early_stopping(
    make_automl_classifier: Callable[..., AutoMLClassifier],
    make_sklearn_dataset: Callable[..., tuple[np.ndarray, ...]],
) -> None:
    """
    Expects
    -------
    * Should stop early after fitting 2 models
    """

    def callback(
        smbo: SMBO,
        run_info: RunInfo,
        result: RunValue,
        time_left: float,
    ) -> bool | None:
        # Returning False requests a stop; None lets the search continue.
        if int(result.additional_info["num_run"]) >= 2:
            return False
        return None

    automl = make_automl_classifier(get_trials_callback=callback)

    # Only the training split is used here; the test split is discarded.
    X_train, Y_train, _, _ = make_sklearn_dataset("iris")
    automl.fit(X_train, Y_train)

    assert len(automl.runhistory_.data) == 2