25 changes: 24 additions & 1 deletion autoPyTorch/pipeline/base_pipeline.py
@@ -398,24 +398,47 @@ def _check_search_space_updates(self, include: Optional[Dict[str, Any]],
raise ValueError("Unknown node name. Expected update node name to be in {} "
"got {}".format(self.named_steps.keys(), update.node_name))
node = self.named_steps[update.node_name]
# if node is a choice module
if hasattr(node, 'get_components'):
split_hyperparameter = update.hyperparameter.split(':')

# raise an error if the component is not present in include
if include is not None and update.node_name in include.keys():
if split_hyperparameter[0] not in include[update.node_name]:
raise ValueError("Not found {} in include".format(split_hyperparameter[0]))

# raise an error if the component is present in exclude
if exclude is not None and update.node_name in exclude.keys():
if split_hyperparameter[0] in exclude[update.node_name]:
raise ValueError("Found {} in exclude".format(split_hyperparameter[0]))

components = node.get_components()
if split_hyperparameter[0] not in components.keys():
# if the hyperparameter is __choice__, check that the
# components in the value range of the search space update
# are among the components of the choice module
if split_hyperparameter[0] == '__choice__':
for choice in update.value_range:
if include is not None and update.node_name in include.keys():
if choice not in include[update.node_name]:
raise ValueError("Not found {} in include".format(choice))
if exclude is not None and update.node_name in exclude.keys():
if choice in exclude[update.node_name]:
raise ValueError("Found {} in exclude".format(choice))
if choice not in components.keys():
raise ValueError("Unknown hyperparameter for choice {}. "
"Expected update hyperparameter "
"to be in {} got {}".format(node.__class__.__name__,
components.keys(), choice))
# check that the component whose hyperparameter
# needs to be updated is among the components of the
# choice module
elif split_hyperparameter[0] not in components.keys():
raise ValueError("Unknown hyperparameter for choice {}. "
"Expected update hyperparameter "
"to be in {} got {}".format(node.__class__.__name__,
components.keys(), split_hyperparameter[0]))
else:
# check if hyperparameter is in the search space of the component
component = components[split_hyperparameter[0]]
if split_hyperparameter[1] not in component. \
get_hyperparameter_search_space(dataset_properties=self.dataset_properties):
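To make the validation above concrete, here is a minimal sketch of an update flowing through these checks. All node and component names below are hypothetical examples, not taken from this diff:

from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdate

# Hypothetical update narrowing a choice node to a single component.
update = HyperparameterSearchSpaceUpdate(node_name='encoder',
                                         hyperparameter='__choice__',
                                         value_range=['OneHotEncoder'],
                                         default_value='OneHotEncoder')

# update.hyperparameter.split(':') yields ['__choice__'], so the
# '__choice__' branch runs. With include={'encoder': ['NoEncoder']} the
# loop over update.value_range raises
# ValueError("Not found OneHotEncoder in include"); with
# exclude={'encoder': ['OneHotEncoder']} the exclude check raises instead.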
48 changes: 25 additions & 23 deletions autoPyTorch/pipeline/components/base_choice.py
@@ -1,6 +1,7 @@
import re
import warnings
from collections import OrderedDict
from typing import Any, Dict, List, Optional, Tuple, Union
from typing import Any, Dict, List, Optional

from ConfigSpace.configuration_space import Configuration, ConfigurationSpace

@@ -9,7 +10,8 @@
from sklearn.utils import check_random_state

from autoPyTorch.pipeline.components.base_component import autoPyTorchComponent
from autoPyTorch.utils.common import FitRequirement
from autoPyTorch.utils.common import FitRequirement, HyperparameterSearchSpace
from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdate


class autoPyTorchChoice(object):
@@ -49,7 +51,7 @@ def __init__(self,
# self.set_hyperparameters(self.configuration)
self.choice: Optional[autoPyTorchComponent] = None

self._cs_updates: Dict[str, Tuple] = dict()
self._cs_updates: Dict[str, HyperparameterSearchSpaceUpdate] = dict()

def get_fit_requirements(self) -> Optional[List[FitRequirement]]:
if self.choice is not None:
@@ -247,35 +249,35 @@ def _check_dataset_properties(self, dataset_properties: Dict[str, Any]) -> None:
"""
assert isinstance(dataset_properties, dict), "dataset_properties must be a dictionary"

def _apply_search_space_update(self, name: str, new_value_range: Union[List, Tuple],
default_value: Union[int, float, str], log: bool = False) -> None:
"""Allows the user to update a hyperparameter
def _apply_search_space_update(self, hyperparameter_search_space_update: HyperparameterSearchSpaceUpdate) -> None:
"""
Applies search space update to the class

Arguments:
name {string} -- name of hyperparameter
new_value_range {List[?] -- value range can be either lower, upper or a list of possible conditionals
log {bool} -- is hyperparameter logscale
Args:
hyperparameter_search_space_update (HyperparameterSearchSpaceUpdate):
Search Space update for the current autoPyTorchChoice module
"""

if len(new_value_range) == 0:
raise ValueError("The new value range needs at least one value")
self._cs_updates[name] = tuple([new_value_range, default_value, log])
self._cs_updates[hyperparameter_search_space_update.hyperparameter] = hyperparameter_search_space_update
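For illustration, a minimal sketch of how a choice module stores an incoming update, assuming backbone_choice is an instantiated autoPyTorchChoice subclass and using hypothetical node and component names:

from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdate

update = HyperparameterSearchSpaceUpdate(node_name='network_backbone',
                                         hyperparameter='ShapedMLPBackbone:num_groups',
                                         value_range=(2, 8),
                                         default_value=4)
backbone_choice._apply_search_space_update(update)
# The update is keyed by its (component-prefixed) hyperparameter name:
# backbone_choice._cs_updates == {'ShapedMLPBackbone:num_groups': update}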

def _get_search_space_updates(self, prefix: Optional[str] = None) -> Dict[str, Tuple]:
def _get_search_space_updates(self, prefix: Optional[str] = None) -> Dict[str, HyperparameterSearchSpace]:
"""Get the search space updates with the given prefix

Keyword Arguments:
prefix {str} -- Only return search space updates with given prefix (default: {None})
Args:
prefix (str):
Only return search space updates with given prefix (default: {None})

Returns:
dict -- Mapping of search space updates. Keys don't contain the prefix.
Dict[str, HyperparameterSearchSpace]:
Mapping of search space updates. Keys don't contain the prefix.
"""
if prefix is None:
return self._cs_updates
result: Dict[str, Tuple] = dict()

# iterate over all search space updates of this node and filter the ones out, that have the given prefix
result: Dict[str, HyperparameterSearchSpace] = dict()

# iterate over all search space updates of this node and keep the ones that have the given prefix
for key in self._cs_updates.keys():
if key.startswith(prefix):
result[key[len(prefix) + 1:]] = self._cs_updates[key]
if prefix is None:
result[key] = self._cs_updates[key].get_search_space()
elif re.search(f'^{prefix}', key) is not None:
result[key[len(prefix) + 1:]] = self._cs_updates[key].get_search_space(remove_prefix=prefix)
return result
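Continuing the sketch above, retrieval with and without a prefix then behaves as follows (return values abbreviated in the comments):

# Without a prefix, keys are returned as stored, converted to
# HyperparameterSearchSpace via get_search_space():
backbone_choice._get_search_space_updates()
# -> {'ShapedMLPBackbone:num_groups': HyperparameterSearchSpace(...)}

# With a prefix, only keys matching the prefix are kept, and the prefix
# plus the ':' separator is stripped via get_search_space(remove_prefix=...):
backbone_choice._get_search_space_updates(prefix='ShapedMLPBackbone')
# -> {'num_groups': HyperparameterSearchSpace(...)}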
41 changes: 16 additions & 25 deletions autoPyTorch/pipeline/components/base_component.py
@@ -4,19 +4,20 @@
import sys
import warnings
from collections import OrderedDict
from typing import Any, Dict, List, Optional, Tuple, Union
from typing import Any, Dict, List, Optional

from ConfigSpace.configuration_space import Configuration, ConfigurationSpace

from sklearn.base import BaseEstimator

from autoPyTorch.utils.common import FitRequirement
from autoPyTorch.utils.common import FitRequirement, HyperparameterSearchSpace
from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdate


def find_components(
package: str,
directory: str,
base_class: BaseEstimator
package: str,
directory: str,
base_class: BaseEstimator
) -> Dict[str, BaseEstimator]:
"""Utility to find component on a given directory,
that inherit from base_class
@@ -34,8 +35,7 @@ def find_components(
module = importlib.import_module(full_module_name)

for member_name, obj in inspect.getmembers(module):
if inspect.isclass(obj) and issubclass(obj, base_class) and \
obj != base_class:
if inspect.isclass(obj) and issubclass(obj, base_class) and obj != base_class:
# TODO test if the obj implements the interface
# Keep in mind that this only instantiates the ensemble_wrapper,
# but not the real target classifier
@@ -96,7 +96,7 @@ class autoPyTorchComponent(BaseEstimator):
def __init__(self) -> None:
super().__init__()
self._fit_requirements: List[FitRequirement] = list()
self._cs_updates: Dict[str, Tuple] = dict()
self._cs_updates: Dict[str, HyperparameterSearchSpaceUpdate] = dict()

@classmethod
def get_required_properties(cls) -> Optional[List[str]]:
@@ -140,7 +140,7 @@ def get_properties(dataset_properties: Optional[Dict[str, str]] = None

@staticmethod
def get_hyperparameter_search_space(
dataset_properties: Optional[Dict[str, str]] = None
dataset_properties: Optional[Dict[str, str]] = None
) -> ConfigurationSpace:
"""Return the configuration space of this classification algorithm.

@@ -253,8 +253,7 @@ def __str__(self) -> str:
name = self.get_properties()['name']
return "autoPyTorch.pipeline %s" % name

def _apply_search_space_update(self, name: str, new_value_range: Union[List, Tuple],
default_value: Union[int, float, str], log: bool = False) -> None:
def _apply_search_space_update(self, hyperparameter_search_space_update: HyperparameterSearchSpaceUpdate) -> None:
"""Allows the user to update a hyperparameter

Arguments:
@@ -263,26 +262,18 @@ def _apply_search_space_update(self, hyperparameter_search_space_update: HyperparameterSearchSpaceUpdate) -> None:
log {bool} -- is hyperparameter logscale
"""

if len(new_value_range) == 0:
raise ValueError("The new value range needs at least one value")
self._cs_updates[name] = tuple([new_value_range, default_value, log])
self._cs_updates[hyperparameter_search_space_update.hyperparameter] = hyperparameter_search_space_update

def _get_search_space_updates(self, prefix: Optional[str] = None) -> Dict[str, Tuple]:
"""Get the search space updates with the given prefix

Keyword Arguments:
prefix {str} -- Only return search space updates with given prefix (default: {None})
def _get_search_space_updates(self) -> Dict[str, HyperparameterSearchSpace]:
"""Get the search space updates

Returns:
dict -- Mapping of search space updates. Keys don't contain the prefix.
"""
if prefix is None:
return self._cs_updates
result: Dict[str, Tuple] = dict()

result: Dict[str, HyperparameterSearchSpace] = dict()

# iterate over all search space updates of this node
for key in self._cs_updates.keys():
if key.startswith(prefix):
# different for autopytorch component as the hyperparameter
result[key[len(prefix):]] = self._cs_updates[key]
result[key] = self._cs_updates[key].get_search_space()
return result
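A plain component keeps the same bookkeeping without prefixes: updates are keyed by the bare hyperparameter name. A minimal sketch, assuming component is an instantiated autoPyTorchComponent, that HyperparameterSearchSpace carries the name, value range, default and log flag, and that all names below are hypothetical:

from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdate

component._apply_search_space_update(
    HyperparameterSearchSpaceUpdate(node_name='lr_scheduler',
                                    hyperparameter='gamma',
                                    value_range=(0.1, 0.9),
                                    default_value=0.5))
component._get_search_space_updates()
# -> {'gamma': HyperparameterSearchSpace(hyperparameter='gamma',
#                                        value_range=(0.1, 0.9),
#                                        default_value=0.5,
#                                        log=False)}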
@@ -76,10 +76,21 @@ def get_hyperparameter_search_space(self,
default = default_
break

preprocessor = CSH.CategoricalHyperparameter('__choice__',
list(available_preprocessors.keys()),
default_value=default)

updates = self._get_search_space_updates()
if '__choice__' in updates.keys():
choice_hyperparameter = updates['__choice__']
if not set(choice_hyperparameter.value_range).issubset(available_preprocessors):
raise ValueError("Expected given update for {} to have "
"choices in {} got {}".format(self.__class__.__name__,
available_preprocessors,
choice_hyperparameter.value_range))
preprocessor = CSH.CategoricalHyperparameter('__choice__',
choice_hyperparameter.value_range,
default_value=choice_hyperparameter.default_value)
else:
preprocessor = CSH.CategoricalHyperparameter('__choice__',
list(available_preprocessors.keys()),
default_value=default)
cs.add_hyperparameter(preprocessor)

# add only child hyperparameters of early_preprocessor choices
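Sketching the effect of this branch with hypothetical preprocessor names: once the subset check passes, the '__choice__' hyperparameter is built from the update's value range instead of from all available components:

import ConfigSpace.hyperparameters as CSH

# Suppose available_preprocessors = {'PowerTransformer': ..., 'Normalizer': ...}
# and the registered update has value_range=['PowerTransformer'],
# default_value='PowerTransformer'. The subset check passes, yielding:
preprocessor = CSH.CategoricalHyperparameter('__choice__',
                                             ['PowerTransformer'],
                                             default_value='PowerTransformer')
# A value_range containing a name outside available_preprocessors would
# instead raise the ValueError above.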
@@ -75,19 +75,37 @@ def get_hyperparameter_search_space(self,
default = default_
break

# add only no encoder to choice hyperparameters in case the dataset is only numerical
if len(dataset_properties['categorical_columns']) == 0:
default = 'NoEncoder'
if include is not None and default not in include:
raise ValueError("Provided {} in include, however, the dataset "
"is incompatible with it".format(include))
updates = self._get_search_space_updates()
if '__choice__' in updates.keys():
choice_hyperparameter = updates['__choice__']
if not set(choice_hyperparameter.value_range).issubset(available_preprocessors):
raise ValueError("Expected given update for {} to have "
"choices in {} got {}".format(self.__class__.__name__,
available_preprocessors,
choice_hyperparameter.value_range))
if len(dataset_properties['categorical_columns']) == 0:
assert len(choice_hyperparameter.value_range) == 1
assert 'NoEncoder' in choice_hyperparameter.value_range, \
"Provided {} in choices, however, the dataset " \
"is incompatible with it".format(choice_hyperparameter.value_range)

preprocessor = CSH.CategoricalHyperparameter('__choice__',
['NoEncoder'],
default_value=default)
choice_hyperparameter.value_range,
default_value=choice_hyperparameter.default_value)
else:
preprocessor = CSH.CategoricalHyperparameter('__choice__',
list(available_preprocessors.keys()),
default_value=default)
# add only NoEncoder to the choice hyperparameter when the dataset is purely numerical
if len(dataset_properties['categorical_columns']) == 0:
default = 'NoEncoder'
if include is not None and default not in include:
raise ValueError("Provided {} in include, however, the dataset "
"is incompatible with it".format(include))
preprocessor = CSH.CategoricalHyperparameter('__choice__',
['NoEncoder'],
default_value=default)
else:
preprocessor = CSH.CategoricalHyperparameter('__choice__',
list(available_preprocessors.keys()),
default_value=default)

cs.add_hyperparameter(preprocessor)

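Finally, a sketch of the numerical-only branch (the dataset_properties below are illustrative): with no categorical columns, a '__choice__' update must contain exactly NoEncoder, and the hyperparameter collapses to a single choice:

import ConfigSpace.hyperparameters as CSH

dataset_properties = {'numerical_columns': [0, 1, 2],
                      'categorical_columns': []}

# An update with value_range=['NoEncoder'] passes both assertions and yields:
encoder = CSH.CategoricalHyperparameter('__choice__',
                                        ['NoEncoder'],
                                        default_value='NoEncoder')
# value_range=['OneHotEncoder'] (or any range with more than one entry)
# fails the assertions, since a purely numerical dataset needs no encoding.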