[test] Increase the coverage

nabenabe0928 · nabenabe0928 · commit aa9409aa1bbd · 2022-02-09T15:55:58.000+01:00
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/coalescer/MinorityCoalescer.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/coalescer/MinorityCoalescer.py
@@ -18,9 +18,7 @@ def __init__(self, min_frac: float, random_state: np.random.RandomState):
         self.random_state = random_state
 
     def fit(self, X: Dict[str, Any], y: Any = None) -> BaseCoalescer:
-
         self.check_requirements(X, y)
-
         self.preprocessor['categorical'] = MinorityCoalesceTransformer(min_frac=self.min_frac)
         return self
 
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/coalescer/__init__.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/coalescer/__init__.py
@@ -155,9 +155,9 @@ def get_hyperparameter_search_space(self,
 
     def _check_dataset_properties(self, dataset_properties: Dict[str, BaseDatasetPropertiesType]) -> None:
         """
-        A mechanism in code to ensure the correctness of the fit dictionary
+        A mechanism in code to ensure the correctness of the dataset_properties.
         It recursively makes sure that the children and parent level requirements
-        are honored before fit.
+        are honored.
 
         Args:
             dataset_properties:
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/coalescer/base_coalescer.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/coalescer/base_coalescer.py
@@ -25,8 +25,9 @@ def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
         Returns:
             X (Dict[str, Any]): the updated fit dictionary
         """
-        if self._processing and all(self.preprocessor[key] is None for key in ['numerical', 'categorical']):
-            raise ValueError(f"fit() must be called before transform() on {self.__class__.__name__}")
+        if self._processing and self.preprocessor['categorical'] is None:
+            # An active coalescer must have its categorical preprocessor set by fit() beforehand.
+            raise RuntimeError(f"fit() must be called before transform() on {self.__class__.__name__}")
 
         X.update({'coalescer': self.preprocessor})
         return X
diff --git a/test/test_pipeline/components/preprocessing/base.py b/test/test_pipeline/components/preprocessing/base.py
@@ -30,9 +30,9 @@ def _get_pipeline_steps(self, dataset_properties: Optional[Dict[str, Any]],
             default_dataset_properties.update(dataset_properties)
 
         steps.extend([
-            ("coalescer", CoalescerChoice())
             ("imputer", SimpleImputer()),
             ("variance_threshold", VarianceThreshold()),
+            ("coalescer", CoalescerChoice(default_dataset_properties)),
             ("encoder", EncoderChoice(default_dataset_properties)),
             ("scaler", ScalerChoice(default_dataset_properties)),
             ("tabular_transformer", TabularColumnTransformer()),
diff --git a/test/test_pipeline/components/preprocessing/test_coalescer.py b/test/test_pipeline/components/preprocessing/test_coalescer.py
@@ -1,12 +1,45 @@
 import copy
 import unittest
 
+import numpy as np
+
+import pytest
+
 from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.coalescer import (
     CoalescerChoice
 )
+from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.coalescer.MinorityCoalescer import (
+    MinorityCoalescer
+)
+
+
+def test_transform_before_fit():
+    with pytest.raises(RuntimeError):
+        mc = MinorityCoalescer(min_frac=None, random_state=np.random.RandomState())
+        mc.transform(np.random.random((4, 4)))
 
 
 class TestCoalescerChoice(unittest.TestCase):
+    def test_raise_error_in_check_update_compatiblity(self):
+        dataset_properties = {'numerical_columns': [], 'categorical_columns': []}
+        cc = CoalescerChoice(dataset_properties)
+        choices = ["NoCoescer"]  # component name with typo
+        with pytest.raises(ValueError):
+            # Raises an error: there are no categorical columns, but the choices do not include NoCoalescer.
+            cc._check_update_compatiblity(choices_in_update=choices, dataset_properties=dataset_properties)
+
+    def test_raise_error_in_get_component_without_updates(self):
+        dataset_properties = {'numerical_columns': [], 'categorical_columns': []}
+        cc = CoalescerChoice(dataset_properties)
+        with pytest.raises(ValueError):
+            # Raises an error: there are no categorical columns, but `include` does not contain NoCoalescer.
+            cc._get_component_without_updates(
+                avail_components={},
+                dataset_properties=dataset_properties,
+                default="",
+                include=[]
+            )
+
     def test_get_set_config_space(self):
         """Make sure that we can setup a valid choice in the Coalescer
         choice"""
diff --git a/test/test_utils/test_coalescer_transformer.py b/test/test_utils/test_coalescer_transformer.py
@@ -75,6 +75,17 @@ def test_invalid_X(X1):
         MinorityCoalesceTransformer().fit_transform(X)
 
 
+@pytest.mark.parametrize("min_frac", [-0.1, 1.1])
+def test_invalid_min_frac(min_frac):
+    with pytest.raises(ValueError):
+        MinorityCoalesceTransformer(min_frac=min_frac)
+
+
+def test_transform_before_fit(X1):
+    with pytest.raises(RuntimeError):
+        MinorityCoalesceTransformer().transform(X1)
+
+
 def test_transform_after_fit(X1, X2):
     # On both X_fit and X_transf, the categories 3, 4, 5, 6, 7 are present.
     X_fit = X1  # Here categories 3, 4, 5 have ocurrence above 10%