From 7877fc3d0676e0dcd0cce93fdfb36b8165b5962a Mon Sep 17 00:00:00 2001 From: "pieths.dev@gmail.com" Date: Thu, 19 Sep 2019 13:36:10 -0700 Subject: [PATCH] Fix issue when using predict_proba or decision_function with combined models. --- src/python/nimbusml/pipeline.py | 4 +- .../tests/pipeline/test_pipeline_combining.py | 46 +++++++++++++++++++ 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/src/python/nimbusml/pipeline.py b/src/python/nimbusml/pipeline.py index ade5744e..1cbd0674 100644 --- a/src/python/nimbusml/pipeline.py +++ b/src/python/nimbusml/pipeline.py @@ -2002,7 +2002,7 @@ def predict_proba(self, X, verbose=0, **params): :return: array, shape = [n_samples, n_classes] """ - if hasattr(self, 'steps') and len(self.steps) > 0: + if hasattr(self, 'steps') and self.steps: last_node = self.last_node last_node._check_implements_method('predict_proba') @@ -2042,7 +2042,7 @@ def decision_function(self, X, verbose=0, **params): :return: array, shape=(n_samples,) if n_classes == 2 else ( n_samples, n_classes) """ - if hasattr(self, 'steps') and len(self.steps) > 0: + if hasattr(self, 'steps') and self.steps: last_node = self.last_node last_node._check_implements_method('decision_function') diff --git a/src/python/nimbusml/tests/pipeline/test_pipeline_combining.py b/src/python/nimbusml/tests/pipeline/test_pipeline_combining.py index f16e43aa..2eb77763 100644 --- a/src/python/nimbusml/tests/pipeline/test_pipeline_combining.py +++ b/src/python/nimbusml/tests/pipeline/test_pipeline_combining.py @@ -406,6 +406,52 @@ def test_combine_with_classifier_trained_with_filedatastream(self): self.assertTrue(result_1.equals(result_2)) + def test_combined_models_support_predict_proba(self): + path = get_dataset('infert').as_filepath() + + data = FileDataStream.read_csv(path) + + transform = OneHotVectorizer(columns={'edu': 'education'}) + df = transform.fit_transform(data, as_binary_data_stream=True) + + feature_cols = ['parity', 'edu', 'age', 'induced', 'spontaneous', 'stratum', 'pooled.stratum'] + predictor = LogisticRegressionBinaryClassifier(feature=feature_cols, label='case') + predictor.fit(df) + + data = FileDataStream.read_csv(path) + df = transform.transform(data, as_binary_data_stream=True) + result_1 = predictor.predict_proba(df) + + data = FileDataStream.read_csv(path) + combined_pipeline = Pipeline.combine_models(transform, predictor) + result_2 = combined_pipeline.predict_proba(data) + + self.assertTrue(np.array_equal(result_1, result_2)) + + + def test_combined_models_support_decision_function(self): + path = get_dataset('infert').as_filepath() + + data = FileDataStream.read_csv(path) + + transform = OneHotVectorizer(columns={'edu': 'education'}) + df = transform.fit_transform(data, as_binary_data_stream=True) + + feature_cols = ['parity', 'edu', 'age', 'induced', 'spontaneous', 'stratum', 'pooled.stratum'] + predictor = LogisticRegressionBinaryClassifier(feature=feature_cols, label='case') + predictor.fit(df) + + data = FileDataStream.read_csv(path) + df = transform.transform(data, as_binary_data_stream=True) + result_1 = predictor.decision_function(df) + + data = FileDataStream.read_csv(path) + combined_pipeline = Pipeline.combine_models(transform, predictor) + result_2 = combined_pipeline.decision_function(data) + + self.assertTrue(np.array_equal(result_1, result_2)) + + if __name__ == '__main__': unittest.main()