diff --git a/src/python/nimbusml.pyproj b/src/python/nimbusml.pyproj index acd5a6c6..49492026 100644 --- a/src/python/nimbusml.pyproj +++ b/src/python/nimbusml.pyproj @@ -583,6 +583,7 @@ + diff --git a/src/python/nimbusml/pipeline.py b/src/python/nimbusml/pipeline.py index 7237ef7a..74435d87 100644 --- a/src/python/nimbusml/pipeline.py +++ b/src/python/nimbusml/pipeline.py @@ -1839,7 +1839,7 @@ def predict_proba(self, X, verbose=0, **params): last_node = self.last_node last_node._check_implements_method('predict_proba') - scores = self.predict(X, verbose, **params) + scores, _ = self._predict(X, verbose=verbose, **params) # REVIEW: Consider adding an entry point that extracts the # probability column instead. @@ -1883,7 +1883,7 @@ def decision_function(self, X, verbose=0, **params): last_node = self.last_node last_node._check_implements_method('decision_function') - scores = self.predict(X, verbose, **params) + scores, _ = self._predict(X, verbose=verbose, **params) # REVIEW: Consider adding an entry point that extracts the score # column instead. diff --git a/src/python/nimbusml/tests/pipeline/test_pipeline_subclassing.py b/src/python/nimbusml/tests/pipeline/test_pipeline_subclassing.py new file mode 100644 index 00000000..56d9ef42 --- /dev/null +++ b/src/python/nimbusml/tests/pipeline/test_pipeline_subclassing.py @@ -0,0 +1,73 @@ +# -------------------------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
# --------------------------------------------------------------------------------------------
import os
import unittest

import numpy as np
import pandas as pd
from nimbusml import Pipeline
from nimbusml.linear_model import LogisticRegressionBinaryClassifier


def generate_dataset_1():
    """Build a tiny two-feature binary classification dataset.

    Returns:
        A ``(X, y)`` tuple of DataFrames. ``X`` holds the integer columns
        ``x1`` and ``x2`` (eight rows); ``y`` holds the single column ``y``
        whose first four rows are labelled 1 and whose last four are 0.
    """
    features = pd.DataFrame({'x1': [2, 3, 2, 2, 8, 9, 10, 8],
                             'x2': [1, 2, 3, 1, 7, 10, 9, 8]})
    labels = pd.DataFrame({'y': [1, 1, 1, 1, 0, 0, 0, 0]})
    return features, labels


class CustomPipeline(Pipeline):
    """Pipeline subclass whose ``predict`` is replaced by a stub."""

    def predict(self, X, *args, **kwargs):
        """Ignore ``X`` and echo the ``test_return_value`` keyword.

        Returns ``None`` when the keyword is not supplied.
        """
        stub_value = kwargs.get('test_return_value')
        return stub_value


class TestPipelineSubclassing(unittest.TestCase):
    """Checks that overriding ``predict`` in a subclass behaves sanely."""

    def _assert_same_output(self, method_name):
        """Fit a plain and a custom pipeline and compare one method's output.

        The plain ``Pipeline`` is built, fitted and queried first, then the
        ``CustomPipeline``; the two results must be element-wise equal.
        """
        X, y = generate_dataset_1()

        outputs = []
        for pipeline_cls in (Pipeline, CustomPipeline):
            model = pipeline_cls([LogisticRegressionBinaryClassifier()])
            model.fit(X, y)
            outputs.append(getattr(model, method_name)(X))

        orig_result, new_result = outputs
        self.assertTrue(np.array_equal(orig_result, new_result))

    def test_pipeline_subclass_can_override_predict(self):
        X, y = generate_dataset_1()

        # Baseline: the stock pipeline reproduces the training labels.
        baseline = Pipeline([LogisticRegressionBinaryClassifier()])
        baseline.fit(X, y)
        predicted = baseline.predict(X)['PredictedLabel']
        expected = y['y'].values
        self.assertTrue(np.array_equal(predicted.values, expected))

        # The subclass's own predict() is the one that gets called.
        custom = CustomPipeline([LogisticRegressionBinaryClassifier()])
        custom.fit(X, y)
        outcome = custom.predict(X, test_return_value=3)
        self.assertEqual(outcome, 3)

    def test_pipeline_subclass_correctly_supports_predict_proba(self):
        self._assert_same_output('predict_proba')

    def test_pipeline_subclass_correctly_supports_decision_function(self):
        self._assert_same_output('decision_function')


if __name__ == '__main__':
    unittest.main()