Skip to content
This repository was archived by the owner on Nov 16, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions src/python/nimbusml/internal/core/base_pipeline_item.py
Original file line number Diff line number Diff line change
Expand Up @@ -956,8 +956,10 @@ def _steal_io(self, node):
"""
if hasattr(node, '_columns') and node._columns is not None:
self << node._columns
setattr(node, node._attr_input,
getattr(node, node._attr_output))

if hasattr(node, '_attr_output'):
setattr(node, node._attr_input,
getattr(node, node._attr_output))
else:
# No columns specified. The user plans to fit the pipeline as
# fit(X, y).
Expand Down
10 changes: 10 additions & 0 deletions src/python/nimbusml/preprocessing/missing_values/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,13 @@ def get_params(self, deep=False):
Get the parameters for this operator.
"""
return core.get_params(self)

def _nodes_with_presteps(self):
    """
    Return this operator preceded by its preprocessing step.

    A ``TypeConverter`` configured with ``result_type='R4'`` is created
    and its ``_steal_io`` method is called with this operator; the
    value returned by that call is placed ahead of ``self`` in the
    resulting list.
    """
    from ..schema import TypeConverter

    converter = TypeConverter(result_type='R4')
    return [converter._steal_io(self), self]
10 changes: 10 additions & 0 deletions src/python/nimbusml/preprocessing/missing_values/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,3 +106,13 @@ def get_params(self, deep=False):
Get the parameters for this operator.
"""
return core.get_params(self)

def _nodes_with_presteps(self):
    """
    Return this operator preceded by its preprocessing step.

    A ``TypeConverter`` configured with ``result_type='R4'`` is created
    and its ``_steal_io`` method is called with this operator; the
    value returned by that call is placed ahead of ``self`` in the
    resulting list.
    """
    from ..schema import TypeConverter

    converter = TypeConverter(result_type='R4')
    return [converter._steal_io(self), self]
10 changes: 10 additions & 0 deletions src/python/nimbusml/preprocessing/missing_values/indicator.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,13 @@ def get_params(self, deep=False):
Get the parameters for this operator.
"""
return core.get_params(self)

def _nodes_with_presteps(self):
    """
    Return this operator preceded by its preprocessing step.

    A ``TypeConverter`` configured with ``result_type='R4'`` is created
    and its ``_steal_io`` method is called with this operator; the
    value returned by that call is placed ahead of ``self`` in the
    resulting list.
    """
    from ..schema import TypeConverter

    converter = TypeConverter(result_type='R4')
    return [converter._steal_io(self), self]
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from math import isnan
from nimbusml import Pipeline
from nimbusml.linear_model import FastLinearRegressor
from nimbusml.preprocessing.missing_values import Filter, Handler
from nimbusml.preprocessing.missing_values import Filter, Handler, Indicator
from pandas import DataFrame
from sklearn.utils.testing import assert_equal, assert_true, \
assert_allclose
Expand Down Expand Up @@ -75,6 +75,90 @@ def test_input_types(self):
res['Score'].values, [
4.965541, 0.519701, 4.992831, 3.877400, 5.020121], rtol=1e-4)

def test_input_conversion_to_float(self):
    """
    Run Indicator, Filter and Handler, with no columns specified, on a
    frame mixing int8/int16/int32/int64, string and float64 columns,
    and check the results around the single NaN (row 2 of 'f5').
    """
    raw = {
        'f0': [0, 1, 2, 3],
        'f1': [1, 2, 3, 4],
        'f2': [1, 2, 3, 4],
        'f3': [1, 2, 3, 4],
        'f4': ['2', '3', '4', '5'],
        'f5': [4, 5, np.nan, 9],
    }
    column_types = {
        'f0': np.int8,
        'f1': np.int16,
        'f2': np.int32,
        'f3': np.int64,
        'f4': str,
        'f5': np.float64,
    }
    frame = DataFrame(raw).astype(column_types)

    # Indicator: the NaN cell must be flagged ...
    flags = Indicator().fit_transform(frame)
    assert_equal(flags.loc[2, 'f5'], True)
    # ... and once that flag is cleared, every remaining cell must be
    # False (i.e. the negated frame is True everywhere).
    flags.loc[2, 'f5'] = False
    self.assertTrue((~flags).all(axis=None))

    # Filter: three rows are expected back, and label 2 of the result
    # holds the value 9.0.
    kept = Filter().fit_transform(frame)
    assert_equal(len(kept), 3)
    assert_equal(kept.loc[2, 'f5'], 9.0)

    # Handler: all four rows are expected back; 6.0 is the mean of the
    # non-missing values 4, 5 and 9.
    filled = Handler(replace_with='Mean').fit_transform(frame)
    assert_equal(len(filled), 4)
    assert_equal(filled.loc[2, 'f5.f5'], 6.0)
    assert_equal(filled.loc[2, 'f5.IsMissing.f5'], 1.0)

def test_input_conversion_to_float_retains_other_column_types(self):
    """
    Check the output dtypes when a transform targets a single column.

    Indicator, Filter and Handler are each applied to one named column
    of a frame holding an int32, a string and a float64 column. The
    dtype of every output column is asserted so that a conversion
    applied to columns other than the targeted one would be detected.
    """
    data = {'f0': [0, 1, 2, 3],
            'f1': ['2', '3', '4', '5'],
            'f2': [4, 5, np.nan, 9]}

    data = DataFrame(data).astype({
        'f0': np.int32,
        'f1': str,
        'f2': np.float64})

    # NOTE: dtypes are compared against the builtins ``object`` and
    # ``bool``. The ``np.object`` / ``np.bool`` names were plain aliases
    # of those builtins, deprecated in NumPy 1.20 and removed in 1.24,
    # so using the builtins keeps the assertions identical while
    # remaining importable on current NumPy.

    # Check Indicator
    xf = Indicator(columns={'f2.ind': 'f2'})
    result = xf.fit_transform(data)
    assert_equal(result.dtypes['f0'], np.int32)
    assert_equal(result.dtypes['f1'], object)
    assert_equal(result.dtypes['f2'], np.float64)
    assert_equal(result.dtypes['f2.ind'], bool)
    assert_equal(result.loc[2, 'f2.ind'], True)
    assert_equal(len(result), 4)

    # Check Filter on the float column: three rows are expected back
    # and 'f2' is expected as float32.
    xf = Filter(columns=['f2'])
    result = xf.fit_transform(data)
    assert_equal(len(result), 3)
    assert_equal(result.loc[2, 'f2'], 9.0)
    assert_equal(result.dtypes['f0'], np.int32)
    assert_equal(result.dtypes['f1'], object)
    assert_equal(result.dtypes['f2'], np.float32)

    # Check Filter on the string column: all four rows are expected
    # back, 'f1' is expected as float32 and 'f2' stays float64.
    xf = Filter(columns=['f1'])
    result = xf.fit_transform(data)
    assert_equal(len(result), 4)
    assert_equal(result.loc[3, 'f2'], 9.0)
    assert_equal(result.dtypes['f0'], np.int32)
    assert_equal(result.dtypes['f1'], np.float32)
    assert_equal(result.dtypes['f2'], np.float64)

    # Check Handler: 6.0 is the mean of the non-missing values 4, 5, 9.
    xf = Handler(columns=['f2'], replace_with='Mean')
    result = xf.fit_transform(data)
    assert_equal(len(result), 4)
    assert_equal(result.loc[2, 'f2.f2'], 6.0)
    assert_equal(result.dtypes['f0'], np.int32)
    assert_equal(result.dtypes['f1'], object)
    assert_equal(result.dtypes['f2.f2'], np.float32)


if __name__ == '__main__':
unittest.main()
13 changes: 12 additions & 1 deletion src/python/nimbusml/tests/utils/test_exports.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,17 @@ def test_get_fit_info_fastl(self):
'Month',
'Day'],
'type': 'start'},
{'inputs': ['Ozone'],
'name': 'TypeConverter',
'outputs': ['Ozone'],
'schema_after': ['Unnamed0',
'Ozone',
'Solar_R',
'Wind',
'Temp',
'Month',
'Day'],
'type': 'transform'},
{'inputs': ['Ozone'],
'name': 'Filter',
'outputs': ['Ozone'],
Expand All @@ -506,7 +517,7 @@ def test_get_fit_info_fastl(self):
for el in info[0]:
if 'operator' in el:
del el['operator']
self.assertEqual(exp, info[0][:2])
self.assertEqual(exp, info[0][:3])

def test_word_embedding(self):

Expand Down
3 changes: 3 additions & 0 deletions src/python/tools/compiler_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ def _nodes_with_presteps(self):
'MeanVarianceScaler': int_to_r4_converter,
'LogMeanVarianceScaler': int_to_r4_converter,
'Binner': int_to_r4_converter,
'Filter': int_to_r4_converter,
'Handler': int_to_r4_converter,
'Indicator': int_to_r4_converter,
# 'SupervisedBinner': int_to_r4_converter, # not exist in nimbusml

'IidSpikeDetector': timeseries_to_r4_converter,
Expand Down