diff --git a/src/python/tests_extended/data_frame_tool.py b/src/python/tests_extended/data_frame_tool.py index 104a0bb8..ecf17015 100644 --- a/src/python/tests_extended/data_frame_tool.py +++ b/src/python/tests_extended/data_frame_tool.py @@ -81,6 +81,8 @@ def _validate_type(self, input_meta, col_type): return elif expected_type == np.int64 and str(col_type) == 'datetime64[ns]': return + elif expected_type == np.uint32 and str(col_type) == 'category': + return elif expected_type in ort_float_set and str(col_type) in pd_float_set: return elif expected_type in ort_int_set and str(col_type) in pd_int_set: @@ -172,21 +174,34 @@ def execute(self, df, output_names=None, output_types=None, run_options=None): """ input_feed = self._get_input_feeds(df, self._sess); if not output_names: - output_names = [output.name for output in self._sess._outputs_meta] - + output_names = [output.name for output in self._sess._outputs_meta] + results = self._sess.run(output_names, input_feed, run_options) df = pd.DataFrame() - for i in range(len(results)): - r = results[i].flatten() - if output_types and output_names[i] in output_types: - dtype = output_types[output_names[i]] - if dtype == np.dtype('datetime64'): - r = r.astype(np.int64) - r = [datetime.utcfromtimestamp(ts) for ts in r] - else: - r = r.astype(dtype) - - df[output_names[i]] = r - + for i, r in enumerate(results): + # TODO: remove this. These extra columns + # should not be in the output. + if output_names[i].startswith('mlnet.') and \ + output_names[i].endswith('.unusedOutput') and \ + r.shape == (1,1): + continue + + r = np.split(r, r.shape[-1], axis=-1) \ + if r.shape[-1] > 1 else [r] + + for suffix, col in enumerate(r): + col = col.flatten() + if output_types and output_names[i] in output_types: + dtype = output_types[output_names[i]] + if dtype == np.dtype('datetime64'): + col = col.astype(np.int64) + col = [datetime.utcfromtimestamp(ts) for ts in col] + else: + col = col.astype(dtype) + + col_name = output_names[i] if len(r) == 1 else \ + output_names[i] + '.' + str(suffix) + df[col_name] = col + return df diff --git a/src/python/tests_extended/test_export_to_onnx.py b/src/python/tests_extended/test_export_to_onnx.py index 85bcc40b..64ef017c 100644 --- a/src/python/tests_extended/test_export_to_onnx.py +++ b/src/python/tests_extended/test_export_to_onnx.py @@ -45,6 +45,7 @@ SsaForecaster) from data_frame_tool import DataFrameTool as DFT +from data_frame_tool import DataFrameTool as DFT SHOW_ONNX_JSON = False SHOW_TRANSFORMED_RESULTS = True @@ -291,54 +292,55 @@ } EXPECTED_RESULTS = { - 'AveragedPerceptronBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]}, + 'AveragedPerceptronBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel', 'PredictedLabel.output')]}, 'CharTokenizer': {'num_cols': 424, 'cols': 0}, 'ColumnConcatenator': {'num_cols': 11, 'cols': 0}, 'ColumnDuplicator': {'num_cols': 7, 'cols': 0}, 'ColumnSelector': { 'num_cols': 2, - 'cols': [('Sepal_Width', 'Sepal_Width'), ('Sepal_Length', 'Sepal_Length')] + 'cols': [('Sepal_Width', 'Sepal_Width', 'Sepal_Width.output'), + ('Sepal_Length', 'Sepal_Length', 'Sepal_Length.output')] }, #'EnsembleClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]}, #'EnsembleRegressor': {'cols': [('Score', 'Score')]}, - 'FastForestBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]}, - 'FastForestRegressor': {'cols': [('Score', 'Score')]}, - 'FastLinearBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]}, - 'FastLinearClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]}, - 'FastLinearRegressor': {'cols': [('Score', 'Score')]}, - 'FastTreesBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]}, - 'FastTreesRegressor': {'cols': [('Score', 'Score')]}, - 'FastTreesTweedieRegressor': {'cols': [('Score', 'Score')]}, + 'FastForestBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel', 'PredictedLabel.output')]}, + 'FastForestRegressor': {'cols': [('Score', 'Score', 'Score.output')]}, + 'FastLinearBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel', 'PredictedLabel.output')]}, + 'FastLinearClassifier': {'cols': [('PredictedLabel', 'PredictedLabel', 'PredictedLabel.output')]}, + 'FastLinearRegressor': {'cols': [('Score', 'Score', 'Score.output')]}, + 'FastTreesBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel', 'PredictedLabel.output')]}, + 'FastTreesRegressor': {'cols': [('Score', 'Score', 'Score.output')]}, + 'FastTreesTweedieRegressor': {'cols': [('Score', 'Score', 'Score.output')]}, 'FromKey': {'num_cols': 6, 'cols': 0}, 'GlobalContrastRowScaler': {'num_cols': 12, 'cols': 0}, 'Handler': {'num_cols': 8, 'cols': 0}, 'Indicator': {'num_cols': 7, 'cols': 0}, - 'KMeansPlusPlus': {'cols': [('PredictedLabel', 'PredictedLabel')]}, - 'LightGbmBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]}, - 'LightGbmClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]}, - 'LightGbmRanker': {'cols': [('Score', 'Score')]}, - 'LightGbmRegressor': {'cols': [('Score', 'Score')]}, - 'LinearSvmBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]}, - 'LogisticRegressionBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]}, - 'LogisticRegressionClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]}, + 'KMeansPlusPlus': {'cols': [('PredictedLabel', 'PredictedLabel', 'PredictedLabel.output')]}, + 'LightGbmBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel', 'PredictedLabel.output')]}, + 'LightGbmClassifier': {'cols': [('PredictedLabel', 'PredictedLabel', 'PredictedLabel.output')]}, + 'LightGbmRanker': {'cols': [('Score', 'Score', 'Score.output')]}, + 'LightGbmRegressor': {'cols': [('Score', 'Score', 'Score.output')]}, + 'LinearSvmBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel', 'PredictedLabel.output')]}, + 'LogisticRegressionBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel', 'PredictedLabel.output')]}, + 'LogisticRegressionClassifier': {'cols': [('PredictedLabel', 'PredictedLabel', 'PredictedLabel.output')]}, 'LpScaler': {'num_cols': 10, 'cols': 0}, 'MeanVarianceScaler': {'num_cols': 5, 'cols': 0}, 'MinMaxScaler': {'num_cols': 5, 'cols': 0}, 'MutualInformationSelector': {'num_cols': 8, 'cols': 0}, 'NGramFeaturizer': {'num_cols': 273, 'cols': 0}, - 'NaiveBayesClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]}, + 'NaiveBayesClassifier': {'cols': [('PredictedLabel', 'PredictedLabel', 'PredictedLabel.output')]}, 'OneHotVectorizer': {'num_cols': 12, 'cols': 0}, 'OneVsRestClassifier(AveragedPerceptronBinaryClassifier)': \ - {'cols': [('PredictedLabel', 'PredictedLabel')]}, + {'cols': [('PredictedLabel', 'PredictedLabel', 'PredictedLabel.output')]}, 'OneVsRestClassifier(LinearSvmBinaryClassifier)': \ - {'cols': [('PredictedLabel', 'PredictedLabel')]}, - 'OnlineGradientDescentRegressor': {'cols': [('Score', 'Score')]}, - 'OrdinaryLeastSquaresRegressor': {'cols': [('Score', 'Score')]}, + {'cols': [('PredictedLabel', 'PredictedLabel', 'PredictedLabel.output')]}, + 'OnlineGradientDescentRegressor': {'cols': [('Score', 'Score', 'Score.output')]}, + 'OrdinaryLeastSquaresRegressor': {'cols': [('Score', 'Score', 'Score.output')]}, 'PcaTransformer': {'num_cols': 9, 'cols': 0}, - 'PoissonRegressionRegressor': {'cols': [('Score', 'Score')]}, + 'PoissonRegressionRegressor': {'cols': [('Score', 'Score', 'Score.output')]}, 'PrefixColumnConcatenator': {'num_cols': 8, 'cols': 0}, - 'SgdBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]}, - 'SymSgdBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel')]}, + 'SgdBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel', 'PredictedLabel.output')]}, + 'SymSgdBinaryClassifier': {'cols': [('PredictedLabel', 'PredictedLabel', 'PredictedLabel.output')]}, 'ToKey': {'num_cols': 11, 'cols': 0}, 'TypeConverter': {'num_cols': 8, 'cols': 0}, 'WordTokenizer': {'num_cols': 73, 'cols': 0} @@ -449,7 +451,7 @@ def load_json(file_path): return json.loads(content_without_comments) -def print_results(result_expected, result_onnx): +def print_results(result_expected, result_onnx, result_onnx_ort): print("\nML.Net Output (Expected Result):") print(result_expected) if not isinstance(result_expected, pd.Series): @@ -460,8 +462,12 @@ def print_results(result_expected, result_onnx): if not isinstance(result_onnx, pd.Series): print('Columns', result_onnx.columns) + print("\nORT Result:") + print(result_onnx_ort) + if not isinstance(result_onnx_ort, pd.Series): + print('Columns', result_onnx_ort.columns) -def validate_results(class_name, result_expected, result_onnx): +def validate_results(class_name, result_expected, result_onnx, result_ort): if not class_name in EXPECTED_RESULTS: raise RuntimeError("ERROR: ONNX model executed but no results specified for comparison.") @@ -474,21 +480,26 @@ def validate_results(class_name, result_expected, result_onnx): if len(result_onnx.columns) != num_cols: raise RuntimeError("ERROR: The ONNX output does not contain the expected number of columns.") - col_pairs = EXPECTED_RESULTS[class_name]['cols'] + if len(result_ort.columns) != num_cols: + raise RuntimeError("ERROR: The ORT output does not contain the expected number of columns.") + + col_tuples = EXPECTED_RESULTS[class_name]['cols'] - if isinstance(col_pairs, int): + if isinstance(col_tuples, int): # If col_pairs is an int then slice the columns # based on the value and use those pairs for comparison - col_pairs = list(zip(result_expected.columns[col_pairs:], - result_onnx.columns[col_pairs:])) + col_tuples = list(zip(result_expected.columns[col_tuples:], + result_onnx.columns[col_tuples:], + result_ort.columns[col_tuples:])) - if not col_pairs: + if not col_tuples: raise RuntimeError("ERROR: no columns specified for comparison of results.") - for col_pair in col_pairs: + for col_tuple in col_tuples: try: - col_expected = result_expected.loc[:, col_pair[0]] - col_onnx = result_onnx.loc[:, col_pair[1]] + col_expected = result_expected.loc[:, col_tuple[0]] + col_onnx = result_onnx.loc[:, col_tuple[1]] + col_ort = result_ort.loc[:, col_tuple[2]] if isinstance(col_expected.dtype, pd.api.types.CategoricalDtype): # ONNX does not export categorical columns so convert categorical @@ -496,12 +507,16 @@ def validate_results(class_name, result_expected, result_onnx): # the comparison. col_expected = col_expected.astype(col_expected.dtype.categories.dtype) - pd.testing.assert_series_equal(col_expected, - col_onnx, - check_names=False, - check_exact=False, - check_dtype=True, - check_less_precise=True) + check_kwargs = { + 'check_names': False, + 'check_exact': False, + 'check_dtype': True, + 'check_less_precise': True + } + + pd.testing.assert_series_equal(col_expected, col_onnx, **check_kwargs) + pd.testing.assert_series_equal(col_expected, col_ort, **check_kwargs) + except Exception as e: print(e) raise RuntimeError("ERROR: OnnxRunner result does not match expected result.") @@ -566,15 +581,17 @@ def test_export_to_onnx(estimator, class_name): try: onnxrunner = OnnxRunner(model_file=onnx_path) result_onnx = onnxrunner.fit_transform(dataset) + df_tool = DFT(onnx_path) - result_onnx1 = df_tool.execute(dataset, []) + result_ort = df_tool.execute(dataset, []) if SHOW_TRANSFORMED_RESULTS: - print_results(result_expected, result_onnx) + print_results(result_expected, result_onnx, result_ort) export_valid = validate_results(class_name, result_expected, - result_onnx) + result_onnx, + result_ort) except Exception as e: print(e) @@ -599,8 +616,8 @@ def test_export_to_onnx(estimator, class_name): for entry_point in entry_points: class_name = entry_point['NewName'] - if not class_name in ['Handler']: - continue +# if not class_name in ['Handler']: +# continue print('\n===========> %s' % class_name)