Skip to content
This repository was archived by the owner on Nov 16, 2023. It is now read-only.

Commit 5b97afe

Browse files
authored
Generate PrefixColumnConcatenator with entry point compiler instead of manually. (#364)
1 parent 28dcc8b commit 5b97afe

File tree

5 files changed

+66
-3
lines changed

5 files changed

+66
-3
lines changed
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
"""
2+
3+
Combines several columns into a single vector-valued column by prefix.
4+
5+
.. remarks::
6+
``PrefixColumnConcatenator`` creates a single vector-valued column from
7+
multiple
8+
columns. It can be performed on data before training a model. The
9+
concatenation
10+
can significantly speed up the processing of data when the number of
11+
columns
12+
is as large as hundreds to thousands.
13+
14+
:param columns: a dictionary of key-value pairs, where key is the output
15+
column name and value is a list of input column names.
16+
17+
* Only one key-value pair is allowed.
18+
* Input column type: numeric or string.
19+
* Output column type:
20+
`Vector Type </nimbusml/concepts/types#vectortype-column>`_.
21+
22+
The << operator can be used to set this value (see
23+
`Column Operator </nimbusml/concepts/columns>`_)
24+
25+
For example:
26+
* ColumnConcatenator(columns={'features': ['age', 'parity',
27+
'induced']})
28+
* ColumnConcatenator() << {'features': ['age', 'parity',
29+
'induced']}
30+
31+
For more details see `Columns </nimbusml/concepts/columns>`_.
32+
33+
.. seealso::
34+
:py:class:`ColumnDropper
35+
<nimbusml.preprocessing.schema.ColumnDropper>`,
36+
:py:class:`ColumnSelector
37+
<nimbusml.preprocessing.schema.ColumnSelector>`.
38+
39+
.. index:: transform, schema
40+
41+
Example:
42+
.. literalinclude:: /../nimbusml/examples/PrefixColumnConcatenator.py
43+
:language: python
44+
"""

src/python/nimbusml/internal/core/preprocessing/schema/prefixcolumnconcatenator.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# Copyright (c) Microsoft Corporation. All rights reserved.
33
# Licensed under the MIT License.
44
# --------------------------------------------------------------------------------------------
5+
# - Generated by tools/entrypoint_compiler.py: do not edit by hand
56
"""
67
PrefixColumnConcatenator
78
"""
@@ -15,10 +16,12 @@
1516
from ...base_pipeline_item import BasePipelineItem, DefaultSignature
1617

1718

18-
class PrefixColumnConcatenator(BasePipelineItem, DefaultSignature):
19+
class PrefixColumnConcatenator(
20+
BasePipelineItem,
21+
DefaultSignature):
1922
"""
2023
21-
Combines several columns into a single vector-valued column by prefix
24+
Combines several columns into a single vector-valued column by prefix.
2225
2326
.. remarks::
2427
``PrefixColumnConcatenator`` creates a single vector-valued column from

src/python/nimbusml/preprocessing/schema/prefixcolumnconcatenator.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# Copyright (c) Microsoft Corporation. All rights reserved.
33
# Licensed under the MIT License.
44
# --------------------------------------------------------------------------------------------
5+
# - Generated by tools/entrypoint_compiler.py: do not edit by hand
56
"""
67
PrefixColumnConcatenator
78
"""
@@ -17,7 +18,10 @@
1718
from ...internal.utils.utils import trace
1819

1920

20-
class PrefixColumnConcatenator(core, BaseTransform, TransformerMixin):
21+
class PrefixColumnConcatenator(
22+
core,
23+
BaseTransform,
24+
TransformerMixin):
2125
"""
2226
2327
Combines several columns into a single vector-valued column by prefix.

src/python/tools/code_fixer.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,12 +247,18 @@ def fix_code(class_name, filename):
247247
all_args['output_for_sub_graph'] = {'Model' : \
248248
all_args['predictor_model']}"""
249249

250+
prefixcolumnconcatenator_1 = "output_columns = input_columns"
251+
prefixcolumnconcatenator_1_correct = """raise ValueError(
252+
"'None' output passed when it cannot be none.")"""
253+
250254
signature_fixes_core = {
251255
'NGramFeaturizer': (textTransform_1, textTransform_1_correct),
252256
'ColumnConcatenator': [(concatColumns_1, concatColumns_1_correct)],
253257
'ColumnSelector': [(columnselector_1, columnselector_1_correct)],
254258
'OneVsRestClassifier': [
255259
(onevsrestclassifier_1, onevsrestclassifier_1_correct)],
260+
'PrefixColumnConcatenator': (prefixcolumnconcatenator_1,
261+
prefixcolumnconcatenator_1_correct)
256262
}
257263

258264

src/python/tools/manifest_diff.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -785,6 +785,12 @@
785785
"NewName": "TypeConverter",
786786
"Module": "preprocessing.schema",
787787
"Type": "Transform"
788+
},
789+
{
790+
"Name": "Transforms.PrefixColumnConcatenator",
791+
"NewName": "PrefixColumnConcatenator",
792+
"Module": "preprocessing.schema",
793+
"Type": "Transform"
788794
}
789795
],
790796
"Components": [

0 commit comments

Comments
 (0)