Skip to content

Commit 9d33efe

Browse files
authored
Adding transform extensions (#1460)
* adding all extensions for the Text related transformation * adding keyToVector extensions and renaming the estimator to conform to 1318 * Adding SelectColumns extensions. Renaming SelectEstimator and Textnormalizer * KeyToBinary extensions * Adding extensions for the Image transforms. Some renaming
1 parent 1391107 commit 9d33efe

File tree

58 files changed

+683
-315
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

58 files changed

+683
-315
lines changed

docs/samples/Microsoft.ML.Samples/Dynamic/KeyToValue_Term.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
using Microsoft.ML.Runtime.Api;
88
using Microsoft.ML.Runtime.Data;
99
using Microsoft.ML.Transforms.Categorical;
10+
using Microsoft.ML.Transforms.Conversions;
1011
using Microsoft.ML.Transforms.Text;
1112
using System;
1213
using System.Collections.Generic;

src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
using Microsoft.ML.Runtime.Data.IO;
77
using Microsoft.ML.Runtime.Internal.Utilities;
88
using Microsoft.ML.Transforms;
9-
using Microsoft.ML.Transforms.Categorical;
9+
using Microsoft.ML.Transforms.Conversions;
1010
using System;
1111
using System.Collections.Generic;
1212
using System.Globalization;

src/Microsoft.ML.Data/Transforms/CategoricalsCatalog.cs

Lines changed: 0 additions & 31 deletions
This file was deleted.

src/Microsoft.ML.Data/Transforms/ConversionsCatalog.cs

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,4 +55,50 @@ public static ConvertingEstimator ConvertTo(this TransformsCatalog.Conversions c
5555
public static ConvertingEstimator ConvertTo(this TransformsCatalog.Conversions catalog, params ConvertingTransform.ColumnInfo[] columns)
5656
=> new ConvertingEstimator(CatalogUtils.GetEnvironment(catalog), columns);
5757
}
58+
59+
public static class ToValueCatalog
60+
{
61+
/// <summary>
62+
/// Convert the key types back to their original values.
63+
/// </summary>
64+
/// <param name="catalog">The conversion transform's catalog.</param>
65+
/// <param name="inputColumn">Name of the input column.</param>
66+
public static KeyToValueEstimator MapKeyToValue(this TransformsCatalog.Conversions catalog, string inputColumn)
67+
=> new KeyToValueEstimator(CatalogUtils.GetEnvironment(catalog), inputColumn);
68+
69+
/// <summary>
70+
/// Convert the key types (name of the column specified in the first item of the tuple) back to their original values
71+
/// (named as specified in the second item of the tuple).
72+
/// </summary>
73+
/// <param name="catalog">The conversion transform's catalog.</param>
74+
/// <param name="columns">The pairs of input and output columns.</param>
75+
public static KeyToValueEstimator MapKeyToValue(this TransformsCatalog.Conversions catalog, params (string input, string output)[] columns)
76+
=> new KeyToValueEstimator(CatalogUtils.GetEnvironment(catalog), columns);
77+
}
78+
79+
/// <summary>
80+
/// Extensions for KeyToVectorMappingEstimator.
81+
/// </summary>
82+
public static class ToVectorCatalog
83+
{
84+
/// <summary>
85+
/// Convert the key types to their indicator vector representation.
86+
/// </summary>
87+
/// <param name="catalog">The conversion transform's catalog.</param>
88+
/// <param name="columns">The column settings (input name, output name and bagging option) for each key column to map to a vector.</param>
89+
public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog.Conversions catalog,
90+
params KeyToVectorTransform.ColumnInfo[] columns)
91+
=> new KeyToVectorMappingEstimator(CatalogUtils.GetEnvironment(catalog), columns);
92+
93+
/// <summary>
94+
/// Convert the key types to their indicator vector representation.
95+
/// </summary>
96+
/// <param name="catalog">The conversion transform's catalog.</param>
97+
/// <param name="inputColumn">The name of the input column.</param>
98+
/// <param name="outputColumn">The name of the output column. If null, <paramref name="inputColumn"/> is used as the output name.</param>
99+
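/// <param name="bag">Whether to combine multiple indicator vectors into a single bag vector instead of concatenating them. This is only relevant when the input is a vector.</param>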
100+
public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog.Conversions catalog,
101+
string inputColumn, string outputColumn = null, bool bag = KeyToVectorMappingEstimator.Defaults.Bag)
102+
=> new KeyToVectorMappingEstimator(CatalogUtils.GetEnvironment(catalog), inputColumn, outputColumn, bag);
103+
}
58104
}
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using Microsoft.ML.Runtime;
6+
using Microsoft.ML.Runtime.Data;
7+
using Microsoft.ML.Transforms;
8+
9+
namespace Microsoft.ML
10+
{
11+
/// <summary>
12+
/// Extensions for CopyColumnsEstimator.
13+
/// </summary>
14+
public static class ColumnCopyingCatalog
15+
{
16+
/// <summary>
17+
/// Copies the input column to another column named as specified in <paramref name="outputColumn"/>.
18+
/// </summary>
19+
/// <param name="catalog">The transform's catalog.</param>
20+
/// <param name="inputColumn">Name of the input column.</param>
21+
/// <param name="outputColumn">Name of the new column, resulting from copying.</param>
22+
public static CopyColumnsEstimator CopyColumns(this TransformsCatalog catalog, string inputColumn, string outputColumn)
23+
=> new CopyColumnsEstimator(CatalogUtils.GetEnvironment(catalog), inputColumn, outputColumn);
24+
25+
/// <summary>
26+
/// Copies the input column, name specified in the first item of the tuple,
27+
/// to another column, named as specified in the second item of the tuple.
28+
/// </summary>
29+
/// <param name="catalog">The transform's catalog</param>
30+
/// <param name="columns">The pairs of input and output columns.</param>
31+
public static CopyColumnsEstimator CopyColumns(this TransformsCatalog catalog, params (string source, string name)[] columns)
32+
=> new CopyColumnsEstimator(CatalogUtils.GetEnvironment(catalog), columns);
33+
34+
}
35+
36+
/// <summary>
37+
/// Extensions for ColumnConcatenatingEstimator.
38+
/// </summary>
39+
public static class ColumnConcatenatingEstimatorCatalog
40+
{
41+
/// <summary>
42+
/// Concatenates the specified input columns together into a single output column.
43+
/// </summary>
44+
/// <param name="catalog">The transform's catalog.</param>
45+
/// <param name="outputColumn">The name of the output column.</param>
46+
/// <param name="inputColumns">The names of the columns to concatenate together.</param>
47+
public static ColumnConcatenatingEstimator Concatenate(this TransformsCatalog catalog, string outputColumn, params string[] inputColumns)
48+
=> new ColumnConcatenatingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumn, inputColumns);
49+
50+
}
51+
52+
/// <summary>
53+
/// Extensions for ColumnSelectingEstimator.
54+
/// </summary>
55+
public static class ColumnSelectingCatalog
56+
{
57+
/// <summary>
58+
/// KeepColumns is used to select a list of columns that the user wants to keep from a given input. Any column not specified
59+
/// will be dropped from the output schema.
60+
/// </summary>
61+
/// <param name="catalog">The transform's catalog.</param>
62+
/// <param name="columnsToKeep">The array of column names to keep.</param>
63+
public static ColumnSelectingEstimator KeepColumns(this TransformsCatalog catalog, params string[] columnsToKeep)
64+
=> ColumnSelectingEstimator.KeepColumns(CatalogUtils.GetEnvironment(catalog), columnsToKeep);
65+
66+
/// <summary>
67+
/// DropColumns is used to select a list of columns that the user wants to drop from a given input. Any column not specified will
68+
/// be maintained in the output schema.
69+
/// </summary>
70+
/// <param name="catalog">The transform's catalog.</param>
71+
/// <param name="columnsToDrop">The array of column names to drop.</param>
72+
public static ColumnSelectingEstimator DropColumns(this TransformsCatalog catalog, params string[] columnsToDrop)
73+
=> ColumnSelectingEstimator.DropColumns(CatalogUtils.GetEnvironment(catalog), columnsToDrop);
74+
75+
/// <summary>
76+
/// SelectColumns is used to select a list of columns that the user wants to either keep or drop from a given input.
77+
/// </summary>
78+
/// <param name="catalog">The transform's catalog.</param>
79+
/// <param name="keepColumns">The array of column names to keep, cannot be set with <paramref name="dropColumns"/>.</param>
80+
/// <param name="dropColumns">The array of column names to drop, cannot be set with <paramref name="keepColumns"/>.</param>
81+
/// <param name="keepHidden">If true will keep hidden columns and false will remove hidden columns.</param>
82+
/// <param name="ignoreMissing">If false will check for any columns given in <paramref name="keepColumns"/>
83+
/// or <paramref name="dropColumns"/> that are missing from the input. If a missing column exists, a
84+
/// SchemaMismatch exception is thrown. If true, the check is not made.</param>
85+
public static ColumnSelectingEstimator SelectColumns(this TransformsCatalog catalog,
86+
string[] keepColumns,
87+
string[] dropColumns,
88+
bool keepHidden = SelectColumnsTransform.Defaults.KeepHidden,
89+
bool ignoreMissing = SelectColumnsTransform.Defaults.IgnoreMissing)
90+
=> new ColumnSelectingEstimator(CatalogUtils.GetEnvironment(catalog),
91+
keepColumns, dropColumns, keepHidden, ignoreMissing);
92+
}
93+
}

src/Microsoft.ML.Data/Transforms/KeyToValueTransform.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
using Microsoft.ML.Runtime.Model.Pfa;
1313
using Microsoft.ML.StaticPipe;
1414
using Microsoft.ML.StaticPipe.Runtime;
15-
using Microsoft.ML.Transforms.Categorical;
15+
using Microsoft.ML.Transforms.Conversions;
1616
using Newtonsoft.Json.Linq;
1717
using System;
1818
using System.Collections.Generic;
@@ -32,7 +32,7 @@
3232
[assembly: LoadableClass(typeof(IRowMapper), typeof(KeyToValueTransform), null, typeof(SignatureLoadRowMapper),
3333
KeyToValueTransform.UserName, KeyToValueTransform.LoaderSignature)]
3434

35-
namespace Microsoft.ML.Transforms.Categorical
35+
namespace Microsoft.ML.Transforms.Conversions
3636
{
3737
/// <summary>
3838
/// KeyToValueTransform utilizes KeyValues metadata to map key indices to the corresponding values in the KeyValues metadata.

src/Microsoft.ML.Data/Transforms/KeyToVectorTransform.cs

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
using Microsoft.ML.Runtime.Model.Pfa;
1313
using Microsoft.ML.StaticPipe;
1414
using Microsoft.ML.StaticPipe.Runtime;
15-
using Microsoft.ML.Transforms.Categorical;
15+
using Microsoft.ML.Transforms.Conversions;
1616
using Newtonsoft.Json.Linq;
1717
using System;
1818
using System.Collections.Generic;
@@ -31,7 +31,7 @@
3131
[assembly: LoadableClass(typeof(IRowMapper), typeof(KeyToVectorTransform), null, typeof(SignatureLoadRowMapper),
3232
KeyToVectorTransform.UserName, KeyToVectorTransform.LoaderSignature)]
3333

34-
namespace Microsoft.ML.Transforms.Categorical
34+
namespace Microsoft.ML.Transforms.Conversions
3535
{
3636
public sealed class KeyToVectorTransform : OneToOneTransformerBase
3737
{
@@ -84,7 +84,7 @@ public sealed class Arguments
8484

8585
[Argument(ArgumentType.AtMostOnce,
8686
HelpText = "Whether to combine multiple indicator vectors into a single bag vector instead of concatenating them. This is only relevant when the input is a vector.")]
87-
public bool Bag = KeyToVectorEstimator.Defaults.Bag;
87+
public bool Bag = KeyToVectorMappingEstimator.Defaults.Bag;
8888
}
8989

9090
public class ColumnInfo
@@ -93,7 +93,7 @@ public class ColumnInfo
9393
public readonly string Output;
9494
public readonly bool Bag;
9595

96-
public ColumnInfo(string input, string output, bool bag = KeyToVectorEstimator.Defaults.Bag)
96+
public ColumnInfo(string input, string output, bool bag = KeyToVectorMappingEstimator.Defaults.Bag)
9797
{
9898
Input = input;
9999
Output = output;
@@ -733,25 +733,25 @@ private bool SaveAsOnnxCore(OnnxContext ctx, int iinfo, ColInfo info, string src
733733
}
734734
}
735735

736-
public sealed class KeyToVectorEstimator : TrivialEstimator<KeyToVectorTransform>
736+
public sealed class KeyToVectorMappingEstimator : TrivialEstimator<KeyToVectorTransform>
737737
{
738738
internal static class Defaults
739739
{
740740
public const bool Bag = false;
741741
}
742742

743-
public KeyToVectorEstimator(IHostEnvironment env, params KeyToVectorTransform.ColumnInfo[] columns)
743+
public KeyToVectorMappingEstimator(IHostEnvironment env, params KeyToVectorTransform.ColumnInfo[] columns)
744744
: this(env, new KeyToVectorTransform(env, columns))
745745
{
746746
}
747747

748-
public KeyToVectorEstimator(IHostEnvironment env, string name, string source = null, bool bag = Defaults.Bag)
749-
: this(env, new KeyToVectorTransform(env, new KeyToVectorTransform.ColumnInfo(source ?? name, name, bag)))
748+
public KeyToVectorMappingEstimator(IHostEnvironment env, string inputColumn, string outputColumn = null, bool bag = Defaults.Bag)
749+
: this(env, new KeyToVectorTransform(env, new KeyToVectorTransform.ColumnInfo(inputColumn, outputColumn ?? inputColumn, bag)))
750750
{
751751
}
752752

753-
private KeyToVectorEstimator(IHostEnvironment env, KeyToVectorTransform transformer)
754-
: base(Contracts.CheckRef(env, nameof(env)).Register(nameof(KeyToVectorEstimator)), transformer)
753+
private KeyToVectorMappingEstimator(IHostEnvironment env, KeyToVectorTransform transformer)
754+
: base(Contracts.CheckRef(env, nameof(env)).Register(nameof(KeyToVectorMappingEstimator)), transformer)
755755
{
756756
}
757757

@@ -891,7 +891,7 @@ public override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
891891
var col = (IColInput)toOutput[i];
892892
infos[i] = new KeyToVectorTransform.ColumnInfo(inputNames[col.Input], outputNames[toOutput[i]], col.Bag);
893893
}
894-
return new KeyToVectorEstimator(env, infos);
894+
return new KeyToVectorMappingEstimator(env, infos);
895895
}
896896
}
897897

src/Microsoft.ML.Data/Transforms/NormalizerCatalog.cs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,10 @@ public static class NormalizerCatalog
3434
/// ]]>
3535
/// </format>
3636
/// </example>
37-
public static NormalizingEstimator Normalize(this TransformsCatalog catalog, string inputName, string outputName = null, NormalizingEstimator.NormalizerMode mode = NormalizingEstimator.NormalizerMode.MinMax)
37+
public static NormalizingEstimator Normalize(this TransformsCatalog catalog,
38+
string inputName,
39+
string outputName = null,
40+
NormalizingEstimator.NormalizerMode mode = NormalizingEstimator.NormalizerMode.MinMax)
3841
=> new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), inputName, outputName, mode);
3942

4043
/// <summary>
@@ -57,15 +60,18 @@ public static NormalizingEstimator Normalize(this TransformsCatalog catalog, str
5760
/// ]]>
5861
/// </format>
5962
/// </example>
60-
public static NormalizingEstimator Normalize(this TransformsCatalog catalog, NormalizingEstimator.NormalizerMode mode, params (string input, string output)[] columns)
63+
public static NormalizingEstimator Normalize(this TransformsCatalog catalog,
64+
NormalizingEstimator.NormalizerMode mode,
65+
params (string input, string output)[] columns)
6166
=> new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), mode, columns);
6267

6368
/// <summary>
6469
/// Normalize (rescale) columns according to specified custom parameters.
6570
/// </summary>
6671
/// <param name="catalog">The transform catalog</param>
6772
/// <param name="columns">The normalization settings for all the columns</param>
68-
public static NormalizingEstimator Normalize(this TransformsCatalog catalog, params NormalizingEstimator.ColumnBase[] columns)
73+
public static NormalizingEstimator Normalize(this TransformsCatalog catalog,
74+
params NormalizingEstimator.ColumnBase[] columns)
6975
=> new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columns);
7076
}
7177
}

0 commit comments

Comments
 (0)