diff --git a/docs/code/MlNetCookBook.md b/docs/code/MlNetCookBook.md
index 727ed415e2..f509ebfe57 100644
--- a/docs/code/MlNetCookBook.md
+++ b/docs/code/MlNetCookBook.md
@@ -424,7 +424,7 @@ var pipeline =
// Use the multi-class SDCA model to predict the label using features.
.Append(mlContext.MulticlassClassification.Trainers.SdcaCalibrated())
// Apply the inverse conversion from 'PredictedLabel' column back to string value.
- .Append(mlContext.Transforms.Conversion.MapKeyToValue(("PredictedLabel", "Data")));
+ .Append(mlContext.Transforms.Conversion.MapKeyToValue("Data", "PredictedLabel"));
// Train the model.
var model = pipeline.Fit(trainData);
diff --git a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs
index 4792dd61d4..e8edbc9c9d 100644
--- a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs
+++ b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs
@@ -5,6 +5,7 @@
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML.Data;
+using Microsoft.ML.Runtime;
using Microsoft.ML.Transforms;
namespace Microsoft.ML
@@ -65,6 +66,22 @@ public static TypeConvertingEstimator ConvertType(this TransformsCatalog.Convers
DataKind outputKind = ConvertDefaults.DefaultOutputKind)
=> new TypeConvertingEstimator(CatalogUtils.GetEnvironment(catalog), new[] { new TypeConvertingEstimator.ColumnOptions(outputColumnName, outputKind, inputColumnName) });
+ ///
+ /// Changes column type of the input columns.
+ ///
+ /// The conversion transform's catalog.
+ /// Specifies the names of the columns on which to apply the transformation.
+ /// The expected kind of the output column.
+ public static TypeConvertingEstimator ConvertType(this TransformsCatalog.ConversionTransforms catalog,
+ InputOutputColumnPair[] columns,
+ DataKind outputKind = ConvertDefaults.DefaultOutputKind)
+ {
+ var env = CatalogUtils.GetEnvironment(catalog);
+ env.CheckValue(columns, nameof(columns));
+ var columnOptions = columns.Select(x => new TypeConvertingEstimator.ColumnOptions(x.OutputColumnName, outputKind, x.InputColumnName)).ToArray();
+ return new TypeConvertingEstimator(env, columnOptions);
+ }
+
///
/// Changes column type of the input column.
///
@@ -90,20 +107,16 @@ public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.Co
=> new KeyToValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName);
///
- /// Convert the key types (name of the column specified in the first item of the tuple) back to their original values
- /// (named as specified in the second item of the tuple).
+ /// Convert the key types back to their original values.
///
- /// The conversion transform's catalog
- /// The pairs of input and output columns.
- ///
- ///
- ///
- ///
- [BestFriend]
- internal static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.ConversionTransforms catalog, params ColumnOptions[] columns)
- => new KeyToValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), ColumnOptions.ConvertToValueTuples(columns));
+ /// The conversion transform's catalog.
+ /// Specifies the names of the columns on which to apply the transformation.
+ public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.ConversionTransforms catalog, InputOutputColumnPair[] columns)
+ {
+ var env = CatalogUtils.GetEnvironment(catalog);
+ env.CheckValue(columns, nameof(columns));
+ return new KeyToValueMappingEstimator(env, columns.Select(x => (x.OutputColumnName, x.InputColumnName)).ToArray());
+ }
///
/// Maps key types or key values into a floating point vector.
@@ -127,6 +140,23 @@ public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog.
string outputColumnName, string inputColumnName = null, bool outputCountVector = KeyToVectorMappingEstimator.Defaults.OutputCountVector)
=> new KeyToVectorMappingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName, outputCountVector);
+ ///
+ /// Maps columns of key types or key values into columns of floating point vectors.
+ ///
+ /// The conversion transform's catalog.
+ /// Specifies the names of the columns on which to apply the transformation.
+ /// Whether to combine multiple indicator vectors into a single vector of counts instead of concatenating them.
+ /// This is only relevant when the input column is a vector of keys.
+ public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog.ConversionTransforms catalog,
+ InputOutputColumnPair[] columns, bool outputCountVector = KeyToVectorMappingEstimator.Defaults.OutputCountVector)
+ {
+ var env = CatalogUtils.GetEnvironment(catalog);
+ env.CheckValue(columns, nameof(columns));
+ var columnOptions = columns.Select(x => new KeyToVectorMappingEstimator.ColumnOptions(x.OutputColumnName, x.InputColumnName, outputCountVector)).ToArray();
+ return new KeyToVectorMappingEstimator(env, columnOptions);
+
+ }
+
///
/// Converts value types into .
///
@@ -157,6 +187,31 @@ public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.Co
=> new ValueToKeyMappingEstimator(CatalogUtils.GetEnvironment(catalog),
new[] { new ValueToKeyMappingEstimator.ColumnOptions(outputColumnName, inputColumnName, maximumNumberOfKeys, keyOrdinality, addKeyValueAnnotationsAsText) }, keyData);
+ ///
+ /// Converts value types into .
+ ///
+ /// The conversion transform's catalog.
+ /// Specifies the names of the columns on which to apply the transformation.
+ /// Maximum number of keys to keep per column when auto-training.
+ /// How items should be ordered when vectorized. If chosen they will be in the order encountered.
+ /// If , items are sorted according to their default comparison, for example, text sorting will be case sensitive (for example, 'A' then 'Z' then 'a').
+ /// Whether key value annotations should be text, regardless of the actual input type.
+ /// The data view containing the terms. If specified, this should be a single column data
+ /// view, and the key-values will be taken from that column. If unspecified, the key-values will be determined
+ /// from the input data upon fitting.
+ public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.ConversionTransforms catalog,
+ InputOutputColumnPair[] columns,
+ int maximumNumberOfKeys = ValueToKeyMappingEstimator.Defaults.MaximumNumberOfKeys,
+ ValueToKeyMappingEstimator.KeyOrdinality keyOrdinality = ValueToKeyMappingEstimator.Defaults.Ordinality,
+ bool addKeyValueAnnotationsAsText = ValueToKeyMappingEstimator.Defaults.AddKeyValueAnnotationsAsText,
+ IDataView keyData = null)
+ {
+ var env = CatalogUtils.GetEnvironment(catalog);
+ env.CheckValue(columns, nameof(columns));
+ var columnOptions = columns.Select(x => new ValueToKeyMappingEstimator.ColumnOptions(x.OutputColumnName, x.InputColumnName, maximumNumberOfKeys, keyOrdinality, addKeyValueAnnotationsAsText)).ToArray();
+ return new ValueToKeyMappingEstimator(env, columnOptions, keyData);
+ }
+
///
/// Converts value types into , optionally loading the keys to use from .
///
@@ -232,11 +287,13 @@ public static ValueMappingEstimator MapValue MapValue(
this TransformsCatalog.ConversionTransforms catalog,
IEnumerable> keyValuePairs,
- params ColumnOptions[] columns)
+ params InputOutputColumnPair[] columns)
{
+ var env = CatalogUtils.GetEnvironment(catalog);
+ env.CheckValue(columns, nameof(columns));
var keys = keyValuePairs.Select(pair => pair.Key);
var values = keyValuePairs.Select(pair => pair.Value);
- return new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, ColumnOptions.ConvertToValueTuples(columns));
+ return new ValueMappingEstimator(env, keys, values, InputOutputColumnPair.ConvertToValueTuples(columns));
}
///
@@ -260,12 +317,14 @@ internal static ValueMappingEstimator MapValue> keyValuePairs,
bool treatValuesAsKeyType,
- params ColumnOptions[] columns)
+ params InputOutputColumnPair[] columns)
{
+ var env = CatalogUtils.GetEnvironment(catalog);
+ env.CheckValue(columns, nameof(columns));
var keys = keyValuePairs.Select(pair => pair.Key);
var values = keyValuePairs.Select(pair => pair.Value);
- return new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, treatValuesAsKeyType,
- ColumnOptions.ConvertToValueTuples(columns));
+ return new ValueMappingEstimator(env, keys, values, treatValuesAsKeyType,
+ InputOutputColumnPair.ConvertToValueTuples(columns));
}
///
@@ -321,12 +380,14 @@ public static ValueMappingEstimator MapValue MapValue(
this TransformsCatalog.ConversionTransforms catalog,
IEnumerable> keyValuePairs,
- params ColumnOptions[] columns)
+ params InputOutputColumnPair[] columns)
{
+ var env = CatalogUtils.GetEnvironment(catalog);
+ env.CheckValue(columns, nameof(columns));
var keys = keyValuePairs.Select(pair => pair.Key);
var values = keyValuePairs.Select(pair => pair.Value);
- return new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values,
- ColumnOptions.ConvertToValueTuples(columns));
+ return new ValueMappingEstimator(env, keys, values,
+ InputOutputColumnPair.ConvertToValueTuples(columns));
}
///
@@ -377,8 +438,12 @@ public static ValueMappingEstimator MapValue(
[BestFriend]
internal static ValueMappingEstimator MapValue(
this TransformsCatalog.ConversionTransforms catalog,
- IDataView lookupMap, DataViewSchema.Column keyColumn, DataViewSchema.Column valueColumn, params ColumnOptions[] columns)
- => new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), lookupMap, keyColumn.Name, valueColumn.Name,
- ColumnOptions.ConvertToValueTuples(columns));
+ IDataView lookupMap, DataViewSchema.Column keyColumn, DataViewSchema.Column valueColumn, params InputOutputColumnPair[] columns)
+ {
+ var env = CatalogUtils.GetEnvironment(catalog);
+ env.CheckValue(columns, nameof(columns));
+ return new ValueMappingEstimator(env, lookupMap, keyColumn.Name, valueColumn.Name,
+ InputOutputColumnPair.ConvertToValueTuples(columns));
+ }
}
}
diff --git a/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs b/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs
index c4b97d0dea..2b84be4bc7 100644
--- a/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs
+++ b/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs
@@ -4,6 +4,7 @@
using System.Linq;
using Microsoft.ML.Data;
+using Microsoft.ML.Runtime;
using Microsoft.ML.Transforms;
namespace Microsoft.ML
@@ -11,35 +12,33 @@ namespace Microsoft.ML
///
/// Specifies input and output column names for a transformation.
///
- [BestFriend]
- internal sealed class ColumnOptions
+ public sealed class InputOutputColumnPair
{
- private readonly string _outputColumnName;
- private readonly string _inputColumnName;
+ ///
+ /// Name of the column to transform. If set to , the value of the will be used as source.
+ ///
+ public string InputColumnName { get; }
+ ///
+ /// Name of the column resulting from the transformation of .
+ ///
+ public string OutputColumnName { get; }
///
/// Specifies input and output column names for a transformation.
///
/// Name of the column resulting from the transformation of .
/// Name of the column to transform. If set to , the value of the will be used as source.
- public ColumnOptions(string outputColumnName, string inputColumnName = null)
+ public InputOutputColumnPair(string outputColumnName, string inputColumnName = null)
{
- _outputColumnName = outputColumnName;
- _inputColumnName = inputColumnName ?? outputColumnName;
- }
-
- ///
- /// Instantiates a from a tuple of input and output column names.
- ///
- public static implicit operator ColumnOptions((string outputColumnName, string inputColumnName) value)
- {
- return new ColumnOptions(value.outputColumnName, value.inputColumnName);
+ Contracts.CheckNonEmpty(outputColumnName, nameof(outputColumnName));
+ InputColumnName = inputColumnName ?? outputColumnName;
+ OutputColumnName = outputColumnName;
}
[BestFriend]
- internal static (string outputColumnName, string inputColumnName)[] ConvertToValueTuples(ColumnOptions[] infos)
+ internal static (string outputColumnName, string inputColumnName)[] ConvertToValueTuples(InputOutputColumnPair[] infos)
{
- return infos.Select(info => (info._outputColumnName, info._inputColumnName)).ToArray();
+ return infos.Select(info => (info.OutputColumnName, info.InputColumnName)).ToArray();
}
}
@@ -78,8 +77,12 @@ public static ColumnCopyingEstimator CopyColumns(this TransformsCatalog catalog,
///
///
[BestFriend]
- internal static ColumnCopyingEstimator CopyColumns(this TransformsCatalog catalog, params ColumnOptions[] columns)
- => new ColumnCopyingEstimator(CatalogUtils.GetEnvironment(catalog), ColumnOptions.ConvertToValueTuples(columns));
+ internal static ColumnCopyingEstimator CopyColumns(this TransformsCatalog catalog, params InputOutputColumnPair[] columns)
+ {
+ var env = CatalogUtils.GetEnvironment(catalog);
+ env.CheckValue(columns, nameof(columns));
+ return new ColumnCopyingEstimator(env, InputOutputColumnPair.ConvertToValueTuples(columns));
+ }
///
/// Concatenates columns together.
diff --git a/src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs b/src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs
index ea203359a5..5c7ab27db5 100644
--- a/src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs
+++ b/src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs
@@ -3,6 +3,7 @@
// See the LICENSE file in the project root for more information.
using Microsoft.ML.Data;
+using Microsoft.ML.Runtime;
using Microsoft.ML.Transforms.Image;
namespace Microsoft.ML
@@ -32,8 +33,12 @@ public static ImageGrayscalingEstimator ConvertToGrayscale(this TransformsCatalo
/// ]]>
///
[BestFriend]
- internal static ImageGrayscalingEstimator ConvertToGrayscale(this TransformsCatalog catalog, params ColumnOptions[] columns)
- => new ImageGrayscalingEstimator(CatalogUtils.GetEnvironment(catalog), ColumnOptions.ConvertToValueTuples(columns));
+ internal static ImageGrayscalingEstimator ConvertToGrayscale(this TransformsCatalog catalog, params InputOutputColumnPair[] columns)
+ {
+ var env = CatalogUtils.GetEnvironment(catalog);
+ env.CheckValue(columns, nameof(columns));
+ return new ImageGrayscalingEstimator(env, InputOutputColumnPair.ConvertToValueTuples(columns));
+ }
///
/// Loads the images from the into memory.
@@ -80,8 +85,12 @@ public static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, s
/// ]]>
///
[BestFriend]
- internal static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, string imageFolder, params ColumnOptions[] columns)
- => new ImageLoadingEstimator(CatalogUtils.GetEnvironment(catalog), imageFolder, ColumnOptions.ConvertToValueTuples(columns));
+ internal static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, string imageFolder, params InputOutputColumnPair[] columns)
+ {
+ var env = CatalogUtils.GetEnvironment(catalog);
+ env.CheckValue(columns, nameof(columns));
+ return new ImageLoadingEstimator(env, imageFolder, InputOutputColumnPair.ConvertToValueTuples(columns));
+ }
///
/// The transform's catalog.
diff --git a/src/Microsoft.ML.Transforms/CategoricalCatalog.cs b/src/Microsoft.ML.Transforms/CategoricalCatalog.cs
index 22ebfe7890..4697824165 100644
--- a/src/Microsoft.ML.Transforms/CategoricalCatalog.cs
+++ b/src/Microsoft.ML.Transforms/CategoricalCatalog.cs
@@ -2,7 +2,9 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
+using System.Linq;
using Microsoft.ML.Data;
+using Microsoft.ML.Runtime;
using Microsoft.ML.Transforms;
namespace Microsoft.ML
@@ -40,6 +42,30 @@ public static OneHotEncodingEstimator OneHotEncoding(this TransformsCatalog.Cate
=> new OneHotEncodingEstimator(CatalogUtils.GetEnvironment(catalog),
new[] { new OneHotEncodingEstimator.ColumnOptions(outputColumnName, inputColumnName, outputKind, maximumNumberOfKeys, keyOrdinality) }, keyData);
+ ///
+ /// Convert text columns into one-hot encoded vectors.
+ ///
+ /// The transform catalog
+ /// Specifies the names of the columns on which to apply the transformation.
+ /// Output kind: Bag (multi-set vector), Ind (indicator vector), Key (index), or Binary encoded indicator vector.
+ /// Maximum number of terms to keep per column when auto-training.
+ /// How items should be ordered when vectorized. If chosen they will be in the order encountered.
+ /// If , items are sorted according to their default comparison, for example, text sorting will be case sensitive (for example, 'A' then 'Z' then 'a').
+ /// Specifies an ordering for the encoding. If specified, this should be a single column data view,
+ /// and the key-values will be taken from that column. If unspecified, the ordering will be determined from the input data upon fitting.
+ public static OneHotEncodingEstimator OneHotEncoding(this TransformsCatalog.CategoricalTransforms catalog,
+ InputOutputColumnPair[] columns,
+ OneHotEncodingEstimator.OutputKind outputKind = OneHotEncodingEstimator.Defaults.OutKind,
+ int maximumNumberOfKeys = ValueToKeyMappingEstimator.Defaults.MaximumNumberOfKeys,
+ ValueToKeyMappingEstimator.KeyOrdinality keyOrdinality = ValueToKeyMappingEstimator.Defaults.Ordinality,
+ IDataView keyData = null)
+ {
+ var env = CatalogUtils.GetEnvironment(catalog);
+ env.CheckValue(columns, nameof(columns));
+ var columnOptions = columns.Select(x => new OneHotEncodingEstimator.ColumnOptions(x.OutputColumnName, x.InputColumnName, outputKind, maximumNumberOfKeys, keyOrdinality)).ToArray();
+ return new OneHotEncodingEstimator(env, columnOptions, keyData);
+ }
+
///
/// Convert several text column into one-hot encoded vectors.
///
@@ -88,6 +114,33 @@ public static OneHotHashEncodingEstimator OneHotHashEncoding(this TransformsCata
=> new OneHotHashEncodingEstimator(CatalogUtils.GetEnvironment(catalog),
new[] { new OneHotHashEncodingEstimator.ColumnOptions(outputColumnName, inputColumnName, outputKind, numberOfBits, seed, useOrderedHashing, maximumNumberOfInverts) });
+ ///
+ /// Convert text columns into hash-based one-hot encoded vector columns.
+ ///
+ /// The transform catalog
+ /// Specifies the names of the columns on which to apply the transformation.
+ /// The conversion mode.
+ /// Number of bits to hash into. Must be between 1 and 30, inclusive.
+ /// Hashing seed.
+ /// Whether the position of each term should be included in the hash.
+ /// During hashing we construct mappings between original values and the produced hash values.
+ /// Text representations of original values are stored in the slot names of the metadata for the new column. Hashing, as such, can map many initial values to one.
+ /// specifies the upper bound of the number of distinct input values mapping to a hash that should be retained.
+ /// 0 does not retain any input values. -1 retains all input values mapping to each hash.
+ public static OneHotHashEncodingEstimator OneHotHashEncoding(this TransformsCatalog.CategoricalTransforms catalog,
+ InputOutputColumnPair[] columns,
+ OneHotEncodingEstimator.OutputKind outputKind = OneHotEncodingEstimator.OutputKind.Indicator,
+ int numberOfBits = OneHotHashEncodingEstimator.Defaults.NumberOfBits,
+ uint seed = OneHotHashEncodingEstimator.Defaults.Seed,
+ bool useOrderedHashing = OneHotHashEncodingEstimator.Defaults.UseOrderedHashing,
+ int maximumNumberOfInverts = OneHotHashEncodingEstimator.Defaults.MaximumNumberOfInverts)
+ {
+ var env = CatalogUtils.GetEnvironment(catalog);
+ env.CheckValue(columns, nameof(columns));
+ var columnOptions = columns.Select(x => new OneHotHashEncodingEstimator.ColumnOptions(x.OutputColumnName, x.InputColumnName, outputKind, numberOfBits, seed, useOrderedHashing, maximumNumberOfInverts)).ToArray();
+ return new OneHotHashEncodingEstimator(env, columnOptions);
+ }
+
///
/// Convert several text column into hash-based one-hot encoded vectors.
///
diff --git a/src/Microsoft.ML.Transforms/ConversionsCatalog.cs b/src/Microsoft.ML.Transforms/ConversionsCatalog.cs
index 406cef8d2d..7ea68ff33f 100644
--- a/src/Microsoft.ML.Transforms/ConversionsCatalog.cs
+++ b/src/Microsoft.ML.Transforms/ConversionsCatalog.cs
@@ -3,6 +3,7 @@
// See the LICENSE file in the project root for more information.
using Microsoft.ML.Data;
+using Microsoft.ML.Runtime;
using Microsoft.ML.Transforms;
namespace Microsoft.ML
@@ -20,8 +21,12 @@ public static class ConversionsCatalog
/// Specifies the output and input columns on which the transformation should be applied.
[BestFriend]
internal static KeyToBinaryVectorMappingEstimator MapKeyToBinaryVector(this TransformsCatalog.ConversionTransforms catalog,
- params ColumnOptions[] columns)
- => new KeyToBinaryVectorMappingEstimator(CatalogUtils.GetEnvironment(catalog), ColumnOptions.ConvertToValueTuples(columns));
+ params InputOutputColumnPair[] columns)
+ {
+ var env = CatalogUtils.GetEnvironment(catalog);
+ env.CheckValue(columns, nameof(columns));
+ return new KeyToBinaryVectorMappingEstimator(env, InputOutputColumnPair.ConvertToValueTuples(columns));
+ }
///
/// Convert the key types back to binary vector.
diff --git a/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs b/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs
index 55659fbcb9..30685d8067 100644
--- a/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs
+++ b/src/Microsoft.ML.Transforms/ExtensionsCatalog.cs
@@ -2,24 +2,15 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
+using System.Linq;
using Microsoft.ML.Data;
+using Microsoft.ML.Runtime;
using Microsoft.ML.Transforms;
namespace Microsoft.ML
{
public static class ExtensionsCatalog
{
- ///
- /// Creates a new output column, of boolean type, with the same number of slots as the input column. The value in the output column
- /// is true if the value in the input column is missing.
- ///
- /// The transform extensions' catalog.
- /// The names of the input columns of the transformation and the corresponding names for the output columns.
- [BestFriend]
- internal static MissingValueIndicatorEstimator IndicateMissingValues(this TransformsCatalog catalog,
- params ColumnOptions[] columns)
- => new MissingValueIndicatorEstimator(CatalogUtils.GetEnvironment(catalog), ColumnOptions.ConvertToValueTuples(columns));
-
///
/// Creates a new output column, or replaces the source with a new column
/// (depending on whether the is given a value, or left to null)
@@ -41,6 +32,19 @@ public static MissingValueIndicatorEstimator IndicateMissingValues(this Transfor
string inputColumnName = null)
=> new MissingValueIndicatorEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName);
+ ///
+ /// Creates a new output column, of boolean type, with the same number of slots as the input column. The value in the output column
+ /// is true if the value in the input column is missing.
+ ///
+ /// The transform extensions' catalog.
+ /// Specifies the names of the columns on which to apply the transformation.
+ public static MissingValueIndicatorEstimator IndicateMissingValues(this TransformsCatalog catalog, InputOutputColumnPair[] columns)
+ {
+ var env = CatalogUtils.GetEnvironment(catalog);
+ env.CheckValue(columns, nameof(columns));
+ return new MissingValueIndicatorEstimator(env, columns.Select(x => (x.OutputColumnName, x.InputColumnName)).ToArray());
+ }
+
///
/// Creates a new output column, or replaces the source with a new column
/// (depending on whether the is given a value, or left to null)
@@ -69,6 +73,27 @@ public static MissingValueReplacingEstimator ReplaceMissingValues(this Transform
bool imputeBySlot = MissingValueReplacingEstimator.Defaults.ImputeBySlot)
=> new MissingValueReplacingEstimator(CatalogUtils.GetEnvironment(catalog), new[] { new MissingValueReplacingEstimator.ColumnOptions(outputColumnName, inputColumnName, replacementMode, imputeBySlot) });
+ ///
+ /// Creates a new output column, identical to the input column for everything but the missing values.
+ /// The missing values of the input column, in this new column are replaced with .
+ ///
+ /// The transform extensions' catalog.
+ /// Specifies the names of the columns on which to apply the transformation.
+ /// The type of replacement to use as specified in
+ /// If true, per-slot imputation of replacement is performed.
+ /// Otherwise, replacement value is imputed for the entire vector column. This setting is ignored for scalars and variable vectors,
+ /// where imputation is always for the entire column.
+ public static MissingValueReplacingEstimator ReplaceMissingValues(this TransformsCatalog catalog,
+ InputOutputColumnPair[] columns,
+ MissingValueReplacingEstimator.ReplacementMode replacementMode = MissingValueReplacingEstimator.Defaults.Mode,
+ bool imputeBySlot = MissingValueReplacingEstimator.Defaults.ImputeBySlot)
+ {
+ var env = CatalogUtils.GetEnvironment(catalog);
+ env.CheckValue(columns, nameof(columns));
+ var columnOptions = columns.Select(x => new MissingValueReplacingEstimator.ColumnOptions(x.OutputColumnName, x.InputColumnName, replacementMode, imputeBySlot)).ToArray();
+ return new MissingValueReplacingEstimator(env, columnOptions);
+ }
+
///
/// Creates a new output column, identical to the input column for everything but the missing values.
/// The missing values of the input column, in this new column are replaced with .
diff --git a/src/Microsoft.ML.Transforms/FeatureSelectionCatalog.cs b/src/Microsoft.ML.Transforms/FeatureSelectionCatalog.cs
index 3c59b738a4..1b9d35d251 100644
--- a/src/Microsoft.ML.Transforms/FeatureSelectionCatalog.cs
+++ b/src/Microsoft.ML.Transforms/FeatureSelectionCatalog.cs
@@ -2,7 +2,9 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
+using System.Linq;
using Microsoft.ML.Data;
+using Microsoft.ML.Runtime;
using Microsoft.ML.Transforms;
namespace Microsoft.ML
@@ -14,10 +16,11 @@ public static class FeatureSelectionCatalog
{
///
/// The transform's catalog.
+ /// Name of the column resulting from the transformation of .
+ /// Name of column to transform. If set to , the value of the will be used as source.
/// The name of the label column.
/// The maximum number of slots to preserve in the output. The number of slots to preserve is taken across all input columns.
/// Max number of bins used to approximate mutual information between each input column and the label column. Power of 2 recommended.
- /// Specifies the names of the input columns for the transformation, and their respective output column names.
///
///
///
///
///
- [BestFriend]
- internal static MutualInformationFeatureSelectingEstimator SelectFeaturesBasedOnMutualInformation(this TransformsCatalog.FeatureSelectionTransforms catalog,
+ public static MutualInformationFeatureSelectingEstimator SelectFeaturesBasedOnMutualInformation(this TransformsCatalog.FeatureSelectionTransforms catalog,
+ string outputColumnName, string inputColumnName = null,
string labelColumnName = MutualInfoSelectDefaults.LabelColumn,
int slotsInOutput = MutualInfoSelectDefaults.SlotsInOutput,
- int numberOfBins = MutualInfoSelectDefaults.NumBins,
- params ColumnOptions[] columns)
- => new MutualInformationFeatureSelectingEstimator(CatalogUtils.GetEnvironment(catalog), labelColumnName, slotsInOutput, numberOfBins,
- ColumnOptions.ConvertToValueTuples(columns));
+ int numberOfBins = MutualInfoSelectDefaults.NumBins)
+ => new MutualInformationFeatureSelectingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName, labelColumnName, slotsInOutput, numberOfBins);
///
/// The transform's catalog.
- /// Name of the column resulting from the transformation of .
- /// Name of column to transform. If set to , the value of the will be used as source.
+ /// Specifies the names of the input columns for the transformation, and their respective output column names.
/// The name of the label column.
/// The maximum number of slots to preserve in the output. The number of slots to preserve is taken across all input columns.
/// Max number of bins used to approximate mutual information between each input column and the label column. Power of 2 recommended.
- ///
- ///
- ///
- ///
- ///
public static MutualInformationFeatureSelectingEstimator SelectFeaturesBasedOnMutualInformation(this TransformsCatalog.FeatureSelectionTransforms catalog,
- string outputColumnName, string inputColumnName = null,
+ InputOutputColumnPair[] columns,
string labelColumnName = MutualInfoSelectDefaults.LabelColumn,
int slotsInOutput = MutualInfoSelectDefaults.SlotsInOutput,
int numberOfBins = MutualInfoSelectDefaults.NumBins)
- => new MutualInformationFeatureSelectingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName, labelColumnName, slotsInOutput, numberOfBins);
+ {
+ var env = CatalogUtils.GetEnvironment(catalog);
+ env.CheckValue(columns, nameof(columns));
+ return new MutualInformationFeatureSelectingEstimator(env, labelColumnName, slotsInOutput, numberOfBins,
+ columns.Select(x => (x.OutputColumnName, x.InputColumnName)).ToArray());
+ }
///
/// The transform's catalog.
@@ -87,5 +85,19 @@ public static CountFeatureSelectingEstimator SelectFeaturesBasedOnCount(this Tra
string inputColumnName = null,
long count = CountSelectDefaults.Count)
=> new CountFeatureSelectingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName, count);
+
+ ///
+ /// The transform's catalog.
+ /// Specifies the names of the columns on which to apply the transformation.
+ /// If the count of non-default values for a slot is greater than or equal to this threshold in the training data, the slot is preserved.
+ public static CountFeatureSelectingEstimator SelectFeaturesBasedOnCount(this TransformsCatalog.FeatureSelectionTransforms catalog,
+ InputOutputColumnPair[] columns,
+ long count = CountSelectDefaults.Count)
+ {
+ var env = CatalogUtils.GetEnvironment(catalog);
+ env.CheckValue(columns, nameof(columns));
+ var columnOptions = columns.Select(x => new CountFeatureSelectingEstimator.ColumnOptions(x.OutputColumnName, x.InputColumnName, count)).ToArray();
+ return new CountFeatureSelectingEstimator(env, columnOptions);
+ }
}
}
diff --git a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs
index 657e58bf32..dd12c452c4 100644
--- a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs
+++ b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs
@@ -1,4 +1,5 @@
using Microsoft.ML.Data;
+using Microsoft.ML.Runtime;
using Microsoft.ML.Transforms;
namespace Microsoft.ML
@@ -43,8 +44,12 @@ public static NormalizingEstimator Normalize(this TransformsCatalog catalog,
[BestFriend]
internal static NormalizingEstimator Normalize(this TransformsCatalog catalog,
NormalizingEstimator.NormalizationMode mode,
- params ColumnOptions[] columns)
- => new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), mode, ColumnOptions.ConvertToValueTuples(columns));
+ params InputOutputColumnPair[] columns)
+ {
+ var env = CatalogUtils.GetEnvironment(catalog);
+ env.CheckValue(columns, nameof(columns));
+ return new NormalizingEstimator(env, mode, InputOutputColumnPair.ConvertToValueTuples(columns));
+ }
///
/// Normalize (rescale) columns according to specified custom parameters.
diff --git a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
index 4aa28da763..e2baf1578e 100644
--- a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
+++ b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
@@ -75,8 +75,12 @@ public static TokenizingByCharactersEstimator TokenizeIntoCharactersAsKeys(this
[BestFriend]
internal static TokenizingByCharactersEstimator TokenizeIntoCharactersAsKeys(this TransformsCatalog.TextTransforms catalog,
bool useMarkerCharacters = CharTokenizingDefaults.UseMarkerCharacters,
- params ColumnOptions[] columns)
- => new TokenizingByCharactersEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), useMarkerCharacters, ColumnOptions.ConvertToValueTuples(columns));
+ params InputOutputColumnPair[] columns)
+ {
+ var host = CatalogUtils.GetEnvironment(catalog); // resolved once, reused for the check and the estimator
+ host.CheckValue(columns, nameof(columns)); // runs before the pairs are converted to value tuples
+ return new TokenizingByCharactersEstimator(host, useMarkerCharacters, InputOutputColumnPair.ConvertToValueTuples(columns));
+ }
///
/// Normalizes incoming text in by changing case, removing diacritical marks, punctuation marks and/or numbers
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs
index e2a71c27cd..cb5c35793a 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs
@@ -187,7 +187,7 @@ private ITransformer TrainOnIris(string irisDataPath)
// [2] -9.709775 float
// Apply the inverse conversion from 'PredictedLabel' column back to string value.
- var finalPipeline = pipeline.Append(mlContext.Transforms.Conversion.MapKeyToValue(("Data", "PredictedLabel")));
+ var finalPipeline = pipeline.Append(mlContext.Transforms.Conversion.MapKeyToValue("Data", "PredictedLabel"));
dataPreview = finalPipeline.Preview(trainData);
return finalPipeline.Fit(trainData);
diff --git a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs
index 010a973741..1c05497e55 100644
--- a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs
@@ -39,7 +39,7 @@ public void TrainAndPredictIrisModelWithStringLabelTest()
.AppendCacheCheckpoint(mlContext)
.Append(mlContext.MulticlassClassification.Trainers.SdcaCalibrated(
new SdcaCalibratedMulticlassTrainer.Options { NumberOfThreads = 1 }))
- .Append(mlContext.Transforms.Conversion.MapKeyToValue(("Plant", "PredictedLabel")));
+ .Append(mlContext.Transforms.Conversion.MapKeyToValue("Plant", "PredictedLabel"));
// Train the pipeline
var trainedModel = pipe.Fit(trainData);
diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs
index 20f97d63a8..9cab97c2e0 100644
--- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs
+++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs
@@ -391,7 +391,7 @@ public void TensorFlowTransformInceptionTest()
);
var data = reader.Load(new MultiFileSource(dataFile));
- var images = mlContext.Transforms.LoadImages(imageFolder, ("ImageReal", "ImagePath")).Fit(data).Transform(data);
+ var images = mlContext.Transforms.LoadImages("ImageReal", imageFolder, "ImagePath").Fit(data).Transform(data); // (outputColumnName, imageFolder, inputColumnName)
var cropped = mlContext.Transforms.ResizeImages("ImageCropped", 224, 224, "ImageReal").Fit(images).Transform(images);
var pixels = mlContext.Transforms.ExtractPixels(inputName, "ImageCropped", interleavePixelColors: true).Fit(cropped).Transform(cropped);
var tf = mlContext.Model.LoadTensorFlowModel(modelLocation).ScoreTensorFlowModel(outputName, inputName, true).Fit(pixels).Transform(pixels);
@@ -507,7 +507,7 @@ public void TensorFlowTransformMNISTConvTest()
var trainData = reader.Load(GetDataPath(TestDatasets.mnistTiny28.trainFilename));
var testData = reader.Load(GetDataPath(TestDatasets.mnistOneClass.testFilename));
- var pipe = mlContext.Transforms.CopyColumns(("reshape_input", "Placeholder"))
+ var pipe = mlContext.Transforms.CopyColumns("reshape_input", "Placeholder")
.Append(mlContext.Model.LoadTensorFlowModel("mnist_model/frozen_saved_model.pb").ScoreTensorFlowModel(new[] { "Softmax", "dense/Relu" }, new[] { "Placeholder", "reshape_input" }))
.Append(mlContext.Transforms.Concatenate("Features", "Softmax", "dense/Relu"))
.Append(mlContext.MulticlassClassification.Trainers.LightGbm("Label", "Features"));
@@ -662,7 +662,7 @@ private void ExecuteTFTransformMNISTConvTrainingTest(bool shuffle, int? shuffleS
preprocessedTestData = testData;
}
- var pipe = mlContext.Transforms.CopyColumns(("Features", "Placeholder"))
+ var pipe = mlContext.Transforms.CopyColumns("Features", "Placeholder")
.Append(mlContext.Model.LoadTensorFlowModel(modelLocation).RetrainTensorFlowModel(
inputColumnNames: new[] { "Features" },
outputColumnNames: new[] { "Prediction" },
@@ -729,7 +729,7 @@ public void TensorFlowTransformMNISTConvSavedModelTest()
var trainData = reader.Load(GetDataPath(TestDatasets.mnistTiny28.trainFilename));
var testData = reader.Load(GetDataPath(TestDatasets.mnistOneClass.testFilename));
- var pipe = mlContext.Transforms.CopyColumns(("reshape_input", "Placeholder"))
+ var pipe = mlContext.Transforms.CopyColumns("reshape_input", "Placeholder")
.Append(mlContext.Model.LoadTensorFlowModel("mnist_model").ScoreTensorFlowModel(new[] { "Softmax", "dense/Relu" }, new[] { "Placeholder", "reshape_input" }))
.Append(mlContext.Transforms.Concatenate("Features", new[] { "Softmax", "dense/Relu" }))
.Append(mlContext.MulticlassClassification.Trainers.LightGbm("Label", "Features"));
@@ -898,7 +898,7 @@ public void TensorFlowTransformCifarSavedModel()
new TextLoader.Column("Name", DataKind.String, 1),
}
);
- var images = mlContext.Transforms.LoadImages(imageFolder, ("ImageReal", "ImagePath")).Fit(data).Transform(data);
+ var images = mlContext.Transforms.LoadImages("ImageReal", imageFolder, "ImagePath").Fit(data).Transform(data);
var cropped = mlContext.Transforms.ResizeImages("ImageCropped", imageWidth, imageHeight, "ImageReal").Fit(images).Transform(images);
var pixels = mlContext.Transforms.ExtractPixels("Input", "ImageCropped", interleavePixelColors: true).Fit(cropped).Transform(cropped);
IDataView trans = tensorFlowModel.ScoreTensorFlowModel("Output", "Input").Fit(pixels).Transform(pixels);
@@ -1000,7 +1000,7 @@ public void TensorFlowSentimentClassificationTest()
// The second pipeline 'tfEnginePipe' takes the resized integer vector and passes it to TensoFlow and gets the classification scores.
var estimator = mlContext.Transforms.Text.TokenizeIntoWords("TokenizedWords", "Sentiment_Text")
.Append(mlContext.Transforms.Conversion.MapValue(lookupMap, lookupMap.Schema["Words"], lookupMap.Schema["Ids"],
- new ColumnOptions[] { ("Features", "TokenizedWords") }));
+ new[] { new InputOutputColumnPair("Features", "TokenizedWords") }));
var model = estimator.Fit(dataView);
var dataPipe = mlContext.Model.CreatePredictionEngine(model);
@@ -1008,7 +1008,7 @@ public void TensorFlowSentimentClassificationTest()
// c.f. https://github.com/dotnet/machinelearning-testdata/blob/master/Microsoft.ML.TensorFlow.TestModels/sentiment_model/README.md
string modelLocation = @"sentiment_model";
var pipelineModel = mlContext.Model.LoadTensorFlowModel(modelLocation).ScoreTensorFlowModel(new[] { "Prediction/Softmax" }, new[] { "Features" })
- .Append(mlContext.Transforms.CopyColumns(("Prediction", "Prediction/Softmax")))
+ .Append(mlContext.Transforms.CopyColumns("Prediction", "Prediction/Softmax"))
.Fit(dataView);
var tfEnginePipe = mlContext.Model.CreatePredictionEngine(pipelineModel);
@@ -1052,7 +1052,7 @@ public void TensorFlowStringTest()
var dataview = mlContext.Data.CreateTextLoader().Load(new MultiFileSource(null));
var pipeline = tensorFlowModel.ScoreTensorFlowModel(new[] { "Original_A", "Joined_Splited_Text" }, new[] { "A", "B" })
- .Append(mlContext.Transforms.CopyColumns(("AOut", "Original_A"), ("BOut", "Joined_Splited_Text")));
+ .Append(mlContext.Transforms.CopyColumns(new[] { new InputOutputColumnPair("AOut", "Original_A"), new InputOutputColumnPair("BOut", "Joined_Splited_Text") }));
var transformer = mlContext.Model.CreatePredictionEngine(pipeline.Fit(dataview));
var input = new TextInput
diff --git a/test/Microsoft.ML.Tests/Transformers/CategoricalTests.cs b/test/Microsoft.ML.Tests/Transformers/CategoricalTests.cs
index 5161071a1a..5d27514eb1 100644
--- a/test/Microsoft.ML.Tests/Transformers/CategoricalTests.cs
+++ b/test/Microsoft.ML.Tests/Transformers/CategoricalTests.cs
@@ -300,9 +300,9 @@ public void TestOldSavingAndLoading()
var data = new[] { new TestClass() { A = 1, B = 2, C = 3, }, new TestClass() { A = 4, B = 5, C = 6 } };
var dataView = ML.Data.LoadFromEnumerable(data);
var pipe = ML.Transforms.Categorical.OneHotEncoding(new[]{
- new OneHotEncodingEstimator.ColumnOptions("TermA", "A"),
- new OneHotEncodingEstimator.ColumnOptions("TermB", "B"),
- new OneHotEncodingEstimator.ColumnOptions("TermC", "C")
+ new InputOutputColumnPair("TermA", "A"),
+ new InputOutputColumnPair("TermB", "B"),
+ new InputOutputColumnPair("TermC", "C")
});
var result = pipe.Fit(dataView).Transform(dataView);
var resultRoles = new RoleMappedData(result);
diff --git a/test/Microsoft.ML.Tests/Transformers/FeatureSelectionTests.cs b/test/Microsoft.ML.Tests/Transformers/FeatureSelectionTests.cs
index 7bcf688e1c..a7e8c685cf 100644
--- a/test/Microsoft.ML.Tests/Transformers/FeatureSelectionTests.cs
+++ b/test/Microsoft.ML.Tests/Transformers/FeatureSelectionTests.cs
@@ -182,9 +182,9 @@ public void MutualInformationSelectionWorkout()
var est = ML.Transforms.FeatureSelection.SelectFeaturesBasedOnMutualInformation("FeatureSelect", "VectorFloat", slotsInOutput: 1, labelColumnName: "Label")
.Append(ML.Transforms.FeatureSelection.SelectFeaturesBasedOnMutualInformation(labelColumnName: "Label", slotsInOutput: 2, numberOfBins: 100,
- columns: new ColumnOptions[] {
- ("out1", "VectorFloat"),
- ("out2", "VectorDouble")
+ columns: new[] {
+ new InputOutputColumnPair("out1", "VectorFloat"),
+ new InputOutputColumnPair("out2", "VectorDouble")
}));
TestEstimatorCore(est, data);
diff --git a/test/Microsoft.ML.Tests/Transformers/KeyToBinaryVectorEstimatorTest.cs b/test/Microsoft.ML.Tests/Transformers/KeyToBinaryVectorEstimatorTest.cs
index 0b9d1dd190..4dfe700764 100644
--- a/test/Microsoft.ML.Tests/Transformers/KeyToBinaryVectorEstimatorTest.cs
+++ b/test/Microsoft.ML.Tests/Transformers/KeyToBinaryVectorEstimatorTest.cs
@@ -51,7 +51,7 @@ public void KeyToBinaryVectorWorkout()
new ValueToKeyMappingEstimator.ColumnOptions("TermC", "C", addKeyValueAnnotationsAsText:true)
}).Fit(dataView).Transform(dataView);
- var pipe = ML.Transforms.Conversion.MapKeyToBinaryVector(("CatA", "TermA"), ("CatC", "TermC"));
+ var pipe = ML.Transforms.Conversion.MapKeyToBinaryVector(new[] { new InputOutputColumnPair("CatA", "TermA"), new InputOutputColumnPair("CatC", "TermC") });
TestEstimatorCore(pipe, dataView);
Done();
}
@@ -105,7 +105,12 @@ public void TestMetadataPropagation()
var termTransformer = termEst.Fit(dataView);
dataView = termTransformer.Transform(dataView);
- var pipe = ML.Transforms.Conversion.MapKeyToBinaryVector(("CatA", "TA"), ("CatB", "TB"), ("CatC", "TC"), ("CatD", "TD"));
+ var pipe = ML.Transforms.Conversion.MapKeyToBinaryVector(new[] {
+ new InputOutputColumnPair("CatA", "TA"),
+ new InputOutputColumnPair("CatB", "TB"),
+ new InputOutputColumnPair("CatC", "TC"),
+ new InputOutputColumnPair("CatD", "TD")
+ });
var result = pipe.Fit(dataView).Transform(dataView);
ValidateMetadata(result);
@@ -155,7 +160,7 @@ public void TestOldSavingAndLoading()
});
var transformer = est.Fit(dataView);
dataView = transformer.Transform(dataView);
- var pipe = ML.Transforms.Conversion.MapKeyToBinaryVector(("CatA", "TermA"), ("CatB", "TermB"), ("CatC", "TermC"));
+ var pipe = ML.Transforms.Conversion.MapKeyToBinaryVector(new[] { new InputOutputColumnPair("CatA", "TermA"), new InputOutputColumnPair("CatB", "TermB"), new InputOutputColumnPair("CatC", "TermC") });
var result = pipe.Fit(dataView).Transform(dataView);
var resultRoles = new RoleMappedData(result);
using (var ms = new MemoryStream())
diff --git a/test/Microsoft.ML.Tests/Transformers/NAIndicatorTests.cs b/test/Microsoft.ML.Tests/Transformers/NAIndicatorTests.cs
index fe1b0a8d9f..a5bb6f7c1c 100644
--- a/test/Microsoft.ML.Tests/Transformers/NAIndicatorTests.cs
+++ b/test/Microsoft.ML.Tests/Transformers/NAIndicatorTests.cs
@@ -44,7 +44,12 @@ public void NAIndicatorWorkout()
};
var dataView = ML.Data.LoadFromEnumerable(data);
- var pipe = ML.Transforms.IndicateMissingValues(new ColumnOptions[] { ("NAA", "A"), ("NAB", "B"), ("NAC", "C"), ("NAD", "D") });
+ var pipe = ML.Transforms.IndicateMissingValues(new[] {
+ new InputOutputColumnPair("NAA", "A"),
+ new InputOutputColumnPair("NAB", "B"),
+ new InputOutputColumnPair("NAC", "C"),
+ new InputOutputColumnPair("NAD", "D")
+ });
TestEstimatorCore(pipe, dataView);
Done();
}
@@ -67,7 +72,12 @@ public void TestOldSavingAndLoading()
};
var dataView = ML.Data.LoadFromEnumerable(data);
- var pipe = ML.Transforms.IndicateMissingValues(new ColumnOptions[] { ("NAA", "A"), ("NAB", "B"), ("NAC", "C"), ("NAD", "D") });
+ var pipe = ML.Transforms.IndicateMissingValues(new[] {
+ new InputOutputColumnPair("NAA", "A"),
+ new InputOutputColumnPair("NAB", "B"),
+ new InputOutputColumnPair("NAC", "C"),
+ new InputOutputColumnPair("NAD", "D")
+ });
var result = pipe.Fit(dataView).Transform(dataView);
var resultRoles = new RoleMappedData(result);
using (var ms = new MemoryStream())
@@ -92,10 +102,12 @@ public void NAIndicatorFileOutput()
var data = reader.Load(new MultiFileSource(dataPath)).AsDynamic;
var wrongCollection = new[] { new TestClass() { A = 1, B = 3, C = new float[2] { 1, 2 }, D = new double[2] { 3, 4 } } };
var invalidData = ML.Data.LoadFromEnumerable(wrongCollection);
- var est = ML.Transforms.IndicateMissingValues(new ColumnOptions[]
+ var est = ML.Transforms.IndicateMissingValues(new[]
{
- ("A", "ScalarFloat"), ("B", "ScalarDouble"),
- ("C", "VectorFloat"), ("D", "VectorDoulbe")
+ new InputOutputColumnPair("A", "ScalarFloat"),
+ new InputOutputColumnPair("B", "ScalarDouble"),
+ new InputOutputColumnPair("C", "VectorFloat"),
+ new InputOutputColumnPair("D", "VectorDoulbe")
});
TestEstimatorCore(est, data, invalidInput: invalidData);
@@ -125,7 +137,7 @@ public void NAIndicatorMetadataTest()
var dataView = ML.Data.LoadFromEnumerable(data);
var pipe = ML.Transforms.Categorical.OneHotEncoding("CatA", "A");
- var newpipe = pipe.Append(ML.Transforms.IndicateMissingValues(("NAA", "CatA")));
+ var newpipe = pipe.Append(ML.Transforms.IndicateMissingValues("NAA", "CatA"));
var result = newpipe.Fit(dataView).Transform(dataView);
Assert.True(result.Schema.TryGetColumnIndex("NAA", out var col));
// Check that the column is normalized.
diff --git a/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs b/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs
index 54cb4da52a..5b0b01abc7 100644
--- a/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs
+++ b/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs
@@ -225,7 +225,7 @@ public void SimpleConstructorsAndExtensions()
var est1 = new NormalizingEstimator(Env, "float4");
var est2 = new NormalizingEstimator(Env, NormalizingEstimator.NormalizationMode.MinMax, ("float4", "float4"));
var est3 = new NormalizingEstimator(Env, new NormalizingEstimator.MinMaxColumnOptions("float4"));
- var est4 = ML.Transforms.Normalize(NormalizingEstimator.NormalizationMode.MinMax, ("float4", "float4"));
+ var est4 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.MinMax);
var est5 = ML.Transforms.Normalize("float4");
var data1 = est1.Fit(data).Transform(data);
@@ -246,7 +246,7 @@ public void SimpleConstructorsAndExtensions()
// Tests for MeanVariance
var est6 = new NormalizingEstimator(Env, NormalizingEstimator.NormalizationMode.MeanVariance, ("float4", "float4"));
var est7 = new NormalizingEstimator(Env, new NormalizingEstimator.MeanVarianceColumnOptions("float4"));
- var est8 = ML.Transforms.Normalize(NormalizingEstimator.NormalizationMode.MeanVariance, ("float4", "float4"));
+ var est8 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.MeanVariance);
var data6 = est6.Fit(data).Transform(data);
var data7 = est7.Fit(data).Transform(data);
@@ -259,7 +259,7 @@ public void SimpleConstructorsAndExtensions()
// Tests for LogMeanVariance
var est9 = new NormalizingEstimator(Env, NormalizingEstimator.NormalizationMode.LogMeanVariance, ("float4", "float4"));
var est10 = new NormalizingEstimator(Env, new NormalizingEstimator.LogMeanVarianceColumnOptions("float4"));
- var est11 = ML.Transforms.Normalize(NormalizingEstimator.NormalizationMode.LogMeanVariance, ("float4", "float4"));
+ var est11 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.LogMeanVariance);
var data9 = est9.Fit(data).Transform(data);
var data10 = est10.Fit(data).Transform(data);
@@ -272,7 +272,7 @@ public void SimpleConstructorsAndExtensions()
// Tests for Binning
var est12 = new NormalizingEstimator(Env, NormalizingEstimator.NormalizationMode.Binning, ("float4", "float4"));
var est13 = new NormalizingEstimator(Env, new NormalizingEstimator.BinningColumnOptions("float4"));
- var est14 = ML.Transforms.Normalize(NormalizingEstimator.NormalizationMode.Binning, ("float4", "float4"));
+ var est14 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.Binning);
var data12 = est12.Fit(data).Transform(data);
var data13 = est13.Fit(data).Transform(data);
@@ -285,7 +285,7 @@ public void SimpleConstructorsAndExtensions()
// Tests for SupervisedBinning
var est15 = new NormalizingEstimator(Env, NormalizingEstimator.NormalizationMode.SupervisedBinning, ("float4", "float4"));
var est16 = new NormalizingEstimator(Env, new NormalizingEstimator.SupervisedBinningColumOptions("float4"));
- var est17 = ML.Transforms.Normalize(NormalizingEstimator.NormalizationMode.SupervisedBinning, ("float4", "float4"));
+ var est17 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.SupervisedBinning);
var data15 = est15.Fit(data).Transform(data);
var data16 = est16.Fit(data).Transform(data);
@@ -314,11 +314,11 @@ public void NormalizerExperimentalExtensions()
var data = loader.Load(dataPath);
// Normalizer Extensions
- var est1 = ML.Transforms.Normalize(NormalizingEstimator.NormalizationMode.MinMax, ("float4", "float4"));
- var est2 = ML.Transforms.Normalize(NormalizingEstimator.NormalizationMode.MeanVariance, ("float4", "float4"));
- var est3 = ML.Transforms.Normalize(NormalizingEstimator.NormalizationMode.LogMeanVariance, ("float4", "float4"));
- var est4 = ML.Transforms.Normalize(NormalizingEstimator.NormalizationMode.Binning, ("float4", "float4"));
- var est5 = ML.Transforms.Normalize(NormalizingEstimator.NormalizationMode.SupervisedBinning, ("float4", "float4"));
+ var est1 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.MinMax);
+ var est2 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.MeanVariance);
+ var est3 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.LogMeanVariance);
+ var est4 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.Binning);
+ var est5 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.SupervisedBinning);
// Normalizer Extensions (Experimental)
var est6 = ML.Transforms.NormalizeMinMax("float4", "float4");
diff --git a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs
index f6de4553db..090ab7e9d4 100644
--- a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs
+++ b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs
@@ -515,7 +515,7 @@ public void ValueMappingWorkout()
};
// Workout on value mapping
- var est = ML.Transforms.Conversion.MapValue(keyValuePairs, new ColumnOptions[] { ("D", "A"), ("E", "B"), ("F", "C") });
+ var est = ML.Transforms.Conversion.MapValue(keyValuePairs, new[] { new InputOutputColumnPair("D", "A"), new InputOutputColumnPair("E", "B"), new InputOutputColumnPair("F", "C") });
TestEstimatorCore(est, validFitInput: dataView, invalidInput: badDataView);
}
@@ -534,7 +534,7 @@ public void ValueMappingValueTypeIsVectorWorkout()
};
// Workout on value mapping
- var est = ML.Transforms.Conversion.MapValue(keyValuePairs, new ColumnOptions[] { ("D", "A"), ("E", "B"), ("F", "C") });
+ var est = ML.Transforms.Conversion.MapValue(keyValuePairs, new[] { new InputOutputColumnPair("D", "A"), new InputOutputColumnPair("E", "B"), new InputOutputColumnPair("F", "C") });
TestEstimatorCore(est, validFitInput: dataView, invalidInput: badDataView);
}
@@ -555,7 +555,7 @@ public void ValueMappingInputIsVectorWorkout()
};
var est = ML.Transforms.Text.TokenizeIntoWords("TokenizeB", "B")
- .Append(ML.Transforms.Conversion.MapValue(keyValuePairs, new ColumnOptions[] { ("VecB", "TokenizeB") }));
+ .Append(ML.Transforms.Conversion.MapValue("VecB", keyValuePairs, "TokenizeB"));
TestEstimatorCore(est, validFitInput: dataView, invalidInput: badDataView);
}