diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureSelectionTransform.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureSelectionTransform.cs
index 113b4794fb..de98b1ddb0 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureSelectionTransform.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureSelectionTransform.cs
@@ -55,7 +55,7 @@ public static void Example()
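- // specify the parameter `numBins', which controls the number of bins used in the approximation of the mutual information
+ // specify the parameter `numberOfBins`, which controls the number of bins used in the approximation of the mutual information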
// between features and label.
var mutualInfoEst = ml.Transforms.FeatureSelection.SelectFeaturesBasedOnMutualInformation(
- outputColumnName: "FeaturesMISelect", inputColumnName: "FeaturesCountSelect", labelColumn: "Label", slotsInOutput: 5);
+ outputColumnName: "FeaturesMISelect", inputColumnName: "FeaturesCountSelect", labelColumnName: "Label", slotsInOutput: 5);
// Now, we can put the previous two transformations together in a pipeline.
var pipeline = countSelectEst.Append(mutualInfoEst);
diff --git a/src/Microsoft.ML.Transforms/CountFeatureSelection.cs b/src/Microsoft.ML.Transforms/CountFeatureSelection.cs
index 4e719a4cd9..7d16a7936e 100644
--- a/src/Microsoft.ML.Transforms/CountFeatureSelection.cs
+++ b/src/Microsoft.ML.Transforms/CountFeatureSelection.cs
@@ -10,7 +10,6 @@
using Microsoft.ML;
using Microsoft.ML.CommandLine;
using Microsoft.ML.Data;
-using Microsoft.ML.EntryPoints;
using Microsoft.ML.Internal.Utilities;
using Microsoft.ML.Transforms.FeatureSelection;
@@ -54,23 +53,25 @@ public sealed class ColumnOptions
public readonly string Name;
/// Name of the column to transform.
public readonly string InputColumnName;
- /// If the count of non-default values for a slot is greater than or equal to this threshold in the training data, the slot is preserved.
- public readonly long MinCount;
+ /// If the count of non-default values for a slot is greater than or equal to this threshold in the training data, the slot is preserved.
+ public readonly long Count;
/// <summary>
/// Describes the parameters of the feature selection process for a column pair.
/// </summary>
/// <param name="name">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="name"/> will be used as source.</param>
- /// <param name="minCount">If the count of non-default values for a slot is greater than or equal to this threshold in the training data, the slot is preserved.</param>
- public ColumnOptions(string name, string inputColumnName = null, long minCount = Defaults.Count)
+ /// <param name="count">If the count of non-default values for a slot is greater than or equal to this threshold in the training data, the slot is preserved.</param>
+
+ public ColumnOptions(string name, string inputColumnName = null, long count = Defaults.Count)
{
Name = name;
Contracts.CheckValue(Name, nameof(Name));
InputColumnName = inputColumnName ?? name;
Contracts.CheckValue(InputColumnName, nameof(InputColumnName));
- MinCount = minCount;
+ Contracts.CheckParam(count >= 0, nameof(count), "Must be non-negative.");
+ Count = count;
}
}
@@ -183,7 +184,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa
host.CheckUserArg(Utils.Size(options.Columns) > 0, nameof(options.Columns));
host.CheckUserArg(options.Count > 0, nameof(options.Count));
- var columnOptions = options.Columns.Select(inColName => new ColumnOptions(inColName, minCount: options.Count)).ToArray();
+ var columnOptions = options.Columns.Select(inColName => new ColumnOptions(inColName, count: options.Count)).ToArray();
return new CountFeatureSelectingEstimator(env, columnOptions).Fit(input).Transform(input) as IDataTransform;
}
@@ -206,11 +207,11 @@ private static void CreateDropAndCopyColumns(ColumnOptions[] columnOptions, int
selectedCount[i] = 0;
for (int j = 0; j < score.Length; j++)
{
- if (score[j] < columnOptions[i].MinCount)
+ if (score[j] < columnOptions[i].Count)
{
// Adjacent slots are combined into a single range.
int min = j;
- while (j < score.Length && score[j] < columnOptions[i].MinCount)
+ while (j < score.Length && score[j] < columnOptions[i].Count)
j++;
int max = j - 1;
slots.Add((min, max));
diff --git a/src/Microsoft.ML.Transforms/FeatureSelectionCatalog.cs b/src/Microsoft.ML.Transforms/FeatureSelectionCatalog.cs
index 5217043a50..32a977be53 100644
--- a/src/Microsoft.ML.Transforms/FeatureSelectionCatalog.cs
+++ b/src/Microsoft.ML.Transforms/FeatureSelectionCatalog.cs
@@ -14,9 +14,9 @@ public static class FeatureSelectionCatalog
{
///
/// <param name="catalog">The transform's catalog.</param>
- /// <param name="labelColumn">Name of the column to use for labels.</param>
+ /// <param name="labelColumnName">The name of the label column.</param>
/// <param name="slotsInOutput">The maximum number of slots to preserve in the output. The number of slots to preserve is taken across all input columns.</param>
- /// <param name="numBins">Max number of bins used to approximate mutual information between each input column and the label column. Power of 2 recommended.</param>
+ /// <param name="numberOfBins">Max number of bins used to approximate mutual information between each input column and the label column. Power of 2 recommended.</param>
/// <param name="columns">Specifies the names of the input columns for the transformation, and their respective output column names.</param>
///
///
@@ -26,20 +26,20 @@ public static class FeatureSelectionCatalog
///
///
public static MutualInformationFeatureSelectingEstimator SelectFeaturesBasedOnMutualInformation(this TransformsCatalog.FeatureSelectionTransforms catalog,
- string labelColumn = MutualInfoSelectDefaults.LabelColumn,
+ string labelColumnName = MutualInfoSelectDefaults.LabelColumn,
int slotsInOutput = MutualInfoSelectDefaults.SlotsInOutput,
- int numBins = MutualInfoSelectDefaults.NumBins,
+ int numberOfBins = MutualInfoSelectDefaults.NumBins,
params ColumnOptions[] columns)
- => new MutualInformationFeatureSelectingEstimator(CatalogUtils.GetEnvironment(catalog), labelColumn, slotsInOutput, numBins,
+ => new MutualInformationFeatureSelectingEstimator(CatalogUtils.GetEnvironment(catalog), labelColumnName, slotsInOutput, numberOfBins,
ColumnOptions.ConvertToValueTuples(columns));
///
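/// <param name="catalog">The transform's catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
/// <param name="inputColumnName">Name of column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
- /// <param name="labelColumn">Name of the column to use for labels.</param>
+ /// <param name="labelColumnName">The name of the label column.</param>
/// <param name="slotsInOutput">The maximum number of slots to preserve in the output. The number of slots to preserve is taken across all input columns.</param>
- /// <param name="numBins">Max number of bins used to approximate mutual information between each input column and the label column. Power of 2 recommended.</param>
+ /// <param name="numberOfBins">Max number of bins used to approximate mutual information between each input column and the label column. Power of 2 recommended.</param>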
///
///
///
public static MutualInformationFeatureSelectingEstimator SelectFeaturesBasedOnMutualInformation(this TransformsCatalog.FeatureSelectionTransforms catalog,
string outputColumnName, string inputColumnName = null,
- string labelColumn = MutualInfoSelectDefaults.LabelColumn,
+ string labelColumnName = MutualInfoSelectDefaults.LabelColumn,
int slotsInOutput = MutualInfoSelectDefaults.SlotsInOutput,
- int numBins = MutualInfoSelectDefaults.NumBins)
- => new MutualInformationFeatureSelectingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName, labelColumn, slotsInOutput, numBins);
+ int numberOfBins = MutualInfoSelectDefaults.NumBins)
+ => new MutualInformationFeatureSelectingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName, labelColumnName, slotsInOutput, numberOfBins);
///
/// The transform's catalog.
diff --git a/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs b/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs
index 4c0066e1b9..05d67116a2 100644
--- a/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs
+++ b/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs
@@ -66,7 +66,7 @@ internal sealed class Options : TransformInputBase
/// <param name="env">The environment to use.</param>
/// <param name="labelColumn">Name of the column to use for labels.</param>
/// <param name="slotsInOutput">The maximum number of slots to preserve in the output. The number of slots to preserve is taken across all input columns.</param>
- /// <param name="numBins">Max number of bins used to approximate mutual information between each input column and the label column. Power of 2 recommended.</param>
+ /// <param name="numberOfBins">Max number of bins used to approximate mutual information between each input column and the label column. Power of 2 recommended.</param>
/// <param name="columns">Specifies the names of the input columns for the transformation, and their respective output column names.</param>
///
///
@@ -78,7 +78,7 @@ internal sealed class Options : TransformInputBase
internal MutualInformationFeatureSelectingEstimator(IHostEnvironment env,
string labelColumn = Defaults.LabelColumn,
int slotsInOutput = Defaults.SlotsInOutput,
- int numBins = Defaults.NumBins,
+ int numberOfBins = Defaults.NumBins,
params (string outputColumnName, string inputColumnName)[] columns)
{
Contracts.CheckValue(env, nameof(env));
@@ -87,12 +87,12 @@ internal MutualInformationFeatureSelectingEstimator(IHostEnvironment env,
_host.CheckUserArg(Utils.Size(columns) > 0, nameof(columns));
_host.CheckUserArg(slotsInOutput > 0, nameof(slotsInOutput));
_host.CheckNonWhiteSpace(labelColumn, nameof(labelColumn));
- _host.Check(numBins > 1, "numBins must be greater than 1.");
+ _host.Check(numberOfBins > 1, $"{nameof(numberOfBins)} must be greater than 1.");
_columns = columns;
_labelColumn = labelColumn;
_slotsInOutput = slotsInOutput;
- _numBins = numBins;
+ _numBins = numberOfBins;
}
///
diff --git a/test/Microsoft.ML.Tests/Transformers/FeatureSelectionTests.cs b/test/Microsoft.ML.Tests/Transformers/FeatureSelectionTests.cs
index f5c143d793..55d802bc83 100644
--- a/test/Microsoft.ML.Tests/Transformers/FeatureSelectionTests.cs
+++ b/test/Microsoft.ML.Tests/Transformers/FeatureSelectionTests.cs
@@ -42,7 +42,7 @@ public void FeatureSelectionWorkout()
var est = new WordBagEstimator(ML, "bag_of_words", "text")
.AppendCacheCheckpoint(ML)
.Append(ML.Transforms.FeatureSelection.SelectFeaturesBasedOnCount("bag_of_words_count", "bag_of_words", 10)
- .Append(ML.Transforms.FeatureSelection.SelectFeaturesBasedOnMutualInformation("bag_of_words_mi", "bag_of_words", labelColumn: "label")));
+ .Append(ML.Transforms.FeatureSelection.SelectFeaturesBasedOnMutualInformation("bag_of_words_mi", "bag_of_words", labelColumnName: "label")));
var outputPath = GetOutputPath("FeatureSelection", "featureselection.tsv");
using (var ch = Env.Start("save"))
@@ -115,11 +115,11 @@ public void CountFeatureSelectionWorkout()
var data = ML.Data.Cache(reader.Load(new MultiFileSource(dataPath)).AsDynamic);
var columns = new[] {
- new CountFeatureSelectingEstimator.ColumnOptions("FeatureSelectDouble", "VectorDouble", minCount: 1),
- new CountFeatureSelectingEstimator.ColumnOptions("ScalFeatureSelectMissing690", "ScalarFloat", minCount: 690),
- new CountFeatureSelectingEstimator.ColumnOptions("ScalFeatureSelectMissing100", "ScalarFloat", minCount: 100),
- new CountFeatureSelectingEstimator.ColumnOptions("VecFeatureSelectMissing690", "VectorDouble", minCount: 690),
- new CountFeatureSelectingEstimator.ColumnOptions("VecFeatureSelectMissing100", "VectorDouble", minCount: 100)
+ new CountFeatureSelectingEstimator.ColumnOptions("FeatureSelectDouble", "VectorDouble", count: 1),
+ new CountFeatureSelectingEstimator.ColumnOptions("ScalFeatureSelectMissing690", "ScalarFloat", count: 690),
+ new CountFeatureSelectingEstimator.ColumnOptions("ScalFeatureSelectMissing100", "ScalarFloat", count: 100),
+ new CountFeatureSelectingEstimator.ColumnOptions("VecFeatureSelectMissing690", "VectorDouble", count: 690),
+ new CountFeatureSelectingEstimator.ColumnOptions("VecFeatureSelectMissing100", "VectorDouble", count: 100)
};
var est = ML.Transforms.FeatureSelection.SelectFeaturesBasedOnCount("FeatureSelect", "VectorFloat", count: 1)
.Append(ML.Transforms.FeatureSelection.SelectFeaturesBasedOnCount(columns));
@@ -182,8 +182,8 @@ public void MutualInformationSelectionWorkout()
var data = reader.Load(new MultiFileSource(dataPath)).AsDynamic;
- var est = ML.Transforms.FeatureSelection.SelectFeaturesBasedOnMutualInformation("FeatureSelect", "VectorFloat", slotsInOutput: 1, labelColumn: "Label")
- .Append(ML.Transforms.FeatureSelection.SelectFeaturesBasedOnMutualInformation(labelColumn: "Label", slotsInOutput: 2, numBins: 100,
+ var est = ML.Transforms.FeatureSelection.SelectFeaturesBasedOnMutualInformation("FeatureSelect", "VectorFloat", slotsInOutput: 1, labelColumnName: "Label")
+ .Append(ML.Transforms.FeatureSelection.SelectFeaturesBasedOnMutualInformation(labelColumnName: "Label", slotsInOutput: 2, numberOfBins: 100,
columns: new ColumnOptions[] {
("out1", "VectorFloat"),
("out2", "VectorDouble")
@@ -220,7 +220,7 @@ public void TestMutualInformationOldSavingAndLoading()
var dataView = reader.Load(new MultiFileSource(dataPath)).AsDynamic;
- var pipe = ML.Transforms.FeatureSelection.SelectFeaturesBasedOnMutualInformation("FeatureSelect", "VectorFloat", slotsInOutput: 1, labelColumn: "Label");
+ var pipe = ML.Transforms.FeatureSelection.SelectFeaturesBasedOnMutualInformation("FeatureSelect", "VectorFloat", slotsInOutput: 1, labelColumnName: "Label");
var result = pipe.Fit(dataView).Transform(dataView);
var resultRoles = new RoleMappedData(result);