Skip to content

Commit 1fe8d51

Browse files
committed
Address comments
1 parent aa0b9c2 commit 1fe8d51

File tree

11 files changed

+50
-51
lines changed

11 files changed

+50
-51
lines changed

src/Microsoft.ML.Data/Prediction/Calibrator.cs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
using Microsoft.ML.CommandLine;
1515
using Microsoft.ML.Data;
1616
using Microsoft.ML.EntryPoints;
17-
using Microsoft.ML.Internal.Internallearn;
1817
using Microsoft.ML.Internal.Utilities;
1918
using Microsoft.ML.Model;
2019
using Microsoft.ML.Model.OnnxConverter;

src/Microsoft.ML.FastTree/BoostingFastTree.cs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -63,12 +63,12 @@ private protected override void CheckOptions(IChannel ch)
6363
private protected override TreeLearner ConstructTreeLearner(IChannel ch)
6464
{
6565
return new LeastSquaresRegressionTreeLearner(
66-
TrainSet, FastTreeTrainerOptions.NumberOfLeaves, FastTreeTrainerOptions.MinExampleCountPerLeaf, FastTreeTrainerOptions.EntropyCoefficient,
66+
TrainSet, FastTreeTrainerOptions.NumberOfLeaves, FastTreeTrainerOptions.MinimumExampleCountPerLeaf, FastTreeTrainerOptions.EntropyCoefficient,
6767
FastTreeTrainerOptions.FeatureFirstUsePenalty, FastTreeTrainerOptions.FeatureReusePenalty, FastTreeTrainerOptions.SoftmaxTemperature,
68-
FastTreeTrainerOptions.HistogramPoolSize, FastTreeTrainerOptions.RandomSeed, FastTreeTrainerOptions.FeatureFractionPerSplit, FastTreeTrainerOptions.FilterZeroLambdas,
69-
FastTreeTrainerOptions.AllowEmptyTrees, FastTreeTrainerOptions.GainConfidenceLevel, FastTreeTrainerOptions.MaxCategoricalGroupsPerNode,
70-
FastTreeTrainerOptions.MaxCategoricalSplitPoints, BsrMaxTreeOutput(), ParallelTraining,
71-
FastTreeTrainerOptions.MinExamplePercentageForCategoricalSplit, FastTreeTrainerOptions.Bundling, FastTreeTrainerOptions.MinExamplesForCategoricalSplit, FastTreeTrainerOptions.Bias);
68+
FastTreeTrainerOptions.HistogramPoolSize, FastTreeTrainerOptions.Seed, FastTreeTrainerOptions.FeatureFractionPerSplit, FastTreeTrainerOptions.FilterZeroLambdas,
69+
FastTreeTrainerOptions.AllowEmptyTrees, FastTreeTrainerOptions.GainConfidenceLevel, FastTreeTrainerOptions.MaximumCategoricalGroupsPerNode,
70+
FastTreeTrainerOptions.MaximumCategoricalSplitPoints, BsrMaxTreeOutput(), ParallelTraining,
71+
FastTreeTrainerOptions.MinimumExampleFractionForCategoricalSplit, FastTreeTrainerOptions.Bundling, FastTreeTrainerOptions.MinimumExamplesForCategoricalSplit, FastTreeTrainerOptions.Bias);
7272
}
7373

7474
private protected override OptimizationAlgorithm ConstructOptimizationAlgorithm(IChannel ch)
@@ -96,7 +96,7 @@ private protected override OptimizationAlgorithm ConstructOptimizationAlgorithm(
9696
optimizationAlgorithm.ObjectiveFunction = ConstructObjFunc(ch);
9797
optimizationAlgorithm.Smoothing = FastTreeTrainerOptions.Smoothing;
9898
optimizationAlgorithm.DropoutRate = FastTreeTrainerOptions.DropoutRate;
99-
optimizationAlgorithm.DropoutRng = new Random(FastTreeTrainerOptions.RandomSeed);
99+
optimizationAlgorithm.DropoutRng = new Random(FastTreeTrainerOptions.Seed);
100100
optimizationAlgorithm.PreScoreUpdateEvent += PrintTestGraph;
101101

102102
return optimizationAlgorithm;

src/Microsoft.ML.FastTree/FastTree.cs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ private protected FastTreeTrainerBase(IHostEnvironment env,
116116
// override with the directly provided values.
117117
FastTreeTrainerOptions.NumberOfLeaves = numLeaves;
118118
FastTreeTrainerOptions.NumberOfTrees = numTrees;
119-
FastTreeTrainerOptions.MinExampleCountPerLeaf = minDatapointsInLeaves;
119+
FastTreeTrainerOptions.MinimumExampleCountPerLeaf = minDatapointsInLeaves;
120120

121121
FastTreeTrainerOptions.LabelColumn = label.Name;
122122
FastTreeTrainerOptions.FeatureColumn = featureColumn;
@@ -199,7 +199,7 @@ private protected void ConvertData(RoleMappedData trainData)
199199
{
200200
AnnotationUtils.TryGetCategoricalFeatureIndices(trainData.Schema.Schema, trainData.Schema.Feature.Value.Index, out CategoricalFeatures);
201201
var useTranspose = UseTranspose(FastTreeTrainerOptions.DiskTranspose, trainData) && (ValidData == null || UseTranspose(FastTreeTrainerOptions.DiskTranspose, ValidData));
202-
var instanceConverter = new ExamplesToFastTreeBins(Host, FastTreeTrainerOptions.MaxBinCountPerFeature, useTranspose, !FastTreeTrainerOptions.FeatureFlocks, FastTreeTrainerOptions.MinExampleCountPerLeaf, GetMaxLabel());
202+
var instanceConverter = new ExamplesToFastTreeBins(Host, FastTreeTrainerOptions.MaximumBinCountPerFeature, useTranspose, !FastTreeTrainerOptions.FeatureFlocks, FastTreeTrainerOptions.MinimumExampleCountPerLeaf, GetMaxLabel());
203203

204204
TrainSet = instanceConverter.FindBinsAndReturnDataset(trainData, PredictionKind, ParallelTraining, CategoricalFeatures, FastTreeTrainerOptions.CategoricalSplit);
205205
FeatureMap = instanceConverter.FeatureMap;
@@ -237,7 +237,7 @@ private protected void TrainCore(IChannel ch)
237237
}
238238
using (Timer.Time(TimerEvent.TotalTrain))
239239
Train(ch);
240-
if (FastTreeTrainerOptions.ExecutionTimes)
240+
if (FastTreeTrainerOptions.ExecutionTime)
241241
PrintExecutionTimes(ch);
242242
TrainedEnsemble = Ensemble;
243243
if (FeatureMap != null)
@@ -424,7 +424,7 @@ private protected bool[] GetActiveFeatures()
424424
if (FastTreeTrainerOptions.FeatureFraction < 1.0)
425425
{
426426
if (_featureSelectionRandom == null)
427-
_featureSelectionRandom = new Random(FastTreeTrainerOptions.FeatureSelectionRandomSeed);
427+
_featureSelectionRandom = new Random(FastTreeTrainerOptions.FeatureSelectionSeed);
428428

429429
for (int i = 0; i < TrainSet.NumFeatures; ++i)
430430
{
@@ -594,7 +594,7 @@ private void GenerateActiveFeatureLists(int numberOfItems)
594594
private protected virtual BaggingProvider CreateBaggingProvider()
595595
{
596596
Contracts.Assert(FastTreeTrainerOptions.BaggingSize > 0);
597-
return new BaggingProvider(TrainSet, FastTreeTrainerOptions.NumberOfLeaves, FastTreeTrainerOptions.RandomSeed, FastTreeTrainerOptions.BaggingExampleFraction);
597+
return new BaggingProvider(TrainSet, FastTreeTrainerOptions.NumberOfLeaves, FastTreeTrainerOptions.Seed, FastTreeTrainerOptions.BaggingExampleFraction);
598598
}
599599

600600
private protected virtual bool ShouldRandomStartOptimizer()
@@ -625,7 +625,7 @@ private protected virtual void Train(IChannel ch)
625625
if (Ensemble.NumTrees < numTotalTrees && ShouldRandomStartOptimizer())
626626
{
627627
ch.Info("Randomizing start point");
628-
OptimizationAlgorithm.TrainingScores.RandomizeScores(FastTreeTrainerOptions.RandomSeed, false);
628+
OptimizationAlgorithm.TrainingScores.RandomizeScores(FastTreeTrainerOptions.Seed, false);
629629
revertRandomStart = true;
630630
}
631631

@@ -712,7 +712,7 @@ private protected virtual void Train(IChannel ch)
712712
{
713713
revertRandomStart = false;
714714
ch.Info("Reverting random score assignment");
715-
OptimizationAlgorithm.TrainingScores.RandomizeScores(FastTreeTrainerOptions.RandomSeed, true);
715+
OptimizationAlgorithm.TrainingScores.RandomizeScores(FastTreeTrainerOptions.Seed, true);
716716
}
717717

718718
#if !NO_STORE

src/Microsoft.ML.FastTree/FastTreeArguments.cs

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -175,15 +175,15 @@ public abstract class TreeOptions : LearnerInputBaseWithGroupId
175175
/// The seed of the random number generator.
176176
/// </summary>
177177
[Argument(ArgumentType.LastOccurenceWins, HelpText = "The seed of the random number generator", ShortName = "r1,RngSeed")]
178-
public int RandomSeed = 123;
178+
public int Seed = 123;
179179

180180
// this random seed is only for active feature selection
181181
/// <summary>
182182
/// The seed of the active feature selection.
183183
/// </summary>
184184
[Argument(ArgumentType.LastOccurenceWins, HelpText = "The seed of the active feature selection", ShortName = "r3,FeatureSelectSeed", Hide = true)]
185185
[TGUI(NotGui = true)]
186-
public int FeatureSelectionRandomSeed = 123;
186+
public int FeatureSelectionSeed = 123;
187187

188188
/// <summary>
189189
/// The entropy (regularization) coefficient between 0 and 1.
@@ -221,26 +221,26 @@ public abstract class TreeOptions : LearnerInputBaseWithGroupId
221221
/// </summary>
222222
[Argument(ArgumentType.LastOccurenceWins, HelpText = "Maximum categorical split groups to consider when splitting on a categorical feature. " +
223223
"Split groups are a collection of split points. This is used to reduce overfitting when " +
224-
"there many categorical features.", ShortName = "mcg")]
225-
public int MaxCategoricalGroupsPerNode = 64;
224+
"there are many categorical features.", ShortName = "mcg,MaxCategoricalGroupsPerNode")]
225+
public int MaximumCategoricalGroupsPerNode = 64;
226226

227227
/// <summary>
228228
/// Maximum categorical split points to consider when splitting on a categorical feature.
229229
/// </summary>
230-
[Argument(ArgumentType.LastOccurenceWins, HelpText = "Maximum categorical split points to consider when splitting on a categorical feature.", ShortName = "maxcat")]
231-
public int MaxCategoricalSplitPoints = 64;
230+
[Argument(ArgumentType.LastOccurenceWins, HelpText = "Maximum categorical split points to consider when splitting on a categorical feature.", ShortName = "maxcat,MaxCategoricalSplitPoints")]
231+
public int MaximumCategoricalSplitPoints = 64;
232232

233233
/// <summary>
234-
/// Minimum categorical example percentage in a bin to consider for a split.
234+
/// Minimum categorical example percentage in a bin to consider for a split. Default is 0.1% of all training examples.
235235
/// </summary>
236236
[Argument(ArgumentType.LastOccurenceWins, HelpText = "Minimum categorical docs percentage in a bin to consider for a split.", ShortName = "mdop,MinDocsPercentageForCategoricalSplit")]
237-
public double MinExamplePercentageForCategoricalSplit = 0.001;
237+
public double MinimumExampleFractionForCategoricalSplit = 0.001;
238238

239239
/// <summary>
240240
/// Minimum categorical example count in a bin to consider for a split.
241241
/// </summary>
242242
[Argument(ArgumentType.LastOccurenceWins, HelpText = "Minimum categorical doc count in a bin to consider for a split.", ShortName = "mdo,MinDocsForCategoricalSplit")]
243-
public int MinExamplesForCategoricalSplit = 100;
243+
public int MinimumExamplesForCategoricalSplit = 100;
244244

245245
/// <summary>
246246
/// Bias for calculating gradient for each feature bin for a categorical feature.
@@ -263,7 +263,7 @@ public abstract class TreeOptions : LearnerInputBaseWithGroupId
263263
/// Maximum number of distinct values (bins) per feature.
264264
/// </summary>
265265
[Argument(ArgumentType.LastOccurenceWins, HelpText = "Maximum number of distinct values (bins) per feature", ShortName = "mb,MaxBins")]
266-
public int MaxBinCountPerFeature = 255; // save one for undefs
266+
public int MaximumBinCountPerFeature = 255; // save one for undefs
267267

268268
/// <summary>
269269
/// Sparsity level needed to use sparse feature representation.
@@ -300,8 +300,8 @@ public abstract class TreeOptions : LearnerInputBaseWithGroupId
300300
/// <summary>
301301
/// Print execution time breakdown to stdout.
302302
/// </summary>
303-
[Argument(ArgumentType.AtMostOnce, HelpText = "Print execution time breakdown to stdout", ShortName = "et")]
304-
public bool ExecutionTimes;
303+
[Argument(ArgumentType.AtMostOnce, HelpText = "Print execution time breakdown to stdout", ShortName = "et,ExecutionTimes")]
304+
public bool ExecutionTime;
305305

306306
// REVIEW: Different from original FastRank arguments (shortname l vs. nl). Different default from TLC FR Wrapper (20 vs. 20).
307307
/// <summary>
@@ -320,7 +320,7 @@ public abstract class TreeOptions : LearnerInputBaseWithGroupId
320320
[Argument(ArgumentType.LastOccurenceWins, HelpText = "The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data", ShortName = "mil,MinDocumentsInLeafs", SortOrder = 3)]
321321
[TGUI(Description = "Minimum number of training instances required to form a leaf", SuggestedSweeps = "1,10,50")]
322322
[TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[] { 1, 10, 50 })]
323-
public int MinExampleCountPerLeaf = Defaults.MinimumExampleCountPerLeaf;
323+
public int MinimumExampleCountPerLeaf = Defaults.MinimumExampleCountPerLeaf;
324324

325325
/// <summary>
326326
/// Total number of decision trees to create in the ensemble.
@@ -332,7 +332,7 @@ public abstract class TreeOptions : LearnerInputBaseWithGroupId
332332
public int NumberOfTrees = Defaults.NumberOfTrees;
333333

334334
/// <summary>
335-
/// The fraction of features (chosen randomly) to use on each iteration.
335+
/// The fraction of features (chosen randomly) to use on each iteration. Use 0.9 if only 90% of features are needed.
336336
/// </summary>
337337
[Argument(ArgumentType.AtMostOnce, HelpText = "The fraction of features (chosen randomly) to use on each iteration", ShortName = "ff")]
338338
public Double FeatureFraction = 1;
@@ -344,7 +344,7 @@ public abstract class TreeOptions : LearnerInputBaseWithGroupId
344344
public int BaggingSize;
345345

346346
/// <summary>
347-
/// Percentage of training examples used in each bag.
347+
/// Percentage of training examples used in each bag. Default is 0.7 (70%).
348348
/// </summary>
349349
[Argument(ArgumentType.AtMostOnce, HelpText = "Percentage of training examples used in each bag", ShortName = "bagfrac,BaggingTrainFraction")]
350350
// REVIEW: sweeping bagfrac doesn't make sense unless 'baggingSize' is non-zero. The 'SuggestedSweeps' here
@@ -354,13 +354,13 @@ public abstract class TreeOptions : LearnerInputBaseWithGroupId
354354
public Double BaggingExampleFraction = 0.7;
355355

356356
/// <summary>
357-
/// The fraction of features (chosen randomly) to use on each split.
357+
/// The fraction of features (chosen randomly) to use on each split. If its value is 0.9, 90% of all features would be used on each split, in expectation.
358358
/// </summary>
359359
[Argument(ArgumentType.AtMostOnce, HelpText = "The fraction of features (chosen randomly) to use on each split", ShortName = "sf,SplitFraction")]
360360
public Double FeatureFractionPerSplit = 1;
361361

362362
/// <summary>
363-
/// Smoothing paramter for tree regularization.
363+
/// Smoothing parameter for tree regularization.
364364
/// </summary>
365365
[Argument(ArgumentType.AtMostOnce, HelpText = "Smoothing parameter for tree regularization", ShortName = "s")]
366366
public Double Smoothing;
@@ -392,7 +392,7 @@ public abstract class TreeOptions : LearnerInputBaseWithGroupId
392392
// REVIEW: Not used.
393393
[Argument(ArgumentType.AtMostOnce, HelpText = "Maximum Number of trees after compression", ShortName = "cmpmax,MaxTreesAfterCompression", Hide = true)]
394394
[TGUI(NotGui = true)]
395-
public int MaxTreeCountAfterCompression = -1;
395+
public int MaximumTreeCountAfterCompression = -1;
396396

397397
/// <summary>
398398
/// Print metrics graph for the first test set.
@@ -425,18 +425,18 @@ internal virtual void Check(IExceptionContext ectx)
425425
ectx.CheckUserArg(0 <= FeatureFraction && FeatureFraction <= 1, nameof(FeatureFraction), "Must be between 0 and 1.");
426426
ectx.CheckUserArg(0 <= FeatureFractionPerSplit && FeatureFractionPerSplit <= 1, nameof(FeatureFractionPerSplit), "Must be between 0 and 1.");
427427
ectx.CheckUserArg(0 <= SoftmaxTemperature, nameof(SoftmaxTemperature), "Must be non-negative.");
428-
ectx.CheckUserArg(0 < MaxBinCountPerFeature, nameof(MaxBinCountPerFeature), "Must greater than 0.");
428+
ectx.CheckUserArg(0 < MaximumBinCountPerFeature, nameof(MaximumBinCountPerFeature), "Must be greater than 0.");
429429
ectx.CheckUserArg(0 <= SparsifyThreshold && SparsifyThreshold <= 1, nameof(SparsifyThreshold), "Must be between 0 and 1.");
430430
ectx.CheckUserArg(0 < NumberOfTrees, nameof(NumberOfTrees), "Must be positive.");
431431
ectx.CheckUserArg(0 <= Smoothing && Smoothing <= 1, nameof(Smoothing), "Must be between 0 and 1.");
432432
ectx.CheckUserArg(0 <= BaggingSize, nameof(BaggingSize), "Must be non-negative.");
433433
ectx.CheckUserArg(0 <= BaggingExampleFraction && BaggingExampleFraction <= 1, nameof(BaggingExampleFraction), "Must be between 0 and 1.");
434434
ectx.CheckUserArg(0 <= FeatureFirstUsePenalty, nameof(FeatureFirstUsePenalty), "Must be non-negative.");
435435
ectx.CheckUserArg(0 <= FeatureReusePenalty, nameof(FeatureReusePenalty), "Must be non-negative.");
436-
ectx.CheckUserArg(0 <= MaxCategoricalGroupsPerNode, nameof(MaxCategoricalGroupsPerNode), "Must be non-negative.");
437-
ectx.CheckUserArg(0 <= MaxCategoricalSplitPoints, nameof(MaxCategoricalSplitPoints), "Must be non-negative.");
438-
ectx.CheckUserArg(0 <= MinExamplePercentageForCategoricalSplit, nameof(MinExamplePercentageForCategoricalSplit), "Must be non-negative.");
439-
ectx.CheckUserArg(0 <= MinExamplesForCategoricalSplit, nameof(MinExamplesForCategoricalSplit), "Must be non-negative.");
436+
ectx.CheckUserArg(0 <= MaximumCategoricalGroupsPerNode, nameof(MaximumCategoricalGroupsPerNode), "Must be non-negative.");
437+
ectx.CheckUserArg(0 <= MaximumCategoricalSplitPoints, nameof(MaximumCategoricalSplitPoints), "Must be non-negative.");
438+
ectx.CheckUserArg(0 <= MinimumExampleFractionForCategoricalSplit, nameof(MinimumExampleFractionForCategoricalSplit), "Must be non-negative.");
439+
ectx.CheckUserArg(0 <= MinimumExamplesForCategoricalSplit, nameof(MinimumExamplesForCategoricalSplit), "Must be non-negative.");
440440
ectx.CheckUserArg(Bundle.None <= Bundling && Bundling <= Bundle.Adjacent, nameof(Bundling), "Must be between 0 and 2.");
441441
ectx.CheckUserArg(Bias >= 0, nameof(Bias), "Must be greater than or equal to zero.");
442442
}

src/Microsoft.ML.FastTree/FastTreeClassification.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ private protected override ObjectiveFunctionBase ConstructObjFunc(IChannel ch)
197197
FastTreeTrainerOptions.MaximumTreeOutput,
198198
FastTreeTrainerOptions.GetDerivativesSampleRate,
199199
FastTreeTrainerOptions.BestStepRankingRegressionTrees,
200-
FastTreeTrainerOptions.RandomSeed,
200+
FastTreeTrainerOptions.Seed,
201201
ParallelTraining);
202202
}
203203

src/Microsoft.ML.FastTree/FastTreeRanking.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ private protected override void Initialize(IChannel ch)
177177
if (FastTreeTrainerOptions.CompressEnsemble)
178178
{
179179
_ensembleCompressor = new LassoBasedEnsembleCompressor();
180-
_ensembleCompressor.Initialize(FastTreeTrainerOptions.NumberOfTrees, TrainSet, TrainSet.Ratings, FastTreeTrainerOptions.RandomSeed);
180+
_ensembleCompressor.Initialize(FastTreeTrainerOptions.NumberOfTrees, TrainSet, TrainSet.Ratings, FastTreeTrainerOptions.Seed);
181181
}
182182
}
183183

@@ -200,7 +200,7 @@ private protected override OptimizationAlgorithm ConstructOptimizationAlgorithm(
200200
private protected override BaggingProvider CreateBaggingProvider()
201201
{
202202
Host.Assert(FastTreeTrainerOptions.BaggingSize > 0);
203-
return new RankingBaggingProvider(TrainSet, FastTreeTrainerOptions.NumberOfLeaves, FastTreeTrainerOptions.RandomSeed, FastTreeTrainerOptions.BaggingExampleFraction);
203+
return new RankingBaggingProvider(TrainSet, FastTreeTrainerOptions.NumberOfLeaves, FastTreeTrainerOptions.Seed, FastTreeTrainerOptions.BaggingExampleFraction);
204204
}
205205

206206
private protected override void PrepareLabels(IChannel ch)
@@ -556,7 +556,7 @@ public LambdaRankObjectiveFunction(Dataset trainset, short[] labels, Options opt
556556
options.MaximumTreeOutput,
557557
options.GetDerivativesSampleRate,
558558
options.BestStepRankingRegressionTrees,
559-
options.RandomSeed)
559+
options.Seed)
560560
{
561561

562562
_labels = labels;

0 commit comments

Comments
 (0)