Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,19 @@ private static TMetrics GetAverageMetrics(IEnumerable<TMetrics> metrics, TMetric

private static double[] GetAverageOfNonNaNScoresInNestedEnumerable(IEnumerable<IEnumerable<double>> results)
{
if (results.All(result => result == null))
{
// If all nested enumerables are null, we say the average is a null enumerable as well.
// This is expected to happen on Multiclass metrics where the TopKAccuracyForAllK
// array can be null if the topKPredictionCount isn't a valid number.
// In that case all of the "results" enumerables will be null anyway, and so
// returning null is the expected solution.
return null;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd recommend taking the average of the non-null elements. In the TopKAccuracyForAllK case, since all are expected to be null, we would check for all values being null, and return null.

That would be a modification of GetAverageOfNonNaNScores() below.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, by only modifying GetAverageOfNonNaNScoresInNestedEnumerable


// In case there are only some null elements, we'll ignore them:
results = results.Where(result => result != null);

double[] arr = new double[results.ElementAt(0).Count()];
for (int i = 0; i < arr.Length; i++)
{
Expand Down
45 changes: 37 additions & 8 deletions test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -42,19 +42,48 @@ public void AutoFitBinaryTest()
Assert.NotNull(result.BestRun.TrainerName);
}

[Theory]
[InlineData(true)]
[InlineData(false)]
public void AutoFitMultiTest(bool useNumberOfCVFolds)
{
    var context = new MLContext(0);
    var columnInference = context.Auto().InferColumns(DatasetUtil.TrivialMulticlassDatasetPath, DatasetUtil.TrivialMulticlassDatasetLabel);
    var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
    var trainData = textLoader.Load(DatasetUtil.TrivialMulticlassDatasetPath);

    if (useNumberOfCVFolds)
    {
        // When setting numberOfCVFolds the result object is a
        // CrossValidationExperimentResults<> object.
        uint numberOfCVFolds = 5;
        var result = context.Auto()
            .CreateMulticlassClassificationExperiment(0)
            .Execute(trainData, numberOfCVFolds, DatasetUtil.TrivialMulticlassDatasetLabel);

        // Only the first fold's metrics are checked here, matching the behavior
        // of the original (pre-Theory) version of this test.
        Assert.True(result.BestRun.Results.First().ValidationMetrics.MicroAccuracy >= 0.7);
        var scoredData = result.BestRun.Results.First().Model.Transform(trainData);
        Assert.Equal(NumberDataViewType.Single, scoredData.Schema[DefaultColumnNames.PredictedLabel].Type);
    }
    else
    {
        // When using this other API, if the trainset is under the
        // crossValRowCountThreshold, AutoML will also perform CrossValidation,
        // but through a very different path than the one above
        // (through a CrossValSummaryRunner), and will return
        // a different type of object as "result", which would now be
        // simply an ExperimentResult<> object.
        int crossValRowCountThreshold = 15000;
        trainData = context.Data.TakeRows(trainData, crossValRowCountThreshold - 1);
        var result = context.Auto()
            .CreateMulticlassClassificationExperiment(0)
            .Execute(trainData, DatasetUtil.TrivialMulticlassDatasetLabel);

        Assert.True(result.BestRun.ValidationMetrics.MicroAccuracy >= 0.7);
        var scoredData = result.BestRun.Model.Transform(trainData);
        Assert.Equal(NumberDataViewType.Single, scoredData.Schema[DefaultColumnNames.PredictedLabel].Type);
    }
}

[TensorFlowFact]
Expand Down