From 2e4d4b071b82028230566e510d0c8eb04ceb7f40 Mon Sep 17 00:00:00 2001
From: Rogan Carr <rocarr@microsoft.com>
Date: Tue, 19 Feb 2019 16:55:06 -0800
Subject: [PATCH 1/9] Adding functional tests for all training and evaluation
 tasks

---
 test/Microsoft.ML.Functional.Tests/Common.cs  |  71 +++++
 .../Datasets/Iris.cs                          |  82 ++++++
 .../Datasets/MnistOneClass.cs                 |  24 ++
 .../Datasets/Sentiment.cs                     |  24 ++
 .../Datasets/TrivialMatrixFactorization.cs    |  45 +++
 .../Evaluation.cs                             | 278 ++++++++++++++++++
 .../Prediction.cs                             |   3 +-
 .../Validation.cs                             |   6 +-
 test/Microsoft.ML.TestFramework/Datasets.cs   |  12 +
 .../Scenarios/Api/Estimators/Evaluation.cs    |  39 ---
 10 files changed, 542 insertions(+), 42 deletions(-)
 create mode 100644 test/Microsoft.ML.Functional.Tests/Datasets/Iris.cs
 create mode 100644 test/Microsoft.ML.Functional.Tests/Datasets/MnistOneClass.cs
 create mode 100644 test/Microsoft.ML.Functional.Tests/Datasets/Sentiment.cs
 create mode 100644 test/Microsoft.ML.Functional.Tests/Datasets/TrivialMatrixFactorization.cs
 create mode 100644 test/Microsoft.ML.Functional.Tests/Evaluation.cs
 delete mode 100644 test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Evaluation.cs
diff --git a/test/Microsoft.ML.Functional.Tests/Common.cs b/test/Microsoft.ML.Functional.Tests/Common.cs
index 5c80ab21a7..b097d7ddf1 100644
--- a/test/Microsoft.ML.Functional.Tests/Common.cs
+++ b/test/Microsoft.ML.Functional.Tests/Common.cs
@@ -7,6 +7,7 @@
 using System.Linq;
 using Microsoft.Data.DataView;
 using Microsoft.ML.Data;
+using Microsoft.ML.Data.Evaluators.Metrics;
 using Microsoft.ML.Functional.Tests.Datasets;
 using Xunit;
 
@@ -160,6 +161,76 @@ public static void AssertEqual(TypeTestData testType1, TypeTestData testType2)
             Assert.True(testType1.Ug.Equals(testType2.Ug));
         }
 
+        /// <summary>
+        /// Check that a <see cref="AnomalyDetectionMetrics"/> object is valid.
+        /// </summary>
+        /// <remarks>
+        /// TODO #2644: At times, AnomalyDetection.Evaluate will return a set of NaN metrics.
+        /// </remarks>
+        /// <param name="metrics">The metrics object.</param>
+        public static void CheckMetrics(AnomalyDetectionMetrics metrics)
+        {
+            // Perform sanity checks on the metrics.
+            Assert.InRange(metrics.Auc, 0, 1);
+            Assert.InRange(metrics.DrAtK, 0, 1);
+        }
+
+        /// <summary>
+        /// Check that a <see cref="BinaryClassificationMetrics"/> object is valid.
+        /// </summary>
+        /// <param name="metrics">The metrics object.</param>
+        public static void CheckMetrics(BinaryClassificationMetrics metrics)
+        {
+            // Perform sanity checks on the metrics.
+            Assert.InRange(metrics.Accuracy, 0, 1);
+            Assert.InRange(metrics.Auc, 0, 1);
+            Assert.InRange(metrics.Auprc, 0, 1);
+            Assert.InRange(metrics.F1Score, 0, 1);
+            Assert.InRange(metrics.NegativePrecision, 0, 1);
+            Assert.InRange(metrics.NegativeRecall, 0, 1);
+            Assert.InRange(metrics.PositivePrecision, 0, 1);
+            Assert.InRange(metrics.PositiveRecall, 0, 1);
+        }
+
+        /// <summary>
+        /// Check that a <see cref="ClusteringMetrics"/> object is valid.
+        /// </summary>
+        /// <param name="metrics">The metrics object.</param>
+        public static void CheckMetrics(ClusteringMetrics metrics)
+        {
+            // Perform sanity checks on the metrics.
+            Assert.True(metrics.AvgMinScore >= 0);
+            Assert.True(metrics.Dbi >= 0);
+            if (!double.IsNaN(metrics.Nmi))
+                Assert.True(metrics.Nmi >= 0 && metrics.Nmi <= 1);
+        }
+
+        /// <summary>
+        /// Check that a <see cref="MultiClassClassifierMetrics"/> object is valid.
+        /// </summary>
+        /// <param name="metrics">The metrics object.</param>
+        public static void CheckMetrics(MultiClassClassifierMetrics metrics)
+        {
+            // Perform sanity checks on the metrics.
+            Assert.InRange(metrics.AccuracyMacro, 0, 1);
+            Assert.InRange(metrics.AccuracyMicro, 0, 1);
+            Assert.True(metrics.LogLoss >= 0);
+            Assert.InRange(metrics.TopKAccuracy, 0, 1);
+        }
+
+        /// <summary>
+        /// Check that a <see cref="RankerMetrics"/> object is valid.
+        /// </summary>
+        /// <param name="metrics">The metrics object.</param>
+        public static void CheckMetrics(RankerMetrics metrics)
+        {
+            // Perform sanity checks on the metrics.
+            foreach (var dcg in metrics.Dcg)
+                Assert.True(dcg >= 0);
+            foreach (var ndcg in metrics.Ndcg)
+                Assert.InRange(ndcg, 0, 100);
+        }
+
         /// <summary>
         /// Check that a <see cref="RegressionMetrics"/> object is valid.
         /// </summary>
diff --git a/test/Microsoft.ML.Functional.Tests/Datasets/Iris.cs b/test/Microsoft.ML.Functional.Tests/Datasets/Iris.cs
new file mode 100644
index 0000000000..fde531bd6e
--- /dev/null
+++ b/test/Microsoft.ML.Functional.Tests/Datasets/Iris.cs
@@ -0,0 +1,82 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+
+using System;
+using Microsoft.Data.DataView;
+using Microsoft.ML.Data;
+
+namespace Microsoft.ML.Functional.Tests.Datasets
+{
+    /// <summary>
+    /// A class for the Iris dataset.
+    /// </summary>
+    /// <remarks>
+    /// This class has annotations for automatic deserialization from a file, and contains helper methods
+    /// for reading from a file and for generating a random dataset as an IEnumerable.
+    /// </remarks>
+    internal sealed class Iris
+    {
+        [LoadColumn(0)]
+        public float Label { get; set; }
+
+        [LoadColumn(1)]
+        public float SepalLength { get; set; }
+
+        [LoadColumn(2)]
+        public float SepalWidth { get; set; }
+
+        [LoadColumn(4)]
+        public float PetalLength { get; set; }
+
+        [LoadColumn(5)]
+        public float PetalWidth { get; set; }
+
+        /// <summary>
+        /// The list of columns commonly used as features.
+        /// </summary>
+        public static readonly string[] Features = new string[] { "SepalLength", "SepalWidth", "PetalLength", "PetalWidth" };
+
+        public static IDataView LoadAsRankingProblem(MLContext mlContext, string filePath, bool hasHeader, char separatorChar, int seed = 1)
+        {
+            // Load the Iris data.
+            var data = mlContext.Data.ReadFromTextFile<Iris>(filePath, hasHeader: hasHeader, separatorChar: separatorChar);
+
+            // Create a function that generates a random groupId.
+            var rng = new Random(seed);
+            Action<Iris, IrisWithGroup> generateGroupId = (input, output) =>
+            {
+                output.Label = input.Label;
+                // The standard set used in tests has 150 rows
+                output.GroupId = (ushort)rng.Next(0, 30);
+                output.PetalLength = input.PetalLength;
+                output.PetalWidth = input.PetalWidth;
+                output.SepalLength = input.SepalLength;
+                output.SepalWidth = input.SepalWidth;
+            };
+
+            // Describe a pipeline that generates a groupId and converts it to a key.
+            var pipeline = mlContext.Transforms.CustomMapping(generateGroupId, null)
+                .Append(mlContext.Transforms.Conversion.MapValueToKey("GroupId"));
+
+            // Transform the data
+            var transformedData = pipeline.Fit(data).Transform(data);
+
+            return transformedData;
+        }
+    }
+
+    /// <summary>
+    /// A class for the Iris dataset with a GroupId column.
+    /// </summary>
+    internal sealed class IrisWithGroup
+    {
+        public float Label { get; set; }
+        public ushort GroupId { get; set; }
+        public float SepalLength { get; set; }
+        public float SepalWidth { get; set; }
+        public float PetalLength { get; set; }
+        public float PetalWidth { get; set; }
+    }
+}
diff --git a/test/Microsoft.ML.Functional.Tests/Datasets/MnistOneClass.cs b/test/Microsoft.ML.Functional.Tests/Datasets/MnistOneClass.cs
new file mode 100644
index 0000000000..a4ea599d42
--- /dev/null
+++ b/test/Microsoft.ML.Functional.Tests/Datasets/MnistOneClass.cs
@@ -0,0 +1,24 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.ML.Data;
+
+namespace Microsoft.ML.Functional.Tests.Datasets
+{
+    /// <summary>
+    /// A class containing one property per <see cref="DataKind"/>.
+    /// </summary>
+    /// <remarks>
+    /// This class has annotations for automatic deserialization from a file, and contains helper methods
+    /// for reading from a file and for generating a random dataset as an IEnumerable.
+    /// </remarks>
+    internal sealed class MnistOneClass
+    {
+        [LoadColumn(0)]
+        public float Label { get; set; }
+
+        [LoadColumn(1, 784), VectorType(784)]
+        public float[] Features { get; set; }
+    }
+}
diff --git a/test/Microsoft.ML.Functional.Tests/Datasets/Sentiment.cs b/test/Microsoft.ML.Functional.Tests/Datasets/Sentiment.cs
new file mode 100644
index 0000000000..6886c6e094
--- /dev/null
+++ b/test/Microsoft.ML.Functional.Tests/Datasets/Sentiment.cs
@@ -0,0 +1,24 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.ML.Data;
+
+namespace Microsoft.ML.Functional.Tests.Datasets
+{
+    /// <summary>
+    /// A class containing one property per <see cref="DataKind"/>.
+    /// </summary>
+    /// <remarks>
+    /// This class has annotations for automatic deserialization from a file, and contains helper methods
+    /// for reading from a file and for generating a random dataset as an IEnumerable.
+    /// </remarks>
+    internal sealed class TweetSentiment
+    {
+        [LoadColumn(0), ColumnName("Label")]
+        public bool Sentiment { get; set; }
+
+        [LoadColumn(1)]
+        public string SentimentText { get; set; }
+    }
+}
diff --git a/test/Microsoft.ML.Functional.Tests/Datasets/TrivialMatrixFactorization.cs b/test/Microsoft.ML.Functional.Tests/Datasets/TrivialMatrixFactorization.cs
new file mode 100644
index 0000000000..7d8c1b6398
--- /dev/null
+++ b/test/Microsoft.ML.Functional.Tests/Datasets/TrivialMatrixFactorization.cs
@@ -0,0 +1,45 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+
+using System;
+using Microsoft.Data.DataView;
+using Microsoft.ML.Data;
+
+namespace Microsoft.ML.Functional.Tests.Datasets
+{
+    /// <summary>
+    /// A class for reading in the trivial matrix factorization test dataset.
+    /// </summary>
+    /// <remarks>
+    /// This class has annotations for automatic deserialization from a file, and contains a helper method
+    /// for loading the data from a text file and converting the matrix index columns to keys.
+    /// </remarks>
+    internal sealed class TrivialMatrixFactorization
+    {
+        [LoadColumn(0)]
+        public float Label { get; set; }
+
+        [LoadColumn(1)]
+        public uint MatrixColumnIndex { get; set; }
+
+        [LoadColumn(2)]
+        public uint MatrixRowIndex { get; set; }
+
+        public static IDataView LoadAndFeaturizeFromTextFile(MLContext mlContext, string filePath, bool hasHeader, char separatorChar)
+        {
+            // Load the data from a textfile.
+            var data = mlContext.Data.ReadFromTextFile<TrivialMatrixFactorization>(filePath, hasHeader: hasHeader, separatorChar: separatorChar);
+
+            // Describe a pipeline to translate the uints to keys.
+            var pipeline = mlContext.Transforms.Conversion.MapValueToKey("MatrixColumnIndex")
+                .Append(mlContext.Transforms.Conversion.MapValueToKey("MatrixRowIndex"));
+
+            // Transform the data.
+            var transformedData = pipeline.Fit(data).Transform(data);
+
+            return transformedData;
+        }
+    }
+}
diff --git a/test/Microsoft.ML.Functional.Tests/Evaluation.cs b/test/Microsoft.ML.Functional.Tests/Evaluation.cs
new file mode 100644
index 0000000000..0ada188bb0
--- /dev/null
+++ b/test/Microsoft.ML.Functional.Tests/Evaluation.cs
@@ -0,0 +1,278 @@
+﻿// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.ML.Functional.Tests.Datasets;
+using Microsoft.ML.RunTests;
+using Microsoft.ML.TestFramework;
+using Microsoft.ML.Trainers;
+using Microsoft.ML.Trainers.FastTree;
+using Microsoft.ML.Trainers.KMeans;
+using Microsoft.ML.Trainers.PCA;
+using Xunit;
+using Xunit.Abstractions;
+
+namespace Microsoft.ML.Functional.Tests
+{
+    public class Evaluation : BaseTestClass
+    {
+        public Evaluation(ITestOutputHelper output): base(output)
+        {
+        }
+
+        /// <summary>
+        /// Train and Evaluate: Anomaly Detection.
+        /// </summary>
+        [Fact]
+        public void TrainAndEvaluateAnomalyDetection()
+        {
+            var mlContext = new MLContext(seed: 1, conc: 1);
+
+            var trainData = mlContext.Data.ReadFromTextFile<MnistOneClass>(GetDataPath(TestDatasets.mnistOneClass.trainFilename),
+                hasHeader: TestDatasets.mnistOneClass.fileHasHeader,
+                separatorChar: TestDatasets.mnistOneClass.fileSeparator);
+            var testData = mlContext.Data.ReadFromTextFile<MnistOneClass>(GetDataPath(TestDatasets.mnistOneClass.testFilename),
+                hasHeader: TestDatasets.mnistOneClass.fileHasHeader,
+                separatorChar: TestDatasets.mnistOneClass.fileSeparator);
+
+            // Create a training pipeline.
+            var pipeline = mlContext.AnomalyDetection.Trainers.RandomizedPca();
+
+            // Train the model.
+            var model = pipeline.Fit(trainData);
+
+            // Evaluate the model.
+            //  Note Issue #2464: Using the train dataset will cause NaN metrics to be returned.
+            var scoredTest = model.Transform(testData);
+            var metrics = mlContext.AnomalyDetection.Evaluate(scoredTest);
+
+            // Check that the metrics returned are valid.
+            Common.CheckMetrics(metrics);
+        }
+
+        /// <summary>
+        /// Train and Evaluate: Binary Classification.
+        /// </summary>
+        [Fact]
+        public void TrainAndEvaluateBinaryClassification()
+        {
+            var mlContext = new MLContext(seed: 1, conc: 1);
+
+            var data = mlContext.Data.ReadFromTextFile<TweetSentiment>(GetDataPath(TestDatasets.Sentiment.trainFilename),
+                hasHeader: TestDatasets.Sentiment.fileHasHeader,
+                separatorChar: TestDatasets.Sentiment.fileSeparator);
+
+            // Create a training pipeline.
+            var pipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText")
+                .AppendCacheCheckpoint(mlContext)
+                .Append(mlContext.BinaryClassification.Trainers.LogisticRegression(
+                    new LogisticRegression.Options { NumThreads = 1 }));
+
+            // Train the model.
+            var model = pipeline.Fit(data);
+
+            // Evaluate the model.
+            var scoredData = model.Transform(data);
+            var metrics = mlContext.BinaryClassification.Evaluate(scoredData);
+
+            // Check that the metrics returned are valid.
+            Common.CheckMetrics(metrics);
+        }
+
+        /// <summary>
+        /// Train and Evaluate: Clustering.
+        /// </summary>
+        [Fact]
+        public void TrainAndEvaluateClustering()
+        {
+            var mlContext = new MLContext(seed: 1, conc: 1);
+
+            var data = mlContext.Data.ReadFromTextFile<Iris>(GetDataPath(TestDatasets.iris.trainFilename),
+                hasHeader: TestDatasets.iris.fileHasHeader,
+                separatorChar: TestDatasets.iris.fileSeparator);
+
+            // Create a training pipeline.
+            var pipeline = mlContext.Transforms.Concatenate("Features", Iris.Features)
+                .AppendCacheCheckpoint(mlContext)
+                .Append(mlContext.Clustering.Trainers.KMeans(new KMeansPlusPlusTrainer.Options { NumThreads = 1 }));
+
+            // Train the model.
+            var model = pipeline.Fit(data);
+
+            // Evaluate the model.
+            var scoredData = model.Transform(data);
+            var metrics = mlContext.Clustering.Evaluate(scoredData);
+
+            // Check that the metrics returned are valid.
+            Common.CheckMetrics(metrics);
+        }
+
+        /// <summary>
+        /// Train and Evaluate: Multiclass Classification.
+        /// </summary>
+        [Fact]
+        public void TrainAndEvaluateMulticlassClassification()
+        {
+            var mlContext = new MLContext(seed: 1, conc: 1);
+
+            var data = mlContext.Data.ReadFromTextFile<Iris>(GetDataPath(TestDatasets.iris.trainFilename),
+                hasHeader: TestDatasets.iris.fileHasHeader,
+                separatorChar: TestDatasets.iris.fileSeparator);
+
+            // Create a training pipeline.
+            var pipeline = mlContext.Transforms.Concatenate("Features", Iris.Features)
+                .AppendCacheCheckpoint(mlContext)
+                .Append(mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(
+                    new SdcaMultiClassTrainer.Options { NumThreads = 1}));
+
+            // Train the model.
+            var model = pipeline.Fit(data);
+
+            // Evaluate the model.
+            var scoredData = model.Transform(data);
+            var metrics = mlContext.MulticlassClassification.Evaluate(scoredData);
+
+            // Check that the metrics returned are valid.
+            Common.CheckMetrics(metrics);
+        }
+
+        /// <summary>
+        /// Train and Evaluate: Ranking.
+        /// </summary>
+        [Fact]
+        public void TrainAndEvaluateRanking()
+        {
+            var mlContext = new MLContext(seed: 1, conc: 1);
+
+            var data = Iris.LoadAsRankingProblem(mlContext,
+                GetDataPath(TestDatasets.iris.trainFilename),
+                hasHeader: TestDatasets.iris.fileHasHeader,
+                separatorChar: TestDatasets.iris.fileSeparator);
+
+            // Create a training pipeline.
+            var pipeline = mlContext.Transforms.Concatenate("Features", Iris.Features)
+                .Append(mlContext.Ranking.Trainers.FastTree(new FastTreeRankingTrainer.Options { NumThreads = 1 }));
+
+            // Train the model.
+            var model = pipeline.Fit(data);
+
+            // Evaluate the model.
+            var scoredData = model.Transform(data);
+            var metrics = mlContext.Ranking.Evaluate(scoredData, label: "Label", groupId: "GroupId");
+
+            // Check that the metrics returned are valid.
+            Common.CheckMetrics(metrics);
+        }
+
+        /// <summary>
+        /// Train and Evaluate: Recommendation.
+        /// </summary>
+        [Fact]
+        public void TrainAndEvaluateRecommendation()
+        {
+            var mlContext = new MLContext(seed: 1, conc: 1);
+
+            // Get the dataset.
+            var data = TrivialMatrixFactorization.LoadAndFeaturizeFromTextFile(
+                mlContext,
+                GetDataPath(TestDatasets.trivialMatrixFactorization.trainFilename),
+                TestDatasets.trivialMatrixFactorization.fileHasHeader,
+                TestDatasets.trivialMatrixFactorization.fileSeparator);
+
+            // Create a pipeline to train on the matrix factorization data.
+            var pipeline = mlContext.Recommendation().Trainers.MatrixFactorization(
+                new MatrixFactorizationTrainer.Options{
+                    MatrixColumnIndexColumnName = "MatrixColumnIndex",
+                    MatrixRowIndexColumnName = "MatrixRowIndex",
+                    LabelColumnName = "Label",
+                    NumberOfIterations = 3,
+                    NumberOfThreads = 1,
+                    ApproximationRank = 4,
+                });
+
+            // Train the model.
+            var model = pipeline.Fit(data);
+
+            // Evaluate the model.
+            var scoredData = model.Transform(data);
+            var metrics = mlContext.Recommendation().Evaluate(scoredData);
+
+            // Check that the metrics returned are valid.
+            Common.CheckMetrics(metrics);
+        }
+
+        /// <summary>
+        /// Train and Evaluate: Regression.
+        /// </summary>
+        [Fact]
+        public void TrainAndEvaluateRegression()
+        {
+            var mlContext = new MLContext(seed: 1, conc: 1);
+
+            // Get the dataset.
+            var data = mlContext.Data.CreateTextLoader(TestDatasets.housing.GetLoaderColumns(),
+                    hasHeader: TestDatasets.housing.fileHasHeader, separatorChar: TestDatasets.housing.fileSeparator)
+                .Read(GetDataPath(TestDatasets.housing.trainFilename));
+
+            // Create a pipeline to train on the housing data.
+            var pipeline = mlContext.Transforms.Concatenate("Features", new string[] {
+                    "CrimesPerCapita", "PercentResidental", "PercentNonRetail", "CharlesRiver", "NitricOxides", "RoomsPerDwelling",
+                    "PercentPre40s", "EmploymentDistance", "HighwayDistance", "TaxRate", "TeacherRatio"})
+                .Append(mlContext.Transforms.CopyColumns("Label", "MedianHomeValue"))
+                .Append(mlContext.Regression.Trainers.FastTree(new FastTreeRegressionTrainer.Options { NumThreads = 1 }));
+
+            // Train the model.
+            var model = pipeline.Fit(data);
+
+            // Evaluate the model.
+            var scoredData = model.Transform(data);
+            var metrics = mlContext.Regression.Evaluate(scoredData);
+
+            // Check that the metrics returned are valid.
+            Common.CheckMetrics(metrics);
+        }
+
+        /// <summary>
+        /// Evaluate With Precision-Recall Curves
+        /// </summary>
+        /// <remarks>
+        /// This is currently not possible using the APIs.
+        /// </remarks>
+        [Fact]
+        public void TrainAndEvaluateWithPrecisionRecallCurves()
+        {
+            var mlContext = new MLContext(seed: 1, conc: 1);
+
+            var data = mlContext.Data.ReadFromTextFile<TweetSentiment>(GetDataPath(TestDatasets.Sentiment.trainFilename),
+                hasHeader: TestDatasets.Sentiment.fileHasHeader,
+                separatorChar: TestDatasets.Sentiment.fileSeparator);
+
+            // Create a training pipeline.
+            var pipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText")
+                .AppendCacheCheckpoint(mlContext)
+                .Append(mlContext.BinaryClassification.Trainers.LogisticRegression(
+                    new LogisticRegression.Options { NumThreads = 1 }));
+
+            // Train the model.
+            var model = pipeline.Fit(data);
+
+            // Evaluate the model.
+            var scoredData = model.Transform(data);
+            var metrics = mlContext.BinaryClassification.Evaluate(scoredData);
+
+            Common.CheckMetrics(metrics);
+
+            // This scenario is not possible with the current set of APIs
+            // There could be two ways imaginable:
+            //  1. Getting a list of (P,R) from the Evaluator (as it has these anyways)
+            //     Not possible.
+            //  2. Manually setting the classifier threshold and calling evaluate many times:
+            //     Not currently possible: Todo #2465: Allow the setting of threshold and thresholdColumn for scoring.
+            // Technically, this is possible using custom mappers like so:
+            //  1. Get a list of all unique probability scores
+            //  2. For each value of probability:
+            //     a. Write a custom mapper to produce PredictedLabel at that probability threshold
+            //     b. Calculate Precision and Recall with these labels
+        }
+    }
+}
\ No newline at end of file
diff --git a/test/Microsoft.ML.Functional.Tests/Prediction.cs b/test/Microsoft.ML.Functional.Tests/Prediction.cs
index 24cc049e8f..4f37533ebb 100644
--- a/test/Microsoft.ML.Functional.Tests/Prediction.cs
+++ b/test/Microsoft.ML.Functional.Tests/Prediction.cs
@@ -22,7 +22,8 @@ public void ReconfigurablePrediction()
             var mlContext = new MLContext(seed: 789);
 
             // Get the dataset, create a train and test
-            var data = mlContext.Data.CreateTextLoader(TestDatasets.housing.GetLoaderColumns(), hasHeader: true)
+            var data = mlContext.Data.CreateTextLoader(TestDatasets.housing.GetLoaderColumns(),
+                    hasHeader: TestDatasets.housing.fileHasHeader, separatorChar: TestDatasets.housing.fileSeparator)
                 .Read(BaseTestClass.GetDataPath(TestDatasets.housing.trainFilename));
             var split = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.2);
 
diff --git a/test/Microsoft.ML.Functional.Tests/Validation.cs b/test/Microsoft.ML.Functional.Tests/Validation.cs
index a39bd14884..cc74ff2227 100644
--- a/test/Microsoft.ML.Functional.Tests/Validation.cs
+++ b/test/Microsoft.ML.Functional.Tests/Validation.cs
@@ -27,7 +27,8 @@ void CrossValidation()
             var mlContext = new MLContext(seed: 1, conc: 1);
 
             // Get the dataset.
-            var data = mlContext.Data.CreateTextLoader(TestDatasets.housing.GetLoaderColumns(), hasHeader: true)
+            var data = mlContext.Data.CreateTextLoader(TestDatasets.housing.GetLoaderColumns(),
+                    hasHeader: TestDatasets.housing.fileHasHeader, separatorChar: TestDatasets.housing.fileSeparator)
                 .Read(BaseTestClass.GetDataPath(TestDatasets.housing.trainFilename));
 
             // Create a pipeline to train on the sentiment data.
@@ -60,7 +61,8 @@ public void TrainWithValidationSet()
             var mlContext = new MLContext(seed: 1, conc: 1);
 
             // Get the dataset.
-            var data = mlContext.Data.CreateTextLoader(TestDatasets.housing.GetLoaderColumns(), hasHeader: true)
+            var data = mlContext.Data.CreateTextLoader(TestDatasets.housing.GetLoaderColumns(),
+                    hasHeader: TestDatasets.housing.fileHasHeader, separatorChar: TestDatasets.housing.fileSeparator)
                 .Read(BaseTestClass.GetDataPath(TestDatasets.housing.trainFilename));
             var dataSplit = mlContext.Regression.TrainTestSplit(data, testFraction: 0.2);
             var trainData = dataSplit.TrainSet;
diff --git a/test/Microsoft.ML.TestFramework/Datasets.cs b/test/Microsoft.ML.TestFramework/Datasets.cs
index 7197f1f64b..f654673bf2 100644
--- a/test/Microsoft.ML.TestFramework/Datasets.cs
+++ b/test/Microsoft.ML.TestFramework/Datasets.cs
@@ -14,6 +14,8 @@ public class TestDataset
         public string testFilename;
         public string validFilename;
         public string labelFilename;
+        public char fileSeparator;
+        public bool fileHasHeader;
 
         // REVIEW: Replace these with appropriate SubComponents!
         public string settings;
@@ -158,6 +160,8 @@ public static class TestDatasets
             name = "housing",
             trainFilename = "housing.txt",
             testFilename = "housing.txt",
+            fileSeparator = '\t',
+            fileHasHeader = true,
             loaderSettings = "loader=Text{col=Label:0 col=Features:~ header=+}",
             GetLoaderColumns = () =>
             {
@@ -206,6 +210,8 @@ public static class TestDatasets
             name = "sentiment",
             trainFilename = "wikipedia-detox-250-line-data.tsv",
             testFilename = "wikipedia-detox-250-line-test.tsv",
+            fileHasHeader = true,
+            fileSeparator = '\t',
             GetLoaderColumns = () =>
              {
                  return new[]
@@ -447,6 +453,8 @@ public static class TestDatasets
             name = "iris",
             trainFilename = @"iris.txt",
             testFilename = @"iris.txt",
+            fileHasHeader = true,
+            fileSeparator = '\t'
         };
 
         public static TestDataset irisMissing = new TestDataset()
@@ -655,6 +663,8 @@ public static class TestDatasets
             name = "mnistOneClass",
             trainFilename = @"MNIST.Train.0-class.tiny.txt",
             testFilename = @"MNIST.Test.tiny.txt",
+            fileHasHeader = true,
+            fileSeparator = '\t',
             settings = ""
         };
 
@@ -704,6 +714,8 @@ public static class TestDatasets
             name = "trivialMatrixFactorization",
             trainFilename = @"trivial-train.tsv",
             testFilename = @"trivial-test.tsv",
+            fileHasHeader = true,
+            fileSeparator = '\t',
             loaderSettings = "loader=Text{col=Label:R4:0 col=User:U4[0-19]:1 col=Item:U4[0-39]:2 header+}"
         };
     }
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Evaluation.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Evaluation.cs
deleted file mode 100644
index 60fad2c0a3..0000000000
--- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Evaluation.cs
+++ /dev/null
@@ -1,39 +0,0 @@
-﻿// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-using Microsoft.ML.Data;
-using Microsoft.ML.RunTests;
-using Microsoft.ML.Trainers;
-using Xunit;
-
-namespace Microsoft.ML.Tests.Scenarios.Api
-{
-    public partial class ApiScenariosTests
-    {
-        /// <summary>
-        /// Evaluation: Similar to the simple train scenario, except instead of having some 
-        /// predictive structure, be able to score another "test" data file, run the result 
-        /// through an evaluator and get metrics like AUC, accuracy, PR curves, and whatnot. 
-        /// Getting metrics out of this shoudl be as straightforward and unannoying as possible.
-        /// </summary>
-        [Fact]
-        public void Evaluation()
-        {
-            var ml = new MLContext(seed: 1, conc: 1);
-
-            // Pipeline.
-            var pipeline = ml.Data.CreateTextLoader(TestDatasets.Sentiment.GetLoaderColumns(), hasHeader: true)
-                .Append(ml.Transforms.Text.FeaturizeText("Features", "SentimentText"))
-                .Append(ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent(
-                    new SdcaBinaryTrainer.Options { NumThreads = 1 }));
-
-            // Train.
-            var readerModel = pipeline.Fit(new MultiFileSource(GetDataPath(TestDatasets.Sentiment.trainFilename)));
-
-            // Evaluate on the test set.
-            var dataEval = readerModel.Read(new MultiFileSource(GetDataPath(TestDatasets.Sentiment.testFilename)));
-            var metrics = ml.BinaryClassification.Evaluate(dataEval);
-        }
-    }
-}

From 0b02fb1b9375dda004492935e4c83314d1a07714 Mon Sep 17 00:00:00 2001
From: Rogan Carr <rocarr@microsoft.com>
Date: Tue, 19 Feb 2019 17:00:31 -0800
Subject: [PATCH 2/9] Updating test summaries.

---
 test/Microsoft.ML.Functional.Tests/Datasets/Iris.cs         | 6 +-----
 .../Microsoft.ML.Functional.Tests/Datasets/MnistOneClass.cs | 6 +-----
 test/Microsoft.ML.Functional.Tests/Datasets/Sentiment.cs    | 6 +-----
 3 files changed, 3 insertions(+), 15 deletions(-)

diff --git a/test/Microsoft.ML.Functional.Tests/Datasets/Iris.cs b/test/Microsoft.ML.Functional.Tests/Datasets/Iris.cs
index fde531bd6e..39f741d99a 100644
--- a/test/Microsoft.ML.Functional.Tests/Datasets/Iris.cs
+++ b/test/Microsoft.ML.Functional.Tests/Datasets/Iris.cs
@@ -10,12 +10,8 @@
 namespace Microsoft.ML.Functional.Tests.Datasets
 {
     /// <summary>
-    /// A class for the Iris dataset.
+    /// A class for the Iris test dataset.
     /// </summary>
-    /// <remarks>
-    /// This class has annotations for automatic deserialization from a file, and contains helper methods
-    /// for reading from a file and for generating a random dataset as an IEnumerable.
-    /// </remarks>
     internal sealed class Iris
     {
         [LoadColumn(0)]
diff --git a/test/Microsoft.ML.Functional.Tests/Datasets/MnistOneClass.cs b/test/Microsoft.ML.Functional.Tests/Datasets/MnistOneClass.cs
index a4ea599d42..163216bc64 100644
--- a/test/Microsoft.ML.Functional.Tests/Datasets/MnistOneClass.cs
+++ b/test/Microsoft.ML.Functional.Tests/Datasets/MnistOneClass.cs
@@ -7,12 +7,8 @@
 namespace Microsoft.ML.Functional.Tests.Datasets
 {
     /// <summary>
-    /// A class containing one property per <see cref="DataKind"/>.
+    /// A class for reading in the MNIST One Class test dataset.
     /// </summary>
-    /// <remarks>
-    /// This class has annotations for automatic deserialization from a file, and contains helper methods
-    /// for reading from a file and for generating a random dataset as an IEnumerable.
-    /// </remarks>
     internal sealed class MnistOneClass
     {
         [LoadColumn(0)]
diff --git a/test/Microsoft.ML.Functional.Tests/Datasets/Sentiment.cs b/test/Microsoft.ML.Functional.Tests/Datasets/Sentiment.cs
index 6886c6e094..2465e291b3 100644
--- a/test/Microsoft.ML.Functional.Tests/Datasets/Sentiment.cs
+++ b/test/Microsoft.ML.Functional.Tests/Datasets/Sentiment.cs
@@ -7,12 +7,8 @@
 namespace Microsoft.ML.Functional.Tests.Datasets
 {
     /// <summary>
-    /// A class containing one property per <see cref="DataKind"/>.
+    /// A class for reading in the Sentiment test dataset.
     /// </summary>
-    /// <remarks>
-    /// This class has annotations for automatic deserialization from a file, and contains helper methods
-    /// for reading from a file and for generating a random dataset as an IEnumerable.
-    /// </remarks>
     internal sealed class TweetSentiment
     {
         [LoadColumn(0), ColumnName("Label")]

From acd2f1a5010e47cdf649c50521f7799f05e6f644 Mon Sep 17 00:00:00 2001
From: Rogan Carr <rocarr@microsoft.com>
Date: Thu, 21 Feb 2019 12:42:54 -0800
Subject: [PATCH 3/9] Updating comments to end in a period!

---
 .../Evaluation.cs                              | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/test/Microsoft.ML.Functional.Tests/Evaluation.cs b/test/Microsoft.ML.Functional.Tests/Evaluation.cs
index 0ada188bb0..80d4c789c4 100644
--- a/test/Microsoft.ML.Functional.Tests/Evaluation.cs
+++ b/test/Microsoft.ML.Functional.Tests/Evaluation.cs
@@ -233,7 +233,7 @@ public void TrainAndEvaluateRegression()
         }
 
         /// <summary>
-        /// Evaluate With Precision-Recall Curves
+        /// Evaluate With Precision-Recall Curves.
         /// </summary>
         /// <remarks>
         /// This is currently not possible using the APIs.
@@ -262,17 +262,19 @@ public void TrainAndEvaluateWithPrecisionRecallCurves()
 
             Common.CheckMetrics(metrics);
 
-            // This scenario is not possible with the current set of APIs
+            // This scenario is not possible with the current set of APIs.
             // There could be two ways imaginable:
-            //  1. Getting a list of (P,R) from the Evaluator (as it has these anyways)
-            //     Not possible.
+            //  1. Getting a list of (P,R) from the Evaluator (as it calculates most of the information already).
+            //     Not currently possible.
             //  2. Manually setting the classifier threshold and calling evaluate many times:
             //     Not currently possible: Todo #2465: Allow the setting of threshold and thresholdColumn for scoring.
-            // Technically, this is possible using custom mappers like so:
-            //  1. Get a list of all unique probability scores
+            // Technically, this scenario is possible using custom mappers like so:
+            //  1. Get a list of all unique probability scores.
+            //     E.g., by reading the IDataView as an IEnumerable and keeping a hash of known probabilities, up to some precision.
             //  2. For each value of probability:
-            //     a. Write a custom mapper to produce PredictedLabel at that probability threshold
-            //     b. Calculate Precision and Recall with these labels
+            //     a. Write a custom mapper to produce PredictedLabel at that probability threshold.
+            //     b. Calculate Precision and Recall with these labels.
+            //     c. Append the Precision and Recall to an IList.
         }
     }
 }
\ No newline at end of file

From e9d5bad2107eee55c4869840d41e8b3d3ccf82fd Mon Sep 17 00:00:00 2001
From: Rogan Carr <rocarr@microsoft.com>
Date: Thu, 21 Feb 2019 13:48:20 -0800
Subject: [PATCH 4/9] Addressing PR comments.

---
 test/Microsoft.ML.Functional.Tests/Common.cs  |  3 ---
 .../Evaluation.cs                             | 19 +++++++++----------
 2 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/test/Microsoft.ML.Functional.Tests/Common.cs b/test/Microsoft.ML.Functional.Tests/Common.cs
index b097d7ddf1..d5ab5ce056 100644
--- a/test/Microsoft.ML.Functional.Tests/Common.cs
+++ b/test/Microsoft.ML.Functional.Tests/Common.cs
@@ -164,9 +164,6 @@ public static void AssertEqual(TypeTestData testType1, TypeTestData testType2)
         /// <summary>
         /// Check that a <see cref="AnomalyDetectionMetrics"/> object is valid.
         /// </summary>
-        /// <remarks>
-        /// TODO #2644: At times, AnomalyDetection.Evaluate will return a set of NaN metrics.
-        /// </remarks>
         /// <param name="metrics">The metrics object.</param>
         public static void CheckMetrics(AnomalyDetectionMetrics metrics)
         {
diff --git a/test/Microsoft.ML.Functional.Tests/Evaluation.cs b/test/Microsoft.ML.Functional.Tests/Evaluation.cs
index 80d4c789c4..ed6aba15dc 100644
--- a/test/Microsoft.ML.Functional.Tests/Evaluation.cs
+++ b/test/Microsoft.ML.Functional.Tests/Evaluation.cs
@@ -8,7 +8,6 @@
 using Microsoft.ML.Trainers;
 using Microsoft.ML.Trainers.FastTree;
 using Microsoft.ML.Trainers.KMeans;
-using Microsoft.ML.Trainers.PCA;
 using Xunit;
 using Xunit.Abstractions;
 
@@ -41,8 +40,8 @@ public void TrainAndEvaluateAnomalyDetection()
             // Train the model.
             var model = pipeline.Fit(trainData);
 
-            // Evaulate the model.
-            //  Note Issue #2464: Using the train dataset will cause NaN metrics to be returned.
+            // Evaluate the model.
+            //  TODO #2464: Using the train dataset will cause NaN metrics to be returned.
             var scoredTest = model.Transform(testData);
             var metrics = mlContext.AnomalyDetection.Evaluate(scoredTest);
 
@@ -71,7 +70,7 @@ public void TrainAndEvaluateBinaryClassification()
             // Train the model.
             var model = pipeline.Fit(data);
 
-            // Evaulate the model.
+            // Evaluate the model.
             var scoredData = model.Transform(data);
             var metrics = mlContext.BinaryClassification.Evaluate(scoredData);
 
@@ -99,7 +98,7 @@ public void TrainAndEvaluateClustering()
             // Train the model.
             var model = pipeline.Fit(data);
 
-            // Evaulate the model.
+            // Evaluate the model.
             var scoredData = model.Transform(data);
             var metrics = mlContext.Clustering.Evaluate(scoredData);
 
@@ -128,7 +127,7 @@ public void TrainAndEvaluateMulticlassClassification()
             // Train the model.
             var model = pipeline.Fit(data);
 
-            // Evaulate the model.
+            // Evaluate the model.
             var scoredData = model.Transform(data);
             var metrics = mlContext.MulticlassClassification.Evaluate(scoredData);
 
@@ -156,7 +155,7 @@ public void TrainAndEvaluateRanking()
             // Train the model.
             var model = pipeline.Fit(data);
 
-            // Evaulate the model.
+            // Evaluate the model.
             var scoredData = model.Transform(data);
             var metrics = mlContext.Ranking.Evaluate(scoredData, label: "Label", groupId: "GroupId");
 
@@ -193,7 +192,7 @@ public void TrainAndEvaluateRecommendation()
             // Train the model.
             var model = pipeline.Fit(data);
 
-            // Evaulate the model.
+            // Evaluate the model.
             var scoredData = model.Transform(data);
             var metrics = mlContext.Recommendation().Evaluate(scoredData);
 
@@ -224,7 +223,7 @@ public void TrainAndEvaluateRegression()
             // Train the model.
             var model = pipeline.Fit(data);
 
-            // Evaulate the model.
+            // Evaluate the model.
             var scoredData = model.Transform(data);
             var metrics = mlContext.Regression.Evaluate(scoredData);
 
@@ -256,7 +255,7 @@ public void TrainAndEvaluateWithPrecisionRecallCurves()
             // Train the model.
             var model = pipeline.Fit(data);
 
-            // Evaulate the model.
+            // Evaluate the model.
             var scoredData = model.Transform(data);
             var metrics = mlContext.BinaryClassification.Evaluate(scoredData);
 

From 7012ea621829dd695b67ebeeb36be812a7148294 Mon Sep 17 00:00:00 2001
From: Rogan Carr <rocarr@microsoft.com>
Date: Fri, 22 Feb 2019 12:11:55 -0800
Subject: [PATCH 5/9] Updating binary classification check to also check
 calibrated classifiers

---
 test/Microsoft.ML.Functional.Tests/Common.cs | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/test/Microsoft.ML.Functional.Tests/Common.cs b/test/Microsoft.ML.Functional.Tests/Common.cs
index d5ab5ce056..ae6cb630ae 100644
--- a/test/Microsoft.ML.Functional.Tests/Common.cs
+++ b/test/Microsoft.ML.Functional.Tests/Common.cs
@@ -189,6 +189,18 @@ public static void CheckMetrics(BinaryClassificationMetrics metrics)
             Assert.InRange(metrics.PositiveRecall, 0, 1);
         }
 
+        /// <summary>
+        /// Check that a <see cref="CalibratedBinaryClassificationMetrics"/> object is valid.
+        /// </summary>
+        /// <param name="metrics">The metrics object.</param>
+        public static void CheckMetrics(CalibratedBinaryClassificationMetrics metrics)
+        {
+            Assert.InRange(metrics.Entropy, double.NegativeInfinity, 1);
+            Assert.InRange(metrics.LogLoss, double.NegativeInfinity, 1);
+            Assert.InRange(metrics.LogLossReduction, double.NegativeInfinity, 100);
+            CheckMetrics(metrics as BinaryClassificationMetrics);
+        }
+
         /// <summary>
         /// Check that a <see cref="ClusteringMetrics"/> object is valid.
         /// </summary>

From 12d3dfe10d10ca40b9cfd9ee6ec8d4839941e0bb Mon Sep 17 00:00:00 2001
From: Rogan Carr <rocarr@microsoft.com>
Date: Fri, 22 Feb 2019 15:35:56 -0800
Subject: [PATCH 6/9] Normalizing names after merge with master.

---
 test/Microsoft.ML.Functional.Tests/Common.cs  | 22 ++++++-------------
 .../Evaluation.cs                             | 16 +++++++-------
 2 files changed, 15 insertions(+), 23 deletions(-)

diff --git a/test/Microsoft.ML.Functional.Tests/Common.cs b/test/Microsoft.ML.Functional.Tests/Common.cs
index 61c0e06ba8..bcba3a8e27 100644
--- a/test/Microsoft.ML.Functional.Tests/Common.cs
+++ b/test/Microsoft.ML.Functional.Tests/Common.cs
@@ -165,9 +165,8 @@ public static void AssertEqual(TypeTestData testType1, TypeTestData testType2)
         /// Check that a <see cref="AnomalyDetectionMetrics"/> object is valid.
         /// </summary>
         /// <param name="metrics">The metrics object.</param>
-        public static void CheckMetrics(AnomalyDetectionMetrics metrics)
+        public static void AssertMetrics(AnomalyDetectionMetrics metrics)
         {
-            // Perform sanity checks on the metrics.
             Assert.InRange(metrics.Auc, 0, 1);
             Assert.InRange(metrics.DrAtK, 0, 1);
         }
@@ -176,9 +175,8 @@ public static void CheckMetrics(AnomalyDetectionMetrics metrics)
         /// Check that a <see cref="BinaryClassificationMetrics"/> object is valid.
         /// </summary>
         /// <param name="metrics">The metrics object.</param>
-        public static void CheckMetrics(BinaryClassificationMetrics metrics)
+        public static void AssertMetrics(BinaryClassificationMetrics metrics)
         {
-            // Perform sanity checks on the metrics.
             Assert.InRange(metrics.Accuracy, 0, 1);
             Assert.InRange(metrics.Auc, 0, 1);
             Assert.InRange(metrics.Auprc, 0, 1);
@@ -193,21 +191,20 @@ public static void CheckMetrics(BinaryClassificationMetrics metrics)
         /// Check that a <see cref="CalibratedBinaryClassificationMetrics"/> object is valid.
         /// </summary>
         /// <param name="metrics">The metrics object.</param>
-        public static void CheckMetrics(CalibratedBinaryClassificationMetrics metrics)
+        public static void AssertMetrics(CalibratedBinaryClassificationMetrics metrics)
         {
-            Assert.InRange(metrics.Entropy, double.NegativeInfinity, 1);
-            Assert.InRange(metrics.LogLoss, double.NegativeInfinity, 1);
+            Assert.InRange(metrics.Entropy, 0, 1); // Entropy is non-negative by definition.
+            Assert.InRange(metrics.LogLoss, 0, double.PositiveInfinity); // Log-loss is non-negative and has no upper bound (matches the multiclass check).
             Assert.InRange(metrics.LogLossReduction, double.NegativeInfinity, 100);
-            CheckMetrics(metrics as BinaryClassificationMetrics);
+            AssertMetrics(metrics as BinaryClassificationMetrics);
         }
 
         /// <summary>
         /// Check that a <see cref="ClusteringMetrics"/> object is valid.
         /// </summary>
         /// <param name="metrics">The metrics object.</param>
-        public static void CheckMetrics(ClusteringMetrics metrics)
+        public static void AssertMetrics(ClusteringMetrics metrics)
         {
-            // Perform sanity checks on the metrics.
             Assert.True(metrics.AvgMinScore >= 0);
             Assert.True(metrics.Dbi >= 0);
             if (!double.IsNaN(metrics.Nmi))
@@ -218,9 +215,8 @@ public static void CheckMetrics(ClusteringMetrics metrics)
         /// Check that a <see cref="MultiClassClassifierMetrics"/> object is valid.
         /// </summary>
         /// <param name="metrics">The metrics object.</param>
-        public static void CheckMetrics(MultiClassClassifierMetrics metrics)
+        public static void AssertMetrics(MultiClassClassifierMetrics metrics)
         {
-            // Perform sanity checks on the metrics.
             Assert.InRange(metrics.AccuracyMacro, 0, 1);
             Assert.InRange(metrics.AccuracyMicro, 0, 1);
             Assert.True(metrics.LogLoss >= 0);
@@ -231,9 +227,8 @@ public static void CheckMetrics(MultiClassClassifierMetrics metrics)
         /// Check that a <see cref="RankerMetrics"/> object is valid.
         /// </summary>
         /// <param name="metrics">The metrics object.</param>
-        public static void CheckMetrics(RankerMetrics metrics)
+        public static void AssertMetrics(RankerMetrics metrics)
         {
-            // Perform sanity checks on the metrics.
             foreach (var dcg in metrics.Dcg)
                 Assert.True(dcg >= 0);
             foreach (var ndcg in metrics.Ndcg)
@@ -246,7 +241,6 @@ public static void CheckMetrics(RankerMetrics metrics)
         /// <param name="metrics">The metrics object.</param>
         public static void AssertMetrics(RegressionMetrics metrics)
         {
-            // Perform sanity checks on the metrics.
             Assert.True(metrics.Rms >= 0);
             Assert.True(metrics.L1 >= 0);
             Assert.True(metrics.L2 >= 0);
@@ -259,7 +253,6 @@ public static void AssertMetrics(RegressionMetrics metrics)
         /// <param name="metric">The <see cref="MetricStatistics"/> object.</param>
         public static void AssertMetricStatistics(MetricStatistics metric)
         {
-            // Perform sanity checks on the metrics.
             Assert.True(metric.StandardDeviation >= 0);
             Assert.True(metric.StandardError >= 0);
         }
@@ -270,7 +263,6 @@ public static void AssertMetricStatistics(MetricStatistics metric)
         /// <param name="metrics">The metrics object.</param>
         public static void AssertMetricsStatistics(RegressionMetricsStatistics metrics)
         {
-            // The mean can be any float; the standard deviation and error must be >=0.
             AssertMetricStatistics(metrics.Rms);
             AssertMetricStatistics(metrics.L1);
             AssertMetricStatistics(metrics.L2);
diff --git a/test/Microsoft.ML.Functional.Tests/Evaluation.cs b/test/Microsoft.ML.Functional.Tests/Evaluation.cs
index ed6aba15dc..6435f0806e 100644
--- a/test/Microsoft.ML.Functional.Tests/Evaluation.cs
+++ b/test/Microsoft.ML.Functional.Tests/Evaluation.cs
@@ -46,7 +46,7 @@ public void TrainAndEvaluateAnomalyDetection()
             var metrics = mlContext.AnomalyDetection.Evaluate(scoredTest);
 
             // Check that the metrics returned are valid.
-            Common.CheckMetrics(metrics);
+            Common.AssertMetrics(metrics);
         }
 
         /// <summary>
@@ -75,7 +75,7 @@ public void TrainAndEvaluateBinaryClassification()
             var metrics = mlContext.BinaryClassification.Evaluate(scoredData);
 
             // Check that the metrics returned are valid.
-            Common.CheckMetrics(metrics);
+            Common.AssertMetrics(metrics);
         }
 
         /// <summary>
@@ -103,7 +103,7 @@ public void TrainAndEvaluateClustering()
             var metrics = mlContext.Clustering.Evaluate(scoredData);
 
             // Check that the metrics returned are valid.
-            Common.CheckMetrics(metrics);
+            Common.AssertMetrics(metrics);
         }
 
         /// <summary>
@@ -132,7 +132,7 @@ public void TrainAndEvaluateMulticlassClassification()
             var metrics = mlContext.MulticlassClassification.Evaluate(scoredData);
 
             // Check that the metrics returned are valid.
-            Common.CheckMetrics(metrics);
+            Common.AssertMetrics(metrics);
         }
 
         /// <summary>
@@ -160,7 +160,7 @@ public void TrainAndEvaluateRanking()
             var metrics = mlContext.Ranking.Evaluate(scoredData, label: "Label", groupId: "GroupId");
 
             // Check that the metrics returned are valid.
-            Common.CheckMetrics(metrics);
+            Common.AssertMetrics(metrics);
         }
 
         /// <summary>
@@ -197,7 +197,7 @@ public void TrainAndEvaluateRecommendation()
             var metrics = mlContext.Recommendation().Evaluate(scoredData);
 
             // Check that the metrics returned are valid.
-            Common.CheckMetrics(metrics);
+            Common.AssertMetrics(metrics);
         }
 
         /// <summary>
@@ -228,7 +228,7 @@ public void TrainAndEvaluateRegression()
             var metrics = mlContext.Regression.Evaluate(scoredData);
 
             // Check that the metrics returned are valid.
-            Common.CheckMetrics(metrics);
+            Common.AssertMetrics(metrics);
         }
 
         /// <summary>
@@ -259,7 +259,7 @@ public void TrainAndEvaluateWithPrecisionRecallCurves()
             var scoredData = model.Transform(data);
             var metrics = mlContext.BinaryClassification.Evaluate(scoredData);
 
-            Common.CheckMetrics(metrics);
+            Common.AssertMetrics(metrics);
 
             // This scenario is not possible with the current set of APIs.
             // There could be two ways imaginable:

From 42af510535cab35587d8082bba89f1f3d71ce163 Mon Sep 17 00:00:00 2001
From: Rogan Carr <rocarr@microsoft.com>
Date: Fri, 22 Feb 2019 15:55:36 -0800
Subject: [PATCH 7/9] Addressing PR comments.

---
 .../Datasets/Iris.cs                          |  4 +--
 .../Datasets/TrivialMatrixFactorization.cs    |  6 +---
 .../Evaluation.cs                             | 31 ++++++++++++++++++-
 3 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/test/Microsoft.ML.Functional.Tests/Datasets/Iris.cs b/test/Microsoft.ML.Functional.Tests/Datasets/Iris.cs
index 39f741d99a..d1cbfa3fad 100644
--- a/test/Microsoft.ML.Functional.Tests/Datasets/Iris.cs
+++ b/test/Microsoft.ML.Functional.Tests/Datasets/Iris.cs
@@ -45,7 +45,7 @@ public static IDataView LoadAsRankingProblem(MLContext mlContext, string filePat
             {
                 output.Label = input.Label;
                 // The standard set used in tests has 150 rows
-                output.GroupId = (ushort)rng.Next(0, 30);
+                output.GroupId = rng.Next(0, 30);
                 output.PetalLength = input.PetalLength;
                 output.PetalWidth = input.PetalWidth;
                 output.SepalLength = input.SepalLength;
@@ -69,7 +69,7 @@ public static IDataView LoadAsRankingProblem(MLContext mlContext, string filePat
     internal sealed class IrisWithGroup
     {
         public float Label { get; set; }
-        public ushort GroupId { get; set; }
+        public int GroupId { get; set; }
         public float SepalLength { get; set; }
         public float SepalWidth { get; set; }
         public float PetalLength { get; set; }
diff --git a/test/Microsoft.ML.Functional.Tests/Datasets/TrivialMatrixFactorization.cs b/test/Microsoft.ML.Functional.Tests/Datasets/TrivialMatrixFactorization.cs
index 7d8c1b6398..005fc98c72 100644
--- a/test/Microsoft.ML.Functional.Tests/Datasets/TrivialMatrixFactorization.cs
+++ b/test/Microsoft.ML.Functional.Tests/Datasets/TrivialMatrixFactorization.cs
@@ -10,12 +10,8 @@
 namespace Microsoft.ML.Functional.Tests.Datasets
 {
     /// <summary>
-    /// A class containing one property per <see cref="DataKind"/>.
+    /// A class describing the TrivialMatrixFactorization test dataset.
     /// </summary>
-    /// <remarks>
-    /// This class has annotations for automatic deserialization from a file, and contains helper methods
-    /// for reading from a file and for generating a random dataset as an IEnumerable.
-    /// </remarks>
     internal sealed class TrivialMatrixFactorization
     {
         [LoadColumn(0)]
diff --git a/test/Microsoft.ML.Functional.Tests/Evaluation.cs b/test/Microsoft.ML.Functional.Tests/Evaluation.cs
index 6435f0806e..da3c71d621 100644
--- a/test/Microsoft.ML.Functional.Tests/Evaluation.cs
+++ b/test/Microsoft.ML.Functional.Tests/Evaluation.cs
@@ -50,13 +50,42 @@ public void TrainAndEvaluateAnomalyDetection()
         }
 
         /// <summary>
-        /// Train and Evaluate: Binary Classification.
+        /// Train and Evaluate: Binary Classification with no calibration.
         /// </summary>
         [Fact]
         public void TrainAndEvaluateBinaryClassification()
         {
             var mlContext = new MLContext(seed: 1, conc: 1);
 
+            var data = mlContext.Data.ReadFromTextFile<TweetSentiment>(GetDataPath(TestDatasets.Sentiment.trainFilename),
+                hasHeader: TestDatasets.Sentiment.fileHasHeader,
+                separatorChar: TestDatasets.Sentiment.fileSeparator);
+
+            // Create a training pipeline.
+            var pipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText")
+                .AppendCacheCheckpoint(mlContext)
+                .Append(mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscentNonCalibrated(
+                    new SdcaNonCalibratedBinaryTrainer.Options { NumThreads = 1 }));
+
+            // Train the model.
+            var model = pipeline.Fit(data);
+
+            // Evaluate the model.
+            var scoredData = model.Transform(data);
+            var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(scoredData);
+
+            // Check that the metrics returned are valid.
+            Common.AssertMetrics(metrics);
+        }
+
+        /// <summary>
+        /// Train and Evaluate: Binary Classification with a calibrated predictor.
+        /// </summary>
+        [Fact]
+        public void TrainAndEvaluateBinaryClassificationWithCalibration()
+        {
+            var mlContext = new MLContext(seed: 1, conc: 1);
+
             var data = mlContext.Data.ReadFromTextFile<TweetSentiment>(GetDataPath(TestDatasets.Sentiment.trainFilename),
                 hasHeader: TestDatasets.Sentiment.fileHasHeader,
                 separatorChar: TestDatasets.Sentiment.fileSeparator);

From 89e44cb04e8e5703f4ff6e052bddcf4a9cfd63e4 Mon Sep 17 00:00:00 2001
From: Rogan Carr <rocarr@microsoft.com>
Date: Fri, 22 Feb 2019 16:47:25 -0800
Subject: [PATCH 8/9] Updating to fix master merge issues around test datasets.

---
 .../Datasets/MnistOneClass.cs                 | 19 ++++++++++++++-----
 .../Evaluation.cs                             | 12 ++++++------
 test/Microsoft.ML.TestFramework/Datasets.cs   |  2 +-
 3 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/test/Microsoft.ML.Functional.Tests/Datasets/MnistOneClass.cs b/test/Microsoft.ML.Functional.Tests/Datasets/MnistOneClass.cs
index 163216bc64..07b26d3d9c 100644
--- a/test/Microsoft.ML.Functional.Tests/Datasets/MnistOneClass.cs
+++ b/test/Microsoft.ML.Functional.Tests/Datasets/MnistOneClass.cs
@@ -6,15 +6,24 @@
 
 namespace Microsoft.ML.Functional.Tests.Datasets
 {
-    /// <summary>
-    /// A class for reading in the MNIST One Class test dataset.
-    /// </summary>
     internal sealed class MnistOneClass
     {
-        [LoadColumn(0)]
+        private const int _featureCount = 784; // Feature values per row; same width as the VectorType(784) this replaces.
+
         public float Label { get; set; }
 
-        [LoadColumn(1, 784), VectorType(784)]
         public float[] Features { get; set; }
+
+        public static TextLoader GetTextLoader(MLContext mlContext, bool hasHeader, char separatorChar)
+        {
+            return mlContext.Data.CreateTextLoader(
+                    new[] {
+                        new TextLoader.Column("Label", DataKind.R4, 0),
+                        new TextLoader.Column("Features", DataKind.R4, 1, _featureCount) // Columns 1 through _featureCount, as in the old LoadColumn(1, 784).
+                    },
+                    separatorChar: separatorChar,
+                    hasHeader: hasHeader,
+                    allowSparse: true);
+        }
     }
 }
diff --git a/test/Microsoft.ML.Functional.Tests/Evaluation.cs b/test/Microsoft.ML.Functional.Tests/Evaluation.cs
index da3c71d621..a3f6d10b05 100644
--- a/test/Microsoft.ML.Functional.Tests/Evaluation.cs
+++ b/test/Microsoft.ML.Functional.Tests/Evaluation.cs
@@ -27,12 +27,12 @@ public void TrainAndEvaluateAnomalyDetection()
         {
             var mlContext = new MLContext(seed: 1, conc: 1);
 
-            var trainData = mlContext.Data.ReadFromTextFile<MnistOneClass>(GetDataPath(TestDatasets.mnistOneClass.trainFilename),
-                hasHeader: TestDatasets.mnistOneClass.fileHasHeader,
-                separatorChar: TestDatasets.mnistOneClass.fileSeparator);
-            var testData = mlContext.Data.ReadFromTextFile<MnistOneClass>(GetDataPath(TestDatasets.mnistOneClass.testFilename),
-                hasHeader: TestDatasets.mnistOneClass.fileHasHeader,
-                separatorChar: TestDatasets.mnistOneClass.fileSeparator);
+            var trainData = MnistOneClass.GetTextLoader(mlContext,
+                    TestDatasets.mnistOneClass.fileHasHeader, TestDatasets.mnistOneClass.fileSeparator)
+                .Read(GetDataPath(TestDatasets.mnistOneClass.trainFilename));
+            var testData = MnistOneClass.GetTextLoader(mlContext,
+                    TestDatasets.mnistOneClass.fileHasHeader, TestDatasets.mnistOneClass.fileSeparator)
+                .Read(GetDataPath(TestDatasets.mnistOneClass.testFilename));
 
             // Create a training pipeline.
             var pipeline = mlContext.AnomalyDetection.Trainers.RandomizedPca();
diff --git a/test/Microsoft.ML.TestFramework/Datasets.cs b/test/Microsoft.ML.TestFramework/Datasets.cs
index f654673bf2..abc9862049 100644
--- a/test/Microsoft.ML.TestFramework/Datasets.cs
+++ b/test/Microsoft.ML.TestFramework/Datasets.cs
@@ -663,7 +663,7 @@ public static class TestDatasets
             name = "mnistOneClass",
             trainFilename = @"MNIST.Train.0-class.tiny.txt",
             testFilename = @"MNIST.Test.tiny.txt",
-            fileHasHeader = true,
+            fileHasHeader = false,
             fileSeparator = '\t',
             settings = ""
         };

From 051ba0171969e3a02a18f722232adf3e9bba0599 Mon Sep 17 00:00:00 2001
From: Rogan Carr <rocarr@microsoft.com>
Date: Sun, 24 Feb 2019 14:08:41 -0800
Subject: [PATCH 9/9] Marking matrix factorization test with the new attribute.

---
 test/Microsoft.ML.Functional.Tests/Evaluation.cs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/Microsoft.ML.Functional.Tests/Evaluation.cs b/test/Microsoft.ML.Functional.Tests/Evaluation.cs
index a3f6d10b05..6ffec01b32 100644
--- a/test/Microsoft.ML.Functional.Tests/Evaluation.cs
+++ b/test/Microsoft.ML.Functional.Tests/Evaluation.cs
@@ -5,6 +5,7 @@
 using Microsoft.ML.Functional.Tests.Datasets;
 using Microsoft.ML.RunTests;
 using Microsoft.ML.TestFramework;
+using Microsoft.ML.TestFramework.Attributes;
 using Microsoft.ML.Trainers;
 using Microsoft.ML.Trainers.FastTree;
 using Microsoft.ML.Trainers.KMeans;
@@ -195,7 +196,7 @@ public void TrainAndEvaluateRanking()
         /// <summary>
         /// Train and Evaluate: Recommendation.
         /// </summary>
-        [Fact]
+        [MatrixFactorizationFact]
         public void TrainAndEvaluateRecommendation()
         {
             var mlContext = new MLContext(seed: 1, conc: 1);