diff --git a/Microsoft.ML.sln b/Microsoft.ML.sln
index 9d969b6be2..2b9bcb6cf9 100644
--- a/Microsoft.ML.sln
+++ b/Microsoft.ML.sln
@@ -33,6 +33,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.TestFramework"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Predictor.Tests", "test\Microsoft.ML.Predictor.Tests\Microsoft.ML.Predictor.Tests.csproj", "{6B047E09-39C9-4583-96F3-685D84CA4117}"
EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Functional.Tests", "test\Microsoft.ML.Functional.Tests\Microsoft.ML.Functional.Tests.csproj", "{CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}"
+EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.ResultProcessor", "src\Microsoft.ML.ResultProcessor\Microsoft.ML.ResultProcessor.csproj", "{3769FCC3-9AFF-4C37-97E9-6854324681DF}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.FastTree", "src\Microsoft.ML.FastTree\Microsoft.ML.FastTree.csproj", "{B7B593C5-FB8C-4ADA-A638-5B53B47D087E}"
@@ -928,6 +930,18 @@ Global
{5E920CAC-5A28-42FB-936E-49C472130953}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU
{5E920CAC-5A28-42FB-936E-49C472130953}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU
{5E920CAC-5A28-42FB-936E-49C472130953}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU
+ {CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug-Intrinsics|Any CPU
+ {CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Debug-Intrinsics|Any CPU.Build.0 = Debug-Intrinsics|Any CPU
+ {CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU
+ {CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU
+ {CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Release|Any CPU.Build.0 = Release|Any CPU
+ {CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Release-Intrinsics|Any CPU.ActiveCfg = Release-Intrinsics|Any CPU
+ {CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU
+ {CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU
+ {CFED9F0C-FF81-4C96-8D5E-0436264CA7B5}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
@@ -1011,6 +1025,7 @@ Global
{85D0CAFD-2FE8-496A-88C7-585D35B94243} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
{31D38B21-102B-41C0-9E0A-2FE0BF68D123} = {D3D38B03-B557-484D-8348-8BADEE4DF592}
{5E920CAC-5A28-42FB-936E-49C472130953} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
+ {CFED9F0C-FF81-4C96-8D5E-0436264CA7B5} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {41165AF1-35BB-4832-A189-73060F82B01D}
diff --git a/build/Dependencies.props b/build/Dependencies.props
index 896ca68978..9d2174267b 100644
--- a/build/Dependencies.props
+++ b/build/Dependencies.props
@@ -43,6 +43,8 @@
0.11.3
0.0.3-test
+ 0.0.7-test
+ 0.0.4-test
diff --git a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs
index edaf2d55c5..d95d047faf 100644
--- a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs
+++ b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs
@@ -17,7 +17,12 @@ public static class DatasetUtils
/// Downloads the housing dataset from the ML.NET repo.
///
public static string DownloadHousingRegressionDataset()
- => Download("https://raw.githubusercontent.com/dotnet/machinelearning/024bd4452e1d3660214c757237a19d6123f951ca/test/data/housing.txt", "housing.txt");
+ {
+ var fileName = "housing.txt"; // relative path, so it is resolved against the current working directory
+ if (!File.Exists(fileName)) // skip the download when a local copy is already present
+ Download("https://raw.githubusercontent.com/dotnet/machinelearning/024bd4452e1d3660214c757237a19d6123f951ca/test/data/housing.txt", fileName);
+ return fileName; // the local file name is returned whether or not a download happened
+ }
public static IDataView LoadHousingRegressionDataset(MLContext mlContext)
{
diff --git a/test/Microsoft.ML.Functional.Tests/Common.cs b/test/Microsoft.ML.Functional.Tests/Common.cs
new file mode 100644
index 0000000000..29088298d3
--- /dev/null
+++ b/test/Microsoft.ML.Functional.Tests/Common.cs
@@ -0,0 +1,24 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.Data.DataView;
+using Microsoft.ML.Data;
+using Microsoft.ML.SamplesUtils;
+using Microsoft.ML.Trainers.HalLearners;
+using Xunit;
+
+namespace Microsoft.ML.Functional.Tests
+{
+    internal static class Common
+    {
+        public static void CheckMetrics(RegressionMetrics metrics)
+        {
+            // Sanity bounds only: Rms, L1 and L2 must be non-negative and RSquared must not exceed 1.
+            Assert.True(0 <= metrics.Rms);
+            Assert.True(0 <= metrics.L1);
+            Assert.True(0 <= metrics.L2);
+            Assert.True(1 >= metrics.RSquared);
+        }
+    }
+}
diff --git a/test/Microsoft.ML.Functional.Tests/Microsoft.ML.Functional.Tests.csproj b/test/Microsoft.ML.Functional.Tests/Microsoft.ML.Functional.Tests.csproj
new file mode 100644
index 0000000000..106db8f36c
--- /dev/null
+++ b/test/Microsoft.ML.Functional.Tests/Microsoft.ML.Functional.Tests.csproj
@@ -0,0 +1,52 @@
+
+
+
+
+ false
+ false
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test/Microsoft.ML.Functional.Tests/Prediction.cs b/test/Microsoft.ML.Functional.Tests/Prediction.cs
new file mode 100644
index 0000000000..7e0ff2eb44
--- /dev/null
+++ b/test/Microsoft.ML.Functional.Tests/Prediction.cs
@@ -0,0 +1,53 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.ML.RunTests;
+using Microsoft.ML.TestFramework;
+using Xunit;
+
+namespace Microsoft.ML.Functional.Tests
+{
+    public class PredictionScenarios
+    {
+        /// <summary>
+        /// Reconfigurable predictions: The following should be possible: A user trains a binary classifier,
+        /// and through the test evaluator gets a PR curve, then based on the PR curve picks a new threshold
+        /// and configures the scorer (or more precisely instantiates a new scorer over the same model parameters)
+        /// with some threshold derived from that.
+        /// </summary>
+        [Fact]
+        public void ReconfigurablePrediction()
+        {
+            var mlContext = new MLContext(seed: 789);
+
+            // Get the dataset and split it into train and test sets (same task context as the trainer and evaluator below)
+            var data = mlContext.Data.CreateTextLoader(TestDatasets.housing.GetLoaderColumns(), hasHeader: true)
+                .Read(BaseTestClass.GetDataPath(TestDatasets.housing.trainFilename));
+            (var train, var test) = mlContext.Regression.TrainTestSplit(data, testFraction: 0.2);
+
+            // Create a pipeline to train on the housing data
+            var pipeline = mlContext.Transforms.Concatenate("Features", new string[] {
+                "CrimesPerCapita", "PercentResidental", "PercentNonRetail", "CharlesRiver", "NitricOxides", "RoomsPerDwelling",
+                "PercentPre40s", "EmploymentDistance", "HighwayDistance", "TaxRate", "TeacherRatio"})
+                .Append(mlContext.Transforms.CopyColumns("Label", "MedianHomeValue"))
+                .Append(mlContext.Regression.Trainers.OrdinaryLeastSquares());
+
+            var model = pipeline.Fit(train);
+
+            var scoredTest = model.Transform(test);
+            var metrics = mlContext.Regression.Evaluate(scoredTest);
+
+            Common.CheckMetrics(metrics);
+
+            // Todo #2465: Allow the setting of threshold and thresholdColumn for scoring.
+            // This is no longer possible in the API
+            //var newModel = new BinaryPredictionTransformer<IPredictorProducing<float>>(ml, model.Model, trainData.Schema, model.FeatureColumn, threshold: 0.01f, thresholdColumn: DefaultColumnNames.Probability);
+            //var newScoredTest = newModel.Transform(pipeline.Transform(testData));
+            //var newMetrics = mlContext.BinaryClassification.Evaluate(scoredTest);
+            // And the Threshold and ThresholdColumn properties are not settable.
+            //var predictor = model.LastTransformer;
+            //predictor.Threshold = 0.01; // Not possible
+        }
+    }
+}
diff --git a/test/Microsoft.ML.Functional.Tests/Validation.cs b/test/Microsoft.ML.Functional.Tests/Validation.cs
new file mode 100644
index 0000000000..b9bb617285
--- /dev/null
+++ b/test/Microsoft.ML.Functional.Tests/Validation.cs
@@ -0,0 +1,53 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.Data.DataView;
+using Microsoft.ML.Data;
+using Microsoft.ML.RunTests;
+using Microsoft.ML.TestFramework;
+using Microsoft.ML.Trainers.HalLearners;
+using Xunit;
+
+namespace Microsoft.ML.Functional.Tests
+{
+    public class ValidationScenarios
+    {
+        /// <summary>
+        /// Cross-validation: Have a mechanism to do cross validation, that is, you come up with
+        /// a data source (optionally with stratification column), come up with an instantiable transform
+        /// and trainer pipeline, and it will handle (1) splitting up the data, (2) training the separate
+        /// pipelines on in-fold data, (3) scoring on the out-fold data, (4) returning the set of
+        /// metrics, trained pipelines, and scored test data for each fold.
+        /// </summary>
+        [Fact]
+        public void CrossValidation()
+        {
+            var mlContext = new MLContext(seed: 789);
+
+            // Get the dataset
+            var data = mlContext.Data.CreateTextLoader(TestDatasets.housing.GetLoaderColumns(), hasHeader: true)
+                .Read(BaseTestClass.GetDataPath(TestDatasets.housing.trainFilename));
+
+            // Create a pipeline to train on the housing data
+            var pipeline = mlContext.Transforms.Concatenate("Features", new string[] {
+                "CrimesPerCapita", "PercentResidental", "PercentNonRetail", "CharlesRiver", "NitricOxides", "RoomsPerDwelling",
+                "PercentPre40s", "EmploymentDistance", "HighwayDistance", "TaxRate", "TeacherRatio"})
+                .Append(mlContext.Transforms.CopyColumns("Label", "MedianHomeValue"))
+                .Append(mlContext.Regression.Trainers.OrdinaryLeastSquares());
+
+            // Compute the CV result
+            var cvResult = mlContext.Regression.CrossValidate(data, pipeline, numFolds: 5);
+
+            // Check the fold count first so a wrong count fails as an assertion, not as an index error below
+            Assert.Equal(5, cvResult.Length);
+            Assert.IsType<RegressionMetrics>(cvResult[0].metrics);
+            Assert.IsType<TransformerChain<RegressionPredictionTransformer<OlsLinearRegressionModelParameters>>>(cvResult[0].model);
+            Assert.True(cvResult[0].scoredTestData is IDataView);
+
+            // And validate the metrics
+            foreach (var result in cvResult)
+                Common.CheckMetrics(result.metrics);
+        }
+    }
+}
diff --git a/test/Microsoft.ML.OnnxTransformTest/Microsoft.ML.OnnxTransformTest.csproj b/test/Microsoft.ML.OnnxTransformTest/Microsoft.ML.OnnxTransformTest.csproj
index a153655b27..a5dbaca9f8 100644
--- a/test/Microsoft.ML.OnnxTransformTest/Microsoft.ML.OnnxTransformTest.csproj
+++ b/test/Microsoft.ML.OnnxTransformTest/Microsoft.ML.OnnxTransformTest.csproj
@@ -11,7 +11,7 @@
-
+
diff --git a/test/Microsoft.ML.TestFramework/Datasets.cs b/test/Microsoft.ML.TestFramework/Datasets.cs
index 6d6ba61191..1bdfa5048b 100644
--- a/test/Microsoft.ML.TestFramework/Datasets.cs
+++ b/test/Microsoft.ML.TestFramework/Datasets.cs
@@ -158,7 +158,24 @@ public static class TestDatasets
name = "housing",
trainFilename = "housing.txt",
testFilename = "housing.txt",
- loaderSettings = "loader=Text{col=Label:0 col=Features:~ header=+}"
+ loaderSettings = "loader=Text{col=Label:0 col=Features:~ header=+}",
+ GetLoaderColumns = () => // named, typed columns for housing.txt: one R4 column per source index 0-11
+ {
+ return new[] {
+ new TextLoader.Column("MedianHomeValue", DataKind.R4, 0), // index 0 is the column loaderSettings maps to Label
+ new TextLoader.Column("CrimesPerCapita", DataKind.R4, 1),
+ new TextLoader.Column("PercentResidental", DataKind.R4, 2), // NOTE(review): "Residental" looks like a typo for "Residential"; callers use this exact string, so rename both together
+ new TextLoader.Column("PercentNonRetail", DataKind.R4, 3),
+ new TextLoader.Column("CharlesRiver", DataKind.R4, 4),
+ new TextLoader.Column("NitricOxides", DataKind.R4, 5),
+ new TextLoader.Column("RoomsPerDwelling", DataKind.R4, 6),
+ new TextLoader.Column("PercentPre40s", DataKind.R4, 7),
+ new TextLoader.Column("EmploymentDistance", DataKind.R4, 8),
+ new TextLoader.Column("HighwayDistance", DataKind.R4, 9),
+ new TextLoader.Column("TaxRate", DataKind.R4, 10),
+ new TextLoader.Column("TeacherRatio", DataKind.R4, 11),
+ };
+ }
};
public static TestDataset generatedRegressionDatasetmacro = new TestDataset
diff --git a/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj b/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj
index 2d56666b7f..37f4b25c1e 100644
--- a/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj
+++ b/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj
@@ -46,7 +46,7 @@
-
-
+
+
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/CrossValidation.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/CrossValidation.cs
deleted file mode 100644
index a4e3afc2cc..0000000000
--- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/CrossValidation.cs
+++ /dev/null
@@ -1,36 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-using Microsoft.ML.RunTests;
-using Microsoft.ML.Trainers;
-using Xunit;
-
-namespace Microsoft.ML.Tests.Scenarios.Api
-{
- public partial class ApiScenariosTests
- {
- ///
- /// Cross-validation: Have a mechanism to do cross validation, that is, you come up with
- /// a data source (optionally with stratification column), come up with an instantiable transform
- /// and trainer pipeline, and it will handle (1) splitting up the data, (2) training the separate
- /// pipelines on in-fold data, (3) scoring on the out-fold data, (4) returning the set of
- /// evaluations and optionally trained pipes. (People always want metrics out of xfold,
- /// they sometimes want the actual models too.)
- ///
- [Fact]
- void CrossValidation()
- {
- var ml = new MLContext(seed: 1, conc: 1);
-
- var data = ml.Data.ReadFromTextFile(GetDataPath(TestDatasets.Sentiment.trainFilename), hasHeader: true);
-
- // Pipeline.
- var pipeline = ml.Transforms.Text.FeaturizeText("Features", "SentimentText")
- .Append(ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent(
- new SdcaBinaryTrainer.Options { ConvergenceTolerance = 1f, NumThreads = 1, }));
-
- var cvResult = ml.BinaryClassification.CrossValidate(data, pipeline);
- }
- }
-}
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/ReconfigurablePrediction.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/ReconfigurablePrediction.cs
deleted file mode 100644
index 254dd73e45..0000000000
--- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/ReconfigurablePrediction.cs
+++ /dev/null
@@ -1,47 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-using Microsoft.ML.Data;
-using Microsoft.ML.RunTests;
-using Microsoft.ML.Trainers;
-using Xunit;
-
-namespace Microsoft.ML.Tests.Scenarios.Api
-{
- public partial class ApiScenariosTests
- {
- ///
- /// Reconfigurable predictions: The following should be possible: A user trains a binary classifier,
- /// and through the test evaluator gets a PR curve, the based on the PR curve picks a new threshold
- /// and configures the scorer (or more precisely instantiates a new scorer over the same predictor)
- /// with some threshold derived from that.
- ///
- [Fact]
- public void ReconfigurablePrediction()
- {
- var ml = new MLContext(seed: 1, conc: 1);
- var dataReader = ml.Data.ReadFromTextFile(GetDataPath(TestDatasets.Sentiment.trainFilename), hasHeader: true);
-
- var data = ml.Data.ReadFromTextFile(GetDataPath(TestDatasets.Sentiment.trainFilename), hasHeader: true);
- var testData = ml.Data.ReadFromTextFile(GetDataPath(TestDatasets.Sentiment.testFilename), hasHeader: true);
-
- // Pipeline.
- var pipeline = ml.Transforms.Text.FeaturizeText("Features", "SentimentText")
- .Fit(data);
-
- var trainer = ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent(
- new SdcaBinaryTrainer.Options { NumThreads = 1 });
-
- var trainData = ml.Data.Cache(pipeline.Transform(data)); // Cache the data right before the trainer to boost the training speed.
- var model = trainer.Fit(trainData);
-
- var scoredTest = model.Transform(pipeline.Transform(testData));
- var metrics = ml.BinaryClassification.Evaluate(scoredTest);
-
- var newModel = new BinaryPredictionTransformer>(ml, model.Model, trainData.Schema, model.FeatureColumn, threshold: 0.01f, thresholdColumn: DefaultColumnNames.Probability);
- var newScoredTest = newModel.Transform(pipeline.Transform(testData));
- var newMetrics = ml.BinaryClassification.Evaluate(scoredTest);
- }
- }
-}