|
| 1 | +// Licensed to the .NET Foundation under one or more agreements. |
| 2 | +// The .NET Foundation licenses this file to you under the MIT license. |
| 3 | +// See the LICENSE file in the project root for more information. |
| 4 | + |
| 5 | +using Microsoft.ML.RunTests; |
| 6 | +using Microsoft.ML.TestFramework; |
| 7 | +using Xunit; |
| 8 | + |
namespace Microsoft.ML.Functional.Tests
{
    /// <summary>
    /// Functional scenarios covering how a trained model is used for prediction.
    /// </summary>
    public class PredictionScenarios
    {
        /// <summary>
        /// Reconfigurable predictions: The following should be possible: A user trains a binary classifier,
        /// and through the test evaluator gets a PR curve, then, based on the PR curve, picks a new threshold
        /// and configures the scorer (or more precisely instantiates a new scorer over the same model parameters)
        /// with some threshold derived from that.
        /// </summary>
        /// <remarks>
        /// The threshold-reconfiguration part of the scenario is blocked on issue #2465, so for now this
        /// test only trains an ordinary-least-squares regression pipeline on the housing dataset,
        /// scores the held-out split, and checks the resulting regression metrics.
        /// </remarks>
        [Fact]
        public void ReconfigurablePrediction()
        {
            var mlContext = new MLContext(seed: 789);

            // Load the housing dataset and hold out 20% of it for testing.
            // The split goes through the regression catalog so that the split, trainer, and
            // evaluator all belong to the same task.
            var loader = mlContext.Data.CreateTextLoader(TestDatasets.housing.GetLoaderColumns(), hasHeader: true);
            var data = loader.Read(BaseTestClass.GetDataPath(TestDatasets.housing.trainFilename));
            var (train, test) = mlContext.Regression.TrainTestSplit(data, testFraction: 0.2);

            // Columns that are concatenated into the single "Features" vector.
            var featureColumns = new[]
            {
                "CrimesPerCapita", "PercentResidental", "PercentNonRetail", "CharlesRiver", "NitricOxides", "RoomsPerDwelling",
                "PercentPre40s", "EmploymentDistance", "HighwayDistance", "TaxRate", "TeacherRatio"
            };

            // Create a pipeline to train on the housing data: build the features, expose
            // "MedianHomeValue" as the label, and fit an ordinary-least-squares regressor.
            var pipeline = mlContext.Transforms.Concatenate("Features", featureColumns)
                .Append(mlContext.Transforms.CopyColumns("Label", "MedianHomeValue"))
                .Append(mlContext.Regression.Trainers.OrdinaryLeastSquares());

            var model = pipeline.Fit(train);

            // Score the held-out data and validate the regression metrics.
            var scoredTest = model.Transform(test);
            var metrics = mlContext.Regression.Evaluate(scoredTest);

            Common.CheckMetrics(metrics);

            // Todo #2465: Allow the setting of threshold and thresholdColumn for scoring.
            // This is no longer possible in the API. The intended usage is sketched below
            // (not compiled; names follow the locals of this test):
            //var newModel = new BinaryPredictionTransformer<IPredictorProducing<float>>(mlContext, model.Model, train.Schema, model.FeatureColumn, threshold: 0.01f, thresholdColumn: DefaultColumnNames.Probability);
            //var newScoredTest = newModel.Transform(test);
            //var newMetrics = mlContext.BinaryClassification.Evaluate(newScoredTest);
            // And the Threshold and ThresholdColumn properties are not settable.
            //var predictor = model.LastTransformer;
            //predictor.Threshold = 0.01; // Not possible
        }
    }
}
0 commit comments