diff --git a/samples/csharp/common/AutoML/ConsoleHelper.cs b/samples/csharp/common/AutoML/ConsoleHelper.cs index fa3ee6073..f712688b8 100644 --- a/samples/csharp/common/AutoML/ConsoleHelper.cs +++ b/samples/csharp/common/AutoML/ConsoleHelper.cs @@ -44,10 +44,10 @@ public static void PrintBinaryClassificationMetrics(string name, BinaryClassific public static void PrintMulticlassClassificationMetrics(string name, MulticlassClassificationMetrics metrics) { Console.WriteLine($"************************************************************"); - Console.WriteLine($"* Metrics for {name} multi-class classification model "); + Console.WriteLine($"* Metrics for {name} multi-class classification model "); Console.WriteLine($"*-----------------------------------------------------------"); - Console.WriteLine($" MacroAccuracy = {metrics.MacroAccuracy:0.####}, a value between 0 and 1, the closer to 1, the better"); - Console.WriteLine($" MicroAccuracy = {metrics.MicroAccuracy:0.####}, a value between 0 and 1, the closer to 1, the better"); + Console.WriteLine($" MacroAccuracy = {metrics.MacroAccuracy:0.####}, a value from 0 and 1, where closer to 1.0 is better"); + Console.WriteLine($" MicroAccuracy = {metrics.MicroAccuracy:0.####}, a value from 0 and 1, where closer to 1.0 is better"); Console.WriteLine($" LogLoss = {metrics.LogLoss:0.####}, the closer to 0, the better"); Console.WriteLine($" LogLoss for class 1 = {metrics.PerClassLogLoss[0]:0.####}, the closer to 0, the better"); Console.WriteLine($" LogLoss for class 2 = {metrics.PerClassLogLoss[1]:0.####}, the closer to 0, the better"); @@ -55,6 +55,15 @@ public static void PrintMulticlassClassificationMetrics(string name, MulticlassC Console.WriteLine($"************************************************************"); } + public static void PrintRankingMetrics(string name, RankingMetrics metrics, uint optimizationMetricTruncationLevel) + { + Console.WriteLine($"************************************************************"); + 
Console.WriteLine($"* Metrics for {name} ranking model "); + Console.WriteLine($"*-----------------------------------------------------------"); + Console.WriteLine($" Normalized Discounted Cumulative Gain (NDCG@{optimizationMetricTruncationLevel}) = {metrics?.NormalizedDiscountedCumulativeGains?[(int)optimizationMetricTruncationLevel - 1] ?? double.NaN:0.####}, a value from 0 and 1, where closer to 1.0 is better"); + Console.WriteLine($" Discounted Cumulative Gain (DCG@{optimizationMetricTruncationLevel}) = {metrics?.DiscountedCumulativeGains?[(int)optimizationMetricTruncationLevel - 1] ?? double.NaN:0.####}"); + } + public static void ShowDataViewInConsole(MLContext mlContext, IDataView dataView, int numberOfRows = 4) { string msg = string.Format("Show data in DataView: Showing {0} rows with the columns", numberOfRows.ToString()); @@ -89,6 +98,11 @@ internal static void PrintIterationMetrics(int iteration, string trainerName, Re CreateRow($"{iteration,-4} {trainerName,-35} {metrics?.RSquared ?? double.NaN,8:F4} {metrics?.MeanAbsoluteError ?? double.NaN,13:F2} {metrics?.MeanSquaredError ?? double.NaN,12:F2} {metrics?.RootMeanSquaredError ?? double.NaN,8:F2} {runtimeInSeconds.Value,9:F1}", Width); } + internal static void PrintIterationMetrics(int iteration, string trainerName, RankingMetrics metrics, double? runtimeInSeconds) + { + CreateRow($"{iteration,-4} {trainerName,-15} {metrics?.NormalizedDiscountedCumulativeGains[0] ?? double.NaN,9:F4} {metrics?.NormalizedDiscountedCumulativeGains[2] ?? double.NaN,9:F4} {metrics?.NormalizedDiscountedCumulativeGains[9] ?? double.NaN,9:F4} {metrics?.DiscountedCumulativeGains[9] ?? 
double.NaN,9:F4} {runtimeInSeconds.Value,9:F1}", Width); + } + internal static void PrintIterationException(Exception ex) { Console.WriteLine($"Exception during AutoML iteration: {ex}"); @@ -109,6 +123,11 @@ internal static void PrintRegressionMetricsHeader() CreateRow($"{"",-4} {"Trainer",-35} {"RSquared",8} {"Absolute-loss",13} {"Squared-loss",12} {"RMS-loss",8} {"Duration",9}", Width); } + internal static void PrintRankingMetricsHeader() + { + CreateRow($"{"",-4} {"Trainer",-15} {"NDCG@1",9} {"NDCG@3",9} {"NDCG@10",9} {"DCG@10",9} {"Duration",9}", Width); + } + private static void CreateRow(string message, int width) { Console.WriteLine("|" + message.PadRight(width - 2) + "|"); @@ -239,10 +258,10 @@ private void AppendTableRow(ICollection tableRows, tableRows.Add(new[] { - columnName, - GetColumnDataType(columnName), - columnPurpose - }); + columnName, + GetColumnDataType(columnName), + columnPurpose + }); } private void AppendTableRows(ICollection tableRows, diff --git a/samples/csharp/common/AutoML/ProgressHandlers.cs b/samples/csharp/common/AutoML/ProgressHandlers.cs index a0a1efdb0..8457efcfa 100644 --- a/samples/csharp/common/AutoML/ProgressHandlers.cs +++ b/samples/csharp/common/AutoML/ProgressHandlers.cs @@ -81,4 +81,27 @@ public void Report(RunDetail iterationResult) } } } + + public class RankingExperimentProgressHandler : IProgress<RunDetail<RankingMetrics>> + { + private int _iterationIndex; + + public void Report(RunDetail<RankingMetrics> iterationResult) + { + if (_iterationIndex++ == 0) + { + ConsoleHelper.PrintRankingMetricsHeader(); + } + + if (iterationResult.Exception != null) + { + ConsoleHelper.PrintIterationException(iterationResult.Exception); + } + else + { + ConsoleHelper.PrintIterationMetrics(_iterationIndex, iterationResult.TrainerName, + iterationResult.ValidationMetrics, iterationResult.RuntimeInSeconds); + } + } + } } diff --git a/samples/csharp/getting-started/Ranking_AutoML/Ranking/README.md b/samples/csharp/getting-started/Ranking_AutoML/Ranking/README.md new file mode 
100644 index 000000000..615a79e81 --- /dev/null +++ b/samples/csharp/getting-started/Ranking_AutoML/Ranking/README.md @@ -0,0 +1,206 @@ +## Automated Machine Learning + +Automated machine learning (AutoML) automates the end-to-end process of applying machine learning to real-world problems. Given a dataset, AutoML iterates over different data featurizations, machine learning algorithms, hyperparameters, etc. to select the best model. + +## Problem +The ability to perform ranking is a common problem faced by search engines since users expect query results to be ranked/sorted according to their relevance. This problem extends beyond the needs of search engines to include a variety of business scenarios where personalized sorting is key to the user experience. Here are a few specific examples: +* Travel Agency - Provide a list of hotels with those that are most likely to be purchased/booked by the user positioned highest in the list. +* Shopping - Display items from a product catalog in an order that aligns with a user's shopping preferences. +* Recruiting - Retrieve job applications ranked according to the candidates that are most qualified for a new job opening. + +Ranking is useful to any scenario where it is important to list items in an order that increases the likelihood of a click, purchase, reservation, etc. + +In this sample, we show how to apply ranking to search engine results. AutoML sweeps over the models and associated hyperparameters to optimize [NDCG@N](https://docs.microsoft.com/en-us/dotnet/api/microsoft.ml.data.rankingmetrics.normalizeddiscountedcumulativegains?view=ml-dotnet) (normalized discounted cumulative gains) or [DCG@N](https://docs.microsoft.com/en-us/dotnet/api/microsoft.ml.data.rankingmetrics.discountedcumulativegains?view=ml-dotnet) (discounted cumulative gains). 
+ +## Dataset +The data used by this sample is based on a public [dataset provided by Microsoft](https://www.microsoft.com/en-us/research/project/mslr/) originally provided by Microsoft Bing. The dataset is released under a [CC-by 4.0](https://creativecommons.org/licenses/by/4.0/) license and includes training, validation, and testing data. + +``` +@article{DBLP:journals/corr/QinL13, + author = {Tao Qin and + Tie{-}Yan Liu}, + title = {Introducing {LETOR} 4.0 Datasets}, + journal = {CoRR}, + volume = {abs/1306.2597}, + year = {2013}, + url = {https://arxiv.org/abs/1306.2597}, + timestamp = {Mon, 01 Jul 2013 20:31:25 +0200}, + biburl = {https://dblp.uni-trier.de/rec/bib/journals/corr/QinL13}, + bibsource = {dblp computer science bibliography, https://dblp.org} +} +``` + +The following description is provided for this dataset: + +The datasets are machine learning data, in which queries and urls are represented by IDs. The datasets consist of feature vectors extracted from query-url pairs along with relevance judgment labels: + +* The relevance judgments are obtained from a retired labeling set of a commercial web search engine (Microsoft Bing), which take 5 values from 0 (irrelevant) to 4 (perfectly relevant). + +* The features are basically extracted by us (e.g. Microsoft), and are those widely used in the research community. + +In the data files, each row corresponds to a query-url pair. The first column is relevance label of the pair, the second column is query id, and the following columns are features. The larger value the relevance label has, the more relevant the query-url pair is. A query-url pair is represented by a 136-dimensional feature vector. + +## ML Task - Ranking +As previously mentioned, this sample uses algorithms that are applied using a supervised learning technique known as [**Learning to Rank**](https://en.wikipedia.org/wiki/Learning_to_rank). 
This technique requires that train/validation/test datasets contain groups of data instances that are each labeled with their relevance score (e.g. relevance judgment label). The label is a numerical/ordinal value, such as {0, 1, 2, 3, 4}. The process for labeling these data instances with their relevance scores can be done manually by subject matter experts. Or, the labels can be determined using other metrics, such as the number of clicks on a given search result. + +It is expected that the dataset will have many more "Bad" relevance scores than "Perfect". This helps to avoid converting a ranked list directly into equally sized bins of {0, 1, 2, 3, 4}. The relevance scores are also reused so that you will have many items **per group** that are labeled 0, which means the result is "Bad". And, only one or a few labeled 4, which means that the result is "Perfect". Here is a breakdown of the dataset's distribution of labels. You'll notice that there are 70x more 0 (e.g. "Bad") than 4 (e.g. "Perfect") labels: +* Label 0 -- 624,263 +* Label 1 -- 386,280 +* Label 2 -- 159,451 +* Label 3 -- 21,317 +* Label 4 -- 8,881 + +Once the train/validation/test datasets are labeled with relevance scores, the model (e.g. ranker) can then be trained and evaluated using this data. Through the model training process, the ranker learns how to score each data instance within a group based on their label value. The resulting score of an individual data instance by itself isn't important -- instead, the scores should be compared against one another to determine the relative ordering of a group's data instances. The higher the score a data instance has, the more relevant and more highly ranked it is within its group. + +## Solution +Since this sample's dataset is already labeled with relevance scores, we can immediately start using AutoML. 
In cases where you start with a dataset that isn't labeled, you will need to go through this process first by having subject matter experts provide relevance scores or by using some other metrics to determine relevance. + +Generally, the pattern to train, validate, and test a model includes the following steps: +1. The AutoML model is trained on the **training** dataset. The model's metrics are then evaluated using the **validation** dataset. +2. Step #1 is repeated by re-fitting and reevaluating the model until the desired metrics are achieved. The outcome of this step is a pipeline that applies the necessary data transformations and trainer. +3. The pipeline is used to re-fit on the combined **training** + **validation** datasets. The model's metrics are then evaluated on the **testing** dataset (exactly once) -- this is the final set of metrics used to measure the model's quality. +4. The final step is to re-fit the pipeline on **all** of the combined **training** + **validation** + **testing** datasets. This model is then ready to be deployed into production. + +The final estimate of how well the model will do in production is the metrics from step #3. The final model for production, trained on all available data, is trained in step #4. + +This sample performs a simplified version of the above steps to rank the search engine results: +1. The pipeline is setup with the necessary data transforms and the LightGBM LambdaRank trainer. +2. The model is **trained** using the **training** dataset. The model is then **evaluated** using the **validation** dataset. This results in a **prediction** for each search engine result. The predictions are **evaluated** by examining metrics; specifically the [Normalized Discounted Cumulative Gain](https://en.wikipedia.org/wiki/Discounted_cumulative_gain) (NDCG). +3. The pipeline is used to **retrain** the model using the **training + validation** datasets. 
The resulting model is **evaluated** using the **test** dataset -- this is our final set of metrics for the model. +4. The model is **retrained** one last time using the **training + validation + testing** datasets. The final step is to **consume** the model to perform ranking predictions for new incoming searches. This results in a **score** for each search engine result. The score is used to determine the ranking relative to other results within the same query (e.g. group). + +### 1. Setup the AutoML Experiment +This sample trains the model using the LightGbmRankingTrainer which relies on LightGBM LambdaRank. The model requires the following input columns: + +* Group Id - Column that contains the group id for each data instance. Data instances are contained in logical groupings representing all candidate results in a single query and each group has an identifier known as the group id. In the case of the search engine dataset, search results are grouped by their corresponding query where the group id corresponds to the query id. The input group id data type must be [key type](https://docs.microsoft.com/en-us/dotnet/api/microsoft.ml.data.keydataviewtype). +* Label - Column that contains the relevance label of each data instance where higher values indicate higher relevance. The input label data type must be [key type](https://docs.microsoft.com/en-us/dotnet/api/microsoft.ml.data.keydataviewtype) or [Single](https://docs.microsoft.com/en-us/dotnet/api/system.single). +* Features - The columns that are influential in determining the relevance/rank of a data instance. The input feature data must be a fixed size vector of type [Single](https://docs.microsoft.com/en-us/dotnet/api/system.single). 
+ +The following code is used to setup the experiment: + +```CSharp +var textLoaderOptions = new TextLoader.Options +{ + Separators = new[] { '\t' }, + HasHeader = true, + Columns = new[] + { + new TextLoader.Column("Label", DataKind.Single, 0), + new TextLoader.Column("GroupId", DataKind.Int32, 1), + new TextLoader.Column("Features", DataKind.Single, 2, 133), + } +}; + +TextLoader textLoader = mlContext.Data.CreateTextLoader(textLoaderOptions); +IDataView trainDataView = textLoader.Load(TrainDatasetPath); +IDataView validationDataView = textLoader.Load(ValidationDatasetPath); +IDataView testDataView = textLoader.Load(TestDatasetPath); + +// STEP 2: Display first few rows of training data +ConsoleHelper.ShowDataViewInConsole(mlContext, trainDataView); + +// STEP 3: Initialize our user-defined progress handler that AutoML will +// invoke after each model it produces and evaluates. +var progressHandler = new RankingExperimentProgressHandler(); + +// Set up the experiment settings and set the experiment time +// and optimization metric to use. +var experimentSettings = new RankingExperimentSettings +{ + MaxExperimentTimeInSeconds = ExperimentTime, + OptimizingMetric = RankingMetric.Ndcg +}; +````` + +### 2. Train and Evaluate Model +First, we need to train our model using the **train** dataset. Then, we need to evaluate our model to determine how effective it is at ranking. To do so, the model is run against another dataset that was not used in training (e.g. the **validation** dataset). + +`Evaluate()` compares the predicted values for the **validation** dataset against the dataset's labels and produces various metrics you can explore. Specifically, we can gauge the quality of our model using Discounted Cumulative Gain (DCG) and Normalized Discounted Cumulative Gain (NDCG) which are included in the `RankingMetrics` returned by `Evaluate()`. 
+ +When evaluating the `RankingMetrics` for this sample's model, you'll notice that the following metrics are reported for DCG and NDCG (the values that you see when running the sample will be similar to these): +* DCG - @1:11.9736, @2:17.5429, @3:21.2532, @4:24.4245, @5:27.0554, @6:29.5571, @7:31.7560, @8:33.7904, @9:35.7949, @10:37.6874 + +* NDCG: @1:0.4847, @2:0.4820, @3:0.4833, @4:0.4910, @5:0.4977, @6:0.5058, @7:0.5125, @8:0.5182, @9:0.5247, @10:0.5312 + +The NDCG values are most useful to examine since this allows us to compare our model's ranking ability across different datasets. The potential value of NDCG ranges from **0.0** to **1.0**, with 1.0 being a perfect model that exactly matches the ideal ranking. + +With this in mind, let's look at our model's values for NDCG. In particular, let's look at the value for **NDCG@10** which is **0.5312**. This is the average NDCG for a query returning the top **10** search engine results and is useful to gauge whether the top **10** results will be ranked correctly. To increase the model's ranking ability, we would need to experiment with feature engineering and model hyperparameters and modify the pipeline accordingly. We would continue to iterate on this by modifying the pipeline, training the model, and evaluating the metrics until the desired model quality is achieved. + +Refer to the following code used to auto train and evaluate the model: + +```CSharp +ExperimentResult experimentResult = mlContext.Auto() + .CreateRankingExperiment(experimentSettings) + .Execute( + trainData: trainDataView, + validationData: validationDataView, + progressHandler: progressHandler); + +var metrics = mlContext.Ranking.Evaluate(_predictions, rankingEvaluatorOptions); +````` +### 3. Retrain and Perform Final Evaluation of Model +Once the desired metrics are achieved, the resulting pipeline is used to train on the combined **train + validation** datasets. 
We then evaluate this model one last time using the **test** dataset to get the model's final metrics. + +Refer to the following code: + +```CSharp +// Re-fit best pipeline on train and validation data, to produce +// a model that is trained on as much data as is available while +// still having test data for the final estimate of how well the +// model will do in production. +Console.WriteLine("\n===== Refitting on train+valid and evaluating model's nDCG with test data ====="); +var trainPlusValidationDataView = textLoader.Load(new MultiFileSource(TrainDatasetPath, ValidationDatasetPath)); + +var refitModel = experimentResult.BestRun.Estimator.Fit(trainPlusValidationDataView); + +IDataView predictionsRefitOnTrainPlusValidation = refitModel.Transform(testDataView); + +// Setting the DCG truncation level +var rankingEvaluatorOptions = new RankingEvaluatorOptions { DcgTruncationLevel = 10 }; + +var metricsRefitOnTrainPlusValidation = mlContext.Ranking.Evaluate(predictionsRefitOnTrainPlusValidation, rankingEvaluatorOptions); +``` + +### 4. Retrain and Consume the Model + +The final step is to retrain the model using all of the data, **training + validation + testing**. + +After the model is trained, we can use the `Predict()` API to predict the ranking of search engine results for a new, incoming user query. + +```CSharp +// Retrain the model on all of the data, train + validate + test. +// Re-fit best pipeline on train and validation data, to produce +// a model that is trained on as much data as is available while +// still having test data for the final estimate of how well the +// model will do in production. 
+Console.WriteLine("\n===== Refitting on train+valid and evaluating model's nDCG with test data ====="); +var trainPlusValidationDataView = textLoader.Load(new MultiFileSource(TrainDatasetPath, ValidationDatasetPath)); + +var refitModel = experimentResult.BestRun.Estimator.Fit(trainPlusValidationDataView); + +IDataView predictionsRefitOnTrainPlusValidation = refitModel.Transform(validationDataView); + +// Setting the DCG truncation level +var rankingEvaluatorOptions = new RankingEvaluatorOptions { DcgTruncationLevel = 10 }; + +var metricsRefitOnTrainPlusValidation = mlContext.Ranking.Evaluate(predictionsRefitOnTrainPlusValidation, rankingEvaluatorOptions); + +RunDetail bestRun = experimentResult.BestRun; + +ITransformer trainedModel = bestRun.Model; +_predictions = trainedModel.Transform(testDataView); + +[...] + +// In the predictions, get the scores of the search results included in the first query (e.g. group). +var searchQueries = mlContext.Data.CreateEnumerable(_predictions, reuseRowObject: false); +var firstGroupId = searchQueries.First().GroupId; +var firstGroupPredictions = searchQueries.Take(100).Where(p => p.GroupId == firstGroupId).OrderByDescending(p => p.Score).ToList(); + +// The individual scores themselves are NOT a useful measure of result quality; instead, they are only useful as a relative measure to other scores in the group. +// The scores are used to determine the ranking where a higher score indicates a higher ranking versus another candidate result. 
+foreach (var prediction in firstGroupPredictions) +{ + Console.WriteLine($"GroupId: {prediction.GroupId}, Score: {prediction.Score}"); +} +````` \ No newline at end of file diff --git a/samples/csharp/getting-started/Ranking_AutoML/Ranking/Ranking.sln b/samples/csharp/getting-started/Ranking_AutoML/Ranking/Ranking.sln new file mode 100644 index 000000000..331b77209 --- /dev/null +++ b/samples/csharp/getting-started/Ranking_AutoML/Ranking/Ranking.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.30509.190 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Ranking", "Ranking\Ranking.csproj", "{F25640E5-1CF4-4AAE-87E1-73E45F72C1BC}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {F25640E5-1CF4-4AAE-87E1-73E45F72C1BC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {F25640E5-1CF4-4AAE-87E1-73E45F72C1BC}.Debug|Any CPU.Build.0 = Debug|Any CPU + {F25640E5-1CF4-4AAE-87E1-73E45F72C1BC}.Release|Any CPU.ActiveCfg = Release|Any CPU + {F25640E5-1CF4-4AAE-87E1-73E45F72C1BC}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {7A2EA918-DD10-41BB-B19B-F53EE61EF7D3} + EndGlobalSection +EndGlobal diff --git a/samples/csharp/getting-started/Ranking_AutoML/Ranking/Ranking/DataStructures/RankingData.cs b/samples/csharp/getting-started/Ranking_AutoML/Ranking/Ranking/DataStructures/RankingData.cs new file mode 100644 index 000000000..70916fb54 --- /dev/null +++ b/samples/csharp/getting-started/Ranking_AutoML/Ranking/Ranking/DataStructures/RankingData.cs @@ -0,0 +1,17 @@ +using Microsoft.ML.Data; + 
+namespace Ranking.DataStructures +{ + public class RankingData + { + [LoadColumn(0)] + public float Label { get; set; } + + [LoadColumn(1)] + public int GroupId { get; set; } + + [LoadColumn(2, 133)] + [VectorType(133)] + public float[] Features { get; set; } + } +} diff --git a/samples/csharp/getting-started/Ranking_AutoML/Ranking/Ranking/DataStructures/RankingPrediction.cs b/samples/csharp/getting-started/Ranking_AutoML/Ranking/Ranking/DataStructures/RankingPrediction.cs new file mode 100644 index 000000000..9a647c155 --- /dev/null +++ b/samples/csharp/getting-started/Ranking_AutoML/Ranking/Ranking/DataStructures/RankingPrediction.cs @@ -0,0 +1,13 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Ranking.DataStructures +{ + public class RankingPrediction + { + public uint GroupId { get; set; } + + public float Score { get; set; } + } +} diff --git a/samples/csharp/getting-started/Ranking_AutoML/Ranking/Ranking/Program.cs b/samples/csharp/getting-started/Ranking_AutoML/Ranking/Ranking/Program.cs new file mode 100644 index 000000000..0c58535d2 --- /dev/null +++ b/samples/csharp/getting-started/Ranking_AutoML/Ranking/Ranking/Program.cs @@ -0,0 +1,227 @@ +using Common; +using Microsoft.ML; +using Microsoft.ML.AutoML; +using Microsoft.ML.Data; +using Ranking.DataStructures; +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Net; +using System.Net.Http.Headers; + +namespace Ranking +{ + class Program + { + const string AssetsPath = @"../../../Assets"; + const string TrainDatasetUrl = "https://aka.ms/mlnet-resources/benchmarks/MSLRWeb10KTrain720kRows.tsv"; + const string ValidationDatasetUrl = "https://aka.ms/mlnet-resources/benchmarks/MSLRWeb10KValidate240kRows.tsv"; + const string TestDatasetUrl = "https://aka.ms/mlnet-resources/benchmarks/MSLRWeb10KTest240kRows.tsv"; + + readonly static string InputPath = Path.Combine(AssetsPath, "Input"); + readonly static string OutputPath = 
Path.Combine(AssetsPath, "Output"); + readonly static string TrainDatasetPath = Path.Combine(InputPath, "MSLRWeb10KTrain720kRows.tsv"); + readonly static string ValidationDatasetPath = Path.Combine(InputPath, "MSLRWeb10KValidate240kRows.tsv"); + readonly static string TestDatasetPath = Path.Combine(InputPath, "MSLRWeb10KTest240kRows.tsv"); + readonly static string ModelPath = Path.Combine(OutputPath, "RankingModel.zip"); + + // Runtime should allow for the sweeping to plateau, which begins near iteration 60 + private static uint ExperimentTime = 600; + + static void Main(string[] args) + { + var mlContext = new MLContext(seed: 0); + + // Create, train, evaluate and save a model + (var predictions, var testDataView) = BuildTrainEvaluateAndSaveModel(mlContext); + + // Make a single test prediction loading the model from .ZIP file + TestSinglePrediction(mlContext, predictions, testDataView); + + Console.WriteLine("=============== End of process, hit any key to finish ==============="); + Console.ReadKey(); + } + + private static (IDataView, IDataView) BuildTrainEvaluateAndSaveModel(MLContext mlContext) + { + // STEP 1: Download and load the data + GetData(InputPath, OutputPath, TrainDatasetPath, TrainDatasetUrl, TestDatasetUrl, TestDatasetPath, + ValidationDatasetUrl, ValidationDatasetPath); + + //ColumnInferenceResults columnInference = mlContext.Auto().InferColumns(TrainDatasetPath, labelColumnIndex: 0, + // separatorChar: '\t', hasHeader: true, groupColumns: false, allowSparse: true); + + var textLoaderOptions = new TextLoader.Options + { + Separators = new[] { '\t' }, + HasHeader = true, + Columns = new[] + { + new TextLoader.Column("Label", DataKind.Single, 0), + new TextLoader.Column("GroupId", DataKind.Int32, 1), + new TextLoader.Column("Features", DataKind.Single, 2, 133), + } + }; + + TextLoader textLoader = mlContext.Data.CreateTextLoader(textLoaderOptions); + IDataView trainDataView = textLoader.Load(TrainDatasetPath); + IDataView validationDataView = 
textLoader.Load(ValidationDatasetPath); + IDataView testDataView = textLoader.Load(TestDatasetPath); + + // STEP 2: Display first few rows of training data + ConsoleHelper.ShowDataViewInConsole(mlContext, trainDataView); + + // STEP 3: Initialize our user-defined progress handler that AutoML will + // invoke after each model it produces and evaluates. + var progressHandler = new RankingExperimentProgressHandler(); + + // STEP 4: Run AutoML ranking experiment + ConsoleHelper.ConsoleWriteHeader("=============== Running AutoML experiment ==============="); + Console.WriteLine($"Running AutoML ranking experiment for {ExperimentTime} seconds..."); + + var experimentSettings = new RankingExperimentSettings + { + MaxExperimentTimeInSeconds = ExperimentTime, + OptimizingMetric = RankingMetric.Ndcg, + OptimizationMetricTruncationLevel = 10 + }; + + ExperimentResult experimentResult = mlContext.Auto() + .CreateRankingExperiment(experimentSettings) + .Execute( + trainData: trainDataView, + validationData: validationDataView, + progressHandler: progressHandler); + + // Print top models found by AutoML + Console.WriteLine("\n===== Evaluating model's NDCG (on validation data) ====="); + PrintTopModels(experimentResult, experimentSettings.OptimizationMetricTruncationLevel); + + var rankingEvaluatorOptions = new RankingEvaluatorOptions + { + DcgTruncationLevel = Math.Min(10, (int)experimentSettings.OptimizationMetricTruncationLevel * 2) + }; + + Console.WriteLine("\n===== Evaluating model's NDCG (on test data) ====="); + IDataView predictions = experimentResult.BestRun.Model.Transform(testDataView); + var metrics = mlContext.Ranking.Evaluate(predictions, rankingEvaluatorOptions); + ConsoleHelper.PrintRankingMetrics(experimentResult.BestRun.TrainerName, metrics, experimentSettings.OptimizationMetricTruncationLevel); + + // STEP 5: Refit the model and get final metrics + // Re-fit best pipeline on train and validation data, to produce + // a model that is trained on as much data as 
is available while + // still having test data for the final estimate of how well the + // model will do in production. + Console.WriteLine("\n===== Refitting on train+valid and evaluating model's NDCG (on test data) ====="); + var trainPlusValidationDataView = textLoader.Load(new MultiFileSource(TrainDatasetPath, ValidationDatasetPath)); + var refitModel = experimentResult.BestRun.Estimator.Fit(trainPlusValidationDataView); + IDataView predictionsRefitOnTrainPlusValidation = refitModel.Transform(testDataView); + var metricsRefitOnTrainPlusValidation = mlContext.Ranking.Evaluate(predictionsRefitOnTrainPlusValidation, rankingEvaluatorOptions); + ConsoleHelper.PrintRankingMetrics(experimentResult.BestRun.TrainerName, metricsRefitOnTrainPlusValidation, experimentSettings.OptimizationMetricTruncationLevel); + + // STEP 6: Refit the model with all available data + // Re-fit best pipeline again on train, validation, and test data, to + // produce a model that is trained on as much data as is available. + // This is the final model that can be deployed to production. + // No metrics are printed since we no longer have an independent + // scoring dataset. 
+ Console.WriteLine("\n===== Refitting on train+valid+test to get the final model to launch to production ====="); + var trainPlusValidationPlusTestDataView = textLoader.Load(new MultiFileSource(TrainDatasetPath, ValidationDatasetPath, TestDatasetPath)); + var refitModelOnTrainValidTest = experimentResult.BestRun.Estimator.Fit(trainPlusValidationPlusTestDataView); + + // STEP 7: Save/persist the trained model to a .ZIP file + mlContext.Model.Save(refitModelOnTrainValidTest, trainDataView.Schema, ModelPath); + + Console.WriteLine("The model is saved to {0}", ModelPath); + + return (predictionsRefitOnTrainPlusValidation, testDataView); + } + + private static void TestSinglePrediction(MLContext mlContext, IDataView predictions, IDataView testDataView) + { + ConsoleHelper.ConsoleWriteHeader("=============== Testing prediction engine ==============="); + + ITransformer trainedModel = mlContext.Model.Load(ModelPath, out var modelInputSchema); + Console.WriteLine($"=============== Loaded Model OK ==============="); + + // In the predictions, get the scores of the search results included in the first query (e.g. group). + var searchQueries = mlContext.Data.CreateEnumerable(predictions, reuseRowObject: false); + var firstGroupId = searchQueries.First().GroupId; + var firstGroupPredictions = searchQueries.Take(100).Where(p => p.GroupId == firstGroupId).OrderByDescending(p => p.Score).ToList(); + + // Label values from the test dataset (not the predicted scores/labels) + IEnumerator labelEnumerator = mlContext.Data.CreateEnumerable(testDataView, true) + .Select(a => a.Label).GetEnumerator(); + + // The individual scores themselves are NOT a useful measure of result quality; instead, they are only useful as a relative measure to other scores in the group. + // The scores are used to determine the ranking where a higher score indicates a higher ranking versus another candidate result. 
+ foreach (var prediction in firstGroupPredictions) + { + labelEnumerator.MoveNext(); + Console.WriteLine($"GroupId: {prediction.GroupId}, Score: {prediction.Score}, Correct Label: {labelEnumerator.Current}"); + } + } + + private static void GetData(string inputPath, string outputPath, string trainDatasetPath, string trainDatasetUrl, + string testDatasetUrl, string testDatasetPath, string validationDatasetUrl, string validationDatasetPath) + { + Console.WriteLine("===== Prepare data =====\n"); + + if (!Directory.Exists(outputPath)) + { + Directory.CreateDirectory(outputPath); + } + + if (!Directory.Exists(inputPath)) + { + Directory.CreateDirectory(inputPath); + } + + if (!File.Exists(trainDatasetPath)) + { + Console.WriteLine("===== Downloading the train dataset - this may take several minutes =====\n"); + using (var client = new WebClient()) + { + client.DownloadFile(trainDatasetUrl, trainDatasetPath); + } + } + + if (!File.Exists(validationDatasetPath)) + { + Console.WriteLine("===== Downloading the validation dataset - this may take several minutes =====\n"); + using (var client = new WebClient()) + { + client.DownloadFile(validationDatasetUrl, validationDatasetPath); + } + } + + if (!File.Exists(testDatasetPath)) + { + Console.WriteLine("===== Downloading the test dataset - this may take several minutes =====\n"); + using (var client = new WebClient()) + { + client.DownloadFile(testDatasetUrl, testDatasetPath); + } + } + + Console.WriteLine("===== Download is finished =====\n"); + } + + private static void PrintTopModels(ExperimentResult<RankingMetrics> experimentResult, uint optimizationMetricTruncationLevel) + { + // Get top few runs ordered by NDCG + var topRuns = experimentResult.RunDetails + .Where(r => r.ValidationMetrics != null && !double.IsNaN(r.ValidationMetrics.NormalizedDiscountedCumulativeGains[(int)optimizationMetricTruncationLevel - 1])) + .OrderByDescending(r => r.ValidationMetrics.NormalizedDiscountedCumulativeGains[(int)optimizationMetricTruncationLevel - 
1]).Take(5); + + Console.WriteLine($"Top models ordered by NDCG@{optimizationMetricTruncationLevel}"); + ConsoleHelper.PrintRankingMetricsHeader(); + for (var i = 0; i < topRuns.Count(); i++) + { + var run = topRuns.ElementAt(i); + ConsoleHelper.PrintIterationMetrics(i + 1, run.TrainerName, run.ValidationMetrics, run.RuntimeInSeconds); + } + } + } +} diff --git a/samples/csharp/getting-started/Ranking_AutoML/Ranking/Ranking/Ranking.csproj b/samples/csharp/getting-started/Ranking_AutoML/Ranking/Ranking/Ranking.csproj new file mode 100644 index 000000000..ca11766fe --- /dev/null +++ b/samples/csharp/getting-started/Ranking_AutoML/Ranking/Ranking/Ranking.csproj @@ -0,0 +1,22 @@ + + + + Exe + netcoreapp3.1 + + + + + + + + + + + + + + + + +