diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorization.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorization.cs index f77cc7ee8d..8c2dd2509d 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorization.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorization.cs @@ -9,25 +9,32 @@ namespace Samples.Dynamic.Trainers.Recommendation public static class MatrixFactorization { - // This example requires installation of additional nuget package Microsoft.ML.Recommender. - // In this example we will create in-memory data and then use it to train - // a matrix factorization model with default parameters. Afterward, quality metrics are reported. + // This example requires installation of additional nuget package + // Microsoft.ML.Recommender found at + // https://www.nuget.org/packages/Microsoft.ML.Recommender/ + // In this example we will create in-memory data and then use it to train + // a matrix factorization model with default parameters. Afterward, quality + // metrics are reported. public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateMatrix(); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. 
+ // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. - var pipeline = mlContext.Recommendation().Trainers.MatrixFactorization(nameof(MatrixElement.Value), nameof(MatrixElement.MatrixColumnIndex), - nameof(MatrixElement.MatrixRowIndex), 10, 0.2, 1); + var pipeline = mlContext.Recommendation().Trainers. + MatrixFactorization(nameof(MatrixElement.Value), + nameof(MatrixElement.MatrixColumnIndex), + nameof(MatrixElement.MatrixRowIndex), 10, 0.2, 1); // Train the model. var model = pipeline.Fit(trainingData); @@ -36,11 +43,15 @@ public static void Example() var transformedData = model.Transform(trainingData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedData, reuseRowObject: false).Take(5).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedData, + reuseRowObject: false).Take(5).ToList(); - // Look at 5 predictions for the Label, side by side with the actual Label for comparison. + // Look at 5 predictions for the Label, side by side with the actual + // Label for comparison. 
foreach (var p in predictions) - Console.WriteLine($"Actual value: {p.Value:F3}, Predicted score: {p.Score:F3}"); + Console.WriteLine($"Actual value: {p.Value:F3}, " + + $"Predicted score: {p.Score:F3}"); // Expected output: // Actual value: 0.000, Predicted score: 1.234 @@ -50,7 +61,10 @@ public static void Example() // Actual value: 4.000, Predicted score: 2.362 // Evaluate the overall metrics - var metrics = mlContext.Regression.Evaluate(transformedData, labelColumnName: nameof(MatrixElement.Value), scoreColumnName: nameof(MatrixElement.Score)); + var metrics = mlContext.Regression.Evaluate(transformedData, + labelColumnName: nameof(MatrixElement.Value), + scoreColumnName: nameof(MatrixElement.Score)); + PrintMetrics(metrics); // Expected output: @@ -60,11 +74,15 @@ public static void Example() // RSquared: 0.61 (closer to 1 is better. The worest case is 0) } - // The following variables are used to define the shape of the example matrix. Its shape is MatrixRowCount-by-MatrixColumnCount. - // Because in ML.NET key type's minimal value is zero, the first row index is always zero in C# data structure (e.g., MatrixColumnIndex=0 - // and MatrixRowIndex=0 in MatrixElement below specifies the value at the upper-left corner in the training matrix). If user's row index - // starts with 1, their row index 1 would be mapped to the 2nd row in matrix factorization module and their first row may contain no values. - // This behavior is also true to column index. + // The following variables are used to define the shape of the example + // matrix. Its shape is MatrixRowCount-by-MatrixColumnCount. Because in + // ML.NET key type's minimal value is zero, the first row index is always + // zero in C# data structure (e.g., MatrixColumnIndex=0 and MatrixRowIndex=0 + // in MatrixElement below specifies the value at the upper-left corner in + // the training matrix). 
If user's row index starts with 1, their row index + // 1 would be mapped to the 2nd row in matrix factorization module and their + // first row may contain no values. This behavior is also true to column + // index. private const uint MatrixColumnCount = 60; private const uint MatrixRowCount = 100; @@ -74,32 +92,40 @@ private static List GenerateMatrix() var dataMatrix = new List(); for (uint i = 0; i < MatrixColumnCount; ++i) for (uint j = 0; j < MatrixRowCount; ++j) - dataMatrix.Add(new MatrixElement() { MatrixColumnIndex = i, MatrixRowIndex = j, Value = (i + j) % 5 }); + dataMatrix.Add(new MatrixElement() { MatrixColumnIndex = i, + MatrixRowIndex = j, Value = (i + j) % 5 }); + return dataMatrix; } - // A class used to define a matrix element and capture its prediction result. + // A class used to define a matrix element and capture its prediction + // result. private class MatrixElement { - // Matrix column index. Its allowed range is from 0 to MatrixColumnCount - 1. + // Matrix column index. Its allowed range is from 0 to + // MatrixColumnCount - 1. [KeyType(MatrixColumnCount)] public uint MatrixColumnIndex { get; set; } // Matrix row index. Its allowed range is from 0 to MatrixRowCount - 1. [KeyType(MatrixRowCount)] public uint MatrixRowIndex { get; set; } - // The actual value at the MatrixColumnIndex-th column and the MatrixRowIndex-th row. + // The actual value at the MatrixColumnIndex-th column and the + // MatrixRowIndex-th row. public float Value { get; set; } - // The predicted value at the MatrixColumnIndex-th column and the MatrixRowIndex-th row. + // The predicted value at the MatrixColumnIndex-th column and the + // MatrixRowIndex-th row. public float Score { get; set; } } // Print some evaluation metrics to regression problems. 
private static void PrintMetrics(RegressionMetrics metrics) { - Console.WriteLine($"Mean Absolute Error: {metrics.MeanAbsoluteError:F2}"); - Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError:F2}"); - Console.WriteLine($"Root Mean Squared Error: {metrics.RootMeanSquaredError:F2}"); - Console.WriteLine($"RSquared: {metrics.RSquared:F2}"); + Console.WriteLine("Mean Absolute Error: " + + $"{metrics.MeanAbsoluteError:F2}"); + Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError:F2}"); + Console.WriteLine("Root Mean Squared Error: " + + $"{metrics.RootMeanSquaredError:F2}"); + Console.WriteLine($"RSquared: {metrics.RSquared:F2}"); } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorization.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorization.tt index 496330722e..0c0092d12c 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorization.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorization.tt @@ -2,13 +2,18 @@ <#+ string ClassHeader = @" - // This example requires installation of additional nuget package Microsoft.ML.Recommender. - // In this example we will create in-memory data and then use it to train - // a matrix factorization model with default parameters. Afterward, quality metrics are reported."; + // This example requires installation of additional nuget package + // Microsoft.ML.Recommender found at + // https://www.nuget.org/packages/Microsoft.ML.Recommender/ + // In this example we will create in-memory data and then use it to train + // a matrix factorization model with default parameters. 
Afterward, quality + // metrics are reported."; string ClassName="MatrixFactorization"; string ExtraUsing = null; -string Trainer = @"MatrixFactorization(nameof(MatrixElement.Value), nameof(MatrixElement.MatrixColumnIndex), - nameof(MatrixElement.MatrixRowIndex), 10, 0.2, 1)"; +string Trainer = @" + MatrixFactorization(nameof(MatrixElement.Value), + nameof(MatrixElement.MatrixColumnIndex), + nameof(MatrixElement.MatrixRowIndex), 10, 0.2, 1)"; string TrainerOptions = null; string ExpectedOutputPerInstance= @"// Expected output: @@ -23,4 +28,4 @@ string ExpectedOutput = @"// Expected output: // Mean Squared Error: 0.79 // Root Mean Squared Error: 0.89 // RSquared: 0.61 (closer to 1 is better. The worest case is 0)"; -#> +#> \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorizationTemplate.ttinclude b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorizationTemplate.ttinclude index c2ce0fe08d..1a6f593c8a 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorizationTemplate.ttinclude +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorizationTemplate.ttinclude @@ -16,15 +16,17 @@ namespace Samples.Dynamic.Trainers.Recommendation <# } #> public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. 
var dataPoints = GenerateMatrix(); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); <# if (TrainerOptions == null) { #> @@ -35,7 +37,8 @@ namespace Samples.Dynamic.Trainers.Recommendation var options = new <#=TrainerOptions#>; // Define the trainer. - var pipeline = mlContext.Recommendation().Trainers.<#=Trainer#>(options); + var pipeline = mlContext.Recommendation().Trainers.<#=Trainer#>( + options); <# } #> // Train the model. @@ -45,26 +48,37 @@ namespace Samples.Dynamic.Trainers.Recommendation var transformedData = model.Transform(trainingData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedData, reuseRowObject: false).Take(5).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedData, + reuseRowObject: false).Take(5).ToList(); - // Look at 5 predictions for the Label, side by side with the actual Label for comparison. + // Look at 5 predictions for the Label, side by side with the actual + // Label for comparison. foreach (var p in predictions) - Console.WriteLine($"Actual value: {p.Value:F3}, Predicted score: {p.Score:F3}"); + Console.WriteLine($"Actual value: {p.Value:F3}, " + + $"Predicted score: {p.Score:F3}"); <#=ExpectedOutputPerInstance#> // Evaluate the overall metrics - var metrics = mlContext.Regression.Evaluate(transformedData, labelColumnName: nameof(MatrixElement.Value), scoreColumnName: nameof(MatrixElement.Score)); + var metrics = mlContext.Regression.Evaluate(transformedData, + labelColumnName: nameof(MatrixElement.Value), + scoreColumnName: nameof(MatrixElement.Score)); + PrintMetrics(metrics); <#=ExpectedOutput#> } - // The following variables are used to define the shape of the example matrix. Its shape is MatrixRowCount-by-MatrixColumnCount. 
- // Because in ML.NET key type's minimal value is zero, the first row index is always zero in C# data structure (e.g., MatrixColumnIndex=0 - // and MatrixRowIndex=0 in MatrixElement below specifies the value at the upper-left corner in the training matrix). If user's row index - // starts with 1, their row index 1 would be mapped to the 2nd row in matrix factorization module and their first row may contain no values. - // This behavior is also true to column index. + // The following variables are used to define the shape of the example + // matrix. Its shape is MatrixRowCount-by-MatrixColumnCount. Because in + // ML.NET key type's minimal value is zero, the first row index is always + // zero in C# data structure (e.g., MatrixColumnIndex=0 and MatrixRowIndex=0 + // in MatrixElement below specifies the value at the upper-left corner in + // the training matrix). If user's row index starts with 1, their row index + // 1 would be mapped to the 2nd row in matrix factorization module and their + // first row may contain no values. This behavior is also true to column + // index. private const uint MatrixColumnCount = 60; private const uint MatrixRowCount = 100; @@ -74,32 +88,40 @@ namespace Samples.Dynamic.Trainers.Recommendation var dataMatrix = new List(); for (uint i = 0; i < MatrixColumnCount; ++i) for (uint j = 0; j < MatrixRowCount; ++j) - dataMatrix.Add(new MatrixElement() { MatrixColumnIndex = i, MatrixRowIndex = j, Value = (i + j) % 5 }); + dataMatrix.Add(new MatrixElement() { MatrixColumnIndex = i, + MatrixRowIndex = j, Value = (i + j) % 5 }); + return dataMatrix; } - // A class used to define a matrix element and capture its prediction result. + // A class used to define a matrix element and capture its prediction + // result. private class MatrixElement { - // Matrix column index. Its allowed range is from 0 to MatrixColumnCount - 1. + // Matrix column index. Its allowed range is from 0 to + // MatrixColumnCount - 1. 
[KeyType(MatrixColumnCount)] public uint MatrixColumnIndex { get; set; } // Matrix row index. Its allowed range is from 0 to MatrixRowCount - 1. [KeyType(MatrixRowCount)] public uint MatrixRowIndex { get; set; } - // The actual value at the MatrixColumnIndex-th column and the MatrixRowIndex-th row. + // The actual value at the MatrixColumnIndex-th column and the + // MatrixRowIndex-th row. public float Value { get; set; } - // The predicted value at the MatrixColumnIndex-th column and the MatrixRowIndex-th row. + // The predicted value at the MatrixColumnIndex-th column and the + // MatrixRowIndex-th row. public float Score { get; set; } } // Print some evaluation metrics to regression problems. private static void PrintMetrics(RegressionMetrics metrics) { - Console.WriteLine($"Mean Absolute Error: {metrics.MeanAbsoluteError:F2}"); - Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError:F2}"); - Console.WriteLine($"Root Mean Squared Error: {metrics.RootMeanSquaredError:F2}"); - Console.WriteLine($"RSquared: {metrics.RSquared:F2}"); + Console.WriteLine("Mean Absolute Error: " + + $"{metrics.MeanAbsoluteError:F2}"); + Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError:F2}"); + Console.WriteLine("Root Mean Squared Error: " + + $"{metrics.RootMeanSquaredError:F2}"); + Console.WriteLine($"RSquared: {metrics.RSquared:F2}"); } } } \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorizationWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorizationWithOptions.cs index 45ea6813af..4d27300dfb 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorizationWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorizationWithOptions.cs @@ -10,44 +10,55 @@ namespace Samples.Dynamic.Trainers.Recommendation public static class MatrixFactorizationWithOptions { - // This example requires 
installation of additional nuget package Microsoft.ML.Recommender. - // In this example we will create in-memory data and then use it to train - // a matrix factorization model with non-default parameters. Afterward, quality metrics are reported. + // This example requires installation of additional nuget package + // Microsoft.ML.Recommender found at + // https://www.nuget.org/packages/Microsoft.ML.Recommender/ + // In this example we will create in-memory data and then use it to train + // a matrix factorization model with non-default parameters. Afterward, quality + // metrics are reported. public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateMatrix(); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define trainer options. var options = new MatrixFactorizationTrainer.Options { - // Specify IDataView colum which stores matrix column indexes. - MatrixColumnIndexColumnName = nameof(MatrixElement.MatrixColumnIndex), - // Specify IDataView colum which stores matrix row indexes. + // Specify IDataView colum which stores matrix column indexes. 
+ MatrixColumnIndexColumnName = nameof(MatrixElement.MatrixColumnIndex + ), + + // Specify IDataView colum which stores matrix row indexes. MatrixRowIndexColumnName = nameof(MatrixElement.MatrixRowIndex), - // Specify IDataView colum which stores matrix elements' values. + // Specify IDataView colum which stores matrix elements' values. LabelColumnName = nameof(MatrixElement.Value), - // Time of going through the entire data set once. + // Time of going through the entire data set once. NumberOfIterations = 10, - // Number of threads used to run this trainers. + // Number of threads used to run this trainers. NumberOfThreads = 1, - // The rank of factor matrices. Note that the product of the two factor matrices approximates the training matrix. + // The rank of factor matrices. Note that the product of the two + // factor matrices approximates the training matrix. ApproximationRank = 32, - // Step length when moving toward stochastic gradient. Training algorithm may adjust it for faster convergence. - // Note that faster convergence means we can use less iterations to achieve similar test scores. + // Step length when moving toward stochastic gradient. Training + // algorithm may adjust it for faster convergence. Note that faster + // convergence means we can use less iterations to achieve similar + // test scores. LearningRate = 0.3 }; // Define the trainer. - var pipeline = mlContext.Recommendation().Trainers.MatrixFactorization(options); + var pipeline = mlContext.Recommendation().Trainers.MatrixFactorization( + options); // Train the model. var model = pipeline.Fit(trainingData); @@ -56,11 +67,15 @@ public static void Example() var transformedData = model.Transform(trainingData); // Convert IDataView object to a list. 
- var predictions = mlContext.Data.CreateEnumerable(transformedData, reuseRowObject: false).Take(5).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedData, + reuseRowObject: false).Take(5).ToList(); - // Look at 5 predictions for the Label, side by side with the actual Label for comparison. + // Look at 5 predictions for the Label, side by side with the actual + // Label for comparison. foreach (var p in predictions) - Console.WriteLine($"Actual value: {p.Value:F3}, Predicted score: {p.Score:F3}"); + Console.WriteLine($"Actual value: {p.Value:F3}, " + + $"Predicted score: {p.Score:F3}"); // Expected output: // Actual value: 0.000, Predicted score: 0.031 @@ -70,7 +85,10 @@ public static void Example() // Actual value: 4.000, Predicted score: 3.176 // Evaluate the overall metrics - var metrics = mlContext.Regression.Evaluate(transformedData, labelColumnName: nameof(MatrixElement.Value), scoreColumnName: nameof(MatrixElement.Score)); + var metrics = mlContext.Regression.Evaluate(transformedData, + labelColumnName: nameof(MatrixElement.Value), + scoreColumnName: nameof(MatrixElement.Score)); + PrintMetrics(metrics); // Expected output: @@ -80,11 +98,15 @@ public static void Example() // RSquared: 0.97 (closer to 1 is better. The worest case is 0) } - // The following variables are used to define the shape of the example matrix. Its shape is MatrixRowCount-by-MatrixColumnCount. - // Because in ML.NET key type's minimal value is zero, the first row index is always zero in C# data structure (e.g., MatrixColumnIndex=0 - // and MatrixRowIndex=0 in MatrixElement below specifies the value at the upper-left corner in the training matrix). If user's row index - // starts with 1, their row index 1 would be mapped to the 2nd row in matrix factorization module and their first row may contain no values. - // This behavior is also true to column index. + // The following variables are used to define the shape of the example + // matrix. 
Its shape is MatrixRowCount-by-MatrixColumnCount. Because in + // ML.NET key type's minimal value is zero, the first row index is always + // zero in C# data structure (e.g., MatrixColumnIndex=0 and MatrixRowIndex=0 + // in MatrixElement below specifies the value at the upper-left corner in + // the training matrix). If user's row index starts with 1, their row index + // 1 would be mapped to the 2nd row in matrix factorization module and their + // first row may contain no values. This behavior is also true to column + // index. private const uint MatrixColumnCount = 60; private const uint MatrixRowCount = 100; @@ -94,32 +116,40 @@ private static List GenerateMatrix() var dataMatrix = new List(); for (uint i = 0; i < MatrixColumnCount; ++i) for (uint j = 0; j < MatrixRowCount; ++j) - dataMatrix.Add(new MatrixElement() { MatrixColumnIndex = i, MatrixRowIndex = j, Value = (i + j) % 5 }); + dataMatrix.Add(new MatrixElement() { MatrixColumnIndex = i, + MatrixRowIndex = j, Value = (i + j) % 5 }); + return dataMatrix; } - // A class used to define a matrix element and capture its prediction result. + // A class used to define a matrix element and capture its prediction + // result. private class MatrixElement { - // Matrix column index. Its allowed range is from 0 to MatrixColumnCount - 1. + // Matrix column index. Its allowed range is from 0 to + // MatrixColumnCount - 1. [KeyType(MatrixColumnCount)] public uint MatrixColumnIndex { get; set; } // Matrix row index. Its allowed range is from 0 to MatrixRowCount - 1. [KeyType(MatrixRowCount)] public uint MatrixRowIndex { get; set; } - // The actual value at the MatrixColumnIndex-th column and the MatrixRowIndex-th row. + // The actual value at the MatrixColumnIndex-th column and the + // MatrixRowIndex-th row. public float Value { get; set; } - // The predicted value at the MatrixColumnIndex-th column and the MatrixRowIndex-th row. 
+ // The predicted value at the MatrixColumnIndex-th column and the + // MatrixRowIndex-th row. public float Score { get; set; } } // Print some evaluation metrics to regression problems. private static void PrintMetrics(RegressionMetrics metrics) { - Console.WriteLine($"Mean Absolute Error: {metrics.MeanAbsoluteError:F2}"); - Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError:F2}"); - Console.WriteLine($"Root Mean Squared Error: {metrics.RootMeanSquaredError:F2}"); - Console.WriteLine($"RSquared: {metrics.RSquared:F2}"); + Console.WriteLine("Mean Absolute Error: " + + $"{metrics.MeanAbsoluteError:F2}"); + Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError:F2}"); + Console.WriteLine("Root Mean Squared Error: " + + $"{metrics.RootMeanSquaredError:F2}"); + Console.WriteLine($"RSquared: {metrics.RSquared:F2}"); } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorizationWithOptions.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorizationWithOptions.tt index e4d483fcbe..63aadfaf35 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorizationWithOptions.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorizationWithOptions.tt @@ -2,28 +2,36 @@ <#+ string ClassHeader = @" - // This example requires installation of additional nuget package Microsoft.ML.Recommender. - // In this example we will create in-memory data and then use it to train - // a matrix factorization model with non-default parameters. Afterward, quality metrics are reported."; + // This example requires installation of additional nuget package + // Microsoft.ML.Recommender found at + // https://www.nuget.org/packages/Microsoft.ML.Recommender/ + // In this example we will create in-memory data and then use it to train + // a matrix factorization model with non-default parameters. 
Afterward, quality + // metrics are reported."; string ClassName="MatrixFactorizationWithOptions"; string ExtraUsing = "using Microsoft.ML.Trainers;"; string Trainer = "MatrixFactorization"; string TrainerOptions = @"MatrixFactorizationTrainer.Options { - // Specify IDataView colum which stores matrix column indexes. - MatrixColumnIndexColumnName = nameof(MatrixElement.MatrixColumnIndex), - // Specify IDataView colum which stores matrix row indexes. + // Specify IDataView colum which stores matrix column indexes. + MatrixColumnIndexColumnName = nameof(MatrixElement.MatrixColumnIndex + ), + + // Specify IDataView colum which stores matrix row indexes. MatrixRowIndexColumnName = nameof(MatrixElement.MatrixRowIndex), - // Specify IDataView colum which stores matrix elements' values. + // Specify IDataView colum which stores matrix elements' values. LabelColumnName = nameof(MatrixElement.Value), - // Time of going through the entire data set once. + // Time of going through the entire data set once. NumberOfIterations = 10, - // Number of threads used to run this trainers. + // Number of threads used to run this trainers. NumberOfThreads = 1, - // The rank of factor matrices. Note that the product of the two factor matrices approximates the training matrix. + // The rank of factor matrices. Note that the product of the two + // factor matrices approximates the training matrix. ApproximationRank = 32, - // Step length when moving toward stochastic gradient. Training algorithm may adjust it for faster convergence. - // Note that faster convergence means we can use less iterations to achieve similar test scores. + // Step length when moving toward stochastic gradient. Training + // algorithm may adjust it for faster convergence. Note that faster + // convergence means we can use less iterations to achieve similar + // test scores. 
LearningRate = 0.3 }"; diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/OneClassMatrixFactorizationWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/OneClassMatrixFactorizationWithOptions.cs index 57cb7f2e6b..2553efe5b0 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/OneClassMatrixFactorizationWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/OneClassMatrixFactorizationWithOptions.cs @@ -9,60 +9,80 @@ namespace Samples.Dynamic.Trainers.Recommendation { public static class OneClassMatrixFactorizationWithOptions { - // This example shows the use of ML.NET's one-class matrix factorization module which implements a coordinate descent method - // described in Algorithm 1 in a paper. - // See page 28 in of slides for a brief introduction to - // one-class matrix factorization. - // In this example we will create in-memory data and then use it to train a one-class matrix factorization model. - // Afterward, prediction values are reported. - // To run this example, it requires installation of additional nuget package - // Microsoft.ML.Recommender. + // This example shows the use of ML.NET's one-class matrix factorization + // module which implements a coordinate descent method described in + // Algorithm 1 in the paper found at + // https://www.csie.ntu.edu.tw/~cjlin/papers/one-class-mf/biased-mf-sdm-with-supp.pdf + // See page 28 of the slides + // at https://www.csie.ntu.edu.tw/~cjlin/talks/facebook.pdf for a brief + // introduction to one-class matrix factorization. + // In this example we will create in-memory data and then use it to train a + // one-class matrix factorization model. Afterward, prediction values are + // reported. 
To run this example, it requires installation of additional + // nuget package Microsoft.ML.Recommender found at + // https://www.nuget.org/packages/Microsoft.ML.Recommender/ public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. var mlContext = new MLContext(seed: 0); // Get a small in-memory dataset. - GetOneClassMatrix(out List data, out List testData); + GetOneClassMatrix(out List data, + out List testData); - // Convert the in-memory matrix into an IDataView so that ML.NET components can consume it. + // Convert the in-memory matrix into an IDataView so that ML.NET + // components can consume it. var dataView = mlContext.Data.LoadFromEnumerable(data); - // Create a matrix factorization trainer which takes "Value" as the training label, "MatrixColumnIndex" as the - // matrix's column index, and "MatrixRowIndex" as the matrix's row index. Here nameof(...) is used to extract field + // Create a matrix factorization trainer which takes "Value" as the + // training label, "MatrixColumnIndex" as the matrix's column index, and + // "MatrixRowIndex" as the matrix's row index. Here nameof(...) is used + // to extract field // names' in MatrixElement class. var options = new MatrixFactorizationTrainer.Options { - MatrixColumnIndexColumnName = nameof(MatrixElement.MatrixColumnIndex), - MatrixRowIndexColumnName = nameof(MatrixElement.MatrixRowIndex), - LabelColumnName = nameof(MatrixElement.Value), - NumberOfIterations = 20, - NumberOfThreads = 8, - ApproximationRank = 32, - Alpha = 1, - // The desired values of matrix elements not specified in the training set. 
- // If the training set doesn't tell the value at the u-th row and v-th column, - // its desired value would be set 0.15. In other words, this parameter determines - // the value of all missing matrix elements. + MatrixColumnIndexColumnName = nameof( + MatrixElement.MatrixColumnIndex), + MatrixRowIndexColumnName = nameof(MatrixElement.MatrixRowIndex), + LabelColumnName = nameof(MatrixElement.Value), + NumberOfIterations = 20, + NumberOfThreads = 8, + ApproximationRank = 32, + Alpha = 1, + + // The desired values of matrix elements not specified in the + // training set. If the training set doesn't tell the value at the + // u-th row and v-th column, its desired value would be set to 0.15. + // In other words, this parameter determines the value of all + // missing matrix elements. C = 0.15, // This argument enables one-class matrix factorization. - LossFunction = MatrixFactorizationTrainer.LossFunctionType.SquareLossOneClass + LossFunction = MatrixFactorizationTrainer.LossFunctionType + .SquareLossOneClass }; - var pipeline = mlContext.Recommendation().Trainers.MatrixFactorization(options); + var pipeline = mlContext.Recommendation().Trainers.MatrixFactorization( + options); // Train a matrix factorization model. var model = pipeline.Fit(dataView); - // Apply the trained model to the test set. Notice that training is a partial - var prediction = model.Transform(mlContext.Data.LoadFromEnumerable(testData)); + // Apply the trained model to the test set. Notice that training is a + // partial + var prediction = model.Transform(mlContext.Data.LoadFromEnumerable( + testData)); - var results = mlContext.Data.CreateEnumerable(prediction, false).ToList(); - // Feed the test data into the model and then iterate through a few predictions. + var results = mlContext.Data.CreateEnumerable(prediction, + false).ToList(); + // Feed the test data into the model and then iterate through a few + // predictions. 
foreach (var pred in results.Take(15)) - Console.WriteLine($"Predicted value at row {pred.MatrixRowIndex - 1} and column {pred.MatrixColumnIndex - 1} is " + - $"{pred.Score} and its expected value is {pred.Value}."); + Console.WriteLine($"Predicted value at row " + + $"{pred.MatrixRowIndex - 1} and column " + + $"{pred.MatrixColumnIndex - 1} is {pred.Score} and its " + + $"expected value is {pred.Value}."); // Expected output similar to: // Predicted value at row 0 and column 0 is 0.9873335 and its expected value is 1. @@ -81,17 +101,24 @@ public static void Example() // Predicted value at row 13 and column 0 is 0.1499254 and its expected value is 0.15. // Predicted value at row 14 and column 0 is 0.1499074 and its expected value is 0.15. // - // Note: use the advanced options constructor to set the number of threads to 1 for a deterministic behavior. + // Note: use the advanced options constructor to set the number of + // threads to 1 for a deterministic behavior. - // Assume that row index is user ID and column index game ID, the following list contains the games recommended by the trained model. - // Note that sometime, you may want to exclude training data from your predicted results because those would represent games that - // were already purchased. - // The variable topColumns stores two matrix elements with the highest predicted scores on the 1st row. - var topColumns = results.Where(element => element.MatrixRowIndex == 1).OrderByDescending(element => element.Score).Take(2); + // Assume that row index is user ID and column index game ID, the + // following list contains the games recommended by the trained model. + // Note that sometime, you may want to exclude training data from your + // predicted results because those would represent games that were + // already purchased. The variable topColumns stores two matrix elements + // with the highest predicted scores on the 1st row. 
+ var topColumns = results.Where(element => element.MatrixRowIndex == 1) + .OrderByDescending(element => element.Score).Take(2); Console.WriteLine("Top 2 predictions on the 1st row:"); foreach (var top in topColumns) - Console.WriteLine($"Predicted value at row {top.MatrixRowIndex - 1} and column {top.MatrixColumnIndex - 1} is {top.Score} and its expected value is {top.Value}."); + Console.WriteLine($"Predicted value at row " + + $"{top.MatrixRowIndex - 1} and column " + + $"{top.MatrixColumnIndex - 1} is {top.Score} and its " + + $"expected value is {top.Value}."); // Expected output similar to: // Top 2 predictions at the 2nd row: @@ -99,10 +126,14 @@ public static void Example() // Predicted value at row 0 and column 10 is 0.9871138 and its expected value is 1. } - // The following variables defines the shape of a matrix. Its shape is _synthesizedMatrixRowCount-by-_synthesizedMatrixColumnCount. - // Because in ML.NET key type's minimal value is zero, the first row index is always zero in C# data structure (e.g., MatrixColumnIndex=0 - // and MatrixRowIndex=0 in MatrixElement below specifies the value at the upper-left corner in the training matrix). If user's row index - // starts with 1, their row index 1 would be mapped to the 2nd row in matrix factorization module and their first row may contain no values. + // The following variables define the shape of a matrix. Its shape is + // _synthesizedMatrixRowCount-by-_synthesizedMatrixColumnCount. + // Because in ML.NET key type's minimal value is zero, the first row index + // is always zero in C# data structure (e.g., MatrixColumnIndex=0 and + // MatrixRowIndex=0 in MatrixElement below specifies the value at the + // upper-left corner in the training matrix). If user's row index + // starts with 1, their row index 1 would be mapped to the 2nd row in matrix + // factorization module and their first row may contain no values. // This behavior is also true to column index. 
private const uint _synthesizedMatrixColumnCount = 60; private const uint _synthesizedMatrixRowCount = 100; @@ -110,25 +141,33 @@ public static void Example() // A data structure used to encode a single value in matrix private class MatrixElement { - // Matrix column index. Its allowed range is from 0 to _synthesizedMatrixColumnCount - 1. + // Matrix column index. Its allowed range is from 0 to + // _synthesizedMatrixColumnCount - 1. [KeyType(_synthesizedMatrixColumnCount)] public uint MatrixColumnIndex { get; set; } - // Matrix row index. Its allowed range is from 0 to _synthesizedMatrixRowCount - 1. + // Matrix row index. Its allowed range is from 0 to + // _synthesizedMatrixRowCount - 1. [KeyType(_synthesizedMatrixRowCount)] public uint MatrixRowIndex { get; set; } - // The value at the MatrixColumnIndex-th column and the MatrixRowIndex-th row. + // The value at the MatrixColumnIndex-th column and the + // MatrixRowIndex-th row. public float Value { get; set; } - // The predicted value at the MatrixColumnIndex-th column and the MatrixRowIndex-th row. + // The predicted value at the MatrixColumnIndex-th column and the + // MatrixRowIndex-th row. public float Score { get; set; } } - // Create an in-memory matrix as a list of tuples (column index, row index, value). Notice that one-class matrix - // factorization handle scenerios where only positive signals (e.g., on Facebook, only likes are recorded and no dislike before) - // can be observed so that all values are set to 1. - private static void GetOneClassMatrix(out List observedMatrix, out List fullMatrix) + // Create an in-memory matrix as a list of tuples (column index, row index, + // value). Notice that one-class matrix factorization handles scenarios where + // only positive signals (e.g., on Facebook, only likes are recorded and no + // dislike before) can be observed so that all values are set to 1. 
+ private static void GetOneClassMatrix( + out List observedMatrix, + out List fullMatrix) { - // The matrix factorization model will be trained only using observedMatrix but we will see it can learn all information - // carried in fullMatrix. + // The matrix factorization model will be trained only using + // observedMatrix but we will see it can learn all information carried + // in fullMatrix. observedMatrix = new List(); fullMatrix = new List(); for (uint i = 0; i < _synthesizedMatrixColumnCount; ++i) @@ -137,13 +176,32 @@ private static void GetOneClassMatrix(out List observedMatrix, ou if ((i + j) % 10 == 0) { // Set observed elements' values to 1 (means like). - observedMatrix.Add(new MatrixElement() { MatrixColumnIndex = i, MatrixRowIndex = j, Value = 1, Score = 0 }); - fullMatrix.Add(new MatrixElement() { MatrixColumnIndex = i, MatrixRowIndex = j, Value = 1, Score = 0 }); + observedMatrix.Add(new MatrixElement() + { + MatrixColumnIndex = i, + MatrixRowIndex = j, + Value = 1, + Score = 0 + }); + fullMatrix.Add(new MatrixElement() + { + MatrixColumnIndex = i, + MatrixRowIndex = j, + Value = 1, + Score = 0 + }); } else - // Set unobserved elements' values to 0.15, a value smaller than observed values (means dislike). - fullMatrix.Add(new MatrixElement() { MatrixColumnIndex = i, MatrixRowIndex = j, Value = 0.15f, Score = 0 }); + // Set unobserved elements' values to 0.15, a value smaller + // than observed values (means dislike). 
+ fullMatrix.Add(new MatrixElement() + { + MatrixColumnIndex = i, + MatrixRowIndex = j, + Value = 0.15f, + Score = 0 + }); } } } -} +} \ No newline at end of file diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/TreeEnsembleFeaturizerTest.cs b/test/Microsoft.ML.Tests/TrainerEstimators/TreeEnsembleFeaturizerTest.cs index ea8026f99e..71253a32b3 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/TreeEnsembleFeaturizerTest.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/TreeEnsembleFeaturizerTest.cs @@ -812,7 +812,7 @@ public void TreeEnsembleFeaturizingPipelineMulticlass() private class RowWithKey { - [KeyType()] + [KeyType(4)] public uint KeyLabel { get; set; } }