From b2cdfb101941f764a357c03caddac09fd5e0894f Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Wed, 10 Apr 2019 16:09:17 -0700 Subject: [PATCH 1/3] Add a sample for OCMF Add missing file --- .../OneClassMatrixFactorizationWithOptions.cs | 135 ++++++++++++++++++ .../RecommenderCatalog.cs | 1 + .../MatrixFactorizationTests.cs | 114 +++++++++++++++ 3 files changed, 250 insertions(+) create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/OneClassMatrixFactorizationWithOptions.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/OneClassMatrixFactorizationWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/OneClassMatrixFactorizationWithOptions.cs new file mode 100644 index 0000000000..3b4e92ed89 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/OneClassMatrixFactorizationWithOptions.cs @@ -0,0 +1,135 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using Microsoft.ML; +using Microsoft.ML.Data; +using Microsoft.ML.Trainers; + +namespace Samples.Dynamic.Trainers.Recommendation +{ + public static class OneClassMatrixFactorizationWithOptions + { + // This example shows the use of ML.NET's one-class matrix factorization module which implements + // Algorithm 1 in a paper. + // See page 28 in of slides for a brief introduction to + // one-class matrix factorization. + // In this example we will create in-memory data and then use it to train a one-class matrix factorization model. + // Afterward, prediction values are reported. + // To run this example, it requires installation of additional nuget package + // Microsoft.ML.Recommender. + public static void Example() + { + // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. + var mlContext = new MLContext(seed: 0); + + // Get a small in-memory dataset. 
+ GetOneClassMatrix(out List data, out List testData); + + // Convert the in-memory matrix into an IDataView so that ML.NET components can consume it. + var dataView = mlContext.Data.LoadFromEnumerable(data); + + // Create a matrix factorization trainer which may consume "Value" as the training label, "MatrixColumnIndex" as the + // matrix's column index, and "MatrixRowIndex" as the matrix's row index. Here nameof(...) is used to extract field + // names' in MatrixElement class. + var options = new MatrixFactorizationTrainer.Options + { + MatrixColumnIndexColumnName = nameof(MatrixElement.MatrixColumnIndex), + MatrixRowIndexColumnName = nameof(MatrixElement.MatrixRowIndex), + LabelColumnName = nameof(MatrixElement.Value), + NumberOfIterations = 20, + NumberOfThreads = 8, + ApproximationRank = 32, + Alpha = 1, + // The desired of unobserved values. + C = 0.15, + // To enable one-class matrix factorization, the following line is required. + LossFunction = MatrixFactorizationTrainer.LossFunctionType.SquareLossOneClass + }; + + var pipeline = mlContext.Recommendation().Trainers.MatrixFactorization(options); + + // Train a matrix factorization model. + var model = pipeline.Fit(dataView); + + // Apply the trained model to the test set. Notice that the training set is only the observed part of the matrix stored in the test set. + var prediction = model.Transform(mlContext.Data.LoadFromEnumerable(testData)); + + var results = mlContext.Data.CreateEnumerable(prediction, false).ToList(); + // Feed the test data into the model and then iterate through a few predictions. + foreach (var pred in results.Take(15)) + Console.WriteLine($"Predicted value at row {pred.MatrixRowIndex - 1} and column {pred.MatrixColumnIndex - 1} is {pred.Score} and its expected value is {pred.Value}."); + + // Expected output similar to: + // Predicted value at row 0 and column 0 is 0.9873335 and its expected value is 1. + // Predicted value at row 1 and column 0 is 0.1499522 and its expected value is 0.15. 
+ // Predicted value at row 2 and column 0 is 0.1499791 and its expected value is 0.15. + // Predicted value at row 3 and column 0 is 0.1499254 and its expected value is 0.15. + // Predicted value at row 4 and column 0 is 0.1499074 and its expected value is 0.15. + // Predicted value at row 5 and column 0 is 0.1499968 and its expected value is 0.15. + // Predicted value at row 6 and column 0 is 0.1499791 and its expected value is 0.15. + // Predicted value at row 7 and column 0 is 0.1499805 and its expected value is 0.15. + // Predicted value at row 8 and column 0 is 0.1500055 and its expected value is 0.15. + // Predicted value at row 9 and column 0 is 0.1499199 and its expected value is 0.15. + // Predicted value at row 10 and column 0 is 0.9873335 and its expected value is 1. + // Predicted value at row 11 and column 0 is 0.1499522 and its expected value is 0.15. + // Predicted value at row 12 and column 0 is 0.1499791 and its expected value is 0.15. + // Predicted value at row 13 and column 0 is 0.1499254 and its expected value is 0.15. + // Predicted value at row 14 and column 0 is 0.1499074 and its expected value is 0.15. + // + // Note: use the advanced options constructor to set the number of threads to 1 for a deterministic behavior. + + // Two columns with highest predicted score to the 2nd row (indexed by 1). If we view row index as user ID and column as game ID, + // the following list contains the games recommended by the trained model. Note that sometime, you may want to exclude training + // data from your predicted results because those games were already purchased. + var topColumns = results.Where(element => element.MatrixRowIndex == 1).OrderByDescending(element => element.Score).Take(2); + } + + // The following variables defines the shape of a matrix. Its shape is _synthesizedMatrixRowCount-by-_synthesizedMatrixColumnCount. 
+ // Because in ML.NET key type's minimal value is zero, the first row index is always zero in the C# data structure (e.g., MatrixColumnIndex=0 + // and MatrixRowIndex=0 in MatrixElement below specifies the value at the upper-left corner in the training matrix). If a user's row index + // starts with 1, their row index 1 would be mapped to the 2nd row in the matrix factorization module and their first row may contain no values. + // The same is true for the column index. + private const uint _synthesizedMatrixColumnCount = 60; + private const uint _synthesizedMatrixRowCount = 100; + + // A data structure used to encode a single value in a matrix. + private class MatrixElement + { + // Matrix column index. Its allowed range is from 0 to _synthesizedMatrixColumnCount - 1. + [KeyType(_synthesizedMatrixColumnCount)] + public uint MatrixColumnIndex { get; set; } + // Matrix row index. Its allowed range is from 0 to _synthesizedMatrixRowCount - 1. + [KeyType(_synthesizedMatrixRowCount)] + public uint MatrixRowIndex { get; set; } + // The value at the MatrixColumnIndex-th column and the MatrixRowIndex-th row. + public float Value { get; set; } + // The predicted value at the MatrixColumnIndex-th column and the MatrixRowIndex-th row. + public float Score { get; set; } + } + + // Create an in-memory matrix as a list of tuples (column index, row index, value). Notice that one-class matrix + // factorization handles scenarios where only positive signals can be observed (e.g., on Facebook, only likes are recorded and + // dislikes are not), so all observed values are set to 1. + private static void GetOneClassMatrix(out List observedMatrix, out List fullMatrix) + { + // The matrix factorization model will be trained using only observedMatrix, but we will see it can learn all information + // carried in fullMatrix. 
+ observedMatrix = new List(); + fullMatrix = new List(); + for (uint i = 0; i < _synthesizedMatrixColumnCount; ++i) + for (uint j = 0; j < _synthesizedMatrixRowCount; ++j) + { + if ((i + j) % 10 == 0) + { + // Set observed elements' values to 1 (means like). + observedMatrix.Add(new MatrixElement() { MatrixColumnIndex = i, MatrixRowIndex = j, Value = 1, Score = 0 }); + fullMatrix.Add(new MatrixElement() { MatrixColumnIndex = i, MatrixRowIndex = j, Value = 1, Score = 0 }); + } + else + // Set unobserved elements' values to 0.15, a value smaller than observed values (means dislike). + fullMatrix.Add(new MatrixElement() { MatrixColumnIndex = i, MatrixRowIndex = j, Value = 0.15f, Score = 0 }); + } + } + } +} diff --git a/src/Microsoft.ML.Recommender/RecommenderCatalog.cs b/src/Microsoft.ML.Recommender/RecommenderCatalog.cs index 8b2fe169f6..81d896f658 100644 --- a/src/Microsoft.ML.Recommender/RecommenderCatalog.cs +++ b/src/Microsoft.ML.Recommender/RecommenderCatalog.cs @@ -87,6 +87,7 @@ public MatrixFactorizationTrainer MatrixFactorization( /// /// /// public MatrixFactorizationTrainer MatrixFactorization( diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs index 23289dcd48..871d56888f 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs @@ -603,5 +603,119 @@ public void OneClassMatrixFactorizationWithUnseenColumnAndRow() CompareNumbersWithTolerance(0.05511549, testResults[1].Score, digitsOfPrecision: 5); CompareNumbersWithTolerance(0.00316973357, testResults[2].Score, digitsOfPrecision: 5); } + + [MatrixFactorizationFact] + public void OneClassMatrixFactorizationSample() + { + // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. 
+ var mlContext = new MLContext(seed: 0); + + // Get a small in-memory dataset. + GetOneClassMatrix(out List data, out List testData); + + // Convert the in-memory matrix into an IDataView so that ML.NET components can consume it. + var dataView = mlContext.Data.LoadFromEnumerable(data); + + // Create a matrix factorization trainer which takes "Value" as the training label, "MatrixColumnIndex" as the + // matrix's column index, and "MatrixRowIndex" as the matrix's row index. Here nameof(...) is used to extract the member + // names of the OneClassMatrixElement class. + var options = new MatrixFactorizationTrainer.Options + { + MatrixColumnIndexColumnName = nameof(OneClassMatrixElement.MatrixColumnIndex), + MatrixRowIndexColumnName = nameof(OneClassMatrixElement.MatrixRowIndex), + LabelColumnName = nameof(OneClassMatrixElement.Value), + NumberOfIterations = 20, + NumberOfThreads = 8, + ApproximationRank = 32, + Alpha = 1, + // The desired value of matrix elements not specified in the training set. + C = 0.15, + // To enable one-class matrix factorization, the following line is required. + LossFunction = MatrixFactorizationTrainer.LossFunctionType.SquareLossOneClass + }; + + var pipeline = mlContext.Recommendation().Trainers.MatrixFactorization(options); + + // Train a matrix factorization model. + var model = pipeline.Fit(dataView); + + // Apply the trained model to the test set. 
Notice that the training set is only the observed part of the matrix stored in the test set. + var prediction = model.Transform(mlContext.Data.LoadFromEnumerable(testData)); + + var results = mlContext.Data.CreateEnumerable(prediction, false).ToList(); + + Assert.Equal(6000, results.Count); + + var firstElement = results.First(); + var lastElement = results.Last(); + + Assert.Equal(1u, firstElement.MatrixColumnIndex); + Assert.Equal(1u, firstElement.MatrixRowIndex); + Assert.Equal(0.987113833, firstElement.Score, 3); + Assert.Equal(1, firstElement.Value, 3); + + Assert.Equal(60u, lastElement.MatrixColumnIndex); + Assert.Equal(100u, lastElement.MatrixRowIndex); + Assert.Equal(0.149993762, lastElement.Score, 3); + Assert.Equal(0.15, lastElement.Value, 3); + + // The two matrix elements with the highest predicted scores on the 1st row (MatrixRowIndex == 1). If we view the row index as a user ID and the column index as a game ID, + // the following list contains the games recommended by the trained model. Note that sometimes you may want to exclude training + // data from your predicted results because those games were already purchased. + var topColumns = results.Where(element => element.MatrixRowIndex == 1).OrderByDescending(element => element.Score).Take(2); + + firstElement = topColumns.First(); + lastElement = topColumns.Last(); + + Assert.Equal(1u, firstElement.MatrixColumnIndex); + Assert.Equal(1u, firstElement.MatrixRowIndex); + Assert.Equal(0.987113833, firstElement.Score, 3); + Assert.Equal(1, firstElement.Value, 3); + + Assert.Equal(11u, lastElement.MatrixColumnIndex); + Assert.Equal(1u, lastElement.MatrixRowIndex); + Assert.Equal(0.987113833, lastElement.Score, 3); + Assert.Equal(1, lastElement.Value, 3); + } + + // A data structure used to encode a single value in a matrix. + private class OneClassMatrixElement + { + // Matrix column index. Its allowed range is from 0 to _synthesizedMatrixColumnCount - 1. + [KeyType(_synthesizedMatrixColumnCount)] + public uint MatrixColumnIndex { get; set; } + // Matrix row index. 
Its allowed range is from 0 to _synthesizedMatrixRowCount - 1. + [KeyType(_synthesizedMatrixRowCount)] + public uint MatrixRowIndex { get; set; } + // The value at the MatrixColumnIndex-th column and the MatrixRowIndex-th row. + public float Value { get; set; } + // The predicted value at the MatrixColumnIndex-th column and the MatrixRowIndex-th row. + public float Score { get; set; } + } + + // Create an in-memory matrix as a list of tuples (column index, row index, value). Notice that one-class matrix + // factorization handles scenarios where only positive signals can be observed (e.g., on Facebook, only likes are recorded and + // dislikes are not), so all observed values are set to 1. + private static void GetOneClassMatrix(out List observedMatrix, out List fullMatrix) + { + // The matrix factorization model will be trained using only observedMatrix, but we will see it can learn all information + // carried in fullMatrix. + observedMatrix = new List(); + fullMatrix = new List(); + for (uint i = 0; i < _synthesizedMatrixColumnCount; ++i) + for (uint j = 0; j < _synthesizedMatrixRowCount; ++j) + { + if ((i + j) % 10 == 0) + { + // Set observed elements' values to 1 (means like). + observedMatrix.Add(new OneClassMatrixElement() { MatrixColumnIndex = i, MatrixRowIndex = j, Value = 1, Score = 0 }); + fullMatrix.Add(new OneClassMatrixElement() { MatrixColumnIndex = i, MatrixRowIndex = j, Value = 1, Score = 0 }); + } + else + // Set unobserved elements' values to 0.15, a value smaller than observed values (means dislike). 
+ fullMatrix.Add(new OneClassMatrixElement() { MatrixColumnIndex = i, MatrixRowIndex = j, Value = 0.15f, Score = 0 }); + } + } } } \ No newline at end of file From b83ffbe8604be3e2fa46761a0d5c9647bcc67ff9 Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Thu, 11 Apr 2019 10:26:58 -0700 Subject: [PATCH 2/3] Address comments --- .../OneClassMatrixFactorizationWithOptions.cs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/OneClassMatrixFactorizationWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/OneClassMatrixFactorizationWithOptions.cs index 3b4e92ed89..fe621b017b 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/OneClassMatrixFactorizationWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/OneClassMatrixFactorizationWithOptions.cs @@ -83,6 +83,15 @@ public static void Example() // the following list contains the games recommended by the trained model. Note that sometime, you may want to exclude training // data from your predicted results because those games were already purchased. var topColumns = results.Where(element => element.MatrixRowIndex == 1).OrderByDescending(element => element.Score).Take(2); + + Console.WriteLine("Top 2 predictions at the 1nd row:"); + foreach (var top in topColumns) + Console.WriteLine($"Predicted value at row {top.MatrixRowIndex - 1} and column {top.MatrixColumnIndex - 1} is {top.Score} and its expected value is {top.Value}."); + + // Expected output similar to: + // Top 2 predictions on the 1st row: + // Predicted value at row 0 and column 0 is 0.9871138 and its expected value is 1. + // Predicted value at row 0 and column 10 is 0.9871138 and its expected value is 1. } // The following variables defines the shape of a matrix. Its shape is _synthesizedMatrixRowCount-by-_synthesizedMatrixColumnCount. 
From a3177596eb60b33508116a9d5c1e5d3cd379d632 Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Thu, 11 Apr 2019 13:08:39 -0700 Subject: [PATCH 3/3] Address comments --- .../OneClassMatrixFactorizationWithOptions.cs | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/OneClassMatrixFactorizationWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/OneClassMatrixFactorizationWithOptions.cs index fe621b017b..57cb7f2e6b 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/OneClassMatrixFactorizationWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/OneClassMatrixFactorizationWithOptions.cs @@ -9,8 +9,8 @@ namespace Samples.Dynamic.Trainers.Recommendation { public static class OneClassMatrixFactorizationWithOptions { - // This example shows the use of ML.NET's one-class matrix factorization module which implements - // Algorithm 1 in a paper. + // This example shows the use of ML.NET's one-class matrix factorization module which implements a coordinate descent method + // described in Algorithm 1 in a paper. // See page 28 in of slides for a brief introduction to // one-class matrix factorization. // In this example we will create in-memory data and then use it to train a one-class matrix factorization model. @@ -29,7 +29,7 @@ public static void Example() // Convert the in-memory matrix into an IDataView so that ML.NET components can consume it. var dataView = mlContext.Data.LoadFromEnumerable(data); - // Create a matrix factorization trainer which may consume "Value" as the training label, "MatrixColumnIndex" as the + // Create a matrix factorization trainer which takes "Value" as the training label, "MatrixColumnIndex" as the // matrix's column index, and "MatrixRowIndex" as the matrix's row index. Here nameof(...) is used to extract field // names' in MatrixElement class. 
var options = new MatrixFactorizationTrainer.Options @@ -41,9 +41,12 @@ public static void Example() NumberOfThreads = 8, ApproximationRank = 32, Alpha = 1, - // The desired of unobserved values. + // The desired value of matrix elements not specified in the training set. + // If the training set doesn't specify the value at the u-th row and v-th column, + // its desired value is set to 0.15. In other words, this parameter determines + // the value of all missing matrix elements. C = 0.15, - // To enable one-class matrix factorization, the following line is required. + // This argument enables one-class matrix factorization. LossFunction = MatrixFactorizationTrainer.LossFunctionType.SquareLossOneClass }; @@ -58,7 +61,8 @@ public static void Example() var results = mlContext.Data.CreateEnumerable(prediction, false).ToList(); // Feed the test data into the model and then iterate through a few predictions. foreach (var pred in results.Take(15)) - Console.WriteLine($"Predicted value at row {pred.MatrixRowIndex - 1} and column {pred.MatrixColumnIndex - 1} is {pred.Score} and its expected value is {pred.Value}."); + Console.WriteLine($"Predicted value at row {pred.MatrixRowIndex - 1} and column {pred.MatrixColumnIndex - 1} is " + + $"{pred.Score} and its expected value is {pred.Value}."); // Expected output similar to: // Predicted value at row 0 and column 0 is 0.9873335 and its expected value is 1. @@ -79,12 +83,13 @@ public static void Example() // // Note: use the advanced options constructor to set the number of threads to 1 for a deterministic behavior. - // Two columns with highest predicted score to the 2nd row (indexed by 1). If we view row index as user ID and column as game ID, - // the following list contains the games recommended by the trained model. Note that sometime, you may want to exclude training - // data from your predicted results because those games were already purchased. 
+ // Assuming the row index is a user ID and the column index is a game ID, the following list contains the games recommended by the trained model. + // Note that sometimes you may want to exclude training data from your predicted results because those would represent games that + // were already purchased. + // The variable topColumns stores the two matrix elements with the highest predicted scores on the 1st row. var topColumns = results.Where(element => element.MatrixRowIndex == 1).OrderByDescending(element => element.Score).Take(2); - Console.WriteLine("Top 2 predictions at the 1nd row:"); + Console.WriteLine("Top 2 predictions on the 1st row:"); foreach (var top in topColumns) Console.WriteLine($"Predicted value at row {top.MatrixRowIndex - 1} and column {top.MatrixColumnIndex - 1} is {top.Score} and its expected value is {top.Value}.");