diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs index ab45d4ba53..7130a7c9ea 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs @@ -10,39 +10,48 @@ public static class LbfgsMaximumEntropy { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. var pipeline = - // Convert the string labels into key types. - mlContext.Transforms.Conversion.MapValueToKey(nameof(DataPoint.Label)) - // Apply LbfgsMaximumEntropy multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers.LbfgsMaximumEntropy()); + // Convert the string labels into key types. + mlContext.Transforms.Conversion + .MapValueToKey(nameof(DataPoint.Label)) + // Apply LbfgsMaximumEntropy multiclass trainer. 
+ .Append(mlContext.MulticlassClassification.Trainers + .LbfgsMaximumEntropy()); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -52,7 +61,9 @@ public static void Example() // Label: 3, Prediction: 3 // Evaluate the overall metrics - var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); + var metrics = mlContext.MulticlassClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -72,8 +83,11 @@ public static void Example() // Precision ||0.9308 |0.9593 |0.8580 | } - // Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3. - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + // Generates random uniform doubles in [-0.5, 0.5) + // range with labels 1, 2 or 3. 
+ private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)(random.NextDouble() - 0.5); @@ -85,13 +99,17 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = (uint)label, // Create random features that are correlated with the label. - // The feature values are slightly increased by adding a constant multiple of label. - Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() + // The feature values are slightly increased by adding a + // constant multiple of label. + Features = Enumerable.Repeat(label, 20) + .Select(x => randomFloat() + label * 0.2f).ToArray() + }; } } - // Example with label and 20 feature values. A data set is a collection of such examples. + // Example with label and 20 feature values. A data set is a collection of + // such examples. private class DataPoint { public uint Label { get; set; } @@ -114,8 +132,11 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Micro Accuracy: {metrics.MicroAccuracy:F2}"); Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); - Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine( + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.tt index a58f4f0917..519c549bd9 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.tt @@ -8,7 +8,8 @@ string TrainerOptions = null; 
string OptionsInclude = ""; string Comments = ""; bool CacheData = false; -string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3."; +string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5)" + + "\n // range with labels 1, 2 or 3."; string ExpectedOutputPerInstance = @"// Expected output: // Label: 1, Prediction: 1 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs index a818057677..597f34af06 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs @@ -11,15 +11,17 @@ public static class LbfgsMaximumEntropyWithOptions { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define trainer options. 
@@ -32,27 +34,32 @@ public static void Example() // Define the trainer. var pipeline = - // Convert the string labels into key types. - mlContext.Transforms.Conversion.MapValueToKey("Label") - // Apply LbfgsMaximumEntropy multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers.LbfgsMaximumEntropy(options)); - + // Convert the string labels into key types. + mlContext.Transforms.Conversion.MapValueToKey("Label") + // Apply LbfgsMaximumEntropy multiclass trainer. + .Append(mlContext.MulticlassClassification.Trainers + .LbfgsMaximumEntropy(options)); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
- var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -62,7 +69,9 @@ public static void Example() // Label: 3, Prediction: 3 // Evaluate the overall metrics - var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); + var metrics = mlContext.MulticlassClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -82,8 +91,11 @@ public static void Example() // Precision ||0.9304 |0.9593 |0.8529 | } - // Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3. - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + // Generates random uniform doubles in [-0.5, 0.5) + // range with labels 1, 2 or 3. + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)(random.NextDouble() - 0.5); @@ -95,13 +107,17 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = (uint)label, // Create random features that are correlated with the label. - // The feature values are slightly increased by adding a constant multiple of label. - Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() + // The feature values are slightly increased by adding a + // constant multiple of label. + Features = Enumerable.Repeat(label, 20) + .Select(x => randomFloat() + label * 0.2f).ToArray() + }; } } - // Example with label and 20 feature values. A data set is a collection of such examples. 
+ // Example with label and 20 feature values. A data set is a collection of + // such examples. private class DataPoint { public uint Label { get; set; } @@ -124,8 +140,11 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Micro Accuracy: {metrics.MicroAccuracy:F2}"); Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); - Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine( + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.tt index 12caedbd3b..1e96c1eab9 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.tt @@ -12,7 +12,9 @@ string TrainerOptions = @"LbfgsMaximumEntropyMulticlassTrainer.Options string OptionsInclude = "using Microsoft.ML.Trainers;"; string Comments = ""; -string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3."; +string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5)" + + "\n // range with labels 1, 2 or 3."; + bool CacheData = false; string ExpectedOutputPerInstance = @"// Expected output: diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs index 84ac92ecef..7ab6d5f80e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs +++ 
b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs @@ -8,43 +8,53 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification { public static class LightGbm { - // This example requires installation of additional NuGet package - // Microsoft.ML.FastTree. + // This example requires installation of additional NuGet package for + // Microsoft.ML.LightGbm at + // https://www.nuget.org/packages/Microsoft.ML.LightGbm/ public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. var pipeline = - // Convert the string labels into key types. - mlContext.Transforms.Conversion.MapValueToKey(nameof(DataPoint.Label)) - // Apply LightGbm multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers.LightGbm()); + // Convert the string labels into key types. + mlContext.Transforms.Conversion + .MapValueToKey(nameof(DataPoint.Label)) + // Apply LightGbm multiclass trainer. + .Append(mlContext.MulticlassClassification.Trainers + .LightGbm()); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. 
Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -54,7 +64,9 @@ public static void Example() // Label: 3, Prediction: 3 // Evaluate the overall metrics - var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); + var metrics = mlContext.MulticlassClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -74,8 +86,11 @@ public static void Example() // Precision ||0.9936 |1.0000 |0.9701 | } - // Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3. - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + // Generates random uniform doubles in [-0.5, 0.5) + // range with labels 1, 2 or 3. + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)(random.NextDouble() - 0.5); @@ -87,13 +102,17 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = (uint)label, // Create random features that are correlated with the label. 
- // The feature values are slightly increased by adding a constant multiple of label. - Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() + // The feature values are slightly increased by adding a + // constant multiple of label. + Features = Enumerable.Repeat(label, 20) + .Select(x => randomFloat() + label * 0.2f).ToArray() + }; } } - // Example with label and 20 feature values. A data set is a collection of such examples. + // Example with label and 20 feature values. A data set is a collection of + // such examples. private class DataPoint { public uint Label { get; set; } @@ -116,8 +135,11 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Micro Accuracy: {metrics.MicroAccuracy:F2}"); Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); - Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine( + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.tt index 216ca2a0b6..47944467ba 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.tt @@ -7,10 +7,13 @@ string TrainerOptions = null; string OptionsInclude = ""; string Comments = @" - // This example requires installation of additional NuGet package - // Microsoft.ML.FastTree."; + // This example requires installation of additional NuGet package for + // Microsoft.ML.LightGbm at + // https://www.nuget.org/packages/Microsoft.ML.LightGbm/"; + bool CacheData = false; -string DataGenerationComments= "// Generates random 
uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3."; +string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5)" + + "\n // range with labels 1, 2 or 3."; string ExpectedOutputPerInstance = @"// Expected output: // Label: 1, Prediction: 1 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs index 3b4444081c..76337146dc 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs @@ -9,19 +9,22 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification { public static class LightGbmWithOptions { - // This example requires installation of additional NuGet package - // Microsoft.ML.FastTree. + // This example requires installation of additional NuGet package for + // Microsoft.ML.LightGbm at + // https://www.nuget.org/packages/Microsoft.ML.LightGbm/ public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. 
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define trainer options. @@ -36,27 +39,32 @@ public static void Example() // Define the trainer. var pipeline = - // Convert the string labels into key types. - mlContext.Transforms.Conversion.MapValueToKey("Label") - // Apply LightGbm multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers.LightGbm(options)); - + // Convert the string labels into key types. + mlContext.Transforms.Conversion.MapValueToKey("Label") + // Apply LightGbm multiclass trainer. + .Append(mlContext.MulticlassClassification.Trainers + .LightGbm(options)); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
- var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -66,7 +74,9 @@ public static void Example() // Label: 3, Prediction: 3 // Evaluate the overall metrics - var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); + var metrics = mlContext.MulticlassClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -86,8 +96,11 @@ public static void Example() // Precision ||0.9936 |1.0000 |0.9419 | } - // Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3. - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + // Generates random uniform doubles in [-0.5, 0.5) + // range with labels 1, 2 or 3. + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)(random.NextDouble() - 0.5); @@ -99,13 +112,17 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = (uint)label, // Create random features that are correlated with the label. - // The feature values are slightly increased by adding a constant multiple of label. - Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() + // The feature values are slightly increased by adding a + // constant multiple of label. + Features = Enumerable.Repeat(label, 20) + .Select(x => randomFloat() + label * 0.2f).ToArray() + }; } } - // Example with label and 20 feature values. A data set is a collection of such examples. 
+ // Example with label and 20 feature values. A data set is a collection of + // such examples. private class DataPoint { public uint Label { get; set; } @@ -128,8 +145,11 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Micro Accuracy: {metrics.MicroAccuracy:F2}"); Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); - Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine( + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.tt index 7ec6706227..b267db33f0 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.tt @@ -14,11 +14,13 @@ string TrainerOptions = @"LightGbmMulticlassTrainer.Options string OptionsInclude = "using Microsoft.ML.Trainers.LightGbm;"; string Comments = @" - // This example requires installation of additional NuGet package - // Microsoft.ML.FastTree."; + // This example requires installation of additional NuGet package for + // Microsoft.ML.LightGbm at + // https://www.nuget.org/packages/Microsoft.ML.LightGbm/"; bool CacheData = false; -string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3."; +string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5)" + + "\n // range with labels 1, 2 or 3."; string ExpectedOutputPerInstance = @"// Expected output: // Label: 1, Prediction: 1 diff --git 
a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LogLossPerClass.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LogLossPerClass.cs index 86ab646083..8b1076f830 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LogLossPerClass.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LogLossPerClass.cs @@ -10,42 +10,50 @@ public static class LogLossPerClass { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. var pipeline = - // Convert the string labels into key types. - mlContext.Transforms.Conversion.MapValueToKey(nameof(DataPoint.Label)) - // Apply a multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers.LightGbm()); + // Convert the string labels into key types. + mlContext.Transforms.Conversion + .MapValueToKey(nameof(DataPoint.Label)) + // Apply a multiclass trainer. + .Append(mlContext.MulticlassClassification.Trainers + .LightGbm()); // Train the model. 
var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Evaluate the overall metrics - var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); + var metrics = mlContext.MulticlassClassification + .Evaluate(transformedTestData); // Find the original label values. VBuffer keys = default; transformedTestData.Schema["PredictedLabel"].GetKeyValues(ref keys); var originalLabels = keys.DenseValues().ToArray(); for (var i = 0; i < originalLabels.Length; i++) - Console.WriteLine($"LogLoss for label {originalLabels[i]}: {metrics.PerClassLogLoss[i]:F4}"); + Console.WriteLine($"LogLoss for label " + + $"{originalLabels[i]}: {metrics.PerClassLogLoss[i]:F4}"); // Expected output: // LogLoss for label 7: 0.2578 @@ -60,7 +68,9 @@ public static void Example() } // Generates data points with random features and labels 1 to 9. - private static IEnumerable GenerateRandomDataPoints(int count, int seed = 0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed = 0) + { var random = new Random(seed); float randomFloat() => (float)(random.NextDouble() - 0.5); @@ -72,13 +82,17 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = (uint)label, // Create random features that are correlated with the label. - // The feature values are slightly increased by adding a constant multiple of label. 
- Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() + // The feature values are slightly increased by adding a + // constant multiple of label. + Features = Enumerable.Repeat(label, 20) + .Select(x => randomFloat() + label * 0.2f).ToArray() + }; } } - // Example with label and 20 feature values. A data set is a collection of such examples. + // Example with label and 20 feature values. A data set is a collection of + // such examples. private class DataPoint { public uint Label { get; set; } @@ -95,4 +109,4 @@ private class Prediction public uint PredictedLabel { get; set; } } } -} +} \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude index 87201f12a3..ee3f245566 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude @@ -13,79 +13,97 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification {<#=Comments#> public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. 
var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); <# if (CacheData) { #> - // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times, - // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory, - // a solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms - // which needs many data passes. - trainingData = mlContext.Data.Cache(trainingData); + // ML.NET doesn't cache data set by default. Therefore, if one reads a + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms + // which needs many data passes. + trainingData = mlContext.Data.Cache(trainingData); <# } #> <# if (MetaTrainer != null) { #> // Define the trainer. var pipeline = - // Convert the string labels into key types. - mlContext.Transforms.Conversion.MapValueToKey("Label") - // Apply <#=MetaTrainer#> multiclass meta trainer on top of binary trainer. - .Append(mlContext.MulticlassClassification.Trainers.<#=MetaTrainer#>(<#=Trainer#>())); + // Convert the string labels into key types. + mlContext.Transforms.Conversion.MapValueToKey("Label") + // Apply <#=MetaTrainer#> multiclass meta trainer on top of + // binary trainer. + .Append(mlContext.MulticlassClassification.Trainers + .<#=MetaTrainer#>( + <#=Trainer#>())); <# } else if (TrainerOptions == null) { #> // Define the trainer. var pipeline = - // Convert the string labels into key types. 
- mlContext.Transforms.Conversion.MapValueToKey(nameof(DataPoint.Label)) - // Apply <#=Trainer#> multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers.<#=Trainer#>()); + // Convert the string labels into key types. + mlContext.Transforms.Conversion + .MapValueToKey(nameof(DataPoint.Label)) + // Apply <#=Trainer#> multiclass trainer. + .Append(mlContext.MulticlassClassification.Trainers + .<#=Trainer#>()); <# } else { #> // Define trainer options. var options = new <#=TrainerOptions#>; // Define the trainer. var pipeline = - // Convert the string labels into key types. - mlContext.Transforms.Conversion.MapValueToKey("Label") - // Apply <#=Trainer#> multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers.<#=Trainer#>(options)); - + // Convert the string labels into key types. + mlContext.Transforms.Conversion.MapValueToKey("Label") + // Apply <#=Trainer#> multiclass trainer. + .Append(mlContext.MulticlassClassification.Trainers + .<#=Trainer#>(options)); <# } #> // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
- var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); <#=ExpectedOutputPerInstance#> // Evaluate the overall metrics - var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); + var metrics = mlContext.MulticlassClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); <#=ExpectedOutput#> } <#=DataGenerationComments#> - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)(random.NextDouble() - 0.5); @@ -97,13 +115,17 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification { Label = (uint)label, // Create random features that are correlated with the label. - // The feature values are slightly increased by adding a constant multiple of label. - Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() + // The feature values are slightly increased by adding a + // constant multiple of label. + Features = Enumerable.Repeat(label, 20) + .Select(x => randomFloat() + label * 0.2f).ToArray() + }; } } - // Example with label and 20 feature values. A data set is a collection of such examples. + // Example with label and 20 feature values. A data set is a collection of + // such examples. 
private class DataPoint { public uint Label { get; set; } @@ -126,8 +148,10 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification Console.WriteLine($"Micro Accuracy: {metrics.MicroAccuracy:F2}"); Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); - Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine( + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } -} \ No newline at end of file +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs index d76006bf2c..4dc7f44f9e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs @@ -9,44 +9,55 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification public static class NaiveBayes { // Naive Bayes classifier is based on Bayes' theorem. - // It assumes independence among the presence of features in a class even though they may be dependent on each other. - // It is a multi-class trainer that accepts binary feature values of type float, i.e., feature values are either true or false. - // Specifically a feature value greater than zero is treated as true, zero or less is treated as false. + // It assumes independence among the presence of features in a class even + // though they may be dependent on each other. It is a multi-class trainer + // that accepts binary feature values of type float, i.e., feature values + // are either true or false. Specifically a feature value greater than zero + // is treated as true, zero or less is treated as false. public static void Example() { - // Create a new context for ML.NET operations. 
It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. var pipeline = - // Convert the string labels into key types. - mlContext.Transforms.Conversion.MapValueToKey(nameof(DataPoint.Label)) - // Apply NaiveBayes multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers.NaiveBayes()); + // Convert the string labels into key types. + mlContext.Transforms.Conversion + .MapValueToKey(nameof(DataPoint.Label)) + // Apply NaiveBayes multiclass trainer. + .Append(mlContext.MulticlassClassification.Trainers + .NaiveBayes()); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
- var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -56,7 +67,9 @@ public static void Example() // Label: 3, Prediction: 3 // Evaluate the overall metrics - var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); + var metrics = mlContext.MulticlassClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -77,9 +90,12 @@ public static void Example() } - // Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3. - // For NaiveBayes values greater than zero are treated as true, zero or less are treated as false. - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + // Generates random uniform doubles in [-0.5, 0.5) range with labels + // 1, 2 or 3. For NaiveBayes values greater than zero are treated as true, + // zero or less are treated as false. + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)(random.NextDouble() - 0.5); @@ -91,13 +107,17 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = (uint)label, // Create random features that are correlated with the label. - // The feature values are slightly increased by adding a constant multiple of label. - Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() + // The feature values are slightly increased by adding a + // constant multiple of label. 
+ Features = Enumerable.Repeat(label, 20) + .Select(x => randomFloat() + label * 0.2f).ToArray() + }; } } - // Example with label and 20 feature values. A data set is a collection of such examples. + // Example with label and 20 feature values. A data set is a collection of + // such examples. private class DataPoint { public uint Label { get; set; } @@ -120,8 +140,11 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Micro Accuracy: {metrics.MicroAccuracy:F2}"); Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); - Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine( + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.tt index a74ad64a80..26e0162b12 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.tt @@ -9,13 +9,16 @@ bool CacheData = false; string OptionsInclude = ""; string Comments= @" // Naive Bayes classifier is based on Bayes' theorem. - // It assumes independence among the presence of features in a class even though they may be dependent on each other. - // It is a multi-class trainer that accepts binary feature values of type float, i.e., feature values are either true or false. - // Specifically a feature value greater than zero is treated as true, zero or less is treated as false."; + // It assumes independence among the presence of features in a class even + // though they may be dependent on each other. 
It is a multi-class trainer + // that accepts binary feature values of type float, i.e., feature values + // are either true or false. Specifically a feature value greater than zero + // is treated as true, zero or less is treated as false."; string DataGenerationComments= @" - // Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3. - // For NaiveBayes values greater than zero are treated as true, zero or less are treated as false."; + // Generates random uniform doubles in [-0.5, 0.5) range with labels + // 1, 2 or 3. For NaiveBayes values greater than zero are treated as true, + // zero or less are treated as false."; string ExpectedOutputPerInstance= @"// Expected output: // Label: 1, Prediction: 1 @@ -39,4 +42,4 @@ string ExpectedOutput = @"// Expected output: // 2 || 9 | 21 | 133 | 0.8160 // ||======================== // Precision ||0.9467 |0.8735 |0.8061 |"; -#> \ No newline at end of file +#> diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs index 9b93fd3cd0..40a040325c 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs @@ -10,39 +10,49 @@ public static class OneVersusAll { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. 
var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. var pipeline = - // Convert the string labels into key types. - mlContext.Transforms.Conversion.MapValueToKey("Label") - // Apply OneVersusAll multiclass meta trainer on top of binary trainer. - .Append(mlContext.MulticlassClassification.Trainers.OneVersusAll(mlContext.BinaryClassification.Trainers.SdcaLogisticRegression())); + // Convert the string labels into key types. + mlContext.Transforms.Conversion.MapValueToKey("Label") + // Apply OneVersusAll multiclass meta trainer on top of + // binary trainer. + .Append(mlContext.MulticlassClassification.Trainers + .OneVersusAll( + mlContext.BinaryClassification.Trainers.SdcaLogisticRegression())); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
- var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -52,7 +62,9 @@ public static void Example() // Label: 3, Prediction: 2 // Evaluate the overall metrics - var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); + var metrics = mlContext.MulticlassClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -72,8 +84,11 @@ public static void Example() // Precision ||0.8994 |0.9180 |0.8851 | } - // Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3. - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + // Generates random uniform doubles in [-0.5, 0.5) + // range with labels 1, 2 or 3. + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)(random.NextDouble() - 0.5); @@ -85,13 +100,17 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = (uint)label, // Create random features that are correlated with the label. - // The feature values are slightly increased by adding a constant multiple of label. - Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() + // The feature values are slightly increased by adding a + // constant multiple of label. + Features = Enumerable.Repeat(label, 20) + .Select(x => randomFloat() + label * 0.2f).ToArray() + }; } } - // Example with label and 20 feature values. A data set is a collection of such examples. 
+ // Example with label and 20 feature values. A data set is a collection of + // such examples. private class DataPoint { public uint Label { get; set; } @@ -114,8 +133,11 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Micro Accuracy: {metrics.MicroAccuracy:F2}"); Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); - Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine( + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.tt index 0233d9948e..ce355c56b0 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.tt @@ -7,7 +7,8 @@ string TrainerOptions = null; string OptionsInclude = ""; string Comments= ""; -string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3."; +string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5)" + + "\n // range with labels 1, 2 or 3."; bool CacheData = false; diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs index de0dab8a9c..be42ec3bbd 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs @@ -10,39 +10,49 @@ public static class PairwiseCoupling { public static void 
Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. var pipeline = - // Convert the string labels into key types. - mlContext.Transforms.Conversion.MapValueToKey("Label") - // Apply PairwiseCoupling multiclass meta trainer on top of binary trainer. - .Append(mlContext.MulticlassClassification.Trainers.PairwiseCoupling(mlContext.BinaryClassification.Trainers.SdcaLogisticRegression())); + // Convert the string labels into key types. + mlContext.Transforms.Conversion.MapValueToKey("Label") + // Apply PairwiseCoupling multiclass meta trainer on top of + // binary trainer. + .Append(mlContext.MulticlassClassification.Trainers + .PairwiseCoupling( + mlContext.BinaryClassification.Trainers.SdcaLogisticRegression())); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. 
+ var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -52,7 +62,9 @@ public static void Example() // Label: 3, Prediction: 2 // Evaluate the overall metrics - var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); + var metrics = mlContext.MulticlassClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -72,8 +84,11 @@ public static void Example() // Precision ||0.9091 |0.9171 |0.8636 | } - // Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3. - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + // Generates random uniform doubles in [-0.5, 0.5) + // range with labels 1, 2 or 3. + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)(random.NextDouble() - 0.5); @@ -85,13 +100,17 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = (uint)label, // Create random features that are correlated with the label. - // The feature values are slightly increased by adding a constant multiple of label. - Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() + // The feature values are slightly increased by adding a + // constant multiple of label. 
+ Features = Enumerable.Repeat(label, 20) + .Select(x => randomFloat() + label * 0.2f).ToArray() + }; } } - // Example with label and 20 feature values. A data set is a collection of such examples. + // Example with label and 20 feature values. A data set is a collection of + // such examples. private class DataPoint { public uint Label { get; set; } @@ -114,8 +133,11 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Micro Accuracy: {metrics.MicroAccuracy:F2}"); Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); - Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine( + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.tt index 4f3617e693..a568fd9736 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.tt @@ -7,7 +7,9 @@ string TrainerOptions = null; string OptionsInclude = ""; string Comments= ""; -string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3."; +string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5)" + + "\n // range with labels 1, 2 or 3."; + bool CacheData = false; string ExpectedOutputPerInstance= @"// Expected output: diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PermutationFeatureImportance.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PermutationFeatureImportance.cs index 963fd238ca..71d1c02106 
100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PermutationFeatureImportance.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PermutationFeatureImportance.cs @@ -9,9 +9,10 @@ public static class PermutationFeatureImportance { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - var mlContext = new MLContext(seed:1); + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. + var mlContext = new MLContext(seed: 1); // Create sample data. var samples = GenerateData(); @@ -19,13 +20,17 @@ public static void Example() // Load the sample data as an IDataView. var data = mlContext.Data.LoadFromEnumerable(samples); - // Define a training pipeline that concatenates features into a vector, normalizes them, and then - // trains a linear model. - var featureColumns = new string[] { nameof(Data.Feature1), nameof(Data.Feature2) }; - var pipeline = mlContext.Transforms.Concatenate("Features", featureColumns) - .Append(mlContext.Transforms.Conversion.MapValueToKey("Label")) - .Append(mlContext.Transforms.NormalizeMinMax("Features")) - .Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy()); + // Define a training pipeline that concatenates features into a vector, + // normalizes them, and then trains a linear model. 
+ var featureColumns = + new string[] { nameof(Data.Feature1), nameof(Data.Feature2) }; + + var pipeline = mlContext.Transforms + .Concatenate("Features", featureColumns) + .Append(mlContext.Transforms.Conversion.MapValueToKey("Label")) + .Append(mlContext.Transforms.NormalizeMinMax("Features")) + .Append(mlContext.MulticlassClassification.Trainers + .SdcaMaximumEntropy()); // Fit the pipeline to the data. var model = pipeline.Fit(data); @@ -36,18 +41,26 @@ public static void Example() // Extract the predictor. var linearPredictor = model.LastTransformer; - // Compute the permutation metrics for the linear model using the normalized data. - var permutationMetrics = mlContext.MulticlassClassification.PermutationFeatureImportance( - linearPredictor, transformedData, permutationCount: 30); - - // Now let's look at which features are most important to the model overall. - // Get the feature indices sorted by their impact on microaccuracy. - var sortedIndices = permutationMetrics.Select((metrics, index) => new { index, metrics.MicroAccuracy}) + // Compute the permutation metrics for the linear model using the + // normalized data. + var permutationMetrics = mlContext.MulticlassClassification + .PermutationFeatureImportance(linearPredictor, transformedData, + permutationCount: 30); + + // Now let's look at which features are most important to the model + // overall. Get the feature indices sorted by their impact on + // microaccuracy. 
+ var sortedIndices = permutationMetrics + .Select((metrics, index) => new { index, metrics.MicroAccuracy }) .OrderByDescending(feature => Math.Abs(feature.MicroAccuracy.Mean)) .Select(feature => feature.index); - Console.WriteLine("Feature\tChange in MicroAccuracy\t95% Confidence in the Mean Change in MicroAccuracy"); - var microAccuracy = permutationMetrics.Select(x => x.MicroAccuracy).ToArray(); + Console.WriteLine("Feature\tChange in MicroAccuracy\t95% Confidence in " + + "the Mean Change in MicroAccuracy"); + + var microAccuracy = permutationMetrics.Select(x => x.MicroAccuracy) + .ToArray(); + foreach (int i in sortedIndices) { Console.WriteLine("{0}\t{1:G4}\t{2:G4}", @@ -76,10 +89,14 @@ private class Data /// linear combination of the features. /// /// The number of examples. - /// The bias, or offset, in the calculation of the label. - /// The weight to multiply the first feature with to compute the label. - /// The weight to multiply the second feature with to compute the label. - /// The seed for generating feature values and label noise. + /// The bias, or offset, in the calculation of the + /// label. + /// The weight to multiply the first feature with to + /// compute the label. + /// The weight to multiply the second feature with to + /// compute the label. + /// The seed for generating feature values and label + /// noise. /// An enumerable of Data objects. private static IEnumerable GenerateData(int nExamples = 10000, double bias = 0, double weight1 = 1, double weight2 = 2, int seed = 1) @@ -95,7 +112,10 @@ private static IEnumerable GenerateData(int nExamples = 10000, }; // Create a noisy label. 
- var value = (float)(bias + weight1 * data.Feature1 + weight2 * data.Feature2 + rng.NextDouble() - 0.5); + var value = (float) + (bias + weight1 * data.Feature1 + weight2 * data.Feature2 + + rng.NextDouble() - 0.5); + if (value < max / 3) data.Label = 0; else if (value < 2 * max / 3) @@ -106,4 +126,4 @@ private static IEnumerable GenerateData(int nExamples = 10000, } } } -} +} \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs index 1cc66a33cc..2a83a84f3a 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs @@ -10,45 +10,56 @@ public static class SdcaMaximumEntropy { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); - // ML.NET doesn't cache data set by default. 
Therefore, if one reads a data set from a file and accesses it many times, - // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory, - // a solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms - // which needs many data passes. - trainingData = mlContext.Data.Cache(trainingData); + // ML.NET doesn't cache data set by default. Therefore, if one reads a + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms + // which needs many data passes. + trainingData = mlContext.Data.Cache(trainingData); // Define the trainer. var pipeline = - // Convert the string labels into key types. - mlContext.Transforms.Conversion.MapValueToKey(nameof(DataPoint.Label)) - // Apply SdcaMaximumEntropy multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy()); + // Convert the string labels into key types. + mlContext.Transforms.Conversion + .MapValueToKey(nameof(DataPoint.Label)) + // Apply SdcaMaximumEntropy multiclass trainer. + .Append(mlContext.MulticlassClassification.Trainers + .SdcaMaximumEntropy()); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
- var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -58,7 +69,9 @@ public static void Example() // Label: 3, Prediction: 3 // Evaluate the overall metrics - var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); + var metrics = mlContext.MulticlassClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -77,8 +90,11 @@ public static void Example() // Precision ||0.9130 |0.9538 |0.8494 | } - // Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3. - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + // Generates random uniform doubles in [-0.5, 0.5) + // range with labels 1, 2 or 3. + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)(random.NextDouble() - 0.5); @@ -90,13 +106,17 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = (uint)label, // Create random features that are correlated with the label. - // The feature values are slightly increased by adding a constant multiple of label. - Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() + // The feature values are slightly increased by adding a + // constant multiple of label. + Features = Enumerable.Repeat(label, 20) + .Select(x => randomFloat() + label * 0.2f).ToArray() + }; } } - // Example with label and 20 feature values. A data set is a collection of such examples. 
+ // Example with label and 20 feature values. A data set is a collection of + // such examples. private class DataPoint { public uint Label { get; set; } @@ -119,8 +139,11 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Micro Accuracy: {metrics.MicroAccuracy:F2}"); Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); - Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine( + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.tt index 4eccf95005..f83d618268 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.tt @@ -8,7 +8,8 @@ string TrainerOptions = null; string OptionsInclude = ""; string Comments = ""; bool CacheData = true; -string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3."; +string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5)" + + "\n // range with labels 1, 2 or 3."; string ExpectedOutputPerInstance = @"// Expected output: // Label: 1, Prediction: 1 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs index a3cfcf4cbc..62ed191c0f 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs +++ 
b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs @@ -11,22 +11,26 @@ public static class SdcaMaximumEntropyWithOptions { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); - // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times, - // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory, - // a solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms - // which needs many data passes. - trainingData = mlContext.Data.Cache(trainingData); + // ML.NET doesn't cache data set by default. Therefore, if one reads a + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms + // which needs many data passes. 
+ trainingData = mlContext.Data.Cache(trainingData); // Define trainer options. var options = new SdcaMaximumEntropyMulticlassTrainer.Options @@ -39,27 +43,32 @@ public static void Example() // Define the trainer. var pipeline = - // Convert the string labels into key types. - mlContext.Transforms.Conversion.MapValueToKey("Label") - // Apply SdcaMaximumEntropy multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy(options)); - + // Convert the string labels into key types. + mlContext.Transforms.Conversion.MapValueToKey("Label") + // Apply SdcaMaximumEntropy multiclass trainer. + .Append(mlContext.MulticlassClassification.Trainers + .SdcaMaximumEntropy(options)); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
- var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -69,7 +78,9 @@ public static void Example() // Label: 3, Prediction: 3 // Evaluate the overall metrics - var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); + var metrics = mlContext.MulticlassClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -89,8 +100,11 @@ public static void Example() // Precision ||0.9363 |0.9647 |0.8497 | } - // Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3. - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + // Generates random uniform doubles in [-0.5, 0.5) + // range with labels 1, 2 or 3. + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)(random.NextDouble() - 0.5); @@ -102,13 +116,17 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = (uint)label, // Create random features that are correlated with the label. - // The feature values are slightly increased by adding a constant multiple of label. - Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() + // The feature values are slightly increased by adding a + // constant multiple of label. + Features = Enumerable.Repeat(label, 20) + .Select(x => randomFloat() + label * 0.2f).ToArray() + }; } } - // Example with label and 20 feature values. A data set is a collection of such examples. 
+ // Example with label and 20 feature values. A data set is a collection of + // such examples. private class DataPoint { public uint Label { get; set; } @@ -131,8 +149,11 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Micro Accuracy: {metrics.MicroAccuracy:F2}"); Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); - Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine( + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.tt index 88eead7e0d..f6bb41ac40 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.tt @@ -14,7 +14,8 @@ string TrainerOptions = @"SdcaMaximumEntropyMulticlassTrainer.Options string OptionsInclude = "using Microsoft.ML.Trainers;"; string Comments = ""; bool CacheData = true; -string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3."; +string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5)" + + "\n // range with labels 1, 2 or 3."; string ExpectedOutputPerInstance = @"// Expected output: diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs index d9f59fe3e7..6a899f9432 100644 --- 
a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs @@ -10,45 +10,56 @@ public static class SdcaNonCalibrated { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); - // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times, - // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory, - // a solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms - // which needs many data passes. - trainingData = mlContext.Data.Cache(trainingData); + // ML.NET doesn't cache data set by default. Therefore, if one reads a + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. 
+ // Caching is especially helpful when working with iterative algorithms + // which needs many data passes. + trainingData = mlContext.Data.Cache(trainingData); // Define the trainer. var pipeline = - // Convert the string labels into key types. - mlContext.Transforms.Conversion.MapValueToKey(nameof(DataPoint.Label)) - // Apply SdcaNonCalibrated multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers.SdcaNonCalibrated()); + // Convert the string labels into key types. + mlContext.Transforms.Conversion + .MapValueToKey(nameof(DataPoint.Label)) + // Apply SdcaNonCalibrated multiclass trainer. + .Append(mlContext.MulticlassClassification.Trainers + .SdcaNonCalibrated()); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
- var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -58,7 +69,9 @@ public static void Example() // Label: 3, Prediction: 3 // Evaluate the overall metrics - var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); + var metrics = mlContext.MulticlassClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -78,8 +91,11 @@ public static void Example() // Precision ||0.9304 |0.9538 |0.8521 | } - // Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3. - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + // Generates random uniform doubles in [-0.5, 0.5) + // range with labels 1, 2 or 3. + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)(random.NextDouble() - 0.5); @@ -91,13 +107,17 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = (uint)label, // Create random features that are correlated with the label. - // The feature values are slightly increased by adding a constant multiple of label. - Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() + // The feature values are slightly increased by adding a + // constant multiple of label. + Features = Enumerable.Repeat(label, 20) + .Select(x => randomFloat() + label * 0.2f).ToArray() + }; } } - // Example with label and 20 feature values. A data set is a collection of such examples. 
+ // Example with label and 20 feature values. A data set is a collection of + // such examples. private class DataPoint { public uint Label { get; set; } @@ -120,8 +140,11 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Micro Accuracy: {metrics.MicroAccuracy:F2}"); Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); - Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine( + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.tt index 5437228267..cd403b1a44 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.tt @@ -8,7 +8,8 @@ string TrainerOptions = null; string OptionsInclude = ""; string Comments = ""; bool CacheData = true; -string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3."; +string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5)" + + "\n // range with labels 1, 2 or 3."; string ExpectedOutputPerInstance = @"// Expected output: // Label: 1, Prediction: 1 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs index 2577756d9b..6367e9fe21 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs +++ 
b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs @@ -11,22 +11,26 @@ public static class SdcaNonCalibratedWithOptions { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); - // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times, - // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory, - // a solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms - // which needs many data passes. - trainingData = mlContext.Data.Cache(trainingData); + // ML.NET doesn't cache data set by default. Therefore, if one reads a + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms + // which needs many data passes. 
+ trainingData = mlContext.Data.Cache(trainingData); // Define trainer options. var options = new SdcaNonCalibratedMulticlassTrainer.Options @@ -39,27 +43,32 @@ public static void Example() // Define the trainer. var pipeline = - // Convert the string labels into key types. - mlContext.Transforms.Conversion.MapValueToKey("Label") - // Apply SdcaNonCalibrated multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers.SdcaNonCalibrated(options)); - + // Convert the string labels into key types. + mlContext.Transforms.Conversion.MapValueToKey("Label") + // Apply SdcaNonCalibrated multiclass trainer. + .Append(mlContext.MulticlassClassification.Trainers + .SdcaNonCalibrated(options)); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
- var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -69,7 +78,9 @@ public static void Example() // Label: 3, Prediction: 3 // Evaluate the overall metrics - var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); + var metrics = mlContext.MulticlassClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -89,8 +100,11 @@ public static void Example() // Precision ||0.9236 |0.9591 |0.8372 | } - // Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3. - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + // Generates random uniform doubles in [-0.5, 0.5) + // range with labels 1, 2 or 3. + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)(random.NextDouble() - 0.5); @@ -102,13 +116,17 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = (uint)label, // Create random features that are correlated with the label. - // The feature values are slightly increased by adding a constant multiple of label. - Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() + // The feature values are slightly increased by adding a + // constant multiple of label. + Features = Enumerable.Repeat(label, 20) + .Select(x => randomFloat() + label * 0.2f).ToArray() + }; } } - // Example with label and 20 feature values. A data set is a collection of such examples. 
+ // Example with label and 20 feature values. A data set is a collection of + // such examples. private class DataPoint { public uint Label { get; set; } @@ -131,8 +149,11 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Micro Accuracy: {metrics.MicroAccuracy:F2}"); Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); - Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine( + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.tt index 19f7ee9dd9..5f0f2a4775 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.tt @@ -14,7 +14,8 @@ string TrainerOptions = @"SdcaNonCalibratedMulticlassTrainer.Options string OptionsInclude = "using Microsoft.ML.Trainers;"; string Comments = ""; bool CacheData = true; -string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3."; +string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5)" + + "\n // range with labels 1, 2 or 3."; string ExpectedOutputPerInstance = @"// Expected output: // Label: 1, Prediction: 1