From 539a0f7b7afe3c465b3cea151c304b4005487e1f Mon Sep 17 00:00:00 2001 From: sierralee51 <36902786+sierralee51@users.noreply.github.com> Date: Mon, 1 Jul 2019 13:12:44 -0700 Subject: [PATCH 01/44] reformatted multiclassclassification samples --- .../LbfgsMaximumEntropy.cs | 62 +++++++++----- .../LbfgsMaximumEntropy.tt | 3 +- .../LbfgsMaximumEntropyWithOptions.cs | 71 ++++++++++------ .../LbfgsMaximumEntropyWithOptions.tt | 4 +- .../MulticlassClassification/LightGbm.cs | 67 ++++++++++----- .../MulticlassClassification/LightGbm.tt | 9 +- .../LightGbmWithOptions.cs | 82 ++++++++++++------- .../LightGbmWithOptions.tt | 8 +- .../LogLossPerClass.cs | 42 ++++++---- .../MulticlassClassification.ttinclude | 72 +++++++++++----- .../MulticlassClassification/NaiveBayes.cs | 74 +++++++++++------ .../MulticlassClassification/NaiveBayes.tt | 13 +-- .../MulticlassClassification/OneVersusAll.cs | 62 +++++++++----- .../MulticlassClassification/OneVersusAll.tt | 3 +- .../PairwiseCoupling.cs | 60 +++++++++----- .../PairwiseCoupling.tt | 4 +- .../PermutationFeatureImportance.cs | 66 +++++++++------ .../SdcaMaximumEntropy.cs | 72 ++++++++++------ .../SdcaMaximumEntropy.tt | 3 +- .../SdcaMaximumEntropyWithOptions.cs | 73 +++++++++++------ .../SdcaMaximumEntropyWithOptions.tt | 3 +- .../SdcaNonCalibrated.cs | 64 ++++++++++----- .../SdcaNonCalibrated.tt | 3 +- .../SdcaNonCalibratedWithOptions.cs | 73 +++++++++++------ .../SdcaNonCalibratedWithOptions.tt | 27 +++--- 25 files changed, 668 insertions(+), 352 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs index ab45d4ba53..f280564b7a 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs @@ -10,39 +10,50 @@ 
public static class LbfgsMaximumEntropy { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. var pipeline = // Convert the string labels into key types. - mlContext.Transforms.Conversion.MapValueToKey(nameof(DataPoint.Label)) + mlContext.Transforms.Conversion + .MapValueToKey(nameof(DataPoint.Label)) + // Apply LbfgsMaximumEntropy multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers.LbfgsMaximumEntropy()); + .Append(mlContext.MulticlassClassification.Trainers + .LbfgsMaximumEntropy()); + // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. 
var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -52,15 +63,17 @@ public static void Example() // Label: 3, Prediction: 3 // Evaluate the overall metrics - var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); + var metrics = mlContext.MulticlassClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); - + // Expected output: // Micro Accuracy: 0.91 // Macro Accuracy: 0.91 // Log Loss: 0.24 // Log Loss Reduction: 0.79 - + // Confusion table // ||======================== // PREDICTED || 0 | 1 | 2 | Recall @@ -72,8 +85,11 @@ public static void Example() // Precision ||0.9308 |0.9593 |0.8580 | } - // Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3. - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + // Generates random uniform doubles in [-0.5, 0.5) + // range with labels 1, 2 or 3. + private static IEnumerable GenerateRandomDataPoints(int count, + int seed = 0) + { var random = new Random(seed); float randomFloat() => (float)(random.NextDouble() - 0.5); @@ -85,13 +101,17 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = (uint)label, // Create random features that are correlated with the label. - // The feature values are slightly increased by adding a constant multiple of label. 
- Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() + // The feature values are slightly increased by adding a + // constant multiple of label. + Features = Enumerable.Repeat(label, 20) + .Select(x => randomFloat() + label * 0.2f).ToArray() + }; } } - // Example with label and 20 feature values. A data set is a collection of such examples. + // Example with label and 20 feature values. A data set is a collection of + // such examples. private class DataPoint { public uint Label { get; set; } @@ -114,8 +134,10 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Micro Accuracy: {metrics.MicroAccuracy:F2}"); Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); - Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine( + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } -} +} \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.tt index a58f4f0917..519c549bd9 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.tt @@ -8,7 +8,8 @@ string TrainerOptions = null; string OptionsInclude = ""; string Comments = ""; bool CacheData = false; -string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3."; +string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5)" + + "\n // range with labels 1, 2 or 3."; string ExpectedOutputPerInstance = @"// Expected output: // Label: 1, Prediction: 1 diff 
--git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs index a818057677..5e0f57caab 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs @@ -11,48 +11,56 @@ public static class LbfgsMaximumEntropyWithOptions { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define trainer options. var options = new LbfgsMaximumEntropyMulticlassTrainer.Options - { - HistorySize = 50, - L1Regularization = 0.1f, - NumberOfThreads = 1 - }; + { + HistorySize = 50, + L1Regularization = 0.1f, + NumberOfThreads = 1 + }; // Define the trainer. - var pipeline = - // Convert the string labels into key types. + var pipeline = + // Convert the string labels into key types. 
mlContext.Transforms.Conversion.MapValueToKey("Label") // Apply LbfgsMaximumEntropy multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers.LbfgsMaximumEntropy(options)); - + .Append(mlContext.MulticlassClassification.Trainers + .LbfgsMaximumEntropy(options)); + // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -62,9 +70,11 @@ public static void Example() // Label: 3, Prediction: 3 // Evaluate the overall metrics - var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); + var metrics = mlContext.MulticlassClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); - + // Expected output: // Micro Accuracy: 0.91 // Macro Accuracy: 0.91 @@ -82,8 +92,11 @@ public static void Example() // Precision ||0.9304 |0.9593 |0.8529 | } - // Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3. 
- private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + // Generates random uniform doubles in [-0.5, 0.5) + // range with labels 1, 2 or 3. + private static IEnumerable GenerateRandomDataPoints(int count, + int seed = 0) + { var random = new Random(seed); float randomFloat() => (float)(random.NextDouble() - 0.5); @@ -95,13 +108,17 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = (uint)label, // Create random features that are correlated with the label. - // The feature values are slightly increased by adding a constant multiple of label. - Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() + // The feature values are slightly increased by adding a + // constant multiple of label. + Features = Enumerable.Repeat(label, 20) + .Select(x => randomFloat() + label * 0.2f).ToArray() + }; } } - // Example with label and 20 feature values. A data set is a collection of such examples. + // Example with label and 20 feature values. A data set is a collection of + // such examples. 
private class DataPoint { public uint Label { get; set; } @@ -124,8 +141,10 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Micro Accuracy: {metrics.MicroAccuracy:F2}"); Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); - Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine( + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } -} +} \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.tt index 12caedbd3b..1e96c1eab9 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.tt @@ -12,7 +12,9 @@ string TrainerOptions = @"LbfgsMaximumEntropyMulticlassTrainer.Options string OptionsInclude = "using Microsoft.ML.Trainers;"; string Comments = ""; -string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3."; +string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5)" + + "\n // range with labels 1, 2 or 3."; + bool CacheData = false; string ExpectedOutputPerInstance = @"// Expected output: diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs index 84ac92ecef..3376bfcf33 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs +++ 
b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs @@ -8,43 +8,55 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification { public static class LightGbm { - // This example requires installation of additional NuGet package - // Microsoft.ML.FastTree. + // This example requires installation of additional NuGet package for + // Microsoft.ML.LightGbm at + // https://www.nuget.org/packages/Microsoft.ML.LightGbm/ public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. var pipeline = // Convert the string labels into key types. - mlContext.Transforms.Conversion.MapValueToKey(nameof(DataPoint.Label)) + mlContext.Transforms.Conversion + .MapValueToKey(nameof(DataPoint.Label)) + // Apply LightGbm multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers.LightGbm()); + .Append(mlContext.MulticlassClassification.Trainers + .LightGbm()); + // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. 
- var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -54,15 +66,17 @@ public static void Example() // Label: 3, Prediction: 3 // Evaluate the overall metrics - var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); + var metrics = mlContext.MulticlassClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); - + // Expected output: // Micro Accuracy: 0.99 // Macro Accuracy: 0.99 // Log Loss: 0.05 // Log Loss Reduction: 0.95 - + // Confusion table // ||======================== // PREDICTED || 0 | 1 | 2 | Recall @@ -74,8 +88,11 @@ public static void Example() // Precision ||0.9936 |1.0000 |0.9701 | } - // Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3. - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + // Generates random uniform doubles in [-0.5, 0.5) + // range with labels 1, 2 or 3. 
+ private static IEnumerable GenerateRandomDataPoints(int count, + int seed = 0) + { var random = new Random(seed); float randomFloat() => (float)(random.NextDouble() - 0.5); @@ -87,13 +104,17 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = (uint)label, // Create random features that are correlated with the label. - // The feature values are slightly increased by adding a constant multiple of label. - Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() + // The feature values are slightly increased by adding a + // constant multiple of label. + Features = Enumerable.Repeat(label, 20) + .Select(x => randomFloat() + label * 0.2f).ToArray() + }; } } - // Example with label and 20 feature values. A data set is a collection of such examples. + // Example with label and 20 feature values. A data set is a collection of + // such examples. private class DataPoint { public uint Label { get; set; } @@ -116,8 +137,10 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Micro Accuracy: {metrics.MicroAccuracy:F2}"); Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); - Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine( + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } -} +} \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.tt index 216ca2a0b6..47944467ba 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.tt @@ -7,10 +7,13 @@ string TrainerOptions = null; string 
OptionsInclude = ""; string Comments = @" - // This example requires installation of additional NuGet package - // Microsoft.ML.FastTree."; + // This example requires installation of additional NuGet package for + // Microsoft.ML.LightGbm at + // https://www.nuget.org/packages/Microsoft.ML.LightGbm/"; + bool CacheData = false; -string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3."; +string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5)" + + "\n // range with labels 1, 2 or 3."; string ExpectedOutputPerInstance = @"// Expected output: // Label: 1, Prediction: 1 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs index 3b4444081c..c5a7f42fef 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs @@ -9,54 +9,63 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification { public static class LightGbmWithOptions { - // This example requires installation of additional NuGet package - // Microsoft.ML.FastTree. + // This example requires installation of additional NuGet package for + // Microsoft.ML.LightGbm at + // https://www.nuget.org/packages/Microsoft.ML.LightGbm/ public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. 
Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define trainer options. var options = new LightGbmMulticlassTrainer.Options - { - Booster = new DartBooster.Options() - { - TreeDropFraction = 0.15, - XgboostDartMode = false - } - }; + { + Booster = new DartBooster.Options() + { + TreeDropFraction = 0.15, + XgboostDartMode = false + } + }; // Define the trainer. - var pipeline = - // Convert the string labels into key types. + var pipeline = + // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") // Apply LightGbm multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers.LightGbm(options)); - + .Append(mlContext.MulticlassClassification.Trainers + .LightGbm(options)); + // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
- var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -66,15 +75,17 @@ public static void Example() // Label: 3, Prediction: 3 // Evaluate the overall metrics - var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); + var metrics = mlContext.MulticlassClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); - + // Expected output: // Micro Accuracy: 0.98 // Macro Accuracy: 0.98 // Log Loss: 0.07 // Log Loss Reduction: 0.94 - + // Confusion table // ||======================== // PREDICTED || 0 | 1 | 2 | Recall @@ -86,8 +97,11 @@ public static void Example() // Precision ||0.9936 |1.0000 |0.9419 | } - // Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3. - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + // Generates random uniform doubles in [-0.5, 0.5) + // range with labels 1, 2 or 3. + private static IEnumerable GenerateRandomDataPoints(int count, + int seed = 0) + { var random = new Random(seed); float randomFloat() => (float)(random.NextDouble() - 0.5); @@ -99,13 +113,17 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = (uint)label, // Create random features that are correlated with the label. - // The feature values are slightly increased by adding a constant multiple of label. - Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() + // The feature values are slightly increased by adding a + // constant multiple of label. 
+ Features = Enumerable.Repeat(label, 20) + .Select(x => randomFloat() + label * 0.2f).ToArray() + }; } } - // Example with label and 20 feature values. A data set is a collection of such examples. + // Example with label and 20 feature values. A data set is a collection of + // such examples. private class DataPoint { public uint Label { get; set; } @@ -128,8 +146,10 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Micro Accuracy: {metrics.MicroAccuracy:F2}"); Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); - Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine( + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } -} +} \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.tt index 7ec6706227..b267db33f0 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.tt @@ -14,11 +14,13 @@ string TrainerOptions = @"LightGbmMulticlassTrainer.Options string OptionsInclude = "using Microsoft.ML.Trainers.LightGbm;"; string Comments = @" - // This example requires installation of additional NuGet package - // Microsoft.ML.FastTree."; + // This example requires installation of additional NuGet package for + // Microsoft.ML.LightGbm at + // https://www.nuget.org/packages/Microsoft.ML.LightGbm/"; bool CacheData = false; -string DataGenerationComments= "// Generates random uniform doubles in 
[-0.5, 0.5)" + + "\n // range with labels 1, 2 or 3."; string ExpectedOutputPerInstance = @"// Expected output: // Label: 1, Prediction: 1 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LogLossPerClass.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LogLossPerClass.cs index 86ab646083..c875616ada 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LogLossPerClass.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LogLossPerClass.cs @@ -10,42 +10,50 @@ public static class LogLossPerClass { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. var pipeline = // Convert the string labels into key types. - mlContext.Transforms.Conversion.MapValueToKey(nameof(DataPoint.Label)) + mlContext.Transforms.Conversion + .MapValueToKey(nameof(DataPoint.Label)) + // Apply a multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers.LightGbm()); // Train the model. 
var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Evaluate the overall metrics - var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); + var metrics = mlContext.MulticlassClassification + .Evaluate(transformedTestData); // Find the original label values. VBuffer keys = default; transformedTestData.Schema["PredictedLabel"].GetKeyValues(ref keys); var originalLabels = keys.DenseValues().ToArray(); for (var i = 0; i < originalLabels.Length; i++) - Console.WriteLine($"LogLoss for label {originalLabels[i]}: {metrics.PerClassLogLoss[i]:F4}"); + Console.WriteLine($"LogLoss for label " + + $"{originalLabels[i]}: {metrics.PerClassLogLoss[i]:F4}"); // Expected output: // LogLoss for label 7: 0.2578 @@ -60,7 +68,9 @@ public static void Example() } // Generates data points with random features and labels 1 to 9. - private static IEnumerable GenerateRandomDataPoints(int count, int seed = 0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed = 0) + { var random = new Random(seed); float randomFloat() => (float)(random.NextDouble() - 0.5); @@ -72,13 +82,17 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = (uint)label, // Create random features that are correlated with the label. - // The feature values are slightly increased by adding a constant multiple of label. 
- Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() + // The feature values are slightly increased by adding a + // constant multiple of label. + Features = Enumerable.Repeat(label, 20) + .Select(x => randomFloat() + label * 0.2f).ToArray() + }; } } - // Example with label and 20 feature values. A data set is a collection of such examples. + // Example with label and 20 feature values. A data set is a collection of + // such examples. private class DataPoint { public uint Label { get; set; } @@ -95,4 +109,4 @@ private class Prediction public uint PredictedLabel { get; set; } } } -} +} \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude index 87201f12a3..38b5a86dcf 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude @@ -13,21 +13,25 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification {<#=Comments#> public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. 
var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); <# if (CacheData) { #> - // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times, - // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory, - // a solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms + // ML.NET doesn't cache data set by default. Therefore, if one reads a + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms // which needs many data passes. trainingData = mlContext.Data.Cache(trainingData); <# } #> @@ -37,15 +41,23 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification var pipeline = // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") - // Apply <#=MetaTrainer#> multiclass meta trainer on top of binary trainer. - .Append(mlContext.MulticlassClassification.Trainers.<#=MetaTrainer#>(<#=Trainer#>())); + // Apply <#=MetaTrainer#> multiclass meta trainer on top of + // binary trainer. + .Append(mlContext.MulticlassClassification.Trainers + .<#=MetaTrainer#>( + <#=Trainer#>())); + <# } else if (TrainerOptions == null) { #> // Define the trainer. var pipeline = // Convert the string labels into key types. 
- mlContext.Transforms.Conversion.MapValueToKey(nameof(DataPoint.Label)) + mlContext.Transforms.Conversion + .MapValueToKey(nameof(DataPoint.Label)) + // Apply <#=Trainer#> multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers.<#=Trainer#>()); + .Append(mlContext.MulticlassClassification.Trainers + .<#=Trainer#>()); + <# } else { #> // Define trainer options. var options = new <#=TrainerOptions#>; @@ -55,37 +67,47 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") // Apply <#=Trainer#> multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers.<#=Trainer#>(options)); + .Append(mlContext.MulticlassClassification.Trainers + .<#=Trainer#>(options)); <# } #> // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
- var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); <#=ExpectedOutputPerInstance#> // Evaluate the overall metrics - var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); + var metrics = mlContext.MulticlassClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); <#=ExpectedOutput#> } <#=DataGenerationComments#> - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)(random.NextDouble() - 0.5); @@ -97,13 +119,17 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification { Label = (uint)label, // Create random features that are correlated with the label. - // The feature values are slightly increased by adding a constant multiple of label. - Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() + // The feature values are slightly increased by adding a + // constant multiple of label. + Features = Enumerable.Repeat(label, 20) + .Select(x => randomFloat() + label * 0.2f).ToArray() + }; } } - // Example with label and 20 feature values. A data set is a collection of such examples. + // Example with label and 20 feature values. A data set is a collection of + // such examples. 
private class DataPoint { public uint Label { get; set; } @@ -126,7 +152,9 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification Console.WriteLine($"Micro Accuracy: {metrics.MicroAccuracy:F2}"); Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); - Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine( + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs index d76006bf2c..aa4c714873 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs @@ -9,44 +9,57 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification public static class NaiveBayes { // Naive Bayes classifier is based on Bayes' theorem. - // It assumes independence among the presence of features in a class even though they may be dependent on each other. - // It is a multi-class trainer that accepts binary feature values of type float, i.e., feature values are either true or false. - // Specifically a feature value greater than zero is treated as true, zero or less is treated as false. + // It assumes independence among the presence of features in a class even + // though they may be dependent on each other. It is a multi-class trainer + // that accepts binary feature values of type float, i.e., feature values + // are either true or false. Specifically a feature value greater than zero + // is treated as true, zero or less is treated as false. public static void Example() { - // Create a new context for ML.NET operations. 
It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. var pipeline = // Convert the string labels into key types. - mlContext.Transforms.Conversion.MapValueToKey(nameof(DataPoint.Label)) + mlContext.Transforms.Conversion + .MapValueToKey(nameof(DataPoint.Label)) + // Apply NaiveBayes multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers.NaiveBayes()); + .Append(mlContext.MulticlassClassification.Trainers + .NaiveBayes()); + // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
- var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -56,15 +69,17 @@ public static void Example() // Label: 3, Prediction: 3 // Evaluate the overall metrics - var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); + var metrics = mlContext.MulticlassClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); - + // Expected output: // Micro Accuracy: 0.88 // Macro Accuracy: 0.88 // Log Loss: 34.54 // Log Loss Reduction: -30.47 - + // Confusion table // ||======================== // PREDICTED || 0 | 1 | 2 | Recall @@ -76,10 +91,13 @@ public static void Example() // Precision ||0.9467 |0.8735 |0.8061 | } - - // Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3. - // For NaiveBayes values greater than zero are treated as true, zero or less are treated as false. - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + + // Generates random uniform doubles in [-0.5, 0.5) range with labels + // 1, 2 or 3. For NaiveBayes values greater than zero are treated as true, + // zero or less are treated as false. + private static IEnumerable GenerateRandomDataPoints(int count, + int seed = 0) + { var random = new Random(seed); float randomFloat() => (float)(random.NextDouble() - 0.5); @@ -91,13 +109,17 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = (uint)label, // Create random features that are correlated with the label. - // The feature values are slightly increased by adding a constant multiple of label. 
- Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() + // The feature values are slightly increased by adding a + // constant multiple of label. + Features = Enumerable.Repeat(label, 20) + .Select(x => randomFloat() + label * 0.2f).ToArray() + }; } } - // Example with label and 20 feature values. A data set is a collection of such examples. + // Example with label and 20 feature values. A data set is a collection of + // such examples. private class DataPoint { public uint Label { get; set; } @@ -120,8 +142,10 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Micro Accuracy: {metrics.MicroAccuracy:F2}"); Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); - Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine( + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } -} +} \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.tt index a74ad64a80..04ace790a7 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.tt @@ -9,13 +9,16 @@ bool CacheData = false; string OptionsInclude = ""; string Comments= @" // Naive Bayes classifier is based on Bayes' theorem. - // It assumes independence among the presence of features in a class even though they may be dependent on each other. - // It is a multi-class trainer that accepts binary feature values of type float, i.e., feature values are either true or false. 
- // Specifically a feature value greater than zero is treated as true, zero or less is treated as false."; + // It assumes independence among the presence of features in a class even + // though they may be dependent on each other. It is a multi-class trainer + // that accepts binary feature values of type float, i.e., feature values + // are either true or false. Specifically a feature value greater than zero + // is treated as true, zero or less is treated as false."; string DataGenerationComments= @" - // Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3. - // For NaiveBayes values greater than zero are treated as true, zero or less are treated as false."; + // Generates random uniform doubles in [-0.5, 0.5) range with labels + // 1, 2 or 3. For NaiveBayes values greater than zero are treated as true, + // zero or less are treated as false."; string ExpectedOutputPerInstance= @"// Expected output: // Label: 1, Prediction: 1 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs index 9b93fd3cd0..3893736257 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs @@ -10,39 +10,50 @@ public static class OneVersusAll { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. 
Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. var pipeline = // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") - // Apply OneVersusAll multiclass meta trainer on top of binary trainer. - .Append(mlContext.MulticlassClassification.Trainers.OneVersusAll(mlContext.BinaryClassification.Trainers.SdcaLogisticRegression())); + // Apply OneVersusAll multiclass meta trainer on top of + // binary trainer. + .Append(mlContext.MulticlassClassification.Trainers + .OneVersusAll( + mlContext.BinaryClassification.Trainers.SdcaLogisticRegression())); + // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
- var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -52,15 +63,17 @@ public static void Example() // Label: 3, Prediction: 2 // Evaluate the overall metrics - var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); + var metrics = mlContext.MulticlassClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); - + // Expected output: // Micro Accuracy: 0.90 // Macro Accuracy: 0.90 // Log Loss: 0.36 // Log Loss Reduction: 0.68 - + // Confusion table // ||======================== // PREDICTED || 0 | 1 | 2 | Recall @@ -72,8 +85,11 @@ public static void Example() // Precision ||0.8994 |0.9180 |0.8851 | } - // Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3. - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + // Generates random uniform doubles in [-0.5, 0.5) + // range with labels 1, 2 or 3. + private static IEnumerable GenerateRandomDataPoints(int count, + int seed = 0) + { var random = new Random(seed); float randomFloat() => (float)(random.NextDouble() - 0.5); @@ -85,13 +101,17 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = (uint)label, // Create random features that are correlated with the label. - // The feature values are slightly increased by adding a constant multiple of label. - Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() + // The feature values are slightly increased by adding a + // constant multiple of label. 
+ Features = Enumerable.Repeat(label, 20) + .Select(x => randomFloat() + label * 0.2f).ToArray() + }; } } - // Example with label and 20 feature values. A data set is a collection of such examples. + // Example with label and 20 feature values. A data set is a collection of + // such examples. private class DataPoint { public uint Label { get; set; } @@ -114,8 +134,10 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Micro Accuracy: {metrics.MicroAccuracy:F2}"); Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); - Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine( + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } -} +} \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.tt index 0233d9948e..ce355c56b0 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.tt @@ -7,7 +7,8 @@ string TrainerOptions = null; string OptionsInclude = ""; string Comments= ""; -string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3."; +string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5)" + + "\n // range with labels 1, 2 or 3."; bool CacheData = false; diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs index de0dab8a9c..6b00af2c39 100644 --- 
a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs @@ -10,39 +10,50 @@ public static class PairwiseCoupling { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. var pipeline = // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") - // Apply PairwiseCoupling multiclass meta trainer on top of binary trainer. - .Append(mlContext.MulticlassClassification.Trainers.PairwiseCoupling(mlContext.BinaryClassification.Trainers.SdcaLogisticRegression())); + // Apply PairwiseCoupling multiclass meta trainer on top of + // binary trainer. + .Append(mlContext.MulticlassClassification.Trainers + .PairwiseCoupling( + mlContext.BinaryClassification.Trainers.SdcaLogisticRegression())); + // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. 
- var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -52,9 +63,11 @@ public static void Example() // Label: 3, Prediction: 2 // Evaluate the overall metrics - var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); + var metrics = mlContext.MulticlassClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); - + // Expected output: // Micro Accuracy: 0.90 // Macro Accuracy: 0.90 @@ -72,8 +85,11 @@ public static void Example() // Precision ||0.9091 |0.9171 |0.8636 | } - // Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3. - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + // Generates random uniform doubles in [-0.5, 0.5) + // range with labels 1, 2 or 3. + private static IEnumerable GenerateRandomDataPoints(int count, + int seed = 0) + { var random = new Random(seed); float randomFloat() => (float)(random.NextDouble() - 0.5); @@ -85,13 +101,17 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = (uint)label, // Create random features that are correlated with the label. 
- // The feature values are slightly increased by adding a constant multiple of label. - Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() + // The feature values are slightly increased by adding a + // constant multiple of label. + Features = Enumerable.Repeat(label, 20) + .Select(x => randomFloat() + label * 0.2f).ToArray() + }; } } - // Example with label and 20 feature values. A data set is a collection of such examples. + // Example with label and 20 feature values. A data set is a collection of + // such examples. private class DataPoint { public uint Label { get; set; } @@ -114,8 +134,10 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Micro Accuracy: {metrics.MicroAccuracy:F2}"); Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); - Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine( + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } -} +} \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.tt index 4f3617e693..a568fd9736 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.tt @@ -7,7 +7,9 @@ string TrainerOptions = null; string OptionsInclude = ""; string Comments= ""; -string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3."; +string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5)" + + "\n // range with labels 1, 2 or 3."; + bool CacheData = false; string 
ExpectedOutputPerInstance= @"// Expected output: diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PermutationFeatureImportance.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PermutationFeatureImportance.cs index 963fd238ca..714c69361b 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PermutationFeatureImportance.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PermutationFeatureImportance.cs @@ -9,9 +9,10 @@ public static class PermutationFeatureImportance { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - var mlContext = new MLContext(seed:1); + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. + var mlContext = new MLContext(seed: 1); // Create sample data. var samples = GenerateData(); @@ -19,13 +20,17 @@ public static void Example() // Load the sample data as an IDataView. var data = mlContext.Data.LoadFromEnumerable(samples); - // Define a training pipeline that concatenates features into a vector, normalizes them, and then - // trains a linear model. - var featureColumns = new string[] { nameof(Data.Feature1), nameof(Data.Feature2) }; - var pipeline = mlContext.Transforms.Concatenate("Features", featureColumns) + // Define a training pipeline that concatenates features into a vector, + // normalizes them, and then trains a linear model. 
+ var featureColumns = + new string[] { nameof(Data.Feature1), nameof(Data.Feature2) }; + + var pipeline = mlContext.Transforms + .Concatenate("Features", featureColumns) .Append(mlContext.Transforms.Conversion.MapValueToKey("Label")) .Append(mlContext.Transforms.NormalizeMinMax("Features")) - .Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy()); + .Append(mlContext.MulticlassClassification.Trainers + .SdcaMaximumEntropy()); // Fit the pipeline to the data. var model = pipeline.Fit(data); @@ -36,18 +41,26 @@ public static void Example() // Extract the predictor. var linearPredictor = model.LastTransformer; - // Compute the permutation metrics for the linear model using the normalized data. - var permutationMetrics = mlContext.MulticlassClassification.PermutationFeatureImportance( - linearPredictor, transformedData, permutationCount: 30); - - // Now let's look at which features are most important to the model overall. - // Get the feature indices sorted by their impact on microaccuracy. - var sortedIndices = permutationMetrics.Select((metrics, index) => new { index, metrics.MicroAccuracy}) + // Compute the permutation metrics for the linear model using the + // normalized data. + var permutationMetrics = mlContext.MulticlassClassification + .PermutationFeatureImportance(linearPredictor, transformedData, + permutationCount: 30); + + // Now let's look at which features are most important to the model + // overall. Get the feature indices sorted by their impact on + // microaccuracy. 
+ var sortedIndices = permutationMetrics + .Select((metrics, index) => new { index, metrics.MicroAccuracy }) .OrderByDescending(feature => Math.Abs(feature.MicroAccuracy.Mean)) .Select(feature => feature.index); - Console.WriteLine("Feature\tChange in MicroAccuracy\t95% Confidence in the Mean Change in MicroAccuracy"); - var microAccuracy = permutationMetrics.Select(x => x.MicroAccuracy).ToArray(); + Console.WriteLine("Feature\tChange in MicroAccuracy\t95% Confidence in " + + "the Mean Change in MicroAccuracy"); + + var microAccuracy = permutationMetrics.Select(x => x.MicroAccuracy) + .ToArray(); + foreach (int i in sortedIndices) { Console.WriteLine("{0}\t{1:G4}\t{2:G4}", @@ -76,10 +89,14 @@ private class Data /// linear combination of the features. /// /// The number of examples. - /// The bias, or offset, in the calculation of the label. - /// The weight to multiply the first feature with to compute the label. - /// The weight to multiply the second feature with to compute the label. - /// The seed for generating feature values and label noise. + /// The bias, or offset, in the calculation of the + /// label. + /// The weight to multiply the first feature with to + /// compute the label. + /// The weight to multiply the second feature with to + /// compute the label. + /// The seed for generating feature values and label + /// noise. /// An enumerable of Data objects. private static IEnumerable GenerateData(int nExamples = 10000, double bias = 0, double weight1 = 1, double weight2 = 2, int seed = 1) @@ -95,7 +112,10 @@ private static IEnumerable GenerateData(int nExamples = 10000, }; // Create a noisy label. 
- var value = (float)(bias + weight1 * data.Feature1 + weight2 * data.Feature2 + rng.NextDouble() - 0.5); + var value = (float) + (bias + weight1 * data.Feature1 + weight2 * data.Feature2 + + rng.NextDouble() - 0.5); + if (value < max / 3) data.Label = 0; else if (value < 2 * max / 3) @@ -106,4 +126,4 @@ private static IEnumerable GenerateData(int nExamples = 10000, } } } -} +} \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs index 1cc66a33cc..966cc14155 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs @@ -10,45 +10,58 @@ public static class SdcaMaximumEntropy { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); - // ML.NET doesn't cache data set by default. 
Therefore, if one reads a data set from a file and accesses it many times, - // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory, - // a solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms - // which needs many data passes. - trainingData = mlContext.Data.Cache(trainingData); + // ML.NET doesn't cache data set by default. Therefore, if one reads a + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms + // which needs many data passes. + trainingData = mlContext.Data.Cache(trainingData); // Define the trainer. var pipeline = // Convert the string labels into key types. - mlContext.Transforms.Conversion.MapValueToKey(nameof(DataPoint.Label)) + mlContext.Transforms.Conversion + .MapValueToKey(nameof(DataPoint.Label)) + // Apply SdcaMaximumEntropy multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy()); + .Append(mlContext.MulticlassClassification.Trainers + .SdcaMaximumEntropy()); + // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
- var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -58,9 +71,11 @@ public static void Example() // Label: 3, Prediction: 3 // Evaluate the overall metrics - var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); + var metrics = mlContext.MulticlassClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); - + // Expected output: // Micro Accuracy: 0.91 // Macro Accuracy: 0.91 @@ -77,8 +92,11 @@ public static void Example() // Precision ||0.9130 |0.9538 |0.8494 | } - // Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3. - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + // Generates random uniform doubles in [-0.5, 0.5) + // range with labels 1, 2 or 3. + private static IEnumerable GenerateRandomDataPoints(int count, + int seed = 0) + { var random = new Random(seed); float randomFloat() => (float)(random.NextDouble() - 0.5); @@ -90,13 +108,17 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = (uint)label, // Create random features that are correlated with the label. - // The feature values are slightly increased by adding a constant multiple of label. - Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() + // The feature values are slightly increased by adding a + // constant multiple of label. + Features = Enumerable.Repeat(label, 20) + .Select(x => randomFloat() + label * 0.2f).ToArray() + }; } } - // Example with label and 20 feature values. 
A data set is a collection of such examples. + // Example with label and 20 feature values. A data set is a collection of + // such examples. private class DataPoint { public uint Label { get; set; } @@ -119,8 +141,10 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Micro Accuracy: {metrics.MicroAccuracy:F2}"); Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); - Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine( + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } -} +} \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.tt index 4eccf95005..f83d618268 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.tt @@ -8,7 +8,8 @@ string TrainerOptions = null; string OptionsInclude = ""; string Comments = ""; bool CacheData = true; -string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3."; +string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5)" + + "\n // range with labels 1, 2 or 3."; string ExpectedOutputPerInstance = @"// Expected output: // Label: 1, Prediction: 1 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs index a3cfcf4cbc..d4205cb71d 100644 --- 
a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs @@ -11,22 +11,26 @@ public static class SdcaMaximumEntropyWithOptions { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); - // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times, - // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory, - // a solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms - // which needs many data passes. - trainingData = mlContext.Data.Cache(trainingData); + // ML.NET doesn't cache data set by default. Therefore, if one reads a + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. 
+ // Caching is especially helpful when working with iterative algorithms + // which needs many data passes. + trainingData = mlContext.Data.Cache(trainingData); // Define trainer options. var options = new SdcaMaximumEntropyMulticlassTrainer.Options @@ -38,28 +42,34 @@ public static void Example() }; // Define the trainer. - var pipeline = - // Convert the string labels into key types. + var pipeline = + // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") // Apply SdcaMaximumEntropy multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy(options)); - + .Append(mlContext.MulticlassClassification.Trainers + .SdcaMaximumEntropy(options)); + // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
- var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -69,9 +79,11 @@ public static void Example() // Label: 3, Prediction: 3 // Evaluate the overall metrics - var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); + var metrics = mlContext.MulticlassClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); - + // Expected output: // Micro Accuracy: 0.92 // Macro Accuracy: 0.92 @@ -89,8 +101,11 @@ public static void Example() // Precision ||0.9363 |0.9647 |0.8497 | } - // Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3. - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + // Generates random uniform doubles in [-0.5, 0.5) + // range with labels 1, 2 or 3. + private static IEnumerable GenerateRandomDataPoints(int count, + int seed = 0) + { var random = new Random(seed); float randomFloat() => (float)(random.NextDouble() - 0.5); @@ -102,13 +117,17 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = (uint)label, // Create random features that are correlated with the label. - // The feature values are slightly increased by adding a constant multiple of label. - Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() + // The feature values are slightly increased by adding a + // constant multiple of label. + Features = Enumerable.Repeat(label, 20) + .Select(x => randomFloat() + label * 0.2f).ToArray() + }; } } - // Example with label and 20 feature values. 
A data set is a collection of such examples. + // Example with label and 20 feature values. A data set is a collection of + // such examples. private class DataPoint { public uint Label { get; set; } @@ -131,8 +150,10 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Micro Accuracy: {metrics.MicroAccuracy:F2}"); Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); - Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine( + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } -} +} \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.tt index 88eead7e0d..f6bb41ac40 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.tt @@ -14,7 +14,8 @@ string TrainerOptions = @"SdcaMaximumEntropyMulticlassTrainer.Options string OptionsInclude = "using Microsoft.ML.Trainers;"; string Comments = ""; bool CacheData = true; -string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3."; +string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5)" + + "\n // range with labels 1, 2 or 3."; string ExpectedOutputPerInstance = @"// Expected output: diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs index d9f59fe3e7..912e409e81 100644 --- 
a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs @@ -10,45 +10,58 @@ public static class SdcaNonCalibrated { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); - // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times, - // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory, - // a solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms + // ML.NET doesn't cache data set by default. Therefore, if one reads a + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms // which needs many data passes. 
trainingData = mlContext.Data.Cache(trainingData); // Define the trainer. var pipeline = // Convert the string labels into key types. - mlContext.Transforms.Conversion.MapValueToKey(nameof(DataPoint.Label)) + mlContext.Transforms.Conversion + .MapValueToKey(nameof(DataPoint.Label)) + // Apply SdcaNonCalibrated multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers.SdcaNonCalibrated()); + .Append(mlContext.MulticlassClassification.Trainers + .SdcaNonCalibrated()); + // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
- var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -58,7 +71,9 @@ public static void Example() // Label: 3, Prediction: 3 // Evaluate the overall metrics - var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); + var metrics = mlContext.MulticlassClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); // Expected output: @@ -78,8 +93,11 @@ public static void Example() // Precision ||0.9304 |0.9538 |0.8521 | } - // Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3. - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + // Generates random uniform doubles in [-0.5, 0.5) + // range with labels 1, 2 or 3. + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)(random.NextDouble() - 0.5); @@ -91,13 +109,17 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = (uint)label, // Create random features that are correlated with the label. - // The feature values are slightly increased by adding a constant multiple of label. - Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() + // The feature values are slightly increased by adding a + // constant multiple of label. + Features = Enumerable.Repeat(label, 20) + .Select(x => randomFloat() + label * 0.2f).ToArray() + }; } } - // Example with label and 20 feature values. A data set is a collection of such examples. 
+ // Example with label and 20 feature values. A data set is a collection of + // such examples. private class DataPoint { public uint Label { get; set; } @@ -120,7 +142,9 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Micro Accuracy: {metrics.MicroAccuracy:F2}"); Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); - Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine( + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.tt index 5437228267..cd403b1a44 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.tt @@ -8,7 +8,8 @@ string TrainerOptions = null; string OptionsInclude = ""; string Comments = ""; bool CacheData = true; -string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3."; +string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5)" + + "\n // range with labels 1, 2 or 3."; string ExpectedOutputPerInstance = @"// Expected output: // Label: 1, Prediction: 1 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs index 2577756d9b..9f1cd73e9b 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs +++ 
b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs @@ -11,22 +11,26 @@ public static class SdcaNonCalibratedWithOptions { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); - // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times, - // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory, - // a solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms - // which needs many data passes. - trainingData = mlContext.Data.Cache(trainingData); + // ML.NET doesn't cache data set by default. Therefore, if one reads a + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms + // which needs many data passes. 
+ trainingData = mlContext.Data.Cache(trainingData); // Define trainer options. var options = new SdcaNonCalibratedMulticlassTrainer.Options @@ -38,28 +42,34 @@ public static void Example() }; // Define the trainer. - var pipeline = - // Convert the string labels into key types. + var pipeline = + // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") // Apply SdcaNonCalibrated multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers.SdcaNonCalibrated(options)); - + .Append(mlContext.MulticlassClassification.Trainers + .SdcaNonCalibrated(options)); + // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
- var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -69,9 +79,11 @@ public static void Example() // Label: 3, Prediction: 3 // Evaluate the overall metrics - var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); + var metrics = mlContext.MulticlassClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); - + // Expected output: // Micro Accuracy: 0.91 // Macro Accuracy: 0.91 @@ -89,8 +101,11 @@ public static void Example() // Precision ||0.9236 |0.9591 |0.8372 | } - // Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3. - private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + // Generates random uniform doubles in [-0.5, 0.5) + // range with labels 1, 2 or 3. + private static IEnumerable GenerateRandomDataPoints(int count, + int seed = 0) + { var random = new Random(seed); float randomFloat() => (float)(random.NextDouble() - 0.5); @@ -102,13 +117,17 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = (uint)label, // Create random features that are correlated with the label. - // The feature values are slightly increased by adding a constant multiple of label. - Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() + // The feature values are slightly increased by adding a + // constant multiple of label. + Features = Enumerable.Repeat(label, 20) + .Select(x => randomFloat() + label * 0.2f).ToArray() + }; } } - // Example with label and 20 feature values. 
A data set is a collection of such examples. + // Example with label and 20 feature values. A data set is a collection of + // such examples. private class DataPoint { public uint Label { get; set; } @@ -131,8 +150,10 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Micro Accuracy: {metrics.MicroAccuracy:F2}"); Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); - Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine( + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } -} +} \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.tt index 19f7ee9dd9..cd403b1a44 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.tt @@ -1,20 +1,15 @@ <#@ include file="MulticlassClassification.ttinclude"#> <#+ -string ClassName = "SdcaNonCalibratedWithOptions"; +string ClassName = "SdcaNonCalibrated"; string Trainer = "SdcaNonCalibrated"; string MetaTrainer = null; -string TrainerOptions = @"SdcaNonCalibratedMulticlassTrainer.Options - { - Loss = new HingeLoss(), - L1Regularization = 0.1f, - BiasLearningRate = 0.01f, - NumberOfThreads = 1 - }"; +string TrainerOptions = null; -string OptionsInclude = "using Microsoft.ML.Trainers;"; +string OptionsInclude = ""; string Comments = ""; bool CacheData = true; -string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3."; +string DataGenerationComments= "// Generates random 
uniform doubles in [-0.5, 0.5)" + + "\n // range with labels 1, 2 or 3."; string ExpectedOutputPerInstance = @"// Expected output: // Label: 1, Prediction: 1 @@ -26,16 +21,16 @@ string ExpectedOutputPerInstance = @"// Expected output: string ExpectedOutput = @"// Expected output: // Micro Accuracy: 0.91 // Macro Accuracy: 0.91 - // Log Loss: 0.22 - // Log Loss Reduction: 0.80 + // Log Loss: 0.57 + // Log Loss Reduction: 0.48 // Confusion table // ||======================== // PREDICTED || 0 | 1 | 2 | Recall // TRUTH ||======================== - // 0 || 145 | 0 | 15 | 0.9063 - // 1 || 0 | 164 | 13 | 0.9266 - // 2 || 12 | 7 | 144 | 0.8834 + // 0 || 147 | 0 | 13 | 0.9188 + // 1 || 0 | 165 | 12 | 0.9322 + // 2 || 11 | 8 | 144 | 0.8834 // ||======================== - // Precision ||0.9236 |0.9591 |0.8372 |"; + // Precision ||0.9304 |0.9538 |0.8521 |"; #> \ No newline at end of file From d18c419fef235cce982f92f4c0788959ce001d3b Mon Sep 17 00:00:00 2001 From: sierralee51 <36902786+sierralee51@users.noreply.github.com> Date: Mon, 1 Jul 2019 13:17:47 -0700 Subject: [PATCH 02/44] fixing errors --- .../SdcaMaximumEntropyWithOptions.cs | 56 ++++++------- .../SdcaNonCalibrated.cs | 72 ++++++----------- .../SdcaNonCalibrated.tt | 3 +- .../SdcaNonCalibratedWithOptions.cs | 78 +++++++++---------- 4 files changed, 88 insertions(+), 121 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs index d4205cb71d..ab030b46b2 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs @@ -12,25 +12,25 @@ public static class SdcaMaximumEntropyWithOptions public static void Example() { // Create a new context for ML.NET 
operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // ML.NET doesn't cache data set by default. Therefore, if one reads a - // data set from a file and accesses it many times, it can be slow due - // to expensive featurization and disk operations. When the considered - // data can fit into memory, a solution is to cache the data in memory. - // Caching is especially helpful when working with iterative algorithms - // which needs many data passes. - trainingData = mlContext.Data.Cache(trainingData); + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms + // which needs many data passes. + trainingData = mlContext.Data.Cache(trainingData); // Define trainer options. var options = new SdcaMaximumEntropyMulticlassTrainer.Options @@ -42,34 +42,34 @@ public static void Example() }; // Define the trainer. - var pipeline = - // Convert the string labels into key types. + var pipeline = + // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") // Apply SdcaMaximumEntropy multiclass trainer. 
.Append(mlContext.MulticlassClassification.Trainers - .SdcaMaximumEntropy(options)); - + .SdcaMaximumEntropy(options)); + // Train the model. var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -80,10 +80,10 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); - + // Expected output: // Micro Accuracy: 0.92 // Macro Accuracy: 0.92 @@ -104,7 +104,7 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. private static IEnumerable GenerateRandomDataPoints(int count, - int seed = 0) + int seed=0) { var random = new Random(seed); @@ -118,16 +118,16 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. 
Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. private class DataPoint { public uint Label { get; set; } @@ -151,9 +151,9 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } -} \ No newline at end of file +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs index 912e409e81..b63449958c 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs @@ -10,58 +10,45 @@ public static class SdcaNonCalibrated { public static void Example() { - // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. + // Setting the seed to a fixed number in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. 
var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); - // ML.NET doesn't cache data set by default. Therefore, if one reads a - // data set from a file and accesses it many times, it can be slow due - // to expensive featurization and disk operations. When the considered - // data can fit into memory, a solution is to cache the data in memory. - // Caching is especially helpful when working with iterative algorithms - // which needs many data passes. - trainingData = mlContext.Data.Cache(trainingData); + // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times, + // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory, + // a solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms + // which needs many data passes. + trainingData = mlContext.Data.Cache(trainingData); // Define the trainer. var pipeline = // Convert the string labels into key types. - mlContext.Transforms.Conversion - .MapValueToKey(nameof(DataPoint.Label)) - + mlContext.Transforms.Conversion.MapValueToKey(nameof(DataPoint.Label)) // Apply SdcaNonCalibrated multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers - .SdcaNonCalibrated()); - + .Append(mlContext.MulticlassClassification.Trainers.SdcaNonCalibrated()); // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different - // from training data. - var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + // Create testing data. Use different random seed to make it different from training data. 
+ var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. - var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -71,11 +58,9 @@ public static void Example() // Label: 3, Prediction: 3 // Evaluate the overall metrics - var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); - + var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); PrintMetrics(metrics); - + // Expected output: // Micro Accuracy: 0.91 // Macro Accuracy: 0.91 @@ -93,11 +78,8 @@ public static void Example() // Precision ||0.9304 |0.9538 |0.8521 | } - // Generates random uniform doubles in [-0.5, 0.5) - // range with labels 1, 2 or 3. - private static IEnumerable GenerateRandomDataPoints(int count, - int seed=0) - + // Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3. + private static IEnumerable GenerateRandomDataPoints(int count, int seed = 0) { var random = new Random(seed); float randomFloat() => (float)(random.NextDouble() - 0.5); @@ -109,17 +91,13 @@ private static IEnumerable GenerateRandomDataPoints(int count, { Label = (uint)label, // Create random features that are correlated with the label. - // The feature values are slightly increased by adding a - // constant multiple of label. 
- Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() - + // The feature values are slightly increased by adding a constant multiple of label. + Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() }; } } - // Example with label and 20 feature values. A data set is a collection of - // such examples. + // Example with label and 20 feature values. A data set is a collection of such examples. private class DataPoint { public uint Label { get; set; } @@ -142,10 +120,8 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Micro Accuracy: {metrics.MicroAccuracy:F2}"); Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); - Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); - + Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } -} +} \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.tt index cd403b1a44..5437228267 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.tt @@ -8,8 +8,7 @@ string TrainerOptions = null; string OptionsInclude = ""; string Comments = ""; bool CacheData = true; -string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5)" - + "\n // range with labels 1, 2 or 3."; +string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3."; string ExpectedOutputPerInstance = @"// Expected output: // Label: 1, Prediction: 1 diff --git 
a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs index 9f1cd73e9b..912e409e81 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs @@ -3,73 +3,65 @@ using System.Linq; using Microsoft.ML; using Microsoft.ML.Data; -using Microsoft.ML.Trainers; namespace Samples.Dynamic.Trainers.MulticlassClassification { - public static class SdcaNonCalibratedWithOptions + public static class SdcaNonCalibrated { public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // ML.NET doesn't cache data set by default. Therefore, if one reads a - // data set from a file and accesses it many times, it can be slow due - // to expensive featurization and disk operations. When the considered - // data can fit into memory, a solution is to cache the data in memory. - // Caching is especially helpful when working with iterative algorithms - // which needs many data passes. 
- trainingData = mlContext.Data.Cache(trainingData); - - // Define trainer options. - var options = new SdcaNonCalibratedMulticlassTrainer.Options - { - Loss = new HingeLoss(), - L1Regularization = 0.1f, - BiasLearningRate = 0.01f, - NumberOfThreads = 1 - }; + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms + // which needs many data passes. + trainingData = mlContext.Data.Cache(trainingData); // Define the trainer. var pipeline = // Convert the string labels into key types. - mlContext.Transforms.Conversion.MapValueToKey("Label") + mlContext.Transforms.Conversion + .MapValueToKey(nameof(DataPoint.Label)) + // Apply SdcaNonCalibrated multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers - .SdcaNonCalibrated(options)); + .SdcaNonCalibrated()); // Train the model. var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -80,31 +72,31 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); - + // Expected output: // Micro Accuracy: 0.91 // Macro Accuracy: 0.91 - // Log Loss: 0.22 - // Log Loss Reduction: 0.80 + // Log Loss: 0.57 + // Log Loss Reduction: 0.48 // Confusion table // ||======================== // PREDICTED || 0 | 1 | 2 | Recall // TRUTH ||======================== - // 0 || 145 | 0 | 15 | 0.9063 - // 1 || 0 | 164 | 13 | 0.9266 - // 2 || 12 | 7 | 144 | 0.8834 + // 0 || 147 | 0 | 13 | 0.9188 + // 1 || 0 | 165 | 12 | 0.9322 + // 2 || 11 | 8 | 144 | 0.8834 // ||======================== - // Precision ||0.9236 |0.9591 |0.8372 | + // Precision ||0.9304 |0.9538 |0.8521 | } // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. private static IEnumerable GenerateRandomDataPoints(int count, - int seed = 0) + int seed=0) { var random = new Random(seed); @@ -118,16 +110,16 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } } // Example with label and 20 feature values. 
A data set is a collection of - // such examples. + // such examples. private class DataPoint { public uint Label { get; set; } @@ -151,9 +143,9 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } -} \ No newline at end of file +} From 0b4aff68f031246785aadce04e9f6d39d509434d Mon Sep 17 00:00:00 2001 From: sierralee51 <36902786+sierralee51@users.noreply.github.com> Date: Mon, 1 Jul 2019 13:36:26 -0700 Subject: [PATCH 03/44] reformatted MulticlassClassification samples --- .../LbfgsMaximumEntropy.cs | 42 +++++------ .../LbfgsMaximumEntropyWithOptions.cs | 54 +++++++------- .../MulticlassClassification/LightGbm.cs | 42 +++++------ .../LightGbmWithOptions.cs | 60 ++++++++-------- .../MulticlassClassification/NaiveBayes.cs | 56 +++++++-------- .../MulticlassClassification/OneVersusAll.cs | 44 ++++++------ .../PairwiseCoupling.cs | 42 +++++------ .../SdcaMaximumEntropy.cs | 52 +++++++------- .../SdcaNonCalibrated.cs | 72 ++++++++++++------- .../SdcaNonCalibrated.tt | 3 +- .../SdcaNonCalibratedWithOptions.cs | 36 ++++++---- .../SdcaNonCalibratedWithOptions.tt | 24 ++++--- 12 files changed, 283 insertions(+), 244 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs index f280564b7a..59adefec38 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs @@ -11,49 +11,49 @@ public static class 
LbfgsMaximumEntropy public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. var pipeline = // Convert the string labels into key types. mlContext.Transforms.Conversion - .MapValueToKey(nameof(DataPoint.Label)) + .MapValueToKey(nameof(DataPoint.Label)) // Apply LbfgsMaximumEntropy multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers - .LbfgsMaximumEntropy()); + .LbfgsMaximumEntropy()); // Train the model. var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -64,16 +64,16 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); - + // Expected output: // Micro Accuracy: 0.91 // Macro Accuracy: 0.91 // Log Loss: 0.24 // Log Loss Reduction: 0.79 - + // Confusion table // ||======================== // PREDICTED || 0 | 1 | 2 | Recall @@ -88,7 +88,7 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. private static IEnumerable GenerateRandomDataPoints(int count, - int seed = 0) + int seed=0) { var random = new Random(seed); @@ -102,16 +102,16 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. 
private class DataPoint { public uint Label { get; set; } @@ -135,9 +135,9 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } -} \ No newline at end of file +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs index 5e0f57caab..4f33bdeb2b 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs @@ -12,55 +12,55 @@ public static class LbfgsMaximumEntropyWithOptions public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define trainer options. 
var options = new LbfgsMaximumEntropyMulticlassTrainer.Options - { - HistorySize = 50, - L1Regularization = 0.1f, - NumberOfThreads = 1 - }; + { + HistorySize = 50, + L1Regularization = 0.1f, + NumberOfThreads = 1 + }; // Define the trainer. - var pipeline = - // Convert the string labels into key types. + var pipeline = + // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") // Apply LbfgsMaximumEntropy multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers - .LbfgsMaximumEntropy(options)); - + .LbfgsMaximumEntropy(options)); + // Train the model. var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -71,10 +71,10 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); - + // Expected output: // Micro Accuracy: 0.91 // Macro Accuracy: 0.91 @@ -95,7 +95,7 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. 
private static IEnumerable GenerateRandomDataPoints(int count, - int seed = 0) + int seed=0) { var random = new Random(seed); @@ -109,16 +109,16 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. private class DataPoint { public uint Label { get; set; } @@ -142,9 +142,9 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } -} \ No newline at end of file +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs index 3376bfcf33..2092a7785b 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs @@ -14,49 +14,49 @@ public static class LightGbm public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. 
+ // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. var pipeline = // Convert the string labels into key types. mlContext.Transforms.Conversion - .MapValueToKey(nameof(DataPoint.Label)) + .MapValueToKey(nameof(DataPoint.Label)) // Apply LightGbm multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers - .LightGbm()); + .LightGbm()); // Train the model. var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -67,16 +67,16 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); - + // Expected output: // Micro Accuracy: 0.99 // Macro Accuracy: 0.99 // Log Loss: 0.05 // Log Loss Reduction: 0.95 - + // Confusion table // ||======================== // PREDICTED || 0 | 1 | 2 | Recall @@ -91,7 +91,7 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. private static IEnumerable GenerateRandomDataPoints(int count, - int seed = 0) + int seed=0) { var random = new Random(seed); @@ -105,16 +105,16 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. 
private class DataPoint { public uint Label { get; set; } @@ -138,9 +138,9 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } -} \ No newline at end of file +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs index c5a7f42fef..c68a2cd032 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs @@ -15,57 +15,57 @@ public static class LightGbmWithOptions public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define trainer options. 
var options = new LightGbmMulticlassTrainer.Options - { - Booster = new DartBooster.Options() - { - TreeDropFraction = 0.15, - XgboostDartMode = false - } - }; + { + Booster = new DartBooster.Options() + { + TreeDropFraction = 0.15, + XgboostDartMode = false + } + }; // Define the trainer. - var pipeline = - // Convert the string labels into key types. + var pipeline = + // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") // Apply LightGbm multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers - .LightGbm(options)); - + .LightGbm(options)); + // Train the model. var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -76,16 +76,16 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); - + // Expected output: // Micro Accuracy: 0.98 // Macro Accuracy: 0.98 // Log Loss: 0.07 // Log Loss Reduction: 0.94 - + // Confusion table // ||======================== // PREDICTED || 0 | 1 | 2 | Recall @@ -100,7 +100,7 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. private static IEnumerable GenerateRandomDataPoints(int count, - int seed = 0) + int seed=0) { var random = new Random(seed); @@ -114,16 +114,16 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. 
private class DataPoint { public uint Label { get; set; } @@ -147,9 +147,9 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } -} \ No newline at end of file +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs index aa4c714873..1e654c25fd 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs @@ -10,56 +10,56 @@ public static class NaiveBayes { // Naive Bayes classifier is based on Bayes' theorem. // It assumes independence among the presence of features in a class even - // though they may be dependent on each other. It is a multi-class trainer - // that accepts binary feature values of type float, i.e., feature values - // are either true or false. Specifically a feature value greater than zero - // is treated as true, zero or less is treated as false. + // though they may be dependent on each other. It is a multi-class trainer + // that accepts binary feature values of type float, i.e., feature values + // are either true or false. Specifically a feature value greater than zero + // is treated as true, zero or less is treated as false. public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. 
+ // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. var pipeline = // Convert the string labels into key types. mlContext.Transforms.Conversion - .MapValueToKey(nameof(DataPoint.Label)) + .MapValueToKey(nameof(DataPoint.Label)) // Apply NaiveBayes multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers - .NaiveBayes()); + .NaiveBayes()); // Train the model. var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -70,16 +70,16 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); - + // Expected output: // Micro Accuracy: 0.88 // Macro Accuracy: 0.88 // Log Loss: 34.54 // Log Loss Reduction: -30.47 - + // Confusion table // ||======================== // PREDICTED || 0 | 1 | 2 | Recall @@ -91,12 +91,12 @@ public static void Example() // Precision ||0.9467 |0.8735 |0.8061 | } - + // Generates random uniform doubles in [-0.5, 0.5) range with labels - // 1, 2 or 3. For NaiveBayes values greater than zero are treated as true, - // zero or less are treated as false. + // 1, 2 or 3. For NaiveBayes values greater than zero are treated as true, + // zero or less are treated as false. private static IEnumerable GenerateRandomDataPoints(int count, - int seed = 0) + int seed=0) { var random = new Random(seed); @@ -110,16 +110,16 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. 
private class DataPoint { public uint Label { get; set; } @@ -143,9 +143,9 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } -} \ No newline at end of file +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs index 3893736257..001641831d 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs @@ -11,16 +11,16 @@ public static class OneVersusAll public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. @@ -28,32 +28,32 @@ public static void Example() // Convert the string labels into key types. 
mlContext.Transforms.Conversion.MapValueToKey("Label") // Apply OneVersusAll multiclass meta trainer on top of - // binary trainer. + // binary trainer. .Append(mlContext.MulticlassClassification.Trainers - .OneVersusAll( - mlContext.BinaryClassification.Trainers.SdcaLogisticRegression())); + .OneVersusAll( + mlContext.BinaryClassification.Trainers.SdcaLogisticRegression())); // Train the model. var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -64,16 +64,16 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); - + // Expected output: // Micro Accuracy: 0.90 // Macro Accuracy: 0.90 // Log Loss: 0.36 // Log Loss Reduction: 0.68 - + // Confusion table // ||======================== // PREDICTED || 0 | 1 | 2 | Recall @@ -88,7 +88,7 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. 
private static IEnumerable GenerateRandomDataPoints(int count, - int seed = 0) + int seed=0) { var random = new Random(seed); @@ -102,16 +102,16 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. private class DataPoint { public uint Label { get; set; } @@ -135,9 +135,9 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } -} \ No newline at end of file +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs index 6b00af2c39..e9f49f4454 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs @@ -11,16 +11,16 @@ public static class PairwiseCoupling public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. 
Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. @@ -28,32 +28,32 @@ public static void Example() // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") // Apply PairwiseCoupling multiclass meta trainer on top of - // binary trainer. + // binary trainer. .Append(mlContext.MulticlassClassification.Trainers - .PairwiseCoupling( - mlContext.BinaryClassification.Trainers.SdcaLogisticRegression())); + .PairwiseCoupling( + mlContext.BinaryClassification.Trainers.SdcaLogisticRegression())); // Train the model. var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -64,10 +64,10 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); - + // Expected output: // Micro Accuracy: 0.90 // Macro Accuracy: 0.90 @@ -88,7 +88,7 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. private static IEnumerable GenerateRandomDataPoints(int count, - int seed = 0) + int seed=0) { var random = new Random(seed); @@ -102,16 +102,16 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. 
private class DataPoint { public uint Label { get; set; } @@ -135,9 +135,9 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } -} \ No newline at end of file +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs index 966cc14155..82832b06bd 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs @@ -11,57 +11,57 @@ public static class SdcaMaximumEntropy public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // ML.NET doesn't cache data set by default. 
Therefore, if one reads a - // data set from a file and accesses it many times, it can be slow due - // to expensive featurization and disk operations. When the considered - // data can fit into memory, a solution is to cache the data in memory. - // Caching is especially helpful when working with iterative algorithms - // which needs many data passes. - trainingData = mlContext.Data.Cache(trainingData); + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms + // which needs many data passes. + trainingData = mlContext.Data.Cache(trainingData); // Define the trainer. var pipeline = // Convert the string labels into key types. mlContext.Transforms.Conversion - .MapValueToKey(nameof(DataPoint.Label)) + .MapValueToKey(nameof(DataPoint.Label)) // Apply SdcaMaximumEntropy multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers - .SdcaMaximumEntropy()); + .SdcaMaximumEntropy()); // Train the model. var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -72,10 +72,10 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); - + // Expected output: // Micro Accuracy: 0.91 // Macro Accuracy: 0.91 @@ -95,7 +95,7 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. private static IEnumerable GenerateRandomDataPoints(int count, - int seed = 0) + int seed=0) { var random = new Random(seed); @@ -109,16 +109,16 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. 
private class DataPoint { public uint Label { get; set; } @@ -142,9 +142,9 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } -} \ No newline at end of file +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs index b63449958c..912e409e81 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs @@ -10,45 +10,58 @@ public static class SdcaNonCalibrated { public static void Example() { - // Create a new context for ML.NET operations. It can be used for exception tracking and logging, - // as a catalog of available operations and as the source of randomness. - // Setting the seed to a fixed number in this example to make outputs deterministic. + // Create a new context for ML.NET operations. It can be used for + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); - // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + // Convert the list of data points to an IDataView object, which is + // consumable by ML.NET API. 
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); - // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times, - // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory, - // a solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms - // which needs many data passes. - trainingData = mlContext.Data.Cache(trainingData); + // ML.NET doesn't cache data set by default. Therefore, if one reads a + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms + // which needs many data passes. + trainingData = mlContext.Data.Cache(trainingData); // Define the trainer. var pipeline = // Convert the string labels into key types. - mlContext.Transforms.Conversion.MapValueToKey(nameof(DataPoint.Label)) + mlContext.Transforms.Conversion + .MapValueToKey(nameof(DataPoint.Label)) + // Apply SdcaNonCalibrated multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers.SdcaNonCalibrated()); + .Append(mlContext.MulticlassClassification.Trainers + .SdcaNonCalibrated()); + // Train the model. var model = pipeline.Fit(trainingData); - // Create testing data. Use different random seed to make it different from training data. - var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + // Create testing data. Use different random seed to make it different + // from training data. + var testData = mlContext.Data + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
- var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + var predictions = mlContext.Data + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) - Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + Console.WriteLine($"Label: {p.Label}, " + + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -58,9 +71,11 @@ public static void Example() // Label: 3, Prediction: 3 // Evaluate the overall metrics - var metrics = mlContext.MulticlassClassification.Evaluate(transformedTestData); - PrintMetrics(metrics); + var metrics = mlContext.MulticlassClassification + .Evaluate(transformedTestData); + PrintMetrics(metrics); + // Expected output: // Micro Accuracy: 0.91 // Macro Accuracy: 0.91 @@ -78,8 +93,11 @@ public static void Example() // Precision ||0.9304 |0.9538 |0.8521 | } - // Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3. - private static IEnumerable GenerateRandomDataPoints(int count, int seed = 0) + // Generates random uniform doubles in [-0.5, 0.5) + // range with labels 1, 2 or 3. + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) + { var random = new Random(seed); float randomFloat() => (float)(random.NextDouble() - 0.5); @@ -91,13 +109,17 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se { Label = (uint)label, // Create random features that are correlated with the label. - // The feature values are slightly increased by adding a constant multiple of label. - Features = Enumerable.Repeat(label, 20).Select(x => randomFloat() + label * 0.2f).ToArray() + // The feature values are slightly increased by adding a + // constant multiple of label. + Features = Enumerable.Repeat(label, 20) + .Select(x => randomFloat() + label * 0.2f).ToArray() + }; } } - // Example with label and 20 feature values. 
A data set is a collection of such examples. + // Example with label and 20 feature values. A data set is a collection of + // such examples. private class DataPoint { public uint Label { get; set; } @@ -120,8 +142,10 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Micro Accuracy: {metrics.MicroAccuracy:F2}"); Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); - Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine( + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } -} \ No newline at end of file +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.tt index 5437228267..cd403b1a44 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.tt @@ -8,7 +8,8 @@ string TrainerOptions = null; string OptionsInclude = ""; string Comments = ""; bool CacheData = true; -string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5) range with labels 1, 2 or 3."; +string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5)" + + "\n // range with labels 1, 2 or 3."; string ExpectedOutputPerInstance = @"// Expected output: // Label: 1, Prediction: 1 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs index 912e409e81..da44e924aa 100644 --- 
a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs @@ -3,10 +3,11 @@ using System.Linq; using Microsoft.ML; using Microsoft.ML.Data; +using Microsoft.ML.Trainers; namespace Samples.Dynamic.Trainers.MulticlassClassification { - public static class SdcaNonCalibrated + public static class SdcaNonCalibratedWithOptions { public static void Example() { @@ -31,16 +32,23 @@ public static void Example() // which needs many data passes. trainingData = mlContext.Data.Cache(trainingData); - // Define the trainer. - var pipeline = - // Convert the string labels into key types. - mlContext.Transforms.Conversion - .MapValueToKey(nameof(DataPoint.Label)) + // Define trainer options. + var options = new SdcaNonCalibratedMulticlassTrainer.Options + { + Loss = new HingeLoss(), + L1Regularization = 0.1f, + BiasLearningRate = 0.01f, + NumberOfThreads = 1 + }; + // Define the trainer. + var pipeline = + // Convert the string labels into key types. + mlContext.Transforms.Conversion.MapValueToKey("Label") // Apply SdcaNonCalibrated multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers - .SdcaNonCalibrated()); - + .SdcaNonCalibrated(options)); + // Train the model. 
var model = pipeline.Fit(trainingData); @@ -79,18 +87,18 @@ public static void Example() // Expected output: // Micro Accuracy: 0.91 // Macro Accuracy: 0.91 - // Log Loss: 0.57 - // Log Loss Reduction: 0.48 + // Log Loss: 0.22 + // Log Loss Reduction: 0.80 // Confusion table // ||======================== // PREDICTED || 0 | 1 | 2 | Recall // TRUTH ||======================== - // 0 || 147 | 0 | 13 | 0.9188 - // 1 || 0 | 165 | 12 | 0.9322 - // 2 || 11 | 8 | 144 | 0.8834 + // 0 || 145 | 0 | 15 | 0.9063 + // 1 || 0 | 164 | 13 | 0.9266 + // 2 || 12 | 7 | 144 | 0.8834 // ||======================== - // Precision ||0.9304 |0.9538 |0.8521 | + // Precision ||0.9236 |0.9591 |0.8372 | } // Generates random uniform doubles in [-0.5, 0.5) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.tt index cd403b1a44..5f0f2a4775 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.tt @@ -1,11 +1,17 @@ <#@ include file="MulticlassClassification.ttinclude"#> <#+ -string ClassName = "SdcaNonCalibrated"; +string ClassName = "SdcaNonCalibratedWithOptions"; string Trainer = "SdcaNonCalibrated"; string MetaTrainer = null; -string TrainerOptions = null; +string TrainerOptions = @"SdcaNonCalibratedMulticlassTrainer.Options + { + Loss = new HingeLoss(), + L1Regularization = 0.1f, + BiasLearningRate = 0.01f, + NumberOfThreads = 1 + }"; -string OptionsInclude = ""; +string OptionsInclude = "using Microsoft.ML.Trainers;"; string Comments = ""; bool CacheData = true; string DataGenerationComments= "// Generates random uniform doubles in [-0.5, 0.5)" @@ -21,16 +27,16 @@ string ExpectedOutputPerInstance = @"// Expected output: string ExpectedOutput = 
@"// Expected output: // Micro Accuracy: 0.91 // Macro Accuracy: 0.91 - // Log Loss: 0.57 - // Log Loss Reduction: 0.48 + // Log Loss: 0.22 + // Log Loss Reduction: 0.80 // Confusion table // ||======================== // PREDICTED || 0 | 1 | 2 | Recall // TRUTH ||======================== - // 0 || 147 | 0 | 13 | 0.9188 - // 1 || 0 | 165 | 12 | 0.9322 - // 2 || 11 | 8 | 144 | 0.8834 + // 0 || 145 | 0 | 15 | 0.9063 + // 1 || 0 | 164 | 13 | 0.9266 + // 2 || 12 | 7 | 144 | 0.8834 // ||======================== - // Precision ||0.9304 |0.9538 |0.8521 |"; + // Precision ||0.9236 |0.9591 |0.8372 |"; #> \ No newline at end of file From f84939f07c215aa9540735232a2725bda2075a8c Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 13:43:16 -0700 Subject: [PATCH 04/44] Update LbfgsMaximumEntropy.cs getting rid of whitespace --- .../MulticlassClassification/LbfgsMaximumEntropy.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs index 59adefec38..538fd79635 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs @@ -11,16 +11,16 @@ public static class LbfgsMaximumEntropy public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. 
var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. From 130c6daab193c01c0d4ee0178bae01bb1cc89321 Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 13:43:41 -0700 Subject: [PATCH 05/44] Update LbfgsMaximumEntropy.cs --- .../Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs index 538fd79635..ca9ee99501 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs @@ -38,7 +38,7 @@ public static void Example() var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. 
var testData = mlContext.Data .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); From 71e4c198295832a422404a44af5a772d1c714abc Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 13:44:46 -0700 Subject: [PATCH 06/44] Update LbfgsMaximumEntropyWithOptions.cs getting rid of whitespace --- .../LbfgsMaximumEntropyWithOptions.cs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs index 4f33bdeb2b..8e9153e42b 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs @@ -12,16 +12,16 @@ public static class LbfgsMaximumEntropyWithOptions public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define trainer options. @@ -45,7 +45,7 @@ public static void Example() var model = pipeline.Fit(trainingData); // Create testing data. 
Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); @@ -118,7 +118,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. private class DataPoint { public uint Label { get; set; } From 8166627c354d06433c5215f827a41b224ef4894c Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 13:45:49 -0700 Subject: [PATCH 07/44] Update LightGbmWithOptions.cs fixing whitespace --- .../MulticlassClassification/LightGbmWithOptions.cs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs index c68a2cd032..824abf418a 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs @@ -15,16 +15,16 @@ public static class LightGbmWithOptions public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. 
var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define trainer options. @@ -50,7 +50,7 @@ public static void Example() var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); @@ -123,7 +123,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. private class DataPoint { public uint Label { get; set; } From 5ea92f3f10be159b78c87f470291f222661d2cc0 Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 13:46:26 -0700 Subject: [PATCH 08/44] Update LbfgsMaximumEntropy.cs --- .../Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs index ca9ee99501..af2b3ab2f5 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs @@ -111,7 +111,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. 
private class DataPoint { public uint Label { get; set; } From b63c891061297b8f2112f43bff561a107cb99334 Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 13:47:25 -0700 Subject: [PATCH 09/44] Update LightGbm.cs fixing whitespace --- .../Trainers/MulticlassClassification/LightGbm.cs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs index 2092a7785b..1c39dad324 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs @@ -14,16 +14,16 @@ public static class LightGbm public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. @@ -41,7 +41,7 @@ public static void Example() var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. 
var testData = mlContext.Data .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); @@ -114,7 +114,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. private class DataPoint { public uint Label { get; set; } From 4d37328acf94b647ae04eca25645b92211453a8f Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 13:50:52 -0700 Subject: [PATCH 10/44] Update LightGbm.cs fixing whitespace --- .../Dynamic/Trainers/MulticlassClassification/LightGbm.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs index 1c39dad324..34df10ea8b 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs @@ -105,7 +105,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. 
Features = Enumerable.Repeat(label, 20) .Select(x => randomFloat() + label * 0.2f).ToArray() From 6421a8baf53deb63e9c7aad2d90d700dc182fa05 Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 13:52:09 -0700 Subject: [PATCH 11/44] Update LightGbmWithOptions.cs fixing whitespace --- .../Trainers/MulticlassClassification/LightGbmWithOptions.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs index 824abf418a..16c73ff68b 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs @@ -39,7 +39,7 @@ public static void Example() // Define the trainer. var pipeline = - // Convert the string labels into key types. + // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") // Apply LightGbm multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers @@ -114,7 +114,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. 
Features = Enumerable.Repeat(label, 20) .Select(x => randomFloat() + label * 0.2f).ToArray() From 79093c83543a9b447ca19b87a3d92601fddf4281 Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 13:53:46 -0700 Subject: [PATCH 12/44] Update MulticlassClassification.ttinclude fixing whitespace --- .../MulticlassClassification.ttinclude | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude index 38b5a86dcf..2efcd99a97 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude @@ -14,16 +14,16 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); <# if (CacheData) { #> @@ -42,7 +42,7 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification // Convert the string labels into key types. 
mlContext.Transforms.Conversion.MapValueToKey("Label") // Apply <#=MetaTrainer#> multiclass meta trainer on top of - // binary trainer. + // binary trainer. .Append(mlContext.MulticlassClassification.Trainers .<#=MetaTrainer#>( <#=Trainer#>())); @@ -64,7 +64,7 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification // Define the trainer. var pipeline = - // Convert the string labels into key types. + // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") // Apply <#=Trainer#> multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers @@ -76,7 +76,7 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); @@ -120,7 +120,7 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. Features = Enumerable.Repeat(label, 20) .Select(x => randomFloat() + label * 0.2f).ToArray() @@ -129,7 +129,7 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. 
private class DataPoint { public uint Label { get; set; } @@ -158,4 +158,4 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } -} \ No newline at end of file +} From 4c2f7235d29adf3f5a13b98fc5c0bc1c88949475 Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 13:54:42 -0700 Subject: [PATCH 13/44] Update MulticlassClassification.ttinclude fixing whitespace --- .../MulticlassClassification.ttinclude | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude index 2efcd99a97..9790c6a359 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude @@ -28,11 +28,11 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification <# if (CacheData) { #> // ML.NET doesn't cache data set by default. Therefore, if one reads a - // data set from a file and accesses it many times, it can be slow due - // to expensive featurization and disk operations. When the considered - // data can fit into memory, a solution is to cache the data in memory. - // Caching is especially helpful when working with iterative algorithms - // which needs many data passes. + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms + // which needs many data passes. 
trainingData = mlContext.Data.Cache(trainingData); <# } #> From 4a48dc5db416cadfaf422ba0c78a66ae627f5071 Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 13:56:04 -0700 Subject: [PATCH 14/44] Update NaiveBayes.cs fixing whitespace --- .../MulticlassClassification/NaiveBayes.cs | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs index 1e654c25fd..354352b3c7 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs @@ -10,23 +10,23 @@ public static class NaiveBayes { // Naive Bayes classifier is based on Bayes' theorem. // It assumes independence among the presence of features in a class even - // though they may be dependent on each other. It is a multi-class trainer - // that accepts binary feature values of type float, i.e., feature values - // are either true or false. Specifically a feature value greater than zero - // is treated as true, zero or less is treated as false. + // though they may be dependent on each other. It is a multi-class trainer + // that accepts binary feature values of type float, i.e., feature values + // are either true or false. Specifically a feature value greater than zero + // is treated as true, zero or less is treated as false. public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. 
Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. @@ -44,7 +44,7 @@ public static void Example() var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); @@ -93,8 +93,8 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) range with labels - // 1, 2 or 3. For NaiveBayes values greater than zero are treated as true, - // zero or less are treated as false. + // 1, 2 or 3. For NaiveBayes values greater than zero are treated as true, + // zero or less are treated as false. private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) @@ -110,7 +110,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. Features = Enumerable.Repeat(label, 20) .Select(x => randomFloat() + label * 0.2f).ToArray() @@ -119,7 +119,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. 
private class DataPoint { public uint Label { get; set; } From cc1b3f8c79bd6ef630941071333ac24d03f60bb5 Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 13:56:38 -0700 Subject: [PATCH 15/44] Update NaiveBayes.tt fixing whitespace --- .../MulticlassClassification/NaiveBayes.tt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.tt index 04ace790a7..ca83a2b378 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.tt @@ -10,15 +10,15 @@ string OptionsInclude = ""; string Comments= @" // Naive Bayes classifier is based on Bayes' theorem. // It assumes independence among the presence of features in a class even - // though they may be dependent on each other. It is a multi-class trainer - // that accepts binary feature values of type float, i.e., feature values - // are either true or false. Specifically a feature value greater than zero - // is treated as true, zero or less is treated as false."; + // though they may be dependent on each other. It is a multi-class trainer + // that accepts binary feature values of type float, i.e., feature values + // are either true or false. Specifically a feature value greater than zero + // is treated as true, zero or less is treated as false."; string DataGenerationComments= @" // Generates random uniform doubles in [-0.5, 0.5) range with labels - // 1, 2 or 3. For NaiveBayes values greater than zero are treated as true, - // zero or less are treated as false."; + // 1, 2 or 3. 
For NaiveBayes values greater than zero are treated as true, + // zero or less are treated as false."; string ExpectedOutputPerInstance= @"// Expected output: // Label: 1, Prediction: 1 @@ -42,4 +42,4 @@ string ExpectedOutput = @"// Expected output: // 2 || 9 | 21 | 133 | 0.8160 // ||======================== // Precision ||0.9467 |0.8735 |0.8061 |"; -#> \ No newline at end of file +#> From c6d4cfd7bb3cdcc606180293c432ac54fd5813d7 Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 13:57:09 -0700 Subject: [PATCH 16/44] Update NaiveBayes.tt From 97b3c9959404868c56d1ca00200dbfb6acf2885c Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 13:58:07 -0700 Subject: [PATCH 17/44] Update OneVersusAll.cs fixing whitespace --- .../MulticlassClassification/OneVersusAll.cs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs index 001641831d..bab768a9d7 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs @@ -11,16 +11,16 @@ public static class OneVersusAll public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. 
var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. @@ -28,7 +28,7 @@ public static void Example() // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") // Apply OneVersusAll multiclass meta trainer on top of - // binary trainer. + // binary trainer. .Append(mlContext.MulticlassClassification.Trainers .OneVersusAll( mlContext.BinaryClassification.Trainers.SdcaLogisticRegression())); @@ -38,7 +38,7 @@ public static void Example() var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); @@ -102,7 +102,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. Features = Enumerable.Repeat(label, 20) .Select(x => randomFloat() + label * 0.2f).ToArray() @@ -111,7 +111,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. 
private class DataPoint { public uint Label { get; set; } From 6be7d8ae57733338777db46582f525311fac1bf2 Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 13:59:01 -0700 Subject: [PATCH 18/44] Update PairwiseCoupling.cs fixing whitespace --- .../MulticlassClassification/PairwiseCoupling.cs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs index e9f49f4454..f933989392 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs @@ -11,16 +11,16 @@ public static class PairwiseCoupling public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. @@ -38,7 +38,7 @@ public static void Example() var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. 
var testData = mlContext.Data .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); @@ -102,7 +102,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. Features = Enumerable.Repeat(label, 20) .Select(x => randomFloat() + label * 0.2f).ToArray() @@ -111,7 +111,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. private class DataPoint { public uint Label { get; set; } From 3f7418790670b8c1aa80cebf7d66fc301e9f3004 Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 14:00:26 -0700 Subject: [PATCH 19/44] Update SdcaMaximumEntropy.cs fixing whitespace --- .../SdcaMaximumEntropy.cs | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs index 82832b06bd..be4cf22296 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs @@ -11,24 +11,24 @@ public static class SdcaMaximumEntropy public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. 
Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // ML.NET doesn't cache data set by default. Therefore, if one reads a - // data set from a file and accesses it many times, it can be slow due - // to expensive featurization and disk operations. When the considered - // data can fit into memory, a solution is to cache the data in memory. - // Caching is especially helpful when working with iterative algorithms - // which needs many data passes. + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms + // which needs many data passes. trainingData = mlContext.Data.Cache(trainingData); // Define the trainer. @@ -46,7 +46,7 @@ public static void Example() var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); @@ -109,7 +109,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. 
Features = Enumerable.Repeat(label, 20) .Select(x => randomFloat() + label * 0.2f).ToArray() @@ -118,7 +118,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. private class DataPoint { public uint Label { get; set; } From 0d23ad83845f9c6232f3da7754aaeb90a8bf5032 Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 14:02:59 -0700 Subject: [PATCH 20/44] Update SdcaMaximumEntropyWithOptions.cs fixing whitespace --- .../SdcaMaximumEntropyWithOptions.cs | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs index ab030b46b2..0501abb458 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs @@ -12,24 +12,24 @@ public static class SdcaMaximumEntropyWithOptions public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. 
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // ML.NET doesn't cache data set by default. Therefore, if one reads a - // data set from a file and accesses it many times, it can be slow due - // to expensive featurization and disk operations. When the considered - // data can fit into memory, a solution is to cache the data in memory. - // Caching is especially helpful when working with iterative algorithms - // which needs many data passes. + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms + // which needs many data passes. trainingData = mlContext.Data.Cache(trainingData); // Define trainer options. @@ -43,7 +43,7 @@ public static void Example() // Define the trainer. var pipeline = - // Convert the string labels into key types. + // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") // Apply SdcaMaximumEntropy multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers @@ -54,7 +54,7 @@ public static void Example() var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); @@ -118,7 +118,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. 
Features = Enumerable.Repeat(label, 20) .Select(x => randomFloat() + label * 0.2f).ToArray() @@ -127,7 +127,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. private class DataPoint { public uint Label { get; set; } From c728577cd70a6149d0843a0eef802fe30b0601d2 Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 14:04:17 -0700 Subject: [PATCH 21/44] Update SdcaNonCalibrated.cs fixing whitespace --- .../SdcaNonCalibrated.cs | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs index 912e409e81..cf3d4d5f09 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs @@ -11,24 +11,24 @@ public static class SdcaNonCalibrated public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. 
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // ML.NET doesn't cache data set by default. Therefore, if one reads a - // data set from a file and accesses it many times, it can be slow due - // to expensive featurization and disk operations. When the considered - // data can fit into memory, a solution is to cache the data in memory. - // Caching is especially helpful when working with iterative algorithms - // which needs many data passes. + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms + // which needs many data passes. trainingData = mlContext.Data.Cache(trainingData); // Define the trainer. @@ -46,7 +46,7 @@ public static void Example() var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); @@ -110,7 +110,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. Features = Enumerable.Repeat(label, 20) .Select(x => randomFloat() + label * 0.2f).ToArray() @@ -119,7 +119,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. 
private class DataPoint { public uint Label { get; set; } From 6e82a100af43b975d5152afda0276f9e131b54b7 Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 14:05:48 -0700 Subject: [PATCH 22/44] Update SdcaNonCalibratedWithOptions.cs fixing whitespace --- .../SdcaNonCalibratedWithOptions.cs | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs index da44e924aa..44e40ff6df 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs @@ -12,25 +12,25 @@ public static class SdcaNonCalibratedWithOptions public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // ML.NET doesn't cache data set by default. Therefore, if one reads a - // data set from a file and accesses it many times, it can be slow due - // to expensive featurization and disk operations. 
When the considered - // data can fit into memory, a solution is to cache the data in memory. - // Caching is especially helpful when working with iterative algorithms - // which needs many data passes. - trainingData = mlContext.Data.Cache(trainingData); + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms + // which needs many data passes. + trainingData = mlContext.Data.Cache(trainingData); // Define trainer options. var options = new SdcaNonCalibratedMulticlassTrainer.Options @@ -43,7 +43,7 @@ public static void Example() // Define the trainer. var pipeline = - // Convert the string labels into key types. + // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") // Apply SdcaNonCalibrated multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers @@ -54,7 +54,7 @@ public static void Example() var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); @@ -118,7 +118,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. Features = Enumerable.Repeat(label, 20) .Select(x => randomFloat() + label * 0.2f).ToArray() @@ -127,7 +127,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. 
private class DataPoint { public uint Label { get; set; } From 1f1ff3b4aa2437a54e1b5e63aec1a08ad5ed4f61 Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 14:07:04 -0700 Subject: [PATCH 23/44] Update SdcaNonCalibrated.cs fixing whitespace --- .../Trainers/MulticlassClassification/SdcaNonCalibrated.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs index cf3d4d5f09..a128c7a65f 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs @@ -29,7 +29,7 @@ public static void Example() // data can fit into memory, a solution is to cache the data in memory. // Caching is especially helpful when working with iterative algorithms // which needs many data passes. - trainingData = mlContext.Data.Cache(trainingData); + trainingData = mlContext.Data.Cache(trainingData); // Define the trainer. 
var pipeline = From 239e26ad33fb0cef8386954b94233778ede210ee Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 14:08:05 -0700 Subject: [PATCH 24/44] Update SdcaNonCalibrated.cs --- .../Trainers/MulticlassClassification/SdcaNonCalibrated.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs index a128c7a65f..cf33d4ae1b 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs @@ -56,7 +56,7 @@ public static void Example() // Convert IDataView object to a list. var predictions = mlContext.Data .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) From 32db971a935efa1cb7acda5ab33ec846fac6f860 Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 14:15:41 -0700 Subject: [PATCH 25/44] Update LbfgsMaximumEntropy.cs --- .../LbfgsMaximumEntropy.cs | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs index af2b3ab2f5..db93fac36e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs @@ -27,11 +27,11 @@ public static void Example() var pipeline = // Convert the string labels into key types. 
mlContext.Transforms.Conversion - .MapValueToKey(nameof(DataPoint.Label)) + .MapValueToKey(nameof(DataPoint.Label)) // Apply LbfgsMaximumEntropy multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers - .LbfgsMaximumEntropy()); + .LbfgsMaximumEntropy()); // Train the model. @@ -40,20 +40,20 @@ public static void Example() // Create testing data. Use different random seed to make it different // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -64,7 +64,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); @@ -87,8 +87,8 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. - private static IEnumerable GenerateRandomDataPoints(int count, - int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) { var random = new Random(seed); @@ -102,9 +102,9 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. 
Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } From e3e595bebd4512a2b716e8df75f4d839885ae264 Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 14:17:08 -0700 Subject: [PATCH 26/44] Update LbfgsMaximumEntropy.cs --- .../Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs index db93fac36e..d24869e6a4 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs @@ -135,7 +135,7 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } From 969af4df73502a84fb22a1422c9b18898387ff0c Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 14:19:09 -0700 Subject: [PATCH 27/44] Update LbfgsMaximumEntropyWithOptions.cs --- .../LbfgsMaximumEntropyWithOptions.cs | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs index 8e9153e42b..89728c59f9 100644 --- 
a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs @@ -34,11 +34,11 @@ public static void Example() // Define the trainer. var pipeline = - // Convert the string labels into key types. + // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") // Apply LbfgsMaximumEntropy multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers - .LbfgsMaximumEntropy(options)); + .LbfgsMaximumEntropy(options)); // Train the model. @@ -47,20 +47,20 @@ public static void Example() // Create testing data. Use different random seed to make it different // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -71,7 +71,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); @@ -95,7 +95,7 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. 
private static IEnumerable GenerateRandomDataPoints(int count, - int seed=0) + int seed=0) { var random = new Random(seed); @@ -109,9 +109,9 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } @@ -142,7 +142,7 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } From b18c50853a5dfd45eb73fc2dfc212e9ba5b6e391 Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 14:22:33 -0700 Subject: [PATCH 28/44] Update LightGbm.cs --- .../MulticlassClassification/LightGbm.cs | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs index 34df10ea8b..0d898b6790 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs @@ -30,11 +30,11 @@ public static void Example() var pipeline = // Convert the string labels into key types. mlContext.Transforms.Conversion - .MapValueToKey(nameof(DataPoint.Label)) + .MapValueToKey(nameof(DataPoint.Label)) // Apply LightGbm multiclass trainer. 
.Append(mlContext.MulticlassClassification.Trainers - .LightGbm()); + .LightGbm()); // Train the model. @@ -43,20 +43,20 @@ public static void Example() // Create testing data. Use different random seed to make it different // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -67,7 +67,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); @@ -91,7 +91,7 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. private static IEnumerable GenerateRandomDataPoints(int count, - int seed=0) + int seed=0) { var random = new Random(seed); @@ -107,7 +107,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, // The feature values are slightly increased by adding a // constant multiple of label. 
Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } @@ -138,7 +138,7 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } From 1af15548da2c3a8c4998d01f888159969d61d5b3 Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 14:24:53 -0700 Subject: [PATCH 29/44] Update LightGbmWithOptions.cs --- .../LightGbmWithOptions.cs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs index 16c73ff68b..322b371ac1 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs @@ -43,7 +43,7 @@ public static void Example() mlContext.Transforms.Conversion.MapValueToKey("Label") // Apply LightGbm multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers - .LightGbm(options)); + .LightGbm(options)); // Train the model. @@ -52,20 +52,20 @@ public static void Example() // Create testing data. Use different random seed to make it different // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. 
var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -76,7 +76,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); @@ -100,7 +100,7 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. private static IEnumerable GenerateRandomDataPoints(int count, - int seed=0) + int seed=0) { var random = new Random(seed); @@ -116,7 +116,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, // The feature values are slightly increased by adding a // constant multiple of label. 
Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } @@ -147,7 +147,7 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } From d0e1a019f4af4de36399867a0b42a8a2267f4cfb Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 14:28:34 -0700 Subject: [PATCH 30/44] Update MulticlassClassification.ttinclude --- .../MulticlassClassification.ttinclude | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude index 9790c6a359..01973d367b 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude @@ -33,7 +33,7 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification // data can fit into memory, a solution is to cache the data in memory. // Caching is especially helpful when working with iterative algorithms // which needs many data passes. - trainingData = mlContext.Data.Cache(trainingData); + trainingData = mlContext.Data.Cache(trainingData); <# } #> <# if (MetaTrainer != null) { #> @@ -44,19 +44,19 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification // Apply <#=MetaTrainer#> multiclass meta trainer on top of // binary trainer. 
.Append(mlContext.MulticlassClassification.Trainers - .<#=MetaTrainer#>( - <#=Trainer#>())); + .<#=MetaTrainer#>( + <#=Trainer#>())); <# } else if (TrainerOptions == null) { #> // Define the trainer. var pipeline = // Convert the string labels into key types. mlContext.Transforms.Conversion - .MapValueToKey(nameof(DataPoint.Label)) + .MapValueToKey(nameof(DataPoint.Label)) // Apply <#=Trainer#> multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers - .<#=Trainer#>()); + .<#=Trainer#>()); <# } else { #> // Define trainer options. @@ -68,7 +68,7 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification mlContext.Transforms.Conversion.MapValueToKey("Label") // Apply <#=Trainer#> multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers - .<#=Trainer#>(options)); + .<#=Trainer#>(options)); <# } #> @@ -78,26 +78,26 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification // Create testing data. Use different random seed to make it different // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); <#=ExpectedOutputPerInstance#> // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); @@ -106,7 +106,7 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification <#=DataGenerationComments#> private static IEnumerable GenerateRandomDataPoints(int count, - int seed=0) + int seed=0) { var random = new Random(seed); @@ -122,7 +122,7 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification // The feature values are slightly increased by adding a // constant multiple of label. Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } @@ -153,7 +153,7 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } From 092808dc5a532ed56a651a29a20866945766504a Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 14:30:27 -0700 Subject: [PATCH 31/44] Update NaiveBayes.cs --- .../MulticlassClassification/NaiveBayes.cs | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs 
b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs index 354352b3c7..0afd943982 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs @@ -33,11 +33,11 @@ public static void Example() var pipeline = // Convert the string labels into key types. mlContext.Transforms.Conversion - .MapValueToKey(nameof(DataPoint.Label)) + .MapValueToKey(nameof(DataPoint.Label)) // Apply NaiveBayes multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers - .NaiveBayes()); + .NaiveBayes()); // Train the model. @@ -46,20 +46,20 @@ public static void Example() // Create testing data. Use different random seed to make it different // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -70,7 +70,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); @@ -96,7 +96,7 @@ public static void Example() // 1, 2 or 3. For NaiveBayes values greater than zero are treated as true, // zero or less are treated as false. 
private static IEnumerable GenerateRandomDataPoints(int count, - int seed=0) + int seed=0) { var random = new Random(seed); @@ -112,7 +112,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, // The feature values are slightly increased by adding a // constant multiple of label. Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } @@ -143,7 +143,7 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } From e9432305836aec563082b4b0d6604e1d2b298212 Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 14:32:44 -0700 Subject: [PATCH 32/44] Update OneVersusAll.cs --- .../MulticlassClassification/OneVersusAll.cs | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs index bab768a9d7..96ce036af9 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs @@ -30,8 +30,8 @@ public static void Example() // Apply OneVersusAll multiclass meta trainer on top of // binary trainer. .Append(mlContext.MulticlassClassification.Trainers - .OneVersusAll( - mlContext.BinaryClassification.Trainers.SdcaLogisticRegression())); + .OneVersusAll( + mlContext.BinaryClassification.Trainers.SdcaLogisticRegression())); // Train the model. 
@@ -40,20 +40,20 @@ public static void Example() // Create testing data. Use different random seed to make it different // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -64,7 +64,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); @@ -88,7 +88,7 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. private static IEnumerable GenerateRandomDataPoints(int count, - int seed=0) + int seed=0) { var random = new Random(seed); @@ -104,7 +104,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, // The feature values are slightly increased by adding a // constant multiple of label. 
Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } @@ -135,7 +135,7 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } From 9d014e3f827d5d0ad9d6a0027040a332f623702d Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 14:34:22 -0700 Subject: [PATCH 33/44] Update PairwiseCoupling.cs --- .../PairwiseCoupling.cs | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs index f933989392..6a1bae9cd3 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs @@ -28,10 +28,10 @@ public static void Example() // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") // Apply PairwiseCoupling multiclass meta trainer on top of - // binary trainer. + // binary trainer. .Append(mlContext.MulticlassClassification.Trainers - .PairwiseCoupling( - mlContext.BinaryClassification.Trainers.SdcaLogisticRegression())); + .PairwiseCoupling( + mlContext.BinaryClassification.Trainers.SdcaLogisticRegression())); // Train the model. @@ -40,20 +40,20 @@ public static void Example() // Create testing data. Use different random seed to make it different // from training data. 
var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -64,7 +64,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); @@ -88,7 +88,7 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. private static IEnumerable GenerateRandomDataPoints(int count, - int seed=0) + int seed=0) { var random = new Random(seed); @@ -104,7 +104,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, // The feature values are slightly increased by adding a // constant multiple of label. 
Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } @@ -135,7 +135,7 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } From 407c4d8df26691d715c0acc93fac0c1b22e5238f Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 14:37:23 -0700 Subject: [PATCH 34/44] Update SdcaMaximumEntropy.cs --- .../SdcaMaximumEntropy.cs | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs index be4cf22296..2c94c858df 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs @@ -29,17 +29,17 @@ public static void Example() // data can fit into memory, a solution is to cache the data in memory. // Caching is especially helpful when working with iterative algorithms // which needs many data passes. - trainingData = mlContext.Data.Cache(trainingData); + trainingData = mlContext.Data.Cache(trainingData); // Define the trainer. var pipeline = // Convert the string labels into key types. mlContext.Transforms.Conversion - .MapValueToKey(nameof(DataPoint.Label)) + .MapValueToKey(nameof(DataPoint.Label)) // Apply SdcaMaximumEntropy multiclass trainer. 
.Append(mlContext.MulticlassClassification.Trainers - .SdcaMaximumEntropy()); + .SdcaMaximumEntropy()); // Train the model. @@ -48,20 +48,20 @@ public static void Example() // Create testing data. Use different random seed to make it different // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -72,7 +72,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); @@ -95,7 +95,7 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. private static IEnumerable GenerateRandomDataPoints(int count, - int seed=0) + int seed=0) { var random = new Random(seed); @@ -111,7 +111,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, // The feature values are slightly increased by adding a // constant multiple of label. 
Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } @@ -142,7 +142,7 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } From b4c95757cacdfea0d106c3690867bc1d39c063e3 Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 14:39:05 -0700 Subject: [PATCH 35/44] Update SdcaMaximumEntropy.cs --- .../Trainers/MulticlassClassification/SdcaMaximumEntropy.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs index 2c94c858df..72981bc20a 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs @@ -35,11 +35,11 @@ public static void Example() var pipeline = // Convert the string labels into key types. mlContext.Transforms.Conversion - .MapValueToKey(nameof(DataPoint.Label)) + .MapValueToKey(nameof(DataPoint.Label)) // Apply SdcaMaximumEntropy multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers - .SdcaMaximumEntropy()); + .SdcaMaximumEntropy()); // Train the model. 
From 053c85bbb0cb0f8b1cda57336b7d25c128366241 Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 14:41:08 -0700 Subject: [PATCH 36/44] Update SdcaMaximumEntropyWithOptions.cs --- .../SdcaMaximumEntropyWithOptions.cs | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs index 0501abb458..891c9cfc3f 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs @@ -30,7 +30,7 @@ public static void Example() // data can fit into memory, a solution is to cache the data in memory. // Caching is especially helpful when working with iterative algorithms // which needs many data passes. - trainingData = mlContext.Data.Cache(trainingData); + trainingData = mlContext.Data.Cache(trainingData); // Define trainer options. var options = new SdcaMaximumEntropyMulticlassTrainer.Options @@ -47,7 +47,7 @@ public static void Example() mlContext.Transforms.Conversion.MapValueToKey("Label") // Apply SdcaMaximumEntropy multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers - .SdcaMaximumEntropy(options)); + .SdcaMaximumEntropy(options)); // Train the model. @@ -56,20 +56,20 @@ public static void Example() // Create testing data. Use different random seed to make it different // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -80,7 +80,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); @@ -104,7 +104,7 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. private static IEnumerable GenerateRandomDataPoints(int count, - int seed=0) + int seed=0) { var random = new Random(seed); @@ -120,7 +120,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, // The feature values are slightly increased by adding a // constant multiple of label. 
Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } @@ -151,7 +151,7 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } From 0d2baa69f0689eb3dfeb59078496d2777fdec331 Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 14:42:49 -0700 Subject: [PATCH 37/44] Update SdcaNonCalibrated.cs --- .../SdcaNonCalibrated.cs | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs index cf33d4ae1b..e67ea997c4 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs @@ -35,11 +35,11 @@ public static void Example() var pipeline = // Convert the string labels into key types. mlContext.Transforms.Conversion - .MapValueToKey(nameof(DataPoint.Label)) + .MapValueToKey(nameof(DataPoint.Label)) // Apply SdcaNonCalibrated multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers - .SdcaNonCalibrated()); + .SdcaNonCalibrated()); // Train the model. @@ -48,20 +48,20 @@ public static void Example() // Create testing data. Use different random seed to make it different // from training data. 
var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -72,7 +72,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); @@ -96,7 +96,7 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. private static IEnumerable GenerateRandomDataPoints(int count, - int seed=0) + int seed=0) { var random = new Random(seed); @@ -112,7 +112,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, // The feature values are slightly increased by adding a // constant multiple of label. 
Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } @@ -143,7 +143,7 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } From 6e410cc2e4ff65774e1ced31fc46dbb2311d57ff Mon Sep 17 00:00:00 2001 From: Sierra Lee Date: Mon, 1 Jul 2019 14:44:56 -0700 Subject: [PATCH 38/44] Update SdcaNonCalibratedWithOptions.cs --- .../SdcaNonCalibratedWithOptions.cs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs index 44e40ff6df..f8a945127c 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs @@ -47,7 +47,7 @@ public static void Example() mlContext.Transforms.Conversion.MapValueToKey("Label") // Apply SdcaNonCalibrated multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers - .SdcaNonCalibrated(options)); + .SdcaNonCalibrated(options)); // Train the model. @@ -56,20 +56,20 @@ public static void Example() // Create testing data. Use different random seed to make it different // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. 
var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -80,7 +80,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); @@ -104,7 +104,7 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. private static IEnumerable GenerateRandomDataPoints(int count, - int seed=0) + int seed=0) { var random = new Random(seed); @@ -120,7 +120,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, // The feature values are slightly increased by adding a // constant multiple of label. 
Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } @@ -151,7 +151,7 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } From 85ed0e6ce2ed95e61cd08455d6ec4828ba3d6e9c Mon Sep 17 00:00:00 2001 From: sierralee51 <36902786+sierralee51@users.noreply.github.com> Date: Mon, 1 Jul 2019 23:24:52 -0700 Subject: [PATCH 39/44] fixed tabbing issue --- .../LbfgsMaximumEntropy.cs | 24 ++++----- .../LbfgsMaximumEntropyWithOptions.cs | 22 ++++---- .../MulticlassClassification/LightGbm.cs | 32 ++++++------ .../LightGbmWithOptions.cs | 34 ++++++------ .../MulticlassClassification.ttinclude | 52 +++++++++---------- .../MulticlassClassification/NaiveBayes.cs | 47 +++++++++-------- .../MulticlassClassification/NaiveBayes.tt | 10 ++-- .../MulticlassClassification/OneVersusAll.cs | 37 ++++++------- .../PairwiseCoupling.cs | 32 ++++++------ .../SdcaMaximumEntropy.cs | 46 ++++++++-------- .../SdcaMaximumEntropyWithOptions.cs | 48 ++++++++--------- .../SdcaNonCalibrated.cs | 42 +++++++-------- .../SdcaNonCalibratedWithOptions.cs | 49 ++++++++--------- 13 files changed, 239 insertions(+), 236 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs index d24869e6a4..a5b27c2693 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs +++ 
b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs @@ -11,34 +11,34 @@ public static class LbfgsMaximumEntropy public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. var pipeline = // Convert the string labels into key types. mlContext.Transforms.Conversion - .MapValueToKey(nameof(DataPoint.Label)) + .MapValueToKey(nameof(DataPoint.Label)) // Apply LbfgsMaximumEntropy multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers - .LbfgsMaximumEntropy()); + .LbfgsMaximumEntropy()); // Train the model. var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); @@ -48,12 +48,12 @@ public static void Example() // Convert IDataView object to a list. 
var predictions = mlContext.Data .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -88,7 +88,7 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. private static IEnumerable GenerateRandomDataPoints(int count, - int seed=0) + int seed=0) { var random = new Random(seed); @@ -102,9 +102,9 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs index 89728c59f9..08bfd399b7 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs @@ -12,16 +12,16 @@ public static class LbfgsMaximumEntropyWithOptions public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. 
+ // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define trainer options. @@ -34,18 +34,18 @@ public static void Example() // Define the trainer. var pipeline = - // Convert the string labels into key types. + // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") // Apply LbfgsMaximumEntropy multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers - .LbfgsMaximumEntropy(options)); - + .LbfgsMaximumEntropy(options)); + // Train the model. var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); @@ -55,7 +55,7 @@ public static void Example() // Convert IDataView object to a list. var predictions = mlContext.Data .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) @@ -109,9 +109,9 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. 
Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs index 0d898b6790..91033d8969 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs @@ -14,36 +14,36 @@ public static class LightGbm public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. var pipeline = // Convert the string labels into key types. mlContext.Transforms.Conversion - .MapValueToKey(nameof(DataPoint.Label)) + .MapValueToKey(nameof(DataPoint.Label)) // Apply LightGbm multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers - .LightGbm()); + .LightGbm()); // Train the model. var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. 
var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); @@ -51,12 +51,12 @@ public static void Example() // Convert IDataView object to a list. var predictions = mlContext.Data .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -67,7 +67,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); @@ -91,7 +91,7 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. private static IEnumerable GenerateRandomDataPoints(int count, - int seed=0) + int seed=0) { var random = new Random(seed); @@ -105,16 +105,16 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. 
private class DataPoint { public uint Label { get; set; } @@ -138,7 +138,7 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs index 322b371ac1..e68fe66b6e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs @@ -15,16 +15,16 @@ public static class LightGbmWithOptions public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define trainer options. @@ -39,33 +39,33 @@ public static void Example() // Define the trainer. var pipeline = - // Convert the string labels into key types. 
+ // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") // Apply LightGbm multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers - .LightGbm(options)); - + .LightGbm(options)); + // Train the model. var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -76,7 +76,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); @@ -100,7 +100,7 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. private static IEnumerable GenerateRandomDataPoints(int count, - int seed=0) + int seed=0) { var random = new Random(seed); @@ -114,9 +114,9 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. 
Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } @@ -147,7 +147,7 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude index 01973d367b..21006d12f2 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude @@ -14,26 +14,26 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. 
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); <# if (CacheData) { #> // ML.NET doesn't cache data set by default. Therefore, if one reads a - // data set from a file and accesses it many times, it can be slow due - // to expensive featurization and disk operations. When the considered - // data can fit into memory, a solution is to cache the data in memory. - // Caching is especially helpful when working with iterative algorithms - // which needs many data passes. - trainingData = mlContext.Data.Cache(trainingData); + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms + // which needs many data passes. + trainingData = mlContext.Data.Cache(trainingData); <# } #> <# if (MetaTrainer != null) { #> @@ -42,21 +42,21 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") // Apply <#=MetaTrainer#> multiclass meta trainer on top of - // binary trainer. + // binary trainer. .Append(mlContext.MulticlassClassification.Trainers - .<#=MetaTrainer#>( - <#=Trainer#>())); + .<#=MetaTrainer#>( + <#=Trainer#>())); <# } else if (TrainerOptions == null) { #> // Define the trainer. var pipeline = // Convert the string labels into key types. mlContext.Transforms.Conversion - .MapValueToKey(nameof(DataPoint.Label)) + .MapValueToKey(nameof(DataPoint.Label)) // Apply <#=Trainer#> multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers - .<#=Trainer#>()); + .<#=Trainer#>()); <# } else { #> // Define trainer options. @@ -68,36 +68,36 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification mlContext.Transforms.Conversion.MapValueToKey("Label") // Apply <#=Trainer#> multiclass trainer. 
.Append(mlContext.MulticlassClassification.Trainers - .<#=Trainer#>(options)); - + .<#=Trainer#>(options)); + <# } #> // Train the model. var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); <#=ExpectedOutputPerInstance#> // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); @@ -120,7 +120,7 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. Features = Enumerable.Repeat(label, 20) .Select(x => randomFloat() + label * 0.2f).ToArray() @@ -129,7 +129,7 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. 
private class DataPoint { public uint Label { get; set; } @@ -153,7 +153,7 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs index 0afd943982..ef62fac759 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs @@ -10,56 +10,56 @@ public static class NaiveBayes { // Naive Bayes classifier is based on Bayes' theorem. // It assumes independence among the presence of features in a class even - // though they may be dependent on each other. It is a multi-class trainer - // that accepts binary feature values of type float, i.e., feature values - // are either true or false. Specifically a feature value greater than zero - // is treated as true, zero or less is treated as false. + // though they may be dependent on each other. It is a multi-class trainer + // that accepts binary feature values of type float, i.e., feature values + // are either true or false. Specifically a feature value greater than zero + // is treated as true, zero or less is treated as false. public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. 
+ // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. var pipeline = // Convert the string labels into key types. mlContext.Transforms.Conversion - .MapValueToKey(nameof(DataPoint.Label)) + .MapValueToKey(nameof(DataPoint.Label)) // Apply NaiveBayes multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers - .NaiveBayes()); + .NaiveBayes()); // Train the model. var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -70,7 +70,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); @@ -93,10 +93,10 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) range with labels - // 1, 2 or 3. For NaiveBayes values greater than zero are treated as true, - // zero or less are treated as false. + // 1, 2 or 3. For NaiveBayes values greater than zero are treated as true, + // zero or less are treated as false. private static IEnumerable GenerateRandomDataPoints(int count, - int seed=0) + int seed=0) { var random = new Random(seed); @@ -110,16 +110,16 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. 
private class DataPoint { public uint Label { get; set; } @@ -143,9 +143,10 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.tt index ca83a2b378..2e15f6af2e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.tt @@ -11,14 +11,14 @@ string Comments= @" // Naive Bayes classifier is based on Bayes' theorem. // It assumes independence among the presence of features in a class even // though they may be dependent on each other. It is a multi-class trainer - // that accepts binary feature values of type float, i.e., feature values - // are either true or false. Specifically a feature value greater than zero - // is treated as true, zero or less is treated as false."; + // that accepts binary feature values of type float, i.e., feature values + // are either true or false. Specifically a feature value greater than zero + // is treated as true, zero or less is treated as false."; string DataGenerationComments= @" // Generates random uniform doubles in [-0.5, 0.5) range with labels - // 1, 2 or 3. For NaiveBayes values greater than zero are treated as true, - // zero or less are treated as false."; + // 1, 2 or 3. 
For NaiveBayes values greater than zero are treated as true, + // zero or less are treated as false."; string ExpectedOutputPerInstance= @"// Expected output: // Label: 1, Prediction: 1 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs index 96ce036af9..357cb28a98 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs @@ -11,16 +11,16 @@ public static class OneVersusAll public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. @@ -28,32 +28,32 @@ public static void Example() // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") // Apply OneVersusAll multiclass meta trainer on top of - // binary trainer. + // binary trainer. 
.Append(mlContext.MulticlassClassification.Trainers - .OneVersusAll( - mlContext.BinaryClassification.Trainers.SdcaLogisticRegression())); + .OneVersusAll( + mlContext.BinaryClassification.Trainers.SdcaLogisticRegression())); // Train the model. var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -64,7 +64,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); @@ -88,7 +88,7 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. private static IEnumerable GenerateRandomDataPoints(int count, - int seed=0) + int seed=0) { var random = new Random(seed); @@ -102,16 +102,16 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. 
Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. private class DataPoint { public uint Label { get; set; } @@ -135,9 +135,10 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs index 6a1bae9cd3..1eee975e42 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs @@ -11,16 +11,16 @@ public static class PairwiseCoupling public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. 
+ // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. @@ -28,32 +28,32 @@ public static void Example() // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") // Apply PairwiseCoupling multiclass meta trainer on top of - // binary trainer. + // binary trainer. .Append(mlContext.MulticlassClassification.Trainers - .PairwiseCoupling( - mlContext.BinaryClassification.Trainers.SdcaLogisticRegression())); + .PairwiseCoupling( + mlContext.BinaryClassification.Trainers.SdcaLogisticRegression())); // Train the model. var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -64,7 +64,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); @@ -102,9 +102,9 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. 
Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } @@ -135,7 +135,7 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs index 72981bc20a..b2175bb7bc 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs @@ -11,57 +11,57 @@ public static class SdcaMaximumEntropy public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // ML.NET doesn't cache data set by default. 
Therefore, if one reads a - // data set from a file and accesses it many times, it can be slow due - // to expensive featurization and disk operations. When the considered - // data can fit into memory, a solution is to cache the data in memory. - // Caching is especially helpful when working with iterative algorithms - // which needs many data passes. - trainingData = mlContext.Data.Cache(trainingData); + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms + // which needs many data passes. + trainingData = mlContext.Data.Cache(trainingData); // Define the trainer. var pipeline = // Convert the string labels into key types. mlContext.Transforms.Conversion - .MapValueToKey(nameof(DataPoint.Label)) + .MapValueToKey(nameof(DataPoint.Label)) // Apply SdcaMaximumEntropy multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers - .SdcaMaximumEntropy()); + .SdcaMaximumEntropy()); // Train the model. var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -72,7 +72,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); @@ -95,7 +95,7 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. private static IEnumerable GenerateRandomDataPoints(int count, - int seed=0) + int seed=0) { var random = new Random(seed); @@ -109,16 +109,16 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. 
private class DataPoint { public uint Label { get; set; } @@ -142,7 +142,7 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs index 891c9cfc3f..f7fd099a98 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs @@ -12,25 +12,25 @@ public static class SdcaMaximumEntropyWithOptions public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // ML.NET doesn't cache data set by default. 
Therefore, if one reads a - // data set from a file and accesses it many times, it can be slow due - // to expensive featurization and disk operations. When the considered - // data can fit into memory, a solution is to cache the data in memory. - // Caching is especially helpful when working with iterative algorithms - // which needs many data passes. - trainingData = mlContext.Data.Cache(trainingData); + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms + // which needs many data passes. + trainingData = mlContext.Data.Cache(trainingData); // Define trainer options. var options = new SdcaMaximumEntropyMulticlassTrainer.Options @@ -43,33 +43,33 @@ public static void Example() // Define the trainer. var pipeline = - // Convert the string labels into key types. + // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") // Apply SdcaMaximumEntropy multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers - .SdcaMaximumEntropy(options)); - + .SdcaMaximumEntropy(options)); + // Train the model. var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -80,7 +80,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); @@ -104,7 +104,7 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. private static IEnumerable GenerateRandomDataPoints(int count, - int seed=0) + int seed=0) { var random = new Random(seed); @@ -118,16 +118,16 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. 
private class DataPoint { public uint Label { get; set; } @@ -151,7 +151,7 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs index e67ea997c4..83415a1e6c 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs @@ -11,25 +11,25 @@ public static class SdcaNonCalibrated public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // ML.NET doesn't cache data set by default. 
Therefore, if one reads a - // data set from a file and accesses it many times, it can be slow due - // to expensive featurization and disk operations. When the considered - // data can fit into memory, a solution is to cache the data in memory. - // Caching is especially helpful when working with iterative algorithms - // which needs many data passes. - trainingData = mlContext.Data.Cache(trainingData); + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms + // which needs many data passes. + trainingData = mlContext.Data.Cache(trainingData); // Define the trainer. var pipeline = @@ -39,29 +39,29 @@ public static void Example() // Apply SdcaNonCalibrated multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers - .SdcaNonCalibrated()); + .SdcaNonCalibrated()); // Train the model. var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -72,7 +72,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); @@ -96,7 +96,7 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. private static IEnumerable GenerateRandomDataPoints(int count, - int seed=0) + int seed=0) { var random = new Random(seed); @@ -110,9 +110,9 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. 
Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } @@ -143,7 +143,7 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs index f8a945127c..7d842848b2 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs @@ -12,25 +12,25 @@ public static class SdcaNonCalibratedWithOptions public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. 
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // ML.NET doesn't cache data set by default. Therefore, if one reads a - // data set from a file and accesses it many times, it can be slow due - // to expensive featurization and disk operations. When the considered - // data can fit into memory, a solution is to cache the data in memory. - // Caching is especially helpful when working with iterative algorithms - // which needs many data passes. - trainingData = mlContext.Data.Cache(trainingData); + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms + // which needs many data passes. + trainingData = mlContext.Data.Cache(trainingData); // Define trainer options. var options = new SdcaNonCalibratedMulticlassTrainer.Options @@ -43,33 +43,33 @@ public static void Example() // Define the trainer. var pipeline = - // Convert the string labels into key types. + // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") // Apply SdcaNonCalibrated multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers - .SdcaNonCalibrated(options)); - + .SdcaNonCalibrated(options)); + // Train the model. var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -80,7 +80,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); @@ -104,7 +104,7 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. private static IEnumerable GenerateRandomDataPoints(int count, - int seed=0) + int seed=0) { var random = new Random(seed); @@ -118,16 +118,16 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. 
private class DataPoint { public uint Label { get; set; } @@ -151,9 +151,10 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + From f51c1478b8e84ead4ea6683a9c7fca2bf8ccbffd Mon Sep 17 00:00:00 2001 From: sierralee51 <36902786+sierralee51@users.noreply.github.com> Date: Mon, 1 Jul 2019 23:40:27 -0700 Subject: [PATCH 40/44] fixed indentations --- .../LbfgsMaximumEntropy.cs | 34 +++++----- .../LbfgsMaximumEntropyWithOptions.cs | 27 ++++---- .../MulticlassClassification/LightGbm.cs | 40 ++++++------ .../LightGbmWithOptions.cs | 37 ++++++----- .../MulticlassClassification.ttinclude | 65 +++++++++---------- 5 files changed, 102 insertions(+), 101 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs index a5b27c2693..3b30065324 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs @@ -11,34 +11,33 @@ public static class LbfgsMaximumEntropy public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. 
Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. var pipeline = - // Convert the string labels into key types. - mlContext.Transforms.Conversion + // Convert the string labels into key types. + mlContext.Transforms.Conversion .MapValueToKey(nameof(DataPoint.Label)) - - // Apply LbfgsMaximumEntropy multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers - .LbfgsMaximumEntropy()); + // Apply LbfgsMaximumEntropy multiclass trainer. + .Append(mlContext.MulticlassClassification.Trainers + .LbfgsMaximumEntropy()); // Train the model. var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); @@ -48,12 +47,12 @@ public static void Example() // Convert IDataView object to a list. var predictions = mlContext.Data .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -87,8 +86,8 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. 
- private static IEnumerable GenerateRandomDataPoints(int count, - int seed=0) + private static IEnumerable GenerateRandomDataPoints(int count, + int seed=0) { var random = new Random(seed); @@ -102,9 +101,9 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } @@ -141,3 +140,4 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs index 08bfd399b7..af7922618d 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs @@ -12,16 +12,16 @@ public static class LbfgsMaximumEntropyWithOptions public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. 
var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define trainer options. @@ -34,18 +34,18 @@ public static void Example() // Define the trainer. var pipeline = - // Convert the string labels into key types. - mlContext.Transforms.Conversion.MapValueToKey("Label") - // Apply LbfgsMaximumEntropy multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers - .LbfgsMaximumEntropy(options)); + // Convert the string labels into key types. + mlContext.Transforms.Conversion.MapValueToKey("Label") + // Apply LbfgsMaximumEntropy multiclass trainer. + .Append(mlContext.MulticlassClassification.Trainers + .LbfgsMaximumEntropy(options)); // Train the model. var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); @@ -55,7 +55,7 @@ public static void Example() // Convert IDataView object to a list. var predictions = mlContext.Data .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) @@ -109,9 +109,9 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. 
Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } @@ -148,3 +148,4 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs index 91033d8969..7992abe2bf 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs @@ -14,36 +14,35 @@ public static class LightGbm public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. var pipeline = - // Convert the string labels into key types. - mlContext.Transforms.Conversion + // Convert the string labels into key types. + mlContext.Transforms.Conversion .MapValueToKey(nameof(DataPoint.Label)) - - // Apply LightGbm multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers - .LightGbm()); + // Apply LightGbm multiclass trainer. 
+ .Append(mlContext.MulticlassClassification.Trainers + .LightGbm()); // Train the model. var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); @@ -51,12 +50,12 @@ public static void Example() // Convert IDataView object to a list. var predictions = mlContext.Data .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -67,7 +66,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); @@ -91,7 +90,7 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. private static IEnumerable GenerateRandomDataPoints(int count, - int seed=0) + int seed=0) { var random = new Random(seed); @@ -105,16 +104,16 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. 
private class DataPoint { public uint Label { get; set; } @@ -138,9 +137,10 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs index e68fe66b6e..e814030061 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs @@ -15,16 +15,16 @@ public static class LightGbmWithOptions public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define trainer options. @@ -39,10 +39,10 @@ public static void Example() // Define the trainer. var pipeline = - // Convert the string labels into key types. 
- mlContext.Transforms.Conversion.MapValueToKey("Label") - // Apply LightGbm multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers + // Convert the string labels into key types. + mlContext.Transforms.Conversion.MapValueToKey("Label") + // Apply LightGbm multiclass trainer. + .Append(mlContext.MulticlassClassification.Trainers .LightGbm(options)); @@ -50,22 +50,22 @@ public static void Example() var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -76,7 +76,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); @@ -100,7 +100,7 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. private static IEnumerable GenerateRandomDataPoints(int count, - int seed=0) + int seed=0) { var random = new Random(seed); @@ -114,9 +114,9 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. 
// The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } @@ -147,9 +147,10 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude index 21006d12f2..7ccc793feb 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude @@ -14,48 +14,47 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. 
var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); <# if (CacheData) { #> // ML.NET doesn't cache data set by default. Therefore, if one reads a - // data set from a file and accesses it many times, it can be slow due - // to expensive featurization and disk operations. When the considered - // data can fit into memory, a solution is to cache the data in memory. - // Caching is especially helpful when working with iterative algorithms - // which needs many data passes. - trainingData = mlContext.Data.Cache(trainingData); + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms + // which needs many data passes. + trainingData = mlContext.Data.Cache(trainingData); <# } #> <# if (MetaTrainer != null) { #> // Define the trainer. var pipeline = - // Convert the string labels into key types. - mlContext.Transforms.Conversion.MapValueToKey("Label") - // Apply <#=MetaTrainer#> multiclass meta trainer on top of + // Convert the string labels into key types. + mlContext.Transforms.Conversion.MapValueToKey("Label") + // Apply <#=MetaTrainer#> multiclass meta trainer on top of // binary trainer. - .Append(mlContext.MulticlassClassification.Trainers + .Append(mlContext.MulticlassClassification.Trainers .<#=MetaTrainer#>( - <#=Trainer#>())); + <#=Trainer#>())); <# } else if (TrainerOptions == null) { #> // Define the trainer. var pipeline = - // Convert the string labels into key types. - mlContext.Transforms.Conversion + // Convert the string labels into key types. 
+ mlContext.Transforms.Conversion .MapValueToKey(nameof(DataPoint.Label)) - - // Apply <#=Trainer#> multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers + // Apply <#=Trainer#> multiclass trainer. + .Append(mlContext.MulticlassClassification.Trainers .<#=Trainer#>()); <# } else { #> @@ -64,10 +63,10 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification // Define the trainer. var pipeline = - // Convert the string labels into key types. - mlContext.Transforms.Conversion.MapValueToKey("Label") - // Apply <#=Trainer#> multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers + // Convert the string labels into key types. + mlContext.Transforms.Conversion.MapValueToKey("Label") + // Apply <#=Trainer#> multiclass trainer. + .Append(mlContext.MulticlassClassification.Trainers .<#=Trainer#>(options)); <# } #> @@ -76,28 +75,28 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); <#=ExpectedOutputPerInstance#> // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); @@ -120,7 +119,7 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. Features = Enumerable.Repeat(label, 20) .Select(x => randomFloat() + label * 0.2f).ToArray() @@ -129,7 +128,7 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. 
private class DataPoint { public uint Label { get; set; } @@ -153,7 +152,7 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } From 1e9a3d881bd1a7ee7c303cb56aa77c1cd6399b0e Mon Sep 17 00:00:00 2001 From: sierralee51 <36902786+sierralee51@users.noreply.github.com> Date: Mon, 1 Jul 2019 23:49:22 -0700 Subject: [PATCH 41/44] aligned comments --- .../LogLossPerClass.cs | 3 +- .../MulticlassClassification/NaiveBayes.cs | 45 ++++++++-------- .../MulticlassClassification/NaiveBayes.tt | 10 ++-- .../MulticlassClassification/OneVersusAll.cs | 36 ++++++------- .../PairwiseCoupling.cs | 37 ++++++------- .../SdcaMaximumEntropy.cs | 52 +++++++++--------- .../SdcaMaximumEntropyWithOptions.cs | 53 ++++++++++--------- .../SdcaNonCalibrated.cs | 52 +++++++++--------- .../SdcaNonCalibratedWithOptions.cs | 46 ++++++++-------- 9 files changed, 168 insertions(+), 166 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LogLossPerClass.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LogLossPerClass.cs index c875616ada..02550a5bb1 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LogLossPerClass.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LogLossPerClass.cs @@ -30,7 +30,8 @@ public static void Example() .MapValueToKey(nameof(DataPoint.Label)) // Apply a multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers.LightGbm()); + .Append(mlContext.MulticlassClassification.Trainers + .LightGbm()); // Train the model. 
var model = pipeline.Fit(trainingData); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs index ef62fac759..33272b2fb0 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs @@ -11,32 +11,31 @@ public static class NaiveBayes // Naive Bayes classifier is based on Bayes' theorem. // It assumes independence among the presence of features in a class even // though they may be dependent on each other. It is a multi-class trainer - // that accepts binary feature values of type float, i.e., feature values - // are either true or false. Specifically a feature value greater than zero - // is treated as true, zero or less is treated as false. + // that accepts binary feature values of type float, i.e., feature values + // are either true or false. Specifically a feature value greater than zero + // is treated as true, zero or less is treated as false. public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. 
var pipeline = - // Convert the string labels into key types. - mlContext.Transforms.Conversion + // Convert the string labels into key types. + mlContext.Transforms.Conversion .MapValueToKey(nameof(DataPoint.Label)) - - // Apply NaiveBayes multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers + // Apply NaiveBayes multiclass trainer. + .Append(mlContext.MulticlassClassification.Trainers .NaiveBayes()); @@ -44,22 +43,22 @@ public static void Example() var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -70,7 +69,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); @@ -93,8 +92,8 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) range with labels - // 1, 2 or 3. For NaiveBayes values greater than zero are treated as true, - // zero or less are treated as false. + // 1, 2 or 3. For NaiveBayes values greater than zero are treated as true, + // zero or less are treated as false. 
private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) @@ -110,7 +109,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. Features = Enumerable.Repeat(label, 20) .Select(x => randomFloat() + label * 0.2f).ToArray() @@ -119,7 +118,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. private class DataPoint { public uint Label { get; set; } @@ -143,7 +142,7 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.tt index 2e15f6af2e..26e0162b12 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.tt +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.tt @@ -11,14 +11,14 @@ string Comments= @" // Naive Bayes classifier is based on Bayes' theorem. // It assumes independence among the presence of features in a class even // though they may be dependent on each other. It is a multi-class trainer - // that accepts binary feature values of type float, i.e., feature values - // are either true or false. 
Specifically a feature value greater than zero - // is treated as true, zero or less is treated as false."; + // that accepts binary feature values of type float, i.e., feature values + // are either true or false. Specifically a feature value greater than zero + // is treated as true, zero or less is treated as false."; string DataGenerationComments= @" // Generates random uniform doubles in [-0.5, 0.5) range with labels - // 1, 2 or 3. For NaiveBayes values greater than zero are treated as true, - // zero or less are treated as false."; + // 1, 2 or 3. For NaiveBayes values greater than zero are treated as true, + // zero or less are treated as false."; string ExpectedOutputPerInstance= @"// Expected output: // Label: 1, Prediction: 1 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs index 357cb28a98..044cfcd3fb 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs @@ -11,49 +11,49 @@ public static class OneVersusAll public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. 
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. var pipeline = - // Convert the string labels into key types. - mlContext.Transforms.Conversion.MapValueToKey("Label") - // Apply OneVersusAll multiclass meta trainer on top of + // Convert the string labels into key types. + mlContext.Transforms.Conversion.MapValueToKey("Label") + // Apply OneVersusAll multiclass meta trainer on top of // binary trainer. - .Append(mlContext.MulticlassClassification.Trainers + .Append(mlContext.MulticlassClassification.Trainers .OneVersusAll( - mlContext.BinaryClassification.Trainers.SdcaLogisticRegression())); + mlContext.BinaryClassification.Trainers.SdcaLogisticRegression())); // Train the model. var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -64,7 +64,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); @@ -102,7 +102,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. 
// The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. Features = Enumerable.Repeat(label, 20) .Select(x => randomFloat() + label * 0.2f).ToArray() @@ -111,7 +111,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. private class DataPoint { public uint Label { get; set; } @@ -135,7 +135,7 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs index 1eee975e42..a2b518bc1a 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs @@ -11,49 +11,49 @@ public static class PairwiseCoupling public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. 
var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // Define the trainer. var pipeline = - // Convert the string labels into key types. - mlContext.Transforms.Conversion.MapValueToKey("Label") - // Apply PairwiseCoupling multiclass meta trainer on top of + // Convert the string labels into key types. + mlContext.Transforms.Conversion.MapValueToKey("Label") + // Apply PairwiseCoupling multiclass meta trainer on top of // binary trainer. - .Append(mlContext.MulticlassClassification.Trainers + .Append(mlContext.MulticlassClassification.Trainers .PairwiseCoupling( - mlContext.BinaryClassification.Trainers.SdcaLogisticRegression())); + mlContext.BinaryClassification.Trainers.SdcaLogisticRegression())); // Train the model. var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -64,7 +64,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); @@ -102,9 +102,9 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } @@ -135,9 +135,10 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs index b2175bb7bc..6521c0e5e1 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs @@ -11,34 +11,33 @@ public static 
class SdcaMaximumEntropy public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // ML.NET doesn't cache data set by default. Therefore, if one reads a - // data set from a file and accesses it many times, it can be slow due - // to expensive featurization and disk operations. When the considered - // data can fit into memory, a solution is to cache the data in memory. - // Caching is especially helpful when working with iterative algorithms - // which needs many data passes. - trainingData = mlContext.Data.Cache(trainingData); + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms + // which needs many data passes. + trainingData = mlContext.Data.Cache(trainingData); // Define the trainer. var pipeline = - // Convert the string labels into key types. - mlContext.Transforms.Conversion + // Convert the string labels into key types. + mlContext.Transforms.Conversion .MapValueToKey(nameof(DataPoint.Label)) - - // Apply SdcaMaximumEntropy multiclass trainer. 
- .Append(mlContext.MulticlassClassification.Trainers + // Apply SdcaMaximumEntropy multiclass trainer. + .Append(mlContext.MulticlassClassification.Trainers .SdcaMaximumEntropy()); @@ -46,22 +45,22 @@ public static void Example() var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -72,7 +71,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); @@ -95,7 +94,7 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. private static IEnumerable GenerateRandomDataPoints(int count, - int seed=0) + int seed=0) { var random = new Random(seed); @@ -109,16 +108,16 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. 
Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. private class DataPoint { public uint Label { get; set; } @@ -142,9 +141,10 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs index f7fd099a98..b0a61c0061 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs @@ -12,25 +12,25 @@ public static class SdcaMaximumEntropyWithOptions public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. 
var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // ML.NET doesn't cache data set by default. Therefore, if one reads a - // data set from a file and accesses it many times, it can be slow due - // to expensive featurization and disk operations. When the considered - // data can fit into memory, a solution is to cache the data in memory. - // Caching is especially helpful when working with iterative algorithms - // which needs many data passes. - trainingData = mlContext.Data.Cache(trainingData); + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms + // which needs many data passes. + trainingData = mlContext.Data.Cache(trainingData); // Define trainer options. var options = new SdcaMaximumEntropyMulticlassTrainer.Options @@ -43,33 +43,33 @@ public static void Example() // Define the trainer. var pipeline = - // Convert the string labels into key types. - mlContext.Transforms.Conversion.MapValueToKey("Label") - // Apply SdcaMaximumEntropy multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers - .SdcaMaximumEntropy(options)); + // Convert the string labels into key types. + mlContext.Transforms.Conversion.MapValueToKey("Label") + // Apply SdcaMaximumEntropy multiclass trainer. + .Append(mlContext.MulticlassClassification.Trainers + .SdcaMaximumEntropy(options)); // Train the model. var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. 
var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -80,7 +80,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); @@ -104,7 +104,7 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. private static IEnumerable GenerateRandomDataPoints(int count, - int seed=0) + int seed=0) { var random = new Random(seed); @@ -118,16 +118,16 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. 
private class DataPoint { public uint Label { get; set; } @@ -151,9 +151,10 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs index 83415a1e6c..4e4a612428 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs @@ -11,34 +11,33 @@ public static class SdcaNonCalibrated public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // ML.NET doesn't cache data set by default. 
Therefore, if one reads a - // data set from a file and accesses it many times, it can be slow due - // to expensive featurization and disk operations. When the considered - // data can fit into memory, a solution is to cache the data in memory. - // Caching is especially helpful when working with iterative algorithms - // which needs many data passes. - trainingData = mlContext.Data.Cache(trainingData); + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms + // which needs many data passes. + trainingData = mlContext.Data.Cache(trainingData); // Define the trainer. var pipeline = - // Convert the string labels into key types. - mlContext.Transforms.Conversion - .MapValueToKey(nameof(DataPoint.Label)) - - // Apply SdcaNonCalibrated multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers + // Convert the string labels into key types. + mlContext.Transforms.Conversion + .MapValueToKey(nameof(DataPoint.Label)) + // Apply SdcaNonCalibrated multiclass trainer. + .Append(mlContext.MulticlassClassification.Trainers .SdcaNonCalibrated()); @@ -46,22 +45,22 @@ public static void Example() var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. 
var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -72,7 +71,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); @@ -96,7 +95,7 @@ public static void Example() // Generates random uniform doubles in [-0.5, 0.5) // range with labels 1, 2 or 3. private static IEnumerable GenerateRandomDataPoints(int count, - int seed=0) + int seed=0) { var random = new Random(seed); @@ -110,9 +109,9 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. 
Features = Enumerable.Repeat(label, 20) - .Select(x => randomFloat() + label * 0.2f).ToArray() + .Select(x => randomFloat() + label * 0.2f).ToArray() }; } @@ -143,9 +142,10 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } } } + diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs index 7d842848b2..d86bd7d1c5 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs @@ -12,25 +12,25 @@ public static class SdcaNonCalibratedWithOptions public static void Example() { // Create a new context for ML.NET operations. It can be used for - // exception tracking and logging, as a catalog of available operations - // and as the source of randomness. Setting the seed to a fixed number - // in this example to make outputs deterministic. + // exception tracking and logging, as a catalog of available operations + // and as the source of randomness. Setting the seed to a fixed number + // in this example to make outputs deterministic. var mlContext = new MLContext(seed: 0); // Create a list of training data points. var dataPoints = GenerateRandomDataPoints(1000); // Convert the list of data points to an IDataView object, which is - // consumable by ML.NET API. + // consumable by ML.NET API. 
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); // ML.NET doesn't cache data set by default. Therefore, if one reads a - // data set from a file and accesses it many times, it can be slow due - // to expensive featurization and disk operations. When the considered - // data can fit into memory, a solution is to cache the data in memory. - // Caching is especially helpful when working with iterative algorithms - // which needs many data passes. - trainingData = mlContext.Data.Cache(trainingData); + // data set from a file and accesses it many times, it can be slow due + // to expensive featurization and disk operations. When the considered + // data can fit into memory, a solution is to cache the data in memory. + // Caching is especially helpful when working with iterative algorithms + // which needs many data passes. + trainingData = mlContext.Data.Cache(trainingData); // Define trainer options. var options = new SdcaNonCalibratedMulticlassTrainer.Options @@ -43,10 +43,10 @@ public static void Example() // Define the trainer. var pipeline = - // Convert the string labels into key types. - mlContext.Transforms.Conversion.MapValueToKey("Label") - // Apply SdcaNonCalibrated multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers + // Convert the string labels into key types. + mlContext.Transforms.Conversion.MapValueToKey("Label") + // Apply SdcaNonCalibrated multiclass trainer. + .Append(mlContext.MulticlassClassification.Trainers .SdcaNonCalibrated(options)); @@ -54,22 +54,22 @@ public static void Example() var model = pipeline.Fit(trainingData); // Create testing data. Use different random seed to make it different - // from training data. + // from training data. var testData = mlContext.Data - .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); + .LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)); // Run the model on test data set. 
var transformedTestData = model.Transform(testData); // Convert IDataView object to a list. var predictions = mlContext.Data - .CreateEnumerable(transformedTestData, - reuseRowObject: false).ToList(); + .CreateEnumerable(transformedTestData, + reuseRowObject: false).ToList(); // Look at 5 predictions foreach (var p in predictions.Take(5)) Console.WriteLine($"Label: {p.Label}, " + - $"Prediction: {p.PredictedLabel}"); + $"Prediction: {p.PredictedLabel}"); // Expected output: // Label: 1, Prediction: 1 @@ -80,7 +80,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.MulticlassClassification - .Evaluate(transformedTestData); + .Evaluate(transformedTestData); PrintMetrics(metrics); @@ -118,7 +118,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, Label = (uint)label, // Create random features that are correlated with the label. // The feature values are slightly increased by adding a - // constant multiple of label. + // constant multiple of label. Features = Enumerable.Repeat(label, 20) .Select(x => randomFloat() + label * 0.2f).ToArray() @@ -127,7 +127,7 @@ private static IEnumerable GenerateRandomDataPoints(int count, } // Example with label and 20 feature values. A data set is a collection of - // such examples. + // such examples. 
private class DataPoint { public uint Label { get; set; } @@ -151,7 +151,7 @@ public static void PrintMetrics(MulticlassClassificationMetrics metrics) Console.WriteLine($"Macro Accuracy: {metrics.MacroAccuracy:F2}"); Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}"); Console.WriteLine( - $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); + $"Log Loss Reduction: {metrics.LogLossReduction:F2}\n"); Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable()); } From c3d3499e219a704c09ed0ce50404b806d96bd43f Mon Sep 17 00:00:00 2001 From: sierralee51 <36902786+sierralee51@users.noreply.github.com> Date: Tue, 2 Jul 2019 13:19:05 -0700 Subject: [PATCH 42/44] fixed some indentation and spacing issues --- .../LbfgsMaximumEntropy.cs | 4 ++-- .../LbfgsMaximumEntropyWithOptions.cs | 4 ++-- .../Trainers/MulticlassClassification/LightGbm.cs | 4 ++-- .../LightGbmWithOptions.cs | 4 ++-- .../MulticlassClassification/LogLossPerClass.cs | 13 ++++++------- .../MulticlassClassification.ttinclude | 14 +++++++------- .../MulticlassClassification/NaiveBayes.cs | 4 ++-- .../MulticlassClassification/OneVersusAll.cs | 6 +++--- .../MulticlassClassification/PairwiseCoupling.cs | 6 +++--- .../PermutationFeatureImportance.cs | 10 +++++----- .../MulticlassClassification/SdcaMaximumEntropy.cs | 4 ++-- .../SdcaMaximumEntropyWithOptions.cs | 4 ++-- .../MulticlassClassification/SdcaNonCalibrated.cs | 4 ++-- .../SdcaNonCalibratedWithOptions.cs | 4 ++-- 14 files changed, 42 insertions(+), 43 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs index 3b30065324..7670cf5f3d 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs @@ -25,10 +25,10 @@ public 
static void Example() // Define the trainer. var pipeline = - // Convert the string labels into key types. + // Convert the string labels into key types. mlContext.Transforms.Conversion .MapValueToKey(nameof(DataPoint.Label)) - // Apply LbfgsMaximumEntropy multiclass trainer. + // Apply LbfgsMaximumEntropy multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers .LbfgsMaximumEntropy()); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs index af7922618d..d99c7e0d6d 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs @@ -34,9 +34,9 @@ public static void Example() // Define the trainer. var pipeline = - // Convert the string labels into key types. + // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") - // Apply LbfgsMaximumEntropy multiclass trainer. + // Apply LbfgsMaximumEntropy multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers .LbfgsMaximumEntropy(options)); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs index 7992abe2bf..2cea68b970 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs @@ -28,10 +28,10 @@ public static void Example() // Define the trainer. var pipeline = - // Convert the string labels into key types. + // Convert the string labels into key types. 
mlContext.Transforms.Conversion .MapValueToKey(nameof(DataPoint.Label)) - // Apply LightGbm multiclass trainer. + // Apply LightGbm multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers .LightGbm()); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs index e814030061..4664fad79a 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs @@ -39,9 +39,9 @@ public static void Example() // Define the trainer. var pipeline = - // Convert the string labels into key types. + // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") - // Apply LightGbm multiclass trainer. + // Apply LightGbm multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers .LightGbm(options)); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LogLossPerClass.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LogLossPerClass.cs index 02550a5bb1..8b1076f830 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LogLossPerClass.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LogLossPerClass.cs @@ -25,13 +25,12 @@ public static void Example() // Define the trainer. var pipeline = - // Convert the string labels into key types. - mlContext.Transforms.Conversion - .MapValueToKey(nameof(DataPoint.Label)) - - // Apply a multiclass trainer. - .Append(mlContext.MulticlassClassification.Trainers - .LightGbm()); + // Convert the string labels into key types. + mlContext.Transforms.Conversion + .MapValueToKey(nameof(DataPoint.Label)) + // Apply a multiclass trainer. 
+ .Append(mlContext.MulticlassClassification.Trainers + .LightGbm()); // Train the model. var model = pipeline.Fit(trainingData); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude index 7ccc793feb..47f5ea50cd 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude @@ -39,10 +39,10 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification <# if (MetaTrainer != null) { #> // Define the trainer. var pipeline = - // Convert the string labels into key types. + // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") - // Apply <#=MetaTrainer#> multiclass meta trainer on top of - // binary trainer. + // Apply <#=MetaTrainer#> multiclass meta trainer on top of + // binary trainer. .Append(mlContext.MulticlassClassification.Trainers .<#=MetaTrainer#>( <#=Trainer#>())); @@ -50,10 +50,10 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification <# } else if (TrainerOptions == null) { #> // Define the trainer. var pipeline = - // Convert the string labels into key types. + // Convert the string labels into key types. mlContext.Transforms.Conversion .MapValueToKey(nameof(DataPoint.Label)) - // Apply <#=Trainer#> multiclass trainer. + // Apply <#=Trainer#> multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers .<#=Trainer#>()); @@ -63,9 +63,9 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification // Define the trainer. var pipeline = - // Convert the string labels into key types. + // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") - // Apply <#=Trainer#> multiclass trainer. 
+ // Apply <#=Trainer#> multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers .<#=Trainer#>(options)); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs index 33272b2fb0..d30413c663 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs @@ -31,10 +31,10 @@ public static void Example() // Define the trainer. var pipeline = - // Convert the string labels into key types. + // Convert the string labels into key types. mlContext.Transforms.Conversion .MapValueToKey(nameof(DataPoint.Label)) - // Apply NaiveBayes multiclass trainer. + // Apply NaiveBayes multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers .NaiveBayes()); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs index 044cfcd3fb..e0dde48af7 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs @@ -25,10 +25,10 @@ public static void Example() // Define the trainer. var pipeline = - // Convert the string labels into key types. + // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") - // Apply OneVersusAll multiclass meta trainer on top of - // binary trainer. + // Apply OneVersusAll multiclass meta trainer on top of + // binary trainer. 
.Append(mlContext.MulticlassClassification.Trainers .OneVersusAll( mlContext.BinaryClassification.Trainers.SdcaLogisticRegression())); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs index a2b518bc1a..2afb17e76c 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs @@ -25,10 +25,10 @@ public static void Example() // Define the trainer. var pipeline = - // Convert the string labels into key types. + // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") - // Apply PairwiseCoupling multiclass meta trainer on top of - // binary trainer. + // Apply PairwiseCoupling multiclass meta trainer on top of + // binary trainer. .Append(mlContext.MulticlassClassification.Trainers .PairwiseCoupling( mlContext.BinaryClassification.Trainers.SdcaLogisticRegression())); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PermutationFeatureImportance.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PermutationFeatureImportance.cs index 714c69361b..71d1c02106 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PermutationFeatureImportance.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PermutationFeatureImportance.cs @@ -26,11 +26,11 @@ public static void Example() new string[] { nameof(Data.Feature1), nameof(Data.Feature2) }; var pipeline = mlContext.Transforms - .Concatenate("Features", featureColumns) - .Append(mlContext.Transforms.Conversion.MapValueToKey("Label")) - .Append(mlContext.Transforms.NormalizeMinMax("Features")) - .Append(mlContext.MulticlassClassification.Trainers - 
.SdcaMaximumEntropy()); + .Concatenate("Features", featureColumns) + .Append(mlContext.Transforms.Conversion.MapValueToKey("Label")) + .Append(mlContext.Transforms.NormalizeMinMax("Features")) + .Append(mlContext.MulticlassClassification.Trainers + .SdcaMaximumEntropy()); // Fit the pipeline to the data. var model = pipeline.Fit(data); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs index 6521c0e5e1..4b5608e30f 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs @@ -33,10 +33,10 @@ public static void Example() // Define the trainer. var pipeline = - // Convert the string labels into key types. + // Convert the string labels into key types. mlContext.Transforms.Conversion .MapValueToKey(nameof(DataPoint.Label)) - // Apply SdcaMaximumEntropy multiclass trainer. + // Apply SdcaMaximumEntropy multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers .SdcaMaximumEntropy()); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs index b0a61c0061..4ec7c86cd9 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs @@ -43,9 +43,9 @@ public static void Example() // Define the trainer. var pipeline = - // Convert the string labels into key types. + // Convert the string labels into key types. 
mlContext.Transforms.Conversion.MapValueToKey("Label") - // Apply SdcaMaximumEntropy multiclass trainer. + // Apply SdcaMaximumEntropy multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers .SdcaMaximumEntropy(options)); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs index 4e4a612428..dc3a2013c4 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs @@ -33,10 +33,10 @@ public static void Example() // Define the trainer. var pipeline = - // Convert the string labels into key types. + // Convert the string labels into key types. mlContext.Transforms.Conversion .MapValueToKey(nameof(DataPoint.Label)) - // Apply SdcaNonCalibrated multiclass trainer. + // Apply SdcaNonCalibrated multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers .SdcaNonCalibrated()); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs index d86bd7d1c5..f5004c3c84 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs @@ -43,9 +43,9 @@ public static void Example() // Define the trainer. var pipeline = - // Convert the string labels into key types. + // Convert the string labels into key types. mlContext.Transforms.Conversion.MapValueToKey("Label") - // Apply SdcaNonCalibrated multiclass trainer. + // Apply SdcaNonCalibrated multiclass trainer. 
.Append(mlContext.MulticlassClassification.Trainers .SdcaNonCalibrated(options)); From e497794faf0f193dc1dc815f8a442be77e9f5887 Mon Sep 17 00:00:00 2001 From: sierralee51 <36902786+sierralee51@users.noreply.github.com> Date: Tue, 2 Jul 2019 14:33:35 -0700 Subject: [PATCH 43/44] fixed extra empty lines --- .../Trainers/MulticlassClassification/LightGbmWithOptions.cs | 1 - .../MulticlassClassification.ttinclude | 3 --- .../Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs | 1 - .../Trainers/MulticlassClassification/PairwiseCoupling.cs | 1 - 4 files changed, 6 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs index 4664fad79a..76337146dc 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs @@ -44,7 +44,6 @@ public static void Example() // Apply LightGbm multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers .LightGbm(options)); - // Train the model. 
var model = pipeline.Fit(trainingData); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude index 47f5ea50cd..ee3f245566 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/MulticlassClassification.ttinclude @@ -46,7 +46,6 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification .Append(mlContext.MulticlassClassification.Trainers .<#=MetaTrainer#>( <#=Trainer#>())); - <# } else if (TrainerOptions == null) { #> // Define the trainer. var pipeline = @@ -56,7 +55,6 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification // Apply <#=Trainer#> multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers .<#=Trainer#>()); - <# } else { #> // Define trainer options. var options = new <#=TrainerOptions#>; @@ -68,7 +66,6 @@ namespace Samples.Dynamic.Trainers.MulticlassClassification // Apply <#=Trainer#> multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers .<#=Trainer#>(options)); - <# } #> // Train the model. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs index d30413c663..4dc7f44f9e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/NaiveBayes.cs @@ -38,7 +38,6 @@ public static void Example() .Append(mlContext.MulticlassClassification.Trainers .NaiveBayes()); - // Train the model. 
var model = pipeline.Fit(trainingData); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs index 2afb17e76c..be42ec3bbd 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/PairwiseCoupling.cs @@ -33,7 +33,6 @@ public static void Example() .PairwiseCoupling( mlContext.BinaryClassification.Trainers.SdcaLogisticRegression())); - // Train the model. var model = pipeline.Fit(trainingData); From 6224ed5a76c8001208a29618d5cdba51631f2db7 Mon Sep 17 00:00:00 2001 From: sierralee51 <36902786+sierralee51@users.noreply.github.com> Date: Tue, 2 Jul 2019 16:15:08 -0700 Subject: [PATCH 44/44] fixed some more indentation issue --- .../Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs | 1 - .../MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs | 1 - .../Dynamic/Trainers/MulticlassClassification/LightGbm.cs | 1 - .../Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs | 1 - .../Trainers/MulticlassClassification/SdcaMaximumEntropy.cs | 1 - .../MulticlassClassification/SdcaMaximumEntropyWithOptions.cs | 1 - .../Trainers/MulticlassClassification/SdcaNonCalibrated.cs | 1 - .../MulticlassClassification/SdcaNonCalibratedWithOptions.cs | 1 - 8 files changed, 8 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs index 7670cf5f3d..7130a7c9ea 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropy.cs @@ -32,7 +32,6 @@ public static void Example() 
.Append(mlContext.MulticlassClassification.Trainers .LbfgsMaximumEntropy()); - // Train the model. var model = pipeline.Fit(trainingData); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs index d99c7e0d6d..597f34af06 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LbfgsMaximumEntropyWithOptions.cs @@ -39,7 +39,6 @@ public static void Example() // Apply LbfgsMaximumEntropy multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers .LbfgsMaximumEntropy(options)); - // Train the model. var model = pipeline.Fit(trainingData); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs index 2cea68b970..7ab6d5f80e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs @@ -35,7 +35,6 @@ public static void Example() .Append(mlContext.MulticlassClassification.Trainers .LightGbm()); - // Train the model. 
var model = pipeline.Fit(trainingData); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs index e0dde48af7..40a040325c 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/OneVersusAll.cs @@ -33,7 +33,6 @@ public static void Example() .OneVersusAll( mlContext.BinaryClassification.Trainers.SdcaLogisticRegression())); - // Train the model. var model = pipeline.Fit(trainingData); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs index 4b5608e30f..2a83a84f3a 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropy.cs @@ -40,7 +40,6 @@ public static void Example() .Append(mlContext.MulticlassClassification.Trainers .SdcaMaximumEntropy()); - // Train the model. var model = pipeline.Fit(trainingData); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs index 4ec7c86cd9..62ed191c0f 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaMaximumEntropyWithOptions.cs @@ -48,7 +48,6 @@ public static void Example() // Apply SdcaMaximumEntropy multiclass trainer. 
.Append(mlContext.MulticlassClassification.Trainers .SdcaMaximumEntropy(options)); - // Train the model. var model = pipeline.Fit(trainingData); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs index dc3a2013c4..6a899f9432 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibrated.cs @@ -40,7 +40,6 @@ public static void Example() .Append(mlContext.MulticlassClassification.Trainers .SdcaNonCalibrated()); - // Train the model. var model = pipeline.Fit(trainingData); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs index f5004c3c84..6367e9fe21 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/SdcaNonCalibratedWithOptions.cs @@ -48,7 +48,6 @@ public static void Example() // Apply SdcaNonCalibrated multiclass trainer. .Append(mlContext.MulticlassClassification.Trainers .SdcaNonCalibrated(options)); - // Train the model. var model = pipeline.Fit(trainingData);