@@ -10,38 +10,47 @@ public static class KMeans
1010 {
1111 public static void Example ( )
1212 {
13- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
14- // as a catalog of available operations and as the source of randomness.
15- // Setting the seed to a fixed number in this example to make outputs deterministic.
13+ // Create a new context for ML.NET operations. It can be used for
14+ // exception tracking and logging, as a catalog of available operations
15+ // and as the source of randomness. Setting the seed to a fixed number
16+ // in this example to make outputs deterministic.
1617 var mlContext = new MLContext ( seed : 0 ) ;
1718
18- // Convert the list of data points to an IDataView object, which is consumable by ML.NET API .
19+ // Create a list of training data points.
1920 var dataPoints = GenerateRandomDataPoints ( 1000 , 123 ) ;
2021
21- // Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
22- var trainingData = mlContext . Data . LoadFromEnumerable ( dataPoints ) ;
22+ // Convert the list of data points to an IDataView object, which is
23+ // consumable by ML.NET API.
24+ IDataView trainingData = mlContext . Data . LoadFromEnumerable ( dataPoints ) ;
2325
2426 // Define the trainer.
25- var pipeline = mlContext . Clustering . Trainers . KMeans ( numberOfClusters : 2 ) ;
27+ var pipeline = mlContext . Clustering . Trainers . KMeans (
28+ numberOfClusters : 2 ) ;
2629
2730 // Train the model.
2831 var model = pipeline . Fit ( trainingData ) ;
2932
30- // Create testing data. Use different random seed to make it different from training data.
31- var testData = mlContext . Data . LoadFromEnumerable ( GenerateRandomDataPoints ( 500 , seed : 123 ) ) ;
33+ // Create testing data. NOTE(review): the seed passed below (123) is the same
34+ // one used for the training data; pass another seed for independent test data.
35+ var testData = mlContext . Data . LoadFromEnumerable (
36+ GenerateRandomDataPoints ( 500 , seed : 123 ) ) ;
3237
3338 // Run the model on test data set.
3439 var transformedTestData = model . Transform ( testData ) ;
3540
3641 // Convert IDataView object to a list.
37- var predictions = mlContext . Data . CreateEnumerable < Prediction > ( transformedTestData , reuseRowObject : false ) . ToList ( ) ;
42+ var predictions = mlContext . Data . CreateEnumerable < Prediction > (
43+ transformedTestData , reuseRowObject : false ) . ToList ( ) ;
3844
39- // Print 5 predictions. Note that the label is only used as a comparison wiht the predicted label.
40- // It is not used during training.
45+ // Print 5 predictions (the first 2 and the last 3). Note that the label is
46+ // only used as a comparison with the predicted label, not during training.
4147 foreach ( var p in predictions . Take ( 2 ) )
42- Console . WriteLine ( $ "Label: { p . Label } , Prediction: { p . PredictedLabel } ") ;
48+ Console . WriteLine (
49+ $ "Label: { p . Label } , Prediction: { p . PredictedLabel } ") ;
50+
4351 foreach ( var p in predictions . TakeLast ( 3 ) )
44- Console . WriteLine ( $ "Label: { p . Label } , Prediction: { p . PredictedLabel } ") ;
52+ Console . WriteLine (
53+ $ "Label: { p . Label } , Prediction: { p . PredictedLabel } ") ;
4554
4655 // Expected output:
4756 // Label: 1, Prediction: 1
@@ -51,28 +60,37 @@ public static void Example()
5160 // Label: 2, Prediction: 2
5261
5362 // Evaluate the overall metrics
54- var metrics = mlContext . Clustering . Evaluate ( transformedTestData , "Label" , "Score" , "Features" ) ;
63+ var metrics = mlContext . Clustering . Evaluate (
64+ transformedTestData , "Label" , "Score" , "Features" ) ;
65+
5566 PrintMetrics ( metrics ) ;
56-
67+
5768 // Expected output:
5869 // Normalized Mutual Information: 0.95
5970 // Average Distance: 4.17
6071 // Davies Bouldin Index: 2.87
6172
62- // Get cluster centroids and the number of clusters k from KMeansModelParameters.
73+ // Get the cluster centroids and the number of clusters k from
74+ // KMeansModelParameters.
6375 VBuffer < float > [ ] centroids = default ;
6476
6577 var modelParams = model . Model ;
6678 modelParams . GetClusterCentroids ( ref centroids , out int k ) ;
67- Console . WriteLine ( $ "The first 3 coordinates of the first centroid are: ({ string . Join ( ", " , centroids [ 0 ] . GetValues ( ) . ToArray ( ) . Take ( 3 ) ) } )") ;
68- Console . WriteLine ( $ "The first 3 coordinates of the second centroid are: ({ string . Join ( ", " , centroids [ 1 ] . GetValues ( ) . ToArray ( ) . Take ( 3 ) ) } )") ;
79+ Console . WriteLine (
80+ $ "The first 3 coordinates of the first centroid are: " +
81+ string . Join ( ", " , centroids [ 0 ] . GetValues ( ) . ToArray ( ) . Take ( 3 ) ) ) ;
82+
83+ Console . WriteLine (
84+ $ "The first 3 coordinates of the second centroid are: " +
85+ string . Join ( ", " , centroids [ 1 ] . GetValues ( ) . ToArray ( ) . Take ( 3 ) ) ) ;
6986
7087 // Expected output similar to:
7188 // The first 3 coordinates of the first centroid are: 0.6035213, 0.6017533, 0.5964218
7289 // The first 3 coordinates of the second centroid are: 0.4031044, 0.4175443, 0.4082336
7390 }
7491
75- private static IEnumerable < DataPoint > GenerateRandomDataPoints ( int count , int seed = 0 )
92+ private static IEnumerable < DataPoint > GenerateRandomDataPoints ( int count ,
93+ int seed = 0 )
7694 {
7795 var random = new Random ( seed ) ;
7896 float randomFloat ( ) => ( float ) random . NextDouble ( ) ;
@@ -83,16 +101,21 @@ private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int se
83101 {
84102 Label = ( uint ) label ,
85103 // Create random features with two clusters.
86- // The first half has feature values centered around 0.6 the second half has values centered around 0.4.
87- Features = Enumerable . Repeat ( label , 50 ) . Select ( index => label == 0 ? randomFloat ( ) + 0.1f : randomFloat ( ) - 0.1f ) . ToArray ( )
104+ // The first half has feature values centered around 0.6, while
105+ // the second half has values centered around 0.4.
106+ Features = Enumerable . Repeat ( label , 50 )
107+ . Select ( index => label == 0 ? randomFloat ( ) + 0.1f :
108+ randomFloat ( ) - 0.1f ) . ToArray ( )
88109 } ;
89110 }
90111 }
91112
92- // Example with label and 50 feature values. A data set is a collection of such examples.
113+ // Example with label and 50 feature values. A data set is a collection of
114+ // such examples.
93115 private class DataPoint
94116 {
95- // The label is not used during training, just for comparison with the predicted label.
117+ // The label is not used during training, just for comparison with the
118+ // predicted label.
96119 [ KeyType ( 2 ) ]
97120 public uint Label { get ; set ; }
98121
@@ -112,9 +135,14 @@ private class Prediction
// Writes the clustering quality metrics to the console, one metric per line,
// each value formatted with two decimal places ("F2").
private static void PrintMetrics(ClusteringMetrics metrics)
{
    // Each value is formatted into a short local first so that no line
    // grows too wide; the printed text is the label followed by the value.
    string mutualInformation = $"{metrics.NormalizedMutualInformation:F2}";
    Console.WriteLine("Normalized Mutual Information: " + mutualInformation);

    string averageDistance = $"{metrics.AverageDistance:F2}";
    Console.WriteLine("Average Distance: " + averageDistance);

    string daviesBouldinIndex = $"{metrics.DaviesBouldinIndex:F2}";
    Console.WriteLine("Davies Bouldin Index: " + daviesBouldinIndex);
}
119147 }
120148}
0 commit comments