@@ -7,59 +7,64 @@ public static class RandomTrainer
77 {
// Sample entry point: fits a pipeline ending in the Random binary-classification trainer on the sentiment dataset and prints its evaluation metrics.
88 public static void Example ( )
99 {
10- // Downloading the dataset from github.com/dotnet/machinelearning.
11- // This will create a sentiment.tsv file in the filesystem.
12- // You can open this file, if you want to see the data.
13- string dataFile = SamplesUtils . DatasetUtils . DownloadSentimentDataset ( ) [ 0 ] ;
10+ // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
11+ // as a catalog of available operations and as the source of randomness.
12+ var mlContext = new MLContext ( seed : 1 ) ;
13+
14+ // Download the dataset; the first returned file is used for training and the second for testing.
15+ var dataFiles = SamplesUtils . DatasetUtils . DownloadSentimentDataset ( ) ;
16+ var trainFile = dataFiles [ 0 ] ;
17+ var testFile = dataFiles [ 1 ] ;
1418
1519 // A preview of the data.
1620 // Sentiment SentimentText
1721 // 0 " :Erm, thank you. "
1822 // 1 ==You're cool==
1923
20- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
21- // as a catalog of available operations and as the source of randomness.
22- var mlContext = new MLContext ( seed : 1 ) ;
23-
24- // Step 1: Load the data as an IDataView.
25- // First, we define the loader: specify the data columns and where to find them in the text file.
26- var loader = mlContext . Data . CreateTextLoader (
24+ // Step 1: Read the data as an IDataView.
25+ // First, we define the reader: specify the data columns and where to find them in the text file.
26+ var reader = mlContext . Data . CreateTextLoader (
2727 columns : new [ ]
2828 {
2929 new TextLoader . Column ( "Sentiment" , DataKind . Single , 0 ) ,
3030 new TextLoader . Column ( "SentimentText" , DataKind . String , 1 )
3131 } ,
3232 hasHeader : true
3333 ) ;
34-
35- // Load the data
36- var data = loader . Load ( dataFile ) ;
3734
38- // Split it between training and test data
39- var trainTestData = mlContext . BinaryClassification . TrainTestSplit ( data ) ;
35+ // Read the training data
36+ var trainData = reader . Load ( trainFile ) ;
4037
4138 // Step 2: Pipeline
4239 // Featurize the text column through the FeaturizeText API.
4340 // Then append a binary classifier, setting the "Label" column as the label of the dataset, and
4441 // the "Features" column produced by FeaturizeText as the features column.
4542 var pipeline = mlContext . Transforms . Text . FeaturizeText ( "Features" , "SentimentText" )
46- . AppendCacheCheckpoint ( mlContext ) // Add a data-cache step within a pipeline.
43+ . AppendCacheCheckpoint ( mlContext ) // Add a data-cache step within a pipeline.
4744 . Append ( mlContext . BinaryClassification . Trainers . Random ( ) ) ;
4845
4946 // Step 3: Train the pipeline
50- var trainedPipeline = pipeline . Fit ( trainTestData . TrainSet ) ;
47+ var trainedPipeline = pipeline . Fit ( trainData ) ;
5148
5249 // Step 4: Evaluate on the test set
53- var transformedData = trainedPipeline . Transform ( trainTestData . TestSet ) ;
50+ var transformedData = trainedPipeline . Transform ( reader . Load ( testFile ) ) ;
5451 var evalMetrics = mlContext . BinaryClassification . Evaluate ( transformedData , label : "Sentiment" ) ;
55-
56- // Step 5: Inspect the output
57- Console . WriteLine ( "Accuracy: " + evalMetrics . Accuracy ) ;
52+ SamplesUtils . ConsoleUtils . PrintMetrics ( evalMetrics ) ; // Step 5: Print the evaluation metrics.
5853
5954 // We expect an output probability close to 0.5 as the Random trainer outputs a random prediction.
6055 // Regardless of the input features, the trainer will predict either positive or negative label with equal probability.
61- // Expected output (close to 0.5):
62- // Accuracy: 0.588235294117647
56+ // Expected output (close to 0.5):
57+
58+ // Accuracy: 0.56
59+ // AUC: 0.57
60+ // F1 Score: 0.60
61+ // Negative Precision: 0.57
62+ // Negative Recall: 0.44
63+ // Positive Precision: 0.55
64+ // Positive Recall: 0.67
65+ // LogLoss: 1.53
66+ // LogLossReduction: -53.37
67+ // Entropy: 1.00
6368 }
6469 }
6570}
0 commit comments