@@ -38,14 +38,7 @@ public static void Example()
             var data = reader.Read(dataFile);

             // Split it between training and test data
-            var (train, test) = mlContext.BinaryClassification.TrainTestSplit(data);
-
-            // ML.NET doesn't cache data sets by default. Therefore, if one reads a data set from a file and accesses it many times,
-            // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory, a
-            // solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms that
-            // need many data passes. Since SDCA is such an algorithm, we cache. Inserting a cache step in a pipeline is also possible;
-            // please see the construction of the pipeline below.
-            data = mlContext.Data.Cache(data);
+            var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data);

             // Step 2: Pipeline
             // Featurize the text column through the FeaturizeText API.
@@ -56,10 +49,10 @@ public static void Example()
                 .Append(mlContext.BinaryClassification.Trainers.Prior(labelColumn: "Sentiment"));

             // Step 3: Train the pipeline
-            var trainedPipeline = pipeline.Fit(train);
+            var trainedPipeline = pipeline.Fit(trainTestData.TrainSet);

             // Step 4: Evaluate on the test set
-            var transformedData = trainedPipeline.Transform(test);
+            var transformedData = trainedPipeline.Transform(trainTestData.TestSet);
             var evalMetrics = mlContext.BinaryClassification.Evaluate(transformedData, label: "Sentiment");

             // Step 5: Inspect the output
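
The removed caching comment notes that a cache step can also be inserted directly into the pipeline rather than caching the IDataView up front. A minimal sketch of that alternative is shown below, assuming the same preview-era ML.NET API this sample uses and reusing its mlContext and trainTestData variables; the "SentimentText"/"Features" column names and the FeaturizeText argument order are illustrative assumptions, not taken from this diff.

    // Hypothetical sketch (not part of this PR): caching expressed as a pipeline checkpoint.
    // AppendCacheCheckpoint materializes the rows produced so far in memory, so trainers that
    // make many passes over the data do not re-read and re-featurize it on every pass.
    var cachedPipeline = mlContext.Transforms.Text
        .FeaturizeText("Features", "SentimentText")    // column names assumed for illustration
        .AppendCacheCheckpoint(mlContext)              // cache the featurized rows in memory
        .Append(mlContext.BinaryClassification.Trainers.Prior(labelColumn: "Sentiment"));
    var model = cachedPipeline.Fit(trainTestData.TrainSet);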