@@ -48,32 +48,36 @@ public static void FeatureContributionCalculationTransform_Regression()
4848 var transformPipeline = mlContext . Transforms . Concatenate ( "Features" , "CrimesPerCapita" , "PercentResidental" ,
4949 "PercentNonRetail" , "CharlesRiver" , "NitricOxides" , "RoomsPerDwelling" , "PercentPre40s" ,
5050 "EmploymentDistance" , "HighwayDistance" , "TaxRate" , "TeacherRatio" ) ;
51- var learner = mlContext . Regression . Trainers . StochasticDualCoordinateAscent (
51+ var learner = mlContext . Regression . Trainers . OrdinaryLeastSquares (
5252 labelColumn : "MedianHomeValue" , featureColumn : "Features" ) ;
5353
5454 var transformedData = transformPipeline . Fit ( data ) . Transform ( data ) ;
5555
56+ // Now we train the model and score it on the transformed data.
5657 var model = learner . Fit ( transformedData ) ;
58+ var scoredData = model . Transform ( transformedData ) ;
5759
5860 // Create a Feature Contribution Calculator
59- // Calculate the feature contributions for all features
61+ // Calculate the feature contributions for all features given trained model parameters
6062 // And don't normalize the contribution scores
61- var args = new FeatureContributionCalculationTransform . Arguments ( )
62- {
63- Top = 11 ,
64- Normalize = false
65- } ;
66- var featureContributionCalculator = FeatureContributionCalculationTransform . Create ( mlContext , args , transformedData , model . Model , model . FeatureColumn ) ;
63+ var featureContributionCalculator = mlContext . Model . Explainability . FeatureContributionCalculation ( model . Model , model . FeatureColumn , top : 11 , normalize : false ) ;
64+ var outputData = featureContributionCalculator . Fit ( scoredData ) . Transform ( scoredData ) ;
65+
66+ // FeatureContributionCalculatingEstimator can be used as an intermediary step in a pipeline.
67+ // The features retained by FeatureContributionCalculatingEstimator will be in the FeatureContribution column.
68+ var pipeline = mlContext . Model . Explainability . FeatureContributionCalculation ( model . Model , model . FeatureColumn , top : 11 )
69+ . Append ( mlContext . Regression . Trainers . OrdinaryLeastSquares ( featureColumn : "FeatureContributions" ) ) ;
70+ var outData = featureContributionCalculator . Fit ( scoredData ) . Transform ( scoredData ) ;
6771
6872 // Let's extract the weights from the linear model to use as a comparison
6973 var weights = new VBuffer < float > ( ) ;
7074 model . Model . GetFeatureWeights ( ref weights ) ;
7175
7276 // Let's now walk through the first ten records and see which feature drove the values the most
7377 // Get prediction scores and contributions
74- var scoringEnumerator = featureContributionCalculator . AsEnumerable < HousingRegressionScoreAndContribution > ( mlContext , true ) . GetEnumerator ( ) ;
78+ var scoringEnumerator = outputData . AsEnumerable < HousingRegressionScoreAndContribution > ( mlContext , true ) . GetEnumerator ( ) ;
7579 int index = 0 ;
76- Console . WriteLine ( "Label\t Score\t BiggestFeature\t Value\t Weight\t Contribution\t Percent " ) ;
80+ Console . WriteLine ( "Label\t Score\t BiggestFeature\t Value\t Weight\t Contribution" ) ;
7781 while ( scoringEnumerator . MoveNext ( ) && index < 10 )
7882 {
7983 var row = scoringEnumerator . Current ;
@@ -84,26 +88,34 @@ public static void FeatureContributionCalculationTransform_Regression()
8488 // And the corresponding information about the feature
8589 var value = row . Features [ featureOfInterest ] ;
8690 var contribution = row . FeatureContributions [ featureOfInterest ] ;
87- var percentContribution = 100 * contribution / row . Score ;
88- var name = data . Schema [ ( int ) ( featureOfInterest + 1 ) ] . Name ;
91+ var name = data . Schema [ featureOfInterest + 1 ] . Name ;
8992 var weight = weights . GetValues ( ) [ featureOfInterest ] ;
9093
91- Console . WriteLine ( "{0:0.00}\t {1:0.00}\t {2}\t {3:0.00}\t {4:0.00}\t {5:0.00}\t {6:0.00} " ,
94+ Console . WriteLine ( "{0:0.00}\t {1:0.00}\t {2}\t {3:0.00}\t {4:0.00}\t {5:0.00}" ,
9295 row . MedianHomeValue ,
9396 row . Score ,
9497 name ,
9598 value ,
9699 weight ,
97- contribution ,
98- percentContribution
100+ contribution
99101 ) ;
100102
101103 index ++ ;
102104 }
103-
104- // For bulk scoring, the ApplyToData API can also be used
105- var scoredData = featureContributionCalculator . ApplyToData ( mlContext , transformedData ) ;
106- var preview = scoredData . Preview ( 100 ) ;
105+ Console . ReadLine ( ) ;
106+
107+ // The output of the above code is:
108+ // Label Score BiggestFeature Value Weight Contribution
109+ // 24.00 27.74 RoomsPerDwelling 6.58 98.55 39.95
110+ // 21.60 23.85 RoomsPerDwelling 6.42 98.55 39.01
111+ // 34.70 29.29 RoomsPerDwelling 7.19 98.55 43.65
112+ // 33.40 27.17 RoomsPerDwelling 7.00 98.55 42.52
113+ // 36.20 27.68 RoomsPerDwelling 7.15 98.55 43.42
114+ // 28.70 23.13 RoomsPerDwelling 6.43 98.55 39.07
115+ // 22.90 22.71 RoomsPerDwelling 6.01 98.55 36.53
116+ // 27.10 21.72 RoomsPerDwelling 6.17 98.55 37.50
117+ // 16.50 18.04 RoomsPerDwelling 5.63 98.55 34.21
118+ // 18.90 20.14 RoomsPerDwelling 6.00 98.55 36.48
107119 }
108120
109121 private static int GetMostContributingFeature ( float [ ] featureContributions )
0 commit comments