review comments, implementation-specific documentation, parameter name change

artidoro · artidoro · commit 02a102854015 · 2019-01-07T13:16:48.000+01:00
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureContributionCalculationTransform.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureContributionCalculationTransform.cs
@@ -59,12 +59,12 @@ public static void FeatureContributionCalculationTransform_Regression()
             // Create a Feature Contribution Calculator
             // Calculate the feature contributions for all features given trained model parameters
             // And don't normalize the contribution scores
-            var featureContributionCalculator = mlContext.Model.Explainability.FeatureContributionCalculation(model.Model, model.FeatureColumn, top: 11, normalize: false);
+            var featureContributionCalculator = mlContext.Model.Explainability.FeatureContributionCalculation(model.Model, model.FeatureColumn, numPositiveContributions: 11, normalize: false);
             var outputData = featureContributionCalculator.Fit(scoredData).Transform(scoredData);
 
             // FeatureContributionCalculatingEstimator can be use as an intermediary step in a pipeline. 
             // The features retained by FeatureContributionCalculatingEstimator will be in the FeatureContribution column.
-            var pipeline = mlContext.Model.Explainability.FeatureContributionCalculation(model.Model, model.FeatureColumn, top: 11)
+            var pipeline = mlContext.Model.Explainability.FeatureContributionCalculation(model.Model, model.FeatureColumn, numPositiveContributions: 11)
                 .Append(mlContext.Regression.Trainers.OrdinaryLeastSquares(featureColumn: "FeatureContributions"));
             var outData = featureContributionCalculator.Fit(scoredData).Transform(scoredData);
 
diff --git a/src/Microsoft.ML.Data/Transforms/ExplainabilityCatalog.cs b/src/Microsoft.ML.Data/Transforms/ExplainabilityCatalog.cs
@@ -18,17 +18,17 @@ public static class ExplainabilityCatalog
         /// <param name="catalog">The model explainability operations catalog.</param>
         /// <param name="modelParameters">Trained model parameters that support Feature Contribution Calculation and which will be used for scoring.</param>
         /// <param name="featureColumn">The name of the feature column that will be used as input.</param>
-        /// <param name="top">The number of features with highest positive contributions for each data sample that will be retained in the FeatureContribution column.
-        /// Note that if there are fewer features with positive contributions than <paramref name="top"/>, the rest will be returned as zeros.</param>
-        /// <param name="bottom">The number of features with least negative contributions for each data sample that will be retained in the FeatureContribution column.
-        /// Note that if there are fewer features with negative contributions than <paramref name="bottom"/>, the rest will be returned as zeros.</param>
+        /// <param name="numPositiveContributions">The number of positive contributions to report, sorted from highest magnitude to lowest magnitude.
+        /// Note that if there are fewer features with positive contributions than <paramref name="numPositiveContributions"/>, the rest will be returned as zeros.</param>
+        /// <param name="numNegativeContributions">The number of negative contributions to report, sorted from highest magnitude to lowest magnitude.
+        /// Note that if there are fewer features with negative contributions than <paramref name="numNegativeContributions"/>, the rest will be returned as zeros.</param>
         /// <param name="normalize">Whether the feature contributions should be normalized to the [-1, 1] interval.</param>
         public static FeatureContributionCalculatingEstimator FeatureContributionCalculation(this ModelOperationsCatalog.ExplainabilityTransforms catalog,
             ICalculateFeatureContribution modelParameters,
             string featureColumn = DefaultColumnNames.Features,
-            int top = FeatureContributionDefaults.Top,
-            int bottom = FeatureContributionDefaults.Bottom,
+            int numPositiveContributions = FeatureContributionDefaults.NumPositiveContributions,
+            int numNegativeContributions = FeatureContributionDefaults.NumNegativeContributions,
             bool normalize = FeatureContributionDefaults.Normalize)
-            => new FeatureContributionCalculatingEstimator(CatalogUtils.GetEnvironment(catalog), modelParameters, featureColumn, top, bottom, normalize);
+            => new FeatureContributionCalculatingEstimator(CatalogUtils.GetEnvironment(catalog), modelParameters, featureColumn, numPositiveContributions, numNegativeContributions, normalize);
     }
 }
diff --git a/src/Microsoft.ML.Data/Transforms/FeatureContributionCalculationTransform.cs b/src/Microsoft.ML.Data/Transforms/FeatureContributionCalculationTransform.cs
@@ -25,29 +25,17 @@
 namespace Microsoft.ML.Data
 {
     /// <summary>
-    /// The FeatureContributionCalculationTransformer computes model-specific per-feature contributions to the score of each data point.
+    /// The FeatureContributionCalculatingTransformer computes model-specific per-feature contributions to the score of each example.
     /// See the list of currently supported models below.
     /// </summary>
     /// <remarks>
     /// <para>
-    /// Scorind a data set with a trained model produces a score, or prediction, for each data sample. To understand and explain these predictions
+    /// Scoring a dataset with a trained model produces a score, or prediction, for each example. To understand and explain these predictions
     /// it can be useful to inspect which features influenced them most significantly. FeatureContributionCalculationTransformer computes a model-specific
-    /// list of per-feature contributions to the score for each data sample. These contributions can be positive (they make the score higher) or negative
+    /// list of per-feature contributions to the score for each example. These contributions can be positive (they make the score higher) or negative
     /// (they make the score lower).
     /// </para>
     /// <para>
-    /// For linear models, the contribution of a given feature is just equal to the product of feature times the corresponding weight. Similarly, for
-    /// Generalized Additive Models (GAM), the contrubution of a feature is equal to the shape function for the given feature evaluated at the feature value.
-    /// </para>
-    /// <para>
-    /// For tree based models, the contribution of a feature is equal to the change in score produced by exploring the opposite sub-tree every time a decision
-    /// node for the given feature is encountered. Consider a simple case with a singe decision tree that has a decision node for the binary feature F1.
-    /// Given a data sample that has feature F1 equal to true, we can calculate the score it would have obtained if we chose the subtree corresponding to
-    /// the feature F1 being equal to false while keeping the other features constant. The contribution of feature F1 for the given sample is the difference
-    /// between the original score and the score obtained by taking the opposite decision at the node corresponding to feature F1. This algorithm extendes
-    /// naturally to models with many decision trees.
-    /// </para>
-    /// <para>
     /// Feature Contribution Calculation is currently supported for the following models:
     ///     Regression:
     ///         OrdinaryLeastSquares, StochasticDualCoordinateAscent (SDCA), OnlineGradientDescent, PoissonRegression,
@@ -60,6 +48,19 @@ namespace Microsoft.ML.Data
     ///         FastTree, LightGbm
     /// </para>
     /// <para>
+    /// For linear models, the contribution of a given feature is equal to the product of the feature value and the corresponding weight. Similarly,
+    /// for Generalized Additive Models (GAM), the contribution of a feature is equal to the shape function for the given feature evaluated at
+    /// the feature value.
+    /// </para>
+    /// <para>
+    /// For tree-based models, the contribution of a feature is equal to the change in score produced by exploring the opposite sub-tree every time a decision
+    /// node for the given feature is encountered. Consider a simple case with a single decision tree that has a decision node for the binary feature F1.
+    /// Given an example that has feature F1 equal to true, we can calculate the score it would have obtained if we chose the subtree corresponding to
+    /// the feature F1 being equal to false while keeping the other features constant. The contribution of feature F1 for the given example is the difference
+    /// between the original score and the score obtained by taking the opposite decision at the node corresponding to feature F1. This algorithm extends
+    /// naturally to models with many decision trees.
+    /// </para>
+    /// <para>
     /// See the sample below for an example of how to compute feature importance using the FeatureContributionCalculatingTransformer.
     /// </para>
     /// </remarks>
@@ -81,10 +82,10 @@ public sealed class Arguments : TransformInputBase
             public string FeatureColumn = DefaultColumnNames.Features;
 
             [Argument(ArgumentType.AtMostOnce, HelpText = "Number of top contributions", SortOrder = 3)]
-            public int Top = FeatureContributionCalculatingEstimator.Defaults.Top;
+            public int Top = FeatureContributionCalculatingEstimator.Defaults.NumPositiveContributions;
 
             [Argument(ArgumentType.AtMostOnce, HelpText = "Number of bottom contributions", SortOrder = 4)]
-            public int Bottom = FeatureContributionCalculatingEstimator.Defaults.Bottom;
+            public int Bottom = FeatureContributionCalculatingEstimator.Defaults.NumNegativeContributions;
 
             [Argument(ArgumentType.AtMostOnce, HelpText = "Whether or not output of Features contribution should be normalized", ShortName = "norm", SortOrder = 5)]
             public bool Normalize = FeatureContributionCalculatingEstimator.Defaults.Normalize;
@@ -119,32 +120,32 @@ private static VersionInfo GetVersionInfo()
         /// <param name="env">The environment to use.</param>
         /// <param name="modelParameters">Trained model parameters that support Feature Contribution Calculation and which will be used for scoring.</param>
         /// <param name="featureColumn">The name of the feature column that will be used as input.</param>
-        /// <param name="top">The number of features with highest positive contributions for each data sample that will be retained in the FeatureContribution column.
-        /// Note that if there are fewer features with positive contributions than <paramref name="top"/>, the rest will be returned as zeros.</param>
-        /// <param name="bottom">The number of features with least negative contributions for each data sample that will be retained in the FeatureContribution column.
-        /// Note that if there are fewer features with negative contributions than <paramref name="bottom"/>, the rest will be returned as zeros.</param>
+        /// <param name="numPositiveContributions">The number of positive contributions to report, sorted from highest magnitude to lowest magnitude.
+        /// Note that if there are fewer features with positive contributions than <paramref name="numPositiveContributions"/>, the rest will be returned as zeros.</param>
+        /// <param name="numNegativeContributions">The number of negative contributions to report, sorted from highest magnitude to lowest magnitude.
+        /// Note that if there are fewer features with negative contributions than <paramref name="numNegativeContributions"/>, the rest will be returned as zeros.</param>
         /// <param name="normalize">Whether the feature contributions should be normalized to the [-1, 1] interval.</param>
         public FeatureContributionCalculatingTransformer(IHostEnvironment env, ICalculateFeatureContribution modelParameters,
             string featureColumn = DefaultColumnNames.Features,
-            int top = FeatureContributionCalculatingEstimator.Defaults.Top,
-            int bottom = FeatureContributionCalculatingEstimator.Defaults.Bottom,
+            int numPositiveContributions = FeatureContributionCalculatingEstimator.Defaults.NumPositiveContributions,
+            int numNegativeContributions = FeatureContributionCalculatingEstimator.Defaults.NumNegativeContributions,
             bool normalize = FeatureContributionCalculatingEstimator.Defaults.Normalize)
             : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(FeatureContributionCalculatingTransformer)), new[] { (input: featureColumn, output: DefaultColumnNames.FeatureContributions) })
         {
             Host.CheckValue(modelParameters, nameof(modelParameters));
             Host.CheckNonEmpty(featureColumn, nameof(featureColumn));
-            if (top < 0)
+            if (numPositiveContributions < 0)
                 throw Host.Except($"Number of top contribution must be non negative");
-            if (bottom < 0)
+            if (numNegativeContributions < 0)
                 throw Host.Except($"Number of bottom contribution must be non negative");
 
             // If a predictor implements ICalculateFeatureContribution, it also implements the internal interface IFeatureContributionMapper.
             // This is how we keep the implementation of feature contribution calculation internal.
             _predictor = modelParameters as IFeatureContributionMapper;
             Host.AssertValue(_predictor);
 
-            Top = top;
-            Bottom = bottom;
+            Top = numPositiveContributions;
+            Bottom = numNegativeContributions;
             Normalize = normalize;
         }
 
@@ -279,8 +280,8 @@ public sealed class FeatureContributionCalculatingEstimator : TrivialEstimator<F
 
         public static class Defaults
         {
-            public const int Top = 10;
-            public const int Bottom = 10;
+            public const int NumPositiveContributions = 10;
+            public const int NumNegativeContributions = 10;
             public const bool Normalize = true;
         }
 
@@ -291,18 +292,18 @@ public static class Defaults
         /// <param name="env">The environment to use.</param>
         /// <param name="modelParameters">Trained model parameters that support Feature Contribution Calculation and which will be used for scoring.</param>
         /// <param name="featureColumn">The name of the feature column that will be used as input.</param>
-        /// <param name="top">The number of features with highest positive contributions for each data sample that will be retained in the FeatureContribution column.
-        /// Note that if there are fewer features with positive contributions than <paramref name="top"/>, the rest will be returned as zeros.</param>
-        /// <param name="bottom">The number of features with least negative contributions for each data sample that will be retained in the FeatureContribution column.
-        /// Note that if there are fewer features with negative contributions than <paramref name="bottom"/>, the rest will be returned as zeros.</param>
+        /// <param name="numPositiveContributions">The number of positive contributions to report, sorted from highest magnitude to lowest magnitude.
+        /// Note that if there are fewer features with positive contributions than <paramref name="numPositiveContributions"/>, the rest will be returned as zeros.</param>
+        /// <param name="numNegativeContributions">The number of negative contributions to report, sorted from highest magnitude to lowest magnitude.
+        /// Note that if there are fewer features with negative contributions than <paramref name="numNegativeContributions"/>, the rest will be returned as zeros.</param>
         /// <param name="normalize">Whether the feature contributions should be normalized to the [-1, 1] interval.</param>
         public FeatureContributionCalculatingEstimator(IHostEnvironment env, ICalculateFeatureContribution modelParameters,
             string featureColumn = DefaultColumnNames.Features,
-            int top = Defaults.Top,
-            int bottom = Defaults.Bottom,
+            int numPositiveContributions = Defaults.NumPositiveContributions,
+            int numNegativeContributions = Defaults.NumNegativeContributions,
             bool normalize = Defaults.Normalize)
             : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(FeatureContributionCalculatingTransformer)),
-                  new FeatureContributionCalculatingTransformer(env, modelParameters, featureColumn, top, bottom, normalize))
+                  new FeatureContributionCalculatingTransformer(env, modelParameters, featureColumn, numPositiveContributions, numNegativeContributions, normalize))
         {
             _featureColumn = featureColumn;
             _predictor = modelParameters;
diff --git a/src/Microsoft.ML.FastTree/FastTree.cs b/src/Microsoft.ML.FastTree/FastTree.cs
@@ -2837,6 +2837,16 @@ public abstract class TreeEnsembleModelParameters :
         bool ICanSavePfa.CanSavePfa => true;
 
         bool ICanSaveOnnx.CanSaveOnnx(OnnxContext ctx) => true;
+
+        /// <summary>
+        /// Used by <see cref="FeatureContributionCalculatingTransformer"/> to determine the contribution of each feature to the score of an example.
+        /// For tree-based models, the contribution of a feature is equal to the change in score produced by exploring the opposite sub-tree every time a decision
+        /// node for the given feature is encountered. Consider a simple case with a single decision tree that has a decision node for the binary feature F1.
+        /// Given an example that has feature F1 equal to true, we can calculate the score it would have obtained if we chose the subtree corresponding to
+        /// the feature F1 being equal to false while keeping the other features constant. The contribution of feature F1 for the given example is the difference
+        /// between the original score and the score obtained by taking the opposite decision at the node corresponding to feature F1. This algorithm extends
+        /// naturally to models with many decision trees.
+        /// </summary>
         public FeatureContributionCalculator FeatureContributionClaculator => new FeatureContributionCalculator(this);
 
         public TreeEnsembleModelParameters(IHostEnvironment env, string name, TreeEnsemble trainedEnsemble, int numFeatures, string innerArgs)
diff --git a/src/Microsoft.ML.FastTree/GamTrainer.cs b/src/Microsoft.ML.FastTree/GamTrainer.cs
@@ -668,6 +668,11 @@ public abstract class GamModelParametersBase : ModelParametersBase<float>, IValu
         ColumnType IValueMapper.InputType => _inputType;
         ColumnType IValueMapper.OutputType => _outputType;
 
+        /// <summary>
+        /// Used by <see cref="FeatureContributionCalculatingTransformer"/> to determine the contribution of each feature to the score of an example.
+        /// For Generalized Additive Models (GAM), the contribution of a feature is equal to the shape function for the given feature evaluated at
+        /// the feature value.
+        /// </summary>
         public FeatureContributionCalculator FeatureContributionClaculator => new FeatureContributionCalculator(this);
 
         private protected GamModelParametersBase(IHostEnvironment env, string name,
diff --git a/src/Microsoft.ML.StandardLearners/Standard/LinearModelParameters.cs b/src/Microsoft.ML.StandardLearners/Standard/LinearModelParameters.cs
@@ -99,6 +99,10 @@ public IEnumerator<float> GetEnumerator()
 
         bool ICanSaveOnnx.CanSaveOnnx(OnnxContext ctx) => true;
 
+        /// <summary>
+        /// Used by <see cref="FeatureContributionCalculatingTransformer"/> to determine the contribution of each feature to the score of an example.
+        /// For linear models, the contribution of a given feature is equal to the product of the feature value and the corresponding weight.
+        /// </summary>
         public FeatureContributionCalculator FeatureContributionClaculator => new FeatureContributionCalculator(this);
 
         /// <summary>
diff --git a/test/Microsoft.ML.Tests/FeatureContributionTests.cs b/test/Microsoft.ML.Tests/FeatureContributionTests.cs