Skip to content

Commit 02a1028

Browse files
committed
review comments, implementation specific documentation, parameter name change
1 parent bf67fcd commit 02a1028

File tree

7 files changed

+72
-52
lines changed

7 files changed

+72
-52
lines changed

docs/samples/Microsoft.ML.Samples/Dynamic/FeatureContributionCalculationTransform.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,12 +59,12 @@ public static void FeatureContributionCalculationTransform_Regression()
5959
// Create a Feature Contribution Calculator
6060
// Calculate the feature contributions for all features given trained model parameters
6161
// And don't normalize the contribution scores
62-
var featureContributionCalculator = mlContext.Model.Explainability.FeatureContributionCalculation(model.Model, model.FeatureColumn, top: 11, normalize: false);
62+
var featureContributionCalculator = mlContext.Model.Explainability.FeatureContributionCalculation(model.Model, model.FeatureColumn, numPositiveContributions: 11, normalize: false);
6363
var outputData = featureContributionCalculator.Fit(scoredData).Transform(scoredData);
6464

6565
// FeatureContributionCalculatingEstimator can be used as an intermediary step in a pipeline.
6666
// The features retained by FeatureContributionCalculatingEstimator will be in the FeatureContributions column.
67-
var pipeline = mlContext.Model.Explainability.FeatureContributionCalculation(model.Model, model.FeatureColumn, top: 11)
67+
var pipeline = mlContext.Model.Explainability.FeatureContributionCalculation(model.Model, model.FeatureColumn, numPositiveContributions: 11)
6868
.Append(mlContext.Regression.Trainers.OrdinaryLeastSquares(featureColumn: "FeatureContributions"));
6969
var outData = featureContributionCalculator.Fit(scoredData).Transform(scoredData);
7070

src/Microsoft.ML.Data/Transforms/ExplainabilityCatalog.cs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,17 @@ public static class ExplainabilityCatalog
1818
/// <param name="catalog">The model explainability operations catalog.</param>
1919
/// <param name="modelParameters">Trained model parameters that support Feature Contribution Calculation and which will be used for scoring.</param>
2020
/// <param name="featureColumn">The name of the feature column that will be used as input.</param>
21-
/// <param name="top">The number of features with highest positive contributions for each data sample that will be retained in the FeatureContribution column.
22-
/// Note that if there are fewer features with positive contributions than <paramref name="top"/>, the rest will be returned as zeros.</param>
23-
/// <param name="bottom">The number of features with least negative contributions for each data sample that will be retained in the FeatureContribution column.
24-
/// Note that if there are fewer features with negative contributions than <paramref name="bottom"/>, the rest will be returned as zeros.</param>
21+
/// <param name="numPositiveContributions">The number of positive contributions to report, sorted from highest magnitude to lowest magnitude.
22+
/// Note that if there are fewer features with positive contributions than <paramref name="numPositiveContributions"/>, the rest will be returned as zeros.</param>
23+
/// <param name="numNegativeContributions">The number of negative contributions to report, sorted from highest magnitude to lowest magnitude.
24+
/// Note that if there are fewer features with negative contributions than <paramref name="numNegativeContributions"/>, the rest will be returned as zeros.</param>
2525
/// <param name="normalize">Whether the feature contributions should be normalized to the [-1, 1] interval.</param>
2626
public static FeatureContributionCalculatingEstimator FeatureContributionCalculation(this ModelOperationsCatalog.ExplainabilityTransforms catalog,
2727
ICalculateFeatureContribution modelParameters,
2828
string featureColumn = DefaultColumnNames.Features,
29-
int top = FeatureContributionDefaults.Top,
30-
int bottom = FeatureContributionDefaults.Bottom,
29+
int numPositiveContributions = FeatureContributionDefaults.NumPositiveContributions,
30+
int numNegativeContributions = FeatureContributionDefaults.NumNegativeContributions,
3131
bool normalize = FeatureContributionDefaults.Normalize)
32-
=> new FeatureContributionCalculatingEstimator(CatalogUtils.GetEnvironment(catalog), modelParameters, featureColumn, top, bottom, normalize);
32+
=> new FeatureContributionCalculatingEstimator(CatalogUtils.GetEnvironment(catalog), modelParameters, featureColumn, numPositiveContributions, numNegativeContributions, normalize);
3333
}
3434
}

src/Microsoft.ML.Data/Transforms/FeatureContributionCalculationTransform.cs

Lines changed: 37 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -25,29 +25,17 @@
2525
namespace Microsoft.ML.Data
2626
{
2727
/// <summary>
28-
/// The FeatureContributionCalculationTransformer computes model-specific per-feature contributions to the score of each data point.
28+
/// The FeatureContributionCalculatingTransformer computes model-specific per-feature contributions to the score of each example.
2929
/// See the list of currently supported models below.
3030
/// </summary>
3131
/// <remarks>
3232
/// <para>
33-
/// Scorind a data set with a trained model produces a score, or prediction, for each data sample. To understand and explain these predictions
33+
/// Scoring a dataset with a trained model produces a score, or prediction, for each example. To understand and explain these predictions
3434
/// it can be useful to inspect which features influenced them most significantly. FeatureContributionCalculatingTransformer computes a model-specific
35-
/// list of per-feature contributions to the score for each data sample. These contributions can be positive (they make the score higher) or negative
35+
/// list of per-feature contributions to the score for each example. These contributions can be positive (they make the score higher) or negative
3636
/// (they make the score lower).
3737
/// </para>
3838
/// <para>
39-
/// For linear models, the contribution of a given feature is just equal to the product of feature times the corresponding weight. Similarly, for
40-
/// Generalized Additive Models (GAM), the contrubution of a feature is equal to the shape function for the given feature evaluated at the feature value.
41-
/// </para>
42-
/// <para>
43-
/// For tree based models, the contribution of a feature is equal to the change in score produced by exploring the opposite sub-tree every time a decision
44-
/// node for the given feature is encountered. Consider a simple case with a singe decision tree that has a decision node for the binary feature F1.
45-
/// Given a data sample that has feature F1 equal to true, we can calculate the score it would have obtained if we chose the subtree corresponding to
46-
/// the feature F1 being equal to false while keeping the other features constant. The contribution of feature F1 for the given sample is the difference
47-
/// between the original score and the score obtained by taking the opposite decision at the node corresponding to feature F1. This algorithm extendes
48-
/// naturally to models with many decision trees.
49-
/// </para>
50-
/// <para>
5139
/// Feature Contribution Calculation is currently supported for the following models:
5240
/// Regression:
5341
/// OrdinaryLeastSquares, StochasticDualCoordinateAscent (SDCA), OnlineGradientDescent, PoissonRegression,
@@ -60,6 +48,19 @@ namespace Microsoft.ML.Data
6048
/// FastTree, LightGbm
6149
/// </para>
6250
/// <para>
51+
/// For linear models, the contribution of a given feature is equal to the product of the feature value and the corresponding weight. Similarly,
52+
/// for Generalized Additive Models (GAM), the contribution of a feature is equal to the shape function for the given feature evaluated at
53+
/// the feature value.
54+
/// </para>
55+
/// <para>
56+
/// For tree-based models, the contribution of a feature is equal to the change in score produced by exploring the opposite sub-tree every time a decision
57+
/// node for the given feature is encountered. Consider a simple case with a single decision tree that has a decision node for the binary feature F1.
58+
/// Given an example that has feature F1 equal to true, we can calculate the score it would have obtained if we chose the subtree corresponding to
59+
/// the feature F1 being equal to false while keeping the other features constant. The contribution of feature F1 for the given example is the difference
60+
/// between the original score and the score obtained by taking the opposite decision at the node corresponding to feature F1. This algorithm extends
61+
/// naturally to models with many decision trees.
62+
/// </para>
63+
/// <para>
6364
/// See the sample below for an example of how to compute feature importance using the FeatureContributionCalculatingTransformer.
6465
/// </para>
6566
/// </remarks>
@@ -81,10 +82,10 @@ public sealed class Arguments : TransformInputBase
8182
public string FeatureColumn = DefaultColumnNames.Features;
8283

8384
[Argument(ArgumentType.AtMostOnce, HelpText = "Number of top contributions", SortOrder = 3)]
84-
public int Top = FeatureContributionCalculatingEstimator.Defaults.Top;
85+
public int Top = FeatureContributionCalculatingEstimator.Defaults.NumPositiveContributions;
8586

8687
[Argument(ArgumentType.AtMostOnce, HelpText = "Number of bottom contributions", SortOrder = 4)]
87-
public int Bottom = FeatureContributionCalculatingEstimator.Defaults.Bottom;
88+
public int Bottom = FeatureContributionCalculatingEstimator.Defaults.NumNegativeContributions;
8889

8990
[Argument(ArgumentType.AtMostOnce, HelpText = "Whether or not output of Features contribution should be normalized", ShortName = "norm", SortOrder = 5)]
9091
public bool Normalize = FeatureContributionCalculatingEstimator.Defaults.Normalize;
@@ -119,32 +120,32 @@ private static VersionInfo GetVersionInfo()
119120
/// <param name="env">The environment to use.</param>
120121
/// <param name="modelParameters">Trained model parameters that support Feature Contribution Calculation and which will be used for scoring.</param>
121122
/// <param name="featureColumn">The name of the feature column that will be used as input.</param>
122-
/// <param name="top">The number of features with highest positive contributions for each data sample that will be retained in the FeatureContribution column.
123-
/// Note that if there are fewer features with positive contributions than <paramref name="top"/>, the rest will be returned as zeros.</param>
124-
/// <param name="bottom">The number of features with least negative contributions for each data sample that will be retained in the FeatureContribution column.
125-
/// Note that if there are fewer features with negative contributions than <paramref name="bottom"/>, the rest will be returned as zeros.</param>
123+
/// <param name="numPositiveContributions">The number of positive contributions to report, sorted from highest magnitude to lowest magnitude.
124+
/// Note that if there are fewer features with positive contributions than <paramref name="numPositiveContributions"/>, the rest will be returned as zeros.</param>
125+
/// <param name="numNegativeContributions">The number of negative contributions to report, sorted from highest magnitude to lowest magnitude.
126+
/// Note that if there are fewer features with negative contributions than <paramref name="numNegativeContributions"/>, the rest will be returned as zeros.</param>
126127
/// <param name="normalize">Whether the feature contributions should be normalized to the [-1, 1] interval.</param>
127128
public FeatureContributionCalculatingTransformer(IHostEnvironment env, ICalculateFeatureContribution modelParameters,
128129
string featureColumn = DefaultColumnNames.Features,
129-
int top = FeatureContributionCalculatingEstimator.Defaults.Top,
130-
int bottom = FeatureContributionCalculatingEstimator.Defaults.Bottom,
130+
int numPositiveContributions = FeatureContributionCalculatingEstimator.Defaults.NumPositiveContributions,
131+
int numNegativeContributions = FeatureContributionCalculatingEstimator.Defaults.NumNegativeContributions,
131132
bool normalize = FeatureContributionCalculatingEstimator.Defaults.Normalize)
132133
: base(Contracts.CheckRef(env, nameof(env)).Register(nameof(FeatureContributionCalculatingTransformer)), new[] { (input: featureColumn, output: DefaultColumnNames.FeatureContributions) })
133134
{
134135
Host.CheckValue(modelParameters, nameof(modelParameters));
135136
Host.CheckNonEmpty(featureColumn, nameof(featureColumn));
136-
if (top < 0)
137+
if (numPositiveContributions < 0)
137138
throw Host.Except($"Number of top contribution must be non negative");
138-
if (bottom < 0)
139+
if (numNegativeContributions < 0)
139140
throw Host.Except($"Number of bottom contribution must be non negative");
140141

141142
// If a predictor implements ICalculateFeatureContribution, it also implements the internal interface IFeatureContributionMapper.
142143
// This is how we keep the implementation of feature contribution calculation internal.
143144
_predictor = modelParameters as IFeatureContributionMapper;
144145
Host.AssertValue(_predictor);
145146

146-
Top = top;
147-
Bottom = bottom;
147+
Top = numPositiveContributions;
148+
Bottom = numNegativeContributions;
148149
Normalize = normalize;
149150
}
150151

@@ -279,8 +280,8 @@ public sealed class FeatureContributionCalculatingEstimator : TrivialEstimator<F
279280

280281
public static class Defaults
281282
{
282-
public const int Top = 10;
283-
public const int Bottom = 10;
283+
public const int NumPositiveContributions = 10;
284+
public const int NumNegativeContributions = 10;
284285
public const bool Normalize = true;
285286
}
286287

@@ -291,18 +292,18 @@ public static class Defaults
291292
/// <param name="env">The environment to use.</param>
292293
/// <param name="modelParameters">Trained model parameters that support Feature Contribution Calculation and which will be used for scoring.</param>
293294
/// <param name="featureColumn">The name of the feature column that will be used as input.</param>
294-
/// <param name="top">The number of features with highest positive contributions for each data sample that will be retained in the FeatureContribution column.
295-
/// Note that if there are fewer features with positive contributions than <paramref name="top"/>, the rest will be returned as zeros.</param>
296-
/// <param name="bottom">The number of features with least negative contributions for each data sample that will be retained in the FeatureContribution column.
297-
/// Note that if there are fewer features with negative contributions than <paramref name="bottom"/>, the rest will be returned as zeros.</param>
295+
/// <param name="numPositiveContributions">The number of positive contributions to report, sorted from highest magnitude to lowest magnitude.
296+
/// Note that if there are fewer features with positive contributions than <paramref name="numPositiveContributions"/>, the rest will be returned as zeros.</param>
297+
/// <param name="numNegativeContributions">The number of negative contributions to report, sorted from highest magnitude to lowest magnitude.
298+
/// Note that if there are fewer features with negative contributions than <paramref name="numNegativeContributions"/>, the rest will be returned as zeros.</param>
298299
/// <param name="normalize">Whether the feature contributions should be normalized to the [-1, 1] interval.</param>
299300
public FeatureContributionCalculatingEstimator(IHostEnvironment env, ICalculateFeatureContribution modelParameters,
300301
string featureColumn = DefaultColumnNames.Features,
301-
int top = Defaults.Top,
302-
int bottom = Defaults.Bottom,
302+
int numPositiveContributions = Defaults.NumPositiveContributions,
303+
int numNegativeContributions = Defaults.NumNegativeContributions,
303304
bool normalize = Defaults.Normalize)
304305
: base(Contracts.CheckRef(env, nameof(env)).Register(nameof(FeatureContributionCalculatingTransformer)),
305-
new FeatureContributionCalculatingTransformer(env, modelParameters, featureColumn, top, bottom, normalize))
306+
new FeatureContributionCalculatingTransformer(env, modelParameters, featureColumn, numPositiveContributions, numNegativeContributions, normalize))
306307
{
307308
_featureColumn = featureColumn;
308309
_predictor = modelParameters;

src/Microsoft.ML.FastTree/FastTree.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2837,6 +2837,16 @@ public abstract class TreeEnsembleModelParameters :
28372837
bool ICanSavePfa.CanSavePfa => true;
28382838

28392839
bool ICanSaveOnnx.CanSaveOnnx(OnnxContext ctx) => true;
2840+
2841+
/// <summary>
2842+
/// Used to determine the contribution of each feature to the score of an example by <see cref="FeatureContributionCalculatingTransformer"/>.
2843+
/// For tree-based models, the contribution of a feature is equal to the change in score produced by exploring the opposite sub-tree every time a decision
2844+
/// node for the given feature is encountered. Consider a simple case with a single decision tree that has a decision node for the binary feature F1.
2845+
/// Given an example that has feature F1 equal to true, we can calculate the score it would have obtained if we chose the subtree corresponding to
2846+
/// the feature F1 being equal to false while keeping the other features constant. The contribution of feature F1 for the given example is the difference
2847+
/// between the original score and the score obtained by taking the opposite decision at the node corresponding to feature F1. This algorithm extends
2848+
/// naturally to models with many decision trees.
2849+
/// </summary>
28402850
public FeatureContributionCalculator FeatureContributionClaculator => new FeatureContributionCalculator(this);
28412851

28422852
public TreeEnsembleModelParameters(IHostEnvironment env, string name, TreeEnsemble trainedEnsemble, int numFeatures, string innerArgs)

src/Microsoft.ML.FastTree/GamTrainer.cs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -668,6 +668,11 @@ public abstract class GamModelParametersBase : ModelParametersBase<float>, IValu
668668
ColumnType IValueMapper.InputType => _inputType;
669669
ColumnType IValueMapper.OutputType => _outputType;
670670

671+
/// <summary>
672+
/// Used to determine the contribution of each feature to the score of an example by <see cref="FeatureContributionCalculatingTransformer"/>.
673+
/// For Generalized Additive Models (GAM), the contribution of a feature is equal to the shape function for the given feature evaluated at
674+
/// the feature value.
675+
/// </summary>
671676
public FeatureContributionCalculator FeatureContributionClaculator => new FeatureContributionCalculator(this);
672677

673678
private protected GamModelParametersBase(IHostEnvironment env, string name,

src/Microsoft.ML.StandardLearners/Standard/LinearModelParameters.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,10 @@ public IEnumerator<float> GetEnumerator()
9999

100100
bool ICanSaveOnnx.CanSaveOnnx(OnnxContext ctx) => true;
101101

102+
/// <summary>
103+
/// Used to determine the contribution of each feature to the score of an example by <see cref="FeatureContributionCalculatingTransformer"/>.
104+
/// For linear models, the contribution of a given feature is equal to the product of the feature value and the corresponding weight.
105+
/// </summary>
102106
public FeatureContributionCalculator FeatureContributionClaculator => new FeatureContributionCalculator(this);
103107

104108
/// <summary>

0 commit comments

Comments
 (0)