Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,12 @@ public static void FeatureContributionCalculationTransform_Regression()
// Create a Feature Contribution Calculator
// Calculate the feature contributions for all features given trained model parameters
// And don't normalize the contribution scores
var featureContributionCalculator = mlContext.Model.Explainability.FeatureContributionCalculation(model.Model, model.FeatureColumn, top: 11, normalize: false);
var featureContributionCalculator = mlContext.Model.Explainability.FeatureContributionCalculation(model.Model, model.FeatureColumn, numPositiveContributions: 11, normalize: false);
var outputData = featureContributionCalculator.Fit(scoredData).Transform(scoredData);

// FeatureContributionCalculatingEstimator can be used as an intermediate step in a pipeline.
// The features retained by FeatureContributionCalculatingEstimator will be in the FeatureContribution column.
var pipeline = mlContext.Model.Explainability.FeatureContributionCalculation(model.Model, model.FeatureColumn, top: 11)
var pipeline = mlContext.Model.Explainability.FeatureContributionCalculation(model.Model, model.FeatureColumn, numPositiveContributions: 11)
.Append(mlContext.Regression.Trainers.OrdinaryLeastSquares(featureColumn: "FeatureContributions"));
var outData = featureContributionCalculator.Fit(scoredData).Transform(scoredData);

Expand Down
14 changes: 7 additions & 7 deletions src/Microsoft.ML.Data/Transforms/ExplainabilityCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,17 @@ public static class ExplainabilityCatalog
/// <param name="catalog">The model explainability operations catalog.</param>
/// <param name="modelParameters">Trained model parameters that support Feature Contribution Calculation and which will be used for scoring.</param>
/// <param name="featureColumn">The name of the feature column that will be used as input.</param>
/// <param name="top">The number of features with highest positive contributions for each data sample that will be retained in the FeatureContribution column.
/// Note that if there are fewer features with positive contributions than <paramref name="top"/>, the rest will be returned as zeros.</param>
/// <param name="bottom">The number of features with least negative contributions for each data sample that will be retained in the FeatureContribution column.
/// Note that if there are fewer features with negative contributions than <paramref name="bottom"/>, the rest will be returned as zeros.</param>
/// <param name="numPositiveContributions">The number of positive contributions to report, sorted from highest magnitude to lowest magnitude.
/// Note that if there are fewer features with positive contributions than <paramref name="numPositiveContributions"/>, the rest will be returned as zeros.</param>
/// <param name="numNegativeContributions">The number of negative contributions to report, sorted from highest magnitude to lowest magnitude.
/// Note that if there are fewer features with negative contributions than <paramref name="numNegativeContributions"/>, the rest will be returned as zeros.</param>
/// <param name="normalize">Whether the feature contributions should be normalized to the [-1, 1] interval.</param>
public static FeatureContributionCalculatingEstimator FeatureContributionCalculation(this ModelOperationsCatalog.ExplainabilityTransforms catalog,
ICalculateFeatureContribution modelParameters,
string featureColumn = DefaultColumnNames.Features,
int top = FeatureContributionDefaults.Top,
int bottom = FeatureContributionDefaults.Bottom,
int numPositiveContributions = FeatureContributionDefaults.NumPositiveContributions,
int numNegativeContributions = FeatureContributionDefaults.NumNegativeContributions,
bool normalize = FeatureContributionDefaults.Normalize)
=> new FeatureContributionCalculatingEstimator(CatalogUtils.GetEnvironment(catalog), modelParameters, featureColumn, top, bottom, normalize);
=> new FeatureContributionCalculatingEstimator(CatalogUtils.GetEnvironment(catalog), modelParameters, featureColumn, numPositiveContributions, numNegativeContributions, normalize);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,17 @@
namespace Microsoft.ML.Data
{
/// <summary>
/// The FeatureContributionCalculationTransformer computes model-specific contribution scores for each feature.
/// The FeatureContributionCalculatingTransformer computes model-specific per-feature contributions to the score of each example.
/// See the list of currently supported models below.
/// </summary>
/// <remarks>
/// <para>
/// Scoring a dataset with a trained model produces a score, or prediction, for each example. To understand and explain these predictions
/// it can be useful to inspect which features influenced them most significantly. FeatureContributionCalculatingTransformer computes a model-specific
/// list of per-feature contributions to the score for each example. These contributions can be positive (they make the score higher) or negative
/// (they make the score lower).
/// </para>
/// <para>
/// Feature Contribution Calculation is currently supported for the following models:
/// Regression:
/// OrdinaryLeastSquares, StochasticDualCoordinateAscent (SDCA), OnlineGradientDescent, PoissonRegression,
Expand All @@ -39,8 +46,25 @@ namespace Microsoft.ML.Data
/// FastForest, FastTree, LightGbm
/// Ranking:
/// FastTree, LightGbm
///
/// </para>
/// <para>
/// For linear models, the contribution of a given feature is equal to the product of feature value times the corresponding weight. Similarly,
/// for Generalized Additive Models (GAM), the contribution of a feature is equal to the shape function for the given feature evaluated at
/// the feature value.
/// </para>
/// <para>
/// For tree-based models, the calculation of feature contribution essentially consists in determining which splits in the tree have the most impact
/// on the final score and assigning the value of the impact to the features determining the split. More precisely, the contribution of a feature
/// is equal to the change in score produced by exploring the opposite sub-tree every time a decision node for the given feature is encountered.
/// Consider a simple case with a single decision tree that has a decision node for the binary feature F1. Given an example that has feature F1
/// equal to true, we can calculate the score it would have obtained if we chose the subtree corresponding to the feature F1 being equal to false
/// while keeping the other features constant. The contribution of feature F1 for the given example is the difference between the original score
/// and the score obtained by taking the opposite decision at the node corresponding to feature F1. This algorithm extends naturally to models with
/// many decision trees.
/// </para>
/// <para>
/// See the sample below for an example of how to compute feature importance using the FeatureContributionCalculatingTransformer.
/// </para>
/// </remarks>
/// <example>
/// <format type="text/markdown">
Expand All @@ -60,10 +84,10 @@ public sealed class Arguments : TransformInputBase
public string FeatureColumn = DefaultColumnNames.Features;

[Argument(ArgumentType.AtMostOnce, HelpText = "Number of top contributions", SortOrder = 3)]
public int Top = FeatureContributionCalculatingEstimator.Defaults.Top;
public int Top = FeatureContributionCalculatingEstimator.Defaults.NumPositiveContributions;

[Argument(ArgumentType.AtMostOnce, HelpText = "Number of bottom contributions", SortOrder = 4)]
public int Bottom = FeatureContributionCalculatingEstimator.Defaults.Bottom;
public int Bottom = FeatureContributionCalculatingEstimator.Defaults.NumNegativeContributions;

[Argument(ArgumentType.AtMostOnce, HelpText = "Whether or not output of Features contribution should be normalized", ShortName = "norm", SortOrder = 5)]
public bool Normalize = FeatureContributionCalculatingEstimator.Defaults.Normalize;
Expand Down Expand Up @@ -98,32 +122,32 @@ private static VersionInfo GetVersionInfo()
/// <param name="env">The environment to use.</param>
/// <param name="modelParameters">Trained model parameters that support Feature Contribution Calculation and which will be used for scoring.</param>
/// <param name="featureColumn">The name of the feature column that will be used as input.</param>
/// <param name="top">The number of features with highest positive contributions for each data sample that will be retained in the FeatureContribution column.
/// Note that if there are fewer features with positive contributions than <paramref name="top"/>, the rest will be returned as zeros.</param>
/// <param name="bottom">The number of features with least negative contributions for each data sample that will be retained in the FeatureContribution column.
/// Note that if there are fewer features with negative contributions than <paramref name="bottom"/>, the rest will be returned as zeros.</param>
/// <param name="numPositiveContributions">The number of positive contributions to report, sorted from highest magnitude to lowest magnitude.
/// Note that if there are fewer features with positive contributions than <paramref name="numPositiveContributions"/>, the rest will be returned as zeros.</param>
/// <param name="numNegativeContributions">The number of negative contributions to report, sorted from highest magnitude to lowest magnitude.
/// Note that if there are fewer features with negative contributions than <paramref name="numNegativeContributions"/>, the rest will be returned as zeros.</param>
/// <param name="normalize">Whether the feature contributions should be normalized to the [-1, 1] interval.</param>
public FeatureContributionCalculatingTransformer(IHostEnvironment env, ICalculateFeatureContribution modelParameters,
string featureColumn = DefaultColumnNames.Features,
int top = FeatureContributionCalculatingEstimator.Defaults.Top,
int bottom = FeatureContributionCalculatingEstimator.Defaults.Bottom,
int numPositiveContributions = FeatureContributionCalculatingEstimator.Defaults.NumPositiveContributions,
int numNegativeContributions = FeatureContributionCalculatingEstimator.Defaults.NumNegativeContributions,
bool normalize = FeatureContributionCalculatingEstimator.Defaults.Normalize)
: base(Contracts.CheckRef(env, nameof(env)).Register(nameof(FeatureContributionCalculatingTransformer)), new[] { (input: featureColumn, output: DefaultColumnNames.FeatureContributions) })
{
Host.CheckValue(modelParameters, nameof(modelParameters));
Host.CheckNonEmpty(featureColumn, nameof(featureColumn));
if (top < 0)
if (numPositiveContributions < 0)
throw Host.Except($"Number of top contribution must be non negative");
if (bottom < 0)
if (numNegativeContributions < 0)
throw Host.Except($"Number of bottom contribution must be non negative");

// If a predictor implements ICalculateFeatureContribution, it also implements the internal interface IFeatureContributionMapper.
// This is how we keep the implementation of feature contribution calculation internal.
_predictor = modelParameters as IFeatureContributionMapper;
Host.AssertValue(_predictor);

Top = top;
Bottom = bottom;
Top = numPositiveContributions;
Bottom = numNegativeContributions;
Normalize = normalize;
}

Expand Down Expand Up @@ -258,8 +282,8 @@ public sealed class FeatureContributionCalculatingEstimator : TrivialEstimator<F

public static class Defaults
{
public const int Top = 10;
public const int Bottom = 10;
public const int NumPositiveContributions = 10;
public const int NumNegativeContributions = 10;
public const bool Normalize = true;
}

Expand All @@ -270,18 +294,18 @@ public static class Defaults
/// <param name="env">The environment to use.</param>
/// <param name="modelParameters">Trained model parameters that support Feature Contribution Calculation and which will be used for scoring.</param>
/// <param name="featureColumn">The name of the feature column that will be used as input.</param>
/// <param name="top">The number of features with highest positive contributions for each data sample that will be retained in the FeatureContribution column.
/// Note that if there are fewer features with positive contributions than <paramref name="top"/>, the rest will be returned as zeros.</param>
/// <param name="bottom">The number of features with least negative contributions for each data sample that will be retained in the FeatureContribution column.
/// Note that if there are fewer features with negative contributions than <paramref name="bottom"/>, the rest will be returned as zeros.</param>
/// <param name="numPositiveContributions">The number of positive contributions to report, sorted from highest magnitude to lowest magnitude.
/// Note that if there are fewer features with positive contributions than <paramref name="numPositiveContributions"/>, the rest will be returned as zeros.</param>
/// <param name="numNegativeContributions">The number of negative contributions to report, sorted from highest magnitude to lowest magnitude.
/// Note that if there are fewer features with negative contributions than <paramref name="numNegativeContributions"/>, the rest will be returned as zeros.</param>
/// <param name="normalize">Whether the feature contributions should be normalized to the [-1, 1] interval.</param>
public FeatureContributionCalculatingEstimator(IHostEnvironment env, ICalculateFeatureContribution modelParameters,
string featureColumn = DefaultColumnNames.Features,
int top = Defaults.Top,
int bottom = Defaults.Bottom,
int numPositiveContributions = Defaults.NumPositiveContributions,
int numNegativeContributions = Defaults.NumNegativeContributions,
bool normalize = Defaults.Normalize)
: base(Contracts.CheckRef(env, nameof(env)).Register(nameof(FeatureContributionCalculatingTransformer)),
new FeatureContributionCalculatingTransformer(env, modelParameters, featureColumn, top, bottom, normalize))
new FeatureContributionCalculatingTransformer(env, modelParameters, featureColumn, numPositiveContributions, numNegativeContributions, normalize))
{
_featureColumn = featureColumn;
_predictor = modelParameters;
Expand Down
12 changes: 12 additions & 0 deletions src/Microsoft.ML.FastTree/FastTree.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2837,6 +2837,18 @@ public abstract class TreeEnsembleModelParameters :
bool ICanSavePfa.CanSavePfa => true;

bool ICanSaveOnnx.CanSaveOnnx(OnnxContext ctx) => true;

/// <summary>
/// Gets the calculator that <see cref="FeatureContributionCalculatingTransformer"/> relies on to work out how much each
/// feature contributed to the score of an example. A new calculator wrapping this model is created on every access.
/// </summary>
/// <remarks>
/// <para>
/// For tree ensembles, a feature's contribution is found by identifying the splits that influence the final score the most
/// and crediting that influence to the feature tested at each split. Concretely, each time a decision node for the feature
/// is reached, the score is re-evaluated as if the opposite sub-tree had been followed; the resulting change in score is the
/// feature's contribution.
/// </para>
/// <para>
/// Example: take a single decision tree with a decision node on the binary feature F1, and an example whose F1 is true.
/// Holding all other features constant, compute the score the example would receive if the F1-is-false subtree were taken
/// instead. The contribution of F1 for that example is the original score minus this alternative score. The same procedure
/// carries over naturally to models made of many decision trees.
/// </para>
/// </remarks>
// NOTE(review): the identifier is spelled "Claculator"; it is public API, so the spelling is kept as-is here.
public FeatureContributionCalculator FeatureContributionClaculator
{
    get { return new FeatureContributionCalculator(this); }
}

public TreeEnsembleModelParameters(IHostEnvironment env, string name, TreeEnsemble trainedEnsemble, int numFeatures, string innerArgs)
Expand Down
5 changes: 5 additions & 0 deletions src/Microsoft.ML.FastTree/GamTrainer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -668,6 +668,11 @@ public abstract class GamModelParametersBase : ModelParametersBase<float>, IValu
ColumnType IValueMapper.InputType => _inputType;
ColumnType IValueMapper.OutputType => _outputType;

/// <summary>
/// Gets the calculator that <see cref="FeatureContributionCalculatingTransformer"/> relies on to work out how much each
/// feature contributed to the score of an example. A new calculator wrapping this model is created on every access.
/// </summary>
/// <remarks>
/// In a Generalized Additive Model (GAM), the contribution of a feature is the value of that feature's shape function
/// evaluated at the feature value.
/// </remarks>
// NOTE(review): the identifier is spelled "Claculator"; it is public API, so the spelling is kept as-is here.
public FeatureContributionCalculator FeatureContributionClaculator
{
    get { return new FeatureContributionCalculator(this); }
}

private protected GamModelParametersBase(IHostEnvironment env, string name,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,10 @@ public IEnumerator<float> GetEnumerator()

bool ICanSaveOnnx.CanSaveOnnx(OnnxContext ctx) => true;

/// <summary>
/// Gets the calculator that <see cref="FeatureContributionCalculatingTransformer"/> relies on to work out how much each
/// feature contributed to the score of an example. A new calculator wrapping this model is created on every access.
/// </summary>
/// <remarks>
/// In a linear model, the contribution of a feature is the feature value multiplied by the weight that corresponds to it.
/// </remarks>
// NOTE(review): the identifier is spelled "Claculator"; it is public API, so the spelling is kept as-is here.
public FeatureContributionCalculator FeatureContributionClaculator
{
    get { return new FeatureContributionCalculator(this); }
}

/// <summary>
Expand Down
Loading