Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
28a877c
work on fcc
artidoro Dec 3, 2018
81403e9
continuing work on fcc
artidoro Dec 4, 2018
7a6dc5c
work on fcc
artidoro Dec 3, 2018
9a8d897
continuing work on fcc
artidoro Dec 4, 2018
3fa528c
looking at load and save
artidoro Dec 5, 2018
c01d504
work on fcc
artidoro Dec 5, 2018
fa296c4
continuing work
artidoro Dec 5, 2018
7448acb
work on fcc
artidoro Dec 6, 2018
f8c53ef
creating IRowMapper instead of IRowToRowMapper
artidoro Dec 7, 2018
53dae2c
working transformer and estimator passing tests with IRowMapper
artidoro Dec 7, 2018
39b6537
put everything back in the original file, made SchemaBoundMapper
artidoro Dec 7, 2018
701801f
rename file
artidoro Dec 7, 2018
0c19775
rebased and fixed merge errors
artidoro Dec 7, 2018
6d1392a
simplified code
artidoro Dec 7, 2018
8979e25
fixed bug for stringify
artidoro Dec 7, 2018
fb8800e
Began adding documentation and argument checks
artidoro Dec 8, 2018
ab4d551
added Scoring utils file to handle all scenarios
artidoro Dec 10, 2018
1ee263c
added explainability subcatalog and mlcontext extension
artidoro Dec 11, 2018
e9dd2f2
fix test error
artidoro Dec 11, 2018
f835bb3
setting up to write tests for all other predictors
artidoro Dec 11, 2018
10816f5
added tests for most predictors
artidoro Dec 12, 2018
3bd537f
review comments
artidoro Dec 12, 2018
490965f
updated tests to include top bottom
artidoro Dec 13, 2018
c51d26b
modified sample and added GAM test
artidoro Dec 13, 2018
6296dd6
cleanup
artidoro Dec 13, 2018
99d862a
rename
artidoro Dec 13, 2018
aadfe79
added more tests for binary predictors
artidoro Dec 13, 2018
9289942
review comments, best friends, secret service to hide interface inter…
artidoro Dec 14, 2018
5ee6186
entrypoints
artidoro Dec 15, 2018
9400dd3
minor changes
artidoro Dec 17, 2018
21cd867
doc change
artidoro Dec 17, 2018
03cfb2d
small comment and precision changes
artidoro Dec 17, 2018
7601baa
precision
artidoro Dec 17, 2018
01296da
skipping poisson regression test
artidoro Dec 17, 2018
a21646e
precision
artidoro Dec 17, 2018
c181182
small improvements
artidoro Dec 18, 2018
a9e9718
transitioning to simpler transformer estimator
artidoro Dec 18, 2018
e26a818
refactored all to make it simpler
artidoro Dec 19, 2018
1d45043
fixed test
artidoro Dec 19, 2018
344dfc9
numthreads and csharpapi
artidoro Dec 19, 2018
651088e
comments
artidoro Dec 19, 2018
d0ee92a
rename and added tests back
artidoro Dec 19, 2018
4f5ae8c
brought files back to data as suggested in review comment
artidoro Dec 19, 2018
4fda4e6
review comment
artidoro Dec 19, 2018
49c69b8
added command line tests
artidoro Dec 19, 2018
ab4f6db
review comments
artidoro Dec 20, 2018
33c5b3b
minor changes
artidoro Dec 20, 2018
5c3197e
precision for test intrinsics fix
artidoro Dec 20, 2018
5df08fc
merge
artidoro Dec 20, 2018
bf22311
csharpapi
artidoro Dec 20, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -48,32 +48,36 @@ public static void FeatureContributionCalculationTransform_Regression()
var transformPipeline = mlContext.Transforms.Concatenate("Features", "CrimesPerCapita", "PercentResidental",
"PercentNonRetail", "CharlesRiver", "NitricOxides", "RoomsPerDwelling", "PercentPre40s",
"EmploymentDistance", "HighwayDistance", "TaxRate", "TeacherRatio");
var learner = mlContext.Regression.Trainers.StochasticDualCoordinateAscent(
var learner = mlContext.Regression.Trainers.OrdinaryLeastSquares(
labelColumn: "MedianHomeValue", featureColumn: "Features");

var transformedData = transformPipeline.Fit(data).Transform(data);

// Now we train the model and score it on the transformed data.
var model = learner.Fit(transformedData);
var scoredData = model.Transform(transformedData);

// Create a Feature Contribution Calculator
// Calculate the feature contributions for all features
// Calculate the feature contributions for all features given trained model parameters
// And don't normalize the contribution scores
var args = new FeatureContributionCalculationTransform.Arguments()
{
Top = 11,
Normalize = false
};
var featureContributionCalculator = FeatureContributionCalculationTransform.Create(mlContext, args, transformedData, model.Model, model.FeatureColumn);
var featureContributionCalculator = mlContext.Model.Explainability.FeatureContributionCalculation(model.Model, model.FeatureColumn, top: 11, normalize: false);
var outputData = featureContributionCalculator.Fit(scoredData).Transform(scoredData);

// FeatureContributionCalculatingEstimator can be used as an intermediary step in a pipeline.
// The features retained by FeatureContributionCalculatingEstimator will be in the FeatureContribution column.
var pipeline = mlContext.Model.Explainability.FeatureContributionCalculation(model.Model, model.FeatureColumn, top: 11)
.Append(mlContext.Regression.Trainers.OrdinaryLeastSquares(featureColumn: "FeatureContributions"));
var outData = featureContributionCalculator.Fit(scoredData).Transform(scoredData);

// Let's extract the weights from the linear model to use as a comparison
var weights = new VBuffer<float>();
model.Model.GetFeatureWeights(ref weights);

// Let's now walk through the first ten records and see which feature drove the values the most
// Get prediction scores and contributions
var scoringEnumerator = featureContributionCalculator.AsEnumerable<HousingRegressionScoreAndContribution>(mlContext, true).GetEnumerator();
var scoringEnumerator = outputData.AsEnumerable<HousingRegressionScoreAndContribution>(mlContext, true).GetEnumerator();
int index = 0;
Console.WriteLine("Label\tScore\tBiggestFeature\tValue\tWeight\tContribution\tPercent");
Console.WriteLine("Label\tScore\tBiggestFeature\tValue\tWeight\tContribution");
while (scoringEnumerator.MoveNext() && index < 10)
{
var row = scoringEnumerator.Current;
Expand All @@ -84,26 +88,34 @@ public static void FeatureContributionCalculationTransform_Regression()
// And the corresponding information about the feature
var value = row.Features[featureOfInterest];
var contribution = row.FeatureContributions[featureOfInterest];
var percentContribution = 100 * contribution / row.Score;
var name = data.Schema[(int) (featureOfInterest + 1)].Name;
var name = data.Schema[featureOfInterest + 1].Name;
var weight = weights.GetValues()[featureOfInterest];

Console.WriteLine("{0:0.00}\t{1:0.00}\t{2}\t{3:0.00}\t{4:0.00}\t{5:0.00}\t{6:0.00}",
Console.WriteLine("{0:0.00}\t{1:0.00}\t{2}\t{3:0.00}\t{4:0.00}\t{5:0.00}",
row.MedianHomeValue,
row.Score,
name,
value,
weight,
contribution,
percentContribution
contribution
);

index++;
}

// For bulk scoring, the ApplyToData API can also be used
var scoredData = featureContributionCalculator.ApplyToData(mlContext, transformedData);
var preview = scoredData.Preview(100);
Console.ReadLine();

// The output of the above code is:
// Label Score BiggestFeature Value Weight Contribution
// 24.00 27.74 RoomsPerDwelling 6.58 98.55 39.95
// 21.60 23.85 RoomsPerDwelling 6.42 98.55 39.01
// 34.70 29.29 RoomsPerDwelling 7.19 98.55 43.65
// 33.40 27.17 RoomsPerDwelling 7.00 98.55 42.52
// 36.20 27.68 RoomsPerDwelling 7.15 98.55 43.42
// 28.70 23.13 RoomsPerDwelling 6.43 98.55 39.07
// 22.90 22.71 RoomsPerDwelling 6.01 98.55 36.53
// 27.10 21.72 RoomsPerDwelling 6.17 98.55 37.50
// 16.50 18.04 RoomsPerDwelling 5.63 98.55 34.21
// 18.90 20.14 RoomsPerDwelling 6.00 98.55 36.48
}

private static int GetMostContributingFeature(float[] featureContributions)
Expand Down
2 changes: 1 addition & 1 deletion docs/samples/Microsoft.ML.Samples/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ internal static class Program
{
static void Main(string[] args)
{
TensorFlowTransformExample.TensorFlowScoringSample();
FeatureContributionCalculationTransform_RegressionExample.FeatureContributionCalculationTransform_Regression();
}
}
}
11 changes: 6 additions & 5 deletions src/Microsoft.ML.Core/Data/MetadataUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -448,11 +448,11 @@ internal static bool TryGetCategoricalFeatureIndices(Schema schema, int colIndex
if (!(schema[colIndex].Type is VectorType vecType && vecType.Size > 0))
return isValid;

var type = schema[colIndex].Metadata.Schema.GetColumnOrNull(MetadataUtils.Kinds.CategoricalSlotRanges)?.Type;
var type = schema[colIndex].Metadata.Schema.GetColumnOrNull(Kinds.CategoricalSlotRanges)?.Type;
if (type?.RawType == typeof(VBuffer<int>))
{
VBuffer<int> catIndices = default(VBuffer<int>);
schema[colIndex].Metadata.GetValue(MetadataUtils.Kinds.CategoricalSlotRanges, ref catIndices);
schema[colIndex].Metadata.GetValue(Kinds.CategoricalSlotRanges, ref catIndices);
VBufferUtils.Densify(ref catIndices);
int columnSlotsCount = vecType.Size;
if (catIndices.Length > 0 && catIndices.Length % 2 == 0 && catIndices.Length <= columnSlotsCount * 2)
Expand Down Expand Up @@ -498,14 +498,15 @@ internal static bool TryGetCategoricalFeatureIndices(Schema schema, int colIndex
}

/// <summary>
/// Produces sequence of columns that are generated by multiclass trainer estimators.
/// Produces metadata for the score column generated by trainer estimators for multiclass classification.
/// If input LabelColumn is not available it produces slotnames metadata by default.
/// </summary>
/// <param name="labelColumn">Label column.</param>
[BestFriend]
internal static IEnumerable<SchemaShape.Column> MetadataForMulticlassScoreColumn(SchemaShape.Column labelColumn)
internal static IEnumerable<SchemaShape.Column> MetadataForMulticlassScoreColumn(SchemaShape.Column? labelColumn = null)
{
var cols = new List<SchemaShape.Column>();
if (labelColumn.IsKey && HasKeyValues(labelColumn))
if (labelColumn != null && labelColumn.Value.IsKey && HasKeyValues(labelColumn.Value))
cols.Add(new SchemaShape.Column(Kinds.SlotNames, SchemaShape.Column.VectorKind.Vector, TextType.Instance, false));
cols.AddRange(GetTrainerOutputMetadata());
return cols;
Expand Down
1 change: 0 additions & 1 deletion src/Microsoft.ML.Data/Dirty/PredictorBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
using Float = System.Single;

using System;
using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.Model;

namespace Microsoft.ML.Runtime.Internal.Internallearn
Expand Down
17 changes: 17 additions & 0 deletions src/Microsoft.ML.Data/Dirty/PredictorInterfaces.cs
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,23 @@ internal interface IFeatureContributionMapper : IPredictor
ValueMapper<TSrc, VBuffer<float>> GetFeatureContributionMapper<TSrc, TDst>(int top, int bottom, bool normalize);
}

/// <summary>
/// Allows support for feature contribution calculation.
/// </summary>
public interface ICalculateFeatureContribution : IPredictor
{
FeatureContributionCalculator FeatureContributionClaculator { get; }
}

/// <summary>
/// Support for feature contribution calculation.
/// </summary>
public sealed class FeatureContributionCalculator
{
internal IFeatureContributionMapper ContributionMapper { get; }
internal FeatureContributionCalculator(IFeatureContributionMapper contributionMapper) => ContributionMapper = contributionMapper;
}

/// <summary>
/// Interface for predictors that can return a string array containing the label names from the label column they were trained on.
/// If the training label is a key with text key value metadata, it should return this metadata. The order of the labels should be consistent
Expand Down
25 changes: 25 additions & 0 deletions src/Microsoft.ML.Data/Model/ModelOperationsCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,25 @@ public sealed class ModelOperationsCatalog
{
internal IHostEnvironment Environment { get; }

public ExplainabilityTransforms Explainability { get; }

internal ModelOperationsCatalog(IHostEnvironment env)
{
Contracts.AssertValue(env);
Environment = env;

Explainability = new ExplainabilityTransforms(this);
}

public abstract class SubCatalogBase
{
internal IHostEnvironment Environment { get; }

protected SubCatalogBase(ModelOperationsCatalog owner)
{
Environment = owner.Environment;
}

}

/// <summary>
Expand All @@ -36,6 +51,16 @@ internal ModelOperationsCatalog(IHostEnvironment env)
/// <returns>The loaded model.</returns>
public ITransformer Load(Stream stream) => TransformerChain.LoadFrom(Environment, stream);

/// <summary>
/// The catalog of model explainability operations.
/// </summary>
public sealed class ExplainabilityTransforms : SubCatalogBase
{
internal ExplainabilityTransforms(ModelOperationsCatalog owner) : base(owner)
{
}
}

/// <summary>
/// Create a prediction engine for one-time prediction.
/// </summary>
Expand Down
5 changes: 4 additions & 1 deletion src/Microsoft.ML.Data/Prediction/Calibrator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ protected static ICalibrator GetCalibrator(IHostEnvironment env, ModelLoadContex
}
}

public abstract class ValueMapperCalibratedPredictorBase : CalibratedPredictorBase, IValueMapperDist, IFeatureContributionMapper,
public abstract class ValueMapperCalibratedPredictorBase : CalibratedPredictorBase, IValueMapperDist, IFeatureContributionMapper, ICalculateFeatureContribution,
IDistCanSavePfa, IDistCanSaveOnnx
{
private readonly IValueMapper _mapper;
Expand All @@ -216,6 +216,9 @@ public abstract class ValueMapperCalibratedPredictorBase : CalibratedPredictorBa
ColumnType IValueMapper.OutputType => _mapper.OutputType;
ColumnType IValueMapperDist.DistType => NumberType.Float;
bool ICanSavePfa.CanSavePfa => (_mapper as ICanSavePfa)?.CanSavePfa == true;

public FeatureContributionCalculator FeatureContributionClaculator => new FeatureContributionCalculator(this);

bool ICanSaveOnnx.CanSaveOnnx(OnnxContext ctx) => (_mapper as ICanSaveOnnx)?.CanSaveOnnx(ctx) == true;

protected ValueMapperCalibratedPredictorBase(IHostEnvironment env, string name, IPredictorProducing<float> predictor, ICalibrator calibrator)
Expand Down
Loading