Skip to content

Commit 88c8951

Browse files
authored
[ML] Fix total feature importance unit tests (#1519)
We have an occasional failure in the total feature importance unit tests like this one. It happens while for some reason the mean value accumulator fails to produce correct results: { "feature_name": "c2", "classes": [ { "class_name": "foo", "importance": { "mean_magnitude": 0, "min": -3.288343526748277, "max": 3.288343526748277 } }, { "class_name": "bar", "importance": { "mean_magnitude": 0, "min": -3.288343526748277, "max": 3.288343526748277 } } ] } In an attempt to fix this I changed the mean accumulator from using the eigen vector type to an explicit vector of doubles same as the min/max accumulator. I keep the instrumentation of the unit tests for now so we have enough info if the tests continue to fail.
1 parent 53216fe commit 88c8951

File tree

2 files changed

+11
-7
lines changed

2 files changed

+11
-7
lines changed

include/api/CInferenceModelMetadata.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,16 +50,17 @@ class API_EXPORT CInferenceModelMetadata {
5050
void addToFeatureImportance(std::size_t i, const TVector& values);
5151

5252
private:
53-
using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar<TVector>::TAccumulator;
53+
using TMeanAccumulator =
54+
std::vector<maths::CBasicStatistics::SSampleMean<double>::TAccumulator>;
5455
using TMinMaxAccumulator = std::vector<maths::CBasicStatistics::CMinMax<double>>;
55-
using TSizeMeanVarAccumulatorUMap = std::unordered_map<std::size_t, TMeanVarAccumulator>;
56+
using TSizeMeanAccumulatorUMap = std::unordered_map<std::size_t, TMeanAccumulator>;
5657
using TSizeMinMaxAccumulatorUMap = std::unordered_map<std::size_t, TMinMaxAccumulator>;
5758

5859
private:
5960
void writeTotalFeatureImportance(TRapidJsonWriter& writer) const;
6061

6162
private:
62-
TSizeMeanVarAccumulatorUMap m_TotalShapValuesMeanVar;
63+
TSizeMeanAccumulatorUMap m_TotalShapValuesMean;
6364
TSizeMinMaxAccumulatorUMap m_TotalShapValuesMinMax;
6465
TStrVec m_ColumnNames;
6566
TStrVec m_ClassValues;

lib/api/CInferenceModelMetadata.cc

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
*/
66
#include <api/CInferenceModelMetadata.h>
77

8+
#include <cmath>
9+
810
namespace ml {
911
namespace api {
1012

@@ -15,7 +17,7 @@ void CInferenceModelMetadata::write(TRapidJsonWriter& writer) const {
1517
void CInferenceModelMetadata::writeTotalFeatureImportance(TRapidJsonWriter& writer) const {
1618
writer.Key(JSON_TOTAL_FEATURE_IMPORTANCE_TAG);
1719
writer.StartArray();
18-
for (const auto& item : m_TotalShapValuesMeanVar) {
20+
for (const auto& item : m_TotalShapValuesMean) {
1921
writer.StartObject();
2022
writer.Key(JSON_FEATURE_NAME_TAG);
2123
writer.String(m_ColumnNames[item.first]);
@@ -104,14 +106,15 @@ void CInferenceModelMetadata::predictionFieldTypeResolverWriter(
104106
}
105107

106108
void CInferenceModelMetadata::addToFeatureImportance(std::size_t i, const TVector& values) {
107-
m_TotalShapValuesMeanVar
108-
.emplace(std::make_pair(i, TVector::Zero(values.size())))
109-
.first->second.add(values.cwiseAbs());
109+
auto& meanVector = m_TotalShapValuesMean
110+
.emplace(std::make_pair(i, TMeanAccumulator(values.size())))
111+
.first->second;
110112
auto& minMaxVector =
111113
m_TotalShapValuesMinMax
112114
.emplace(std::make_pair(i, TMinMaxAccumulator(values.size())))
113115
.first->second;
114116
for (std::size_t j = 0; j < minMaxVector.size(); ++j) {
117+
meanVector[j].add(std::fabs(values[j]));
115118
minMaxVector[j].add(values[j]);
116119
}
117120
}

0 commit comments

Comments
 (0)