|
| 1 | +/* |
| 2 | + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one |
| 3 | + * or more contributor license agreements. Licensed under the Elastic License; |
| 4 | + * you may not use this file except in compliance with the Elastic License. |
| 5 | + */ |
| 6 | +#ifndef INCLUDED_ml_api_CInferenceModelMetadata_h |
| 7 | +#define INCLUDED_ml_api_CInferenceModelMetadata_h |
| 8 | + |
#include <maths/CBasicStatistics.h>
#include <maths/CLinearAlgebraEigen.h>

#include <api/CInferenceModelDefinition.h>
#include <api/ImportExport.h>

#include <cstddef>
#include <string>
#include <unordered_map>
#include <vector>
| 16 | + |
| 17 | +namespace ml { |
| 18 | +namespace api { |
| 19 | + |
| 20 | +//! \brief Class controls the serialization of the model meta information |
| 21 | +//! (such as totol feature importance) into JSON format. |
| 22 | +class API_EXPORT CInferenceModelMetadata { |
| 23 | +public: |
| 24 | + static const std::string JSON_CLASS_NAME_TAG; |
| 25 | + static const std::string JSON_CLASSES_TAG; |
| 26 | + static const std::string JSON_FEATURE_NAME_TAG; |
| 27 | + static const std::string JSON_IMPORTANCE_TAG; |
| 28 | + static const std::string JSON_MAX_TAG; |
| 29 | + static const std::string JSON_MEAN_MAGNITUDE_TAG; |
| 30 | + static const std::string JSON_MIN_TAG; |
| 31 | + static const std::string JSON_MODEL_METADATA_TAG; |
| 32 | + static const std::string JSON_TOTAL_FEATURE_IMPORTANCE_TAG; |
| 33 | + |
| 34 | +public: |
| 35 | + using TVector = maths::CDenseVector<double>; |
| 36 | + using TStrVec = std::vector<std::string>; |
| 37 | + using TRapidJsonWriter = core::CRapidJsonConcurrentLineWriter; |
| 38 | + |
| 39 | +public: |
| 40 | + //! Writes metadata using \p writer. |
| 41 | + void write(TRapidJsonWriter& writer) const; |
| 42 | + void columnNames(const TStrVec& columnNames); |
| 43 | + void classValues(const TStrVec& classValues); |
| 44 | + const std::string& typeString() const; |
| 45 | + //! Add importances \p values to the feature with index \p i to calculate total feature importance. |
| 46 | + //! Total feature importance is the mean of the magnitudes of importances for individual data points. |
| 47 | + void addToFeatureImportance(std::size_t i, const TVector& values); |
| 48 | + |
| 49 | +private: |
| 50 | + using TMeanVarAccumulator = maths::CBasicStatistics::SSampleMeanVar<TVector>::TAccumulator; |
| 51 | + using TMinMaxAccumulator = std::vector<maths::CBasicStatistics::CMinMax<double>>; |
| 52 | + using TSizeMeanVarAccumulatorUMap = std::unordered_map<std::size_t, TMeanVarAccumulator>; |
| 53 | + using TSizeMinMaxAccumulatorUMap = std::unordered_map<std::size_t, TMinMaxAccumulator>; |
| 54 | + |
| 55 | +private: |
| 56 | + void writeTotalFeatureImportance(TRapidJsonWriter& writer) const; |
| 57 | + |
| 58 | +private: |
| 59 | + TSizeMeanVarAccumulatorUMap m_TotalShapValuesMeanVar; |
| 60 | + TSizeMinMaxAccumulatorUMap m_TotalShapValuesMinMax; |
| 61 | + TStrVec m_ColumnNames; |
| 62 | + TStrVec m_ClassValues; |
| 63 | +}; |
| 64 | +} |
| 65 | +} |
| 66 | + |
| 67 | +#endif //INCLUDED_ml_api_CInferenceModelMetadata_h |
0 commit comments