
Commit c6ee265

Adding documentation for the first group of transforms

1 parent 5f52db8 commit c6ee265

File tree

20 files changed, +396 -55 lines changed

src/Microsoft.ML.Data/Transforms/NAFilter.cs

Lines changed: 1 addition & 0 deletions
@@ -26,6 +26,7 @@

 namespace Microsoft.ML.Runtime.Data
 {
+    /// <include file='doc.xml' path='doc/members/member[@name="NAFilter"]'/>
     public sealed class NAFilter : FilterBase
     {
         private static class Defaults
src/Microsoft.ML.Data/Transforms/doc.xml

Lines changed: 56 additions & 0 deletions
@@ -0,0 +1,56 @@
+<?xml version="1.0" encoding="utf-8" ?>
+<doc>
+  <members>
+    <member name="NAFilter">
+      <summary>
+        Removes missing values from vector type columns.
+      </summary>
+      <remarks>
+        This transform removes the entire row if any of the input columns has a missing value in that row.
+        This preprocessing is required for many ML algorithms that cannot work with missing values.
+        It is useful whenever a missing entry invalidates the entire row.
+        If <see cref="Microsoft.ML.Runtime.Data.NAFilter.Defaults.Complement"/> is set to true, this transform does the exact opposite:
+        it keeps only the rows that have missing values.
+      </remarks>
+      <seealso cref="Microsoft.ML.Runtime.Data.MetadataUtils.Kinds.HasMissingValues"/>
+      <example>
+        <code>
+          pipeline.Add(new MissingValuesRowDropper(&quot;Column1&quot;));
+        </code>
+      </example>
+    </member>
+
+    <member name="NAHandle">
+      <summary>
+        Handles missing values by replacing them with either the default value or the indicated value.
+      </summary>
+      <remarks>
+        This transform handles missing values in the input columns. For each input column, it creates an output column
+        where the missing values are replaced by one of these specified values:
+        <list type="bullet">
+          <item><description>The default value of the appropriate type.</description></item>
+          <item><description>The mean value of the appropriate type.</description></item>
+          <item><description>The max value of the appropriate type.</description></item>
+          <item><description>The min value of the appropriate type.</description></item>
+        </list>
+        <para>The last three work only for numeric, TimeSpan, and DateTime columns.</para>
+        <para>The output column can also optionally include an indicator vector for which slots were missing in the input column.
+        This can be done only when the indicator vector type can be converted to the input column type, i.e. only for numeric columns.
+        </para>
+        <para>
+        When computing the mean/max/min value, there is also an option to compute it over the whole column instead of per slot.
+        This option defaults to true for variable-length vectors and false for known-length vectors.
+        It can be changed to true for known-length vectors, but it results in an error if changed to false for variable-length vectors.
+        </para>
+      </remarks>
+      <seealso cref="Microsoft.ML.Runtime.Data.MetadataUtils.Kinds.HasMissingValues"/>
+      <seealso cref="Microsoft.ML.Data.DataKind"/>
+      <example>
+        <code>
+          pipeline.Add(new MissingValueHandler(&quot;FeatureCol&quot;, &quot;CleanFeatureCol&quot;) { ReplaceWith = NAHandleTransformReplacementKind.Mean });
+        </code>
+      </example>
+    </member>
+
+  </members>
+</doc>
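
Note: to illustrate the per-slot mean replacement the NAHandle remarks describe, here is a minimal standalone C# sketch, not part of this commit. NaN stands in for a missing value, and all names are hypothetical.

using System;
using System.Linq;

static class MeanImputationDemo
{
    // Replaces NaN entries in each slot of the input vectors with that
    // slot's mean, computed over the non-missing values (per-slot mode).
    static float[][] ImputeMeanPerSlot(float[][] rows)
    {
        int numSlots = rows[0].Length;
        var means = new float[numSlots];
        for (int slot = 0; slot < numSlots; slot++)
        {
            var present = rows.Select(r => r[slot]).Where(v => !float.IsNaN(v)).ToArray();
            means[slot] = present.Length > 0 ? present.Average() : 0f;
        }
        return rows
            .Select(r => r.Select((v, slot) => float.IsNaN(v) ? means[slot] : v).ToArray())
            .ToArray();
    }

    static void Main()
    {
        var rows = new[]
        {
            new[] { 1f, float.NaN },
            new[] { 3f, 4f },
        };
        var clean = ImputeMeanPerSlot(rows);
        // The missing slot-1 value in row 0 becomes 4, that slot's mean.
        Console.WriteLine(clean[0][1]);
    }
}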

src/Microsoft.ML.FastTree/TreeEnsembleFeaturizer.cs

Lines changed: 6 additions & 1 deletion
@@ -544,6 +544,7 @@ public ISchemaBoundMapper Bind(IHostEnvironment env, RoleMappedSchema schema)
         }
     }

+    /// <include file='doc.xml' path='doc/members/member[@name="TreeEnsembleFeaturizerTransform"]'/>
     public static class TreeEnsembleFeaturizerTransform
     {
         public sealed class Arguments : TrainAndScoreTransform.ArgumentsBase<SignatureTreeEnsembleTrainer>

@@ -802,7 +803,11 @@ private static IDataView AppendLabelTransform(IHostEnvironment env, IChannel ch,

     public static partial class TreeFeaturize
     {
-        [TlcModule.EntryPoint(Name = "Transforms.TreeLeafFeaturizer", Desc = TreeEnsembleFeaturizerTransform.TreeEnsembleSummary, UserName = TreeEnsembleFeaturizerTransform.UserName, ShortName = TreeEnsembleFeaturizerBindableMapper.LoadNameShort)]
+        [TlcModule.EntryPoint(Name = "Transforms.TreeLeafFeaturizer",
+            Desc = TreeEnsembleFeaturizerTransform.TreeEnsembleSummary,
+            UserName = TreeEnsembleFeaturizerTransform.UserName,
+            ShortName = TreeEnsembleFeaturizerBindableMapper.LoadNameShort,
+            XmlInclude = new[] { @"<include file='../Microsoft.ML.FastTree/doc.xml' path='doc/members/member[@name=""TreeEnsembleFeaturizerTransform""]'/>" })]
         public static CommonOutputs.TransformOutput Featurizer(IHostEnvironment env, TreeEnsembleFeaturizerTransform.ArgumentsForEntryPoint input)
         {
             Contracts.CheckValue(env, nameof(env));

src/Microsoft.ML.FastTree/doc.xml

Lines changed: 58 additions & 0 deletions
@@ -73,6 +73,64 @@
       <para><a href='http://projecteuclid.org/DPubS?service=UI&amp;version=1.0&amp;verb=Display&amp;handle=euclid.aos/1013203451'>Greedy function approximation: A gradient boosting machine</a></para>
     </remarks>
   </member>
+
+  <member name="TreeEnsembleFeaturizerTransform">
+    <summary>
+      Trains a tree ensemble, or loads it from a file, then maps a numeric feature vector
+      to three outputs:
+      <list type="bullet">
+        <item>
+          <description>A vector containing the individual tree outputs of the tree ensemble.</description>
+        </item>
+        <item>
+          <description>A vector indicating the leaves that the feature vector falls on in the tree ensemble.</description>
+        </item>
+        <item>
+          <description>A vector indicating the paths that the feature vector falls on in the tree ensemble.</description>
+        </item>
+      </list>
+      If both a model file and a trainer are specified, the model file is used. If neither is specified,
+      a default FastTree model is trained.
+      The transform can handle key labels by training a regression model towards their optionally permuted indices.
+    </summary>
+    <remarks>
+      In machine learning, it is a common and powerful technique to use an already trained model to define features.
+      <para>The most obvious example is to use the model's scores as features for downstream models. For example, we might run clustering on the original features,
+      and use the cluster distances as the new feature set.
+      Instead of consuming the model's output, we can go deeper and extract the 'intermediate outputs' that are used to produce the final score.</para>
+      There are a number of well-known examples of this technique:
+      <list type="bullet">
+        <item>
+          <description>A deep neural net trained on the ImageNet dataset, with the last layer removed, is commonly used to compute the 'projection' of an image into a 'semantic feature space'.
+          It is observed that the Euclidean distance in this space often correlates with 'semantic similarity': that is, all pictures of pizza are located close together,
+          and far away from pictures of kittens.</description>
+        </item>
+        <item>
+          <description>A matrix factorization or LDA model is also often used to extract the 'latent topics' or 'latent features' associated with users and items.</description>
+        </item>
+        <item>
+          <description>The weights of a linear model are often used as a crude indicator of 'feature importance'. At the very minimum, the 0-weight features are not needed by the model,
+          and there is no reason to compute them.</description>
+        </item>
+      </list>
+      <para>The tree featurizer uses decision tree ensembles for feature engineering in the same fashion.</para>
+      <para>Let's assume that we have built a tree ensemble of 100 trees with 100 leaves each (it does not matter whether boosting was used in training).
+      If we associate each leaf of each tree with a sequential integer, we can, for every incoming example x,
+      produce an indicator vector L(x), where Li(x) = 1 if the example x 'falls' into leaf #i, and 0 otherwise.</para>
+      <para>Thus, for every example x, we produce a 10,000-valued vector L with exactly 100 ones and the rest zeros.
+      This 'leaf indicator' vector can be considered the ensemble-induced 'footprint' of the example.</para>
+      <para>The 'distance' between two examples in L-space is actually a Hamming distance, and is equal to the number of trees that do not distinguish the two examples.</para>
+      <para>We can repeat the same thought process for the non-leaf, or internal, nodes of the trees (each tree has exactly 99 of them in our 100-leaf example),
+      and produce another indicator vector, N (of size 9,900), for each example, indicating the 'trajectory' of the example through each of the trees.</para>
+      <para>The distance in the combined 19,900-dimensional LN-space is equal to the number of 'decisions' in all trees that 'agree' on the given pair of examples.</para>
+      <para>The TreeLeafFeaturizer also produces a third vector, T, defined as Ti(x) = output of tree #i on example x.</para>
+    </remarks>
+    <example>
+      <code>
+        pipeline.Add(new TreeLeafFeaturizer())
+      </code>
+    </example>
+  </member>

   </members>
</docs>
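
Note: to make the leaf-indicator construction L(x) concrete, here is a minimal standalone C# sketch, not part of this commit. It uses a hypothetical 2-leaf decision stump as the tree type; the real featurizer works on full FastTree ensembles.

using System;

// A toy decision stump: one threshold split over a single feature,
// i.e. a 2-leaf tree. An ensemble of such stumps is enough to show
// how the leaf-indicator vector L(x) is built.
class Stump
{
    public int FeatureIndex;
    public float Threshold;

    // Returns the leaf index (0 or 1) the example falls into.
    public int GetLeaf(float[] x) => x[FeatureIndex] <= Threshold ? 0 : 1;
}

static class LeafIndicatorDemo
{
    // Builds L(x): one slot per (tree, leaf) pair; exactly one slot
    // per tree is set to 1 -- the leaf the example falls into.
    static float[] BuildLeafIndicator(Stump[] ensemble, float[] x)
    {
        const int leavesPerTree = 2;
        var L = new float[ensemble.Length * leavesPerTree];
        for (int t = 0; t < ensemble.Length; t++)
            L[t * leavesPerTree + ensemble[t].GetLeaf(x)] = 1;
        return L;
    }

    static void Main()
    {
        var ensemble = new[]
        {
            new Stump { FeatureIndex = 0, Threshold = 0.5f },
            new Stump { FeatureIndex = 1, Threshold = 2.0f },
        };
        var L = BuildLeafIndicator(ensemble, new float[] { 0.3f, 3.0f });
        // Prints 1 0 0 1: leaf 0 of tree 0, leaf 1 of tree 1.
        Console.WriteLine(string.Join(" ", L));
    }
}

The Hamming distance between two such vectors counts the trees that route the two examples differently, as the remarks describe.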

src/Microsoft.ML.StandardLearners/Microsoft.ML.StandardLearners.csproj

Lines changed: 1 addition & 0 deletions
@@ -10,6 +10,7 @@
     <ProjectReference Include="..\Microsoft.ML.Core\Microsoft.ML.Core.csproj" />
     <ProjectReference Include="..\Microsoft.ML.CpuMath\Microsoft.ML.CpuMath.csproj" />
     <ProjectReference Include="..\Microsoft.ML.Data\Microsoft.ML.Data.csproj" />
+    <ProjectReference Include="..\Microsoft.ML.FastTree\Microsoft.ML.FastTree.csproj" />
     <ProjectReference Include="..\Microsoft.ML\Microsoft.ML.csproj" />
   </ItemGroup>

src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs

Lines changed: 4 additions & 7 deletions
@@ -26,18 +26,13 @@

 namespace Microsoft.ML.Runtime.Learners
 {
+    /// <include file='doc.xml' path='doc/members/member[@name="MultiClassNaiveBayesTrainer"]' />
     public sealed class MultiClassNaiveBayesTrainer : TrainerBase<RoleMappedData, MultiClassNaiveBayesPredictor>
     {
         public const string LoadName = "MultiClassNaiveBayes";
         internal const string UserName = "Multiclass Naive Bayes";
         internal const string ShortName = "MNB";
         internal const string Summary = "Trains a multiclass Naive Bayes predictor that supports binary feature values.";
-        internal const string Remarks = @"<remarks>
-            <a href ='https://en.wikipedia.org/wiki/Naive_Bayes_classifier'>Naive Bayes</a> is a probabilistic classifier that can be used for multiclass problems.
-            Using Bayes' theorem, the conditional probability for a sample belonging to a class can be calculated based on the sample count for each feature combination groups.
-            However, Naive Bayes Classifier is feasible only if the number of features and the values each feature can take is relatively small.
-            It also assumes that the features are strictly independent.
-            </remarks>";

         public sealed class Arguments : LearnerInputBaseWithLabel
         {

@@ -132,7 +127,9 @@ public override MultiClassNaiveBayesPredictor CreatePredictor()

         [TlcModule.EntryPoint(Name = "Trainers.NaiveBayesClassifier",
             Desc = "Train a MultiClassNaiveBayesTrainer.",
-            UserName = UserName, ShortName = ShortName)]
+            UserName = UserName,
+            ShortName = ShortName,
+            XmlInclude = new[] { @"<include file='../Microsoft.ML.StandardLearners/Standard/MultiClass/doc.xml' path='doc/members/member[@name=""MultiClassNaiveBayesTrainer""]'/>" } )]
         public static CommonOutputs.MulticlassClassificationOutput TrainMultiClassNaiveBayesTrainer(IHostEnvironment env, Arguments input)
         {
             Contracts.CheckValue(env, nameof(env));
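
Note: as context for the count-based scheme the remarks describe (conditional probabilities from sample counts over binary features, with a value greater than zero treated as true), here is a minimal C# sketch, not part of this commit and not the trainer's actual implementation.

using System;
using System.Linq;

// Minimal count-based Naive Bayes over binarized features: a feature
// value > 0 is treated as 'true', per the trainer's documentation.
class BinaryNaiveBayesSketch
{
    private readonly double[] _classPrior;     // P(c)
    private readonly double[][] _featureProb;  // P(feature on | c), Laplace-smoothed

    public BinaryNaiveBayesSketch(float[][] X, int[] y, int numClasses)
    {
        int numFeatures = X[0].Length;
        var classCount = new int[numClasses];
        var onCount = new int[numClasses][];
        for (int c = 0; c < numClasses; c++)
            onCount[c] = new int[numFeatures];

        // Count, per class, how many examples have each feature 'on'.
        for (int i = 0; i < X.Length; i++)
        {
            classCount[y[i]]++;
            for (int f = 0; f < numFeatures; f++)
                if (X[i][f] > 0)
                    onCount[y[i]][f]++;
        }

        _classPrior = classCount.Select(n => (double)n / X.Length).ToArray();
        _featureProb = new double[numClasses][];
        for (int c = 0; c < numClasses; c++)
            _featureProb[c] = onCount[c]
                .Select(n => (n + 1.0) / (classCount[c] + 2.0)) // Laplace smoothing
                .ToArray();
    }

    // Predicts the class with the highest log-posterior, assuming
    // feature independence within each class (the Naive Bayes assumption).
    public int Predict(float[] x)
    {
        int best = 0;
        double bestScore = double.NegativeInfinity;
        for (int c = 0; c < _classPrior.Length; c++)
        {
            double score = Math.Log(_classPrior[c]);
            for (int f = 0; f < x.Length; f++)
                score += Math.Log(x[f] > 0 ? _featureProb[c][f] : 1 - _featureProb[c][f]);
            if (score > bestScore) { bestScore = score; best = c; }
        }
        return best;
    }
}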

src/Microsoft.ML.StandardLearners/Standard/MultiClass/Ova.cs

Lines changed: 1 addition & 0 deletions
@@ -36,6 +36,7 @@ namespace Microsoft.ML.Runtime.Learners
     using TScalarPredictor = IPredictorProducing<Float>;
     using TScalarTrainer = ITrainer<RoleMappedData, IPredictorProducing<Float>>;

+    /// <include file='doc.xml' path='doc/members/member[@name="OVA"]' />
     public sealed class Ova : MetaMulticlassTrainer<OvaPredictor, Ova.Arguments>
     {
         internal const string LoadNameValue = "OVA";

src/Microsoft.ML.StandardLearners/Standard/MultiClass/Pkpd.cs

Lines changed: 23 additions & 0 deletions
@@ -31,6 +31,29 @@ namespace Microsoft.ML.Runtime.Learners
     using TDistPredictor = IDistPredictorProducing<Float, Float>;
     using CR = RoleMappedSchema.ColumnRole;

+    /// <summary>
+    /// In this strategy, a binary classification algorithm is trained on each pair of classes.
+    /// The pairs are unordered but created with replacement: so, if there were three classes, 0, 1,
+    /// 2, we would train classifiers for the pairs (0,0), (0,1), (0,2), (1,1), (1,2),
+    /// and (2,2). For each binary classifier, an input data point is considered a
+    /// positive example if it is in either of the two classes in the pair, and a
+    /// negative example otherwise. At prediction time, the probabilities for each
+    /// pair of classes are considered as the probability of being in either class of
+    /// the pair given the data, and the final predictive probabilities out of that
+    /// per class are calculated given the probability that an example is in any given
+    /// pair.
+    ///
+    /// These two strategies can allow you to exploit trainers that do not naturally have a
+    /// multiclass option, e.g., using <see cref="Microsoft.ML.Runtime.FastTree.FastTreeBinaryClassificationTrainer"/>
+    /// to solve a multiclass problem.
+    /// Alternatively, they can allow ML.NET to solve a "simpler" problem even in cases
+    /// where the trainer has a multiclass option, but using it directly is not
+    /// practical due to, usually, memory constraints. For example, while multiclass
+    /// logistic regression is a more principled way to solve a multiclass problem, it
+    /// requires that the learner store a lot more intermediate state in the form of
+    /// L-BFGS history for all classes *simultaneously*, rather than just one-by-one
+    /// as would be needed for OVA.
+    /// </summary>
     public sealed class Pkpd : MetaMulticlassTrainer<PkpdPredictor, Pkpd.Arguments>
     {
         internal const string LoadNameValue = "PKPD";
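
Note: a minimal sketch of the pair enumeration the comment describes -- unordered pairs with replacement, plus the positive/negative labeling for each pair's binary problem. Illustrative only, not the actual PKPD trainer.

using System;
using System.Collections.Generic;

static class PkpdPairsDemo
{
    // Enumerates the unordered-with-replacement class pairs that PKPD
    // trains one binary classifier for: (0,0), (0,1), ..., (k-1,k-1).
    static IEnumerable<(int A, int B)> Pairs(int numClasses)
    {
        for (int a = 0; a < numClasses; a++)
            for (int b = a; b < numClasses; b++)
                yield return (a, b);
    }

    // For the (a, b) classifier, an example is positive iff its label
    // is in either class of the pair.
    static bool IsPositive((int A, int B) pair, int label)
        => label == pair.A || label == pair.B;

    static void Main()
    {
        // Prints the six pairs (0,0) (0,1) (0,2) (1,1) (1,2) (2,2),
        // matching the three-class example in the comment above.
        foreach (var pair in Pairs(3))
            Console.WriteLine($"classifier ({pair.A},{pair.B}); label 1 positive: {IsPositive(pair, 1)}");
    }
}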
src/Microsoft.ML.StandardLearners/Standard/MultiClass/doc.xml

Lines changed: 59 additions & 0 deletions
@@ -0,0 +1,59 @@
+<?xml version="1.0" encoding="utf-8"?>
+<docs>
+  <members>
+
+    <member name="MultiClassNaiveBayesTrainer">
+      <summary>
+        Trains a multiclass Naive Bayes predictor that supports binary feature values.
+      </summary>
+      <remarks>
+        <a href='https://en.wikipedia.org/wiki/Naive_Bayes_classifier'>Naive Bayes</a> is a probabilistic classifier that can be used for multiclass problems.
+        Using Bayes' theorem, the conditional probability for a sample belonging to a class can be calculated from the sample counts of each feature combination group.
+        However, a Naive Bayes classifier is feasible only if the number of features and the number of values each feature can take are relatively small.
+        It assumes independence among the presence of features in a class, even though they may be dependent on each other.
+        This multiclass trainer accepts binary feature values of type float: feature values are either true or false,
+        and specifically a feature value greater than zero is treated as true.
+        This learner will request normalization from the data pipeline if the
+        classifier indicates it would benefit from it. Note that even if the
+        classifier indicates that it does not need caching, OVA will always
+        request caching, as it will be performing multiple passes over the data set.
+      </remarks>
+      <seealso cref='LogisticRegressionClassifier'/>
+      <seealso cref='LightGbmClassifier'/>
+      <seealso cref='StochasticDualCoordinateAscentClassifier'/>
+      <seealso cref='OneVersusAll'/>
+      <example>
+        <code>
+          pipeline.Add(new NaiveBayesClassifier(){ NormalizeFeatures = NormalizeOption.Auto, Caching = CachingOptions.Memory });
+        </code>
+      </example>
+    </member>
+
+    <member name="OVA">
+      <summary>
+        In this strategy, a binary classification algorithm is used to train one classifier for each class, which distinguishes that class from all other classes.
+        Prediction is then performed by running these binary classifiers and choosing the prediction with the highest confidence score.
+      </summary>
+      <remarks>
+        <para>This algorithm can be treated as a wrapper for all the binary classifiers in ML.NET.
+        A few binary classifiers already have implementations for multiclass problems,
+        so users can choose either one depending on the context.
+        </para>
+        <para>
+        The OVA version of a binary classifier, such as wrapping a LightGbmBinaryClassifier,
+        can be different from LightGbmClassifier, which develops a multiclass classifier directly.
+        </para>
+      </remarks>
+      <seealso cref='LogisticRegressionClassifier'/>
+      <seealso cref='LightGbmClassifier'/>
+      <seealso cref='StochasticDualCoordinateAscentClassifier'/>
+      <seealso cref='NaiveBayesClassifier'/>
+      <example>
+        <code>
+          pipeline.Add(OneVersusAll.With(new StochasticDualCoordinateAscentBinaryClassifier()));
+        </code>
+      </example>
+    </member>
+
+  </members>
+</docs>
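
Note: a minimal C# sketch of the one-versus-all strategy the OVA member describes, written over hypothetical interfaces standing in for ML.NET's trainer/predictor types; the real Ova trainer wraps the ITrainer machinery. Not part of this commit.

using System;
using System.Linq;

// Hypothetical minimal stand-ins for a binary learner and its model.
interface IBinaryClassifier { double Score(float[] x); }
interface IBinaryTrainer { IBinaryClassifier Train(float[][] X, bool[] y); }

// One-versus-all: train one binary classifier per class (class c vs. the rest),
// then predict the class whose classifier returns the highest confidence score.
class OneVersusAllSketch
{
    private readonly IBinaryClassifier[] _perClass;

    public OneVersusAllSketch(IBinaryTrainer trainer, float[][] X, int[] labels, int numClasses)
    {
        _perClass = Enumerable.Range(0, numClasses)
            .Select(c => trainer.Train(X, labels.Select(l => l == c).ToArray()))
            .ToArray();
    }

    public int Predict(float[] x)
    {
        int best = 0;
        for (int c = 1; c < _perClass.Length; c++)
            if (_perClass[c].Score(x) > _perClass[best].Score(x))
                best = c;
        return best;
    }
}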

src/Microsoft.ML.Transforms/CountFeatureSelection.cs

Lines changed: 1 addition & 5 deletions
@@ -18,11 +18,7 @@

 namespace Microsoft.ML.Runtime.Data
 {
-    /// <summary>
-    /// Selects the slots for which the count of non-default values is greater than a threshold.
-    /// Uses a set of aggregators to count the number of non-default values for each slot and
-    /// instantiates a DropSlots transform to actually drop the slots.
-    /// </summary>
+    /// <include file='doc.xml' path='doc/members/member[@name="CountFeatureSelection"]' />
     public static class CountFeatureSelectionTransform
     {
         public const string Summary = "Selects the slots for which the count of non-default values is greater than or equal to a threshold.";
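
Note: a minimal sketch of the selection rule the summary states -- count non-default values per slot and keep only slots meeting the threshold. Illustrative only; the real transform uses aggregators and a DropSlots transform, as the removed comment described.

using System;
using System.Linq;

static class CountFeatureSelectionDemo
{
    // Returns the indices of the slots whose count of non-default (non-zero)
    // values across all rows is >= threshold; the other slots would be dropped.
    static int[] SelectSlots(float[][] rows, long threshold)
    {
        int numSlots = rows[0].Length;
        var counts = new long[numSlots];
        foreach (var row in rows)
            for (int slot = 0; slot < numSlots; slot++)
                if (row[slot] != 0f)
                    counts[slot]++;
        return Enumerable.Range(0, numSlots)
            .Where(slot => counts[slot] >= threshold)
            .ToArray();
    }

    static void Main()
    {
        var rows = new[]
        {
            new float[] { 1f, 0f, 2f },
            new float[] { 3f, 0f, 0f },
        };
        // Slot 0 has 2 non-default values, slot 1 has 0, slot 2 has 1.
        Console.WriteLine(string.Join(", ", SelectSlots(rows, threshold: 2))); // prints 0
    }
}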
