src/Microsoft.ML.FastTree/doc.xml
+58 lines changed: 58 additions & 0 deletions
@@ -73,6 +73,64 @@
       <para><a href='http://projecteuclid.org/DPubS?service=UI&version=1.0&verb=Display&handle=euclid.aos/1013203451'>Greedy function approximation: A gradient boosting machine</a></para>
     </remarks>
   </member>
+
+  <member name="TreeEnsembleFeaturizerTransform">
+    <summary>
+      Trains a tree ensemble, or loads it from a file, then maps a numeric feature vector
+      to three outputs:
+      <list>
+        <item>
+          <description>A vector containing the individual tree outputs of the tree ensemble.</description>
+        </item>
+        <item>
+          <description>A vector indicating the leaves that the feature vector falls on in the tree ensemble.</description>
+        </item>
+        <item>
+          <description>A vector indicating the paths that the feature vector falls on in the tree ensemble.</description>
+        </item>
+      </list>
+      If both a model file and a trainer are specified, the model file is used. If neither is specified,
+      a default FastTree model is trained.
+      The transform can handle key labels by training a regression model towards their optionally permuted indices.
+    </summary>
+    <remarks>
+      In machine learning, it is a common and powerful approach to use an already trained model when defining features.
+      <para>The most obvious example is to use the model's scores as features for downstream models. For example, we might run clustering on the original features,
+      and use the cluster distances as the new feature set.
+      Instead of consuming the model's output, we could go deeper and extract the 'intermediate outputs' that are used to produce the final score.</para>
+      There are a number of well-known examples of this technique:
+      <list>
+        <item>
+          <description>A deep neural net trained on the ImageNet dataset, with the last layer removed, is commonly used to compute the 'projection' of an image into the 'semantic feature space'.
+          It is observed that the Euclidean distance in this space often correlates with 'semantic similarity': that is, all pictures of pizza are located close together,
+          and far away from pictures of kittens.</description>
+        </item>
+        <item>
+          <description>A matrix factorization and/or LDA model is also often used to extract the 'latent topics' or 'latent features' associated with users and items.</description>
+        </item>
+        <item>
+          <description>The weights of a linear model are often used as a crude indicator of 'feature importance'. At the very minimum, the 0-weight features are not needed by the model,
+          and there is no reason to compute them.</description>
+        </item>
+      </list>
+      <para>The tree featurizer uses decision tree ensembles for feature engineering in the same fashion as described above.</para>
+      <para>Let's assume that we've built a tree ensemble of 100 trees with 100 leaves each (it doesn't matter whether or not boosting was used in training).
+      If we associate each leaf of each tree with a sequential integer, we can, for every incoming example x,
+      produce an indicator vector L(x), where Li(x) = 1 if the example x 'falls' into leaf #i, and 0 otherwise.</para>
+      <para>Thus, for every example x, we produce a 10000-valued vector L, with exactly 100 ones and the rest zeros.
+      This 'leaf indicator' vector can be considered the ensemble-induced 'footprint' of the example.</para>
+      <para>The 'distance' between two examples in the L-space is actually a Hamming distance, equal to twice the number of trees that send the two examples to different leaves.</para>
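Writing leaf_t(x) for the leaf of tree t into which example x falls (notation introduced here only to spell out the arithmetic for the 100-tree, 100-leaf ensemble above), the Hamming distance between two leaf-indicator vectors is

$$
d_H\bigl(L(x), L(y)\bigr) \;=\; \sum_{i=1}^{10000} \bigl|L_i(x) - L_i(y)\bigr| \;=\; 2 \cdot \#\{\, t : \mathrm{leaf}_t(x) \neq \mathrm{leaf}_t(y) \,\},
$$

since a tree that separates the two examples flips exactly two coordinates (one for each example's leaf), and a tree that keeps them together flips none.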
+      <para>We could repeat the same thought process for the non-leaf, or internal, nodes of the trees (we know that each tree has exactly 99 of them in our 100-leaf example),
+      and produce another indicator vector, N (size 9900), for each example, indicating the 'trajectory' of each example through each of the trees.</para>
+      <para>The distance in the combined 19900-dimensional LN-space grows with the number of 'decisions' in all trees that 'disagree' on the given pair of examples.</para>
+      <para>The TreeLeafFeaturizer also produces a third vector, T, defined as Ti(x) = the output of tree #i on example x.</para>
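To make the three outputs concrete, here is a minimal, self-contained C# sketch that computes the per-tree output vector T, the leaf-indicator vector L, and the path-indicator vector N for a single example. The `ToyTree` type and every member name below are illustrative assumptions for this sketch only; they are not FastTree or ML.NET APIs.

```csharp
using System;
using System.Collections.Generic;

// Toy binary decision tree: internal node i tests x[Feature[i]] <= Threshold[i];
// a negative child index encodes a leaf as ~leafIndex. Purely illustrative.
class ToyTree
{
    public int[] Feature;        // per internal node: feature index to test
    public double[] Threshold;   // per internal node: split threshold
    public int[] Left, Right;    // child node index, or ~leafIndex if a leaf
    public double[] LeafValue;   // per leaf: the tree's output

    public int NumInternalNodes => Feature.Length;
    public int NumLeaves => LeafValue.Length;

    // Returns the leaf index and records which internal nodes were visited.
    public int GetLeaf(double[] x, List<int> visitedInternalNodes)
    {
        int node = 0;
        while (true)
        {
            visitedInternalNodes.Add(node);
            int next = x[Feature[node]] <= Threshold[node] ? Left[node] : Right[node];
            if (next < 0) return ~next;   // reached a leaf
            node = next;
        }
    }
}

static class TreeFeaturizerSketch
{
    // Produces (T, L, N): per-tree outputs, leaf indicators, and path ("trajectory") indicators.
    public static (double[] T, double[] L, double[] N) Featurize(IReadOnlyList<ToyTree> ensemble, double[] x)
    {
        int totalLeaves = 0, totalInternal = 0;
        foreach (var t in ensemble) { totalLeaves += t.NumLeaves; totalInternal += t.NumInternalNodes; }

        var T = new double[ensemble.Count];
        var L = new double[totalLeaves];     // one slot per leaf of every tree
        var N = new double[totalInternal];   // one slot per internal node of every tree

        int leafOffset = 0, nodeOffset = 0;
        for (int i = 0; i < ensemble.Count; i++)
        {
            var visited = new List<int>();
            int leaf = ensemble[i].GetLeaf(x, visited);

            T[i] = ensemble[i].LeafValue[leaf];        // tree output
            L[leafOffset + leaf] = 1;                  // leaf indicator
            foreach (int node in visited)
                N[nodeOffset + node] = 1;              // path indicator

            leafOffset += ensemble[i].NumLeaves;
            nodeOffset += ensemble[i].NumInternalNodes;
        }
        return (T, L, N);
    }

    static void Main()
    {
        // A single depth-1 tree (1 internal node, 2 leaves) just to exercise the code.
        var stump = new ToyTree
        {
            Feature = new[] { 0 },
            Threshold = new[] { 0.5 },
            Left = new[] { ~0 },      // leaf 0
            Right = new[] { ~1 },     // leaf 1
            LeafValue = new[] { -1.0, 1.0 }
        };
        var (T, L, N) = Featurize(new[] { stump }, new[] { 0.7 });
        Console.WriteLine($"T = [{string.Join(", ", T)}], L = [{string.Join(", ", L)}], N = [{string.Join(", ", N)}]");
    }
}
```

For the 100-tree, 100-leaf ensemble described above, L would have length 10000 with exactly 100 ones, N would have length 9900, and T would have length 100.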

   internal const string Summary = "Trains a multiclass Naive Bayes predictor that supports binary feature values.";
-  internal const string Remarks = @"<remarks>
-  <a href='https://en.wikipedia.org/wiki/Naive_Bayes_classifier'>Naive Bayes</a> is a probabilistic classifier that can be used for multiclass problems.
-  Using Bayes' theorem, the conditional probability for a sample belonging to a class can be calculated based on the sample count for each feature combination groups.
-  However, Naive Bayes Classifier is feasible only if the number of features and the values each feature can take is relatively small.
-  It also assumes that the features are strictly independent.

+  Trains a multiclass Naive Bayes predictor that supports binary feature values.
+  </summary>
+  <remarks>
+  <a href='https://en.wikipedia.org/wiki/Naive_Bayes_classifier'>Naive Bayes</a> is a probabilistic classifier that can be used for multiclass problems.
+  Using Bayes' theorem, the conditional probability of a sample belonging to a class can be calculated based on the sample count for each feature combination group.
+  However, a Naive Bayes classifier is feasible only if the number of features and the number of values each feature can take are relatively small.
+  It assumes independence among the presence of features in a class even though they may be dependent on each other.
+  This multi-class trainer accepts binary feature values of type float, i.e., feature values are either true or false.
+  Specifically, a feature value greater than zero is treated as true.
+  This learner will request normalization from the data pipeline if the
+  classifier indicates it would benefit from it. Note that even if the
+  classifier indicates that it does not need caching, OVA will always
+  request caching, as it will be performing multiple passes over the data set.
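To illustrate the counting scheme described in the remarks above, here is a minimal binary-feature Naive Bayes sketch in C#. The class name, the add-one smoothing, and the log-probability scoring are assumptions made for this sketch; they are not claimed to match the ML.NET trainer's implementation.

```csharp
using System;
using System.Linq;

// Minimal binary-feature Naive Bayes: any feature value > 0 is treated as "true".
class BinaryNaiveBayesSketch
{
    readonly int _numClasses, _numFeatures;
    readonly int[] _classCounts;        // examples seen per class
    readonly int[,] _featureOnCounts;   // [class, feature] -> count of "true" occurrences

    public BinaryNaiveBayesSketch(int numClasses, int numFeatures)
    {
        _numClasses = numClasses;
        _numFeatures = numFeatures;
        _classCounts = new int[numClasses];
        _featureOnCounts = new int[numClasses, numFeatures];
    }

    // Training is just counting: per class, how often each feature is "on".
    public void Add(float[] features, int label)
    {
        _classCounts[label]++;
        for (int f = 0; f < _numFeatures; f++)
            if (features[f] > 0) _featureOnCounts[label, f]++;
    }

    // Picks argmax_c of log P(c) + sum_f log P(x_f | c), with add-one smoothing.
    public int Predict(float[] features)
    {
        int total = _classCounts.Sum();
        int best = -1;
        double bestScore = double.NegativeInfinity;
        for (int c = 0; c < _numClasses; c++)
        {
            double score = Math.Log((_classCounts[c] + 1.0) / (total + _numClasses));
            for (int f = 0; f < _numFeatures; f++)
            {
                double pOn = (_featureOnCounts[c, f] + 1.0) / (_classCounts[c] + 2.0);
                score += Math.Log(features[f] > 0 ? pOn : 1.0 - pOn);
            }
            if (score > bestScore) { bestScore = score; best = c; }
        }
        return best;
    }

    static void Main()
    {
        var nb = new BinaryNaiveBayesSketch(numClasses: 2, numFeatures: 2);
        nb.Add(new float[] { 1, 0 }, label: 0);
        nb.Add(new float[] { 1, 0 }, label: 0);
        nb.Add(new float[] { 0, 1 }, label: 1);
        nb.Add(new float[] { 0, 1 }, label: 1);
        Console.WriteLine(nb.Predict(new float[] { 1, 0 })); // expected: 0
    }
}
```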

+  In this strategy, a binary classification algorithm is used to train one classifier for each class, which distinguishes that class from all other classes.
+  Prediction is then performed by running these binary classifiers and choosing the prediction with the highest confidence score.
+  </summary>
+  <remarks>
+  <para>This algorithm can be treated as a wrapper for all the binary classifiers in ML.NET.
+  A few binary classifiers already have implementations for multi-class problems,
+  so users can choose either one depending on the context.
+  </para>
+  <para>
+  The OVA version of a binary classifier, such as wrapping a LightGbmBinaryClassifier,
+  can be different from LightGbmClassifier, which develops a multi-class classifier directly.
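A minimal sketch of the one-versus-all strategy itself, assuming a hypothetical `IBinaryScorer` abstraction in place of any concrete ML.NET binary trainer (all type names below are illustrative): one binary scorer is trained per class on relabeled data, and prediction takes the class whose scorer reports the highest confidence.

```csharp
using System;
using System.Linq;

// Hypothetical abstraction for any binary classifier that outputs a confidence score.
interface IBinaryScorer
{
    void Train(float[][] features, bool[] labels);
    double Score(float[] features);   // larger means "more likely positive"
}

class OneVersusAllSketch
{
    readonly Func<IBinaryScorer> _factory;
    IBinaryScorer[] _scorers;

    public OneVersusAllSketch(Func<IBinaryScorer> binaryScorerFactory)
        => _factory = binaryScorerFactory;

    // Trains one binary classifier per class: "this class" vs. "every other class".
    public void Train(float[][] features, int[] labels, int numClasses)
    {
        _scorers = new IBinaryScorer[numClasses];
        for (int c = 0; c < numClasses; c++)
        {
            bool[] binaryLabels = labels.Select(y => y == c).ToArray();
            _scorers[c] = _factory();
            _scorers[c].Train(features, binaryLabels);
        }
    }

    // Runs all binary classifiers and returns the class with the highest confidence.
    public int Predict(float[] features)
    {
        int best = 0;
        double bestScore = double.NegativeInfinity;
        for (int c = 0; c < _scorers.Length; c++)
        {
            double score = _scorers[c].Score(features);
            if (score > bestScore) { bestScore = score; best = c; }
        }
        return best;
    }
}

// Trivial concrete scorer used only so the sketch runs end to end:
// scores by negative Euclidean distance to the mean of the positive examples.
class CentroidScorer : IBinaryScorer
{
    float[] _centroid;

    public void Train(float[][] features, bool[] labels)
    {
        var pos = features.Where((_, i) => labels[i]).ToArray();
        _centroid = new float[features[0].Length];
        foreach (var x in pos)
            for (int j = 0; j < x.Length; j++) _centroid[j] += x[j] / pos.Length;
    }

    public double Score(float[] x)
        => -Math.Sqrt(x.Zip(_centroid, (a, b) => (a - b) * (a - b)).Sum());
}

class Program
{
    static void Main()
    {
        var ova = new OneVersusAllSketch(() => new CentroidScorer());
        var X = new[] { new float[] { 0, 0 }, new float[] { 0, 1 }, new float[] { 5, 5 }, new float[] { 5, 6 } };
        var y = new[] { 0, 0, 1, 1 };
        ova.Train(X, y, numClasses: 2);
        Console.WriteLine(ova.Predict(new float[] { 4.5f, 5.5f })); // expected: 1
    }
}
```

In practice the factory would construct whichever binary learner is being wrapped; the `CentroidScorer` stand-in exists only to make the example executable.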