From 80ee90a2e1cf0436f3d41edadf75af8efe7fc132 Mon Sep 17 00:00:00 2001
From: Zeeshan Ahmed <zeahmed@microsoft.com>
Date: Tue, 2 Apr 2019 13:04:02 -0700
Subject: [PATCH 1/8] Created samples for 'ProduceNgrams' and
 'ProduceHashedNgrams' APIs.

---
 .../Transforms/Text/ProduceHashedNgrams.cs    | 69 +++++++++++++++
 .../Dynamic/Transforms/Text/ProduceNgrams.cs  | 88 +++++++++++++++++++
 .../Text/TextCatalog.cs                       |  9 +-
 3 files changed, 165 insertions(+), 1 deletion(-)
 create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs
 create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceNgrams.cs
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs
new file mode 100644
index 0000000000..516695deef
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs
@@ -0,0 +1,69 @@
+﻿using System;
+using System.Collections.Generic;
+using Microsoft.ML.Data;
+using Microsoft.ML.Transforms.Text;
+
+namespace Microsoft.ML.Samples.Dynamic
+{
+    public static class ProduceHashedNgrams
+    {
+        public static void Example()
+        {
+            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, 
+            // as well as the source of randomness.
+            var mlContext = new MLContext();
+
+            // Create a small dataset as an IEnumerable.
+            var samples = new List<TextData>()
+            {
+                new TextData(){ Text = "This is an example to compute Ngrams using hashing." },
+                new TextData(){ Text = "Ngram is a sequence of 'N' consecutive words/tokens." },
+                new TextData(){ Text = "ML.NET's ProduceHashedNgrams API produces count of Ngrams and hashes it as an index into a vector of given bit length." },
+                new TextData(){ Text = "The hashing schem reduces the size of the output feature vector" },
+                new TextData(){ Text = "which is useful in case when number of Ngrams is very large." },
+            };
+
+            // Convert training data to IDataView.
+            var dataview = mlContext.Data.LoadFromEnumerable(samples);
+
+            // A pipeline for converting text into numeric hashed Ngram features.
+            // The following call to 'ProduceHashedNgrams' requires the tokenized text/string as input.
+            // This is acheived by calling 'TokenizeIntoWords' first followed by 'ProduceHashedNgrams'.
+            // Please note that the length of the output feature vector depends on the 'numberOfBits' settings.
+            var textPipeline = mlContext.Transforms.Text.TokenizeIntoWords("Tokens", "Text")
+                .Append(mlContext.Transforms.Conversion.MapValueToKey("Tokens"))
+                .Append(mlContext.Transforms.Text.ProduceHashedNgrams("NgramFeatures", "Tokens", numberOfBits: 8, ngramLength: 3, useAllLengths: false));
+            
+            // Fit to data.
+            var textTransformer = textPipeline.Fit(dataview);
+
+            // Create the prediction engine to get the features extracted from the text.
+            var predictionEngine = mlContext.Model.CreatePredictionEngine<TextData, TransformedTextData>(textTransformer);
+
+            // Convert the text into numeric features.
+            var prediction = predictionEngine.Predict(samples[0]);
+
+            // Print the length of the feature vector.
+            Console.WriteLine($"Number of Features: {prediction.NgramFeatures.Length}");
+
+            // Print the first 10 feature values.
+            Console.Write("Features: ");
+            for (int i = 0; i < 10; i++)
+                Console.Write($"{prediction.NgramFeatures[i]:F4}  ");
+
+            //  Expected output:
+            //   Number of Features: 256
+            //   Features:    0.0000  0.0000  1.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000
+        }
+
+        public class TextData
+        {
+            public string Text { get; set; }
+        }
+
+        public class TransformedTextData : TextData
+        {
+            public float[] NgramFeatures { get; set; }
+        }
+    }
+}
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceNgrams.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceNgrams.cs
new file mode 100644
index 0000000000..884ce2ad58
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceNgrams.cs
@@ -0,0 +1,88 @@
+﻿using System;
+using System.Collections.Generic;
+using Microsoft.ML.Data;
+using Microsoft.ML.Transforms.Text;
+
+namespace Microsoft.ML.Samples.Dynamic
+{
+    public static class ProduceNgrams
+    {
+        public static void Example()
+        {
+            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, 
+            // as well as the source of randomness.
+            var mlContext = new MLContext();
+
+            // Create a small dataset as an IEnumerable.
+            var samples = new List<TextData>()
+            {
+                new TextData(){ Text = "This is an example to compute Ngrams." },
+                new TextData(){ Text = "Ngram is a sequence of 'N' consecutive words/tokens." },
+                new TextData(){ Text = "ML.NET's ProduceNgrams API produces vector of Ngrams." },
+                new TextData(){ Text = "Each position in the vector corresponds to a particular Ngram." },
+                new TextData(){ Text = "The value at each position corresponds to," },
+                new TextData(){ Text = "the number of times Ngram occured in the data (Tf), or" },
+                new TextData(){ Text = "the inverse of the number of documents that contain the Ngram (Idf), or." },
+                new TextData(){ Text = "or compute both and multipy together (Tf-Idf)." },
+            };
+
+            // Convert training data to IDataView.
+            var dataview = mlContext.Data.LoadFromEnumerable(samples);
+
+            // A pipeline for converting text into numeric Ngram features.
+            // The following call to 'ProduceNgrams' requires the tokenized text/string as input.
+            // This is acheived by calling 'TokenizeIntoWords' first followed by 'ProduceNgrams'.
+            // Please note that the length of the output feature vector depends on the Ngram settings.
+            var textPipeline = mlContext.Transforms.Text.TokenizeIntoWords("Tokens", "Text")
+                .Append(mlContext.Transforms.Conversion.MapValueToKey("Tokens"))
+                .Append(mlContext.Transforms.Text.ProduceNgrams("NgramFeatures", "Tokens",
+                ngramLength: 3, useAllLengths: false, weighting: NgramExtractingEstimator.WeightingCriteria.Tf));
+            
+            // Fit to data.
+            var textTransformer = textPipeline.Fit(dataview);
+            var transformedDataView = textTransformer.Transform(dataview);
+
+            // Create the prediction engine to get the Ngram features extracted from the text.
+            var predictionEngine = mlContext.Model.CreatePredictionEngine<TextData, TransformedTextData>(textTransformer);
+
+            // Convert the text into numeric features.
+            var prediction = predictionEngine.Predict(samples[0]);
+
+            // Print the length of the feature vector.
+            Console.WriteLine($"Number of Features: {prediction.NgramFeatures.Length}");
+
+            // Preview of the produced .
+            VBuffer<ReadOnlyMemory<char>> slotNames = default;
+            transformedDataView.Schema["NgramFeatures"].GetSlotNames(ref slotNames);
+            var NgramFeaturesColumn = transformedDataView.GetColumn<VBuffer<float>>(transformedDataView.Schema["NgramFeatures"]);
+            var slots = slotNames.GetValues();
+            Console.Write("Ngrams: ");
+            foreach (var featureRow in NgramFeaturesColumn)
+            {
+                foreach (var item in featureRow.Items())
+                    Console.Write($"{slots[item.Key]}  ");
+                Console.WriteLine();
+            }
+
+            // Print the first 10 feature values.
+            Console.Write("Features: ");
+            for (int i = 0; i < 10; i++)
+                Console.Write($"{prediction.NgramFeatures[i]:F4}  ");
+
+            //  Expected output:
+            //   Number of Features: 332
+            //   Ngrams:   This|is|an  is|an|example  an|example|to  example|to|compute  to|compute|Ngrams.  Ngram|is|a  is|a|sequence  a|sequence|of  sequence|of|'N'  of|'N'|consecutive  ...
+            //   Features:    1.0000      1.0000          1.0000           1.0000             1.0000            0.0000      0.0000          0.0000          0.0000          0.0000          ...
+        }
+
+        public class TextData
+        {
+            public string Text { get; set; }
+        }
+
+        public class TransformedTextData : TextData
+        {
+            public float[] NgramFeatures { get; set; }
+        }
+    }
+}
diff --git a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
index 86dafb8807..3b7e3c70dd 100644
--- a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
+++ b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
@@ -217,7 +217,7 @@ internal static WordTokenizingEstimator TokenizeIntoWords(this TransformsCatalog
         /// <example>
         /// <format type="text/markdown">
         /// <![CDATA[
-        /// [!code-csharp[LpNormalize](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs?range=1-5,11-74)]
+        /// [!code-csharp[ProduceNgrams](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceNgrams.cs)]
         /// ]]>
         /// </format>
         /// </example>
@@ -450,6 +450,13 @@ public static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog.T
         /// <paramref name="maximumNumberOfInverts"/> specifies the upper bound of the number of distinct input values mapping to a hash that should be retained.
         /// <value>0</value> does not retain any input values. <value>-1</value> retains all input values mapping to each hash.</param>
         /// <param name="rehashUnigrams">Whether to rehash unigrams.</param>
+        /// <example>
+        /// <format type="text/markdown">
+        /// <![CDATA[
+        /// [!code-csharp[ProduceHashedNgrams](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs)]
+        /// ]]>
+        /// </format>
+        /// </example>
         public static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog.TextTransforms catalog,
             string outputColumnName,
             string[] inputColumnNames = null,

From fe6503335b1a563d5aa14e7d8f91fdec2a83b752 Mon Sep 17 00:00:00 2001
From: Zeeshan Ahmed <zeahmed@microsoft.com>
Date: Wed, 3 Apr 2019 10:46:41 -0700
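Subject: [PATCH 2/8] Addressed reviewers' comments.

In the ProduceNgrams sample: fixed the "multipy" typo in the sample text,
noted that 'MapValueToKey' supplies the key-typed input 'ProduceNgrams'
takes, put each 'ProduceNgrams' argument on its own line, and completed
the comment that introduces the slot names preview.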
---
 .../Dynamic/Transforms/Text/ProduceNgrams.cs          | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceNgrams.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceNgrams.cs
index 884ce2ad58..87a8a5e410 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceNgrams.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceNgrams.cs
@@ -23,7 +23,7 @@ public static void Example()
                 new TextData(){ Text = "The value at each position corresponds to," },
                 new TextData(){ Text = "the number of times Ngram occured in the data (Tf), or" },
                 new TextData(){ Text = "the inverse of the number of documents that contain the Ngram (Idf), or." },
-                new TextData(){ Text = "or compute both and multipy together (Tf-Idf)." },
+                new TextData(){ Text = "or compute both and multiply together (Tf-Idf)." },
             };
 
             // Convert training data to IDataView.
@@ -34,9 +34,12 @@ public static void Example()
             // This is acheived by calling 'TokenizeIntoWords' first followed by 'ProduceNgrams'.
             // Please note that the length of the output feature vector depends on the Ngram settings.
             var textPipeline = mlContext.Transforms.Text.TokenizeIntoWords("Tokens", "Text")
+                // 'ProduceNgrams' takes key type as input. Converting the tokens into key type using 'MapValueToKey'.
                 .Append(mlContext.Transforms.Conversion.MapValueToKey("Tokens"))
                 .Append(mlContext.Transforms.Text.ProduceNgrams("NgramFeatures", "Tokens",
-                ngramLength: 3, useAllLengths: false, weighting: NgramExtractingEstimator.WeightingCriteria.Tf));
+                    ngramLength: 3,
+                    useAllLengths: false,
+                    weighting: NgramExtractingEstimator.WeightingCriteria.Tf));
             
             // Fit to data.
             var textTransformer = textPipeline.Fit(dataview);
@@ -51,7 +54,9 @@ public static void Example()
             // Print the length of the feature vector.
             Console.WriteLine($"Number of Features: {prediction.NgramFeatures.Length}");
 
-            // Preview of the produced .
+            // Preview of the produced Ngrams.
+            // Get the slot names from the column's metadata.
+            // If the column is a vector column the slot names corresponds to the names associated with each position in the vector.
             VBuffer<ReadOnlyMemory<char>> slotNames = default;
             transformedDataView.Schema["NgramFeatures"].GetSlotNames(ref slotNames);
             var NgramFeaturesColumn = transformedDataView.GetColumn<VBuffer<float>>(transformedDataView.Schema["NgramFeatures"]);

From 4ddaf5ccfa1fcd4966660b406174064ddb233cec Mon Sep 17 00:00:00 2001
From: Zeeshan Ahmed <zeahmed@microsoft.com>
Date: Wed, 3 Apr 2019 13:57:22 -0700
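Subject: [PATCH 3/8] Addressed reviewers' comments.

ProduceHashedNgrams sample: removed the misspelled word "schem" from the
sample text, changed 'numberOfBits' from 8 to 5, passed
'maximumNumberOfInverts: -1', and updated the expected output (32 features).
ProduceNgrams sample: removed the stray "or." after "(Idf)," in the sample
text and changed the expected number of features from 332 to 52.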
---
 .../Dynamic/Transforms/Text/ProduceHashedNgrams.cs    | 11 +++++++----
 .../Dynamic/Transforms/Text/ProduceNgrams.cs          |  4 ++--
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs
index 516695deef..419573292c 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs
@@ -19,7 +19,7 @@ public static void Example()
                 new TextData(){ Text = "This is an example to compute Ngrams using hashing." },
                 new TextData(){ Text = "Ngram is a sequence of 'N' consecutive words/tokens." },
                 new TextData(){ Text = "ML.NET's ProduceHashedNgrams API produces count of Ngrams and hashes it as an index into a vector of given bit length." },
-                new TextData(){ Text = "The hashing schem reduces the size of the output feature vector" },
+                new TextData(){ Text = "The hashing reduces the size of the output feature vector" },
                 new TextData(){ Text = "which is useful in case when number of Ngrams is very large." },
             };
 
@@ -32,7 +32,10 @@ public static void Example()
             // Please note that the length of the output feature vector depends on the 'numberOfBits' settings.
             var textPipeline = mlContext.Transforms.Text.TokenizeIntoWords("Tokens", "Text")
                 .Append(mlContext.Transforms.Conversion.MapValueToKey("Tokens"))
-                .Append(mlContext.Transforms.Text.ProduceHashedNgrams("NgramFeatures", "Tokens", numberOfBits: 8, ngramLength: 3, useAllLengths: false));
+                .Append(mlContext.Transforms.Text.ProduceHashedNgrams("NgramFeatures", "Tokens", 
+                        numberOfBits: 5,
+                        ngramLength: 3,
+                        useAllLengths: false, maximumNumberOfInverts: -1));
             
             // Fit to data.
             var textTransformer = textPipeline.Fit(dataview);
@@ -52,8 +55,8 @@ public static void Example()
                 Console.Write($"{prediction.NgramFeatures[i]:F4}  ");
 
             //  Expected output:
-            //   Number of Features: 256
-            //   Features:    0.0000  0.0000  1.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000
+            //   Number of Features:  32
+            //   Features:  0.0000  0.0000  2.0000  0.0000  0.0000  1.0000  0.0000  0.0000  1.0000  0.0000
         }
 
         public class TextData
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceNgrams.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceNgrams.cs
index 87a8a5e410..a5606a25ef 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceNgrams.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceNgrams.cs
@@ -22,7 +22,7 @@ public static void Example()
                 new TextData(){ Text = "Each position in the vector corresponds to a particular Ngram." },
                 new TextData(){ Text = "The value at each position corresponds to," },
                 new TextData(){ Text = "the number of times Ngram occured in the data (Tf), or" },
-                new TextData(){ Text = "the inverse of the number of documents that contain the Ngram (Idf), or." },
+                new TextData(){ Text = "the inverse of the number of documents that contain the Ngram (Idf)," },
                 new TextData(){ Text = "or compute both and multiply together (Tf-Idf)." },
             };
 
@@ -75,7 +75,7 @@ public static void Example()
                 Console.Write($"{prediction.NgramFeatures[i]:F4}  ");
 
             //  Expected output:
-            //   Number of Features: 332
+            //   Number of Features: 52
             //   Ngrams:   This|is|an  is|an|example  an|example|to  example|to|compute  to|compute|Ngrams.  Ngram|is|a  is|a|sequence  a|sequence|of  sequence|of|'N'  of|'N'|consecutive  ...
             //   Features:    1.0000      1.0000          1.0000           1.0000             1.0000            0.0000      0.0000          0.0000          0.0000          0.0000          ...
         }

From 4db1cd7c39a09663bf5f0689f50eb649ebf6e4e5 Mon Sep 17 00:00:00 2001
From: Zeeshan Ahmed <zeahmed@microsoft.com>
Date: Wed, 3 Apr 2019 13:59:14 -0700
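Subject: [PATCH 4/8] Addressed reviewers' comments.

ProduceHashedNgrams sample: dropped the 'maximumNumberOfInverts: -1'
argument and put the remaining named arguments on a single line.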
---
 .../Dynamic/Transforms/Text/ProduceHashedNgrams.cs            | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs
index 419573292c..465dae8671 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs
@@ -33,9 +33,7 @@ public static void Example()
             var textPipeline = mlContext.Transforms.Text.TokenizeIntoWords("Tokens", "Text")
                 .Append(mlContext.Transforms.Conversion.MapValueToKey("Tokens"))
                 .Append(mlContext.Transforms.Text.ProduceHashedNgrams("NgramFeatures", "Tokens", 
-                        numberOfBits: 5,
-                        ngramLength: 3,
-                        useAllLengths: false, maximumNumberOfInverts: -1));
+                        numberOfBits: 5, ngramLength: 3, useAllLengths: false));
             
             // Fit to data.
             var textTransformer = textPipeline.Fit(dataview);

From 12e6d9d540710348b0b90d3d89e332aa5fc2338a Mon Sep 17 00:00:00 2001
From: Zeeshan Ahmed <zeahmed@microsoft.com>
Date: Wed, 3 Apr 2019 16:08:03 -0700
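Subject: [PATCH 5/8] Addressed reviewers' comments.

ProduceHashedNgrams sample: passed 'maximumNumberOfInverts: 1', transformed
the data to read the slot names of the 'NgramFeatures' column, printed the
slot name of each item in every row, and added an n-grams line to the
expected output.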
---
 .../Transforms/Text/ProduceHashedNgrams.cs    | 24 +++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs
index 465dae8671..ee6e1de642 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs
@@ -33,10 +33,14 @@ public static void Example()
             var textPipeline = mlContext.Transforms.Text.TokenizeIntoWords("Tokens", "Text")
                 .Append(mlContext.Transforms.Conversion.MapValueToKey("Tokens"))
                 .Append(mlContext.Transforms.Text.ProduceHashedNgrams("NgramFeatures", "Tokens", 
-                        numberOfBits: 5, ngramLength: 3, useAllLengths: false));
+                        numberOfBits: 5,
+                        ngramLength: 3,
+                        useAllLengths: false,
+                        maximumNumberOfInverts: 1));
             
             // Fit to data.
             var textTransformer = textPipeline.Fit(dataview);
+            var transformedDataView = textTransformer.Transform(dataview);
 
             // Create the prediction engine to get the features extracted from the text.
             var predictionEngine = mlContext.Model.CreatePredictionEngine<TextData, TransformedTextData>(textTransformer);
@@ -47,6 +51,21 @@ public static void Example()
             // Print the length of the feature vector.
             Console.WriteLine($"Number of Features: {prediction.NgramFeatures.Length}");
 
+            // Preview of the produced Ngrams.
+            // Get the slot names from the column's metadata.
+            // If the column is a vector column the slot names corresponds to the names associated with each position in the vector.
+            VBuffer<ReadOnlyMemory<char>> slotNames = default;
+            transformedDataView.Schema["NgramFeatures"].GetSlotNames(ref slotNames);
+            var NgramFeaturesColumn = transformedDataView.GetColumn<VBuffer<float>>(transformedDataView.Schema["NgramFeatures"]);
+            var slots = slotNames.GetValues();
+            Console.Write("Ngrams: ");
+            foreach (var featureRow in NgramFeaturesColumn)
+            {
+                foreach (var item in featureRow.Items())
+                    Console.Write($"{slots[item.Key]}  ");
+                Console.WriteLine();
+            }
+
             // Print the first 10 feature values.
             Console.Write("Features: ");
             for (int i = 0; i < 10; i++)
@@ -54,7 +73,8 @@ public static void Example()
 
             //  Expected output:
             //   Number of Features:  32
-            //   Features:  0.0000  0.0000  2.0000  0.0000  0.0000  1.0000  0.0000  0.0000  1.0000  0.0000
+            //   Ngrams:   This|is|an  example|to|compute  compute|Ngrams|using  Ngrams|using|hashing.  an|example|to  is|an|example  a|sequence|of  of|'N'|consecutive  is|a|sequence  Ngram|is|a  ...
+            //   Features:   0.0000          0.0000               2.0000               0.0000               0.0000        1.0000          0.0000        0.0000              1.0000          0.0000  ...
         }
 
         public class TextData

From 91eed9c1f34509e7c672af7e9c6f7af00b3194eb Mon Sep 17 00:00:00 2001
From: Zeeshan Ahmed <zeahmed@microsoft.com>
Date: Wed, 3 Apr 2019 16:16:11 -0700
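Subject: [PATCH 6/8] Addressed reviewers' comments.

Both samples: replaced "Ngram"/"Ngrams" with "n-gram"/"n-grams" (and
"N-gram" at the start of a sentence) in the sample text, comments, console
label and expected output.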
---
 .../Transforms/Text/ProduceHashedNgrams.cs    | 18 ++++++-------
 .../Dynamic/Transforms/Text/ProduceNgrams.cs  | 26 +++++++++----------
 2 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs
index ee6e1de642..6cbdd8b471 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs
@@ -16,17 +16,17 @@ public static void Example()
             // Create a small dataset as an IEnumerable.
             var samples = new List<TextData>()
             {
-                new TextData(){ Text = "This is an example to compute Ngrams using hashing." },
-                new TextData(){ Text = "Ngram is a sequence of 'N' consecutive words/tokens." },
-                new TextData(){ Text = "ML.NET's ProduceHashedNgrams API produces count of Ngrams and hashes it as an index into a vector of given bit length." },
+                new TextData(){ Text = "This is an example to compute n-grams using hashing." },
+                new TextData(){ Text = "N-gram is a sequence of 'N' consecutive words/tokens." },
+                new TextData(){ Text = "ML.NET's ProduceHashedNgrams API produces count of n-grams and hashes it as an index into a vector of given bit length." },
                 new TextData(){ Text = "The hashing reduces the size of the output feature vector" },
-                new TextData(){ Text = "which is useful in case when number of Ngrams is very large." },
+                new TextData(){ Text = "which is useful in case when number of n-grams is very large." },
             };
 
             // Convert training data to IDataView.
             var dataview = mlContext.Data.LoadFromEnumerable(samples);
 
-            // A pipeline for converting text into numeric hashed Ngram features.
+            // A pipeline for converting text into numeric hashed n-gram features.
             // The following call to 'ProduceHashedNgrams' requires the tokenized text/string as input.
             // This is acheived by calling 'TokenizeIntoWords' first followed by 'ProduceHashedNgrams'.
             // Please note that the length of the output feature vector depends on the 'numberOfBits' settings.
@@ -51,14 +51,14 @@ public static void Example()
             // Print the length of the feature vector.
             Console.WriteLine($"Number of Features: {prediction.NgramFeatures.Length}");
 
-            // Preview of the produced Ngrams.
+            // Preview of the produced n-grams.
             // Get the slot names from the column's metadata.
             // If the column is a vector column the slot names corresponds to the names associated with each position in the vector.
             VBuffer<ReadOnlyMemory<char>> slotNames = default;
             transformedDataView.Schema["NgramFeatures"].GetSlotNames(ref slotNames);
             var NgramFeaturesColumn = transformedDataView.GetColumn<VBuffer<float>>(transformedDataView.Schema["NgramFeatures"]);
             var slots = slotNames.GetValues();
-            Console.Write("Ngrams: ");
+            Console.Write("N-grams: ");
             foreach (var featureRow in NgramFeaturesColumn)
             {
                 foreach (var item in featureRow.Items())
@@ -73,8 +73,8 @@ public static void Example()
 
             //  Expected output:
             //   Number of Features:  32
-            //   Ngrams:   This|is|an  example|to|compute  compute|Ngrams|using  Ngrams|using|hashing.  an|example|to  is|an|example  a|sequence|of  of|'N'|consecutive  is|a|sequence  Ngram|is|a  ...
-            //   Features:   0.0000          0.0000               2.0000               0.0000               0.0000        1.0000          0.0000        0.0000              1.0000          0.0000  ...
+            //   N-grams:   This|is|an  example|to|compute  compute|n-grams|using  n-grams|using|hashing.  an|example|to  is|an|example  a|sequence|of  of|'N'|consecutive  is|a|sequence  N-gram|is|a  ...
+            //   Features:    0.0000          0.0000               2.0000               0.0000               0.0000        1.0000          0.0000        0.0000              1.0000          0.0000  ...
         }
 
         public class TextData
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceNgrams.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceNgrams.cs
index a5606a25ef..40653043e1 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceNgrams.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceNgrams.cs
@@ -16,23 +16,23 @@ public static void Example()
             // Create a small dataset as an IEnumerable.
             var samples = new List<TextData>()
             {
-                new TextData(){ Text = "This is an example to compute Ngrams." },
-                new TextData(){ Text = "Ngram is a sequence of 'N' consecutive words/tokens." },
-                new TextData(){ Text = "ML.NET's ProduceNgrams API produces vector of Ngrams." },
-                new TextData(){ Text = "Each position in the vector corresponds to a particular Ngram." },
+                new TextData(){ Text = "This is an example to compute n-grams." },
+                new TextData(){ Text = "N-gram is a sequence of 'N' consecutive words/tokens." },
+                new TextData(){ Text = "ML.NET's ProduceNgrams API produces vector of n-grams." },
+                new TextData(){ Text = "Each position in the vector corresponds to a particular n-gram." },
                 new TextData(){ Text = "The value at each position corresponds to," },
-                new TextData(){ Text = "the number of times Ngram occured in the data (Tf), or" },
-                new TextData(){ Text = "the inverse of the number of documents that contain the Ngram (Idf)," },
+                new TextData(){ Text = "the number of times n-gram occured in the data (Tf), or" },
+                new TextData(){ Text = "the inverse of the number of documents that contain the n-gram (Idf)," },
                 new TextData(){ Text = "or compute both and multiply together (Tf-Idf)." },
             };
 
             // Convert training data to IDataView.
             var dataview = mlContext.Data.LoadFromEnumerable(samples);
 
-            // A pipeline for converting text into numeric Ngram features.
+            // A pipeline for converting text into numeric n-gram features.
             // The following call to 'ProduceNgrams' requires the tokenized text/string as input.
             // This is acheived by calling 'TokenizeIntoWords' first followed by 'ProduceNgrams'.
-            // Please note that the length of the output feature vector depends on the Ngram settings.
+            // Please note that the length of the output feature vector depends on the n-gram settings.
             var textPipeline = mlContext.Transforms.Text.TokenizeIntoWords("Tokens", "Text")
                 // 'ProduceNgrams' takes key type as input. Converting the tokens into key type using 'MapValueToKey'.
                 .Append(mlContext.Transforms.Conversion.MapValueToKey("Tokens"))
@@ -45,7 +45,7 @@ public static void Example()
             var textTransformer = textPipeline.Fit(dataview);
             var transformedDataView = textTransformer.Transform(dataview);
 
-            // Create the prediction engine to get the Ngram features extracted from the text.
+            // Create the prediction engine to get the n-gram features extracted from the text.
             var predictionEngine = mlContext.Model.CreatePredictionEngine<TextData, TransformedTextData>(textTransformer);
 
             // Convert the text into numeric features.
@@ -54,14 +54,14 @@ public static void Example()
             // Print the length of the feature vector.
             Console.WriteLine($"Number of Features: {prediction.NgramFeatures.Length}");
 
-            // Preview of the produced Ngrams.
+            // Preview of the produced n-grams.
             // Get the slot names from the column's metadata.
             // If the column is a vector column the slot names corresponds to the names associated with each position in the vector.
             VBuffer<ReadOnlyMemory<char>> slotNames = default;
             transformedDataView.Schema["NgramFeatures"].GetSlotNames(ref slotNames);
             var NgramFeaturesColumn = transformedDataView.GetColumn<VBuffer<float>>(transformedDataView.Schema["NgramFeatures"]);
             var slots = slotNames.GetValues();
-            Console.Write("Ngrams: ");
+            Console.Write("N-grams: ");
             foreach (var featureRow in NgramFeaturesColumn)
             {
                 foreach (var item in featureRow.Items())
@@ -76,8 +76,8 @@ public static void Example()
 
             //  Expected output:
             //   Number of Features: 52
-            //   Ngrams:   This|is|an  is|an|example  an|example|to  example|to|compute  to|compute|Ngrams.  Ngram|is|a  is|a|sequence  a|sequence|of  sequence|of|'N'  of|'N'|consecutive  ...
-            //   Features:    1.0000      1.0000          1.0000           1.0000             1.0000            0.0000      0.0000          0.0000          0.0000          0.0000          ...
+            //   N-grams:   This|is|an  is|an|example  an|example|to  example|to|compute  to|compute|n-grams.  N-gram|is|a  is|a|sequence  a|sequence|of  sequence|of|'N'  of|'N'|consecutive  ...
+            //   Features:     1.0000      1.0000          1.0000           1.0000             1.0000            0.0000      0.0000          0.0000          0.0000          0.0000          ...
         }
 
         public class TextData

From c177c6b0a33e1b1fda874b5518be78dfa94bf1b5 Mon Sep 17 00:00:00 2001
From: Zeeshan Ahmed <zeahmed@microsoft.com>
Date: Thu, 4 Apr 2019 10:31:53 -0700
Subject: [PATCH 7/8] Changed input/output classes to private.

---
 .../Dynamic/Transforms/Text/ProduceHashedNgrams.cs            | 4 ++--
 .../Dynamic/Transforms/Text/ProduceNgrams.cs                  | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs
index 6cbdd8b471..404ee7e0e0 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs
@@ -77,12 +77,12 @@ public static void Example()
             //   Features:    0.0000          0.0000               2.0000               0.0000               0.0000        1.0000          0.0000        0.0000              1.0000          0.0000  ...
         }
 
-        public class TextData
+        private class TextData
         {
             public string Text { get; set; }
         }
 
-        public class TransformedTextData : TextData
+        private class TransformedTextData : TextData
         {
             public float[] NgramFeatures { get; set; }
         }
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceNgrams.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceNgrams.cs
index 40653043e1..742f297205 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceNgrams.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceNgrams.cs
@@ -80,12 +80,12 @@ public static void Example()
             //   Features:     1.0000      1.0000          1.0000           1.0000             1.0000            0.0000      0.0000          0.0000          0.0000          0.0000          ...
         }
 
-        public class TextData
+        private class TextData
         {
             public string Text { get; set; }
         }
 
-        public class TransformedTextData : TextData
+        private class TransformedTextData : TextData
         {
             public float[] NgramFeatures { get; set; }
         }

From 8b5001cefe0ee982ef4e361af331795e71f114e4 Mon Sep 17 00:00:00 2001
From: Zeeshan Ahmed <zeahmed@microsoft.com>
Date: Thu, 4 Apr 2019 12:29:27 -0700
Subject: [PATCH 8/8] Addressed reviewers' comments.

---
 .../Dynamic/Transforms/Text/ProduceHashedNgrams.cs              | 2 +-
 .../Dynamic/Transforms/Text/ProduceNgrams.cs                    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs
index 404ee7e0e0..952c751309 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceHashedNgrams.cs
@@ -53,7 +53,7 @@ public static void Example()
 
             // Preview of the produced n-grams.
             // Get the slot names from the column's metadata.
-            // If the column is a vector column the slot names corresponds to the names associated with each position in the vector.
+            // The slot names for a vector column correspond to the names associated with each position in the vector.
             VBuffer<ReadOnlyMemory<char>> slotNames = default;
             transformedDataView.Schema["NgramFeatures"].GetSlotNames(ref slotNames);
             var NgramFeaturesColumn = transformedDataView.GetColumn<VBuffer<float>>(transformedDataView.Schema["NgramFeatures"]);
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceNgrams.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceNgrams.cs
index 742f297205..dd26441b06 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceNgrams.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/ProduceNgrams.cs
@@ -56,7 +56,7 @@ public static void Example()
 
             // Preview of the produced n-grams.
             // Get the slot names from the column's metadata.
-            // If the column is a vector column the slot names corresponds to the names associated with each position in the vector.
+            // The slot names for a vector column correspond to the names associated with each position in the vector.
             VBuffer<ReadOnlyMemory<char>> slotNames = default;
             transformedDataView.Schema["NgramFeatures"].GetSlotNames(ref slotNames);
             var NgramFeaturesColumn = transformedDataView.GetColumn<VBuffer<float>>(transformedDataView.Schema["NgramFeatures"]);