@@ -24,7 +24,7 @@ public static void Example()
// false ==RUDE== Dude, 2.
// true Until the next game, this is the best Xbox game!

-// A pipeline to tokenize text as characters and then combine them together into ngrams
+// A pipeline to tokenize text as characters and then combine them together into n-grams
// The pipeline uses the default settings to featurize.

var charsPipeline = ml.Transforms.Text.TokenizeIntoCharactersAsKeys("Chars", "SentimentText", useMarkerCharacters: false);
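To make the sample's intent concrete, here is a minimal, self-contained sketch of character-level n-gram featurization with the dynamic ML.NET API. The toy input class, the sample sentences, and the `ProduceNgrams` step that follows the character tokenizer are illustrative assumptions, not a copy of the sample file being diffed.

```csharp
using System;
using Microsoft.ML;

// Toy input type; the real sample reads sentiment data with a "SentimentText" column.
public class SentimentRow
{
    public string SentimentText { get; set; }
}

public static class CharNgramSketch
{
    public static void Main()
    {
        var ml = new MLContext();
        var data = ml.Data.LoadFromEnumerable(new[]
        {
            new SentimentRow { SentimentText = "Dude, this is great!" },
            new SentimentRow { SentimentText = "Until the next game, this is the best Xbox game!" }
        });

        // Tokenize the text into characters (stored as keys), then roll consecutive
        // characters up into n-grams and count them into a numeric feature vector.
        var charsPipeline = ml.Transforms.Text
            .TokenizeIntoCharactersAsKeys("Chars", "SentimentText", useMarkerCharacters: false)
            .Append(ml.Transforms.Text.ProduceNgrams("CharFeatures", "Chars"));

        var transformed = charsPipeline.Fit(data).Transform(data);
        foreach (var column in transformed.Schema)
            Console.WriteLine(column.Name);
    }
}
```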
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Data/Transforms/InvertHashUtils.cs
@@ -166,7 +166,7 @@ public int GetHashCode(Pair obj)
/// <param name="copier">For copying input values into a value to actually store. Useful for
/// types of objects where it is possible to do a comparison relatively quickly on some sort
/// of "unsafe" object, but for which when we decide to actually store it we need to provide
-/// a "safe" version of the object. Utilized in the ngram hash transform, for example.</param>
+/// a "safe" version of the object. Utilized in the n-gram hash transform, for example.</param>
public InvertHashCollector(int slots, int maxCount, ValueMapper<T, StringBuilder> mapper,
IEqualityComparer<T> comparer, ValueMapper<T, T> copier = null)
{
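The `copier` remark above describes a general pattern: compare candidate values through a cheap, transient view, and only pay for a defensive copy once a value is actually stored. Below is a hypothetical, library-independent sketch of that idea; the span-based buffer reuse is an assumption for illustration, not the transform's actual internals.

```csharp
using System;
using System.Collections.Generic;

// Hypothetical sketch of "compare on an unsafe view, copy a safe value on store".
// Incoming tokens arrive as spans over a reused buffer, so they must not be kept directly.
public sealed class SlotValueCollector
{
    private readonly List<string> _stored = new List<string>();

    public void Offer(ReadOnlySpan<char> candidate)
    {
        // The comparison can run against the transient span: no allocation needed.
        foreach (var existing in _stored)
        {
            if (candidate.SequenceEqual(existing.AsSpan()))
                return; // already present, nothing to copy
        }

        // Only once we decide to keep the value do we materialize a "safe" owned copy.
        _stored.Add(candidate.ToString());
    }

    public IReadOnlyList<string> Values => _stored;
}
```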
38 changes: 19 additions & 19 deletions src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs
@@ -303,14 +303,14 @@ public override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
}

/// <summary>
-/// Produces a bag of counts of ngrams (sequences of consecutive words ) in a given text.
-/// It does so by building a dictionary of ngrams and using the id in the dictionary as the index in the bag.
+/// Produces a bag of counts of n-grams (sequences of consecutive words) in a given text.
+/// It does so by building a dictionary of n-grams and using the id in the dictionary as the index in the bag.
/// </summary>
/// <param name="input">The column to apply to.</param>
/// <param name="ngramLength">Ngram length.</param>
-/// <param name="skipLength">Maximum number of tokens to skip when constructing an ngram.</param>
-/// <param name="useAllLengths">Whether to include all ngram lengths up to <paramref name="ngramLength"/> or only <paramref name="ngramLength"/>.</param>
-/// <param name="maximumNgramsCount">Maximum number of ngrams to store in the dictionary.</param>
+/// <param name="skipLength">Maximum number of tokens to skip when constructing an n-gram.</param>
+/// <param name="useAllLengths">Whether to include all n-gram lengths up to <paramref name="ngramLength"/> or only <paramref name="ngramLength"/>.</param>
+/// <param name="maximumNgramsCount">Maximum number of n-grams to store in the dictionary.</param>
/// <param name="weighting">Statistical measure used to evaluate how important a word is to a document in a corpus.</param>
public static Vector<float> ProduceWordBags(this Scalar<string> input,
int ngramLength = 1,
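For readers unfamiliar with the static-pipe surface, the dictionary-based bag of n-grams documented above has a dynamic-API counterpart, `Transforms.Text.ProduceWordBags`. A minimal sketch, assuming a single `Text` column and illustrative parameter values:

```csharp
using System;
using Microsoft.ML;

// Toy row type; the column name "Text" is an assumption for this sketch.
public class TextRow
{
    public string Text { get; set; }
}

public static class WordBagSketch
{
    public static void Main()
    {
        var ml = new MLContext();
        var data = ml.Data.LoadFromEnumerable(new[]
        {
            new TextRow { Text = "the quick brown fox" },
            new TextRow { Text = "the quick red fox" }
        });

        // Builds a dictionary of all unigrams and bigrams seen during Fit; each row is
        // then represented as counts indexed by the n-gram's id in that dictionary.
        var pipeline = ml.Transforms.Text.ProduceWordBags(
            "Features", "Text", ngramLength: 2, useAllLengths: true);

        var featurized = pipeline.Fit(data).Transform(data);
        Console.WriteLine(featurized.Schema["Features"].Type);
    }
}
```

Because a dictionary is built, every distinct n-gram (up to `maximumNgramsCount`) gets its own slot and slot names remain human-readable.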
@@ -393,14 +393,14 @@ public override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
}

/// <summary>
-/// Produces a bag of counts of ngrams (sequences of consecutive words of length 1-n) in a given text.
-/// It does so by hashing each ngram and using the hash value as the index in the bag.
+/// Produces a bag of counts of n-grams (sequences of consecutive words of length 1-n) in a given text.
+/// It does so by hashing each n-gram and using the hash value as the index in the bag.
/// </summary>
/// <param name="input">The column to apply to.</param>
/// <param name="numberOfBits">Number of bits to hash into. Must be between 1 and 30, inclusive.</param>
/// <param name="ngramLength">Ngram length.</param>
-/// <param name="skipLength">Maximum number of tokens to skip when constructing an ngram.</param>
-/// <param name="useAllLengths">Whether to include all ngram lengths up to <paramref name="ngramLength"/> or only <paramref name="ngramLength"/>.</param>
+/// <param name="skipLength">Maximum number of tokens to skip when constructing an n-gram.</param>
+/// <param name="useAllLengths">Whether to include all n-gram lengths up to <paramref name="ngramLength"/> or only <paramref name="ngramLength"/>.</param>
/// <param name="seed">Hashing seed.</param>
/// <param name="useOrderedHashing">Whether the position of each source column should be included in the hash (when there are multiple source columns).</param>
/// <param name="maximumNumberOfInverts">During hashing we constuct mappings between original values and the produced hash values.
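The hashing variant trades the dictionary for a fixed-size hash space. A sketch of the corresponding dynamic-API call, `Transforms.Text.ProduceHashedWordBags`, with assumed column names and illustrative parameter choices:

```csharp
using Microsoft.ML;

public static class HashedWordBagSketch
{
    // Featurize a "Text" column by hashing every 1- and 2-gram into a 2^16-slot
    // count vector; no dictionary is built, so memory does not grow with vocabulary.
    public static ITransformer Fit(MLContext ml, IDataView data)
    {
        var pipeline = ml.Transforms.Text.ProduceHashedWordBags(
            "Features", "Text",
            numberOfBits: 16,
            ngramLength: 2,
            useAllLengths: true);

        return pipeline.Fit(data);
    }
}
```

With 16 bits the output has 2^16 slots regardless of vocabulary size; distinct n-grams may collide, which is the price paid for bounded memory.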
@@ -418,7 +418,7 @@ public static Vector<float> ProduceHashedWordBags(this Scalar<string> input,
}

/// <summary>
-/// Extensions for statically typed ngram estimator.
+/// Extensions for statically typed n-gram estimator.
/// </summary>
public static class NgramEstimatorStaticExtensions
{
@@ -482,16 +482,16 @@ public override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
}

/// <summary>
-/// Produces a bag of counts of ngrams (sequences of consecutive words ) in a given tokenized text.
-/// It does so by building a dictionary of ngrams and using the id in the dictionary as the index in the bag.
+/// Produces a bag of counts of n-grams (sequences of consecutive words ) in a given tokenized text.
+/// It does so by building a dictionary of n-grams and using the id in the dictionary as the index in the bag.
///
/// /// <see cref="ProduceNgrams"/> is different from <see cref="WordBagEstimatorStaticExtensions.ProduceWordBags"/>
/// in a way that <see cref="ProduceNgrams"/> takes tokenized text as input while <see cref="WordBagEstimatorStaticExtensions.ProduceWordBags"/> tokenizes text internally.
/// </summary>
/// <param name="input">The column to apply to.</param>
/// <param name="ngramLength">Ngram length.</param>
-/// <param name="skipLength">Maximum number of tokens to skip when constructing an ngram.</param>
-/// <param name="useAllLengths">Whether to include all ngram lengths up to <paramref name="ngramLength"/> or only <paramref name="ngramLength"/>.</param>
+/// <param name="skipLength">Maximum number of tokens to skip when constructing an n-gram.</param>
+/// <param name="useAllLengths">Whether to include all n-gram lengths up to <paramref name="ngramLength"/> or only <paramref name="ngramLength"/>.</param>
/// <param name="maximumNgramsCount">Maximum number of n-grams to store in the dictionary.</param>
/// <param name="weighting">Statistical measure used to evaluate how important a word is to a document in a corpus.</param>
public static Vector<float> ProduceNgrams<TKey>(this VarVector<Key<TKey, string>> input,
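As the remarks above stress, `ProduceNgrams` consumes text that has already been tokenized into key-typed tokens. A dynamic-API sketch of that preparation (tokenize, map tokens to keys, then count n-grams); the column names and parameter values are assumptions for illustration:

```csharp
using Microsoft.ML;

public static class NgramFromTokensSketch
{
    // Tokenize into words, map the tokens to keys, then count n-grams over the keys.
    public static IDataView Featurize(MLContext ml, IDataView data)
    {
        var pipeline = ml.Transforms.Text.TokenizeIntoWords("Tokens", "Text")
            .Append(ml.Transforms.Conversion.MapValueToKey("Tokens"))
            .Append(ml.Transforms.Text.ProduceNgrams(
                "Features", "Tokens",
                ngramLength: 2,
                useAllLengths: false));

        return pipeline.Fit(data).Transform(data);
    }
}
```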
@@ -504,7 +504,7 @@ public static Vector<float> ProduceNgrams<TKey>(this VarVector<Key<TKey, string>
}

/// <summary>
-/// Extensions for statically typed ngram hash estimator.
+/// Extensions for statically typed n-gram hash estimator.
/// </summary>
public static class NgramHashEstimatorStaticExtensions
{
@@ -568,17 +568,17 @@ public override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
}

/// <summary>
-/// Produces a bag of counts of ngrams (sequences of consecutive words of length 1-n) in a given tokenized text.
-/// It does so by hashing each ngram and using the hash value as the index in the bag.
+/// Produces a bag of counts of n-grams (sequences of n consecutive words of length 1-n) in a given tokenized text.
+/// It does so by hashing each n-gram and using the hash value as the index in the bag.
///
/// <see cref="ProduceHashedNgrams"/> is different from <see cref="WordHashBagEstimatorStaticExtensions.ProduceHashedWordBags"/>
/// in a way that <see cref="ProduceHashedNgrams"/> takes tokenized text as input while <see cref="WordHashBagEstimatorStaticExtensions.ProduceHashedWordBags"/> tokenizes text internally.
/// </summary>
/// <param name="input">The column to apply to.</param>
/// <param name="numberOfBits">Number of bits to hash into. Must be between 1 and 30, inclusive.</param>
/// <param name="ngramLength">Ngram length.</param>
-/// <param name="skipLength">Maximum number of tokens to skip when constructing an ngram.</param>
-/// <param name="useAllLengths">Whether to include all ngram lengths up to <paramref name="ngramLength"/> or only <paramref name="ngramLength"/>.</param>
+/// <param name="skipLength">Maximum number of tokens to skip when constructing an n-gram.</param>
+/// <param name="useAllLengths">Whether to include all n-gram lengths up to <paramref name="ngramLength"/> or only <paramref name="ngramLength"/>.</param>
/// <param name="seed">Hashing seed.</param>
/// <param name="useOrderedHashing">Whether the position of each source column should be included in the hash (when there are multiple source columns).</param>
/// <param name="maximumNumberOfInverts">During hashing we constuct mappings between original values and the produced hash values.
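The hashed counterpart works over the same key-typed tokens; setting `maximumNumberOfInverts: 1` asks the transform to keep a hash-to-n-gram mapping so that feature slot names stay inspectable. Again a sketch with assumed column names and parameter values:

```csharp
using Microsoft.ML;

public static class HashedNgramSketch
{
    // Hash each 1- and 2-gram of key-typed tokens into a 2^10-slot count vector.
    public static IDataView Featurize(MLContext ml, IDataView data)
    {
        var pipeline = ml.Transforms.Text.TokenizeIntoWords("Tokens", "Text")
            .Append(ml.Transforms.Conversion.MapValueToKey("Tokens"))
            .Append(ml.Transforms.Text.ProduceHashedNgrams(
                "Features", "Tokens",
                numberOfBits: 10,
                ngramLength: 2,
                maximumNumberOfInverts: 1));

        return pipeline.Fit(data).Transform(data);
    }
}
```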
2 changes: 1 addition & 1 deletion src/Microsoft.ML.StaticPipe/TransformsStatic.cs
@@ -1535,7 +1535,7 @@ public override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
}
}
/// <summary>
-/// Accept text data and converts it to array which represent combinations of ngram/skip-gram token counts.
+/// Accept text data and converts it to array which represent combinations of n-gram/skip-gram token counts.
/// </summary>
/// <param name="input">Input data.</param>
/// <param name="otherInputs">Additional data.</param>
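The estimator documented here corresponds to the dynamic `Transforms.Text.FeaturizeText`, which bundles normalization, tokenization, and word- and character-level n-gram extraction behind defaults. A minimal sketch, assuming columns named `Text` and `Features`:

```csharp
using Microsoft.ML;

public static class FeaturizeTextSketch
{
    // One estimator that normalizes, tokenizes, and extracts word and character
    // n-gram counts from the "Text" column into a single "Features" vector.
    public static IEstimator<ITransformer> Build(MLContext ml) =>
        ml.Transforms.Text.FeaturizeText("Features", "Text");
}
```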