Skip to content

Commit e3c2043

Browse files
Ivanidzo4ka, Shahab Moradi
authored and committed
Text catalog fixes (#3570)
* Typos as described in #3491 Changing "ngrams" to "n-gram" * changing ngram -> n-gram * typos * addressing PR comments * regenerating the manifest * restoring the argument short name * regenerating the manifest
1 parent 6e92a9e commit e3c2043

File tree

16 files changed

+232
-231
lines changed

16 files changed

+232
-231
lines changed

docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ public static void Example()
2424
// false ==RUDE== Dude, 2.
2525
// true Until the next game, this is the best Xbox game!
2626

27-
// A pipeline to tokenize text as characters and then combine them together into ngrams
27+
// A pipeline to tokenize text as characters and then combine them together into n-grams
2828
// The pipeline uses the default settings to featurize.
2929

3030
var charsPipeline = ml.Transforms.Text.TokenizeIntoCharactersAsKeys("Chars", "SentimentText", useMarkerCharacters: false);

src/Microsoft.ML.Data/Transforms/InvertHashUtils.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ public int GetHashCode(Pair obj)
166166
/// <param name="copier">For copying input values into a value to actually store. Useful for
167167
/// types of objects where it is possible to do a comparison relatively quickly on some sort
168168
/// of "unsafe" object, but for which when we decide to actually store it we need to provide
169-
/// a "safe" version of the object. Utilized in the ngram hash transform, for example.</param>
169+
/// a "safe" version of the object. Utilized in the n-gram hash transform, for example.</param>
170170
public InvertHashCollector(int slots, int maxCount, ValueMapper<T, StringBuilder> mapper,
171171
IEqualityComparer<T> comparer, ValueMapper<T, T> copier = null)
172172
{

src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -303,14 +303,14 @@ public override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
303303
}
304304

305305
/// <summary>
306-
/// Produces a bag of counts of ngrams (sequences of consecutive words ) in a given text.
307-
/// It does so by building a dictionary of ngrams and using the id in the dictionary as the index in the bag.
306+
/// Produces a bag of counts of n-grams (sequences of consecutive words) in a given text.
307+
/// It does so by building a dictionary of n-grams and using the id in the dictionary as the index in the bag.
308308
/// </summary>
309309
/// <param name="input">The column to apply to.</param>
310310
/// <param name="ngramLength">Ngram length.</param>
311-
/// <param name="skipLength">Maximum number of tokens to skip when constructing an ngram.</param>
312-
/// <param name="useAllLengths">Whether to include all ngram lengths up to <paramref name="ngramLength"/> or only <paramref name="ngramLength"/>.</param>
313-
/// <param name="maximumNgramsCount">Maximum number of ngrams to store in the dictionary.</param>
311+
/// <param name="skipLength">Maximum number of tokens to skip when constructing an n-gram.</param>
312+
/// <param name="useAllLengths">Whether to include all n-gram lengths up to <paramref name="ngramLength"/> or only <paramref name="ngramLength"/>.</param>
313+
/// <param name="maximumNgramsCount">Maximum number of n-grams to store in the dictionary.</param>
314314
/// <param name="weighting">Statistical measure used to evaluate how important a word is to a document in a corpus.</param>
315315
public static Vector<float> ProduceWordBags(this Scalar<string> input,
316316
int ngramLength = 1,
@@ -393,14 +393,14 @@ public override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
393393
}
394394

395395
/// <summary>
396-
/// Produces a bag of counts of ngrams (sequences of consecutive words of length 1-n) in a given text.
397-
/// It does so by hashing each ngram and using the hash value as the index in the bag.
396+
/// Produces a bag of counts of n-grams (sequences of consecutive words of length 1-n) in a given text.
397+
/// It does so by hashing each n-gram and using the hash value as the index in the bag.
398398
/// </summary>
399399
/// <param name="input">The column to apply to.</param>
400400
/// <param name="numberOfBits">Number of bits to hash into. Must be between 1 and 30, inclusive.</param>
401401
/// <param name="ngramLength">Ngram length.</param>
402-
/// <param name="skipLength">Maximum number of tokens to skip when constructing an ngram.</param>
403-
/// <param name="useAllLengths">Whether to include all ngram lengths up to <paramref name="ngramLength"/> or only <paramref name="ngramLength"/>.</param>
402+
/// <param name="skipLength">Maximum number of tokens to skip when constructing an n-gram.</param>
403+
/// <param name="useAllLengths">Whether to include all n-gram lengths up to <paramref name="ngramLength"/> or only <paramref name="ngramLength"/>.</param>
404404
/// <param name="seed">Hashing seed.</param>
405405
/// <param name="useOrderedHashing">Whether the position of each source column should be included in the hash (when there are multiple source columns).</param>
406406
/// <param name="maximumNumberOfInverts">During hashing we construct mappings between original values and the produced hash values.
@@ -418,7 +418,7 @@ public static Vector<float> ProduceHashedWordBags(this Scalar<string> input,
418418
}
419419

420420
/// <summary>
421-
/// Extensions for statically typed ngram estimator.
421+
/// Extensions for statically typed n-gram estimator.
422422
/// </summary>
423423
public static class NgramEstimatorStaticExtensions
424424
{
@@ -482,16 +482,16 @@ public override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
482482
}
483483

484484
/// <summary>
485-
/// Produces a bag of counts of ngrams (sequences of consecutive words ) in a given tokenized text.
486-
/// It does so by building a dictionary of ngrams and using the id in the dictionary as the index in the bag.
485+
/// Produces a bag of counts of n-grams (sequences of consecutive words) in a given tokenized text.
486+
/// It does so by building a dictionary of n-grams and using the id in the dictionary as the index in the bag.
487487
///
488488
/// <see cref="ProduceNgrams"/> is different from <see cref="WordBagEstimatorStaticExtensions.ProduceWordBags"/>
489489
/// in a way that <see cref="ProduceNgrams"/> takes tokenized text as input while <see cref="WordBagEstimatorStaticExtensions.ProduceWordBags"/> tokenizes text internally.
490490
/// </summary>
491491
/// <param name="input">The column to apply to.</param>
492492
/// <param name="ngramLength">Ngram length.</param>
493-
/// <param name="skipLength">Maximum number of tokens to skip when constructing an ngram.</param>
494-
/// <param name="useAllLengths">Whether to include all ngram lengths up to <paramref name="ngramLength"/> or only <paramref name="ngramLength"/>.</param>
493+
/// <param name="skipLength">Maximum number of tokens to skip when constructing an n-gram.</param>
494+
/// <param name="useAllLengths">Whether to include all n-gram lengths up to <paramref name="ngramLength"/> or only <paramref name="ngramLength"/>.</param>
495495
/// <param name="maximumNgramsCount">Maximum number of n-grams to store in the dictionary.</param>
496496
/// <param name="weighting">Statistical measure used to evaluate how important a word is to a document in a corpus.</param>
497497
public static Vector<float> ProduceNgrams<TKey>(this VarVector<Key<TKey, string>> input,
@@ -504,7 +504,7 @@ public static Vector<float> ProduceNgrams<TKey>(this VarVector<Key<TKey, string>
504504
}
505505

506506
/// <summary>
507-
/// Extensions for statically typed ngram hash estimator.
507+
/// Extensions for statically typed n-gram hash estimator.
508508
/// </summary>
509509
public static class NgramHashEstimatorStaticExtensions
510510
{
@@ -568,17 +568,17 @@ public override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
568568
}
569569

570570
/// <summary>
571-
/// Produces a bag of counts of ngrams (sequences of consecutive words of length 1-n) in a given tokenized text.
572-
/// It does so by hashing each ngram and using the hash value as the index in the bag.
571+
/// Produces a bag of counts of n-grams (sequences of consecutive words of length 1-n) in a given tokenized text.
572+
/// It does so by hashing each n-gram and using the hash value as the index in the bag.
573573
///
574574
/// <see cref="ProduceHashedNgrams"/> is different from <see cref="WordHashBagEstimatorStaticExtensions.ProduceHashedWordBags"/>
575575
/// in a way that <see cref="ProduceHashedNgrams"/> takes tokenized text as input while <see cref="WordHashBagEstimatorStaticExtensions.ProduceHashedWordBags"/> tokenizes text internally.
576576
/// </summary>
577577
/// <param name="input">The column to apply to.</param>
578578
/// <param name="numberOfBits">Number of bits to hash into. Must be between 1 and 30, inclusive.</param>
579579
/// <param name="ngramLength">Ngram length.</param>
580-
/// <param name="skipLength">Maximum number of tokens to skip when constructing an ngram.</param>
581-
/// <param name="useAllLengths">Whether to include all ngram lengths up to <paramref name="ngramLength"/> or only <paramref name="ngramLength"/>.</param>
580+
/// <param name="skipLength">Maximum number of tokens to skip when constructing an n-gram.</param>
581+
/// <param name="useAllLengths">Whether to include all n-gram lengths up to <paramref name="ngramLength"/> or only <paramref name="ngramLength"/>.</param>
582582
/// <param name="seed">Hashing seed.</param>
583583
/// <param name="useOrderedHashing">Whether the position of each source column should be included in the hash (when there are multiple source columns).</param>
584584
/// <param name="maximumNumberOfInverts">During hashing we construct mappings between original values and the produced hash values.

src/Microsoft.ML.StaticPipe/TransformsStatic.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1535,7 +1535,7 @@ public override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
15351535
}
15361536
}
15371537
/// <summary>
1538-
/// Accept text data and converts it to array which represent combinations of ngram/skip-gram token counts.
1538+
/// Accepts text data and converts it to an array which represents combinations of n-gram/skip-gram token counts.
15391539
/// </summary>
15401540
/// <param name="input">Input data.</param>
15411541
/// <param name="otherInputs">Additional data.</param>

0 commit comments

Comments
 (0)