Skip to content

Commit dd4477a

Browse files
committed
Address one more comment
1 parent 26127b2 commit dd4477a

File tree

3 files changed

+5
-5
lines changed

3 files changed

+5
-5
lines changed

src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -485,16 +485,16 @@ public override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
485485
/// Produces a bag of counts of n-grams (sequences of consecutive words) in a given tokenized text.
486486
/// It does so by building a dictionary of ngrams and using the id in the dictionary as the index in the bag.
487487
///
488-
/// /// <see cref="ToNgrams"/> is different from <see cref="WordBagEstimatorStaticExtensions.ToBagofWords"/>
489-
/// in a way that <see cref="ToNgrams"/> takes tokenized text as input while <see cref="WordBagEstimatorStaticExtensions.ToBagofWords"/> tokenizes text internally.
488+
/// <see cref="ProduceNgrams"/> is different from <see cref="WordBagEstimatorStaticExtensions.ToBagofWords"/>
489+
/// in that <see cref="ProduceNgrams"/> takes tokenized text as input, while <see cref="WordBagEstimatorStaticExtensions.ToBagofWords"/> tokenizes text internally.
490490
/// </summary>
491491
/// <param name="input">The column to apply to.</param>
492492
/// <param name="ngramLength">Ngram length.</param>
493493
/// <param name="skipLength">Maximum number of tokens to skip when constructing an ngram.</param>
494494
/// <param name="allLengths">Whether to include all ngram lengths up to <paramref name="ngramLength"/> or only <paramref name="ngramLength"/>.</param>
495495
/// <param name="maximumNgramsCount">Maximum number of n-grams to store in the dictionary.</param>
496496
/// <param name="weighting">Statistical measure used to evaluate how important a word is to a document in a corpus.</param>
497-
public static Vector<float> ToNgrams<TKey>(this VarVector<Key<TKey, string>> input,
497+
public static Vector<float> ProduceNgrams<TKey>(this VarVector<Key<TKey, string>> input,
498498
int ngramLength = 1,
499499
int skipLength = 0,
500500
bool allLengths = true,

test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -604,7 +604,7 @@ public void Ngrams()
604604
var est = data.MakeNewEstimator()
605605
.Append(r => (
606606
r.label,
607-
ngrams: r.text.TokenizeText().ToKey().ToNgrams(),
607+
ngrams: r.text.TokenizeText().ToKey().ProduceNgrams(),
608608
ngramshash: r.text.TokenizeText().ToKey().ApplyNgramHashing()));
609609

610610
var tdata = est.Fit(data).Transform(data);

test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -467,7 +467,7 @@ private void TextFeaturizationOn(string dataPath)
467467
BagOfBigrams: r.Message.NormalizeText().ToBagofHashedWords(ngramLength: 2, allLengths: false),
468468

469469
// NLP pipeline 3: bag of tri-character sequences with TF-IDF weighting.
470-
BagOfTrichar: r.Message.TokenizeIntoCharacters().ToNgrams(ngramLength: 3, weighting: NgramExtractingEstimator.WeightingCriteria.TfIdf),
470+
BagOfTrichar: r.Message.TokenizeIntoCharacters().ProduceNgrams(ngramLength: 3, weighting: NgramExtractingEstimator.WeightingCriteria.TfIdf),
471471

472472
// NLP pipeline 4: word embeddings.
473473
// PretrainedModelKind.Sswe is used here for performance of the test. In a real

0 commit comments

Comments
 (0)