Skip to content

Commit 26127b2

Browse files
committed
Address comments
1 parent 062d70c commit 26127b2

File tree

4 files changed

+43
-64
lines changed

4 files changed

+43
-64
lines changed

src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -443,15 +443,15 @@ private sealed class Reconciler : EstimatorReconciler, IEquatable<Reconciler>
443443
private readonly int _ngramLength;
444444
private readonly int _skipLength;
445445
private readonly bool _allLengths;
446-
private readonly int _maxNumTerms;
446+
private readonly int _maxNgramsCount;
447447
private readonly NgramExtractingEstimator.WeightingCriteria _weighting;
448448

449449
public Reconciler(int ngramLength, int skipLength, bool allLengths, int maxNumTerms, NgramExtractingEstimator.WeightingCriteria weighting)
450450
{
451451
_ngramLength = ngramLength;
452452
_skipLength = skipLength;
453453
_allLengths = allLengths;
454-
_maxNumTerms = maxNumTerms;
454+
_maxNgramsCount = maxNumTerms;
455455
_weighting = weighting;
456456

457457
}
@@ -461,7 +461,7 @@ public bool Equals(Reconciler other)
461461
return _ngramLength == other._ngramLength &&
462462
_skipLength == other._skipLength &&
463463
_allLengths == other._allLengths &&
464-
_maxNumTerms == other._maxNumTerms &&
464+
_maxNgramsCount == other._maxNgramsCount &&
465465
_weighting == other._weighting;
466466
}
467467

@@ -477,7 +477,7 @@ public override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
477477
foreach (var outCol in toOutput)
478478
pairs.Add((outputNames[outCol], inputNames[((OutPipelineColumn)outCol).Input]));
479479

480-
return new NgramExtractingEstimator(env, pairs.ToArray(), _ngramLength, _skipLength, _allLengths, _maxNumTerms, _weighting);
480+
return new NgramExtractingEstimator(env, pairs.ToArray(), _ngramLength, _skipLength, _allLengths, _maxNgramsCount, _weighting);
481481
}
482482
}
483483

@@ -492,15 +492,15 @@ public override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
492492
/// <param name="ngramLength">Ngram length.</param>
493493
/// <param name="skipLength">Maximum number of tokens to skip when constructing an ngram.</param>
494494
/// <param name="allLengths">Whether to include all ngram lengths up to <paramref name="ngramLength"/> or only <paramref name="ngramLength"/>.</param>
495-
/// <param name="maximumTermCount">Maximum number of ngrams to store in the dictionary.</param>
495+
/// <param name="maximumNgramsCount">Maximum number of n-grams to store in the dictionary.</param>
496496
/// <param name="weighting">Statistical measure used to evaluate how important a word is to a document in a corpus.</param>
497497
public static Vector<float> ToNgrams<TKey>(this VarVector<Key<TKey, string>> input,
498498
int ngramLength = 1,
499499
int skipLength = 0,
500500
bool allLengths = true,
501-
int maximumTermCount = 10000000,
501+
int maximumNgramsCount = 10000000,
502502
NgramExtractingEstimator.WeightingCriteria weighting = NgramExtractingEstimator.WeightingCriteria.Tf)
503-
=> new OutPipelineColumn(input, ngramLength, skipLength, allLengths, maximumTermCount, weighting);
503+
=> new OutPipelineColumn(input, ngramLength, skipLength, allLengths, maximumNgramsCount, weighting);
504504
}
505505

506506
/// <summary>

src/Microsoft.ML.Transforms/Text/NgramTransform.cs

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ internal sealed class Options : TransformInputBase
9393
public int SkipLength = NgramExtractingEstimator.Defaults.SkipLength;
9494

9595
[Argument(ArgumentType.Multiple, HelpText = "Maximum number of ngrams to store in the dictionary", ShortName = "max")]
96-
public int[] MaxNumTerms = new int[] { NgramExtractingEstimator.Defaults.MaximumTermCount };
96+
public int[] MaxNumTerms = new int[] { NgramExtractingEstimator.Defaults.MaximumNgramsCount };
9797

9898
[Argument(ArgumentType.AtMostOnce, HelpText = "The weighting criteria")]
9999
public NgramExtractingEstimator.WeightingCriteria Weighting = NgramExtractingEstimator.Defaults.Weighting;
@@ -253,7 +253,7 @@ private static SequencePool[] Train(IHostEnvironment env, NgramExtractingEstimat
253253
// Note: GetNgramIdFinderAdd will control how many ngrams of a specific length will
254254
// be added (using lims[iinfo]), therefore we set slotLim to the maximum
255255
helpers[iinfo] = new NgramBufferBuilder(ngramLength, skipLength, Utils.ArrayMaxSize,
256-
GetNgramIdFinderAdd(env, counts[iinfo], columns[iinfo].MaximumTermCounts, ngramMaps[iinfo], transformInfos[iinfo].RequireIdf));
256+
GetNgramIdFinderAdd(env, counts[iinfo], columns[iinfo].MaximumNgramsCounts, ngramMaps[iinfo], transformInfos[iinfo].RequireIdf));
257257
}
258258

259259
int cInfoFull = 0;
@@ -293,7 +293,7 @@ private static SequencePool[] Train(IHostEnvironment env, NgramExtractingEstimat
293293
}
294294
}
295295
}
296-
AssertValid(env, counts[iinfo], columns[iinfo].MaximumTermCounts, ngramMaps[iinfo]);
296+
AssertValid(env, counts[iinfo], columns[iinfo].MaximumNgramsCounts, ngramMaps[iinfo]);
297297
}
298298
}
299299

@@ -307,7 +307,7 @@ private static SequencePool[] Train(IHostEnvironment env, NgramExtractingEstimat
307307

308308
for (int iinfo = 0; iinfo < columns.Length; iinfo++)
309309
{
310-
AssertValid(env, counts[iinfo], columns[iinfo].MaximumTermCounts, ngramMaps[iinfo]);
310+
AssertValid(env, counts[iinfo], columns[iinfo].MaximumNgramsCounts, ngramMaps[iinfo]);
311311

312312
int ngramLength = transformInfos[iinfo].NgramLength;
313313
for (int i = 0; i < ngramLength; i++)
@@ -695,7 +695,7 @@ internal static class Defaults
695695
public const int NgramLength = 2;
696696
public const bool AllLengths = true;
697697
public const int SkipLength = 0;
698-
public const int MaximumTermCount = 10000000;
698+
public const int MaximumNgramsCount = 10000000;
699699
public const WeightingCriteria Weighting = WeightingCriteria.Tf;
700700
}
701701

@@ -712,16 +712,16 @@ internal static class Defaults
712712
/// <param name="ngramLength">Ngram length.</param>
713713
/// <param name="skipLength">Maximum number of tokens to skip when constructing an ngram.</param>
714714
/// <param name="allLengths">Whether to include all ngram lengths up to <paramref name="ngramLength"/> or only <paramref name="ngramLength"/>.</param>
715-
/// <param name="maximumTermCount">Maximum number of ngrams to store in the dictionary.</param>
715+
/// <param name="maximumNgramsCount">Maximum number of n-grams to store in the dictionary.</param>
716716
/// <param name="weighting">Statistical measure used to evaluate how important a word is to a document in a corpus.</param>
717717
internal NgramExtractingEstimator(IHostEnvironment env,
718718
string outputColumnName, string inputColumnName = null,
719719
int ngramLength = Defaults.NgramLength,
720720
int skipLength = Defaults.SkipLength,
721721
bool allLengths = Defaults.AllLengths,
722-
int maximumTermCount = Defaults.MaximumTermCount,
722+
int maximumNgramsCount = Defaults.MaximumNgramsCount,
723723
WeightingCriteria weighting = Defaults.Weighting)
724-
: this(env, new[] { (outputColumnName, inputColumnName ?? outputColumnName) }, ngramLength, skipLength, allLengths, maximumTermCount, weighting)
724+
: this(env, new[] { (outputColumnName, inputColumnName ?? outputColumnName) }, ngramLength, skipLength, allLengths, maximumNgramsCount, weighting)
725725
{
726726
}
727727

@@ -734,16 +734,16 @@ internal NgramExtractingEstimator(IHostEnvironment env,
734734
/// <param name="ngramLength">Ngram length.</param>
735735
/// <param name="skipLength">Maximum number of tokens to skip when constructing an ngram.</param>
736736
/// <param name="allLengths">Whether to include all ngram lengths up to <paramref name="ngramLength"/> or only <paramref name="ngramLength"/>.</param>
737-
/// <param name="maximumTermCount">Maximum number of ngrams to store in the dictionary.</param>
737+
/// <param name="maximumNgramsCount">Maximum number of n-grams to store in the dictionary.</param>
738738
/// <param name="weighting">Statistical measure used to evaluate how important a word is to a document in a corpus.</param>
739739
internal NgramExtractingEstimator(IHostEnvironment env,
740740
(string outputColumnName, string inputColumnName)[] columns,
741741
int ngramLength = Defaults.NgramLength,
742742
int skipLength = Defaults.SkipLength,
743743
bool allLengths = Defaults.AllLengths,
744-
int maximumTermCount = Defaults.MaximumTermCount,
744+
int maximumNgramsCount = Defaults.MaximumNgramsCount,
745745
WeightingCriteria weighting = Defaults.Weighting)
746-
: this(env, columns.Select(x => new ColumnOptions(x.outputColumnName, x.inputColumnName, ngramLength, skipLength, allLengths, weighting, maximumTermCount)).ToArray())
746+
: this(env, columns.Select(x => new ColumnOptions(x.outputColumnName, x.inputColumnName, ngramLength, skipLength, allLengths, weighting, maximumNgramsCount)).ToArray())
747747
{
748748
}
749749

@@ -809,14 +809,14 @@ public sealed class ColumnOptions
809809
/// <summary>The weighting criteria.</summary>
810810
public readonly WeightingCriteria Weighting;
811811
/// <summary>
812-
/// Underlying state of <see cref="MaximumTermCounts"/>.
812+
/// Underlying state of <see cref="MaximumNgramsCounts"/>.
813813
/// </summary>
814-
private readonly ImmutableArray<int> _maximumTermCounts;
814+
private readonly ImmutableArray<int> _maximumNgramsCounts;
815815
/// <summary>
816-
/// Contains the maximum number of grams to store in the dictionary, for each level of ngrams,
817-
/// from 1 (in position 0) up to ngramLength (in position ngramLength-1)
816+
/// Contains the maximum number of terms (that is, n-grams) to store in the dictionary, for each level of n-grams,
817+
/// from n=1 (in position 0) up to n=<see cref="NgramLength"/> (in position <see cref="NgramLength"/>-1)
818818
/// </summary>
819-
public IReadOnlyList<int> MaximumTermCounts => _maximumTermCounts;
819+
public IReadOnlyList<int> MaximumNgramsCounts => _maximumNgramsCounts;
820820

821821
/// <summary>
822822
/// Describes how the transformer handles one Gcn column pair.
@@ -827,14 +827,14 @@ public sealed class ColumnOptions
827827
/// <param name="skipLength">Maximum number of tokens to skip when constructing an ngram.</param>
828828
/// <param name="allLengths">Whether to store all ngram lengths up to ngramLength, or only ngramLength.</param>
829829
/// <param name="weighting">The weighting criteria.</param>
830-
/// <param name="maximumTermCount">Maximum number of ngrams to store in the dictionary.</param>
830+
/// <param name="maximumNgramsCount">Maximum number of n-grams to store in the dictionary.</param>
831831
public ColumnOptions(string name, string inputColumnName = null,
832832
int ngramLength = Defaults.NgramLength,
833833
int skipLength = Defaults.SkipLength,
834834
bool allLengths = Defaults.AllLengths,
835835
WeightingCriteria weighting = Defaults.Weighting,
836-
int maximumTermCount = Defaults.MaximumTermCount)
837-
: this(name, ngramLength, skipLength, allLengths, weighting, new int[] { maximumTermCount }, inputColumnName ?? name)
836+
int maximumNgramsCount = Defaults.MaximumNgramsCount)
837+
: this(name, ngramLength, skipLength, allLengths, weighting, new int[] { maximumNgramsCount }, inputColumnName ?? name)
838838
{
839839
}
840840

@@ -843,7 +843,7 @@ internal ColumnOptions(string name,
843843
int skipLength,
844844
bool allLengths,
845845
WeightingCriteria weighting,
846-
int[] maximumTermCounts,
846+
int[] maximumNgramsCounts,
847847
string inputColumnName = null)
848848
{
849849
Name = name;
@@ -861,18 +861,18 @@ internal ColumnOptions(string name,
861861
var limits = new int[ngramLength];
862862
if (!AllLengths)
863863
{
864-
Contracts.CheckUserArg(Utils.Size(maximumTermCounts) == 0 ||
865-
Utils.Size(maximumTermCounts) == 1 && maximumTermCounts[0] > 0, nameof(maximumTermCounts));
866-
limits[ngramLength - 1] = Utils.Size(maximumTermCounts) == 0 ? Defaults.MaximumTermCount : maximumTermCounts[0];
864+
Contracts.CheckUserArg(Utils.Size(maximumNgramsCounts) == 0 ||
865+
Utils.Size(maximumNgramsCounts) == 1 && maximumNgramsCounts[0] > 0, nameof(maximumNgramsCounts));
866+
limits[ngramLength - 1] = Utils.Size(maximumNgramsCounts) == 0 ? Defaults.MaximumNgramsCount : maximumNgramsCounts[0];
867867
}
868868
else
869869
{
870-
Contracts.CheckUserArg(Utils.Size(maximumTermCounts) <= ngramLength, nameof(maximumTermCounts));
871-
Contracts.CheckUserArg(Utils.Size(maximumTermCounts) == 0 || maximumTermCounts.All(i => i >= 0) && maximumTermCounts[maximumTermCounts.Length - 1] > 0, nameof(maximumTermCounts));
872-
var extend = Utils.Size(maximumTermCounts) == 0 ? Defaults.MaximumTermCount : maximumTermCounts[maximumTermCounts.Length - 1];
873-
limits = Utils.BuildArray(ngramLength, i => i < Utils.Size(maximumTermCounts) ? maximumTermCounts[i] : extend);
870+
Contracts.CheckUserArg(Utils.Size(maximumNgramsCounts) <= ngramLength, nameof(maximumNgramsCounts));
871+
Contracts.CheckUserArg(Utils.Size(maximumNgramsCounts) == 0 || maximumNgramsCounts.All(i => i >= 0) && maximumNgramsCounts[maximumNgramsCounts.Length - 1] > 0, nameof(maximumNgramsCounts));
872+
var extend = Utils.Size(maximumNgramsCounts) == 0 ? Defaults.MaximumNgramsCount : maximumNgramsCounts[maximumNgramsCounts.Length - 1];
873+
limits = Utils.BuildArray(ngramLength, i => i < Utils.Size(maximumNgramsCounts) ? maximumNgramsCounts[i] : extend);
874874
}
875-
_maximumTermCounts = ImmutableArray.Create(limits);
875+
_maximumNgramsCounts = ImmutableArray.Create(limits);
876876
}
877877
}
878878

0 commit comments

Comments
 (0)