@@ -93,7 +93,7 @@ internal sealed class Options : TransformInputBase
9393 public int SkipLength = NgramExtractingEstimator . Defaults . SkipLength ;
9494
9595 [ Argument ( ArgumentType . Multiple , HelpText = "Maximum number of ngrams to store in the dictionary" , ShortName = "max" ) ]
96- public int [ ] MaxNumTerms = new int [ ] { NgramExtractingEstimator . Defaults . MaximumTermCount } ;
96+ public int [ ] MaxNumTerms = new int [ ] { NgramExtractingEstimator . Defaults . MaximumNgramsCount } ;
9797
9898 [ Argument ( ArgumentType . AtMostOnce , HelpText = "The weighting criteria" ) ]
9999 public NgramExtractingEstimator . WeightingCriteria Weighting = NgramExtractingEstimator . Defaults . Weighting ;
@@ -253,7 +253,7 @@ private static SequencePool[] Train(IHostEnvironment env, NgramExtractingEstimat
253253 // Note: GetNgramIdFinderAdd will control how many ngrams of a specific length will
254254 // be added (using lims[iinfo]), therefore we set slotLim to the maximum
255255 helpers [ iinfo ] = new NgramBufferBuilder ( ngramLength , skipLength , Utils . ArrayMaxSize ,
256- GetNgramIdFinderAdd ( env , counts [ iinfo ] , columns [ iinfo ] . MaximumTermCounts , ngramMaps [ iinfo ] , transformInfos [ iinfo ] . RequireIdf ) ) ;
256+ GetNgramIdFinderAdd ( env , counts [ iinfo ] , columns [ iinfo ] . MaximumNgramsCounts , ngramMaps [ iinfo ] , transformInfos [ iinfo ] . RequireIdf ) ) ;
257257 }
258258
259259 int cInfoFull = 0 ;
@@ -293,7 +293,7 @@ private static SequencePool[] Train(IHostEnvironment env, NgramExtractingEstimat
293293 }
294294 }
295295 }
296- AssertValid ( env , counts [ iinfo ] , columns [ iinfo ] . MaximumTermCounts , ngramMaps [ iinfo ] ) ;
296+ AssertValid ( env , counts [ iinfo ] , columns [ iinfo ] . MaximumNgramsCounts , ngramMaps [ iinfo ] ) ;
297297 }
298298 }
299299
@@ -307,7 +307,7 @@ private static SequencePool[] Train(IHostEnvironment env, NgramExtractingEstimat
307307
308308 for ( int iinfo = 0 ; iinfo < columns . Length ; iinfo ++ )
309309 {
310- AssertValid ( env , counts [ iinfo ] , columns [ iinfo ] . MaximumTermCounts , ngramMaps [ iinfo ] ) ;
310+ AssertValid ( env , counts [ iinfo ] , columns [ iinfo ] . MaximumNgramsCounts , ngramMaps [ iinfo ] ) ;
311311
312312 int ngramLength = transformInfos [ iinfo ] . NgramLength ;
313313 for ( int i = 0 ; i < ngramLength ; i ++ )
@@ -695,7 +695,7 @@ internal static class Defaults
695695 public const int NgramLength = 2 ;
696696 public const bool AllLengths = true ;
697697 public const int SkipLength = 0 ;
698- public const int MaximumTermCount = 10000000 ;
698+ public const int MaximumNgramsCount = 10000000 ;
699699 public const WeightingCriteria Weighting = WeightingCriteria . Tf ;
700700 }
701701
@@ -712,16 +712,16 @@ internal static class Defaults
712712 /// <param name="ngramLength">Ngram length.</param>
713713 /// <param name="skipLength">Maximum number of tokens to skip when constructing an ngram.</param>
714714 /// <param name="allLengths">Whether to include all ngram lengths up to <paramref name="ngramLength"/> or only <paramref name="ngramLength"/>.</param>
715- /// <param name="maximumTermCount ">Maximum number of ngrams to store in the dictionary.</param>
715+ /// <param name="maximumNgramsCount ">Maximum number of n-grams to store in the dictionary.</param>
716716 /// <param name="weighting">Statistical measure used to evaluate how important a word is to a document in a corpus.</param>
717717 internal NgramExtractingEstimator ( IHostEnvironment env ,
718718 string outputColumnName , string inputColumnName = null ,
719719 int ngramLength = Defaults . NgramLength ,
720720 int skipLength = Defaults . SkipLength ,
721721 bool allLengths = Defaults . AllLengths ,
722- int maximumTermCount = Defaults . MaximumTermCount ,
722+ int maximumNgramsCount = Defaults . MaximumNgramsCount ,
723723 WeightingCriteria weighting = Defaults . Weighting )
724- : this ( env , new [ ] { ( outputColumnName , inputColumnName ?? outputColumnName ) } , ngramLength , skipLength , allLengths , maximumTermCount , weighting )
724+ : this ( env , new [ ] { ( outputColumnName , inputColumnName ?? outputColumnName ) } , ngramLength , skipLength , allLengths , maximumNgramsCount , weighting )
725725 {
726726 }
727727
@@ -734,16 +734,16 @@ internal NgramExtractingEstimator(IHostEnvironment env,
734734 /// <param name="ngramLength">Ngram length.</param>
735735 /// <param name="skipLength">Maximum number of tokens to skip when constructing an ngram.</param>
736736 /// <param name="allLengths">Whether to include all ngram lengths up to <paramref name="ngramLength"/> or only <paramref name="ngramLength"/>.</param>
737- /// <param name="maximumTermCount ">Maximum number of ngrams to store in the dictionary.</param>
737+ /// <param name="maximumNgramsCount ">Maximum number of n-grams to store in the dictionary.</param>
738738 /// <param name="weighting">Statistical measure used to evaluate how important a word is to a document in a corpus.</param>
739739 internal NgramExtractingEstimator ( IHostEnvironment env ,
740740 ( string outputColumnName , string inputColumnName ) [ ] columns ,
741741 int ngramLength = Defaults . NgramLength ,
742742 int skipLength = Defaults . SkipLength ,
743743 bool allLengths = Defaults . AllLengths ,
744- int maximumTermCount = Defaults . MaximumTermCount ,
744+ int maximumNgramsCount = Defaults . MaximumNgramsCount ,
745745 WeightingCriteria weighting = Defaults . Weighting )
746- : this ( env , columns . Select ( x => new ColumnOptions ( x . outputColumnName , x . inputColumnName , ngramLength , skipLength , allLengths , weighting , maximumTermCount ) ) . ToArray ( ) )
746+ : this ( env , columns . Select ( x => new ColumnOptions ( x . outputColumnName , x . inputColumnName , ngramLength , skipLength , allLengths , weighting , maximumNgramsCount ) ) . ToArray ( ) )
747747 {
748748 }
749749
@@ -809,14 +809,14 @@ public sealed class ColumnOptions
809809 /// <summary>The weighting criteria.</summary>
810810 public readonly WeightingCriteria Weighting ;
811811 /// <summary>
812- /// Underlying state of <see cref="MaximumTermCounts "/>.
812+ /// Underlying state of <see cref="MaximumNgramsCounts "/>.
813813 /// </summary>
814- private readonly ImmutableArray < int > _maximumTermCounts ;
814+ private readonly ImmutableArray < int > _maximumNgramsCounts ;
815815 /// <summary>
816- /// Contains the maximum number of grams to store in the dictionary, for each level of ngrams ,
817- /// from 1 (in position 0) up to ngramLength (in position ngramLength -1)
816+ /// Contains the maximum number of terms (that is, n- grams) to store in the dictionary, for each level of n-grams ,
817+ /// from n= 1 (in position 0) up to n=<see cref="NgramLength"/> (in position <see cref="NgramLength"/> -1)
818818 /// </summary>
819- public IReadOnlyList < int > MaximumTermCounts => _maximumTermCounts ;
819+ public IReadOnlyList < int > MaximumNgramsCounts => _maximumNgramsCounts ;
820820
821821 /// <summary>
822822 /// Describes how the transformer handles one Gcn column pair.
@@ -827,14 +827,14 @@ public sealed class ColumnOptions
827827 /// <param name="skipLength">Maximum number of tokens to skip when constructing an ngram.</param>
828828 /// <param name="allLengths">Whether to store all ngram lengths up to ngramLength, or only ngramLength.</param>
829829 /// <param name="weighting">The weighting criteria.</param>
830- /// <param name="maximumTermCount ">Maximum number of ngrams to store in the dictionary.</param>
830+ /// <param name="maximumNgramsCount ">Maximum number of n-grams to store in the dictionary.</param>
831831 public ColumnOptions ( string name , string inputColumnName = null ,
832832 int ngramLength = Defaults . NgramLength ,
833833 int skipLength = Defaults . SkipLength ,
834834 bool allLengths = Defaults . AllLengths ,
835835 WeightingCriteria weighting = Defaults . Weighting ,
836- int maximumTermCount = Defaults . MaximumTermCount )
837- : this ( name , ngramLength , skipLength , allLengths , weighting , new int [ ] { maximumTermCount } , inputColumnName ?? name )
836+ int maximumNgramsCount = Defaults . MaximumNgramsCount )
837+ : this ( name , ngramLength , skipLength , allLengths , weighting , new int [ ] { maximumNgramsCount } , inputColumnName ?? name )
838838 {
839839 }
840840
@@ -843,7 +843,7 @@ internal ColumnOptions(string name,
843843 int skipLength ,
844844 bool allLengths ,
845845 WeightingCriteria weighting ,
846- int [ ] maximumTermCounts ,
846+ int [ ] maximumNgramsCounts ,
847847 string inputColumnName = null )
848848 {
849849 Name = name ;
@@ -861,18 +861,18 @@ internal ColumnOptions(string name,
861861 var limits = new int [ ngramLength ] ;
862862 if ( ! AllLengths )
863863 {
864- Contracts . CheckUserArg ( Utils . Size ( maximumTermCounts ) == 0 ||
865- Utils . Size ( maximumTermCounts ) == 1 && maximumTermCounts [ 0 ] > 0 , nameof ( maximumTermCounts ) ) ;
866- limits [ ngramLength - 1 ] = Utils . Size ( maximumTermCounts ) == 0 ? Defaults . MaximumTermCount : maximumTermCounts [ 0 ] ;
864+ Contracts . CheckUserArg ( Utils . Size ( maximumNgramsCounts ) == 0 ||
865+ Utils . Size ( maximumNgramsCounts ) == 1 && maximumNgramsCounts [ 0 ] > 0 , nameof ( maximumNgramsCounts ) ) ;
866+ limits [ ngramLength - 1 ] = Utils . Size ( maximumNgramsCounts ) == 0 ? Defaults . MaximumNgramsCount : maximumNgramsCounts [ 0 ] ;
867867 }
868868 else
869869 {
870- Contracts . CheckUserArg ( Utils . Size ( maximumTermCounts ) <= ngramLength , nameof ( maximumTermCounts ) ) ;
871- Contracts . CheckUserArg ( Utils . Size ( maximumTermCounts ) == 0 || maximumTermCounts . All ( i => i >= 0 ) && maximumTermCounts [ maximumTermCounts . Length - 1 ] > 0 , nameof ( maximumTermCounts ) ) ;
872- var extend = Utils . Size ( maximumTermCounts ) == 0 ? Defaults . MaximumTermCount : maximumTermCounts [ maximumTermCounts . Length - 1 ] ;
873- limits = Utils . BuildArray ( ngramLength , i => i < Utils . Size ( maximumTermCounts ) ? maximumTermCounts [ i ] : extend ) ;
870+ Contracts . CheckUserArg ( Utils . Size ( maximumNgramsCounts ) <= ngramLength , nameof ( maximumNgramsCounts ) ) ;
871+ Contracts . CheckUserArg ( Utils . Size ( maximumNgramsCounts ) == 0 || maximumNgramsCounts . All ( i => i >= 0 ) && maximumNgramsCounts [ maximumNgramsCounts . Length - 1 ] > 0 , nameof ( maximumNgramsCounts ) ) ;
872+ var extend = Utils . Size ( maximumNgramsCounts ) == 0 ? Defaults . MaximumNgramsCount : maximumNgramsCounts [ maximumNgramsCounts . Length - 1 ] ;
873+ limits = Utils . BuildArray ( ngramLength , i => i < Utils . Size ( maximumNgramsCounts ) ? maximumNgramsCounts [ i ] : extend ) ;
874874 }
875- _maximumTermCounts = ImmutableArray . Create ( limits ) ;
875+ _maximumNgramsCounts = ImmutableArray . Create ( limits ) ;
876876 }
877877 }
878878
0 commit comments