Skip to content

Commit b178211

Browse files
wschin authored and TomFinley committed
Scrub Latent Dirichlet Allocation Transform (Just Renaming) (#2890)
* Scrub LDA
* Scrub static LDA
* Rename static LDA API
* Remove some advanced parameters in simple API
1 parent cd333c5 commit b178211

File tree

8 files changed

+180
-182
lines changed

8 files changed

+180
-182
lines changed

docs/samples/Microsoft.ML.Samples/Dynamic/LdaTransform.cs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44

55
namespace Microsoft.ML.Samples.Dynamic
66
{
7-
public static class LdaTransform
7+
public static class LatentDirichletAllocationTransform
88
{
99
public static void Example()
1010
{
@@ -30,7 +30,7 @@ public static void Example()
3030

3131
// A pipeline for featurizing the "Review" column
3232
var pipeline = ml.Transforms.Text.ProduceWordBags(review).
33-
Append(ml.Transforms.Text.LatentDirichletAllocation(review, ldaFeatures, numTopic:3));
33+
Append(ml.Transforms.Text.LatentDirichletAllocation(review, ldaFeatures, numberOfTopics: 3));
3434

3535
// The transformed data
3636
var transformer = pipeline.Fit(trainData);

src/Microsoft.ML.StaticPipe/LdaStaticExtensions.cs

Lines changed: 47 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -12,65 +12,65 @@ namespace Microsoft.ML.StaticPipe
1212
/// <summary>
1313
/// Information on the result of fitting a LDA transform.
1414
/// </summary>
15-
public sealed class LdaFitResult
15+
public sealed class LatentDirichletAllocationFitResult
1616
{
1717
/// <summary>
1818
/// For user defined delegates that accept instances of the containing type.
1919
/// </summary>
2020
/// <param name="result"></param>
21-
public delegate void OnFit(LdaFitResult result);
21+
public delegate void OnFit(LatentDirichletAllocationFitResult result);
2222

2323
public LatentDirichletAllocationTransformer.LdaSummary LdaTopicSummary;
24-
public LdaFitResult(LatentDirichletAllocationTransformer.LdaSummary ldaTopicSummary)
24+
public LatentDirichletAllocationFitResult(LatentDirichletAllocationTransformer.LdaSummary ldaTopicSummary)
2525
{
2626
LdaTopicSummary = ldaTopicSummary;
2727
}
2828
}
2929

30-
public static class LdaStaticExtensions
30+
public static class LatentDirichletAllocationStaticExtensions
3131
{
3232
private struct Config
3333
{
34-
public readonly int NumTopic;
34+
public readonly int NumberOfTopics;
3535
public readonly Single AlphaSum;
3636
public readonly Single Beta;
37-
public readonly int MHStep;
38-
public readonly int NumIter;
37+
public readonly int SamplingStepCount;
38+
public readonly int MaximumNumberOfIterations;
3939
public readonly int LikelihoodInterval;
40-
public readonly int NumThread;
41-
public readonly int NumMaxDocToken;
42-
public readonly int NumSummaryTermPerTopic;
43-
public readonly int NumBurninIter;
40+
public readonly int NumberOfThreads;
41+
public readonly int MaximumTokenCountPerDocument;
42+
public readonly int NumberOfSummaryTermsPerTopic;
43+
public readonly int NumberOfBurninIterations;
4444
public readonly bool ResetRandomGenerator;
4545

4646
public readonly Action<LatentDirichletAllocationTransformer.LdaSummary> OnFit;
4747

48-
public Config(int numTopic, Single alphaSum, Single beta, int mhStep, int numIter, int likelihoodInterval,
49-
int numThread, int numMaxDocToken, int numSummaryTermPerTopic, int numBurninIter, bool resetRandomGenerator,
48+
public Config(int numberOfTopics, Single alphaSum, Single beta, int samplingStepCount, int maximumNumberOfIterations, int likelihoodInterval,
49+
int numberOfThreads, int maximumTokenCountPerDocument, int numberOfSummaryTermsPerTopic, int numberOfBurninIterations, bool resetRandomGenerator,
5050
Action<LatentDirichletAllocationTransformer.LdaSummary> onFit)
5151
{
52-
NumTopic = numTopic;
52+
NumberOfTopics = numberOfTopics;
5353
AlphaSum = alphaSum;
5454
Beta = beta;
55-
MHStep = mhStep;
56-
NumIter = numIter;
55+
SamplingStepCount = samplingStepCount;
56+
MaximumNumberOfIterations = maximumNumberOfIterations;
5757
LikelihoodInterval = likelihoodInterval;
58-
NumThread = numThread;
59-
NumMaxDocToken = numMaxDocToken;
60-
NumSummaryTermPerTopic = numSummaryTermPerTopic;
61-
NumBurninIter = numBurninIter;
58+
NumberOfThreads = numberOfThreads;
59+
MaximumTokenCountPerDocument = maximumTokenCountPerDocument;
60+
NumberOfSummaryTermsPerTopic = numberOfSummaryTermsPerTopic;
61+
NumberOfBurninIterations = numberOfBurninIterations;
6262
ResetRandomGenerator = resetRandomGenerator;
6363

6464
OnFit = onFit;
6565
}
6666
}
6767

68-
private static Action<LatentDirichletAllocationTransformer.LdaSummary> Wrap(LdaFitResult.OnFit onFit)
68+
private static Action<LatentDirichletAllocationTransformer.LdaSummary> Wrap(LatentDirichletAllocationFitResult.OnFit onFit)
6969
{
7070
if (onFit == null)
7171
return null;
7272

73-
return ldaTopicSummary => onFit(new LdaFitResult(ldaTopicSummary));
73+
return ldaTopicSummary => onFit(new LatentDirichletAllocationFitResult(ldaTopicSummary));
7474
}
7575

7676
private interface ILdaCol
@@ -108,16 +108,16 @@ public override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
108108

109109
infos[i] = new LatentDirichletAllocationEstimator.ColumnOptions(outputNames[toOutput[i]],
110110
inputNames[tcol.Input],
111-
tcol.Config.NumTopic,
111+
tcol.Config.NumberOfTopics,
112112
tcol.Config.AlphaSum,
113113
tcol.Config.Beta,
114-
tcol.Config.MHStep,
115-
tcol.Config.NumIter,
114+
tcol.Config.SamplingStepCount,
115+
tcol.Config.MaximumNumberOfIterations,
116116
tcol.Config.LikelihoodInterval,
117-
tcol.Config.NumThread,
118-
tcol.Config.NumMaxDocToken,
119-
tcol.Config.NumSummaryTermPerTopic,
120-
tcol.Config.NumBurninIter,
117+
tcol.Config.NumberOfThreads,
118+
tcol.Config.MaximumTokenCountPerDocument,
119+
tcol.Config.NumberOfSummaryTermsPerTopic,
120+
tcol.Config.NumberOfBurninIterations,
121121
tcol.Config.ResetRandomGenerator);
122122

123123
if (tcol.Config.OnFit != null)
@@ -137,36 +137,36 @@ public override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
137137

138138
/// <include file='../Microsoft.ML.Transforms/Text/doc.xml' path='doc/members/member[@name="LightLDA"]/*' />
139139
/// <param name="input">A vector of floats representing the document.</param>
140-
/// <param name="numTopic">The number of topics.</param>
140+
/// <param name="numberOfTopics">The number of topics.</param>
141141
/// <param name="alphaSum">Dirichlet prior on document-topic vectors.</param>
142142
/// <param name="beta">Dirichlet prior on vocab-topic vectors.</param>
143-
/// <param name="mhstep">Number of Metropolis Hasting step.</param>
144-
/// <param name="numIterations">Number of iterations.</param>
143+
/// <param name="samplingStepCount">Number of Metropolis Hasting step.</param>
144+
/// <param name="maximumNumberOfIterations">Number of iterations.</param>
145145
/// <param name="likelihoodInterval">Compute log likelihood over local dataset on this iteration interval.</param>
146-
/// <param name="numThreads">The number of training threads. Default value depends on number of logical processors.</param>
147-
/// <param name="numMaxDocToken">The threshold of maximum count of tokens per doc.</param>
148-
/// <param name="numSummaryTermPerTopic">The number of words to summarize the topic.</param>
149-
/// <param name="numBurninIterations">The number of burn-in iterations.</param>
146+
/// <param name="numberOfThreads">The number of training threads. Default value depends on number of logical processors.</param>
147+
/// <param name="maximumTokenCountPerDocument">The threshold of maximum count of tokens per doc.</param>
148+
/// <param name="numberOfSummaryTermsPerTopic">The number of words to summarize the topic.</param>
149+
/// <param name="numberOfBurninIterations">The number of burn-in iterations.</param>
150150
/// <param name="resetRandomGenerator">Reset the random number generator for each document.</param>
151151
/// <param name="onFit">Called upon fitting with the learnt enumeration on the dataset.</param>
152-
public static Vector<float> ToLdaTopicVector(this Vector<float> input,
153-
int numTopic = LatentDirichletAllocationEstimator.Defaults.NumTopic,
152+
public static Vector<float> LatentDirichletAllocation(this Vector<float> input,
153+
int numberOfTopics = LatentDirichletAllocationEstimator.Defaults.NumberOfTopics,
154154
Single alphaSum = LatentDirichletAllocationEstimator.Defaults.AlphaSum,
155155
Single beta = LatentDirichletAllocationEstimator.Defaults.Beta,
156-
int mhstep = LatentDirichletAllocationEstimator.Defaults.Mhstep,
157-
int numIterations = LatentDirichletAllocationEstimator.Defaults.NumIterations,
156+
int samplingStepCount = LatentDirichletAllocationEstimator.Defaults.SamplingStepCount,
157+
int maximumNumberOfIterations = LatentDirichletAllocationEstimator.Defaults.MaximumNumberOfIterations,
158158
int likelihoodInterval = LatentDirichletAllocationEstimator.Defaults.LikelihoodInterval,
159-
int numThreads = LatentDirichletAllocationEstimator.Defaults.NumThreads,
160-
int numMaxDocToken = LatentDirichletAllocationEstimator.Defaults.NumMaxDocToken,
161-
int numSummaryTermPerTopic = LatentDirichletAllocationEstimator.Defaults.NumSummaryTermPerTopic,
162-
int numBurninIterations = LatentDirichletAllocationEstimator.Defaults.NumBurninIterations,
159+
int numberOfThreads = LatentDirichletAllocationEstimator.Defaults.NumberOfThreads,
160+
int maximumTokenCountPerDocument = LatentDirichletAllocationEstimator.Defaults.MaximumTokenCountPerDocument,
161+
int numberOfSummaryTermsPerTopic = LatentDirichletAllocationEstimator.Defaults.NumberOfSummaryTermsPerTopic,
162+
int numberOfBurninIterations = LatentDirichletAllocationEstimator.Defaults.NumberOfBurninIterations,
163163
bool resetRandomGenerator = LatentDirichletAllocationEstimator.Defaults.ResetRandomGenerator,
164-
LdaFitResult.OnFit onFit = null)
164+
LatentDirichletAllocationFitResult.OnFit onFit = null)
165165
{
166166
Contracts.CheckValue(input, nameof(input));
167167
return new ImplVector(input,
168-
new Config(numTopic, alphaSum, beta, mhstep, numIterations, likelihoodInterval, numThreads, numMaxDocToken, numSummaryTermPerTopic,
169-
numBurninIterations, resetRandomGenerator, Wrap(onFit)));
168+
new Config(numberOfTopics, alphaSum, beta, samplingStepCount, maximumNumberOfIterations, likelihoodInterval, numberOfThreads, maximumTokenCountPerDocument, numberOfSummaryTermsPerTopic,
169+
numberOfBurninIterations, resetRandomGenerator, Wrap(onFit)));
170170
}
171171
}
172172
}

0 commit comments

Comments
 (0)