Skip to content

Commit a433ef2

Browse files
committed
Scrub static LDA
1 parent 9b84259 commit a433ef2

File tree

4 files changed

+49
-50
lines changed

4 files changed

+49
-50
lines changed

docs/samples/Microsoft.ML.Samples/Dynamic/LdaTransform.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
namespace Microsoft.ML.Samples.Dynamic
66
{
7-
public static class LdaTransform
7+
public static class LatentDirichletAllocationTransform
88
{
99
public static void Example()
1010
{

src/Microsoft.ML.StaticPipe/LdaStaticExtensions.cs

Lines changed: 47 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -11,65 +11,65 @@ namespace Microsoft.ML.StaticPipe
1111
/// <summary>
1212
/// Information on the result of fitting an LDA transform.
1313
/// </summary>
14-
public sealed class LdaFitResult
14+
public sealed class LatentDirichletAllocationFitResult
1515
{
1616
/// <summary>
1717
/// For user defined delegates that accept instances of the containing type.
1818
/// </summary>
1919
/// <param name="result">The fit result, which carries the LDA topic summary.</param>
20-
public delegate void OnFit(LdaFitResult result);
20+
public delegate void OnFit(LatentDirichletAllocationFitResult result);
2121

2222
public LatentDirichletAllocationTransformer.LdaSummary LdaTopicSummary;
23-
public LdaFitResult(LatentDirichletAllocationTransformer.LdaSummary ldaTopicSummary)
23+
public LatentDirichletAllocationFitResult(LatentDirichletAllocationTransformer.LdaSummary ldaTopicSummary)
2424
{
2525
LdaTopicSummary = ldaTopicSummary;
2626
}
2727
}
2828

29-
public static class LdaStaticExtensions
29+
public static class LatentDirichletAllocationStaticExtensions
3030
{
3131
private struct Config
3232
{
33-
public readonly int NumTopic;
33+
public readonly int NumberOfTopics;
3434
public readonly Single AlphaSum;
3535
public readonly Single Beta;
36-
public readonly int MHStep;
37-
public readonly int NumIter;
36+
public readonly int SamplingStepCount;
37+
public readonly int MaximumNumberOfIterations;
3838
public readonly int LikelihoodInterval;
39-
public readonly int NumThread;
40-
public readonly int NumMaxDocToken;
41-
public readonly int NumSummaryTermPerTopic;
42-
public readonly int NumBurninIter;
39+
public readonly int NumberOfThreads;
40+
public readonly int MaximumTokenCountPerDocument;
41+
public readonly int NumberOfSummaryTermsPerTopic;
42+
public readonly int NumberOfBurninIterations;
4343
public readonly bool ResetRandomGenerator;
4444

4545
public readonly Action<LatentDirichletAllocationTransformer.LdaSummary> OnFit;
4646

47-
public Config(int numTopic, Single alphaSum, Single beta, int mhStep, int numIter, int likelihoodInterval,
48-
int numThread, int numMaxDocToken, int numSummaryTermPerTopic, int numBurninIter, bool resetRandomGenerator,
47+
public Config(int numberOfTopics, Single alphaSum, Single beta, int samplingStepCount, int maximumNumberOfIterations, int likelihoodInterval,
48+
int numberOfThreads, int maximumTokenCountPerDocument, int numberOfSummaryTermsPerTopic, int numberOfBurninIterations, bool resetRandomGenerator,
4949
Action<LatentDirichletAllocationTransformer.LdaSummary> onFit)
5050
{
51-
NumTopic = numTopic;
51+
NumberOfTopics = numberOfTopics;
5252
AlphaSum = alphaSum;
5353
Beta = beta;
54-
MHStep = mhStep;
55-
NumIter = numIter;
54+
SamplingStepCount = samplingStepCount;
55+
MaximumNumberOfIterations = maximumNumberOfIterations;
5656
LikelihoodInterval = likelihoodInterval;
57-
NumThread = numThread;
58-
NumMaxDocToken = numMaxDocToken;
59-
NumSummaryTermPerTopic = numSummaryTermPerTopic;
60-
NumBurninIter = numBurninIter;
57+
NumberOfThreads = numberOfThreads;
58+
MaximumTokenCountPerDocument = maximumTokenCountPerDocument;
59+
NumberOfSummaryTermsPerTopic = numberOfSummaryTermsPerTopic;
60+
NumberOfBurninIterations = numberOfBurninIterations;
6161
ResetRandomGenerator = resetRandomGenerator;
6262

6363
OnFit = onFit;
6464
}
6565
}
6666

67-
private static Action<LatentDirichletAllocationTransformer.LdaSummary> Wrap(LdaFitResult.OnFit onFit)
67+
private static Action<LatentDirichletAllocationTransformer.LdaSummary> Wrap(LatentDirichletAllocationFitResult.OnFit onFit)
6868
{
6969
if (onFit == null)
7070
return null;
7171

72-
return ldaTopicSummary => onFit(new LdaFitResult(ldaTopicSummary));
72+
return ldaTopicSummary => onFit(new LatentDirichletAllocationFitResult(ldaTopicSummary));
7373
}
7474

7575
private interface ILdaCol
@@ -107,16 +107,16 @@ public override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
107107

108108
infos[i] = new LatentDirichletAllocationEstimator.ColumnOptions(outputNames[toOutput[i]],
109109
inputNames[tcol.Input],
110-
tcol.Config.NumTopic,
110+
tcol.Config.NumberOfTopics,
111111
tcol.Config.AlphaSum,
112112
tcol.Config.Beta,
113-
tcol.Config.MHStep,
114-
tcol.Config.NumIter,
113+
tcol.Config.SamplingStepCount,
114+
tcol.Config.MaximumNumberOfIterations,
115115
tcol.Config.LikelihoodInterval,
116-
tcol.Config.NumThread,
117-
tcol.Config.NumMaxDocToken,
118-
tcol.Config.NumSummaryTermPerTopic,
119-
tcol.Config.NumBurninIter,
116+
tcol.Config.NumberOfThreads,
117+
tcol.Config.MaximumTokenCountPerDocument,
118+
tcol.Config.NumberOfSummaryTermsPerTopic,
119+
tcol.Config.NumberOfBurninIterations,
120120
tcol.Config.ResetRandomGenerator);
121121

122122
if (tcol.Config.OnFit != null)
@@ -136,36 +136,36 @@ public override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
136136

137137
/// <include file='../Microsoft.ML.Transforms/Text/doc.xml' path='doc/members/member[@name="LightLDA"]/*' />
138138
/// <param name="input">A vector of floats representing the document.</param>
139-
/// <param name="numTopic">The number of topics.</param>
139+
/// <param name="numberOfTopics">The number of topics.</param>
140140
/// <param name="alphaSum">Dirichlet prior on document-topic vectors.</param>
141141
/// <param name="beta">Dirichlet prior on vocab-topic vectors.</param>
142-
/// <param name="mhstep">Number of Metropolis Hasting step.</param>
143-
/// <param name="numIterations">Number of iterations.</param>
142+
/// <param name="samplingStepCount">Number of Metropolis-Hastings steps.</param>
143+
/// <param name="maximumNumberOfIterations">Number of iterations.</param>
144144
/// <param name="likelihoodInterval">Compute log likelihood over local dataset on this iteration interval.</param>
145-
/// <param name="numThreads">The number of training threads. Default value depends on number of logical processors.</param>
146-
/// <param name="numMaxDocToken">The threshold of maximum count of tokens per doc.</param>
147-
/// <param name="numSummaryTermPerTopic">The number of words to summarize the topic.</param>
148-
/// <param name="numBurninIterations">The number of burn-in iterations.</param>
145+
/// <param name="numberOfThreads">The number of training threads. Default value depends on number of logical processors.</param>
146+
/// <param name="maximumTokenCountPerDocument">The threshold of maximum count of tokens per doc.</param>
147+
/// <param name="numberOfSummaryTermsPerTopic">The number of words to summarize the topic.</param>
148+
/// <param name="numberOfBurninIterations">The number of burn-in iterations.</param>
149149
/// <param name="resetRandomGenerator">Reset the random number generator for each document.</param>
150150
/// <param name="onFit">Called upon fitting with a fit result that carries the LDA topic summary.</param>
151-
public static Vector<float> ToLdaTopicVector(this Vector<float> input,
152-
int numTopic = LatentDirichletAllocationEstimator.Defaults.NumberOfTopics,
151+
public static Vector<float> ToLatentDirichletAllocationTopicVector(this Vector<float> input,
152+
int numberOfTopics = LatentDirichletAllocationEstimator.Defaults.NumberOfTopics,
153153
Single alphaSum = LatentDirichletAllocationEstimator.Defaults.AlphaSum,
154154
Single beta = LatentDirichletAllocationEstimator.Defaults.Beta,
155-
int mhstep = LatentDirichletAllocationEstimator.Defaults.SamplingStepCount,
156-
int numIterations = LatentDirichletAllocationEstimator.Defaults.MaximumNumberOfIterations,
155+
int samplingStepCount = LatentDirichletAllocationEstimator.Defaults.SamplingStepCount,
156+
int maximumNumberOfIterations = LatentDirichletAllocationEstimator.Defaults.MaximumNumberOfIterations,
157157
int likelihoodInterval = LatentDirichletAllocationEstimator.Defaults.LikelihoodInterval,
158-
int numThreads = LatentDirichletAllocationEstimator.Defaults.NumThreads,
159-
int numMaxDocToken = LatentDirichletAllocationEstimator.Defaults.NumMaxDocToken,
160-
int numSummaryTermPerTopic = LatentDirichletAllocationEstimator.Defaults.NumSummaryTermPerTopic,
161-
int numBurninIterations = LatentDirichletAllocationEstimator.Defaults.NumBurninIterations,
158+
int numberOfThreads = LatentDirichletAllocationEstimator.Defaults.NumThreads,
159+
int maximumTokenCountPerDocument = LatentDirichletAllocationEstimator.Defaults.NumMaxDocToken,
160+
int numberOfSummaryTermsPerTopic = LatentDirichletAllocationEstimator.Defaults.NumSummaryTermPerTopic,
161+
int numberOfBurninIterations = LatentDirichletAllocationEstimator.Defaults.NumBurninIterations,
162162
bool resetRandomGenerator = LatentDirichletAllocationEstimator.Defaults.ResetRandomGenerator,
163-
LdaFitResult.OnFit onFit = null)
163+
LatentDirichletAllocationFitResult.OnFit onFit = null)
164164
{
165165
Contracts.CheckValue(input, nameof(input));
166166
return new ImplVector(input,
167-
new Config(numTopic, alphaSum, beta, mhstep, numIterations, likelihoodInterval, numThreads, numMaxDocToken, numSummaryTermPerTopic,
168-
numBurninIterations, resetRandomGenerator, Wrap(onFit)));
167+
new Config(numberOfTopics, alphaSum, beta, samplingStepCount, maximumNumberOfIterations, likelihoodInterval, numberOfThreads, maximumTokenCountPerDocument, numberOfSummaryTermsPerTopic,
168+
numberOfBurninIterations, resetRandomGenerator, Wrap(onFit)));
169169
}
170170
}
171171
}

src/Microsoft.ML.Transforms/Text/LdaTransform.cs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
using Microsoft.ML.EntryPoints;
1515
using Microsoft.ML.Internal.Internallearn;
1616
using Microsoft.ML.Internal.Utilities;
17-
using Microsoft.ML.Model;
1817
using Microsoft.ML.TextAnalytics;
1918
using Microsoft.ML.Transforms.Text;
2019

test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -674,7 +674,7 @@ public void LdaTopicModel()
674674
var est = data.MakeNewEstimator()
675675
.Append(r => (
676676
r.label,
677-
topics: r.text.ToBagofWords().ToLdaTopicVector(numTopic: 3, numSummaryTermPerTopic:5, alphaSum: 10, onFit: m => ldaSummary = m.LdaTopicSummary)));
677+
topics: r.text.ToBagofWords().ToLatentDirichletAllocationTopicVector(numberOfTopics: 3, numberOfSummaryTermsPerTopic:5, alphaSum: 10, onFit: m => ldaSummary = m.LdaTopicSummary)));
678678

679679
var transformer = est.Fit(data);
680680
var tdata = transformer.Transform(data);

0 commit comments

Comments
 (0)