Skip to content

Commit 0835c52

Browse files
authored
Remove duplicate NormalizeFeatures from FFM trainer (#2964)
* Remove duplicate NormalizeFeatures from FFM trainer * add comment * set FFM feature normalization default to false * by default normalization is off for FFM * Hide base class NormalizeFeatures field * hide base option class NormalizeField for FFM Options
1 parent ffe9b33 commit 0835c52

File tree

3 files changed

+15
-25
lines changed

3 files changed

+15
-25
lines changed

src/Microsoft.ML.EntryPoints/JsonUtils/JsonManifestUtils.cs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,15 +153,24 @@ private static JArray BuildInputManifest(IExceptionContext ectx, Type inputType,
153153

154154
// Instantiate a value of the input, to pull defaults out of.
155155
var defaults = Activator.CreateInstance(inputType);
156-
156+
var collectedFields = new HashSet<string>();
157157
var inputs = new List<KeyValuePair<Double, JObject>>();
158158
foreach (var fieldInfo in inputType.GetFields(BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance))
159159
{
160160
var inputAttr = fieldInfo.GetCustomAttributes(typeof(ArgumentAttribute), false).FirstOrDefault() as ArgumentAttribute;
161161
if (inputAttr == null || inputAttr.Visibility == ArgumentAttribute.VisibilityType.CmdLineOnly)
162162
continue;
163+
var name = inputAttr.Name ?? fieldInfo.Name;
164+
// The order of GetFields is stable, meaning that
165+
// fields are always returned in the same order and for this reason
166+
// unit tests to compare manifest are passing. For the same reason
167+
// duplicate name skipped are always in the same correct order.
168+
// Same name field can bubble up from base class even though
169+
// its overidden / hidden, skip it.
170+
if (collectedFields.Contains(name))
171+
continue;
163172
var jo = new JObject();
164-
jo[FieldNames.Name] = inputAttr.Name ?? fieldInfo.Name;
173+
jo[FieldNames.Name] = name;
165174
jo[FieldNames.Type] = BuildTypeToken(ectx, fieldInfo, fieldInfo.FieldType, catalog);
166175
jo[FieldNames.Desc] = inputAttr.HelpText;
167176
if (inputAttr.Aliases != null)
@@ -262,6 +271,7 @@ private static JArray BuildInputManifest(IExceptionContext ectx, Type inputType,
262271
}
263272

264273
inputs.Add(new KeyValuePair<Double, JObject>(inputAttr.SortOrder, jo));
274+
collectedFields.Add(name);
265275
}
266276
return new JArray(inputs.OrderBy(x => x.Key).Select(x => x.Value).ToArray());
267277
}

src/Microsoft.ML.StandardTrainers/FactorizationMachine/FactorizationMachineTrainer.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ public sealed class Options : TrainerInputBaseWithWeight
8080
/// Whether to normalize the input vectors so that the concatenation of all fields' feature vectors is unit-length.
8181
/// </summary>
8282
[Argument(ArgumentType.AtMostOnce, HelpText = "Whether to normalize the input vectors so that the concatenation of all fields' feature vectors is unit-length", ShortName = "norm", SortOrder = 6)]
83-
public bool Normalize = true;
83+
public new bool NormalizeFeatures = true;
8484

8585
/// <summary>
8686
/// Extra feature column names. The column named <see cref="TrainerInputBase.FeatureColumnName"/> stores features from the first field.
@@ -136,7 +136,7 @@ public sealed class Options : TrainerInputBaseWithWeight
136136
/// The <see cref="TrainerInfo"/> containing at least the training data for this trainer.
137137
/// </summary>
138138
TrainerInfo ITrainer.Info => _info;
139-
private static readonly TrainerInfo _info = new TrainerInfo(supportValid: true, supportIncrementalTrain: true);
139+
private static readonly TrainerInfo _info = new TrainerInfo(normalization: false, supportValid: true, supportIncrementalTrain: true);
140140

141141
private int _latentDim;
142142
private int _latentDimAligned;
@@ -224,7 +224,7 @@ private void Initialize(IHostEnvironment env, Options options)
224224
_lambdaLatent = options.LambdaLatent;
225225
_learningRate = options.LearningRate;
226226
_numIterations = options.NumberOfIterations;
227-
_norm = options.Normalize;
227+
_norm = options.NormalizeFeatures;
228228
_shuffle = options.Shuffle;
229229
_verbose = options.Verbose;
230230
_radius = options.Radius;

test/BaselineOutput/Common/EntryPoints/core_manifest.json

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -10155,26 +10155,6 @@
1015510155
},
1015610156
{
1015710157
"Name": "NormalizeFeatures",
10158-
"Type": {
10159-
"Kind": "Enum",
10160-
"Values": [
10161-
"No",
10162-
"Warn",
10163-
"Auto",
10164-
"Yes"
10165-
]
10166-
},
10167-
"Desc": "Normalize option for the feature column",
10168-
"Aliases": [
10169-
"norm"
10170-
],
10171-
"Required": false,
10172-
"SortOrder": 5.0,
10173-
"IsNullable": false,
10174-
"Default": "Auto"
10175-
},
10176-
{
10177-
"Name": "Normalize",
1017810158
"Type": "Bool",
1017910159
"Desc": "Whether to normalize the input vectors so that the concatenation of all fields' feature vectors is unit-length",
1018010160
"Aliases": [

0 commit comments

Comments
 (0)