From d9c4a297c790e6423fdd8077e8eaab44eaf04b8e Mon Sep 17 00:00:00 2001 From: Zeeshan Ahmed Date: Tue, 10 Jul 2018 12:59:08 -0700 Subject: [PATCH 1/6] [Part 3] Added convenience constructors for set of transforms. --- src/Microsoft.ML.Transforms/GroupTransform.cs | 12 ++++++ .../HashJoinTransform.cs | 35 +++++++++++++++-- .../KeyToBinaryVectorTransform.cs | 12 ++++++ src/Microsoft.ML.Transforms/LoadTransform.cs | 19 +++++++++ .../MissingValueIndicatorTransform.cs | 12 ++++++ .../MutualInformationFeatureSelection.cs | 39 +++++++++++++++++-- .../NADropTransform.cs | 12 ++++++ .../NAHandleTransform.cs | 21 ++++++++++ .../NAIndicatorTransform.cs | 12 ++++++ .../NAReplaceTransform.cs | 13 +++++++ .../OptionalColumnTransform.cs | 11 ++++++ .../ProduceIdTransform.cs | 18 ++++++++- src/Microsoft.ML.Transforms/RffTransform.cs | 29 +++++++++++++- .../UngroupTransform.cs | 12 ++++++ .../WhiteningTransform.cs | 36 ++++++++++++++--- 15 files changed, 278 insertions(+), 15 deletions(-) diff --git a/src/Microsoft.ML.Transforms/GroupTransform.cs b/src/Microsoft.ML.Transforms/GroupTransform.cs index 1d5b823278..55720e19b9 100644 --- a/src/Microsoft.ML.Transforms/GroupTransform.cs +++ b/src/Microsoft.ML.Transforms/GroupTransform.cs @@ -88,6 +88,18 @@ public sealed class Arguments : TransformInputBase private readonly GroupSchema _schema; + /// + /// Convenience constructor for public facing API. + /// + /// Host Environment. + /// Input . This is the output from previous transform or loader. 
+ /// Columns to group by + /// Columns to group together + public GroupTransform(IHostEnvironment env, IDataView input, string[] groupKey, params string[] columns) + : this(env, new Arguments() { GroupKey = groupKey, Column = columns }, input) + { + } + public GroupTransform(IHostEnvironment env, Arguments args, IDataView input) : base(env, RegistrationName, input) { diff --git a/src/Microsoft.ML.Transforms/HashJoinTransform.cs b/src/Microsoft.ML.Transforms/HashJoinTransform.cs index 098564bef3..f7c2259a3a 100644 --- a/src/Microsoft.ML.Transforms/HashJoinTransform.cs +++ b/src/Microsoft.ML.Transforms/HashJoinTransform.cs @@ -37,6 +37,14 @@ public sealed class HashJoinTransform : OneToOneTransformBase public const int NumBitsMin = 1; public const int NumBitsLim = 32; + private static class Defaults + { + public const bool Join = true; + public const int HashBits = NumBitsLim - 1; + public const uint Seed = 314489979; + public const bool Ordered = true; + } + public sealed class Arguments : TransformInputBase { [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:src)", @@ -45,17 +53,17 @@ public sealed class Arguments : TransformInputBase public Column[] Column; [Argument(ArgumentType.AtMostOnce, HelpText = "Whether the values need to be combined for a single hash")] - public bool Join = true; + public bool Join = Defaults.Join; [Argument(ArgumentType.AtMostOnce, HelpText = "Number of bits to hash into. 
Must be between 1 and 31, inclusive.", ShortName = "bits", SortOrder = 2)] - public int HashBits = NumBitsLim - 1; + public int HashBits = Defaults.HashBits; [Argument(ArgumentType.AtMostOnce, HelpText = "Hashing seed")] - public uint Seed = 314489979; + public uint Seed = Defaults.Seed; [Argument(ArgumentType.AtMostOnce, HelpText = "Whether the position of each term should be included in the hash", ShortName = "ord")] - public bool Ordered = true; + public bool Ordered = Defaults.Ordered; } public sealed class Column : OneToOneColumn @@ -166,6 +174,25 @@ private static VersionInfo GetVersionInfo() private readonly ColumnInfoEx[] _exes; + /// + /// Convenience constructor for public facing API. + /// + /// Host Environment. + /// Input . This is the output from previous transform or loader. + /// Name of the output column. + /// Name of the column to be transformed. If this is null '' will be used. + /// Whether the values need to be combined for a single hash. + /// Number of bits to hash into. Must be between 1 and 31, inclusive. + public HashJoinTransform(IHostEnvironment env, + IDataView input, + string name, + string source = null, + bool join = Defaults.Join, + int hashBits = Defaults.HashBits) + : this(env, new Arguments() { Column = new[] { new Column() { Source = source ?? 
name, Name = name } }, Join = join, HashBits = hashBits }, input) + { + } + public HashJoinTransform(IHostEnvironment env, Arguments args, IDataView input) : base(env, RegistrationName, Contracts.CheckRef(args, nameof(args)).Column, input, TestColumnType) { diff --git a/src/Microsoft.ML.Transforms/KeyToBinaryVectorTransform.cs b/src/Microsoft.ML.Transforms/KeyToBinaryVectorTransform.cs index 5983efb244..0cfaf75500 100644 --- a/src/Microsoft.ML.Transforms/KeyToBinaryVectorTransform.cs +++ b/src/Microsoft.ML.Transforms/KeyToBinaryVectorTransform.cs @@ -54,6 +54,18 @@ private static VersionInfo GetVersionInfo() private readonly VectorType[] _types; + /// + /// Convenience constructor for public facing API. + /// + /// Host Environment. + /// Input . This is the output from previous transform or loader. + /// Name of the output column. + /// Name of the column to be transformed. If this is null '' will be used. + public KeyToBinaryVectorTransform(IHostEnvironment env, IDataView input, string name, string source = null) + : this(env, new Arguments() { Column = new[] { new KeyToVectorTransform.Column() { Source = source ?? name, Name = name } } }, input) + { + } + /// /// Public constructor corresponding to SignatureDataTransform. /// diff --git a/src/Microsoft.ML.Transforms/LoadTransform.cs b/src/Microsoft.ML.Transforms/LoadTransform.cs index 83ff43274d..64a494702d 100644 --- a/src/Microsoft.ML.Transforms/LoadTransform.cs +++ b/src/Microsoft.ML.Transforms/LoadTransform.cs @@ -39,6 +39,25 @@ public class Arguments internal const string Summary = "Loads specified transforms from the model file and applies them to current data."; + /// + /// A helper method to create for public facing API. + /// + /// Host Environment. + /// Input . This is the output from previous transform or loader. + /// Model file to load the transforms from. + /// The tags (comma-separated) to be loaded (or omitted, if complement is true). 
+ /// Whether to load all transforms except those marked by tags. + public static IDataTransform Create(IHostEnvironment env, IDataView input, string modelFile, string[] tag, bool complement = false) + { + var args = new Arguments() + { + ModelFile = modelFile, + Tag = tag, + Complement = complement + }; + return Create(env, args, input); + } + public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs b/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs index 7020bc0830..5c757a2593 100644 --- a/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs +++ b/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs @@ -69,6 +69,18 @@ private static VersionInfo GetVersionInfo() // The output column types, parallel to Infos. private readonly VectorType[] _types; + /// + /// Convenience constructor for public facing API. + /// + /// Host Environment. + /// Input . This is the output from previous transform or loader. + /// Name of the output column. + /// Name of the column to be transformed. If this is null '' will be used. + public MissingValueIndicatorTransform(IHostEnvironment env, IDataView input, string name, string source = null) + : this(env, new Arguments() { Column = new[] { new Column() { Source = source ?? name, Name = name } } }, input) + { + } + /// /// Public constructor corresponding to SignatureDataTransform. 
/// diff --git a/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs b/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs index d8c20f03ca..41ff9930a2 100644 --- a/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs +++ b/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs @@ -33,6 +33,13 @@ public static class MutualInformationFeatureSelectionTransform public const string UserName = "Mutual Information Feature Selection Transform"; public const string ShortName = "MIFeatureSelection"; + private static class Defaults + { + public const string LabelColumn = DefaultColumnNames.Label; + public const int SlotsInOutput = 1000; + public const int NumBins = 256; + } + public sealed class Arguments : TransformInputBase { [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "Columns to use for feature selection", ShortName = "col", @@ -41,19 +48,45 @@ public sealed class Arguments : TransformInputBase [Argument(ArgumentType.LastOccurenceWins, HelpText = "Column to use for labels", ShortName = "lab", SortOrder = 4, Purpose = SpecialPurpose.ColumnName)] - public string LabelColumn = DefaultColumnNames.Label; + public string LabelColumn = Defaults.LabelColumn; [Argument(ArgumentType.AtMostOnce, HelpText = "The maximum number of slots to preserve in output", ShortName = "topk,numSlotsToKeep", SortOrder = 1)] - public int SlotsInOutput = 1000; + public int SlotsInOutput = Defaults.SlotsInOutput; [Argument(ArgumentType.AtMostOnce, HelpText = "Max number of bins for R4/R8 columns, power of 2 recommended", ShortName = "bins")] - public int NumBins = 256; + public int NumBins = Defaults.NumBins; } internal static string RegistrationName = "MutualInformationFeatureSelectionTransform"; + /// + /// A helper method to create for public facing API. + /// + /// Host Environment. + /// Input . This is the output from previous transform or loader. + /// Column to use for labels. 
+ /// The maximum number of slots to preserve in output. + /// Max number of bins for R4/R8 columns, power of 2 recommended. + /// Columns to use for feature selection. + public static IDataTransform Create(IHostEnvironment env, + IDataView input, + string labelColumn = Defaults.LabelColumn, + int slotsInOutput = Defaults.SlotsInOutput, + int numBins = Defaults.NumBins, + params string[] columns) + { + var args = new Arguments() + { + Column = columns, + LabelColumn = labelColumn, + SlotsInOutput = slotsInOutput, + NumBins = numBins + }; + return Create(env, args, input); + } + /// /// Create method corresponding to SignatureDataTransform. /// diff --git a/src/Microsoft.ML.Transforms/NADropTransform.cs b/src/Microsoft.ML.Transforms/NADropTransform.cs index 347e889a5b..ded5add732 100644 --- a/src/Microsoft.ML.Transforms/NADropTransform.cs +++ b/src/Microsoft.ML.Transforms/NADropTransform.cs @@ -69,6 +69,18 @@ private static VersionInfo GetVersionInfo() // The isNA delegates, parallel to Infos. private readonly Delegate[] _isNAs; + /// + /// Convenience constructor for public facing API. + /// + /// Host Environment. + /// Input . This is the output from previous transform or loader. + /// Name of the output column. + /// Name of the column to be transformed. If this is null '' will be used. + public NADropTransform(IHostEnvironment env, IDataView input, string name, string source = null) + : this(env, new Arguments() { Column = new[] { new Column() { Source = source ?? 
name, Name = name } } }, input) + { + } + public NADropTransform(IHostEnvironment env, Arguments args, IDataView input) : base(Contracts.CheckRef(env, nameof(env)), RegistrationName, env.CheckRef(args, nameof(args)).Column, input, TestType) { diff --git a/src/Microsoft.ML.Transforms/NAHandleTransform.cs b/src/Microsoft.ML.Transforms/NAHandleTransform.cs index 1b82fe3e1e..ceff4869a6 100644 --- a/src/Microsoft.ML.Transforms/NAHandleTransform.cs +++ b/src/Microsoft.ML.Transforms/NAHandleTransform.cs @@ -105,6 +105,27 @@ public bool TryUnparse(StringBuilder sb) internal const string FriendlyName = "NA Handle Transform"; internal const string ShortName = "NAHandle"; + /// + /// A helper method to create for public facing API. + /// + /// Host Environment. + /// Input . This is the output from previous transform or loader. + /// Name of the output column. + /// Name of the column to be transformed. If this is null '' will be used. + /// The replacement method to utilize. + public static IDataTransform Create(IHostEnvironment env, IDataView input, string name, string source = null, ReplacementKind replaceWith = ReplacementKind.DefaultValue) + { + var args = new Arguments() + { + Column = new[] + { + new Column() { Source = source ?? name, Name = name } + }, + ReplaceWith = replaceWith + }; + return Create(env, args, input); + } + public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input) { Contracts.CheckValue(env, nameof(env)); diff --git a/src/Microsoft.ML.Transforms/NAIndicatorTransform.cs b/src/Microsoft.ML.Transforms/NAIndicatorTransform.cs index 38ecc2c817..c35a90d748 100644 --- a/src/Microsoft.ML.Transforms/NAIndicatorTransform.cs +++ b/src/Microsoft.ML.Transforms/NAIndicatorTransform.cs @@ -85,6 +85,18 @@ private static string TestType(ColumnType type) // The output column types, parallel to Infos. private readonly ColumnType[] _types; + /// + /// Convenience constructor for public facing API. + /// + /// Host Environment. 
+ /// Input . This is the output from previous transform or loader. + /// Name of the output column. + /// Name of the column to be transformed. If this is null '' will be used. + public NAIndicatorTransform(IHostEnvironment env, IDataView input, string name, string source = null) + : this(env, new Arguments() { Column = new[] { new Column() { Source = source ?? name, Name = name } } }, input) + { + } + /// /// Public constructor corresponding to SignatureDataTransform. /// diff --git a/src/Microsoft.ML.Transforms/NAReplaceTransform.cs b/src/Microsoft.ML.Transforms/NAReplaceTransform.cs index 44832ee517..30384780e9 100644 --- a/src/Microsoft.ML.Transforms/NAReplaceTransform.cs +++ b/src/Microsoft.ML.Transforms/NAReplaceTransform.cs @@ -186,6 +186,19 @@ private static string TestType(ColumnType type) public override bool CanSaveOnnx => true; + /// + /// Convenience constructor for public facing API. + /// + /// Host Environment. + /// Input . This is the output from previous transform or loader. + /// Name of the output column. + /// Name of the column to be transformed. If this is null '' will be used. + /// The replacement method to utilize. + public NAReplaceTransform(IHostEnvironment env, IDataView input, string name, string source = null, ReplacementKind replacementKind = ReplacementKind.DefaultValue) + : this(env, new Arguments() { Column = new[] { new Column() { Source = source ?? name, Name = name } }, ReplacementKind = replacementKind }, input) + { + } + /// /// Public constructor corresponding to SignatureDataTransform. 
/// diff --git a/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs b/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs index 5d3ab591b2..8a3fa6f7ae 100644 --- a/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs +++ b/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs @@ -232,6 +232,17 @@ private static VersionInfo GetVersionInfo() private const string RegistrationName = "OptionalColumn"; + /// + /// Convenience constructor for public facing API. + /// + /// Host Environment. + /// Input . This is the output from previous transform or loader. + /// Columns to transform. + public OptionalColumnTransform(IHostEnvironment env, IDataView input, params string[] columns) + : this(env, new Arguments() { Column = columns }, input) + { + } + /// /// Public constructor corresponding to SignatureDataTransform. /// diff --git a/src/Microsoft.ML.Transforms/ProduceIdTransform.cs b/src/Microsoft.ML.Transforms/ProduceIdTransform.cs index c66489d386..f437d6319b 100644 --- a/src/Microsoft.ML.Transforms/ProduceIdTransform.cs +++ b/src/Microsoft.ML.Transforms/ProduceIdTransform.cs @@ -25,10 +25,15 @@ namespace Microsoft.ML.Runtime.Data /// public sealed class ProduceIdTransform : RowToRowTransformBase { + private static class Defaults + { + public const string Column = "Id"; + } + public sealed class Arguments { [Argument(ArgumentType.AtMostOnce, HelpText = "Name of the column to produce", ShortName = "col", SortOrder = 1)] - public string Column = "Id"; + public string Column = Defaults.Column; } private sealed class Bindings : ColumnBindingsBase @@ -93,6 +98,17 @@ private static VersionInfo GetVersionInfo() public override bool CanShuffle { get { return Source.CanShuffle; } } + /// + /// Convenience constructor for public facing API. + /// + /// Host Environment. + /// Input . This is the output from previous transform or loader. + /// Name of the column to produce. 
+ public ProduceIdTransform(IHostEnvironment env, IDataView input, string column = Defaults.Column) + : this(env, new Arguments() { Column = column }, input) + { + } + public ProduceIdTransform(IHostEnvironment env, Arguments args, IDataView input) : base(env, LoaderSignature, input) { diff --git a/src/Microsoft.ML.Transforms/RffTransform.cs b/src/Microsoft.ML.Transforms/RffTransform.cs index b8f49b4dce..25ff320ec5 100644 --- a/src/Microsoft.ML.Transforms/RffTransform.cs +++ b/src/Microsoft.ML.Transforms/RffTransform.cs @@ -27,20 +27,26 @@ namespace Microsoft.ML.Runtime.Data public sealed class RffTransform : OneToOneTransformBase { + private static class Defaults + { + public const int NewDim = 1000; + public const bool UseSin = false; + } + public sealed class Arguments { [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:src)", ShortName = "col", SortOrder = 1)] public Column[] Column; [Argument(ArgumentType.AtMostOnce, HelpText = "The number of random Fourier features to create", ShortName = "dim")] - public int NewDim = 1000; + public int NewDim = Defaults.NewDim; [Argument(ArgumentType.Multiple, HelpText = "which kernel to use?", ShortName = "kernel")] public SubComponent MatrixGenerator = new SubComponent(GaussianFourierSampler.LoadName); [Argument(ArgumentType.AtMostOnce, HelpText = "create two features for every random Fourier frequency? (one for cos and one for sin)")] - public bool UseSin = false; + public bool UseSin = Defaults.UseSin; [Argument(ArgumentType.LastOccurenceWins, HelpText = "The seed of the random number generator for generating the new features (if unspecified, " + @@ -232,6 +238,25 @@ private static string TestColumnType(ColumnType type) return "Expected R4 or vector of R4 with known size"; } + /// + /// Convenience constructor for public facing API. + /// + /// Host Environment. + /// Input . This is the output from previous transform or loader. 
+ /// Name of the output column. + /// Name of the column to be transformed. If this is null '' will be used. + /// The number of random Fourier features to create. + /// create two features for every random Fourier frequency? (one for cos and one for sin). + public RffTransform(IHostEnvironment env, + IDataView input, + string name, + string source = null, + int newDim = Defaults.NewDim, + bool useSin = Defaults.UseSin) + : this(env, new Arguments() { Column = new[] { new Column() { Source = source ?? name, Name = name } }, NewDim = newDim, UseSin = useSin }, input) + { + } + /// /// Public constructor corresponding to . /// diff --git a/src/Microsoft.ML.Transforms/UngroupTransform.cs b/src/Microsoft.ML.Transforms/UngroupTransform.cs index cb97e1c3b3..89c17f22d1 100644 --- a/src/Microsoft.ML.Transforms/UngroupTransform.cs +++ b/src/Microsoft.ML.Transforms/UngroupTransform.cs @@ -79,6 +79,18 @@ public sealed class Arguments : TransformInputBase private readonly SchemaImpl _schemaImpl; + /// + /// Convenience constructor for public facing API. + /// + /// Host Environment. + /// Input . This is the output from previous transform or loader. + /// Specifies how to unroll multiple pivot columns of different size. 
+ /// Columns to unroll, or 'pivot' + public UngroupTransform(IHostEnvironment env, IDataView input, UngroupMode mode, params string[] columns) + : this(env, new Arguments() { Column = columns, Mode = mode }, input) + { + } + public UngroupTransform(IHostEnvironment env, Arguments args, IDataView input) : base(env, LoaderSignature, input) { diff --git a/src/Microsoft.ML.Transforms/WhiteningTransform.cs b/src/Microsoft.ML.Transforms/WhiteningTransform.cs index 2ae3824ba8..8b36078a04 100644 --- a/src/Microsoft.ML.Transforms/WhiteningTransform.cs +++ b/src/Microsoft.ML.Transforms/WhiteningTransform.cs @@ -46,25 +46,34 @@ public enum WhiteningKind /// public sealed class WhiteningTransform : OneToOneTransformBase { + private static class Defaults + { + public const WhiteningKind Kind = WhiteningKind.Zca; + public const Float Eps = (Float)1e-5; + public const int MaxRows = 100 * 1000; + public const bool SaveInverse = false; + public const int PcaNum = 0; + } + public sealed class Arguments { [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:src)", ShortName = "col", SortOrder = 1)] public Column[] Column; [Argument(ArgumentType.AtMostOnce, HelpText = "Whitening kind (PCA/ZCA)")] - public WhiteningKind Kind = WhiteningKind.Zca; + public WhiteningKind Kind = Defaults.Kind; [Argument(ArgumentType.AtMostOnce, HelpText = "Scaling regularizer")] - public Float Eps = (Float)1e-5; + public Float Eps = Defaults.Eps; [Argument(ArgumentType.AtMostOnce, HelpText = "Max number of rows", ShortName = "rows")] - public int MaxRows = 100 * 1000; + public int MaxRows = Defaults.MaxRows; [Argument(ArgumentType.AtMostOnce, HelpText = "Whether to save inverse (recovery) matrix", ShortName = "saveInv")] - public bool SaveInverse = false; + public bool SaveInverse = Defaults.SaveInverse; [Argument(ArgumentType.AtMostOnce, HelpText = "PCA components to retain")] - public int PcaNum = 0; + public int PcaNum = Defaults.PcaNum; // 
REVIEW: add the following options: // 1. Currently there is no way to apply an inverse transform AFTER the the transform is trained. @@ -209,6 +218,23 @@ private static VersionInfo GetVersionInfo() private const string RegistrationName = "Whitening"; + /// + /// Convenience constructor for public facing API. + /// + /// Host Environment. + /// Input . This is the output from previous transform or loader. + /// Name of the output column. + /// Name of the column to be transformed. If this is null '' will be used. + /// Whitening kind (PCA/ZCA). + public WhiteningTransform(IHostEnvironment env, + IDataView input, + string name, + string source = null, + WhiteningKind kind = Defaults.Kind) + : this(env, new Arguments() { Column = new[] { new Column() { Source = source ?? name, Name = name } }, Kind = kind }, input) + { + } + /// /// Public constructor corresponding to SignatureDataTransform. /// From 6cd77eb96fa37276f147572af0ed90d7f86b2b96 Mon Sep 17 00:00:00 2001 From: Zeeshan Ahmed Date: Thu, 12 Jul 2018 11:57:17 -0700 Subject: [PATCH 2/6] Addressed reviewers' comments. --- src/Microsoft.ML.Transforms/GroupTransform.cs | 4 ++-- .../MissingValueIndicatorTransform.cs | 12 ------------ .../MutualInformationFeatureSelection.cs | 2 +- .../NAHandleTransform.cs | 18 ++++++++++++++++++ .../ProduceIdTransform.cs | 11 ----------- src/Microsoft.ML.Transforms/RffTransform.cs | 10 ++++------ .../UngroupTransform.cs | 14 ++++++++++++++ 7 files changed, 39 insertions(+), 32 deletions(-) diff --git a/src/Microsoft.ML.Transforms/GroupTransform.cs b/src/Microsoft.ML.Transforms/GroupTransform.cs index 55720e19b9..2170c3b373 100644 --- a/src/Microsoft.ML.Transforms/GroupTransform.cs +++ b/src/Microsoft.ML.Transforms/GroupTransform.cs @@ -95,8 +95,8 @@ public sealed class Arguments : TransformInputBase /// Input . This is the output from previous transform or loader. 
/// Columns to group by /// Columns to group together - public GroupTransform(IHostEnvironment env, IDataView input, string[] groupKey, params string[] columns) - : this(env, new Arguments() { GroupKey = groupKey, Column = columns }, input) + public GroupTransform(IHostEnvironment env, IDataView input, string groupKey, params string[] columns) + : this(env, new Arguments() { GroupKey = new[] { groupKey }, Column = columns }, input) { } diff --git a/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs b/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs index 5c757a2593..7020bc0830 100644 --- a/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs +++ b/src/Microsoft.ML.Transforms/MissingValueIndicatorTransform.cs @@ -69,18 +69,6 @@ private static VersionInfo GetVersionInfo() // The output column types, parallel to Infos. private readonly VectorType[] _types; - /// - /// Convenience constructor for public facing API. - /// - /// Host Environment. - /// Input . This is the output from previous transform or loader. - /// Name of the output column. - /// Name of the column to be transformed. If this is null '' will be used. - public MissingValueIndicatorTransform(IHostEnvironment env, IDataView input, string name, string source = null) - : this(env, new Arguments() { Column = new[] { new Column() { Source = source ?? name, Name = name } } }, input) - { - } - /// /// Public constructor corresponding to SignatureDataTransform. 
/// diff --git a/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs b/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs index 41ff9930a2..39fec2b208 100644 --- a/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs +++ b/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs @@ -62,7 +62,7 @@ public sealed class Arguments : TransformInputBase internal static string RegistrationName = "MutualInformationFeatureSelectionTransform"; /// - /// A helper method to create for public facing API. + /// A helper method to create for selecting the top k slots ordered by their mutual information. /// /// Host Environment. /// Input . This is the output from previous transform or loader. diff --git a/src/Microsoft.ML.Transforms/NAHandleTransform.cs b/src/Microsoft.ML.Transforms/NAHandleTransform.cs index ceff4869a6..3b8956f2a2 100644 --- a/src/Microsoft.ML.Transforms/NAHandleTransform.cs +++ b/src/Microsoft.ML.Transforms/NAHandleTransform.cs @@ -34,12 +34,30 @@ namespace Microsoft.ML.Runtime.Data /// public static class NAHandleTransform { + /// + /// + /// public enum ReplacementKind { + /// + /// Replace with the default value of the column based on its type. For example, 'zero' for numeric and 'empty' for string/text columns. + /// [EnumValueDisplay("Zero/empty")] DefaultValue, + + /// + /// Replace with the mean value of the column. Supports only numeric/time span/ DateTime columns. + /// Mean, + + /// + /// Replace with the minimum value of the column. Supports only numeric/time span/ DateTime columns. + /// Minimum, + + /// + /// Replace with the maximum value of the column. Supports only numeric/time span/ DateTime columns. 
+ /// Maximum, [HideEnumValue] diff --git a/src/Microsoft.ML.Transforms/ProduceIdTransform.cs b/src/Microsoft.ML.Transforms/ProduceIdTransform.cs index f437d6319b..fedaf3cb25 100644 --- a/src/Microsoft.ML.Transforms/ProduceIdTransform.cs +++ b/src/Microsoft.ML.Transforms/ProduceIdTransform.cs @@ -98,17 +98,6 @@ private static VersionInfo GetVersionInfo() public override bool CanShuffle { get { return Source.CanShuffle; } } - /// - /// Convenience constructor for public facing API. - /// - /// Host Environment. - /// Input . This is the output from previous transform or loader. - /// Name of the column to produce. - public ProduceIdTransform(IHostEnvironment env, IDataView input, string column = Defaults.Column) - : this(env, new Arguments() { Column = column }, input) - { - } - public ProduceIdTransform(IHostEnvironment env, Arguments args, IDataView input) : base(env, LoaderSignature, input) { diff --git a/src/Microsoft.ML.Transforms/RffTransform.cs b/src/Microsoft.ML.Transforms/RffTransform.cs index 25ff320ec5..85c4203512 100644 --- a/src/Microsoft.ML.Transforms/RffTransform.cs +++ b/src/Microsoft.ML.Transforms/RffTransform.cs @@ -243,17 +243,15 @@ private static string TestColumnType(ColumnType type) /// /// Host Environment. /// Input . This is the output from previous transform or loader. + /// The number of random Fourier features to create. /// Name of the output column. /// Name of the column to be transformed. If this is null '' will be used. - /// The number of random Fourier features to create. - /// create two features for every random Fourier frequency? (one for cos and one for sin). public RffTransform(IHostEnvironment env, IDataView input, + int newDim, string name, - string source = null, - int newDim = Defaults.NewDim, - bool useSin = Defaults.UseSin) - : this(env, new Arguments() { Column = new[] { new Column() { Source = source ?? 
name, Name = name } }, NewDim = newDim, UseSin = useSin }, input) + string source = null) + : this(env, new Arguments() { Column = new[] { new Column() { Source = source ?? name, Name = name } }, NewDim = newDim }, input) { } diff --git a/src/Microsoft.ML.Transforms/UngroupTransform.cs b/src/Microsoft.ML.Transforms/UngroupTransform.cs index 89c17f22d1..0b6aeea1b5 100644 --- a/src/Microsoft.ML.Transforms/UngroupTransform.cs +++ b/src/Microsoft.ML.Transforms/UngroupTransform.cs @@ -61,10 +61,24 @@ private static VersionInfo GetVersionInfo() loaderSignature: LoaderSignature); } + /// + /// Controls the number of output rows produced by the transform + /// public enum UngroupMode { + /// + /// A number of output rows are equal to the minimum length of pivot columns + /// Inner, + + /// + /// A number of output rows are equal to the maximum length of pivot columns + /// Outer, + + /// + /// A number of output rows are equal to the length of the first pivot column. + /// First } From 75a8b6dc31b1d7e78980e3b179dd011e597400f2 Mon Sep 17 00:00:00 2001 From: Zeeshan Ahmed Date: Fri, 13 Jul 2018 11:32:43 -0700 Subject: [PATCH 3/6] Addressed reviewers' comments. 
--- src/Microsoft.ML.Transforms/NAHandleTransform.cs | 3 --- src/Microsoft.ML.Transforms/OptionalColumnTransform.cs | 4 ++++ src/Microsoft.ML.Transforms/ProduceIdTransform.cs | 7 +------ src/Microsoft.ML.Transforms/UngroupTransform.cs | 6 +++--- 4 files changed, 8 insertions(+), 12 deletions(-) diff --git a/src/Microsoft.ML.Transforms/NAHandleTransform.cs b/src/Microsoft.ML.Transforms/NAHandleTransform.cs index 3b8956f2a2..840a080ae6 100644 --- a/src/Microsoft.ML.Transforms/NAHandleTransform.cs +++ b/src/Microsoft.ML.Transforms/NAHandleTransform.cs @@ -34,9 +34,6 @@ namespace Microsoft.ML.Runtime.Data /// public static class NAHandleTransform { - /// - /// - /// public enum ReplacementKind { /// diff --git a/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs b/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs index 8a3fa6f7ae..e294ea5066 100644 --- a/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs +++ b/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs @@ -26,6 +26,10 @@ namespace Microsoft.ML.Runtime.DataPipe { + /// + /// This transform is used to mark some of the columns (e.g. Label) optional during training So that the columns is not required during scoring.. + /// At scoring time, it is checked if the data schema for scoring matches the data used for training except for the optional columns. 
+ /// public class OptionalColumnTransform : RowToRowMapperTransformBase { public sealed class Arguments : TransformInputBase diff --git a/src/Microsoft.ML.Transforms/ProduceIdTransform.cs b/src/Microsoft.ML.Transforms/ProduceIdTransform.cs index fedaf3cb25..c66489d386 100644 --- a/src/Microsoft.ML.Transforms/ProduceIdTransform.cs +++ b/src/Microsoft.ML.Transforms/ProduceIdTransform.cs @@ -25,15 +25,10 @@ namespace Microsoft.ML.Runtime.Data /// public sealed class ProduceIdTransform : RowToRowTransformBase { - private static class Defaults - { - public const string Column = "Id"; - } - public sealed class Arguments { [Argument(ArgumentType.AtMostOnce, HelpText = "Name of the column to produce", ShortName = "col", SortOrder = 1)] - public string Column = Defaults.Column; + public string Column = "Id"; } private sealed class Bindings : ColumnBindingsBase diff --git a/src/Microsoft.ML.Transforms/UngroupTransform.cs b/src/Microsoft.ML.Transforms/UngroupTransform.cs index 0b6aeea1b5..4781492076 100644 --- a/src/Microsoft.ML.Transforms/UngroupTransform.cs +++ b/src/Microsoft.ML.Transforms/UngroupTransform.cs @@ -67,17 +67,17 @@ private static VersionInfo GetVersionInfo() public enum UngroupMode { /// - /// A number of output rows are equal to the minimum length of pivot columns + /// The number of output rows are equal to the minimum length of pivot columns /// Inner, /// - /// A number of output rows are equal to the maximum length of pivot columns + /// The number of output rows are equal to the maximum length of pivot columns /// Outer, /// - /// A number of output rows are equal to the length of the first pivot column. + /// The number of output rows are equal to the length of the first pivot column. /// First } From d72a4acaf5b56d55823ab94d4cb8783d5c6b52ea Mon Sep 17 00:00:00 2001 From: Zeeshan Ahmed Date: Fri, 13 Jul 2018 15:35:47 -0700 Subject: [PATCH 4/6] Addressed reviewers' comments. 
--- src/Microsoft.ML.Transforms/OptionalColumnTransform.cs | 10 +++++++--- src/Microsoft.ML.Transforms/UngroupTransform.cs | 6 +++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs b/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs index e294ea5066..d900715d30 100644 --- a/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs +++ b/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs @@ -27,10 +27,14 @@ namespace Microsoft.ML.Runtime.DataPipe { /// - /// This transform is used to mark some of the columns (e.g. Label) optional during training So that the columns is not required during scoring.. - /// At scoring time, it is checked if the data schema for scoring matches the data used for training except for the optional columns. + /// This transform is used to mark some of the columns (e.g. Label) optional during training so that the columns are not required during scoring. + /// When applied to new data, if optional columns are not present a meta column is created having the same properties (e.g. 'name', 'type' etc.) as used during training. + /// The columns are filled with default values. The value is + /// - scalar for scalar column + /// - totally sparse vector for vector column. + /// If value of the column is requested the default value will be returned. 
/// - public class OptionalColumnTransform : RowToRowMapperTransformBase + public sealed class OptionalColumnTransform : RowToRowMapperTransformBase { public sealed class Arguments : TransformInputBase { diff --git a/src/Microsoft.ML.Transforms/UngroupTransform.cs b/src/Microsoft.ML.Transforms/UngroupTransform.cs index 4781492076..49cae8ae6e 100644 --- a/src/Microsoft.ML.Transforms/UngroupTransform.cs +++ b/src/Microsoft.ML.Transforms/UngroupTransform.cs @@ -67,17 +67,17 @@ private static VersionInfo GetVersionInfo() public enum UngroupMode { /// - /// The number of output rows are equal to the minimum length of pivot columns + /// The number of output rows is equal to the minimum length of pivot columns /// Inner, /// - /// The number of output rows are equal to the maximum length of pivot columns + /// The number of output rows is equal to the maximum length of pivot columns /// Outer, /// - /// The number of output rows are equal to the length of the first pivot column. + /// The number of output rows is equal to the length of the first pivot column. /// First } From 3dd984782d43fc34d8718e32b7c34928e060047f Mon Sep 17 00:00:00 2001 From: Zeeshan Ahmed Date: Fri, 13 Jul 2018 16:09:41 -0700 Subject: [PATCH 5/6] Updated OptionalColumnTransform comments. --- src/Microsoft.ML.Transforms/OptionalColumnTransform.cs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs b/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs index d900715d30..ff0ff03c93 100644 --- a/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs +++ b/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs @@ -28,11 +28,10 @@ namespace Microsoft.ML.Runtime.DataPipe { /// /// This transform is used to mark some of the columns (e.g. Label) optional during training so that the columns are not required during scoring. 
- /// When applied to new data, if optional columns are not present a meta column is created having the same properties (e.g. 'name', 'type' etc.) as used during training. + /// When applied to new data, if optional columns are not present dummy columns are created having the same properties (e.g. 'name', 'type' etc.) as used during training. /// The columns are filled with default values. The value is /// - scalar for scalar column /// - totally sparse vector for vector column. - /// If value of the column is requested the default value will be returned. /// public sealed class OptionalColumnTransform : RowToRowMapperTransformBase { From 730fe9312520c21585d086f90fbbbe3155555f00 Mon Sep 17 00:00:00 2001 From: Zeeshan Ahmed Date: Fri, 13 Jul 2018 16:15:42 -0700 Subject: [PATCH 6/6] Updated comments. --- src/Microsoft.ML.Transforms/OptionalColumnTransform.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs b/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs index ff0ff03c93..9e1bad374e 100644 --- a/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs +++ b/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs @@ -28,7 +28,7 @@ namespace Microsoft.ML.Runtime.DataPipe { /// /// This transform is used to mark some of the columns (e.g. Label) optional during training so that the columns are not required during scoring. - /// When applied to new data, if optional columns are not present dummy columns are created having the same properties (e.g. 'name', 'type' etc.) as used during training. + /// When applied to new data, if any of the optional columns is not present a dummy column is created having the same properties (e.g. 'name', 'type' etc.) as used during training. /// The columns are filled with default values. The value is /// - scalar for scalar column /// - totally sparse vector for vector column.