diff --git a/src/Microsoft.ML.Transforms/GroupTransform.cs b/src/Microsoft.ML.Transforms/GroupTransform.cs
index 1d5b823278..2170c3b373 100644
--- a/src/Microsoft.ML.Transforms/GroupTransform.cs
+++ b/src/Microsoft.ML.Transforms/GroupTransform.cs
@@ -88,6 +88,18 @@ public sealed class Arguments : TransformInputBase
 
         private readonly GroupSchema _schema;
 
+        /// <summary>
+        /// Convenience constructor for public facing API.
+        /// </summary>
+        /// <param name="env">Host Environment.</param>
+        /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+        /// <param name="groupKey">Column to group by.</param>
+        /// <param name="columns">Columns to group together.</param>
+        public GroupTransform(IHostEnvironment env, IDataView input, string groupKey, params string[] columns)
+            : this(env, new Arguments() { GroupKey = new[] { groupKey }, Column = columns }, input)
+        {
+        }
+
         public GroupTransform(IHostEnvironment env, Arguments args, IDataView input)
             : base(env, RegistrationName, input)
         {
diff --git a/src/Microsoft.ML.Transforms/HashJoinTransform.cs b/src/Microsoft.ML.Transforms/HashJoinTransform.cs
index 098564bef3..f7c2259a3a 100644
--- a/src/Microsoft.ML.Transforms/HashJoinTransform.cs
+++ b/src/Microsoft.ML.Transforms/HashJoinTransform.cs
@@ -37,6 +37,14 @@ public sealed class HashJoinTransform : OneToOneTransformBase
         public const int NumBitsMin = 1;
         public const int NumBitsLim = 32;
 
+        private static class Defaults
+        {
+            public const bool Join = true;
+            public const int HashBits = NumBitsLim - 1;
+            public const uint Seed = 314489979;
+            public const bool Ordered = true;
+        }
+
         public sealed class Arguments : TransformInputBase
         {
             [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:src)",
@@ -45,17 +53,17 @@ public sealed class Arguments : TransformInputBase
             public Column[] Column;
 
             [Argument(ArgumentType.AtMostOnce, HelpText = "Whether the values need to be combined for a single hash")]
-            public bool Join = true;
+            public bool Join = Defaults.Join;
 
             [Argument(ArgumentType.AtMostOnce, HelpText = "Number of bits to hash into. Must be between 1 and 31, inclusive.",
                 ShortName = "bits", SortOrder = 2)]
-            public int HashBits = NumBitsLim - 1;
+            public int HashBits = Defaults.HashBits;
 
             [Argument(ArgumentType.AtMostOnce, HelpText = "Hashing seed")]
-            public uint Seed = 314489979;
+            public uint Seed = Defaults.Seed;
 
             [Argument(ArgumentType.AtMostOnce, HelpText = "Whether the position of each term should be included in the hash", ShortName = "ord")]
-            public bool Ordered = true;
+            public bool Ordered = Defaults.Ordered;
         }
 
         public sealed class Column : OneToOneColumn
@@ -166,6 +174,25 @@ private static VersionInfo GetVersionInfo()
 
         private readonly ColumnInfoEx[] _exes;
 
+        /// <summary>
+        /// Convenience constructor for public facing API.
+        /// </summary>
+        /// <param name="env">Host Environment.</param>
+        /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+        /// <param name="name">Name of the output column.</param>
+        /// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param>
+        /// <param name="join">Whether the values need to be combined for a single hash.</param>
+        /// <param name="hashBits">Number of bits to hash into. Must be between 1 and 31, inclusive.</param>
+        public HashJoinTransform(IHostEnvironment env,
+            IDataView input,
+            string name,
+            string source = null,
+            bool join = Defaults.Join,
+            int hashBits = Defaults.HashBits)
+            : this(env, new Arguments() { Column = new[] { new Column() { Source = source ?? name, Name = name } }, Join = join, HashBits = hashBits }, input)
+        {
+        }
+
         public HashJoinTransform(IHostEnvironment env, Arguments args, IDataView input)
             : base(env, RegistrationName, Contracts.CheckRef(args, nameof(args)).Column, input, TestColumnType)
         {
diff --git a/src/Microsoft.ML.Transforms/KeyToBinaryVectorTransform.cs b/src/Microsoft.ML.Transforms/KeyToBinaryVectorTransform.cs
index 5983efb244..0cfaf75500 100644
--- a/src/Microsoft.ML.Transforms/KeyToBinaryVectorTransform.cs
+++ b/src/Microsoft.ML.Transforms/KeyToBinaryVectorTransform.cs
@@ -54,6 +54,18 @@ private static VersionInfo GetVersionInfo()
 
         private readonly VectorType[] _types;
 
+        /// <summary>
+        /// Convenience constructor for public facing API.
+        /// </summary>
+        /// <param name="env">Host Environment.</param>
+        /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+        /// <param name="name">Name of the output column.</param>
+        /// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param>
+        public KeyToBinaryVectorTransform(IHostEnvironment env, IDataView input, string name, string source = null)
+            : this(env, new Arguments() { Column = new[] { new KeyToVectorTransform.Column() { Source = source ?? name, Name = name } } }, input)
+        {
+        }
+
         /// <summary>
         /// Public constructor corresponding to SignatureDataTransform.
         /// </summary>
diff --git a/src/Microsoft.ML.Transforms/LoadTransform.cs b/src/Microsoft.ML.Transforms/LoadTransform.cs
index 83ff43274d..64a494702d 100644
--- a/src/Microsoft.ML.Transforms/LoadTransform.cs
+++ b/src/Microsoft.ML.Transforms/LoadTransform.cs
@@ -39,6 +39,25 @@ public class Arguments
 
         internal const string Summary = "Loads specified transforms from the model file and applies them to current data.";
 
+        /// <summary>
+        /// A helper method to create <see cref="LoadTransform"/> for public facing API.
+        /// </summary>
+        /// <param name="env">Host Environment.</param>
+        /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+        /// <param name="modelFile">Model file to load the transforms from.</param>
+        /// <param name="tag">The tags (comma-separated) to be loaded (or omitted, if complement is true).</param>
+        /// <param name="complement">Whether to load all transforms except those marked by tags.</param>
+        public static IDataTransform Create(IHostEnvironment env, IDataView input, string modelFile, string[] tag, bool complement = false)
+        {
+            var args = new Arguments()
+            {
+                ModelFile = modelFile,
+                Tag = tag,
+                Complement = complement
+            };
+            return Create(env, args, input);
+        }
+
         public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
         {
             Contracts.CheckValue(env, nameof(env));
diff --git a/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs b/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs
index d8c20f03ca..39fec2b208 100644
--- a/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs
+++ b/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs
@@ -33,6 +33,13 @@ public static class MutualInformationFeatureSelectionTransform
         public const string UserName = "Mutual Information Feature Selection Transform";
         public const string ShortName = "MIFeatureSelection";
 
+        private static class Defaults
+        {
+            public const string LabelColumn = DefaultColumnNames.Label;
+            public const int SlotsInOutput = 1000;
+            public const int NumBins = 256;
+        }
+
         public sealed class Arguments : TransformInputBase
         {
             [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "Columns to use for feature selection", ShortName = "col",
@@ -41,19 +48,45 @@ public sealed class Arguments : TransformInputBase
 
             [Argument(ArgumentType.LastOccurenceWins, HelpText = "Column to use for labels", ShortName = "lab",
                 SortOrder = 4, Purpose = SpecialPurpose.ColumnName)]
-            public string LabelColumn = DefaultColumnNames.Label;
+            public string LabelColumn = Defaults.LabelColumn;
 
             [Argument(ArgumentType.AtMostOnce, HelpText = "The maximum number of slots to preserve in output", ShortName = "topk,numSlotsToKeep",
                 SortOrder = 1)]
-            public int SlotsInOutput = 1000;
+            public int SlotsInOutput = Defaults.SlotsInOutput;
 
             [Argument(ArgumentType.AtMostOnce, HelpText = "Max number of bins for R4/R8 columns, power of 2 recommended",
                 ShortName = "bins")]
-            public int NumBins = 256;
+            public int NumBins = Defaults.NumBins;
         }
 
         internal static string RegistrationName = "MutualInformationFeatureSelectionTransform";
 
+        /// <summary>
+        /// A helper method to create <see cref="MutualInformationFeatureSelectionTransform"/> for selecting the top k slots ordered by their mutual information.
+        /// </summary>
+        /// <param name="env">Host Environment.</param>
+        /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+        /// <param name="labelColumn">Column to use for labels.</param>
+        /// <param name="slotsInOutput">The maximum number of slots to preserve in output.</param>
+        /// <param name="numBins">Max number of bins for R4/R8 columns, power of 2 recommended.</param>
+        /// <param name="columns">Columns to use for feature selection.</param>
+        public static IDataTransform Create(IHostEnvironment env,
+            IDataView input,
+            string labelColumn = Defaults.LabelColumn,
+            int slotsInOutput = Defaults.SlotsInOutput,
+            int numBins = Defaults.NumBins,
+            params string[] columns)
+        {
+            var args = new Arguments()
+            {
+                Column = columns,
+                LabelColumn = labelColumn,
+                SlotsInOutput = slotsInOutput,
+                NumBins = numBins
+            };
+            return Create(env, args, input);
+        }
+
         /// <summary>
         /// Create method corresponding to SignatureDataTransform.
         /// </summary>
diff --git a/src/Microsoft.ML.Transforms/NADropTransform.cs b/src/Microsoft.ML.Transforms/NADropTransform.cs
index 347e889a5b..ded5add732 100644
--- a/src/Microsoft.ML.Transforms/NADropTransform.cs
+++ b/src/Microsoft.ML.Transforms/NADropTransform.cs
@@ -69,6 +69,18 @@ private static VersionInfo GetVersionInfo()
         // The isNA delegates, parallel to Infos.
         private readonly Delegate[] _isNAs;
 
+        /// <summary>
+        /// Convenience constructor for public facing API.
+        /// </summary>
+        /// <param name="env">Host Environment.</param>
+        /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+        /// <param name="name">Name of the output column.</param>
+        /// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param>
+        public NADropTransform(IHostEnvironment env, IDataView input, string name, string source = null)
+            : this(env, new Arguments() { Column = new[] { new Column() { Source = source ?? name, Name = name } } }, input)
+        {
+        }
+
         public NADropTransform(IHostEnvironment env, Arguments args, IDataView input)
             : base(Contracts.CheckRef(env, nameof(env)), RegistrationName, env.CheckRef(args, nameof(args)).Column, input, TestType)
         {
diff --git a/src/Microsoft.ML.Transforms/NAHandleTransform.cs b/src/Microsoft.ML.Transforms/NAHandleTransform.cs
index 1b82fe3e1e..840a080ae6 100644
--- a/src/Microsoft.ML.Transforms/NAHandleTransform.cs
+++ b/src/Microsoft.ML.Transforms/NAHandleTransform.cs
@@ -36,10 +36,25 @@ public static class NAHandleTransform
     {
         public enum ReplacementKind
         {
+            /// <summary>
+            /// Replace with the default value of the column based on its type. For example, 'zero' for numeric and 'empty' for string/text columns.
+            /// </summary>
             [EnumValueDisplay("Zero/empty")]
             DefaultValue,
+
+            /// <summary>
+            /// Replace with the mean value of the column. Supports only numeric/time span/ DateTime columns.
+            /// </summary>
             Mean,
+
+            /// <summary>
+            /// Replace with the minimum value of the column. Supports only numeric/time span/ DateTime columns.
+            /// </summary>
             Minimum,
+
+            /// <summary>
+            /// Replace with the maximum value of the column. Supports only numeric/time span/ DateTime columns.
+            /// </summary>
             Maximum,
 
             [HideEnumValue]
@@ -105,6 +120,27 @@ public bool TryUnparse(StringBuilder sb)
         internal const string FriendlyName = "NA Handle Transform";
         internal const string ShortName = "NAHandle";
 
+        /// <summary>
+        /// A helper method to create <see cref="NAHandleTransform"/> for public facing API.
+        /// </summary>
+        /// <param name="env">Host Environment.</param>
+        /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+        /// <param name="name">Name of the output column.</param>
+        /// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param>
+        /// <param name="replaceWith">The replacement method to utilize.</param>
+        public static IDataTransform Create(IHostEnvironment env, IDataView input, string name, string source = null, ReplacementKind replaceWith = ReplacementKind.DefaultValue)
+        {
+            var args = new Arguments()
+            {
+                Column = new[]
+                {
+                    new Column() { Source = source ?? name, Name = name }
+                },
+                ReplaceWith = replaceWith
+            };
+            return Create(env, args, input);
+        }
+
         public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
         {
             Contracts.CheckValue(env, nameof(env));
diff --git a/src/Microsoft.ML.Transforms/NAIndicatorTransform.cs b/src/Microsoft.ML.Transforms/NAIndicatorTransform.cs
index 38ecc2c817..c35a90d748 100644
--- a/src/Microsoft.ML.Transforms/NAIndicatorTransform.cs
+++ b/src/Microsoft.ML.Transforms/NAIndicatorTransform.cs
@@ -85,6 +85,18 @@ private static string TestType(ColumnType type)
         // The output column types, parallel to Infos.
         private readonly ColumnType[] _types;
 
+        /// <summary>
+        /// Convenience constructor for public facing API.
+        /// </summary>
+        /// <param name="env">Host Environment.</param>
+        /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+        /// <param name="name">Name of the output column.</param>
+        /// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param>
+        public NAIndicatorTransform(IHostEnvironment env, IDataView input, string name, string source = null)
+            : this(env, new Arguments() { Column = new[] { new Column() { Source = source ?? name, Name = name } } }, input)
+        {
+        }
+
         /// <summary>
         /// Public constructor corresponding to SignatureDataTransform.
         /// </summary>
diff --git a/src/Microsoft.ML.Transforms/NAReplaceTransform.cs b/src/Microsoft.ML.Transforms/NAReplaceTransform.cs
index 44832ee517..30384780e9 100644
--- a/src/Microsoft.ML.Transforms/NAReplaceTransform.cs
+++ b/src/Microsoft.ML.Transforms/NAReplaceTransform.cs
@@ -186,6 +186,19 @@ private static string TestType(ColumnType type)
 
         public override bool CanSaveOnnx => true;
 
+        /// <summary>
+        /// Convenience constructor for public facing API.
+        /// </summary>
+        /// <param name="env">Host Environment.</param>
+        /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+        /// <param name="name">Name of the output column.</param>
+        /// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param>
+        /// <param name="replacementKind">The replacement method to utilize.</param>
+        public NAReplaceTransform(IHostEnvironment env, IDataView input, string name, string source = null, ReplacementKind replacementKind = ReplacementKind.DefaultValue)
+            : this(env, new Arguments() { Column = new[] { new Column() { Source = source ?? name, Name = name } }, ReplacementKind = replacementKind }, input)
+        {
+        }
+
         /// <summary>
         /// Public constructor corresponding to SignatureDataTransform.
         /// </summary>
diff --git a/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs b/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs
index 5d3ab591b2..9e1bad374e 100644
--- a/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs
+++ b/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs
@@ -26,7 +26,14 @@
 
 namespace Microsoft.ML.Runtime.DataPipe
 {
-    public class OptionalColumnTransform : RowToRowMapperTransformBase
+    /// <summary>
+    /// This transform is used to mark some of the columns (e.g. Label) optional during training so that the columns are not required during scoring.
+    /// When applied to new data, if any of the optional columns is not present a dummy column is created having the same properties (e.g. 'name', 'type' etc.) as used during training.
+    /// The columns are filled with default values. The value is
+    /// - scalar for scalar column
+    /// - totally sparse vector for vector column.
+    /// </summary>
+    public sealed class OptionalColumnTransform : RowToRowMapperTransformBase
     {
         public sealed class Arguments : TransformInputBase
         {
@@ -232,6 +239,17 @@ private static VersionInfo GetVersionInfo()
 
         private const string RegistrationName = "OptionalColumn";
 
+        /// <summary>
+        /// Convenience constructor for public facing API.
+        /// </summary>
+        /// <param name="env">Host Environment.</param>
+        /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+        /// <param name="columns">Columns to transform.</param>
+        public OptionalColumnTransform(IHostEnvironment env, IDataView input, params string[] columns)
+            : this(env, new Arguments() { Column = columns }, input)
+        {
+        }
+
         /// <summary>
         /// Public constructor corresponding to SignatureDataTransform.
         /// </summary>
diff --git a/src/Microsoft.ML.Transforms/RffTransform.cs b/src/Microsoft.ML.Transforms/RffTransform.cs
index b8f49b4dce..85c4203512 100644
--- a/src/Microsoft.ML.Transforms/RffTransform.cs
+++ b/src/Microsoft.ML.Transforms/RffTransform.cs
@@ -27,20 +27,26 @@ namespace Microsoft.ML.Runtime.Data
 
     public sealed class RffTransform : OneToOneTransformBase
     {
+        private static class Defaults
+        {
+            public const int NewDim = 1000;
+            public const bool UseSin = false;
+        }
+
         public sealed class Arguments
         {
             [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:src)", ShortName = "col", SortOrder = 1)]
             public Column[] Column;
 
             [Argument(ArgumentType.AtMostOnce, HelpText = "The number of random Fourier features to create", ShortName = "dim")]
-            public int NewDim = 1000;
+            public int NewDim = Defaults.NewDim;
 
             [Argument(ArgumentType.Multiple, HelpText = "which kernel to use?", ShortName = "kernel")]
             public SubComponent MatrixGenerator =
                 new SubComponent(GaussianFourierSampler.LoadName);
 
             [Argument(ArgumentType.AtMostOnce, HelpText = "create two features for every random Fourier frequency? (one for cos and one for sin)")]
-            public bool UseSin = false;
+            public bool UseSin = Defaults.UseSin;
 
             [Argument(ArgumentType.LastOccurenceWins,
                 HelpText = "The seed of the random number generator for generating the new features (if unspecified, " +
@@ -232,6 +238,23 @@ private static string TestColumnType(ColumnType type)
             return "Expected R4 or vector of R4 with known size";
         }
 
+        /// <summary>
+        /// Convenience constructor for public facing API.
+        /// </summary>
+        /// <param name="env">Host Environment.</param>
+        /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+        /// <param name="newDim">The number of random Fourier features to create.</param>
+        /// <param name="name">Name of the output column.</param>
+        /// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param>
+        public RffTransform(IHostEnvironment env,
+            IDataView input,
+            int newDim,
+            string name,
+            string source = null)
+            : this(env, new Arguments() { Column = new[] { new Column() { Source = source ?? name, Name = name } }, NewDim = newDim }, input)
+        {
+        }
+
         /// <summary>
         /// Public constructor corresponding to <see cref="SignatureDataTransform"/>.
         /// </summary>
diff --git a/src/Microsoft.ML.Transforms/UngroupTransform.cs b/src/Microsoft.ML.Transforms/UngroupTransform.cs
index cb97e1c3b3..49cae8ae6e 100644
--- a/src/Microsoft.ML.Transforms/UngroupTransform.cs
+++ b/src/Microsoft.ML.Transforms/UngroupTransform.cs
@@ -61,10 +61,24 @@ private static VersionInfo GetVersionInfo()
                 loaderSignature: LoaderSignature);
         }
 
+        /// <summary>
+        /// Controls the number of output rows produced by the transform
+        /// </summary>
         public enum UngroupMode
         {
+            /// <summary>
+            /// The number of output rows is equal to the minimum length of pivot columns
+            /// </summary>
             Inner,
+
+            /// <summary>
+            /// The number of output rows is equal to the maximum length of pivot columns
+            /// </summary>
             Outer,
+
+            /// <summary>
+            /// The number of output rows is equal to the length of the first pivot column.
+            /// </summary>
             First
         }
 
@@ -79,6 +93,18 @@ public sealed class Arguments : TransformInputBase
 
         private readonly SchemaImpl _schemaImpl;
 
+        /// <summary>
+        /// Convenience constructor for public facing API.
+        /// </summary>
+        /// <param name="env">Host Environment.</param>
+        /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+        /// <param name="mode">Specifies how to unroll multiple pivot columns of different size.</param>
+        /// <param name="columns">Columns to unroll, or 'pivot'</param>
+        public UngroupTransform(IHostEnvironment env, IDataView input, UngroupMode mode, params string[] columns)
+            : this(env, new Arguments() { Column = columns, Mode = mode }, input)
+        {
+        }
+
         public UngroupTransform(IHostEnvironment env, Arguments args, IDataView input)
             : base(env, LoaderSignature, input)
         {
diff --git a/src/Microsoft.ML.Transforms/WhiteningTransform.cs b/src/Microsoft.ML.Transforms/WhiteningTransform.cs
index 2ae3824ba8..8b36078a04 100644
--- a/src/Microsoft.ML.Transforms/WhiteningTransform.cs
+++ b/src/Microsoft.ML.Transforms/WhiteningTransform.cs
@@ -46,25 +46,34 @@ public enum WhiteningKind
     /// </summary>
     public sealed class WhiteningTransform : OneToOneTransformBase
     {
+        private static class Defaults
+        {
+            public const WhiteningKind Kind = WhiteningKind.Zca;
+            public const Float Eps = (Float)1e-5;
+            public const int MaxRows = 100 * 1000;
+            public const bool SaveInverse = false;
+            public const int PcaNum = 0;
+        }
+
         public sealed class Arguments
         {
             [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:src)", ShortName = "col", SortOrder = 1)]
             public Column[] Column;
 
             [Argument(ArgumentType.AtMostOnce, HelpText = "Whitening kind (PCA/ZCA)")]
-            public WhiteningKind Kind = WhiteningKind.Zca;
+            public WhiteningKind Kind = Defaults.Kind;
 
             [Argument(ArgumentType.AtMostOnce, HelpText = "Scaling regularizer")]
-            public Float Eps = (Float)1e-5;
+            public Float Eps = Defaults.Eps;
 
             [Argument(ArgumentType.AtMostOnce, HelpText = "Max number of rows", ShortName = "rows")]
-            public int MaxRows = 100 * 1000;
+            public int MaxRows = Defaults.MaxRows;
 
             [Argument(ArgumentType.AtMostOnce, HelpText = "Whether to save inverse (recovery) matrix", ShortName = "saveInv")]
-            public bool SaveInverse = false;
+            public bool SaveInverse = Defaults.SaveInverse;
 
             [Argument(ArgumentType.AtMostOnce, HelpText = "PCA components to retain")]
-            public int PcaNum = 0;
+            public int PcaNum = Defaults.PcaNum;
 
             // REVIEW: add the following options:
             // 1. Currently there is no way to apply an inverse transform AFTER the the transform is trained.
@@ -209,6 +218,23 @@ private static VersionInfo GetVersionInfo()
 
         private const string RegistrationName = "Whitening";
 
+        /// <summary>
+        /// Convenience constructor for public facing API.
+        /// </summary>
+        /// <param name="env">Host Environment.</param>
+        /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+        /// <param name="name">Name of the output column.</param>
+        /// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param>
+        /// <param name="kind">Whitening kind (PCA/ZCA).</param>
+        public WhiteningTransform(IHostEnvironment env,
+            IDataView input,
+            string name,
+            string source = null,
+            WhiteningKind kind = Defaults.Kind)
+            : this(env, new Arguments() { Column = new[] { new Column() { Source = source ?? name, Name = name } }, Kind = kind }, input)
+        {
+        }
+
         /// <summary>
         /// Public constructor corresponding to SignatureDataTransform.
         /// </summary>