diff --git a/src/Microsoft.ML.Transforms/GroupTransform.cs b/src/Microsoft.ML.Transforms/GroupTransform.cs
index 1d5b823278..2170c3b373 100644
--- a/src/Microsoft.ML.Transforms/GroupTransform.cs
+++ b/src/Microsoft.ML.Transforms/GroupTransform.cs
@@ -88,6 +88,18 @@ public sealed class Arguments : TransformInputBase
private readonly GroupSchema _schema;
+ /// <summary>
+ /// Convenience constructor for the public facing API. Packs the supplied column names into an
+ /// <see cref="Arguments"/> instance and forwards to the arguments-based constructor.
+ /// </summary>
+ /// <param name="env">Host Environment.</param>
+ /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+ /// <param name="groupKey">Name of the column to group by; it becomes the single entry of the group key array.</param>
+ /// <param name="columns">Names of the columns to group together.</param>
+ public GroupTransform(IHostEnvironment env, IDataView input, string groupKey, params string[] columns)
+     : this(
+         env,
+         new Arguments
+         {
+             Column = columns,
+             GroupKey = new[] { groupKey }
+         },
+         input)
+ {
+ }
+
public GroupTransform(IHostEnvironment env, Arguments args, IDataView input)
: base(env, RegistrationName, input)
{
diff --git a/src/Microsoft.ML.Transforms/HashJoinTransform.cs b/src/Microsoft.ML.Transforms/HashJoinTransform.cs
index 098564bef3..f7c2259a3a 100644
--- a/src/Microsoft.ML.Transforms/HashJoinTransform.cs
+++ b/src/Microsoft.ML.Transforms/HashJoinTransform.cs
@@ -37,6 +37,14 @@ public sealed class HashJoinTransform : OneToOneTransformBase
public const int NumBitsMin = 1;
public const int NumBitsLim = 32;
+ /// <summary>
+ /// Default values for the optional settings of this transform. Each constant initializes the
+ /// <see cref="Arguments"/> field of the same name; Join and HashBits are also the default values
+ /// of the matching optional parameters of the convenience constructor.
+ /// </summary>
+ private static class Defaults
+ {
+ public const bool Join = true;
+ // NumBitsLim (32) is the exclusive upper limit, so the default is the largest allowed value, 31.
+ public const int HashBits = NumBitsLim - 1;
+ public const uint Seed = 314489979;
+ public const bool Ordered = true;
+ }
+
public sealed class Arguments : TransformInputBase
{
[Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:src)",
@@ -45,17 +53,17 @@ public sealed class Arguments : TransformInputBase
public Column[] Column;
[Argument(ArgumentType.AtMostOnce, HelpText = "Whether the values need to be combined for a single hash")]
- public bool Join = true;
+ public bool Join = Defaults.Join;
[Argument(ArgumentType.AtMostOnce, HelpText = "Number of bits to hash into. Must be between 1 and 31, inclusive.",
ShortName = "bits", SortOrder = 2)]
- public int HashBits = NumBitsLim - 1;
+ public int HashBits = Defaults.HashBits;
[Argument(ArgumentType.AtMostOnce, HelpText = "Hashing seed")]
- public uint Seed = 314489979;
+ public uint Seed = Defaults.Seed;
[Argument(ArgumentType.AtMostOnce, HelpText = "Whether the position of each term should be included in the hash", ShortName = "ord")]
- public bool Ordered = true;
+ public bool Ordered = Defaults.Ordered;
}
public sealed class Column : OneToOneColumn
@@ -166,6 +174,25 @@ private static VersionInfo GetVersionInfo()
private readonly ColumnInfoEx[] _exes;
+ /// <summary>
+ /// Convenience constructor for the public facing API. Describes a single output column and
+ /// forwards to the arguments-based constructor.
+ /// </summary>
+ /// <param name="env">Host Environment.</param>
+ /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+ /// <param name="name">Name of the output column.</param>
+ /// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param>
+ /// <param name="join">Whether the values need to be combined for a single hash.</param>
+ /// <param name="hashBits">Number of bits to hash into. Must be between 1 and 31, inclusive.</param>
+ public HashJoinTransform(
+     IHostEnvironment env,
+     IDataView input,
+     string name,
+     string source = null,
+     bool join = Defaults.Join,
+     int hashBits = Defaults.HashBits)
+     : this(
+         env,
+         new Arguments
+         {
+             HashBits = hashBits,
+             Join = join,
+             Column = new[]
+             {
+                 // Fall back to the output name when no explicit source column is given.
+                 new Column { Name = name, Source = source ?? name }
+             }
+         },
+         input)
+ {
+ }
+
public HashJoinTransform(IHostEnvironment env, Arguments args, IDataView input)
: base(env, RegistrationName, Contracts.CheckRef(args, nameof(args)).Column, input, TestColumnType)
{
diff --git a/src/Microsoft.ML.Transforms/KeyToBinaryVectorTransform.cs b/src/Microsoft.ML.Transforms/KeyToBinaryVectorTransform.cs
index 5983efb244..0cfaf75500 100644
--- a/src/Microsoft.ML.Transforms/KeyToBinaryVectorTransform.cs
+++ b/src/Microsoft.ML.Transforms/KeyToBinaryVectorTransform.cs
@@ -54,6 +54,18 @@ private static VersionInfo GetVersionInfo()
private readonly VectorType[] _types;
+ /// <summary>
+ /// Convenience constructor for the public facing API. Describes a single output column and
+ /// forwards to the arguments-based constructor.
+ /// </summary>
+ /// <param name="env">Host Environment.</param>
+ /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+ /// <param name="name">Name of the output column.</param>
+ /// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param>
+ public KeyToBinaryVectorTransform(IHostEnvironment env, IDataView input, string name, string source = null)
+     : this(
+         env,
+         new Arguments
+         {
+             Column = new[]
+             {
+                 // Fall back to the output name when no explicit source column is given.
+                 new KeyToVectorTransform.Column { Name = name, Source = source ?? name }
+             }
+         },
+         input)
+ {
+ }
+
///
/// Public constructor corresponding to SignatureDataTransform.
///
diff --git a/src/Microsoft.ML.Transforms/LoadTransform.cs b/src/Microsoft.ML.Transforms/LoadTransform.cs
index 83ff43274d..64a494702d 100644
--- a/src/Microsoft.ML.Transforms/LoadTransform.cs
+++ b/src/Microsoft.ML.Transforms/LoadTransform.cs
@@ -39,6 +39,25 @@ public class Arguments
internal const string Summary = "Loads specified transforms from the model file and applies them to current data.";
+ /// <summary>
+ /// Helper for the public facing API: builds an <see cref="Arguments"/> object from the individual
+ /// settings and delegates to the arguments-based Create overload.
+ /// </summary>
+ /// <param name="env">Host Environment.</param>
+ /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+ /// <param name="modelFile">Model file to load the transforms from.</param>
+ /// <param name="tag">The tags to be loaded (or omitted, if <paramref name="complement"/> is true).</param>
+ /// <param name="complement">Whether to load all transforms except those marked by tags.</param>
+ /// <returns>The transform produced by the arguments-based Create overload.</returns>
+ public static IDataTransform Create(IHostEnvironment env, IDataView input, string modelFile, string[] tag, bool complement = false)
+ {
+     return Create(
+         env,
+         new Arguments
+         {
+             Complement = complement,
+             Tag = tag,
+             ModelFile = modelFile
+         },
+         input);
+ }
+
public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
{
Contracts.CheckValue(env, nameof(env));
diff --git a/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs b/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs
index d8c20f03ca..39fec2b208 100644
--- a/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs
+++ b/src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs
@@ -33,6 +33,13 @@ public static class MutualInformationFeatureSelectionTransform
public const string UserName = "Mutual Information Feature Selection Transform";
public const string ShortName = "MIFeatureSelection";
+ /// <summary>
+ /// Default values for the optional settings of this transform. Each constant initializes the
+ /// <see cref="Arguments"/> field of the same name and is the default value of the matching
+ /// optional parameter of the helper Create method.
+ /// </summary>
+ private static class Defaults
+ {
+ public const string LabelColumn = DefaultColumnNames.Label;
+ public const int SlotsInOutput = 1000;
+ public const int NumBins = 256;
+ }
+
public sealed class Arguments : TransformInputBase
{
[Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "Columns to use for feature selection", ShortName = "col",
@@ -41,19 +48,45 @@ public sealed class Arguments : TransformInputBase
[Argument(ArgumentType.LastOccurenceWins, HelpText = "Column to use for labels", ShortName = "lab",
SortOrder = 4, Purpose = SpecialPurpose.ColumnName)]
- public string LabelColumn = DefaultColumnNames.Label;
+ public string LabelColumn = Defaults.LabelColumn;
[Argument(ArgumentType.AtMostOnce, HelpText = "The maximum number of slots to preserve in output", ShortName = "topk,numSlotsToKeep",
SortOrder = 1)]
- public int SlotsInOutput = 1000;
+ public int SlotsInOutput = Defaults.SlotsInOutput;
[Argument(ArgumentType.AtMostOnce, HelpText = "Max number of bins for R4/R8 columns, power of 2 recommended",
ShortName = "bins")]
- public int NumBins = 256;
+ public int NumBins = Defaults.NumBins;
}
internal static string RegistrationName = "MutualInformationFeatureSelectionTransform";
+ /// <summary>
+ /// A helper method to create <see cref="MutualInformationFeatureSelectionTransform"/> for selecting the top k slots ordered by their mutual information.
+ /// Builds an <see cref="Arguments"/> object from the individual settings and delegates to the arguments-based Create overload.
+ /// </summary>
+ /// <param name="env">Host Environment.</param>
+ /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+ /// <param name="labelColumn">Column to use for labels.</param>
+ /// <param name="slotsInOutput">The maximum number of slots to preserve in output.</param>
+ /// <param name="numBins">Max number of bins for R4/R8 columns, power of 2 recommended.</param>
+ /// <param name="columns">Columns to use for feature selection.</param>
+ /// <returns>The transform produced by the arguments-based Create overload.</returns>
+ public static IDataTransform Create(
+     IHostEnvironment env,
+     IDataView input,
+     string labelColumn = Defaults.LabelColumn,
+     int slotsInOutput = Defaults.SlotsInOutput,
+     int numBins = Defaults.NumBins,
+     params string[] columns)
+ {
+     var options = new Arguments
+     {
+         NumBins = numBins,
+         SlotsInOutput = slotsInOutput,
+         LabelColumn = labelColumn,
+         Column = columns
+     };
+
+     return Create(env, options, input);
+ }
+
///
/// Create method corresponding to SignatureDataTransform.
///
diff --git a/src/Microsoft.ML.Transforms/NADropTransform.cs b/src/Microsoft.ML.Transforms/NADropTransform.cs
index 347e889a5b..ded5add732 100644
--- a/src/Microsoft.ML.Transforms/NADropTransform.cs
+++ b/src/Microsoft.ML.Transforms/NADropTransform.cs
@@ -69,6 +69,18 @@ private static VersionInfo GetVersionInfo()
// The isNA delegates, parallel to Infos.
private readonly Delegate[] _isNAs;
+ /// <summary>
+ /// Convenience constructor for the public facing API. Describes a single output column and
+ /// forwards to the arguments-based constructor.
+ /// </summary>
+ /// <param name="env">Host Environment.</param>
+ /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+ /// <param name="name">Name of the output column.</param>
+ /// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param>
+ public NADropTransform(IHostEnvironment env, IDataView input, string name, string source = null)
+     : this(
+         env,
+         new Arguments
+         {
+             Column = new[]
+             {
+                 // Fall back to the output name when no explicit source column is given.
+                 new Column { Name = name, Source = source ?? name }
+             }
+         },
+         input)
+ {
+ }
+
public NADropTransform(IHostEnvironment env, Arguments args, IDataView input)
: base(Contracts.CheckRef(env, nameof(env)), RegistrationName, env.CheckRef(args, nameof(args)).Column, input, TestType)
{
diff --git a/src/Microsoft.ML.Transforms/NAHandleTransform.cs b/src/Microsoft.ML.Transforms/NAHandleTransform.cs
index 1b82fe3e1e..840a080ae6 100644
--- a/src/Microsoft.ML.Transforms/NAHandleTransform.cs
+++ b/src/Microsoft.ML.Transforms/NAHandleTransform.cs
@@ -36,10 +36,25 @@ public static class NAHandleTransform
{
public enum ReplacementKind
{
+ /// <summary>
+ /// Replace with the default value of the column based on its type. For example, 'zero' for numeric and 'empty' for string/text columns.
+ /// </summary>
[EnumValueDisplay("Zero/empty")]
DefaultValue,
+
+ /// <summary>
+ /// Replace with the mean value of the column. Supports only numeric/time span/DateTime columns.
+ /// </summary>
Mean,
+
+ /// <summary>
+ /// Replace with the minimum value of the column. Supports only numeric/time span/DateTime columns.
+ /// </summary>
Minimum,
+
+ /// <summary>
+ /// Replace with the maximum value of the column. Supports only numeric/time span/DateTime columns.
+ /// </summary>
Maximum,
[HideEnumValue]
@@ -105,6 +120,27 @@ public bool TryUnparse(StringBuilder sb)
internal const string FriendlyName = "NA Handle Transform";
internal const string ShortName = "NAHandle";
+ /// <summary>
+ /// A helper method to create <see cref="NAHandleTransform"/> for public facing API.
+ /// Builds an <see cref="Arguments"/> object describing a single column and delegates to the arguments-based Create overload.
+ /// </summary>
+ /// <param name="env">Host Environment.</param>
+ /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+ /// <param name="name">Name of the output column.</param>
+ /// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param>
+ /// <param name="replaceWith">The replacement method to utilize.</param>
+ /// <returns>The transform produced by the arguments-based Create overload.</returns>
+ public static IDataTransform Create(IHostEnvironment env, IDataView input, string name, string source = null, ReplacementKind replaceWith = ReplacementKind.DefaultValue)
+ {
+     // Fall back to the output name when no explicit source column is given.
+     var column = new Column { Name = name, Source = source ?? name };
+     var options = new Arguments
+     {
+         ReplaceWith = replaceWith,
+         Column = new[] { column }
+     };
+
+     return Create(env, options, input);
+ }
+
public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
{
Contracts.CheckValue(env, nameof(env));
diff --git a/src/Microsoft.ML.Transforms/NAIndicatorTransform.cs b/src/Microsoft.ML.Transforms/NAIndicatorTransform.cs
index 38ecc2c817..c35a90d748 100644
--- a/src/Microsoft.ML.Transforms/NAIndicatorTransform.cs
+++ b/src/Microsoft.ML.Transforms/NAIndicatorTransform.cs
@@ -85,6 +85,18 @@ private static string TestType(ColumnType type)
// The output column types, parallel to Infos.
private readonly ColumnType[] _types;
+ /// <summary>
+ /// Convenience constructor for the public facing API. Describes a single output column and
+ /// forwards to the arguments-based constructor.
+ /// </summary>
+ /// <param name="env">Host Environment.</param>
+ /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+ /// <param name="name">Name of the output column.</param>
+ /// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param>
+ public NAIndicatorTransform(IHostEnvironment env, IDataView input, string name, string source = null)
+     : this(
+         env,
+         new Arguments
+         {
+             Column = new[]
+             {
+                 // Fall back to the output name when no explicit source column is given.
+                 new Column { Name = name, Source = source ?? name }
+             }
+         },
+         input)
+ {
+ }
+
///
/// Public constructor corresponding to SignatureDataTransform.
///
diff --git a/src/Microsoft.ML.Transforms/NAReplaceTransform.cs b/src/Microsoft.ML.Transforms/NAReplaceTransform.cs
index 44832ee517..30384780e9 100644
--- a/src/Microsoft.ML.Transforms/NAReplaceTransform.cs
+++ b/src/Microsoft.ML.Transforms/NAReplaceTransform.cs
@@ -186,6 +186,19 @@ private static string TestType(ColumnType type)
public override bool CanSaveOnnx => true;
+ /// <summary>
+ /// Convenience constructor for the public facing API. Describes a single output column together
+ /// with the replacement method and forwards to the arguments-based constructor.
+ /// </summary>
+ /// <param name="env">Host Environment.</param>
+ /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+ /// <param name="name">Name of the output column.</param>
+ /// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param>
+ /// <param name="replacementKind">The replacement method to utilize.</param>
+ public NAReplaceTransform(IHostEnvironment env, IDataView input, string name, string source = null, ReplacementKind replacementKind = ReplacementKind.DefaultValue)
+     : this(
+         env,
+         new Arguments
+         {
+             ReplacementKind = replacementKind,
+             Column = new[]
+             {
+                 // Fall back to the output name when no explicit source column is given.
+                 new Column { Name = name, Source = source ?? name }
+             }
+         },
+         input)
+ {
+ }
+
///
/// Public constructor corresponding to SignatureDataTransform.
///
diff --git a/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs b/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs
index 5d3ab591b2..9e1bad374e 100644
--- a/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs
+++ b/src/Microsoft.ML.Transforms/OptionalColumnTransform.cs
@@ -26,7 +26,14 @@
namespace Microsoft.ML.Runtime.DataPipe
{
- public class OptionalColumnTransform : RowToRowMapperTransformBase
+ /// <summary>
+ /// This transform is used to mark some of the columns (e.g. Label) optional during training so that the columns are not required during scoring.
+ /// When applied to new data, if any of the optional columns is not present, a dummy column is created having the same properties (e.g. 'name', 'type' etc.) as used during training.
+ /// The columns are filled with default values. The value is
+ /// - scalar for scalar column
+ /// - totally sparse vector for vector column.
+ /// </summary>
+ public sealed class OptionalColumnTransform : RowToRowMapperTransformBase
{
public sealed class Arguments : TransformInputBase
{
@@ -232,6 +239,17 @@ private static VersionInfo GetVersionInfo()
private const string RegistrationName = "OptionalColumn";
+ /// <summary>
+ /// Convenience constructor for the public facing API. Wraps the column names in an
+ /// <see cref="Arguments"/> instance and forwards to the arguments-based constructor.
+ /// </summary>
+ /// <param name="env">Host Environment.</param>
+ /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+ /// <param name="columns">Columns to transform.</param>
+ public OptionalColumnTransform(IHostEnvironment env, IDataView input, params string[] columns)
+     : this(
+         env,
+         new Arguments { Column = columns },
+         input)
+ {
+ }
+
///
/// Public constructor corresponding to SignatureDataTransform.
///
diff --git a/src/Microsoft.ML.Transforms/RffTransform.cs b/src/Microsoft.ML.Transforms/RffTransform.cs
index b8f49b4dce..85c4203512 100644
--- a/src/Microsoft.ML.Transforms/RffTransform.cs
+++ b/src/Microsoft.ML.Transforms/RffTransform.cs
@@ -27,20 +27,26 @@ namespace Microsoft.ML.Runtime.Data
public sealed class RffTransform : OneToOneTransformBase
{
+ /// <summary>
+ /// Default values for optional settings of this transform. Each constant initializes the
+ /// <see cref="Arguments"/> field of the same name.
+ /// </summary>
+ private static class Defaults
+ {
+ public const int NewDim = 1000;
+ public const bool UseSin = false;
+ }
+
public sealed class Arguments
{
[Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:src)", ShortName = "col", SortOrder = 1)]
public Column[] Column;
[Argument(ArgumentType.AtMostOnce, HelpText = "The number of random Fourier features to create", ShortName = "dim")]
- public int NewDim = 1000;
+ public int NewDim = Defaults.NewDim;
[Argument(ArgumentType.Multiple, HelpText = "which kernel to use?", ShortName = "kernel")]
public SubComponent MatrixGenerator =
new SubComponent(GaussianFourierSampler.LoadName);
[Argument(ArgumentType.AtMostOnce, HelpText = "create two features for every random Fourier frequency? (one for cos and one for sin)")]
- public bool UseSin = false;
+ public bool UseSin = Defaults.UseSin;
[Argument(ArgumentType.LastOccurenceWins,
HelpText = "The seed of the random number generator for generating the new features (if unspecified, " +
@@ -232,6 +238,23 @@ private static string TestColumnType(ColumnType type)
return "Expected R4 or vector of R4 with known size";
}
+ /// <summary>
+ /// Convenience constructor for the public facing API. Describes a single output column together
+ /// with the number of features and forwards to the arguments-based constructor.
+ /// </summary>
+ /// <param name="env">Host Environment.</param>
+ /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+ /// <param name="newDim">The number of random Fourier features to create.</param>
+ /// <param name="name">Name of the output column.</param>
+ /// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param>
+ public RffTransform(IHostEnvironment env, IDataView input, int newDim, string name, string source = null)
+     : this(
+         env,
+         new Arguments
+         {
+             NewDim = newDim,
+             Column = new[]
+             {
+                 // Fall back to the output name when no explicit source column is given.
+                 new Column { Name = name, Source = source ?? name }
+             }
+         },
+         input)
+ {
+ }
+
///
/// Public constructor corresponding to SignatureDataTransform.
///
diff --git a/src/Microsoft.ML.Transforms/UngroupTransform.cs b/src/Microsoft.ML.Transforms/UngroupTransform.cs
index cb97e1c3b3..49cae8ae6e 100644
--- a/src/Microsoft.ML.Transforms/UngroupTransform.cs
+++ b/src/Microsoft.ML.Transforms/UngroupTransform.cs
@@ -61,10 +61,24 @@ private static VersionInfo GetVersionInfo()
loaderSignature: LoaderSignature);
}
+ /// <summary>
+ /// Controls the number of output rows produced by the transform.
+ /// </summary>
public enum UngroupMode
{
+ /// <summary>
+ /// The number of output rows is equal to the minimum length of pivot columns.
+ /// </summary>
Inner,
+
+ /// <summary>
+ /// The number of output rows is equal to the maximum length of pivot columns.
+ /// </summary>
Outer,
+
+ /// <summary>
+ /// The number of output rows is equal to the length of the first pivot column.
+ /// </summary>
First
}
@@ -79,6 +93,18 @@ public sealed class Arguments : TransformInputBase
private readonly SchemaImpl _schemaImpl;
+ /// <summary>
+ /// Convenience constructor for the public facing API. Packs the mode and column names into an
+ /// <see cref="Arguments"/> instance and forwards to the arguments-based constructor.
+ /// </summary>
+ /// <param name="env">Host Environment.</param>
+ /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+ /// <param name="mode">Specifies how to unroll multiple pivot columns of different size.</param>
+ /// <param name="columns">Columns to unroll, or 'pivot'.</param>
+ public UngroupTransform(IHostEnvironment env, IDataView input, UngroupMode mode, params string[] columns)
+     : this(
+         env,
+         new Arguments
+         {
+             Mode = mode,
+             Column = columns
+         },
+         input)
+ {
+ }
+
public UngroupTransform(IHostEnvironment env, Arguments args, IDataView input)
: base(env, LoaderSignature, input)
{
diff --git a/src/Microsoft.ML.Transforms/WhiteningTransform.cs b/src/Microsoft.ML.Transforms/WhiteningTransform.cs
index 2ae3824ba8..8b36078a04 100644
--- a/src/Microsoft.ML.Transforms/WhiteningTransform.cs
+++ b/src/Microsoft.ML.Transforms/WhiteningTransform.cs
@@ -46,25 +46,34 @@ public enum WhiteningKind
///
public sealed class WhiteningTransform : OneToOneTransformBase
{
+ /// <summary>
+ /// Default values for the optional settings of this transform. Each constant initializes the
+ /// <see cref="Arguments"/> field of the same name; Kind is also the default value of the
+ /// convenience constructor's kind parameter.
+ /// </summary>
+ private static class Defaults
+ {
+ public const WhiteningKind Kind = WhiteningKind.Zca;
+ public const Float Eps = (Float)1e-5;
+ public const int MaxRows = 100 * 1000;
+ public const bool SaveInverse = false;
+ public const int PcaNum = 0;
+ }
+
public sealed class Arguments
{
[Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:src)", ShortName = "col", SortOrder = 1)]
public Column[] Column;
[Argument(ArgumentType.AtMostOnce, HelpText = "Whitening kind (PCA/ZCA)")]
- public WhiteningKind Kind = WhiteningKind.Zca;
+ public WhiteningKind Kind = Defaults.Kind;
[Argument(ArgumentType.AtMostOnce, HelpText = "Scaling regularizer")]
- public Float Eps = (Float)1e-5;
+ public Float Eps = Defaults.Eps;
[Argument(ArgumentType.AtMostOnce, HelpText = "Max number of rows", ShortName = "rows")]
- public int MaxRows = 100 * 1000;
+ public int MaxRows = Defaults.MaxRows;
[Argument(ArgumentType.AtMostOnce, HelpText = "Whether to save inverse (recovery) matrix", ShortName = "saveInv")]
- public bool SaveInverse = false;
+ public bool SaveInverse = Defaults.SaveInverse;
[Argument(ArgumentType.AtMostOnce, HelpText = "PCA components to retain")]
- public int PcaNum = 0;
+ public int PcaNum = Defaults.PcaNum;
// REVIEW: add the following options:
// 1. Currently there is no way to apply an inverse transform AFTER the the transform is trained.
@@ -209,6 +218,23 @@ private static VersionInfo GetVersionInfo()
private const string RegistrationName = "Whitening";
+ /// <summary>
+ /// Convenience constructor for the public facing API. Describes a single output column together
+ /// with the whitening kind and forwards to the arguments-based constructor.
+ /// </summary>
+ /// <param name="env">Host Environment.</param>
+ /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
+ /// <param name="name">Name of the output column.</param>
+ /// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param>
+ /// <param name="kind">Whitening kind (PCA/ZCA).</param>
+ public WhiteningTransform(IHostEnvironment env, IDataView input, string name, string source = null, WhiteningKind kind = Defaults.Kind)
+     : this(
+         env,
+         new Arguments
+         {
+             Kind = kind,
+             Column = new[]
+             {
+                 // Fall back to the output name when no explicit source column is given.
+                 new Column { Name = name, Source = source ?? name }
+             }
+         },
+         input)
+ {
+ }
+
///
/// Public constructor corresponding to SignatureDataTransform.
///