diff --git a/ZBaselines/Common/EntryPoints/core_ep-list.tsv b/ZBaselines/Common/EntryPoints/core_ep-list.tsv index 568a6066f9..7d6d02b672 100644 --- a/ZBaselines/Common/EntryPoints/core_ep-list.tsv +++ b/ZBaselines/Common/EntryPoints/core_ep-list.tsv @@ -15,11 +15,13 @@ Models.NaiveCalibrator Apply a Naive calibrator to an input model Microsoft.ML.R Models.OneVersusAll One-vs-All macro (OVA) Microsoft.ML.Runtime.EntryPoints.OneVersusAllMacro OVA Microsoft.ML.Runtime.EntryPoints.OneVersusAllMacro+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+MacroOutput`1[Microsoft.ML.Runtime.EntryPoints.OneVersusAllMacro+Output] Models.OvaModelCombiner Combines a sequence of PredictorModels into a single model Microsoft.ML.Runtime.Learners.OvaPredictor CombineOvaModels Microsoft.ML.Runtime.EntryPoints.ModelOperations+CombineOvaPredictorModelsInput Microsoft.ML.Runtime.EntryPoints.ModelOperations+PredictorModelOutput Models.PAVCalibrator Apply a PAV calibrator to an input model Microsoft.ML.Runtime.Internal.Calibration.Calibrate Pav Microsoft.ML.Runtime.Internal.Calibration.Calibrate+NoArgumentsInput Microsoft.ML.Runtime.EntryPoints.CommonOutputs+CalibratorOutput +Models.PipelineSweeper AutoML pipeline sweeping optimzation macro. Microsoft.ML.Runtime.EntryPoints.PipelineSweeperMacro PipelineSweep Microsoft.ML.Runtime.EntryPoints.PipelineSweeperMacro+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+MacroOutput`1[Microsoft.ML.Runtime.EntryPoints.PipelineSweeperMacro+Output] Models.PlattCalibrator Apply a Platt calibrator to an input model Microsoft.ML.Runtime.Internal.Calibration.Calibrate Platt Microsoft.ML.Runtime.Internal.Calibration.Calibrate+NoArgumentsInput Microsoft.ML.Runtime.EntryPoints.CommonOutputs+CalibratorOutput Models.QuantileRegressionEvaluator Evaluates a quantile regression scored dataset. 
Microsoft.ML.Runtime.Data.Evaluate QuantileRegression Microsoft.ML.Runtime.Data.QuantileRegressionMamlEvaluator+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+CommonEvaluateOutput Models.RankerEvaluator Evaluates a ranking scored dataset. Microsoft.ML.Runtime.Data.Evaluate Ranking Microsoft.ML.Runtime.Data.RankerMamlEvaluator+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+CommonEvaluateOutput Models.RegressionEvaluator Evaluates a regression scored dataset. Microsoft.ML.Runtime.Data.Evaluate Regression Microsoft.ML.Runtime.Data.RegressionMamlEvaluator+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+CommonEvaluateOutput Models.Summarizer Summarize a linear regression predictor. Microsoft.ML.Runtime.EntryPoints.SummarizePredictor Summarize Microsoft.ML.Runtime.EntryPoints.SummarizePredictor+Input Microsoft.ML.Runtime.EntryPoints.CommonOutputs+SummaryOutput +Models.SweepResultExtractor Extracts the sweep result. Microsoft.ML.Runtime.EntryPoints.PipelineSweeperMacro ExtractSweepResult Microsoft.ML.Runtime.EntryPoints.PipelineSweeperMacro+ResultInput Microsoft.ML.Runtime.EntryPoints.PipelineSweeperMacro+Output Models.TrainTestBinaryEvaluator Train test for binary classification Microsoft.ML.Runtime.EntryPoints.TrainTestBinaryMacro TrainTestBinary Microsoft.ML.Runtime.EntryPoints.TrainTestBinaryMacro+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+MacroOutput`1[Microsoft.ML.Runtime.EntryPoints.TrainTestBinaryMacro+Output] Models.TrainTestEvaluator General train test for any supported evaluator Microsoft.ML.Runtime.EntryPoints.TrainTestMacro TrainTest Microsoft.ML.Runtime.EntryPoints.TrainTestMacro+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+MacroOutput`1[Microsoft.ML.Runtime.EntryPoints.TrainTestMacro+Output] Trainers.AveragedPerceptronBinaryClassifier Train a Average perceptron. 
Microsoft.ML.Runtime.Learners.AveragedPerceptronTrainer TrainBinary Microsoft.ML.Runtime.Learners.AveragedPerceptronTrainer+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+BinaryClassificationOutput diff --git a/ZBaselines/Common/EntryPoints/core_manifest.json b/ZBaselines/Common/EntryPoints/core_manifest.json index a3778a7f7f..32da6f246f 100644 --- a/ZBaselines/Common/EntryPoints/core_manifest.json +++ b/ZBaselines/Common/EntryPoints/core_manifest.json @@ -1723,6 +1723,105 @@ "ITrainerOutput" ] }, + { + "Name": "Models.PipelineSweeper", + "Desc": "AutoML pipeline sweeping optimzation macro.", + "FriendlyName": null, + "ShortName": null, + "Inputs": [ + { + "Name": "TrainingData", + "Type": "DataView", + "Desc": "The data to be used for training.", + "Aliases": [ + "train" + ], + "Required": true, + "SortOrder": 1.0, + "IsNullable": false + }, + { + "Name": "TestingData", + "Type": "DataView", + "Desc": "The data to be used for testing.", + "Aliases": [ + "test" + ], + "Required": true, + "SortOrder": 2.0, + "IsNullable": false + }, + { + "Name": "StateArguments", + "Type": { + "Kind": "Component", + "ComponentKind": "AutoMlStateBase" + }, + "Desc": "The arguments for creating an AutoMlState component.", + "Aliases": [ + "args" + ], + "Required": false, + "SortOrder": 3.0, + "IsNullable": false, + "Default": null + }, + { + "Name": "State", + "Type": { + "Kind": "C# Object", + "ItemType": "Microsoft.ML.Runtime.EntryPoints.IMlState" + }, + "Desc": "The stateful object conducting of the autoML search.", + "Aliases": [ + "state" + ], + "Required": false, + "SortOrder": 3.0, + "IsNullable": false, + "Default": null + }, + { + "Name": "BatchSize", + "Type": "Int", + "Desc": "Number of candidate pipelines to retrieve each round.", + "Aliases": [ + "bsize" + ], + "Required": true, + "SortOrder": 4.0, + "IsNullable": false, + "Default": 0 + }, + { + "Name": "CandidateOutputs", + "Type": { + "Kind": "Array", + "ItemType": "DataView" + }, + "Desc": "Output datasets 
from previous iteration of sweep.", + "Required": false, + "SortOrder": 7.0, + "IsNullable": false, + "Default": null + } + ], + "Outputs": [ + { + "Name": "State", + "Type": { + "Kind": "C# Object", + "ItemType": "Microsoft.ML.Runtime.EntryPoints.IMlState" + }, + "Desc": "Stateful autoML object, keeps track of where the search in progress." + }, + { + "Name": "Results", + "Type": "DataView", + "Desc": "Results of the sweep, including pipelines (as graph strings), IDs, and metric values." + } + ] + }, { "Name": "Models.PlattCalibrator", "Desc": "Apply a Platt calibrator to an input model", @@ -2209,6 +2308,44 @@ } ] }, + { + "Name": "Models.SweepResultExtractor", + "Desc": "Extracts the sweep result.", + "FriendlyName": null, + "ShortName": null, + "Inputs": [ + { + "Name": "State", + "Type": { + "Kind": "C# Object", + "ItemType": "Microsoft.ML.Runtime.EntryPoints.IMlState" + }, + "Desc": "The stateful object conducting of the autoML search.", + "Aliases": [ + "state" + ], + "Required": false, + "SortOrder": 1.0, + "IsNullable": false, + "Default": null + } + ], + "Outputs": [ + { + "Name": "State", + "Type": { + "Kind": "C# Object", + "ItemType": "Microsoft.ML.Runtime.EntryPoints.IMlState" + }, + "Desc": "Stateful autoML object, keeps track of where the search in progress." + }, + { + "Name": "Results", + "Type": "DataView", + "Desc": "Results of the sweep, including pipelines (as graph strings), IDs, and metric values." 
+ } + ] + }, { "Name": "Models.TrainTestBinaryEvaluator", "Desc": "Train test for binary classification", @@ -17200,6 +17337,203 @@ } ], "Components": [ + { + "Kind": "AutoMlEngine", + "Components": [ + { + "Name": "Defaults", + "Desc": "AutoML engine that returns learners with default settings.", + "FriendlyName": "Defaults Engine", + "Settings": [] + }, + { + "Name": "Rocket", + "Desc": "AutoML engine that consists of distinct, hierarchical stages of operation.", + "FriendlyName": "Rocket Engine", + "Settings": [ + { + "Name": "TopKLearners", + "Type": "Int", + "Desc": "Number of learners to retain for second stage.", + "Aliases": [ + "topk" + ], + "Required": false, + "SortOrder": 1.0, + "IsNullable": false, + "Default": 2 + }, + { + "Name": "SecondRoundTrialsPerLearner", + "Type": "Int", + "Desc": "Number of trials for retained second stage learners.", + "Aliases": [ + "stage2num" + ], + "Required": false, + "SortOrder": 2.0, + "IsNullable": false, + "Default": 5 + }, + { + "Name": "RandomInitialization", + "Type": "Bool", + "Desc": "Use random initialization only.", + "Aliases": [ + "randinit" + ], + "Required": false, + "SortOrder": 3.0, + "IsNullable": false, + "Default": false + }, + { + "Name": "NumInitializationPipelines", + "Type": "Int", + "Desc": "Number of initilization pipelines, used for random initialization only.", + "Aliases": [ + "numinitseeds" + ], + "Required": false, + "SortOrder": 4.0, + "IsNullable": false, + "Default": 20 + } + ] + }, + { + "Name": "UniformRandom", + "Desc": "AutoML engine using uniform random sampling.", + "FriendlyName": "Uniform Random Engine", + "Settings": [] + } + ] + }, + { + "Kind": "AutoMlStateBase", + "Components": [ + { + "Name": "AutoMlState", + "Desc": "State of an AutoML search and search space.", + "FriendlyName": "AutoML State", + "Aliases": [ + "automlst" + ], + "Settings": [ + { + "Name": "Metric", + "Type": { + "Kind": "Enum", + "Values": [ + "Auc", + "AccuracyMicro", + "AccuracyMacro", + "L2", + "F1", + 
"AuPrc", + "TopKAccuracy", + "Rms", + "LossFn", + "RSquared", + "LogLoss", + "LogLossReduction", + "Ndcg", + "Dcg", + "PositivePrecision", + "PositiveRecall", + "NegativePrecision", + "NegativeRecall", + "DrAtK", + "DrAtPFpr", + "DrAtNumPos", + "NumAnomalies", + "ThreshAtK", + "ThreshAtP", + "ThreshAtNumPos", + "Nmi", + "AvgMinScore", + "Dbi" + ] + }, + "Desc": "Supported metric for evaluator.", + "Aliases": [ + "metric" + ], + "Required": true, + "SortOrder": 150.0, + "IsNullable": false, + "Default": "Auc" + }, + { + "Name": "Engine", + "Type": { + "Kind": "Component", + "ComponentKind": "AutoMlEngine" + }, + "Desc": "AutoML engine (pipeline optimizer) that generates next candidates.", + "Aliases": [ + "engine" + ], + "Required": true, + "SortOrder": 150.0, + "IsNullable": false + }, + { + "Name": "TrainerKind", + "Type": { + "Kind": "Enum", + "Values": [ + "SignatureBinaryClassifierTrainer", + "SignatureMultiClassClassifierTrainer", + "SignatureRankerTrainer", + "SignatureRegressorTrainer", + "SignatureMultiOutputRegressorTrainer", + "SignatureAnomalyDetectorTrainer", + "SignatureClusteringTrainer" + ] + }, + "Desc": "Kind of trainer for task, such as binary classification trainer, multiclass trainer, etc.", + "Aliases": [ + "tk" + ], + "Required": true, + "SortOrder": 150.0, + "IsNullable": false, + "Default": "SignatureBinaryClassifierTrainer" + }, + { + "Name": "TerminatorArgs", + "Type": { + "Kind": "Component", + "ComponentKind": "SearchTerminator" + }, + "Desc": "Arguments for creating terminator, which determines when to stop search.", + "Aliases": [ + "term" + ], + "Required": true, + "SortOrder": 150.0, + "IsNullable": false + }, + { + "Name": "RequestedLearners", + "Type": { + "Kind": "Array", + "ItemType": "String" + }, + "Desc": "Learner set to sweep over (if available).", + "Aliases": [ + "learners" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": null + } + ] + } + ] + }, { "Kind": "CalibratorTrainer", 
"Components": [ @@ -21441,6 +21775,30 @@ } ] }, + { + "Kind": "SearchTerminator", + "Components": [ + { + "Name": "IterationLimited", + "Desc": "Terminators a sweep based on total number of iterations.", + "FriendlyName": "Pipeline Sweep Iteration Terminator", + "Settings": [ + { + "Name": "FinalHistoryLength", + "Type": "Int", + "Desc": "Total number of iterations.", + "Aliases": [ + "length" + ], + "Required": true, + "SortOrder": 150.0, + "IsNullable": false, + "Default": 0 + } + ] + } + ] + }, { "Kind": "StopWordsRemover", "Components": [ diff --git a/src/Microsoft.ML.PipelineInference/Macros/PipelineSweeperMacro.cs b/src/Microsoft.ML.PipelineInference/Macros/PipelineSweeperMacro.cs index a4cff4df3b..58f44b9ce8 100644 --- a/src/Microsoft.ML.PipelineInference/Macros/PipelineSweeperMacro.cs +++ b/src/Microsoft.ML.PipelineInference/Macros/PipelineSweeperMacro.cs @@ -154,6 +154,12 @@ public static CommonOutputs.MacroOutput PipelineSweep( if (candidatePipelines == null || candidatePipelines.Length == 0) { // Add a node to extract the sweep result. 
+ var resultSubgraph = new Experiment(env); + var resultNode = new Microsoft.ML.Models.SweepResultExtractor() { State = amlsVarObj }; + var resultOutput = new Models.SweepResultExtractor.Output() { State = outStateVar, Results = outDvVar }; + resultSubgraph.Add(resultNode, resultOutput); + var resultSubgraphNodes = EntryPointNode.ValidateNodes(env, node.Context, resultSubgraph.GetNodes(), node.Catalog); + expNodes.AddRange(resultSubgraphNodes); return new CommonOutputs.MacroOutput() { Nodes = expNodes }; } @@ -176,6 +182,22 @@ public static CommonOutputs.MacroOutput PipelineSweep( pipelineIndicators.Add(trainTestOutput.OverallMetrics); } + // Add recursive macro node + var macroSubgraph = new Experiment(env); + var macroNode = new Models.PipelineSweeper() + { + BatchSize = input.BatchSize, + CandidateOutputs = new ArrayVar(pipelineIndicators.ToArray()), + TrainingData = training, + TestingData = testing, + State = amlsVarObj + }; + var output = new Models.PipelineSweeper.Output() { Results = outDvVar, State = outStateVar }; + macroSubgraph.Add(macroNode, output); + + var subgraphNodes = EntryPointNode.ValidateNodes(env, node.Context, macroSubgraph.GetNodes(), node.Catalog); + expNodes.AddRange(subgraphNodes); + return new CommonOutputs.MacroOutput() { Nodes = expNodes }; } } diff --git a/src/Microsoft.ML.PipelineInference/RecipeInference.cs b/src/Microsoft.ML.PipelineInference/RecipeInference.cs index 1d5c15614c..8c08e650ec 100644 --- a/src/Microsoft.ML.PipelineInference/RecipeInference.cs +++ b/src/Microsoft.ML.PipelineInference/RecipeInference.cs @@ -507,7 +507,7 @@ public static SuggestedRecipe.SuggestedLearner[] AllowedLearners(IHostEnvironmen var type = typeof(CommonInputs.ITrainerInput); var trainerTypes = typeof(Experiment).Assembly.GetTypes() .Where(p => type.IsAssignableFrom(p) && - p.Name.Equals(MacroUtils.GetTrainerName(trainerKind))); + MacroUtils.IsTrainerOfKind(p, trainerKind)); foreach (var tt in trainerTypes) { @@ -516,7 +516,7 @@ public static 
SuggestedRecipe.SuggestedLearner[] AllowedLearners(IHostEnvironmen var sl = new SuggestedRecipe.SuggestedLearner { PipelineNode = new TrainerPipelineNode(epInputObj, sweepParams), - LearnerName = tt.Namespace + LearnerName = tt.Name }; if (sl.PipelineNode != null && availableLearnersList.FirstOrDefault(l=> l.Name.Equals(sl.PipelineNode.GetEpName())) != null) diff --git a/src/Microsoft.ML/CSharpApi.cs b/src/Microsoft.ML/CSharpApi.cs index ecea73a495..99ca759385 100644 --- a/src/Microsoft.ML/CSharpApi.cs +++ b/src/Microsoft.ML/CSharpApi.cs @@ -226,6 +226,18 @@ public void Add(Microsoft.ML.Models.PAVCalibrator input, Microsoft.ML.Models.PAV _jsonNodes.Add(Serialize("Models.PAVCalibrator", input, output)); } + public Microsoft.ML.Models.PipelineSweeper.Output Add(Microsoft.ML.Models.PipelineSweeper input) + { + var output = new Microsoft.ML.Models.PipelineSweeper.Output(); + Add(input, output); + return output; + } + + public void Add(Microsoft.ML.Models.PipelineSweeper input, Microsoft.ML.Models.PipelineSweeper.Output output) + { + _jsonNodes.Add(Serialize("Models.PipelineSweeper", input, output)); + } + public Microsoft.ML.Models.PlattCalibrator.Output Add(Microsoft.ML.Models.PlattCalibrator input) { var output = new Microsoft.ML.Models.PlattCalibrator.Output(); @@ -286,6 +298,18 @@ public void Add(Microsoft.ML.Models.Summarizer input, Microsoft.ML.Models.Summar _jsonNodes.Add(Serialize("Models.Summarizer", input, output)); } + public Microsoft.ML.Models.SweepResultExtractor.Output Add(Microsoft.ML.Models.SweepResultExtractor input) + { + var output = new Microsoft.ML.Models.SweepResultExtractor.Output(); + Add(input, output); + return output; + } + + public void Add(Microsoft.ML.Models.SweepResultExtractor input, Microsoft.ML.Models.SweepResultExtractor.Output output) + { + _jsonNodes.Add(Serialize("Models.SweepResultExtractor", input, output)); + } + public Microsoft.ML.Models.TrainTestBinaryEvaluator.Output Add(Microsoft.ML.Models.TrainTestBinaryEvaluator input) { 
var output = new Microsoft.ML.Models.TrainTestBinaryEvaluator.Output(); @@ -2423,6 +2447,64 @@ public PAVCalibratorPipelineStep(Output output) } } + namespace Models + { + + /// + /// AutoML pipeline sweeping optimzation macro. + /// + public sealed partial class PipelineSweeper + { + + + /// + /// The data to be used for training. + /// + public Var TrainingData { get; set; } = new Var(); + + /// + /// The data to be used for testing. + /// + public Var TestingData { get; set; } = new Var(); + + /// + /// The arguments for creating an AutoMlState component. + /// + [JsonConverter(typeof(ComponentSerializer))] + public AutoMlStateBase StateArguments { get; set; } + + /// + /// The stateful object conducting of the autoML search. + /// + public Var State { get; set; } = new Var(); + + /// + /// Number of candidate pipelines to retrieve each round. + /// + public int BatchSize { get; set; } + + /// + /// Output datasets from previous iteration of sweep. + /// + public ArrayVar CandidateOutputs { get; set; } = new ArrayVar(); + + + public sealed class Output + { + /// + /// Stateful autoML object, keeps track of where the search in progress. + /// + public Var State { get; set; } = new Var(); + + /// + /// Results of the sweep, including pipelines (as graph strings), IDs, and metric values. + /// + public Var Results { get; set; } = new Var(); + + } + } + } + namespace Models { @@ -2732,6 +2814,38 @@ public sealed class Output } } + namespace Models + { + + /// + /// Extracts the sweep result. + /// + public sealed partial class SweepResultExtractor + { + + + /// + /// The stateful object conducting of the autoML search. + /// + public Var State { get; set; } = new Var(); + + + public sealed class Output + { + /// + /// Stateful autoML object, keeps track of where the search in progress. + /// + public Var State { get; set; } = new Var(); + + /// + /// Results of the sweep, including pipelines (as graph strings), IDs, and metric values. 
+ /// + public Var Results { get; set; } = new Var(); + + } + } + } + namespace Models { @@ -12298,6 +12412,129 @@ public WordTokenizerPipelineStep(Output output) namespace Runtime { + public abstract class AutoMlEngine : ComponentKind {} + + + + /// + /// AutoML engine that returns learners with default settings. + /// + public sealed class DefaultsAutoMlEngine : AutoMlEngine + { + internal override string ComponentName => "Defaults"; + } + + + + /// + /// AutoML engine that consists of distinct, hierarchical stages of operation. + /// + public sealed class RocketAutoMlEngine : AutoMlEngine + { + /// + /// Number of learners to retain for second stage. + /// + public int TopKLearners { get; set; } = 2; + + /// + /// Number of trials for retained second stage learners. + /// + public int SecondRoundTrialsPerLearner { get; set; } = 5; + + /// + /// Use random initialization only. + /// + public bool RandomInitialization { get; set; } = false; + + /// + /// Number of initilization pipelines, used for random initialization only. + /// + public int NumInitializationPipelines { get; set; } = 20; + + internal override string ComponentName => "Rocket"; + } + + + + /// + /// AutoML engine using uniform random sampling. 
+ /// + public sealed class UniformRandomAutoMlEngine : AutoMlEngine + { + internal override string ComponentName => "UniformRandom"; + } + + public abstract class AutoMlStateBase : ComponentKind {} + + public enum AutoInferenceAutoMlMlStateArgumentsMetrics + { + Auc = 0, + AccuracyMicro = 1, + AccuracyMacro = 2, + L2 = 3, + F1 = 4, + AuPrc = 5, + TopKAccuracy = 6, + Rms = 7, + LossFn = 8, + RSquared = 9, + LogLoss = 10, + LogLossReduction = 11, + Ndcg = 12, + Dcg = 13, + PositivePrecision = 14, + PositiveRecall = 15, + NegativePrecision = 16, + NegativeRecall = 17, + DrAtK = 18, + DrAtPFpr = 19, + DrAtNumPos = 20, + NumAnomalies = 21, + ThreshAtK = 22, + ThreshAtP = 23, + ThreshAtNumPos = 24, + Nmi = 25, + AvgMinScore = 26, + Dbi = 27 + } + + + + /// + /// State of an AutoML search and search space. + /// + public sealed class AutoMlStateAutoMlStateBase : AutoMlStateBase + { + /// + /// Supported metric for evaluator. + /// + public Microsoft.ML.Runtime.AutoInferenceAutoMlMlStateArgumentsMetrics Metric { get; set; } = Microsoft.ML.Runtime.AutoInferenceAutoMlMlStateArgumentsMetrics.Auc; + + /// + /// AutoML engine (pipeline optimizer) that generates next candidates. + /// + [JsonConverter(typeof(ComponentSerializer))] + public AutoMlEngine Engine { get; set; } + + /// + /// Kind of trainer for task, such as binary classification trainer, multiclass trainer, etc. + /// + public Microsoft.ML.Models.MacroUtilsTrainerKinds TrainerKind { get; set; } = Microsoft.ML.Models.MacroUtilsTrainerKinds.SignatureBinaryClassifierTrainer; + + /// + /// Arguments for creating terminator, which determines when to stop search. + /// + [JsonConverter(typeof(ComponentSerializer))] + public SearchTerminator TerminatorArgs { get; set; } + + /// + /// Learner set to sweep over (if available). 
+ /// + public string[] RequestedLearners { get; set; } + + internal override string ComponentName => "AutoMlState"; + } + public abstract class CalibratorTrainer : ComponentKind {} @@ -14130,6 +14367,23 @@ public sealed class SquaredLossSDCARegressionLossFunction : SDCARegressionLossFu internal override string ComponentName => "SquaredLoss"; } + public abstract class SearchTerminator : ComponentKind {} + + + + /// + /// Terminators a sweep based on total number of iterations. + /// + public sealed class IterationLimitedSearchTerminator : SearchTerminator + { + /// + /// Total number of iterations. + /// + public int FinalHistoryLength { get; set; } + + internal override string ComponentName => "IterationLimited"; + } + public abstract class StopWordsRemover : ComponentKind {} diff --git a/src/Microsoft.ML/Runtime/EntryPoints/MacroUtils.cs b/src/Microsoft.ML/Runtime/EntryPoints/MacroUtils.cs index f1809e4b0e..8c5613014e 100644 --- a/src/Microsoft.ML/Runtime/EntryPoints/MacroUtils.cs +++ b/src/Microsoft.ML/Runtime/EntryPoints/MacroUtils.cs @@ -55,7 +55,7 @@ private static Dictionary { TrainerKinds.SignatureBinaryClassifierTrainer, new TaskInformationBundle { - TrainerFunctionName = "TrainBinary", + TrainerFunctionName = "BinaryClassifier", TrainerSignatureType = typeof(SignatureBinaryClassifierTrainer), EvaluatorInput = settings => new Models.BinaryClassificationEvaluator { @@ -71,7 +71,7 @@ private static Dictionary { TrainerKinds.SignatureMultiClassClassifierTrainer, new TaskInformationBundle{ - TrainerFunctionName = "TrainMultiClass", + TrainerFunctionName = "Classifier", TrainerSignatureType = typeof(SignatureMultiClassClassifierTrainer), EvaluatorInput = settings => new Models.ClassificationEvaluator { @@ -87,7 +87,7 @@ private static Dictionary { TrainerKinds.SignatureRankerTrainer, new TaskInformationBundle { - TrainerFunctionName = "TrainRanking", + TrainerFunctionName = "Ranker", TrainerSignatureType = typeof(SignatureRankerTrainer), EvaluatorInput = 
settings => new Models.RankerEvaluator { @@ -103,7 +103,7 @@ private static Dictionary { TrainerKinds.SignatureRegressorTrainer, new TaskInformationBundle{ - TrainerFunctionName = "TrainRegression", + TrainerFunctionName = "Regressor", TrainerSignatureType = typeof(SignatureRegressorTrainer), EvaluatorInput = settings => new Models.RegressionEvaluator { @@ -119,7 +119,7 @@ private static Dictionary { TrainerKinds.SignatureMultiOutputRegressorTrainer, new TaskInformationBundle { - TrainerFunctionName = "TrainMultiRegression", + TrainerFunctionName = "MultiOutputRegressor", TrainerSignatureType = typeof(SignatureMultiOutputRegressorTrainer), EvaluatorInput = settings => new Models.MultiOutputRegressionEvaluator { @@ -135,7 +135,7 @@ private static Dictionary { TrainerKinds.SignatureAnomalyDetectorTrainer, new TaskInformationBundle { - TrainerFunctionName = "TrainAnomalyDetection", + TrainerFunctionName = "AnomalyDetector", TrainerSignatureType = typeof(SignatureAnomalyDetectorTrainer), EvaluatorInput = settings => new Models.AnomalyDetectionEvaluator { @@ -151,7 +151,7 @@ private static Dictionary { TrainerKinds.SignatureClusteringTrainer, new TaskInformationBundle { - TrainerFunctionName = "TrainClustering", + TrainerFunctionName = "Clusterer", TrainerSignatureType = typeof(SignatureClusteringTrainer), EvaluatorInput = settings => new Models.ClusterEvaluator { @@ -186,7 +186,7 @@ public static TrainerKinds SignatureTypeToTrainerKind(Type sigType) public static TrainerKinds[] SignatureTypesToTrainerKinds(IEnumerable sigTypes) => sigTypes.Select(SignatureTypeToTrainerKind).ToArray(); - public static string GetTrainerName(TrainerKinds kind) => TrainerKindDict[kind].TrainerFunctionName; + private static string GetTrainerName(TrainerKinds kind) => TrainerKindDict[kind].TrainerFunctionName; public static T TrainerKindApiValue(TrainerKinds trainerKind) { @@ -194,5 +194,22 @@ public static T TrainerKindApiValue(TrainerKinds trainerKind) return (T)Enum.Parse(typeof(T), 
name); throw new Exception($"Could not interpret enum value: {trainerKind}"); } + + /// <summary>Tells whether the entry-point trainer <paramref name="type"/> belongs to <paramref name="trainerKind"/>, judged by the suffix of its type name.</summary> + public static bool IsTrainerOfKind(Type type, TrainerKinds trainerKind) + { + // These two names end in "Regressor" although they are mapped to classifier kinds, so they are handled explicitly. + if (type == typeof(Trainers.BinaryLogisticRegressor)) + return trainerKind == TrainerKinds.SignatureBinaryClassifierTrainer; + if (type == typeof(Trainers.LogisticRegressor)) + return trainerKind == TrainerKinds.SignatureMultiClassClassifierTrainer; + if (!type.Name.EndsWith(GetTrainerName(trainerKind), StringComparison.Ordinal)) + return false; + + // "BinaryClassifier" ends with "Classifier" and "MultiOutputRegressor" ends with "Regressor", so the kind with the shorter suffix must reject names carrying the longer one. + if (trainerKind == TrainerKinds.SignatureMultiClassClassifierTrainer) + return !type.Name.EndsWith(GetTrainerName(TrainerKinds.SignatureBinaryClassifierTrainer), StringComparison.Ordinal); + return trainerKind != TrainerKinds.SignatureRegressorTrainer || !type.Name.EndsWith(GetTrainerName(TrainerKinds.SignatureMultiOutputRegressorTrainer), StringComparison.Ordinal); + } } } diff --git a/test/Microsoft.ML.Core.Tests/Microsoft.ML.Core.Tests.csproj b/test/Microsoft.ML.Core.Tests/Microsoft.ML.Core.Tests.csproj index 028e5bebd9..da16090d43 100644 --- a/test/Microsoft.ML.Core.Tests/Microsoft.ML.Core.Tests.csproj +++ b/test/Microsoft.ML.Core.Tests/Microsoft.ML.Core.Tests.csproj @@ -9,6 +9,7 @@ + @@ -18,5 +19,5 @@ - + \ No newline at end of file diff --git a/test/Microsoft.ML.Predictor.Tests/TestAutoInference.cs b/test/Microsoft.ML.Predictor.Tests/TestAutoInference.cs index a169816a79..5166540ccd 100644 --- a/test/Microsoft.ML.Predictor.Tests/TestAutoInference.cs +++ b/test/Microsoft.ML.Predictor.Tests/TestAutoInference.cs @@ -134,78 +134,75 @@ public void EntryPointPipelineSweepSerialization() Assert.True(rows.Length == numIterations); } - [Fact(Skip = "Need CoreTLC specific baseline update")] + [Fact] public void EntryPointPipelineSweep() { - //// Get datasets - //var pathData = GetDataPath(@"../UCI/adult.train"); - //var pathDataTest = GetDataPath(@"../UCI/adult.test"); - //const 
int numOfSampleRows = 1000; - //int numIterations = 10; - //const string schema = - //"sep=, col=Features:R4:0,2,4,10-12 col=workclass:TX:1 col=education:TX:3 col=marital_status:TX:5 col=occupation:TX:6 " + - //"col=relationship:TX:7 col=ethnicity:TX:8 col=sex:TX:9 col=native_country:TX:13 col=label_IsOver50K_:R4:14 header=+"; - //var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false); - //var datasetTrain = ImportTextData.ImportText(Env, - //new ImportTextData.Input { InputFile = inputFileTrain, CustomSchema = schema }).Data.Take(numOfSampleRows); - //var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false); - //var datasetTest = ImportTextData.ImportText(Env, - //new ImportTextData.Input { InputFile = inputFileTest, CustomSchema = schema }).Data.Take(numOfSampleRows); + // Get datasets + var pathData = GetDataPath(@"adult.tiny.with-schema.txt"); + var pathDataTest = GetDataPath(@"adult.tiny.with-schema.txt"); + const int numOfSampleRows = 1000; + int numIterations = 4; + var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false); + var datasetTrain = ImportTextData.ImportText(Env, + new ImportTextData.Input { InputFile = inputFileTrain }).Data.Take(numOfSampleRows); + var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false); + var datasetTest = ImportTextData.ImportText(Env, + new ImportTextData.Input { InputFile = inputFileTest }).Data.Take(numOfSampleRows); - //// Define entrypoint graph - //string inputGraph = @" - //{ - //'Nodes': [ - //{ - //'Name': 'Commands.PipelineSweep', - //'Inputs': { - //'TrainingData': '$TrainingData', - //'TestingData': '$TestingData', - //'StateArguments': { - //'Name': 'AutoMlState', - //'Settings': { - //'Metric': 'Auc', - //'Engine': { - //'Name': 'UniformRandom' - //}, - //'TerminatorArgs': { - //'Name': 'IterationLimited', - //'Settings': { - //'FinalHistoryLength': 10 - //} - //}, - //'TrainerKind': 'SignatureBinaryClassifierTrainer' - //} - //}, - //'BatchSize': 5 
- //}, - //'Outputs': { - //'State': '$StateOut', - //'Results': '$ResultsOut' - //} - //}, - //] - //}"; + // Define entrypoint graph + string inputGraph = @" + { + 'Nodes': [ + { + 'Name': 'Models.PipelineSweeper', + 'Inputs': { + 'TrainingData': '$TrainingData', + 'TestingData': '$TestingData', + 'StateArguments': { + 'Name': 'AutoMlState', + 'Settings': { + 'Metric': 'Auc', + 'Engine': { + 'Name': 'UniformRandom' + }, + 'TerminatorArgs': { + 'Name': 'IterationLimited', + 'Settings': { + 'FinalHistoryLength': 4 + } + }, + 'TrainerKind': 'SignatureBinaryClassifierTrainer' + } + }, + 'BatchSize': 2 + }, + 'Outputs': { + 'State': '$StateOut', + 'Results': '$ResultsOut' + } + }, + ] + }"; - //JObject graph = JObject.Parse(inputGraph); - //var catalog = ModuleCatalog.CreateInstance(Env); + JObject graph = JObject.Parse(inputGraph); + var catalog = ModuleCatalog.CreateInstance(Env); - //var runner = new GraphRunner(Env, catalog, graph[FieldNames.Nodes] as JArray); - //runner.SetInput("TrainingData", datasetTrain); - //runner.SetInput("TestingData", datasetTest); - //runner.RunAll(); + var runner = new GraphRunner(Env, catalog, graph[FieldNames.Nodes] as JArray); + runner.SetInput("TrainingData", datasetTrain); + runner.SetInput("TestingData", datasetTest); + runner.RunAll(); - //var autoMlState = runner.GetOutput("StateOut"); - //Assert.IsNotNull(autoMlState); - //var allPipelines = autoMlState.GetAllEvaluatedPipelines(); - //var bestPipeline = autoMlState.GetBestPipeline(); - //Assert.AreEqual(allPipelines.Length, numIterations); - //Assert.IsTrue(bestPipeline.PerformanceSummary.MetricValue > 0.1); + var autoMlState = runner.GetOutput("StateOut"); + Assert.NotNull(autoMlState); + var allPipelines = autoMlState.GetAllEvaluatedPipelines(); + var bestPipeline = autoMlState.GetBestPipeline(); + Assert.Equal(numIterations, allPipelines.Length); // expected first, per xUnit's Assert.Equal(expected, actual) + Assert.True(bestPipeline.PerformanceSummary.MetricValue > 0.1); - //var results = runner.GetOutput("ResultsOut"); - 
//Assert.IsNotNull(results); - //var rows = PipelinePattern.ExtractResults(Env, results, "Graph", "MetricValue", "PipelineId"); - //Assert.IsTrue(rows.Length == numIterations); + var results = runner.GetOutput("ResultsOut"); + Assert.NotNull(results); + var rows = PipelinePattern.ExtractResults(Env, results, "Graph", "MetricValue", "PipelineId"); + Assert.True(rows.Length == numIterations); } [Fact(Skip = "Datasets Not Present")] diff --git a/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj b/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj index 76432a7a0f..59c6d8f6c6 100644 --- a/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj +++ b/test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj @@ -1,5 +1,6 @@  +