From fd85b7406e62b3629c520f1dedbe0928585e5694 Mon Sep 17 00:00:00 2001 From: XiaoYun Zhang Date: Fri, 11 Nov 2022 18:34:16 -0800 Subject: [PATCH 1/3] add lbfgsME in MultiClassification API --- src/Microsoft.ML.AutoML/API/AutoCatalog.cs | 133 ++++++++++++++---- .../API/BinaryClassificationExperiment.cs | 4 +- .../API/MulticlassClassificationExperiment.cs | 7 +- .../API/RegressionExperiment.cs | 2 +- .../AutoMLExperimentTests.cs | 4 +- 5 files changed, 111 insertions(+), 39 deletions(-) diff --git a/src/Microsoft.ML.AutoML/API/AutoCatalog.cs b/src/Microsoft.ML.AutoML/API/AutoCatalog.cs index 280777db70..ab69800e49 100644 --- a/src/Microsoft.ML.AutoML/API/AutoCatalog.cs +++ b/src/Microsoft.ML.AutoML/API/AutoCatalog.cs @@ -10,6 +10,7 @@ using Microsoft.ML.Data; using Microsoft.ML.Runtime; using Microsoft.ML.SearchSpace; +using Microsoft.ML.Trainers; using Microsoft.ML.Trainers.FastTree; namespace Microsoft.ML.AutoML @@ -313,8 +314,8 @@ public AutoMLExperiment CreateExperiment(AutoMLExperiment.AutoMLExperimentSettin /// true if use fast forest as available trainer. /// true if use lgbm as available trainer. /// true if use fast tree as available trainer. - /// true if use lbfgs as available trainer. - /// true if use sdca as available trainer. + /// true if use as available trainer. + /// true if use as available trainer. /// if provided, use it as initial option for fast tree, otherwise the default option will be used. /// if provided, use it as initial option for lgbm, otherwise the default option will be used. /// if provided, use it as initial option for fast forest, otherwise the default option will be used. @@ -323,12 +324,27 @@ public AutoMLExperiment CreateExperiment(AutoMLExperiment.AutoMLExperimentSettin /// if provided, use it as search space for fast tree, otherwise the default search space will be used. /// if provided, use it as search space for lgbm, otherwise the default search space will be used. 
/// if provided, use it as search space for fast forest, otherwise the default search space will be used. - /// if provided, use it as search space for lbfgs, otherwise the default search space will be used. - /// if provided, use it as search space for sdca, otherwise the default search space will be used. + /// if provided, use it as search space for , otherwise the default search space will be used. + /// if provided, use it as search space for , otherwise the default search space will be used. /// - public SweepablePipeline BinaryClassification(string labelColumnName = DefaultColumnNames.Label, string featureColumnName = DefaultColumnNames.Features, string exampleWeightColumnName = null, bool useFastForest = true, bool useLgbm = true, bool useFastTree = true, bool useLbfgs = true, bool useSdca = true, - FastTreeOption fastTreeOption = null, LgbmOption lgbmOption = null, FastForestOption fastForestOption = null, LbfgsOption lbfgsOption = null, SdcaOption sdcaOption = null, - SearchSpace fastTreeSearchSpace = null, SearchSpace lgbmSearchSpace = null, SearchSpace fastForestSearchSpace = null, SearchSpace lbfgsSearchSpace = null, SearchSpace sdcaSearchSpace = null) + public SweepablePipeline BinaryClassification(string labelColumnName = DefaultColumnNames.Label, + string featureColumnName = DefaultColumnNames.Features, + string exampleWeightColumnName = null, + bool useFastForest = true, + bool useLgbm = true, + bool useFastTree = true, + bool useLbfgsLogisticRegression = true, + bool useSdcaLogisticRegression = true, + FastTreeOption fastTreeOption = null, + LgbmOption lgbmOption = null, + FastForestOption fastForestOption = null, + LbfgsOption lbfgsOption = null, + SdcaOption sdcaOption = null, + SearchSpace fastTreeSearchSpace = null, + SearchSpace lgbmSearchSpace = null, + SearchSpace fastForestSearchSpace = null, + SearchSpace lbfgsLogisticRegressionSearchSpace = null, + SearchSpace sdcaLogisticRegressionSearchSpace = null) { var res = new List(); @@ -359,16 
+375,16 @@ public SweepablePipeline BinaryClassification(string labelColumnName = DefaultCo res.Add(SweepableEstimatorFactory.CreateLightGbmBinary(lgbmOption, lgbmSearchSpace ?? new SearchSpace(lgbmOption))); } - if (useLbfgs) + if (useLbfgsLogisticRegression) { lbfgsOption = lbfgsOption ?? new LbfgsOption(); lbfgsOption.LabelColumnName = labelColumnName; lbfgsOption.FeatureColumnName = featureColumnName; lbfgsOption.ExampleWeightColumnName = exampleWeightColumnName; - res.Add(SweepableEstimatorFactory.CreateLbfgsLogisticRegressionBinary(lbfgsOption, lbfgsSearchSpace ?? new SearchSpace(lbfgsOption))); + res.Add(SweepableEstimatorFactory.CreateSdcaLogisticRegressionBinary(lbfgsOption, lbfgsSearchSpace ?? new SearchSpace(lbfgsOption))); } - if (useSdca) + if (useSdcaLogisticRegression) { sdcaOption = sdcaOption ?? new SdcaOption(); sdcaOption.LabelColumnName = labelColumnName; @@ -389,8 +405,10 @@ public SweepablePipeline BinaryClassification(string labelColumnName = DefaultCo /// true if use fast forest as available trainer. /// true if use lgbm as available trainer. /// true if use fast tree as available trainer. - /// true if use lbfgs as available trainer. - /// true if use sdca as available trainer. + /// true if use as available trainer. + /// true if use as available trainer. + /// true if use as available trainer. + /// true if use as available trainer. /// if provided, use it as initial option for fast tree, otherwise the default option will be used. /// if provided, use it as initial option for lgbm, otherwise the default option will be used. /// if provided, use it as initial option for fast forest, otherwise the default option will be used. @@ -399,12 +417,34 @@ public SweepablePipeline BinaryClassification(string labelColumnName = DefaultCo /// if provided, use it as search space for fast tree, otherwise the default search space will be used. /// if provided, use it as search space for lgbm, otherwise the default search space will be used. 
/// if provided, use it as search space for fast forest, otherwise the default search space will be used. - /// if provided, use it as search space for lbfgs, otherwise the default search space will be used. - /// if provided, use it as search space for sdca, otherwise the default search space will be used. + /// if provided, use it as search space for , otherwise the default search space will be used. + /// if provided, use it as search space for , otherwise the default search space will be used. + /// if provided, use it as search space for , otherwise the default search space will be used. + /// if provided, use it as search space for , otherwise the default search space will be used. /// - public SweepablePipeline MultiClassification(string labelColumnName = DefaultColumnNames.Label, string featureColumnName = DefaultColumnNames.Features, string exampleWeightColumnName = null, bool useFastForest = true, bool useLgbm = true, bool useFastTree = true, bool useLbfgs = true, bool useSdca = true, - FastTreeOption fastTreeOption = null, LgbmOption lgbmOption = null, FastForestOption fastForestOption = null, LbfgsOption lbfgsOption = null, SdcaOption sdcaOption = null, - SearchSpace fastTreeSearchSpace = null, SearchSpace lgbmSearchSpace = null, SearchSpace fastForestSearchSpace = null, SearchSpace lbfgsSearchSpace = null, SearchSpace sdcaSearchSpace = null) + public SweepablePipeline MultiClassification( + string labelColumnName = DefaultColumnNames.Label, + string featureColumnName = DefaultColumnNames.Features, + string exampleWeightColumnName = null, + bool useFastForest = true, + bool useLgbm = true, + bool useFastTree = true, + bool useLbfgsMaximumEntrophy = true, + bool useLbfgsLogisticRegression = true, + bool useSdcaMaximumEntrophy = true, + bool useSdcaLogisticRegression = true, + FastTreeOption fastTreeOption = null, + LgbmOption lgbmOption = null, + FastForestOption fastForestOption = null, + LbfgsOption lbfgsOption = null, + SdcaOption sdcaOption = null, + 
SearchSpace fastTreeSearchSpace = null, + SearchSpace lgbmSearchSpace = null, + SearchSpace fastForestSearchSpace = null, + SearchSpace lbfgsMaximumEntrophySearchSpace = null, + SearchSpace lbfgsLogisticRegressionSearchSpace = null, + SearchSpace sdcaMaximumEntorphySearchSpace = null, + SearchSpace sdcaLogisticRegressionSearchSpace = null) { var res = new List(); @@ -435,24 +475,40 @@ public SweepablePipeline MultiClassification(string labelColumnName = DefaultCol res.Add(SweepableEstimatorFactory.CreateLightGbmMulti(lgbmOption, lgbmSearchSpace ?? new SearchSpace(lgbmOption))); } - if (useLbfgs) + if (useLbfgsMaximumEntrophy) { lbfgsOption = lbfgsOption ?? new LbfgsOption(); lbfgsOption.LabelColumnName = labelColumnName; lbfgsOption.FeatureColumnName = featureColumnName; lbfgsOption.ExampleWeightColumnName = exampleWeightColumnName; - res.Add(SweepableEstimatorFactory.CreateLbfgsLogisticRegressionOva(lbfgsOption, lbfgsSearchSpace ?? new SearchSpace(lbfgsOption))); - res.Add(SweepableEstimatorFactory.CreateLbfgsMaximumEntropyMulti(lbfgsOption, lbfgsSearchSpace ?? new SearchSpace(lbfgsOption))); + res.Add(SweepableEstimatorFactory.CreateLbfgsMaximumEntropyMulti(lbfgsOption, lbfgsMaximumEntrophySearchSpace ?? new SearchSpace(lbfgsOption))); } - if (useSdca) + if (useLbfgsLogisticRegression) + { + lbfgsOption = lbfgsOption ?? new LbfgsOption(); + lbfgsOption.LabelColumnName = labelColumnName; + lbfgsOption.FeatureColumnName = featureColumnName; + lbfgsOption.ExampleWeightColumnName = exampleWeightColumnName; + res.Add(SweepableEstimatorFactory.CreateLbfgsLogisticRegressionOva(lbfgsOption, lbfgsLogisticRegressionSearchSpace ?? new SearchSpace(lbfgsOption))); + } + + if (useSdcaMaximumEntrophy) + { + sdcaOption = sdcaOption ?? 
new SdcaOption(); + sdcaOption.LabelColumnName = labelColumnName; + sdcaOption.FeatureColumnName = featureColumnName; + sdcaOption.ExampleWeightColumnName = exampleWeightColumnName; + res.Add(SweepableEstimatorFactory.CreateSdcaMaximumEntropyMulti(sdcaOption, sdcaMaximumEntorphySearchSpace ?? new SearchSpace(sdcaOption))); + } + + if (useSdcaLogisticRegression) { sdcaOption = sdcaOption ?? new SdcaOption(); sdcaOption.LabelColumnName = labelColumnName; sdcaOption.FeatureColumnName = featureColumnName; sdcaOption.ExampleWeightColumnName = exampleWeightColumnName; - res.Add(SweepableEstimatorFactory.CreateSdcaMaximumEntropyMulti(sdcaOption, sdcaSearchSpace ?? new SearchSpace(sdcaOption))); - res.Add(SweepableEstimatorFactory.CreateSdcaLogisticRegressionOva(sdcaOption, sdcaSearchSpace ?? new SearchSpace(sdcaOption))); + res.Add(SweepableEstimatorFactory.CreateSdcaLogisticRegressionOva(sdcaOption, sdcaLogisticRegressionSearchSpace ?? new SearchSpace(sdcaOption))); } return new SweepablePipeline().Append(res.ToArray()); @@ -467,8 +523,7 @@ public SweepablePipeline MultiClassification(string labelColumnName = DefaultCol /// true if use fast forest as available trainer. /// true if use lgbm as available trainer. /// true if use fast tree as available trainer. - /// true if use lbfgs as available trainer. - /// true if use sdca as available trainer. + /// true if use as available trainer. /// if provided, use it as initial option for fast tree, otherwise the default option will be used. /// if provided, use it as initial option for lgbm, otherwise the default option will be used. /// if provided, use it as initial option for fast forest, otherwise the default option will be used. @@ -477,12 +532,28 @@ public SweepablePipeline MultiClassification(string labelColumnName = DefaultCol /// if provided, use it as search space for fast tree, otherwise the default search space will be used. 
/// if provided, use it as search space for lgbm, otherwise the default search space will be used. /// if provided, use it as search space for fast forest, otherwise the default search space will be used. - /// if provided, use it as search space for lbfgs, otherwise the default search space will be used. + /// if provided, use it as search space for , otherwise the default search space will be used. /// if provided, use it as search space for sdca, otherwise the default search space will be used. /// - public SweepablePipeline Regression(string labelColumnName = DefaultColumnNames.Label, string featureColumnName = DefaultColumnNames.Features, string exampleWeightColumnName = null, bool useFastForest = true, bool useLgbm = true, bool useFastTree = true, bool useLbfgs = true, bool useSdca = true, - FastTreeOption fastTreeOption = null, LgbmOption lgbmOption = null, FastForestOption fastForestOption = null, LbfgsOption lbfgsOption = null, SdcaOption sdcaOption = null, - SearchSpace fastTreeSearchSpace = null, SearchSpace lgbmSearchSpace = null, SearchSpace fastForestSearchSpace = null, SearchSpace lbfgsSearchSpace = null, SearchSpace sdcaSearchSpace = null) + public SweepablePipeline Regression( + string labelColumnName = DefaultColumnNames.Label, + string featureColumnName = DefaultColumnNames.Features, + string exampleWeightColumnName = null, + bool useFastForest = true, + bool useLgbm = true, + bool useFastTree = true, + bool useLbfgsPoissonRegression = true, + bool useSdca = true, + FastTreeOption fastTreeOption = null, + LgbmOption lgbmOption = null, + FastForestOption fastForestOption = null, + LbfgsOption lbfgsOption = null, + SdcaOption sdcaOption = null, + SearchSpace fastTreeSearchSpace = null, + SearchSpace lgbmSearchSpace = null, + SearchSpace fastForestSearchSpace = null, + SearchSpace lbfgsPoissonRegressionSearchSpace = null, + SearchSpace sdcaSearchSpace = null) { var res = new List(); @@ -513,13 +584,13 @@ public SweepablePipeline Regression(string 
labelColumnName = DefaultColumnNames. res.Add(SweepableEstimatorFactory.CreateLightGbmRegression(lgbmOption, lgbmSearchSpace ?? new SearchSpace(lgbmOption))); } - if (useLbfgs) + if (useLbfgsPoissonRegression) { lbfgsOption = lbfgsOption ?? new LbfgsOption(); lbfgsOption.LabelColumnName = labelColumnName; lbfgsOption.FeatureColumnName = featureColumnName; lbfgsOption.ExampleWeightColumnName = exampleWeightColumnName; - res.Add(SweepableEstimatorFactory.CreateLbfgsPoissonRegressionRegression(lbfgsOption, lbfgsSearchSpace ?? new SearchSpace(lbfgsOption))); + res.Add(SweepableEstimatorFactory.CreateLbfgsPoissonRegressionRegression(lbfgsOption, lbfgsPoissonRegressionSearchSpace ?? new SearchSpace(lbfgsOption))); } if (useSdca) diff --git a/src/Microsoft.ML.AutoML/API/BinaryClassificationExperiment.cs b/src/Microsoft.ML.AutoML/API/BinaryClassificationExperiment.cs index f658a6f1aa..761aac2f4c 100644 --- a/src/Microsoft.ML.AutoML/API/BinaryClassificationExperiment.cs +++ b/src/Microsoft.ML.AutoML/API/BinaryClassificationExperiment.cs @@ -327,12 +327,12 @@ private SweepablePipeline CreateBinaryClassificationPipeline(IDataView trainData if (preFeaturizer != null) { return preFeaturizer.Append(Context.Auto().Featurizer(trainData, columnInformation, Features)) - .Append(Context.Auto().BinaryClassification(labelColumnName: columnInformation.LabelColumnName, useSdca: useSdca, useFastTree: useFastTree, useLgbm: useLgbm, useLbfgs: uselbfgs, useFastForest: useFastForest, featureColumnName: Features)); + .Append(Context.Auto().BinaryClassification(labelColumnName: columnInformation.LabelColumnName, useSdcaLogisticRegression: useSdca, useFastTree: useFastTree, useLgbm: useLgbm, useLbfgsLogisticRegression: uselbfgs, useFastForest: useFastForest, featureColumnName: Features)); } else { return Context.Auto().Featurizer(trainData, columnInformation, Features) - .Append(Context.Auto().BinaryClassification(labelColumnName: columnInformation.LabelColumnName, useSdca: useSdca, useFastTree: 
useFastTree, useLgbm: useLgbm, useLbfgs: uselbfgs, useFastForest: useFastForest, featureColumnName: Features)); + .Append(Context.Auto().BinaryClassification(labelColumnName: columnInformation.LabelColumnName, useSdcaLogisticRegression: useSdca, useFastTree: useFastTree, useLgbm: useLgbm, useLbfgsLogisticRegression: uselbfgs, useFastForest: useFastForest, featureColumnName: Features)); } } } diff --git a/src/Microsoft.ML.AutoML/API/MulticlassClassificationExperiment.cs b/src/Microsoft.ML.AutoML/API/MulticlassClassificationExperiment.cs index 836eddedb5..b7344e7b0f 100644 --- a/src/Microsoft.ML.AutoML/API/MulticlassClassificationExperiment.cs +++ b/src/Microsoft.ML.AutoML/API/MulticlassClassificationExperiment.cs @@ -313,8 +313,9 @@ private protected override RunDetail GetBestRun private SweepablePipeline CreateMulticlassClassificationPipeline(IDataView trainData, ColumnInformation columnInformation, IEstimator preFeaturizer = null) { - var useSdca = Settings.Trainers.Contains(MulticlassClassificationTrainer.SdcaMaximumEntropy); - var uselbfgs = Settings.Trainers.Contains(MulticlassClassificationTrainer.LbfgsLogisticRegressionOva); + var useSdcaMaximumEntrophy = Settings.Trainers.Contains(MulticlassClassificationTrainer.SdcaMaximumEntropy); + var uselbfgsLR = Settings.Trainers.Contains(MulticlassClassificationTrainer.LbfgsLogisticRegressionOva); + var uselbfgsME = Settings.Trainers.Contains(MulticlassClassificationTrainer.LbfgsMaximumEntropy); var useLgbm = Settings.Trainers.Contains(MulticlassClassificationTrainer.LightGbm); var useFastForest = Settings.Trainers.Contains(MulticlassClassificationTrainer.FastForestOva); var useFastTree = Settings.Trainers.Contains(MulticlassClassificationTrainer.FastTreeOva); @@ -329,7 +330,7 @@ private SweepablePipeline CreateMulticlassClassificationPipeline(IDataView train pipeline = pipeline.Append(Context.Auto().Featurizer(trainData, columnInformation, Features)); pipeline = 
pipeline.Append(Context.Transforms.Conversion.MapValueToKey(label, label)); - pipeline = pipeline.Append(Context.Auto().MultiClassification(label, useSdca: useSdca, useFastTree: useFastTree, useLgbm: useLgbm, useLbfgs: uselbfgs, useFastForest: useFastForest, featureColumnName: Features)); + pipeline = pipeline.Append(Context.Auto().MultiClassification(label, useSdcaMaximumEntrophy: useSdcaMaximumEntrophy, useFastTree: useFastTree, useLgbm: useLgbm, useLbfgsMaximumEntrophy: uselbfgsME, useLbfgsLogisticRegression: uselbfgsLR, useFastForest: useFastForest, featureColumnName: Features)); pipeline = pipeline.Append(Context.Transforms.Conversion.MapKeyToValue(DefaultColumnNames.PredictedLabel, DefaultColumnNames.PredictedLabel)); return pipeline; diff --git a/src/Microsoft.ML.AutoML/API/RegressionExperiment.cs b/src/Microsoft.ML.AutoML/API/RegressionExperiment.cs index a836b6c0c0..3d93a63b0a 100644 --- a/src/Microsoft.ML.AutoML/API/RegressionExperiment.cs +++ b/src/Microsoft.ML.AutoML/API/RegressionExperiment.cs @@ -310,7 +310,7 @@ private SweepablePipeline CreateRegressionPipeline(IDataView trainData, ColumnIn var label = columnInformation.LabelColumnName; pipeline = pipeline.Append(Context.Auto().Featurizer(trainData, columnInformation, Features)); - pipeline = pipeline.Append(Context.Auto().Regression(label, useSdca: useSdca, useFastTree: useFastTree, useLgbm: useLgbm, useLbfgs: uselbfgs, useFastForest: useFastForest, featureColumnName: Features)); + pipeline = pipeline.Append(Context.Auto().Regression(label, useSdca: useSdca, useFastTree: useFastTree, useLgbm: useLgbm, useLbfgsPoissonRegression: uselbfgs, useFastForest: useFastForest, featureColumnName: Features)); return pipeline; } diff --git a/test/Microsoft.ML.AutoML.Tests/AutoMLExperimentTests.cs b/test/Microsoft.ML.AutoML.Tests/AutoMLExperimentTests.cs index 6d14b266bb..2402412bf5 100644 --- a/test/Microsoft.ML.AutoML.Tests/AutoMLExperimentTests.cs +++ b/test/Microsoft.ML.AutoML.Tests/AutoMLExperimentTests.cs 
@@ -264,7 +264,7 @@ public async Task AutoMLExperiment_Iris_CV_5_Test() var label = "Label"; var pipeline = context.Auto().Featurizer(data, excludeColumns: new[] { label }) .Append(context.Transforms.Conversion.MapValueToKey(label, label)) - .Append(context.Auto().MultiClassification(label, useLgbm: false, useSdca: false, useLbfgs: false)); + .Append(context.Auto().MultiClassification(label, useLgbm: false, useSdcaMaximumEntrophy: false, useLbfgsMaximumEntrophy: false)); experiment.SetDataset(data, 5) .SetMulticlassClassificationMetric(MulticlassClassificationMetric.MacroAccuracy, label) @@ -291,7 +291,7 @@ public async Task AutoMLExperiment_Iris_Train_Test_Split_Test() var label = "Label"; var pipeline = context.Auto().Featurizer(data, excludeColumns: new[] { label }) .Append(context.Transforms.Conversion.MapValueToKey(label, label)) - .Append(context.Auto().MultiClassification(label, useLgbm: false, useSdca: false, useLbfgs: false)); + .Append(context.Auto().MultiClassification(label, useLgbm: false, useSdcaMaximumEntrophy: false, useLbfgsMaximumEntrophy: false)); experiment.SetDataset(context.Data.TrainTestSplit(data)) .SetMulticlassClassificationMetric(MulticlassClassificationMetric.MacroAccuracy, label) From be2dbcb15415b8265b39c97618ece29d8d7863de Mon Sep 17 00:00:00 2001 From: XiaoYun Zhang Date: Fri, 11 Nov 2022 19:09:05 -0800 Subject: [PATCH 2/3] fix build error --- src/Microsoft.ML.AutoML/API/AutoCatalog.cs | 101 +++++++++--------- .../AutoMLExperimentTests.cs | 8 +- 2 files changed, 57 insertions(+), 52 deletions(-) diff --git a/src/Microsoft.ML.AutoML/API/AutoCatalog.cs b/src/Microsoft.ML.AutoML/API/AutoCatalog.cs index ab69800e49..16ac3b5b82 100644 --- a/src/Microsoft.ML.AutoML/API/AutoCatalog.cs +++ b/src/Microsoft.ML.AutoML/API/AutoCatalog.cs @@ -319,8 +319,8 @@ public AutoMLExperiment CreateExperiment(AutoMLExperiment.AutoMLExperimentSettin /// if provided, use it as initial option for fast tree, otherwise the default option will be used. 
/// if provided, use it as initial option for lgbm, otherwise the default option will be used. /// if provided, use it as initial option for fast forest, otherwise the default option will be used. - /// if provided, use it as initial option for lbfgs, otherwise the default option will be used. - /// if provided, use it as initial option for sdca, otherwise the default option will be used. + /// if provided, use it as initial option for , otherwise the default option will be used. + /// if provided, use it as initial option for , otherwise the default option will be used. /// if provided, use it as search space for fast tree, otherwise the default search space will be used. /// if provided, use it as search space for lgbm, otherwise the default search space will be used. /// if provided, use it as search space for fast forest, otherwise the default search space will be used. @@ -338,8 +338,8 @@ public SweepablePipeline BinaryClassification(string labelColumnName = DefaultCo FastTreeOption fastTreeOption = null, LgbmOption lgbmOption = null, FastForestOption fastForestOption = null, - LbfgsOption lbfgsOption = null, - SdcaOption sdcaOption = null, + LbfgsOption lbfgsLogisticRegressionOption = null, + SdcaOption sdcaLogisticRegressionOption = null, SearchSpace fastTreeSearchSpace = null, SearchSpace lgbmSearchSpace = null, SearchSpace fastForestSearchSpace = null, @@ -377,20 +377,20 @@ public SweepablePipeline BinaryClassification(string labelColumnName = DefaultCo if (useLbfgsLogisticRegression) { - lbfgsOption = lbfgsOption ?? new LbfgsOption(); - lbfgsOption.LabelColumnName = labelColumnName; - lbfgsOption.FeatureColumnName = featureColumnName; - lbfgsOption.ExampleWeightColumnName = exampleWeightColumnName; - res.Add(SweepableEstimatorFactory.CreateSdcaLogisticRegressionBinary(lbfgsOption, lbfgsSearchSpace ?? new SearchSpace(lbfgsOption))); + lbfgsLogisticRegressionOption = lbfgsLogisticRegressionOption ?? 
new LbfgsOption(); + lbfgsLogisticRegressionOption.LabelColumnName = labelColumnName; + lbfgsLogisticRegressionOption.FeatureColumnName = featureColumnName; + lbfgsLogisticRegressionOption.ExampleWeightColumnName = exampleWeightColumnName; + res.Add(SweepableEstimatorFactory.CreateLbfgsLogisticRegressionBinary(lbfgsLogisticRegressionOption, lbfgsLogisticRegressionSearchSpace ?? new SearchSpace(lbfgsLogisticRegressionOption))); } if (useSdcaLogisticRegression) { - sdcaOption = sdcaOption ?? new SdcaOption(); - sdcaOption.LabelColumnName = labelColumnName; - sdcaOption.FeatureColumnName = featureColumnName; - sdcaOption.ExampleWeightColumnName = exampleWeightColumnName; - res.Add(SweepableEstimatorFactory.CreateSdcaLogisticRegressionBinary(sdcaOption, sdcaSearchSpace ?? new SearchSpace(sdcaOption))); + sdcaLogisticRegressionOption = sdcaLogisticRegressionOption ?? new SdcaOption(); + sdcaLogisticRegressionOption.LabelColumnName = labelColumnName; + sdcaLogisticRegressionOption.FeatureColumnName = featureColumnName; + sdcaLogisticRegressionOption.ExampleWeightColumnName = exampleWeightColumnName; + res.Add(SweepableEstimatorFactory.CreateSdcaLogisticRegressionBinary(sdcaLogisticRegressionOption, sdcaLogisticRegressionSearchSpace ?? new SearchSpace(sdcaLogisticRegressionOption))); } return new SweepablePipeline().Append(res.ToArray()); @@ -412,15 +412,17 @@ public SweepablePipeline BinaryClassification(string labelColumnName = DefaultCo /// if provided, use it as initial option for fast tree, otherwise the default option will be used. /// if provided, use it as initial option for lgbm, otherwise the default option will be used. /// if provided, use it as initial option for fast forest, otherwise the default option will be used. - /// if provided, use it as initial option for lbfgs, otherwise the default option will be used. - /// if provided, use it as initial option for sdca, otherwise the default option will be used. 
+ /// if provided, use it as initial option for , otherwise the default option will be used. + /// if provided, use it as initial option for , otherwise the default option will be used. + /// if provided, use it as initial option for , otherwise the default option will be used. + /// if provided, use it as initial option for , otherwise the default option will be used. /// if provided, use it as search space for fast tree, otherwise the default search space will be used. /// if provided, use it as search space for lgbm, otherwise the default search space will be used. /// if provided, use it as search space for fast forest, otherwise the default search space will be used. /// if provided, use it as search space for , otherwise the default search space will be used. - /// if provided, use it as search space for , otherwise the default search space will be used. + /// if provided, use it as search space for , otherwise the default search space will be used. /// if provided, use it as search space for , otherwise the default search space will be used. - /// if provided, use it as search space for , otherwise the default search space will be used. + /// if provided, use it as search space for , otherwise the default search space will be used. 
/// public SweepablePipeline MultiClassification( string labelColumnName = DefaultColumnNames.Label, @@ -436,8 +438,10 @@ public SweepablePipeline MultiClassification( FastTreeOption fastTreeOption = null, LgbmOption lgbmOption = null, FastForestOption fastForestOption = null, - LbfgsOption lbfgsOption = null, - SdcaOption sdcaOption = null, + LbfgsOption lbfgsMaximumEntrophyOption = null, + LbfgsOption lbfgsLogisticRegressionOption = null, + SdcaOption sdcaMaximumEntrophyOption = null, + SdcaOption sdcaLogisticRegressionOption = null, SearchSpace fastTreeSearchSpace = null, SearchSpace lgbmSearchSpace = null, SearchSpace fastForestSearchSpace = null, @@ -477,38 +481,38 @@ public SweepablePipeline MultiClassification( if (useLbfgsMaximumEntrophy) { - lbfgsOption = lbfgsOption ?? new LbfgsOption(); - lbfgsOption.LabelColumnName = labelColumnName; - lbfgsOption.FeatureColumnName = featureColumnName; - lbfgsOption.ExampleWeightColumnName = exampleWeightColumnName; - res.Add(SweepableEstimatorFactory.CreateLbfgsMaximumEntropyMulti(lbfgsOption, lbfgsMaximumEntrophySearchSpace ?? new SearchSpace(lbfgsOption))); + lbfgsMaximumEntrophyOption = lbfgsMaximumEntrophyOption ?? new LbfgsOption(); + lbfgsMaximumEntrophyOption.LabelColumnName = labelColumnName; + lbfgsMaximumEntrophyOption.FeatureColumnName = featureColumnName; + lbfgsMaximumEntrophyOption.ExampleWeightColumnName = exampleWeightColumnName; + res.Add(SweepableEstimatorFactory.CreateLbfgsMaximumEntropyMulti(lbfgsMaximumEntrophyOption, lbfgsMaximumEntrophySearchSpace ?? new SearchSpace(lbfgsMaximumEntrophyOption))); } if (useLbfgsLogisticRegression) { - lbfgsOption = lbfgsOption ?? new LbfgsOption(); - lbfgsOption.LabelColumnName = labelColumnName; - lbfgsOption.FeatureColumnName = featureColumnName; - lbfgsOption.ExampleWeightColumnName = exampleWeightColumnName; - res.Add(SweepableEstimatorFactory.CreateLbfgsLogisticRegressionOva(lbfgsOption, lbfgsLogisticRegressionSearchSpace ?? 
new SearchSpace(lbfgsOption))); + lbfgsLogisticRegressionOption = lbfgsLogisticRegressionOption ?? new LbfgsOption(); + lbfgsLogisticRegressionOption.LabelColumnName = labelColumnName; + lbfgsLogisticRegressionOption.FeatureColumnName = featureColumnName; + lbfgsLogisticRegressionOption.ExampleWeightColumnName = exampleWeightColumnName; + res.Add(SweepableEstimatorFactory.CreateLbfgsLogisticRegressionOva(lbfgsLogisticRegressionOption, lbfgsLogisticRegressionSearchSpace ?? new SearchSpace(lbfgsLogisticRegressionOption))); } if (useSdcaMaximumEntrophy) { - sdcaOption = sdcaOption ?? new SdcaOption(); - sdcaOption.LabelColumnName = labelColumnName; - sdcaOption.FeatureColumnName = featureColumnName; - sdcaOption.ExampleWeightColumnName = exampleWeightColumnName; - res.Add(SweepableEstimatorFactory.CreateSdcaMaximumEntropyMulti(sdcaOption, sdcaMaximumEntorphySearchSpace ?? new SearchSpace(sdcaOption))); + sdcaMaximumEntrophyOption = sdcaMaximumEntrophyOption ?? new SdcaOption(); + sdcaMaximumEntrophyOption.LabelColumnName = labelColumnName; + sdcaMaximumEntrophyOption.FeatureColumnName = featureColumnName; + sdcaMaximumEntrophyOption.ExampleWeightColumnName = exampleWeightColumnName; + res.Add(SweepableEstimatorFactory.CreateSdcaMaximumEntropyMulti(sdcaMaximumEntrophyOption, sdcaMaximumEntorphySearchSpace ?? new SearchSpace(sdcaMaximumEntrophyOption))); } if (useSdcaLogisticRegression) { - sdcaOption = sdcaOption ?? new SdcaOption(); - sdcaOption.LabelColumnName = labelColumnName; - sdcaOption.FeatureColumnName = featureColumnName; - sdcaOption.ExampleWeightColumnName = exampleWeightColumnName; - res.Add(SweepableEstimatorFactory.CreateSdcaLogisticRegressionOva(sdcaOption, sdcaLogisticRegressionSearchSpace ?? new SearchSpace(sdcaOption))); + sdcaLogisticRegressionOption = sdcaLogisticRegressionOption ?? 
new SdcaOption(); + sdcaLogisticRegressionOption.LabelColumnName = labelColumnName; + sdcaLogisticRegressionOption.FeatureColumnName = featureColumnName; + sdcaLogisticRegressionOption.ExampleWeightColumnName = exampleWeightColumnName; + res.Add(SweepableEstimatorFactory.CreateSdcaLogisticRegressionOva(sdcaLogisticRegressionOption, sdcaLogisticRegressionSearchSpace ?? new SearchSpace(sdcaLogisticRegressionOption))); } return new SweepablePipeline().Append(res.ToArray()); @@ -524,11 +528,12 @@ public SweepablePipeline MultiClassification( /// true if use lgbm as available trainer. /// true if use fast tree as available trainer. /// true if use as available trainer. + /// true if use as available trainer. /// if provided, use it as initial option for fast tree, otherwise the default option will be used. /// if provided, use it as initial option for lgbm, otherwise the default option will be used. /// if provided, use it as initial option for fast forest, otherwise the default option will be used. - /// if provided, use it as initial option for lbfgs, otherwise the default option will be used. - /// if provided, use it as initial option for sdca, otherwise the default option will be used. + /// if provided, use it as initial option for , otherwise the default option will be used. + /// if provided, use it as initial option for , otherwise the default option will be used. /// if provided, use it as search space for fast tree, otherwise the default search space will be used. /// if provided, use it as search space for lgbm, otherwise the default search space will be used. /// if provided, use it as search space for fast forest, otherwise the default search space will be used. 
@@ -547,7 +552,7 @@ public SweepablePipeline Regression( FastTreeOption fastTreeOption = null, LgbmOption lgbmOption = null, FastForestOption fastForestOption = null, - LbfgsOption lbfgsOption = null, + LbfgsOption lbfgsPoissonRegressionOption = null, SdcaOption sdcaOption = null, SearchSpace fastTreeSearchSpace = null, SearchSpace lgbmSearchSpace = null, @@ -586,11 +591,11 @@ public SweepablePipeline Regression( if (useLbfgsPoissonRegression) { - lbfgsOption = lbfgsOption ?? new LbfgsOption(); - lbfgsOption.LabelColumnName = labelColumnName; - lbfgsOption.FeatureColumnName = featureColumnName; - lbfgsOption.ExampleWeightColumnName = exampleWeightColumnName; - res.Add(SweepableEstimatorFactory.CreateLbfgsPoissonRegressionRegression(lbfgsOption, lbfgsPoissonRegressionSearchSpace ?? new SearchSpace(lbfgsOption))); + lbfgsPoissonRegressionOption = lbfgsPoissonRegressionOption ?? new LbfgsOption(); + lbfgsPoissonRegressionOption.LabelColumnName = labelColumnName; + lbfgsPoissonRegressionOption.FeatureColumnName = featureColumnName; + lbfgsPoissonRegressionOption.ExampleWeightColumnName = exampleWeightColumnName; + res.Add(SweepableEstimatorFactory.CreateLbfgsPoissonRegressionRegression(lbfgsPoissonRegressionOption, lbfgsPoissonRegressionSearchSpace ?? 
new SearchSpace(lbfgsPoissonRegressionOption))); } if (useSdca) diff --git a/test/Microsoft.ML.AutoML.Tests/AutoMLExperimentTests.cs b/test/Microsoft.ML.AutoML.Tests/AutoMLExperimentTests.cs index 2402412bf5..18a0245718 100644 --- a/test/Microsoft.ML.AutoML.Tests/AutoMLExperimentTests.cs +++ b/test/Microsoft.ML.AutoML.Tests/AutoMLExperimentTests.cs @@ -212,7 +212,7 @@ public async Task AutoMLExperiment_UCI_Adult_Train_Test_Split_Test() var data = DatasetUtil.GetUciAdultDataView(); var experiment = context.Auto().CreateExperiment(); var pipeline = context.Auto().Featurizer(data, "_Features_", excludeColumns: new[] { DatasetUtil.UciAdultLabel }) - .Append(context.Auto().BinaryClassification(DatasetUtil.UciAdultLabel, "_Features_", useLgbm: false, useSdca: false, useLbfgs: false)); + .Append(context.Auto().BinaryClassification(DatasetUtil.UciAdultLabel, "_Features_", useLgbm: false, useSdcaLogisticRegression: false, useLbfgsLogisticRegression: false)); experiment.SetDataset(context.Data.TrainTestSplit(data)) .SetBinaryClassificationMetric(BinaryClassificationMetric.AreaUnderRocCurve, DatasetUtil.UciAdultLabel) @@ -237,7 +237,7 @@ public async Task AutoMLExperiment_UCI_Adult_CV_5_Test() var data = DatasetUtil.GetUciAdultDataView(); var experiment = context.Auto().CreateExperiment(); var pipeline = context.Auto().Featurizer(data, "_Features_", excludeColumns: new[] { DatasetUtil.UciAdultLabel }) - .Append(context.Auto().BinaryClassification(DatasetUtil.UciAdultLabel, "_Features_", useLgbm: false, useSdca: false, useLbfgs: false)); + .Append(context.Auto().BinaryClassification(DatasetUtil.UciAdultLabel, "_Features_", useLgbm: false, useSdcaLogisticRegression: false, useLbfgsLogisticRegression: false)); experiment.SetDataset(data, 5) .SetBinaryClassificationMetric(BinaryClassificationMetric.AreaUnderRocCurve, DatasetUtil.UciAdultLabel) @@ -318,7 +318,7 @@ public async Task AutoMLExperiment_Taxi_Fare_Train_Test_Split_Test() var experiment = 
context.Auto().CreateExperiment(); var label = DatasetUtil.TaxiFareLabel; var pipeline = context.Auto().Featurizer(train, excludeColumns: new[] { label }) - .Append(context.Auto().Regression(label, useLgbm: false, useSdca: false, useLbfgs: false)); + .Append(context.Auto().Regression(label, useLgbm: false, useSdca: false, useLbfgsPoissonRegression: false)); experiment.SetDataset(train, test) .SetRegressionMetric(RegressionMetric.RSquared, label) @@ -337,7 +337,7 @@ public async Task AutoMLExperiment_Taxi_Fare_CV_5_Test() var experiment = context.Auto().CreateExperiment(); var label = DatasetUtil.TaxiFareLabel; var pipeline = context.Auto().Featurizer(train, excludeColumns: new[] { label }) - .Append(context.Auto().Regression(label, useLgbm: false, useSdca: false, useLbfgs: false)); + .Append(context.Auto().Regression(label, useLgbm: false, useSdca: false, useLbfgsPoissonRegression: false)); experiment.SetDataset(train, 5) .SetRegressionMetric(RegressionMetric.RSquared, label) From 2dff87b28cb585f075cf0dfa8ca6f693f49d6b9f Mon Sep 17 00:00:00 2001 From: XiaoYun Zhang Date: Tue, 3 Jan 2023 13:19:19 -0800 Subject: [PATCH 3/3] fix bug --- src/Microsoft.ML.AutoML/API/AutoCatalog.cs | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/Microsoft.ML.AutoML/API/AutoCatalog.cs b/src/Microsoft.ML.AutoML/API/AutoCatalog.cs index 16ac3b5b82..a965fcf832 100644 --- a/src/Microsoft.ML.AutoML/API/AutoCatalog.cs +++ b/src/Microsoft.ML.AutoML/API/AutoCatalog.cs @@ -381,7 +381,7 @@ public SweepablePipeline BinaryClassification(string labelColumnName = DefaultCo lbfgsLogisticRegressionOption.LabelColumnName = labelColumnName; lbfgsLogisticRegressionOption.FeatureColumnName = featureColumnName; lbfgsLogisticRegressionOption.ExampleWeightColumnName = exampleWeightColumnName; - res.Add(SweepableEstimatorFactory.CreateLbfgsPoissonRegressionRegression(lbfgsLogisticRegressionOption, lbfgsLogisticRegressionSearchSpace ?? 
new SearchSpace(lbfgsLogisticRegressionOption))); + res.Add(SweepableEstimatorFactory.CreateLbfgsLogisticRegressionBinary(lbfgsLogisticRegressionOption, lbfgsLogisticRegressionSearchSpace ?? new SearchSpace(lbfgsLogisticRegressionOption))); } if (useSdcaLogisticRegression) @@ -479,15 +479,6 @@ public SweepablePipeline MultiClassification( res.Add(SweepableEstimatorFactory.CreateLightGbmMulti(lgbmOption, lgbmSearchSpace ?? new SearchSpace(lgbmOption))); } - if (useLbfgsMaximumEntrophy) - { - lbfgsMaximumEntrophyOption = lbfgsMaximumEntrophyOption ?? new LbfgsOption(); - lbfgsMaximumEntrophyOption.LabelColumnName = labelColumnName; - lbfgsMaximumEntrophyOption.FeatureColumnName = featureColumnName; - lbfgsMaximumEntrophyOption.ExampleWeightColumnName = exampleWeightColumnName; - res.Add(SweepableEstimatorFactory.CreateLbfgsMaximumEntropyMulti(lbfgsMaximumEntrophyOption, lbfgsMaximumEntrophySearchSpace ?? new SearchSpace(lbfgsMaximumEntrophyOption))); - } - if (useLbfgsLogisticRegression) { lbfgsLogisticRegressionOption = lbfgsLogisticRegressionOption ?? new LbfgsOption(); @@ -497,6 +488,16 @@ public SweepablePipeline MultiClassification( res.Add(SweepableEstimatorFactory.CreateLbfgsLogisticRegressionOva(lbfgsLogisticRegressionOption, lbfgsLogisticRegressionSearchSpace ?? new SearchSpace(lbfgsLogisticRegressionOption))); } + + if (useLbfgsMaximumEntrophy) + { + lbfgsMaximumEntrophyOption = lbfgsMaximumEntrophyOption ?? new LbfgsOption(); + lbfgsMaximumEntrophyOption.LabelColumnName = labelColumnName; + lbfgsMaximumEntrophyOption.FeatureColumnName = featureColumnName; + lbfgsMaximumEntrophyOption.ExampleWeightColumnName = exampleWeightColumnName; + res.Add(SweepableEstimatorFactory.CreateLbfgsMaximumEntropyMulti(lbfgsMaximumEntrophyOption, lbfgsMaximumEntrophySearchSpace ?? new SearchSpace(lbfgsMaximumEntrophyOption))); + } + if (useSdcaMaximumEntrophy) { sdcaMaximumEntrophyOption = sdcaMaximumEntrophyOption ?? new SdcaOption();