Skip to content

Commit bd43894

Browse files
srsaggamDmitry-A
authored and committed
Logging support in CLI + Implementation of cmd args [--name,--output,--verbosity] (dotnet#121)
* added logging and helper methods * fixing code after merge * added resx files, added logger framework, added logging messages * added new options * added spacing * minor fixes * change command description * rename option, add headers, include new param in test * formatted * build fix * changed option name * Added NlogConfig file * added back config package * fix tests
1 parent e71ab96 commit bd43894

File tree

15 files changed

+589
-41
lines changed

15 files changed

+589
-41
lines changed

src/mlnet.Test/CommandLineTests.cs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ public void TestCommandLineArgs()
1717

1818
// Create the handler outside so that the command line and the handler are decoupled and testable.
1919
var handler = CommandHandler.Create<FileInfo, FileInfo, FileInfo, TaskKind, string, uint, uint>(
20-
(trainDataset, testDataset, validationDataset, mlTask, labelColumnName, timeout, labelColumnIndex) =>
20+
(trainDataset, validationDataset, testDataset, mlTask, labelColumnName, maxExplorationTime, labelColumnIndex) =>
2121
{
2222
parsingSuccessful = true;
2323
});
@@ -43,7 +43,7 @@ public void TestCommandLineArgsFailTest()
4343

4444
// Create the handler outside so that the command line and the handler are decoupled and testable.
4545
var handler = CommandHandler.Create<FileInfo, FileInfo, FileInfo, TaskKind, string, uint, uint>(
46-
(trainDataset, testDataset, validationDataset, mlTask, labelColumnName, timeout, labelColumnIndex) =>
46+
(trainDataset, validationDataset, testDataset, mlTask, labelColumnName, maxExplorationTime, labelColumnIndex) =>
4747
{
4848
parsingSuccessful = true;
4949
});
@@ -87,13 +87,14 @@ public void TestCommandLineArgsValuesTest()
8787

8888
// Create the handler outside so that the command line and the handler are decoupled and testable.
8989
var handler = CommandHandler.Create<FileInfo, FileInfo, FileInfo, TaskKind, string, uint, uint>(
90-
(trainDataset, validationDataset, testDataset, mlTask, labelColumnName, timeout, labelColumnIndex) =>
90+
(trainDataset, validationDataset, testDataset, mlTask, labelColumnName, maxExplorationTime, labelColumnIndex) =>
9191
{
9292
parsingSuccessful = true;
9393
Assert.AreEqual(mlTask, TaskKind.BinaryClassification);
9494
Assert.AreEqual(trainDataset, file1);
9595
Assert.AreEqual(testDataset, file2);
9696
Assert.AreEqual(labelColumnName, labelName);
97+
Assert.AreEqual(maxExplorationTime, 5);
9798
});
9899

99100
var parser = new CommandLineBuilder()
@@ -103,7 +104,7 @@ public void TestCommandLineArgsValuesTest()
103104
.Build();
104105

105106
// Incorrect mltask test
106-
string[] args = new[] { "new", "--ml-task", "BinaryClassification", "--train-dataset", file1, "--label-column-name", labelName, "--test-dataset", file2 };
107+
string[] args = new[] { "new", "--ml-task", "BinaryClassification", "--train-dataset", file1, "--label-column-name", labelName, "--test-dataset", file2, "--max-exploration-time", "5" };
107108
parser.InvokeAsync(args).Wait();
108109
File.Delete(file1);
109110
File.Delete(file2);

src/mlnet/CodeGenerator/CodeGenerator.cs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
using System.Linq;
99
using System.Text;
1010
using Microsoft.ML.Auto;
11-
using Microsoft.ML.Data;
1211
using mlnet.Templates;
1312
using static Microsoft.ML.Data.TextLoader;
1413

@@ -80,13 +79,14 @@ internal void GenerateOutput()
8079
var trainScoreCode = codeGen.TransformText();
8180
var projectSourceCode = csProjGenerator.TransformText();
8281
var consoleHelperCode = consoleHelper.TransformText();
83-
if (!Directory.Exists("./BestModel"))
82+
var outputFolder = Path.Combine(options.OutputBaseDir, options.OutputName);
83+
if (!Directory.Exists(outputFolder))
8484
{
85-
Directory.CreateDirectory("./BestModel");
85+
Directory.CreateDirectory(outputFolder);
8686
}
87-
File.WriteAllText("./BestModel/Train.cs", trainScoreCode);
88-
File.WriteAllText("./BestModel/MyML.csproj", projectSourceCode);
89-
File.WriteAllText("./BestModel/ConsoleHelper.cs", consoleHelperCode);
87+
File.WriteAllText($"{outputFolder}/Train.cs", trainScoreCode);
88+
File.WriteAllText($"{outputFolder}/{options.OutputName}.csproj", projectSourceCode);
89+
File.WriteAllText($"{outputFolder}/ConsoleHelper.cs", consoleHelperCode);
9090
}
9191

9292
internal IList<(string, string)> GenerateTransformsAndUsings()

src/mlnet/CodeGenerator/CodeGeneratorOptions.cs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
1-
using System;
2-
using System.Collections.Generic;
3-
using System.IO;
4-
using System.Text;
1+
using System.IO;
52
using Microsoft.ML.Auto;
63

74
namespace Microsoft.ML.CLI
85
{
96
internal class CodeGeneratorOptions
107
{
8+
internal string OutputName { get; set; }
9+
10+
internal string OutputBaseDir { get; set; }
11+
1112
internal FileInfo TrainDataset { get; set; }
1213

1314
internal FileInfo TestDataset { get; set; }

src/mlnet/Commands/CommandDefinitions.cs

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
// See the LICENSE file in the project root for more information.
44

55
using System;
6+
using System.Collections.Generic;
67
using System.CommandLine;
78
using System.CommandLine.Builder;
89
using System.CommandLine.Invocation;
@@ -18,13 +19,17 @@ internal static System.CommandLine.Command New(ICommandHandler handler)
1819
{
1920
var newCommand = new System.CommandLine.Command("new", "ML.NET CLI tool for code generation", handler: handler)
2021
{
22+
//Dataset(),
2123
TrainDataset(),
2224
ValidationDataset(),
2325
TestDataset(),
2426
MlTask(),
2527
LabelName(),
26-
Timeout(),
27-
LabelColumnIndex()
28+
MaxExplorationTime(),
29+
LabelColumnIndex(),
30+
Verbosity(),
31+
Name(),
32+
OutputBaseDir()
2833
};
2934

3035
newCommand.Argument.AddValidator((sym) =>
@@ -46,6 +51,9 @@ internal static System.CommandLine.Command New(ICommandHandler handler)
4651

4752
return newCommand;
4853

54+
/*Option Dataset() =>
55+
new Option("--dataset", "Dataset file path.",
56+
new Argument<FileInfo>().ExistingOnly()); */
4957

5058
Option TrainDataset() =>
5159
new Option("--train-dataset", "Train dataset file path.",
@@ -71,15 +79,32 @@ Option LabelColumnIndex() =>
7179
new Option("--label-column-index", "Index of the label column.",
7280
new Argument<uint>());
7381

74-
Option Timeout() =>
75-
new Option("--timeout", "Timeout in seconds for exploring models.",
82+
Option MaxExplorationTime() =>
83+
new Option("--max-exploration-time", "Timeout in seconds for exploring models.",
7684
new Argument<uint>(defaultValue: 10));
7785

86+
Option Verbosity() =>
87+
new Option(new List<string>() { "--verbosity" }, "Verbosity of the output to be shown by the tool.",
88+
new Argument<string>(defaultValue: "m").WithSuggestions(GetVerbositySuggestions()));
89+
90+
Option Name() =>
91+
new Option(new List<string>() { "--name" }, "Name of the output files(project and folder).",
92+
new Argument<string>(defaultValue: "Sample"));
93+
94+
Option OutputBaseDir() =>
95+
new Option(new List<string>() { "--output" }, "Output folder path.",
96+
new Argument<string>(defaultValue: ".\\Sample"));
97+
7898
}
7999

80100
private static string[] GetMlTaskSuggestions()
81101
{
82102
return Enum.GetValues(typeof(TaskKind)).Cast<TaskKind>().Select(v => v.ToString()).ToArray();
83103
}
104+
105+
private static string[] GetVerbositySuggestions()
106+
{
107+
return new[] { "q", "m", "diag" };
108+
}
84109
}
85110
}

src/mlnet/Commands/NewCommand.cs

Lines changed: 51 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,20 @@
55
using System;
66
using System.Collections.Generic;
77
using System.IO;
8-
using System.Linq;
98
using Microsoft.Data.DataView;
109
using Microsoft.ML.Auto;
1110
using Microsoft.ML.Core.Data;
1211
using Microsoft.ML.Data;
12+
using mlnet;
13+
using mlnet.Utilities;
14+
using NLog;
1315

1416
namespace Microsoft.ML.CLI
1517
{
1618
internal class NewCommand
1719
{
1820
private Options options;
21+
private static Logger logger = LogManager.GetCurrentClassLogger();
1922

2023
internal NewCommand(Options options)
2124
{
@@ -26,12 +29,13 @@ internal void Run()
2629
{
2730
if (options.MlTask == TaskKind.MulticlassClassification)
2831
{
29-
Console.WriteLine($"Unsupported ml-task: {options.MlTask}");
32+
Console.WriteLine($"{Strings.UnsupportedMlTask}: {options.MlTask}");
3033
}
3134

3235
var context = new MLContext();
3336

3437
//Check what overload method of InferColumns needs to be called.
38+
logger.Log(LogLevel.Info, Strings.InferColumns);
3539
(TextLoader.Arguments TextLoaderArgs, IEnumerable<(string Name, ColumnPurpose Purpose)> ColumnPurpopses) columnInference = default((TextLoader.Arguments TextLoaderArgs, IEnumerable<(string Name, ColumnPurpose Purpose)> ColumnPurpopses));
3640
if (options.LabelName != null)
3741
{
@@ -42,50 +46,80 @@ internal void Run()
4246
columnInference = context.Data.InferColumns(options.TrainDataset.FullName, options.LabelIndex, groupColumns: false);
4347
}
4448

49+
logger.Log(LogLevel.Info, Strings.CreateDataLoader);
4550
var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderArgs);
4651

52+
logger.Log(LogLevel.Info, Strings.LoadData);
4753
IDataView trainData = textLoader.Read(options.TrainDataset.FullName);
4854
IDataView validationData = options.ValidationDataset == null ? null : textLoader.Read(options.ValidationDataset.FullName);
4955

5056
//Explore the models
51-
Pipeline pipeline = null;
52-
var result = ExploreModels(context, trainData, validationData, pipeline);
57+
(Pipeline, ITransformer) result = default;
58+
Console.WriteLine($"{Strings.ExplorePipeline}: {options.MlTask}");
59+
try
60+
{
61+
result = ExploreModels(context, trainData, validationData);
62+
}
63+
catch (Exception e)
64+
{
65+
logger.Log(LogLevel.Error, $"{Strings.ExplorePipelineException}:");
66+
logger.Log(LogLevel.Error, e.StackTrace);
67+
logger.Log(LogLevel.Error, Strings.Exiting);
68+
return;
69+
}
5370

5471
//Get the best pipeline
72+
Pipeline pipeline = null;
5573
pipeline = result.Item1;
5674
var model = result.Item2;
5775

76+
//Save the model
77+
logger.Log(LogLevel.Info, Strings.SavingBestModel);
78+
var modelPath = Path.Combine(@options.OutputBaseDir, options.OutputName);
79+
SaveModel(model, modelPath, $"{options.OutputName}_model.zip", context);
80+
81+
5882
//Generate code
59-
var codeGenerator = new CodeGenerator(pipeline, columnInference, new CodeGeneratorOptions() { TrainDataset = options.TrainDataset, MlTask = options.MlTask, TestDataset = options.TestDataset });
83+
logger.Log(LogLevel.Info, Strings.GenerateProject);
84+
var codeGenerator = new CodeGenerator(
85+
pipeline,
86+
columnInference,
87+
new CodeGeneratorOptions()
88+
{
89+
TrainDataset = options.TrainDataset,
90+
MlTask = options.MlTask,
91+
TestDataset = options.TestDataset,
92+
OutputName = options.OutputName,
93+
OutputBaseDir = options.OutputBaseDir
94+
});
6095
codeGenerator.GenerateOutput();
61-
62-
//Save the model
63-
SaveModel(model, @"./BestModel", "model.zip", context);
6496
}
6597

6698
private (Pipeline, ITransformer) ExploreModels(
6799
MLContext context,
68100
IDataView trainData,
69-
IDataView validationData,
70-
Pipeline pipeline)
101+
IDataView validationData)
71102
{
72103
ITransformer model = null;
73104
string label = options.LabelName ?? "Label"; // It is guaranteed training dataview to have Label column
105+
Pipeline pipeline = null;
74106

75107
if (options.MlTask == TaskKind.BinaryClassification)
76108
{
77-
var result = context.BinaryClassification.AutoFit(trainData, label, validationData, options.Timeout);
78-
result = result.OrderByDescending(t => t.Metrics.Accuracy).ToList();
79-
var bestIteration = result.FirstOrDefault();
109+
var progressReporter = new ProgressHandlers.BinaryClassificationHandler();
110+
var result = context.BinaryClassification.AutoFit(trainData, label, validationData, options.Timeout, progressCallback: progressReporter);
111+
logger.Log(LogLevel.Info, Strings.RetrieveBestPipeline);
112+
var bestIteration = result.Best();
80113
pipeline = bestIteration.Pipeline;
81114
model = bestIteration.Model;
82115
}
83116

84117
if (options.MlTask == TaskKind.Regression)
85118
{
86-
var result = context.Regression.AutoFit(trainData, label, validationData, options.Timeout);
87-
result = result.OrderByDescending(t => t.Metrics.RSquared).ToList();
88-
var bestIteration = result.FirstOrDefault();
119+
var progressReporter = new ProgressHandlers.RegressionHandler();
120+
var result = context.Regression.AutoFit(trainData, label, validationData, options.Timeout, progressCallback: progressReporter);
121+
logger.Log(LogLevel.Info, Strings.RetrieveBestPipeline);
122+
var bestIteration = result.Best();
89123
pipeline = bestIteration.Pipeline;
90124
model = bestIteration.Model;
91125
}
@@ -105,7 +139,7 @@ private static void SaveModel(ITransformer model, string ModelPath, string model
105139
{
106140
Directory.CreateDirectory(ModelPath);
107141
}
108-
ModelPath = ModelPath + "/" + modelName;
142+
ModelPath = Path.Combine(ModelPath, modelName);
109143
using (var fs = File.Create(ModelPath))
110144
model.SaveTo(mlContext, fs);
111145
}

src/mlnet/Data/Options.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ namespace Microsoft.ML.CLI
99
{
1010
internal class Options
1111
{
12+
internal string OutputName { get; set; }
13+
1214
internal string Name { get; set; }
1315

1416
internal FileInfo Dataset { get; set; }
@@ -27,5 +29,7 @@ internal class Options
2729

2830
internal uint Timeout { get; set; }
2931

32+
internal string OutputBaseDir { get; set; }
33+
3034
}
3135
}

src/mlnet/NLog.config

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
<?xml version="1.0" encoding="utf-8" ?>
2+
<nlog xmlns="http://www.nlog-project.org/schemas/NLog.xsd"
3+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
4+
5+
<targets>
6+
<target name="logfile" xsi:type="File" fileName="debug_log.txt" />
7+
<target name="logconsole" xsi:type="Console" layout="${message}" />
8+
</targets>
9+
10+
<rules>
11+
<logger name="*" minlevel="Debug" writeTo="logfile" />
12+
</rules>
13+
</nlog>

src/mlnet/Program.cs

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,13 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33
// See the LICENSE file in the project root for more information.
44

5-
using System;
65
using System.CommandLine.Builder;
76
using System.CommandLine.Invocation;
87
using System.IO;
98
using Microsoft.ML.Auto;
9+
using NLog;
10+
using NLog.Config;
11+
using NLog.Targets;
1012

1113
namespace Microsoft.ML.CLI
1214
{
@@ -16,18 +18,41 @@ public static void Main(string[] args)
1618
{
1719
// Create the handler outside so that the command line and the handler are decoupled and testable.
1820
var handler = CommandHandler.Create<FileInfo, FileInfo, FileInfo, TaskKind, string, uint, uint>(
19-
(trainDataset, validationDataset, testDataset, mlTask, labelColumnName, timeout, labelColumnIndex) =>
21+
(trainDataset, validationDataset, testDataset, mlTask, labelColumnName, maxExplorationTime, labelColumnIndex) =>
2022
{
23+
/* The variables below need to be initialized via the command-line API. There is currently a
24+
restriction on the number of args and their bindings. The .NET team is working
25+
on a feature that makes it possible to bind directly to a type; until then we
26+
keep this placeholder, which initializes the fields below.
27+
The PR that addresses this issue : https://github.com/dotnet/command-line-api/pull/408
28+
*/
29+
var basedir = "."; // This needs to be obtained from command line args.
30+
var name = "Sample"; // This needs to be obtained from command line args.
31+
32+
// TODO: q, m, diag need to be mapped to LogLevel here.
33+
var verbosity = LogLevel.Info;
34+
2135
var command = new NewCommand(new Options()
2236
{
2337
TrainDataset = trainDataset,
2438
ValidationDataset = validationDataset,
2539
TestDataset = testDataset,
2640
MlTask = mlTask,
2741
LabelName = labelColumnName,
28-
Timeout = timeout,
29-
LabelIndex = labelColumnIndex
42+
Timeout = maxExplorationTime,
43+
LabelIndex = labelColumnIndex,
44+
OutputBaseDir = basedir,
45+
OutputName = name
3046
});
47+
48+
// Override the Logger Configuration
49+
var logconsole = LogManager.Configuration.FindTargetByName("logconsole");
50+
var logfile = (FileTarget)LogManager.Configuration.FindTargetByName("logfile");
51+
logfile.FileName = $"{basedir}/debug_log.txt";
52+
var config = LogManager.Configuration;
53+
config.AddRule(verbosity, LogLevel.Fatal, logconsole);
54+
55+
// Run the command
3156
command.Run();
3257
});
3358

0 commit comments

Comments
 (0)