Skip to content

Commit 99e8aa5

Browse files
committed
Merge remote-tracking branch 'origin/master' into singlis/drop4
2 parents c8b5a56 + 263a67b commit 99e8aa5

File tree

157 files changed

+4062
-6304
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

157 files changed

+4062
-6304
lines changed

build/Dependencies.props

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
<!-- Test-only Dependencies -->
3939
<PropertyGroup>
4040
<BenchmarkDotNetVersion>0.11.1</BenchmarkDotNetVersion>
41-
<MicrosoftMLTestModelsPackageVersion>0.0.2-test</MicrosoftMLTestModelsPackageVersion>
41+
<MicrosoftMLTestModelsPackageVersion>0.0.3-test</MicrosoftMLTestModelsPackageVersion>
4242
</PropertyGroup>
4343

4444
</Project>

docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,10 @@
77

88
<ItemGroup>
99
<ProjectReference Include="..\..\..\src\Microsoft.ML.StandardLearners\Microsoft.ML.StandardLearners.csproj" />
10-
<ProjectReference Include="..\..\..\src\Microsoft.ML.SamplesUtils\Microsoft.ML.SamplesUtils.csproj" />
10+
<ProjectReference Include="..\..\..\src\Microsoft.ML.SamplesUtils\Microsoft.ML.SamplesUtils.csproj" />
11+
<ProjectReference Include="..\..\..\src\Microsoft.ML.FastTree\Microsoft.ML.FastTree.csproj" />
12+
<ProjectReference Include="..\..\..\src\Microsoft.ML.LightGBM\Microsoft.ML.LightGBM.csproj" />
13+
1114

1215
<NativeAssemblyReference Include="CpuMathNative" />
1316

docs/samples/Microsoft.ML.Samples/Trainers.cs

Lines changed: 120 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,24 @@
1-
// Licensed to the .NET Foundation under one or more agreements.
1+
// Licensed to the .NET Foundation under one or more agreements.
22
// The .NET Foundation licenses this file to you under the MIT license.
33
// See the LICENSE file in the project root for more information.
44

55
// the alignment of the usings with the methods is intentional so they can display on the same level in the docs site.
66
using Microsoft.ML.Runtime.Data;
77
using Microsoft.ML.Runtime.Learners;
8+
using Microsoft.ML.Runtime.LightGBM;
9+
using Microsoft.ML.Trainers.FastTree;
810
using Microsoft.ML.StaticPipe;
911
using System;
12+
using System.Linq;
1013

1114
// NOTE: WHEN ADDING TO THE FILE, ALWAYS APPEND TO THE END OF IT.
1215
// If you change the existing content, check that the files referencing it in the XML documentation are still correct, as they reference
1316
// line by line.
1417
namespace Microsoft.ML.Samples
1518
{
1619
public static class Trainers
17-
{
18-
20+
{
21+
1922
public static void SdcaRegression()
2023
{
2124
// Downloading a regression dataset from github.com/dotnet/machinelearning
@@ -74,5 +77,119 @@ public static void SdcaRegression()
7477
Console.WriteLine($"RMS - {metrics.Rms}"); // 4.924493
7578
Console.WriteLine($"RSquared - {metrics.RSquared}"); // 0.565467
7679
}
80+
81+
public static void FastTreeRegression()
82+
{
83+
// Downloading a regression dataset from github.com/dotnet/machinelearning
84+
// this will create a housing.txt file in the filesystem where this code runs
85+
// you can open the file to see the data.
86+
string dataFile = SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset();
87+
88+
// Creating the ML.Net IHostEnvironment object, needed for the pipeline
89+
var env = new LocalEnvironment(seed: 0);
90+
91+
// Creating the ML context, based on the task performed.
92+
var regressionContext = new RegressionContext(env);
93+
94+
// Creating a data reader, based on the format of the data
95+
var reader = TextLoader.CreateReader(env, c => (
96+
label: c.LoadFloat(0),
97+
features: c.LoadFloat(1, 6)
98+
),
99+
separator: '\t', hasHeader: true);
100+
101+
// Read the data; no hold-out split is made here because the model is evaluated with 5-fold cross-validation below
102+
var data = reader.Read(new MultiFileSource(dataFile));
103+
104+
// The predictor that gets produced out of training
105+
FastTreeRegressionPredictor pred = null;
106+
107+
// Create the estimator
108+
var learningPipeline = reader.MakeNewEstimator()
109+
.Append(r => (r.label, score: regressionContext.Trainers.FastTree(
110+
r.label,
111+
r.features,
112+
numTrees: 100, // try: (int) 20-2000
113+
numLeaves: 20, // try: (int) 2-128
114+
minDatapointsInLeafs: 10, // try: (int) 1-100
115+
learningRate: 0.2, // try: (float) 0.025-0.4
116+
onFit: p => pred = p)
117+
)
118+
);
119+
120+
var cvResults = regressionContext.CrossValidate(data, learningPipeline, r => r.label, numFolds: 5);
121+
var averagedMetrics = (
122+
L1: cvResults.Select(r => r.metrics.L1).Average(),
123+
L2: cvResults.Select(r => r.metrics.L2).Average(),
124+
LossFn: cvResults.Select(r => r.metrics.LossFn).Average(),
125+
Rms: cvResults.Select(r => r.metrics.Rms).Average(),
126+
RSquared: cvResults.Select(r => r.metrics.RSquared).Average()
127+
);
128+
Console.WriteLine($"L1 - {averagedMetrics.L1}");
129+
Console.WriteLine($"L2 - {averagedMetrics.L2}");
130+
Console.WriteLine($"LossFunction - {averagedMetrics.LossFn}");
131+
Console.WriteLine($"RMS - {averagedMetrics.Rms}");
132+
Console.WriteLine($"RSquared - {averagedMetrics.RSquared}");
133+
}
134+
135+
public static void LightGbmRegression()
136+
{
137+
// Downloading a regression dataset from github.com/dotnet/machinelearning
138+
// this will create a housing.txt file in the filesystem where this code runs
139+
// you can open the file to see the data.
140+
string dataFile = SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset();
141+
142+
// Creating the ML.Net IHostEnvironment object, needed for the pipeline
143+
var env = new LocalEnvironment(seed: 0);
144+
145+
// Creating the ML context, based on the task performed.
146+
var regressionContext = new RegressionContext(env);
147+
148+
// Creating a data reader, based on the format of the data
149+
var reader = TextLoader.CreateReader(env, c => (
150+
label: c.LoadFloat(0),
151+
features: c.LoadFloat(1, 6)
152+
),
153+
separator: '\t', hasHeader: true);
154+
155+
// Read the data, and leave 10% out, so we can use them for testing
156+
var data = reader.Read(new MultiFileSource(dataFile));
157+
var (trainData, testData) = regressionContext.TrainTestSplit(data, testFraction: 0.1);
158+
159+
// The predictor that gets produced out of training
160+
LightGbmRegressionPredictor pred = null;
161+
162+
// Create the estimator
163+
var learningPipeline = reader.MakeNewEstimator()
164+
.Append(r => (r.label, score: regressionContext.Trainers.LightGbm(
165+
r.label,
166+
r.features,
167+
numLeaves: 4,
168+
minDataPerLeaf: 6,
169+
learningRate: 0.001,
170+
onFit: p => pred = p)
171+
)
172+
);
173+
174+
// Fit this pipeline to the training data
175+
var model = learningPipeline.Fit(trainData);
176+
177+
// Check the weights that the model learned
178+
VBuffer<float> weights = default;
179+
pred.GetFeatureWeights(ref weights);
180+
181+
Console.WriteLine($"weight 0 - {weights.Values[0]}");
182+
Console.WriteLine($"weight 1 - {weights.Values[1]}");
183+
184+
// Evaluate how the model is doing on the test data
185+
var dataWithPredictions = model.Transform(testData);
186+
var metrics = regressionContext.Evaluate(dataWithPredictions, r => r.label, r => r.score);
187+
188+
Console.WriteLine($"L1 - {metrics.L1}");
189+
Console.WriteLine($"L2 - {metrics.L2}");
190+
Console.WriteLine($"LossFunction - {metrics.LossFn}");
191+
Console.WriteLine($"RMS - {metrics.Rms}");
192+
Console.WriteLine($"RSquared - {metrics.RSquared}");
193+
}
77194
}
78195
}

src/Common/AssemblyLoadingUtils.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
using System;
77
using System.IO;
88
using System.IO.Compression;
9+
using System.Linq;
910
using System.Reflection;
1011

1112
namespace Microsoft.ML.Runtime

src/Microsoft.ML.Api/InternalSchemaDefinition.cs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,10 @@ public static InternalSchemaDefinition Create(Type userType, Direction direction
217217
public static InternalSchemaDefinition Create(Type userType, SchemaDefinition userSchemaDefinition)
218218
{
219219
Contracts.AssertValue(userType);
220-
Contracts.AssertValue(userSchemaDefinition);
220+
Contracts.AssertValueOrNull(userSchemaDefinition);
221+
222+
if (userSchemaDefinition == null)
223+
userSchemaDefinition = SchemaDefinition.Create(userType);
221224

222225
Column[] dstCols = new Column[userSchemaDefinition.Count];
223226

src/Microsoft.ML.Api/TypedCursor.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ public IRowCursor<TRow>[] GetCursorSet(out IRowCursorConsolidator consolidator,
231231
/// <summary>
232232
/// Create a Cursorable object on a given data view.
233233
/// </summary>
234-
/// <param name="env">Host enviroment.</param>
234+
/// <param name="env">Host environment.</param>
235235
/// <param name="data">The underlying data view.</param>
236236
/// <param name="ignoreMissingColumns">Whether to ignore missing columns in the data view.</param>
237237
/// <param name="schemaDefinition">The optional user-provided schema.</param>

src/Microsoft.ML.Core/ComponentModel/LoadableClassAttribute.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ public sealed class LoadableClassAttribute : LoadableClassAttributeBase
1919
{
2020
/// <summary>
2121
/// Assembly attribute used to specify that a class is loadable by a machine learning
22-
/// host enviroment, such as TLC
22+
/// host environment, such as TLC
2323
/// </summary>
2424
/// <param name="instType">The class type that is loadable</param>
2525
/// <param name="argType">The argument type that the constructor takes (may be null)</param>
@@ -33,7 +33,7 @@ public LoadableClassAttribute(Type instType, Type argType, Type sigType, string
3333

3434
/// <summary>
3535
/// Assembly attribute used to specify that a class is loadable by a machine learning
36-
/// host enviroment, such as TLC
36+
/// host environment, such as TLC
3737
/// </summary>
3838
/// <param name="instType">The class type that is loadable</param>
3939
/// <param name="loaderType">The class type that contains the construction method</param>
@@ -58,7 +58,7 @@ public LoadableClassAttribute(Type instType, Type loaderType, Type argType, Type
5858

5959
/// <summary>
6060
/// Assembly attribute used to specify that a class is loadable by a machine learning
61-
/// host enviroment, such as TLC
61+
/// host environment, such as TLC
6262
/// </summary>
6363
/// <param name="summary">The description summary of the class type</param>
6464
/// <param name="instType">The class type that is loadable</param>
@@ -73,7 +73,7 @@ public LoadableClassAttribute(string summary, Type instType, Type argType, Type
7373

7474
/// <summary>
7575
/// Assembly attribute used to specify that a class is loadable by a machine learning
76-
/// host enviroment, such as TLC
76+
/// host environment, such as TLC
7777
/// </summary>
7878
/// <param name="summary">The description summary of the class type</param>
7979
/// <param name="instType">The class type that is loadable</param>

src/Microsoft.ML.Core/Data/IHostEnvironment.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ public interface IHostEnvironment : IChannelProvider, IProgressChannelProvider
6969
/// handles and ensure that they are disposed properly when the environment is "shut down".
7070
///
7171
/// The suffix and prefix are optional. A common use for suffix is to specify an extension, eg, ".txt".
72-
/// The use of suffix and prefix, including whether they have any affect, is up to the host enviroment.
72+
/// The use of suffix and prefix, including whether they have any effect, is up to the host environment.
7373
/// </summary>
7474
IFileHandle CreateTempFile(string suffix = null, string prefix = null);
7575
}

src/Microsoft.ML.Core/Data/ProgressReporter.cs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -468,6 +468,17 @@ public List<ProgressEvent> GetAllProgress()
468468

469469
return list;
470470
}
471+
472+
public void Reset()
473+
{
474+
lock (_lock)
475+
{
476+
while (!_pendingEvents.IsEmpty)
477+
_pendingEvents.TryDequeue(out var res);
478+
_namesUsed.Clear();
479+
_index = 0;
480+
}
481+
}
471482
}
472483

473484
/// <summary>

src/Microsoft.ML.Core/Environment/ConsoleEnvironment.cs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,11 @@ internal IDisposable RedirectChannelOutput(TextWriter newOutWriter, TextWriter n
430430
return new OutputRedirector(this, newOutWriter, newErrWriter);
431431
}
432432

433+
internal void ResetProgressChannel()
434+
{
435+
ProgressTracker.Reset();
436+
}
437+
433438
private sealed class OutputRedirector : IDisposable
434439
{
435440
private readonly ConsoleEnvironment _root;

0 commit comments

Comments
 (0)