Skip to content

Commit b958c37

Browse files
committed
Merge remote-tracking branch 'origin/master' into singlis/scrub-lightgbm
2 parents ce21121 + 1942c8f commit b958c37

File tree

40 files changed

+216
-212
lines changed

40 files changed

+216
-212
lines changed

.vsts-dotnet-ci.yml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,15 @@ phases:
3131
parameters:
3232
name: Centos
3333
buildScript: ./build.sh
34+
customMatrixes:
35+
Build_Debug_Intrinsics:
36+
_configuration: Debug-Intrinsics
37+
_config_short: DI
38+
_includeBenchmarkData: false
39+
Build_Release:
40+
_configuration: Release
41+
_config_short: R
42+
_includeBenchmarkData: true
3443
queue:
3544
name: Hosted Ubuntu 1604
3645
container: CentosContainer
@@ -39,6 +48,15 @@ phases:
3948
parameters:
4049
name: Ubuntu
4150
buildScript: ./build.sh
51+
customMatrixes:
52+
Build_Debug:
53+
_configuration: Debug
54+
_config_short: D
55+
_includeBenchmarkData: false
56+
Build_Release_Intrinsics:
57+
_configuration: Release-Intrinsics
58+
_config_short: RI
59+
_includeBenchmarkData: true
4260
queue:
4361
name: Hosted Ubuntu 1604
4462
container: UbuntuContainer

BuildToolsVersion.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
3.0.0-preview1-03129-01
1+
3.0.0-preview1-03721-01

docs/samples/Microsoft.ML.Samples/Dynamic/FeatureContributionCalculationTransform.cs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,7 @@ public static void Example()
4444
var outData = featureContributionCalculator.Fit(scoredData).Transform(scoredData);
4545

4646
// Let's extract the weights from the linear model to use as a comparison
47-
var weights = new VBuffer<float>();
48-
model.Model.GetFeatureWeights(ref weights);
47+
var weights = model.Model.Weights;
4948

5049
// Let's now walk through the first ten records and see which feature drove the values the most
5150
// Get prediction scores and contributions
@@ -63,7 +62,7 @@ public static void Example()
6362
var value = row.Features[featureOfInterest];
6463
var contribution = row.FeatureContributions[featureOfInterest];
6564
var name = data.Schema[featureOfInterest + 1].Name;
66-
var weight = weights.GetValues()[featureOfInterest];
65+
var weight = weights[featureOfInterest];
6766

6867
Console.WriteLine("{0:0.00}\t{1:0.00}\t{2}\t{3:0.00}\t{4:0.00}\t{5:0.00}",
6968
row.MedianHomeValue,

docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/PriorTrainerSample.cs renamed to docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/PriorTrainerSample.cs

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -7,22 +7,22 @@ public class PriorTrainer
77
{
88
public static void Example()
99
{
10-
// Downloading the dataset from github.com/dotnet/machinelearning.
11-
// This will create a sentiment.tsv file in the filesystem.
12-
// You can open this file, if you want to see the data.
13-
string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset()[0];
10+
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
11+
// as a catalog of available operations and as the source of randomness.
12+
var mlContext = new MLContext();
13+
14+
// Download the dataset; the first file is the training set and the second is the test set.
15+
var dataFiles = SamplesUtils.DatasetUtils.DownloadSentimentDataset();
16+
var trainFile = dataFiles[0];
17+
var testFile = dataFiles[1];
1418

1519
// A preview of the data.
1620
// Sentiment SentimentText
1721
// 0 " :Erm, thank you. "
1822
// 1 ==You're cool==
1923

20-
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
21-
// as a catalog of available operations and as the source of randomness.
22-
var mlContext = new MLContext();
23-
24-
// Step 1: Load the data as an IDataView.
25-
// First, we define the loader: specify the data columns and where to find them in the text file.
24+
// Step 1: Read the data as an IDataView.
25+
// First, we define the reader: specify the data columns and where to find them in the text file.
2626
var loader = mlContext.Data.CreateTextLoader(
2727
columns: new[]
2828
{
@@ -31,12 +31,9 @@ public static void Example()
3131
},
3232
hasHeader: true
3333
);
34-
35-
// Load the data
36-
var data = loader.Load(dataFile);
3734

38-
// Split it between training and test data
39-
var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data);
35+
// Load the data
36+
var trainData = loader.Load(trainFile);
4037

4138
// Step 2: Pipeline
4239
// Featurize the text column through the FeaturizeText API.
@@ -47,19 +44,27 @@ public static void Example()
4744
.Append(mlContext.BinaryClassification.Trainers.Prior(labelColumnName: "Sentiment"));
4845

4946
// Step 3: Train the pipeline
50-
var trainedPipeline = pipeline.Fit(trainTestData.TrainSet);
47+
var trainedPipeline = pipeline.Fit(trainData);
5148

5249
// Step 4: Evaluate on the test set
53-
var transformedData = trainedPipeline.Transform(trainTestData.TestSet);
50+
var transformedData = trainedPipeline.Transform(loader.Load(testFile));
5451
var evalMetrics = mlContext.BinaryClassification.Evaluate(transformedData, label: "Sentiment");
55-
56-
// Step 5: Inspect the output
57-
Console.WriteLine("Accuracy: " + evalMetrics.Accuracy);
52+
SamplesUtils.ConsoleUtils.PrintMetrics(evalMetrics);
5853

5954
// The Prior trainer outputs the proportion of a label in the dataset as the probability of that label.
60-
// In this case it means that there is a split of around 64%-36% of positive and negative labels in the dataset.
55+
// In this case 'Accuracy: 0.50' means that there is a split of around 50%-50% of positive and negative labels in the test dataset.
6156
// Expected output:
62-
// Accuracy: 0.647058823529412
57+
58+
// Accuracy: 0.50
59+
// AUC: 0.50
60+
// F1 Score: 0.67
61+
// Negative Precision: 0.00
62+
// Negative Recall: 0.00
63+
// Positive Precision: 0.50
64+
// Positive Recall: 1.00
65+
// LogLoss: 1.05
66+
// LogLossReduction: -4.89
67+
// Entropy: 1.00
6368
}
6469
}
6570
}

docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/RandomTrainerSample.cs renamed to docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/RandomTrainerSample.cs

Lines changed: 29 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -7,59 +7,64 @@ public static class RandomTrainer
77
{
88
public static void Example()
99
{
10-
// Downloading the dataset from github.com/dotnet/machinelearning.
11-
// This will create a sentiment.tsv file in the filesystem.
12-
// You can open this file, if you want to see the data.
13-
string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset()[0];
10+
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
11+
// as a catalog of available operations and as the source of randomness.
12+
var mlContext = new MLContext(seed: 1);
13+
14+
// Download the dataset; the first file is the training set and the second is the test set.
15+
var dataFiles = SamplesUtils.DatasetUtils.DownloadSentimentDataset();
16+
var trainFile = dataFiles[0];
17+
var testFile = dataFiles[1];
1418

1519
// A preview of the data.
1620
// Sentiment SentimentText
1721
// 0 " :Erm, thank you. "
1822
// 1 ==You're cool==
1923

20-
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
21-
// as a catalog of available operations and as the source of randomness.
22-
var mlContext = new MLContext(seed: 1);
23-
24-
// Step 1: Load the data as an IDataView.
25-
// First, we define the loader: specify the data columns and where to find them in the text file.
26-
var loader = mlContext.Data.CreateTextLoader(
24+
// Step 1: Read the data as an IDataView.
25+
// First, we define the reader: specify the data columns and where to find them in the text file.
26+
var reader = mlContext.Data.CreateTextLoader(
2727
columns: new[]
2828
{
2929
new TextLoader.Column("Sentiment", DataKind.Single, 0),
3030
new TextLoader.Column("SentimentText", DataKind.String, 1)
3131
},
3232
hasHeader: true
3333
);
34-
35-
// Load the data
36-
var data = loader.Load(dataFile);
3734

38-
// Split it between training and test data
39-
var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data);
35+
// Read the data
36+
var trainData = reader.Load(trainFile);
4037

4138
// Step 2: Pipeline
4239
// Featurize the text column through the FeaturizeText API.
4340
// Then append a binary classifier, setting the "Label" column as the label of the dataset, and
4441
// the "Features" column produced by FeaturizeText as the features column.
4542
var pipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText")
46-
.AppendCacheCheckpoint(mlContext) // Add a data-cache step within a pipeline.
43+
.AppendCacheCheckpoint(mlContext)
4744
.Append(mlContext.BinaryClassification.Trainers.Random());
4845

4946
// Step 3: Train the pipeline
50-
var trainedPipeline = pipeline.Fit(trainTestData.TrainSet);
47+
var trainedPipeline = pipeline.Fit(trainData);
5148

5249
// Step 4: Evaluate on the test set
53-
var transformedData = trainedPipeline.Transform(trainTestData.TestSet);
50+
var transformedData = trainedPipeline.Transform(reader.Load(testFile));
5451
var evalMetrics = mlContext.BinaryClassification.Evaluate(transformedData, label: "Sentiment");
55-
56-
// Step 5: Inspect the output
57-
Console.WriteLine("Accuracy: " + evalMetrics.Accuracy);
52+
SamplesUtils.ConsoleUtils.PrintMetrics(evalMetrics);
5853

5954
// We expect an output probability close to 0.5 as the Random trainer outputs a random prediction.
6055
// Regardless of the input features, the trainer will predict either positive or negative label with equal probability.
61-
// Expected output (close to 0.5):
62-
// Accuracy: 0.588235294117647
56+
// Expected output (close to 0.5):
57+
58+
// Accuracy: 0.56
59+
// AUC: 0.57
60+
// F1 Score: 0.60
61+
// Negative Precision: 0.57
62+
// Negative Recall: 0.44
63+
// Positive Precision: 0.55
64+
// Positive Recall: 0.67
65+
// LogLoss: 1.53
66+
// LogLossReduction: -53.37
67+
// Entropy: 1.00
6368
}
6469
}
6570
}

docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescentWithOptions.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ public static void Example()
2020
var split = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
2121
// Create data training pipeline
2222
var pipeline = mlContext.BinaryClassification.Trainers.SymbolicStochasticGradientDescent(
23-
new ML.Trainers.HalLearners.SymSgdClassificationTrainer.Options()
23+
new ML.Trainers.HalLearners.SymbolicStochasticGradientDescentClassificationTrainer.Options()
2424
{
2525
LearningRate = 0.2f,
2626
NumberOfIterations = 10,

docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquaresWithOptions.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ public static void Example()
4444

4545
// Create the estimator, here we only need OrdinaryLeastSquares trainer
4646
// as data is already processed in a form consumable by the trainer
47-
var pipeline = mlContext.Regression.Trainers.OrdinaryLeastSquares(new OlsLinearRegressionTrainer.Options()
47+
var pipeline = mlContext.Regression.Trainers.OrdinaryLeastSquares(new OrdinaryLeastSquaresRegressionTrainer.Options()
4848
{
4949
L2Weight = 0.1f,
5050
PerParameterSignificance = false

docs/samples/Microsoft.ML.Samples/Static/SDCARegression.cs

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,10 @@ public static void SdcaRegression()
4646
var model = learningPipeline.Fit(trainData);
4747

4848
// Check the weights that the model learned
49-
VBuffer<float> weights = default;
50-
pred.GetFeatureWeights(ref weights);
49+
var weights = pred.Weights;
5150

52-
var weightsValues = weights.GetValues();
53-
Console.WriteLine($"weight 0 - {weightsValues[0]}");
54-
Console.WriteLine($"weight 1 - {weightsValues[1]}");
51+
Console.WriteLine($"weight 0 - {weights[0]}");
52+
Console.WriteLine($"weight 1 - {weights[1]}");
5553

5654
// Evaluate how the model is doing on the test data
5755
var dataWithPredictions = model.Transform(testData);

src/Microsoft.ML.Data/Dirty/PredictorInterfaces.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,8 @@ internal interface ICanSaveInSourceCode
146146
/// <summary>
147147
/// Interface implemented by components that can assign weights to features.
148148
/// </summary>
149-
public interface IHaveFeatureWeights
149+
[BestFriend]
150+
internal interface IHaveFeatureWeights
150151
{
151152
/// <summary>
152153
/// Returns the weights for the features.

src/Microsoft.ML.DnnImageFeaturizer.AlexNet/AlexNetExtension.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33
// See the LICENSE file in the project root for more information.
44

5+
using System;
56
using System.IO;
67
using Microsoft.ML.Data;
78

@@ -21,7 +22,7 @@ public static class AlexNetExtension
2122
/// </summary>
2223
public static EstimatorChain<ColumnCopyingTransformer> AlexNet(this DnnImageModelSelector dnnModelContext, IHostEnvironment env, string outputColumnName, string inputColumnName)
2324
{
24-
return AlexNet(dnnModelContext, env, outputColumnName, inputColumnName, Path.Combine(AssemblyPathHelpers.GetExecutingAssemblyLocation(), "DnnImageModels"));
25+
return AlexNet(dnnModelContext, env, outputColumnName, inputColumnName, Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "DnnImageModels"));
2526
}
2627

2728
/// <summary>

0 commit comments

Comments
 (0)