Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 14 additions & 7 deletions Microsoft.ML.sln
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,6 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.KMeansClusteri
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.PCA", "src\Microsoft.ML.PCA\Microsoft.ML.PCA.csproj", "{58E06735-1129-4DD5-86E0-6BBFF049AAD9}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Maml", "src\Microsoft.ML.Maml\Microsoft.ML.Maml.csproj", "{D956E291-F6E5-4474-9023-91793F45ABEB}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Api", "src\Microsoft.ML.Api\Microsoft.ML.Api.csproj", "{2F636A2C-062C-49F4-85F3-60DCADAB6A43}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Tests", "test\Microsoft.ML.Tests\Microsoft.ML.Tests.csproj", "{64BC22D3-1E76-41EF-94D8-C79E471FF2DD}"
Expand Down Expand Up @@ -104,6 +102,10 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Microsoft.ML.Parquet", "Mic
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Benchmarks", "test\Microsoft.ML.Benchmarks\Microsoft.ML.Benchmarks.csproj", "{7A9DB75F-2CA5-4184-9EF5-1F17EB39483F}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Maml", "src\Microsoft.ML.Maml\Microsoft.ML.Maml.csproj", "{64F40A0D-D4C2-4AA7-8470-E9CC437827E4}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Console", "src\Microsoft.ML.Console\Microsoft.ML.Console.csproj", "{362A98CF-FBF7-4EBB-A11B-990BBF845B15}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -158,10 +160,6 @@ Global
{58E06735-1129-4DD5-86E0-6BBFF049AAD9}.Debug|Any CPU.Build.0 = Debug|Any CPU
{58E06735-1129-4DD5-86E0-6BBFF049AAD9}.Release|Any CPU.ActiveCfg = Release|Any CPU
{58E06735-1129-4DD5-86E0-6BBFF049AAD9}.Release|Any CPU.Build.0 = Release|Any CPU
{D956E291-F6E5-4474-9023-91793F45ABEB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{D956E291-F6E5-4474-9023-91793F45ABEB}.Debug|Any CPU.Build.0 = Debug|Any CPU
{D956E291-F6E5-4474-9023-91793F45ABEB}.Release|Any CPU.ActiveCfg = Release|Any CPU
{D956E291-F6E5-4474-9023-91793F45ABEB}.Release|Any CPU.Build.0 = Release|Any CPU
{2F636A2C-062C-49F4-85F3-60DCADAB6A43}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{2F636A2C-062C-49F4-85F3-60DCADAB6A43}.Debug|Any CPU.Build.0 = Debug|Any CPU
{2F636A2C-062C-49F4-85F3-60DCADAB6A43}.Release|Any CPU.ActiveCfg = Release|Any CPU
Expand Down Expand Up @@ -202,6 +200,14 @@ Global
{7A9DB75F-2CA5-4184-9EF5-1F17EB39483F}.Debug|Any CPU.Build.0 = Debug|Any CPU
{7A9DB75F-2CA5-4184-9EF5-1F17EB39483F}.Release|Any CPU.ActiveCfg = Release|Any CPU
{7A9DB75F-2CA5-4184-9EF5-1F17EB39483F}.Release|Any CPU.Build.0 = Release|Any CPU
{64F40A0D-D4C2-4AA7-8470-E9CC437827E4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{64F40A0D-D4C2-4AA7-8470-E9CC437827E4}.Debug|Any CPU.Build.0 = Debug|Any CPU
{64F40A0D-D4C2-4AA7-8470-E9CC437827E4}.Release|Any CPU.ActiveCfg = Release|Any CPU
{64F40A0D-D4C2-4AA7-8470-E9CC437827E4}.Release|Any CPU.Build.0 = Release|Any CPU
{362A98CF-FBF7-4EBB-A11B-990BBF845B15}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{362A98CF-FBF7-4EBB-A11B-990BBF845B15}.Debug|Any CPU.Build.0 = Debug|Any CPU
{362A98CF-FBF7-4EBB-A11B-990BBF845B15}.Release|Any CPU.ActiveCfg = Release|Any CPU
{362A98CF-FBF7-4EBB-A11B-990BBF845B15}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand All @@ -219,7 +225,6 @@ Global
{7288C084-11C0-43BE-AC7F-45DCFEAEEBF6} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
{F1CAE3AB-4F86-4BC0-BBA8-C4A58E7E8A4A} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
{58E06735-1129-4DD5-86E0-6BBFF049AAD9} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
{D956E291-F6E5-4474-9023-91793F45ABEB} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
{2F636A2C-062C-49F4-85F3-60DCADAB6A43} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
{64BC22D3-1E76-41EF-94D8-C79E471FF2DD} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
{FDA2FD2C-A708-43AC-A941-4D941B0853BF} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
Expand All @@ -236,6 +241,8 @@ Global
{DEC8F776-49F7-4D87-836C-FE4DC057D08C} = {D3D38B03-B557-484D-8348-8BADEE4DF592}
{6C95FC87-F5F2-4EEF-BB97-567F2F5DD141} = {D3D38B03-B557-484D-8348-8BADEE4DF592}
{7A9DB75F-2CA5-4184-9EF5-1F17EB39483F} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
{64F40A0D-D4C2-4AA7-8470-E9CC437827E4} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
{362A98CF-FBF7-4EBB-A11B-990BBF845B15} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {41165AF1-35BB-4832-A189-73060F82B01D}
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ Along with these ML capabilities this first release of ML.NET also brings the fi

ML.NET runs on Windows, Linux, and macOS - any platform where 64 bit [.NET Core](https://github.com/dotnet/core) or later is available.

The current release is 0.1. Check out the [release notes](docs/release-notes/0.1/release-0.1.md).
The current release is 0.2. Check out the [release notes](docs/release-notes/0.2/release-0.2.md).

First ensure you have installed [.NET Core 2.0](https://www.microsoft.com/net/learn/get-started) or later. ML.NET also works on the .NET Framework. Note that ML.NET currently must run in a 64 bit process.

Expand Down Expand Up @@ -66,7 +66,7 @@ Here's an example of code to train a model to predict sentiment from text sample

```C#
var pipeline = new LearningPipeline();
pipeline.Add(new TextLoader<SentimentData>(dataPath, separator: ","));
pipeline.Add(new TextLoader(dataPath).CreateFrom<SentimentData>(separator: ','));
pipeline.Add(new TextFeaturizer("Features", "SentimentText"));
pipeline.Add(new FastTreeBinaryClassifier());
var model = pipeline.Train<SentimentData, SentimentPrediction>();
Expand Down
95 changes: 95 additions & 0 deletions docs/release-notes/0.2/release-0.2.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# ML.NET 0.2 Release Notes

We would like to thank the community for the engagement so far and helping us
shape ML.NET.

Today we are releasing ML.NET 0.2. This release focuses on addressing
questions/issues, adding clustering to the list of supported machine learning
tasks, enabling using data from memory to train models, easier model
validation, and more.

### Installation

ML.NET supports Windows, macOS, and Linux. See [supported OS versions of .NET
Core
2.0](https://github.com/dotnet/core/blob/master/release-notes/2.0/2.0-supported-os.md)
for more details.

You can install the ML.NET NuGet package from the .NET CLI using:
```
dotnet add package Microsoft.ML
```

Or from the NuGet Package Manager Console:
```
Install-Package Microsoft.ML
```

### Release Notes

Below are some of the highlights from this release.

* Added clustering to the list of supported machine learning tasks

* Clustering is an unsupervised learning task that groups sets of items
based on their features. It identifies which items are more similar to
each other than to other items. This might be useful in scenarios such as
organizing news articles into groups based on their topics, segmenting
users based on their shopping habits, and grouping viewers based on
their taste in movies.

* ML.NET 0.2 exposes `KMeansPlusPlusClusterer` which implements [K-Means++
clustering](http://theory.stanford.edu/~sergei/papers/vldb12-kmpar.pdf)
with [Yinyang K-means
acceleration](https://www.microsoft.com/en-us/research/publication/yinyang-k-means-a-drop-in-replacement-of-the-classic-k-means-with-consistent-speedup/?from=http%3A%2F%2Fresearch.microsoft.com%2Fapps%2Fpubs%2Fdefault.aspx%3Fid%3D252149).
[This
test](https://github.com/dotnet/machinelearning/blob/78810563616f3fcb0b63eb8a50b8b2e62d9d65fc/test/Microsoft.ML.Tests/Scenarios/ClusteringTests.cs)
shows how to use it (from
[#222](https://github.com/dotnet/machinelearning/pull/222)).

* Train using in-memory data objects via `CollectionDataSource`, in addition to
loading data from a file. ML.NET 0.1 enabled loading data from a delimited
text file. `CollectionDataSource` in ML.NET 0.2 adds the ability to use a
collection of objects as the input to a `LearningPipeline`. See sample usage
[here](https://github.com/dotnet/machinelearning/blob/78810563616f3fcb0b63eb8a50b8b2e62d9d65fc/test/Microsoft.ML.Tests/CollectionDataSourceTests.cs#L133)
(from [#106](https://github.com/dotnet/machinelearning/pull/106)).

* Easier model validation with cross-validation and train-test

* [Cross-validation](https://en.wikipedia.org/wiki/Cross-validation_(statistics))
is an approach to validating how well your model statistically performs.
It does not require a separate test dataset, but rather uses your
training data to test your model (it partitions the data so different
data is used for training and testing, and it does this multiple times).
[Here](https://github.com/dotnet/machinelearning/blob/78810563616f3fcb0b63eb8a50b8b2e62d9d65fc/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs#L51)
is an example for doing cross-validation (from
[#212](https://github.com/dotnet/machinelearning/pull/212)).

* Train-test is a shortcut to testing your model on a separate dataset.
See example usage
[here](https://github.com/dotnet/machinelearning/blob/78810563616f3fcb0b63eb8a50b8b2e62d9d65fc/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs#L36).

* Note that the `LearningPipeline` is prepared the same way in both cases.

* Speed improvement for predictions: by not creating a parallel cursor for
dataviews that only have one element, we get a significant speed-up for
predictions (see
[#179](https://github.com/dotnet/machinelearning/issues/179) for a few
measurements).

* Updated `TextLoader` API: the `TextLoader` API is now code generated and was
updated to take explicit declarations for the columns in the data, which is
required in some scenarios. See
[#142](https://github.com/dotnet/machinelearning/pull/142).

* Added daily NuGet builds of the project: daily NuGet builds of ML.NET are
now available
[here](https://dotnet.myget.org/feed/dotnet-core/package/nuget/Microsoft.ML).

Additional issues closed in this milestone can be found [here](https://github.com/dotnet/machinelearning/milestone/1?closed=1).

### Acknowledgements

Shoutout to tincann, rantri, yamachu, pkulikov, Sorrien, v-tsymbalistyi, Ky7m,
forki, jessebenson, mfaticaearnin, and the ML.NET team for their contributions
as part of this release!
2 changes: 1 addition & 1 deletion pkg/Directory.Build.props
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
<PackageLicenseUrl>https://github.com/dotnet/machinelearning/blob/master/LICENSE</PackageLicenseUrl>
<PackageProjectUrl>https://dot.net/ml</PackageProjectUrl>
<PackageIconUrl>https://aka.ms/mlnetlogo</PackageIconUrl>
<PackageReleaseNotes>https://github.com/dotnet/machinelearning/tree/master/Documentation/release-notes</PackageReleaseNotes>
<PackageReleaseNotes>https://aka.ms/mlnetreleasenotes</PackageReleaseNotes>
</PropertyGroup>

<ItemGroup>
Expand Down
11 changes: 11 additions & 0 deletions src/Microsoft.ML.Console/Console.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

namespace Microsoft.ML.Runtime.Tools.Console
{
    /// <summary>
    /// Holds the process entry point for the console executable. It contains no
    /// logic of its own; everything is delegated to <c>Maml.Main</c>.
    /// </summary>
    public static class Console
    {
        /// <summary>
        /// Forwards the command-line arguments to <c>Maml.Main</c>.
        /// </summary>
        /// <param name="args">The command-line arguments, passed through unchanged.</param>
        /// <returns>The value returned by <c>Maml.Main</c>.</returns>
        public static int Main(string[] args)
        {
            return Maml.Main(args);
        }
    }
}
20 changes: 20 additions & 0 deletions src/Microsoft.ML.Console/Microsoft.ML.Console.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<Project Sdk="Microsoft.NET.Sdk">

<!--
  Project file for the console front end. It produces an executable assembly
  named "MML" targeting netcoreapp2.0, with
  Microsoft.ML.Runtime.Tools.Console.Console selected as the startup type.
-->
<PropertyGroup>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
<DefineConstants>CORECLR</DefineConstants>
<!-- NOTE(review): IncludeInPackage is presumably read by the repo's packaging targets to place this output in the Microsoft.ML package; confirm against the build props/targets. -->
<IncludeInPackage>Microsoft.ML</IncludeInPackage>
<TargetFramework>netcoreapp2.0</TargetFramework>
<OutputType>Exe</OutputType>
<AssemblyName>MML</AssemblyName>
<!-- Names the type whose Main method is used as the entry point. -->
<StartupObject>Microsoft.ML.Runtime.Tools.Console.Console</StartupObject>
</PropertyGroup>

<!-- Sibling projects this executable is built against. -->
<ItemGroup>
<ProjectReference Include="..\Microsoft.ML.Core\Microsoft.ML.Core.csproj" />
<ProjectReference Include="..\Microsoft.ML.Data\Microsoft.ML.Data.csproj" />
<ProjectReference Include="..\Microsoft.ML.Maml\Microsoft.ML.Maml.csproj" />
<ProjectReference Include="..\Microsoft.ML.PipelineInference\Microsoft.ML.PipelineInference.csproj" />
</ItemGroup>

</Project>
10 changes: 7 additions & 3 deletions src/Microsoft.ML.Core/Utilities/PathUtils.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Licensed to the .NET Foundation under one or more agreements.
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

Expand Down Expand Up @@ -67,13 +67,17 @@ public static string FindExistentFileOrNull(string fileName, string folderPrefix
// 1. Search in customSearchDir.
if (!string.IsNullOrWhiteSpace(customSearchDir)
&& TryFindFile(fileName, folderPrefix, customSearchDir, out candidate))
return candidate;
{
return candidate;
}

// 2. Search in the path specified by the environment variable.
var envDir = Environment.GetEnvironmentVariable(CustomSearchDirEnvVariable);
if (!string.IsNullOrWhiteSpace(envDir)
&& TryFindFile(fileName, folderPrefix, envDir, out candidate))
return candidate;
{
return candidate;
}

// 3. Search in the path specified by the assemblyForBasePath.
if (assemblyForBasePath != null)
Expand Down
32 changes: 23 additions & 9 deletions src/Microsoft.ML.Data/Commands/DataCommand.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,38 +20,38 @@ public static class DataCommand
{
public abstract class ArgumentsBase
{
[Argument(ArgumentType.Multiple, HelpText = "The data loader", ShortName = "loader", SortOrder = 1, NullName = "<Auto>")]
[Argument(ArgumentType.Multiple, Visibility = ArgumentAttribute.VisibilityType.CmdLineOnly, HelpText = "The data loader", ShortName = "loader", SortOrder = 1, NullName = "<Auto>")]
public SubComponent<IDataLoader, SignatureDataLoader> Loader;

[Argument(ArgumentType.AtMostOnce, IsInputFileName = true, HelpText = "The data file", ShortName = "data", SortOrder = 0)]
public string DataFile;

[Argument(ArgumentType.AtMostOnce, HelpText = "Model file to save", ShortName = "out")]
[Argument(ArgumentType.AtMostOnce, Visibility = ArgumentAttribute.VisibilityType.CmdLineOnly, HelpText = "Model file to save", ShortName = "out")]
public string OutputModelFile;

[Argument(ArgumentType.AtMostOnce, IsInputFileName = true, HelpText = "Model file to load", ShortName = "in", SortOrder = 90)]
[Argument(ArgumentType.AtMostOnce, Visibility = ArgumentAttribute.VisibilityType.CmdLineOnly, IsInputFileName = true, HelpText = "Model file to load", ShortName = "in", SortOrder = 90)]
public string InputModelFile;

[Argument(ArgumentType.Multiple, HelpText = "Load transforms from model file?", ShortName = "loadTrans", SortOrder = 91)]
[Argument(ArgumentType.Multiple, Visibility = ArgumentAttribute.VisibilityType.CmdLineOnly, HelpText = "Load transforms from model file?", ShortName = "loadTrans", SortOrder = 91)]
public bool? LoadTransforms;

[Argument(ArgumentType.AtMostOnce, HelpText = "Random seed", ShortName = "seed", SortOrder = 101)]
[Argument(ArgumentType.AtMostOnce, Visibility = ArgumentAttribute.VisibilityType.CmdLineOnly, HelpText = "Random seed", ShortName = "seed", SortOrder = 101)]
public int? RandomSeed;

[Argument(ArgumentType.AtMostOnce, HelpText = "Verbose?", ShortName = "v", Hide = true)]
[Argument(ArgumentType.AtMostOnce, Visibility = ArgumentAttribute.VisibilityType.CmdLineOnly, HelpText = "Verbose?", ShortName = "v", Hide = true)]
public bool? Verbose;

[Argument(ArgumentType.AtMostOnce, HelpText = "The web server to publish the RESTful API", Hide = true)]
[Argument(ArgumentType.AtMostOnce, Visibility = ArgumentAttribute.VisibilityType.CmdLineOnly, HelpText = "The web server to publish the RESTful API", Hide = true)]
public ServerChannel.IServerFactory Server;

// This is actually an advisory value. The implementations themselves are responsible for
// determining what they consider appropriate, and the actual heuristics is a bit more
// complex than just this.
[Argument(ArgumentType.LastOccurenceWins,
[Argument(ArgumentType.LastOccurenceWins, Visibility = ArgumentAttribute.VisibilityType.CmdLineOnly,
HelpText = "Desired degree of parallelism in the data pipeline", ShortName = "n")]
public int? Parallel;

[Argument(ArgumentType.Multiple, HelpText = "Transform", ShortName = "xf")]
[Argument(ArgumentType.Multiple, Visibility = ArgumentAttribute.VisibilityType.CmdLineOnly, HelpText = "Transform", ShortName = "xf")]
public KeyValuePair<string, SubComponent<IDataTransform, SignatureDataTransform>>[] Transform;
}

Expand Down Expand Up @@ -396,6 +396,20 @@ public static void SaveLoader(IDataLoader loader, IFileHandle file)
Contracts.CheckParam(file.CanWrite, nameof(file), "Must be writable");

using (var stream = file.CreateWriteStream())
{
SaveLoader(loader, stream);
}
}

/// <summary>
/// Saves <paramref name="loader"/> to the specified <paramref name="stream"/>.
/// </summary>
public static void SaveLoader(IDataLoader loader, Stream stream)
{
Contracts.CheckValue(loader, nameof(loader));
Contracts.CheckValue(stream, nameof(stream));
Contracts.CheckParam(stream.CanWrite, nameof(stream), "Must be writable");

using (var rep = RepositoryWriter.CreateNew(stream))
{
ModelSaveContext.SaveModel(rep, loader, ModelFileUtils.DirDataLoaderModel);
Expand Down
Loading