Skip to content

Commit 674ebd8

Browse files
authored
Merge branch 'master' into bump-ort-version
2 parents 44b2a81 + e66e19e commit 674ebd8

File tree

74 files changed

+5509
-1327
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

74 files changed

+5509
-1327
lines changed

Microsoft.ML.sln

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,16 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Microsoft.ML.FastTree", "Mi
274274
pkg\Microsoft.ML.FastTree\Microsoft.ML.FastTree.symbols.nupkgproj = pkg\Microsoft.ML.FastTree\Microsoft.ML.FastTree.symbols.nupkgproj
275275
EndProjectSection
276276
EndProject
277+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.Extensions.ML", "src\Microsoft.Extensions.ML\Microsoft.Extensions.ML.csproj", "{D6741C37-B5E6-4050-BCBA-9715809EA15B}"
278+
EndProject
279+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.Extensions.ML.Tests", "test\Microsoft.Extensions.ML.Tests\Microsoft.Extensions.ML.Tests.csproj", "{21CAD3A1-5E1F-42C1-BB73-46B6E67F4206}"
280+
EndProject
281+
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Microsoft.Extensions.ML", "Microsoft.Extensions.ML", "{AE4F7569-26F3-4160-8A8B-7A57D0DA3350}"
282+
ProjectSection(SolutionItems) = preProject
283+
pkg\Microsoft.Extensions.ML\Microsoft.Extensions.ML.nupkgproj = pkg\Microsoft.Extensions.ML\Microsoft.Extensions.ML.nupkgproj
284+
pkg\Microsoft.Extensions.ML\Microsoft.Extensions.ML.symbols.nupkgproj = pkg\Microsoft.Extensions.ML\Microsoft.Extensions.ML.symbols.nupkgproj
285+
EndProjectSection
286+
EndProject
277287
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.StableApi", "tools-local\Microsoft.ML.StableApi\Microsoft.ML.StableApi.csproj", "{F308DC6B-7E59-40D7-A581-834E8CD99CFE}"
278288
EndProject
279289
Global
@@ -970,6 +980,30 @@ Global
970980
{E02DA82D-3FEE-4C60-BD80-9EC3C3448DFC}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU
971981
{E02DA82D-3FEE-4C60-BD80-9EC3C3448DFC}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU
972982
{E02DA82D-3FEE-4C60-BD80-9EC3C3448DFC}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU
983+
{D6741C37-B5E6-4050-BCBA-9715809EA15B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
984+
{D6741C37-B5E6-4050-BCBA-9715809EA15B}.Debug|Any CPU.Build.0 = Debug|Any CPU
985+
{D6741C37-B5E6-4050-BCBA-9715809EA15B}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug-Intrinsics|Any CPU
986+
{D6741C37-B5E6-4050-BCBA-9715809EA15B}.Debug-Intrinsics|Any CPU.Build.0 = Debug-Intrinsics|Any CPU
987+
{D6741C37-B5E6-4050-BCBA-9715809EA15B}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU
988+
{D6741C37-B5E6-4050-BCBA-9715809EA15B}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU
989+
{D6741C37-B5E6-4050-BCBA-9715809EA15B}.Release|Any CPU.ActiveCfg = Release|Any CPU
990+
{D6741C37-B5E6-4050-BCBA-9715809EA15B}.Release|Any CPU.Build.0 = Release|Any CPU
991+
{D6741C37-B5E6-4050-BCBA-9715809EA15B}.Release-Intrinsics|Any CPU.ActiveCfg = Release-Intrinsics|Any CPU
992+
{D6741C37-B5E6-4050-BCBA-9715809EA15B}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU
993+
{D6741C37-B5E6-4050-BCBA-9715809EA15B}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU
994+
{D6741C37-B5E6-4050-BCBA-9715809EA15B}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU
995+
{21CAD3A1-5E1F-42C1-BB73-46B6E67F4206}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
996+
{21CAD3A1-5E1F-42C1-BB73-46B6E67F4206}.Debug|Any CPU.Build.0 = Debug|Any CPU
997+
{21CAD3A1-5E1F-42C1-BB73-46B6E67F4206}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug-Intrinsics|Any CPU
998+
{21CAD3A1-5E1F-42C1-BB73-46B6E67F4206}.Debug-Intrinsics|Any CPU.Build.0 = Debug-Intrinsics|Any CPU
999+
{21CAD3A1-5E1F-42C1-BB73-46B6E67F4206}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU
1000+
{21CAD3A1-5E1F-42C1-BB73-46B6E67F4206}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU
1001+
{21CAD3A1-5E1F-42C1-BB73-46B6E67F4206}.Release|Any CPU.ActiveCfg = Release|Any CPU
1002+
{21CAD3A1-5E1F-42C1-BB73-46B6E67F4206}.Release|Any CPU.Build.0 = Release|Any CPU
1003+
{21CAD3A1-5E1F-42C1-BB73-46B6E67F4206}.Release-Intrinsics|Any CPU.ActiveCfg = Release-Intrinsics|Any CPU
1004+
{21CAD3A1-5E1F-42C1-BB73-46B6E67F4206}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU
1005+
{21CAD3A1-5E1F-42C1-BB73-46B6E67F4206}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU
1006+
{21CAD3A1-5E1F-42C1-BB73-46B6E67F4206}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU
9731007
{F308DC6B-7E59-40D7-A581-834E8CD99CFE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
9741008
{F308DC6B-7E59-40D7-A581-834E8CD99CFE}.Debug|Any CPU.Build.0 = Debug|Any CPU
9751009
{F308DC6B-7E59-40D7-A581-834E8CD99CFE}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug-Intrinsics|Any CPU
@@ -1069,6 +1103,9 @@ Global
10691103
{AD7058C9-5608-49A8-BE23-58C33A74EE91} = {D3D38B03-B557-484D-8348-8BADEE4DF592}
10701104
{E02DA82D-3FEE-4C60-BD80-9EC3C3448DFC} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
10711105
{B1B3F284-FA3D-4D76-A712-FF04495D244B} = {D3D38B03-B557-484D-8348-8BADEE4DF592}
1106+
{D6741C37-B5E6-4050-BCBA-9715809EA15B} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
1107+
{21CAD3A1-5E1F-42C1-BB73-46B6E67F4206} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
1108+
{AE4F7569-26F3-4160-8A8B-7A57D0DA3350} = {D3D38B03-B557-484D-8348-8BADEE4DF592}
10721109
{F308DC6B-7E59-40D7-A581-834E8CD99CFE} = {7F13E156-3EBA-4021-84A5-CD56BA72F99E}
10731110
EndGlobalSection
10741111
GlobalSection(ExtensibilityGlobals) = postSolution

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ Please check our documentation [here](https://docs.microsoft.com/en-us/dotnet/ma
2222

2323
ML.NET runs on Windows, Linux, and macOS using [.NET Core](https://github.com/dotnet/core), or Windows using .NET Framework. 64 bit is supported on all platforms. 32 bit is supported on Windows, except for TensorFlow, LightGBM, and ONNX related functionality.
2424

25-
The current release is 1.0.0. Check out the [release notes](docs/release-notes/1.0.0/release-1.0.0.md) to see what's new.
25+
Check out the [release notes](docs/release-notes) to see what's new.
2626

2727
First, ensure you have installed [.NET Core 2.1](https://www.microsoft.com/net/learn/get-started) or later. ML.NET also works on the .NET Framework 4.6.1 or later, but 4.7.2 or later is recommended.
2828

ROADMAP.md

Lines changed: 8 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -9,59 +9,30 @@ In the meanwhile, we are looking for contributions. An easy place to start is t
99

1010
## Short Term
1111
### Training Improvements
12-
* Improved public API for training and inference
13-
* Enhanced tests and scenarios
14-
* Additional Learners
15-
* [LibSVM](https://www.csie.ntu.edu.tw/~cjlin/libsvm/) for anomaly detection (*)
16-
* [LightGBM](https://github.com/Microsoft/LightGBM) - a high-performance boosted decision tree (*)
17-
* Additional Learning Tasks (*)
18-
* _Ranking_ - problem where the goal is to automatically sort (rank) instances within a group based on ranked examples in training data
19-
* _Anomaly Detection_ - is also known as _outlier detection_. It is a task to identify items, events or observations which do not conform to an expected pattern in the dataset.
20-
* _Quantile Regression_ is a type of regression analysis. Whereas regression results in estimates that approximate the conditional mean of the response variable given certain values of the predictor variables, quantile regression aims at estimating either the conditional median or other quantiles of the response variable
21-
* Additional Data Source support (*)
22-
* Apache Parquet
23-
* Native Binary high-performance format
24-
25-
### Featurization Improvements
26-
We already provide text/NLP and image processing functionalities that will be expanded
27-
* Text (*)
28-
* Natural language text preprocessing such as improving tokenization features, adding part-of-speech tagging, and sentence boundary disambiguation
29-
* Pre-trained text models (beyond current n-gram and pre-trained WordEmbedding text handling) that can further improve the extraction of semantic or sentiment features from text
30-
* Image (*)
31-
* Image preprocessing such as loading, resizing, and normalization of images
32-
* Image featurization, including industry-standard pre-trained ImageNet neural models, such as ResNet and AlexNet
12+
* Deep Learning Training Support
13+
* Integrate with leading DNN package(s)
14+
* Support for transfer learning.
15+
* Hybrid training of pipelines containing both DNN and non-DNN predictors.
16+
* Fast.ai-like APIs.
3317

3418
### Trained Model Management
3519
* Export models to [ONNX](https://github.com/onnx/models) (*)
3620

37-
### GUI
38-
* Release the Model Builder tool to ease model development (*)
39-
* Design improvements to make the design adhere better to Fluent principles
40-
* Add a view for an easier comparison of several experiments
41-
* Ability to select the best performing pipeline, by sweeping transforms, the same way learners are swept.
42-
4321
## Longer Term
4422

4523
### Training Improvements
4624
* Add more learners, perhaps, including: (*)
47-
* Generative Additive Models
48-
* [SymSGD](https://arxiv.org/pdf/1705.08030.pdf) -a fast linear SGD learner
49-
* Factorization Machines
50-
* [ProtoNN and Bonsaii](https://www.microsoft.com/en-us/research/project/resource-efficient-ml-for-the-edge-and-endpoint-iot-devices/) for compact and efficient models
25+
* [ProtoNN and Bonsai](https://www.microsoft.com/en-us/research/project/resource-efficient-ml-for-the-edge-and-endpoint-iot-devices/) for compact and efficient models.
5126
* Integration with other ML packages
5227
* Accord.NET
5328
* etc.
54-
* Deep Learning Support
55-
* Integrate with leading DNN package(s)
56-
* Support for transfer learning
57-
* Hybrid training of pipelines containing both DNN and non-DNN predictors
5829
* Additional ML tasks (*)
59-
* _Recommendation_ - Is a problem that can be phrased a: "For a given user, predict the ratings this user would give to the items that they have not explicitly rated yet"
60-
* _Anomaly Detection_, also known as _outlier detection_. It is a task to identify items, events or observations which do not conform to an expected pattern in the dataset. Typical examples are: detecting credit card fraud, medical problems or errors in text. Anomalies are also referred to as outliers, novelties, noise, deviations and exceptions
6130
* _Sequence Classification_ - learns from a series of examples in a sequence, and each item is assigned a distinct label, akin to a multiclass classification task
6231
* Additional Data source support
6332
* Data from SQL Databases, such as SQL Server
6433
* Data located on the cloud
34+
* Apache Parquet
35+
* Native Binary high-performance format
6536
* Distributed Training
6637
* Easily train models on the cloud
6738
* Whole-pipeline optimizations for both training and inference

build/BranchInfo.props

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,13 @@
2222
</PropertyGroup>
2323
<PropertyGroup Condition="'$(IsStableProject)' == 'true'">
2424
<MajorVersion>1</MajorVersion>
25-
<MinorVersion>1</MinorVersion>
25+
<MinorVersion>2</MinorVersion>
2626
<PatchVersion>0</PatchVersion>
2727
<PreReleaseLabel>preview</PreReleaseLabel>
2828
</PropertyGroup>
2929
<PropertyGroup Condition="'$(IsStableProject)' != 'true'">
3030
<MajorVersion>0</MajorVersion>
31-
<MinorVersion>13</MinorVersion>
31+
<MinorVersion>14</MinorVersion>
3232
<PatchVersion>0</PatchVersion>
3333
<PreReleaseLabel>preview</PreReleaseLabel>
3434
</PropertyGroup>

build/Dependencies.props

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
<PropertyGroup>
1515
<GoogleProtobufPackageVersion>3.5.1</GoogleProtobufPackageVersion>
1616
<LightGBMPackageVersion>2.2.3</LightGBMPackageVersion>
17+
<MicrosoftExtensionsPackageVersion>2.1.0</MicrosoftExtensionsPackageVersion>
1718
<MicrosoftMLOnnxRuntimePackageVersion>0.4.0</MicrosoftMLOnnxRuntimePackageVersion>
1819
<MlNetMklDepsPackageVersion>0.0.0.9</MlNetMklDepsPackageVersion>
1920
<ParquetDotNetPackageVersion>2.1.3</ParquetDotNetPackageVersion>

build/vsts-ci.yml

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ phases:
182182
_SignType: real
183183
_UseEsrpSigning: true
184184
_TeamName: DotNetCore
185-
_NuGetFeedUrl: https://www.myget.org/F/shmoradi-mlnet2/api/v2/package
185+
_NuGetFeedUrl: https://dotnet.myget.org/F/dotnet-core/api/v2/package
186186
_SymwebSymbolServerPath: https://microsoft.artifacts.visualstudio.com/DefaultCollection
187187
_MsdlSymbolServerPath: https://microsoftpublicsymbols.artifacts.visualstudio.com/DefaultCollection
188188
queue:
@@ -236,12 +236,12 @@ phases:
236236
nuGetFeedType: internal
237237
feedPublish: MachineLearning
238238

239-
- task: MSBuild@1
240-
displayName: Publish Packages to MyGet Feed
241-
inputs:
242-
solution: build/publish.proj
243-
msbuildArguments: /t:PublishPackages /p:NuGetFeedUrl=$(_NuGetFeedUrl) /p:NuGetApiKey=$(myget-shmoradi-mlnet2-api-key)
244-
msbuildVersion: 15.0
239+
# - task: MSBuild@1
240+
# displayName: Publish Packages to MyGet Feed
241+
# inputs:
242+
# solution: build/publish.proj
243+
# msbuildArguments: /t:PublishPackages /p:NuGetFeedUrl=$(_NuGetFeedUrl) /p:NuGetApiKey=$(dotnet-myget-org-api-key)
244+
# msbuildVersion: 15.0
245245

246246
- task: MSBuild@1
247247
displayName: Publish Symbols to SymWeb Symbol Server

docs/building/unix-instructions.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ The following components are needed:
2626
* clang-3.9
2727
* cmake 2.8.12
2828
* libunwind8
29+
* libomp-dev
2930
* curl
3031
* All the requirements necessary to run .NET Core 2.0 applications: libssl1.0.0 (1.0.2 for Debian 9) and libicu5x (libicu52 for ubuntu 14.x, libicu55 for ubuntu 16.x, and libicu57 for ubuntu 17.x). For more information on prerequisites in different linux distributions click [here](https://docs.microsoft.com/en-us/dotnet/core/linux-prerequisites?tabs=netcore2x).
3132

@@ -35,6 +36,7 @@ For example, for Ubuntu 16.x:
3536
sudo apt-get update
3637
sudo apt-get install git clang-3.9 cmake libunwind8 curl
3738
sudo apt-get install libssl1.0.0 libicu55
39+
sudo apt-get install libomp-dev
3840
```
3941

4042
### macOS

docs/code/IDataViewTypeSystem.md

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -190,12 +190,11 @@ text value to the value zero.
190190

191191
### Missing Value
192192

193-
Most of the standard primitive types support the notion of a missing value. In
194-
particular, the text type, floating-point types, signed integer types, and key
195-
types all have an internal representation of missing. We follow R's lead and
196-
denote such values as `NA`.
193+
Only floating-point types and key types have an internal representation of missing.
194+
Other types don't support missing values. We follow R's lead and
195+
denote the missing values as `NA`.
197196

198-
Unlike R, the standard primitive types do not distinguish between missing and
197+
Unlike R, the floating-point types do not distinguish between missing and
199198
invalid. For example, in floating-point arithmetic, computing zero divided by
200199
zero, or infinity minus infinity, produces an invalid value known as a `NaN`
201200
(for Not-a-Number). R uses a specific `NaN` value to represent its `NA` value,
@@ -207,7 +206,7 @@ A standard conversion from a source type with `NA` to a destination type with
207206
`NA` maps `NA` to `NA`. A standard conversion from a source type with `NA` to
208207
a destination type without `NA` maps `NA` to the default value of the
209208
destination type. For example, converting an `R4` `NA` to `R8` produces a `NaN`, but
210-
converting a an `R4` `NA` to `U4` results in zero. Note that this
209+
converting an `R4` `NA` to `U4` results in zero. Note that this
211210
specification does not address diagnostic user messages, so, in certain
212211
environments, the latter situation may generate a warning to the user, or even
213212
an exception.

docs/code/SchemaComprehension.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ If you ever see the error message that says: `An attempt was made to keep iterat
144144
`IDataView` [type system](IDataViewTypeSystem.md) differs slightly from the C# type system, so a 1-1 mapping between column types and C# types is not always feasible.
145145
Below are the most notable examples of the differences:
146146

147-
* `IDataView` vector columns often have a fixed (and known) size. The C# array type best corresponds to a 'variable size' vector: the one that can have different number of slots on every row. You can use `[VectorType(N)]` attribute to an array field to specify that the column is a vector of fixed size N. This is often necessary: most ML components don't work with variable-size vectors, they require fixed-size ones.
147+
* `IDataView` vector columns often have a fixed (and known) size. The C# array type best corresponds to a 'variable size' vector: one that can have a different number of slots on every row. You can apply the `[VectorDataViewType(N)]` attribute to an array field to specify that the column is a vector of fixed size N. This is often necessary: most ML components don't work with variable-size vectors; they require fixed-size ones.
148148
* `IDataView`'s [key types](IDataViewTypeSystem.md#key-types) don't have a natural underlying C# type either. To declare a key-type column, you need to make your field a `uint`, and decorate it with `[KeyType]` to denote that the field is a key, and not a regular unsigned integer.
149149

150150
### Full list of type mappings
@@ -169,7 +169,7 @@ The below table illustrates what C# types are mapped to what `IDataView` types:
169169
| `DT` | `DvDateTime` | |
170170
| `DZ` | `DvDateTimeZone` | |
171171
| Variable-size vector | `VBuffer<T>` | `T[]`, and the vector is always dense |
172-
| Fixed-size vector | `VBuffer<T>` with `[VectorType(N)]` | `T[]` with `VectorType(N)`, and the vector is always dense |
172+
| Fixed-size vector | `VBuffer<T>` with `[VectorDataViewType(N)]` | `T[]` with `VectorDataViewType(N)`, and the vector is always dense |
173173
| Key type | `uint` with `[KeyType]` | |
174174

175175
### Additional attributes to affect type mapping
@@ -193,7 +193,7 @@ int numberOfFeatures = 4;
193193
var schemaDef = SchemaDefinition.Create(typeof(IrisVectorData));
194194

195195
// Specify the right vector size.
196-
schemaDef["Features"].ColumnType = new VectorType(NumberType.R4, numberOfFeatures);
196+
schemaDef["Features"].ColumnType = new VectorDataViewType(NumberType.R4, numberOfFeatures);
197197

198198
// Create a data view.
199199
var dataView = env.CreateDataView<IrisVectorData>(arr, schemaDef);

0 commit comments

Comments
 (0)