diff --git a/.vsts-dotnet-ci.yml b/.vsts-dotnet-ci.yml
index b3668f511b..3c727fc99a 100644
--- a/.vsts-dotnet-ci.yml
+++ b/.vsts-dotnet-ci.yml
@@ -10,11 +10,12 @@ resources:
- container: UbuntuContainer
image: mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-16.04-mlnet-207e097-20190312152303
-phases:
-- template: /build/ci/phase-template.yml
+jobs:
+- template: /build/ci/job-template.yml
parameters:
name: Centos_x64_NetCoreApp30
buildScript: ./build.sh
+ container: CentosContainer
customMatrixes:
Debug_Build:
_configuration: Debug-Intrinsics
@@ -24,26 +25,25 @@ phases:
_configuration: Release-Intrinsics
_config_short: RI
_includeBenchmarkData: true
- queue:
+ pool:
name: Hosted Ubuntu 1604
- container: CentosContainer
-- template: /build/ci/phase-template.yml
+- template: /build/ci/job-template.yml
parameters:
name: Ubuntu_x64_NetCoreApp21
buildScript: ./build.sh
- queue:
+ container: UbuntuContainer
+ pool:
name: Hosted Ubuntu 1604
- container: UbuntuContainer
-- template: /build/ci/phase-template.yml
+- template: /build/ci/job-template.yml
parameters:
name: MacOS_x64_NetCoreApp21
buildScript: ./build.sh
- queue:
+ pool:
name: Hosted macOS
-- template: /build/ci/phase-template.yml
+- template: /build/ci/job-template.yml
parameters:
name: Windows_x64_NetCoreApp30
buildScript: build.cmd
@@ -56,17 +56,18 @@ phases:
_configuration: Release-Intrinsics
_config_short: RI
_includeBenchmarkData: true
- queue:
- name: Hosted VS2017
+ pool:
+ name: NetCorePublic-Pool
+ queue: buildpool.windows.10.amd64.vs2017.open
-- template: /build/ci/phase-template.yml
+- template: /build/ci/job-template.yml
parameters:
name: Windows_x64_NetCoreApp21
buildScript: build.cmd
- queue:
+ pool:
name: Hosted VS2017
-- template: /build/ci/phase-template.yml
+- template: /build/ci/job-template.yml
parameters:
name: Windows_x64_NetFx461
buildScript: build.cmd
@@ -79,13 +80,13 @@ phases:
_configuration: Release-netfx
_config_short: RFX
_includeBenchmarkData: false
- queue:
+ pool:
name: Hosted VS2017
-- template: /build/ci/phase-template.yml
+- template: /build/ci/job-template.yml
parameters:
name: Windows_x86_NetCoreApp21
architecture: x86
buildScript: build.cmd
- queue:
+ pool:
name: Hosted VS2017
diff --git a/Directory.Build.targets b/Directory.Build.targets
index 5e6446add9..58448682ae 100644
--- a/Directory.Build.targets
+++ b/Directory.Build.targets
@@ -21,6 +21,14 @@
+
+
+
+ $(NativeOutputPath)$(LibPrefix)%(NativeAssemblyReferenceWithMajorVersion.Identity)$(LibExtension).%(NativeAssemblyReferenceWithMajorVersion.MajorVersion)
+ $(NativeOutputPath)$(LibPrefix)%(NativeAssemblyReferenceWithMajorVersion.Identity).%(NativeAssemblyReferenceWithMajorVersion.MajorVersion)$(LibExtension)
+
+
+
-
+
+
+
+
+
+
diff --git a/build/BranchInfo.props b/build/BranchInfo.props
index 016f3c30ac..017da9e572 100644
--- a/build/BranchInfo.props
+++ b/build/BranchInfo.props
@@ -15,6 +15,9 @@
Microsoft.ML.LightGbm;
Microsoft.ML.Mkl.Components;
Microsoft.ML.Mkl.Redist;
+ Microsoft.ML.TimeSeries;
+ Microsoft.ML.TensorFlow;
+ Microsoft.ML.OnnxTransformer;
$(StableProjects.Contains($(MSBuildProjectName)))
$(StableProjects.Contains($(MSBuildProjectName.Substring(0, $(MSBuildProjectName.IndexOf(.symbols))))))
@@ -22,13 +25,13 @@
1
- 2
+ 3
0
preview
0
- 14
+ 15
0
preview
diff --git a/build/Dependencies.props b/build/Dependencies.props
index 2ed9835fb4..ea5a6a8736 100644
--- a/build/Dependencies.props
+++ b/build/Dependencies.props
@@ -15,13 +15,14 @@
3.5.1
2.2.3
2.1.0
- 0.3.0
+ 0.4.0
0.0.0.9
2.1.3
4.5.0
4.5.0
4.5.0
- 1.13.1
+ 1.14.0
+ 1
@@ -44,9 +45,9 @@
0.11.3
1.0.0-beta1-63812-02
- 0.0.4-test
+ 0.0.5-test
0.0.11-test
- 0.0.4-test
+ 0.0.5-test
diff --git a/build/ci/phase-template.yml b/build/ci/job-template.yml
similarity index 69%
rename from build/ci/phase-template.yml
rename to build/ci/job-template.yml
index 6d70c14bda..82415d87ba 100644
--- a/build/ci/phase-template.yml
+++ b/build/ci/job-template.yml
@@ -2,23 +2,18 @@ parameters:
name: ''
architecture: x64
buildScript: ''
- queue: {}
+ pool: {}
customMatrixes: ''
codeCoverage: false
+ container: ''
-phases:
- - phase: ${{ parameters.name }}
- variables:
- _buildScript: ${{ parameters.buildScript }}
- _phaseName: ${{ parameters.name }}
- _arch: ${{ parameters.architecture }}
- _codeCoverage: ${{ parameters.codeCoverage }}
- queue:
- ${{ if eq(variables._codeCoverage, 'false') }}:
- timeoutInMinutes: 30
- ${{ if eq(variables._codeCoverage, 'true') }}:
- timeoutInMinutes: 60
- parallel: 99
+jobs:
+ - job: ${{ parameters.name }}
+ ${{ if eq(parameters.codeCoverage, 'false') }}:
+ timeoutInMinutes: 40
+ ${{ if eq(parameters.codeCoverage, 'true') }}:
+ timeoutInMinutes: 60
+ strategy:
matrix:
${{ if eq(parameters.customMatrixes, '') }}:
Debug_Build:
@@ -31,20 +26,24 @@ phases:
_includeBenchmarkData: true
${{ if ne(parameters.customMatrixes, '') }}:
${{ insert }}: ${{ parameters.customMatrixes }}
- ${{ insert }}: ${{ parameters.queue }}
+
+ pool: ${{ parameters.pool }}
+ ${{ if ne(parameters.container, '') }}:
+ container: ${{ parameters.container }}
+
steps:
- - ${{ if eq(parameters.queue.name, 'Hosted macOS') }}:
+ - ${{ if eq(parameters.pool.name, 'Hosted macOS') }}:
- script: brew update && brew install https://raw.githubusercontent.com/Homebrew/homebrew-core/f5b1ac99a7fba27c19cee0bc4f036775c889b359/Formula/libomp.rb && brew install mono-libgdiplus gettext && brew link gettext --force && brew link libomp --force
displayName: Install build dependencies
- - script: $(_buildScript) -$(_configuration) -buildArch=$(_arch)
+ - script: ${{ parameters.buildScript }} -$(_configuration) -buildArch=${{ parameters.architecture }}
displayName: Build
- - script: $(_buildScript) -- /t:DownloadExternalTestFiles /p:IncludeBenchmarkData=$(_includeBenchmarkData)
+ - script: ${{ parameters.buildScript }} -- /t:DownloadExternalTestFiles /p:IncludeBenchmarkData=$(_includeBenchmarkData)
displayName: Download Benchmark Data
- - script: $(_buildScript) -$(_configuration) -runtests -coverage=$(_codeCoverage)
+ - script: ${{ parameters.buildScript }} -$(_configuration) -runtests -coverage=${{ parameters.codeCoverage }}
displayName: Run Tests.
- script: $(Build.SourcesDirectory)/Tools/dotnetcli/dotnet msbuild build/Codecoverage.proj /p:CodeCovToken=$(CODECOV_TOKEN)
displayName: Upload coverage to codecov.io
- condition: and(succeeded(), eq(variables._codeCoverage, 'true'))
+ condition: and(succeeded(), eq(${{ parameters.codeCoverage }}, True))
- task: PublishTestResults@2
displayName: Publish Test Results
condition: succeededOrFailed()
@@ -52,7 +51,7 @@ phases:
testRunner: 'vSTest'
searchFolder: '$(System.DefaultWorkingDirectory)/bin'
testResultsFiles: '**/*.trx'
- testRunTitle: Machinelearning_Tests_$(_phaseName)_$(_configuration)_$(Build.BuildNumber)
+ testRunTitle: Machinelearning_Tests_${{ parameters.name }}_$(_configuration)_$(Build.BuildNumber)
configuration: $(_configuration)
mergeTestResults: true
- task: CopyFiles@2
@@ -78,5 +77,5 @@ phases:
pathToPublish: $(Build.ArtifactStagingDirectory)
artifactName: ${{ parameters.name }} $(_config_short)
artifactType: container
- - script: $(_buildScript) -buildPackages
+ - script: ${{ parameters.buildScript }} -buildPackages
displayName: Build Packages
diff --git a/build/codecoverage-ci.yml b/build/codecoverage-ci.yml
index 7dc3910255..3f6005f53e 100644
--- a/build/codecoverage-ci.yml
+++ b/build/codecoverage-ci.yml
@@ -3,7 +3,7 @@
################################################################################
phases:
-- template: /build/ci/phase-template.yml
+- template: /build/ci/job-template.yml
parameters:
name: Windows_x64
buildScript: build.cmd
diff --git a/docs/api-reference/io-columns-tree-featurization-binary-classification.md b/docs/api-reference/io-columns-tree-featurization-binary-classification.md
new file mode 100644
index 0000000000..1fd9a68a89
--- /dev/null
+++ b/docs/api-reference/io-columns-tree-featurization-binary-classification.md
@@ -0,0 +1,14 @@
+### Input and Output Columns
+The input label column data must be .
+The input features column data must be a known-sized vector of .
+
+This estimator outputs the following columns:
+
+| Output Column Name | Column Type | Description|
+| -- | -- | -- |
+| `Trees` | Known-sized vector of | The output values of all trees. Its size is identical to the total number of trees in the tree ensemble model. |
+| `Leaves` | Known-sized vector of | 0-1 vector representation to the IDs of all leaves where the input feature vector falls into. Its size is the number of total leaves in the tree ensemble model. |
+| `Paths` | Known-sized vector of | 0-1 vector representation to the paths the input feature vector passed through to reach the leaves. Its size is the number of non-leaf nodes in the tree ensemble model. |
+
+Those output columns are all optional and the user can change their names.
+Please set the names of skipped columns to null so that they would not be produced.
\ No newline at end of file
diff --git a/docs/api-reference/io-columns-tree-featurization-ranking.md b/docs/api-reference/io-columns-tree-featurization-ranking.md
new file mode 100644
index 0000000000..375ad18f9c
--- /dev/null
+++ b/docs/api-reference/io-columns-tree-featurization-ranking.md
@@ -0,0 +1,20 @@
+### Input and Output Columns
+The input label data type must be [key](xref:Microsoft.ML.Data.KeyDataViewType)
+type or . The value of the label determines relevance, where
+higher values indicate higher relevance. If the label is a
+[key](xref:Microsoft.ML.Data.KeyDataViewType) type, then the key index is the
+relevance value, where the smallest index is the least relevant. If the label is a
+, larger values indicate higher relevance. The feature
+column must be a known-sized vector of and input row group
+column must be [key](xref:Microsoft.ML.Data.KeyDataViewType) type.
+
+This estimator outputs the following columns:
+
+| Output Column Name | Column Type | Description|
+| -- | -- | -- |
+| `Trees` | Known-sized vector of | The output values of all trees. Its size is identical to the total number of trees in the tree ensemble model. |
+| `Leaves` | Known-sized vector of | 0-1 vector representation to the IDs of all leaves where the input feature vector falls into. Its size is the number of total leaves in the tree ensemble model. |
+| `Paths` | Known-sized vector of | 0-1 vector representation to the paths the input feature vector passed through to reach the leaves. Its size is the number of non-leaf nodes in the tree ensemble model. |
+
+Those output columns are all optional and the user can change their names.
+Please set the names of skipped columns to null so that they would not be produced.
\ No newline at end of file
diff --git a/docs/api-reference/io-columns-tree-featurization-regression.md b/docs/api-reference/io-columns-tree-featurization-regression.md
new file mode 100644
index 0000000000..d4acf06f39
--- /dev/null
+++ b/docs/api-reference/io-columns-tree-featurization-regression.md
@@ -0,0 +1,14 @@
+### Input and Output Columns
+The input label column data must be .
+The input features column data must be a known-sized vector of .
+
+This estimator outputs the following columns:
+
+| Output Column Name | Column Type | Description|
+| -- | -- | -- |
+| `Trees` | Known-sized vector of | The output values of all trees. Its size is identical to the total number of trees in the tree ensemble model. |
+| `Leaves` | Known-sized vector of | 0-1 vector representation to the IDs of all leaves where the input feature vector falls into. Its size is the number of total leaves in the tree ensemble model. |
+| `Paths` | Known-sized vector of | 0-1 vector representation to the paths the input feature vector passed through to reach the leaves. Its size is the number of non-leaf nodes in the tree ensemble model. |
+
+Those output columns are all optional and the user can change their names.
+Please set the names of skipped columns to null so that they would not be produced.
\ No newline at end of file
diff --git a/docs/api-reference/io-time-series-ssa-forecast.md b/docs/api-reference/io-time-series-ssa-forecast.md
new file mode 100644
index 0000000000..ae510add41
--- /dev/null
+++ b/docs/api-reference/io-time-series-ssa-forecast.md
@@ -0,0 +1,5 @@
+### Input and Output Columns
+There is only one input column.
+The input column must be where a value indicates a value at a timestamp in the time series.
+
+It produces either just one vector of forecasted values or three vectors: a vector of forecasted values, a vector of confidence lower bounds and a vector of confidence upper bounds.
diff --git a/docs/api-reference/regularization-l1-l2.md b/docs/api-reference/regularization-l1-l2.md
index 758d060a32..3f6bda200c 100644
--- a/docs/api-reference/regularization-l1-l2.md
+++ b/docs/api-reference/regularization-l1-l2.md
@@ -1,6 +1,6 @@
-This class uses [empricial risk minimization](https://en.wikipedia.org/wiki/Empirical_risk_minimization) (i.e., ERM)
+This class uses [empirical risk minimization](https://en.wikipedia.org/wiki/Empirical_risk_minimization) (i.e., ERM)
to formulate the optimization problem built upon collected data.
-Note that empricial risk is usually measured by applying a loss function on the model's predictions on collected data points.
+Note that empirical risk is usually measured by applying a loss function on the model's predictions on collected data points.
If the training data does not contain enough data points
(for example, to train a linear model in $n$-dimensional space, we need at least $n$ data points),
[overfitting](https://en.wikipedia.org/wiki/Overfitting) may happen so that
diff --git a/docs/api-reference/tree-featurization-prediction.md b/docs/api-reference/tree-featurization-prediction.md
new file mode 100644
index 0000000000..6516dfef8c
--- /dev/null
+++ b/docs/api-reference/tree-featurization-prediction.md
@@ -0,0 +1,25 @@
+### Prediction Details
+This estimator produces several output columns from a tree ensemble model. Assume that the model contains only one decision tree:
+
+ Node 0
+ / \
+ / \
+ / \
+ / \
+ Node 1 Node 2
+ / \ / \
+ / \ / \
+ / \ Leaf -3 Node 3
+ Leaf -1 Leaf -2 / \
+ / \
+ Leaf -4 Leaf -5
+
+Assume that the input feature vector falls into `Leaf -1`. The output `Trees` may be a 1-element vector where
+the only value is the decision value carried by `Leaf -1`. The output `Leaves` is a 0-1 vector. If the reached
+leaf is the $i$-th (indexed by $-(i+1)$ so the first leaf is `Leaf -1`) leaf in the tree, the $i$-th value in `Leaves`
+would be 1 and all other values would be 0. The output `Paths` is a 0-1 representation of the nodes passed
+through before reaching the leaf. The $i$-th element in `Paths` indicates if the $i$-th node (indexed by $i$) is touched.
+For example, reaching `Leaf -1` leads to $[1, 1, 0, 0]$ as the `Paths`. If there are multiple trees, this estimator
+just concatenates `Trees`'s, `Leaves`'s, `Paths`'s from all trees (first tree's information comes first in the concatenated vectors).
+
+Check the See Also section for links to usage examples.
\ No newline at end of file
diff --git a/docs/release-notes/1.2.0/release-1.2.0.md b/docs/release-notes/1.2.0/release-1.2.0.md
new file mode 100644
index 0000000000..c6b703e434
--- /dev/null
+++ b/docs/release-notes/1.2.0/release-1.2.0.md
@@ -0,0 +1,84 @@
+# [ML.NET](http://dot.net/ml) 1.2.0
+## **General Availability**
+- **Microsoft.ML.TimeSeries**
+ - Anomaly detection algorithms (Spike and Change Point):
+ - Independent and identically distributed.
+ - Singular spectrum analysis.
+ - Spectral residual from Azure Anomaly Detector/Kensho team.
+ - Forecasting models:
+ - Singular spectrum analysis.
+ - Prediction Engine for online learning
+ - Enables updating time series model with new observations at scoring so that the user does not have to re-train the time series with old data each time.
+
+ [Samples](https://github.com/dotnet/machinelearning/tree/master/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries)
+
+- **Microsoft.ML.OnnxTransformer**
+ Enables scoring of ONNX models in the learning pipeline. Uses ONNX Runtime v0.4.
+
+ [Sample](https://github.com/dotnet/machinelearning/blob/master/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApplyOnnxModel.cs)
+
+- **Microsoft.ML.TensorFlow**
+ Enables scoring of TensorFlow models in the learning pipeline. Uses TensorFlow v1.13. Very useful for image and text classification. Users can featurize images or text using DNN models and feed the result into a classical machine learning model like a decision tree or logistic regression trainer.
+
+ [Samples](https://github.com/dotnet/machinelearning/tree/master/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow)
+
+## **New Features**
+- **Tree-based featurization** ([#3812](https://github.com/dotnet/machinelearning/pull/3812))
+
+  Generating features using tree structure has been a popular technique in data mining. Useful for capturing feature interactions when creating a stacked model, dimensionality reduction, or featurizing towards an alternative label. [ML.NET](http://dot.net/ml)'s tree featurization trains a tree-based model and then maps input feature vector to several non-linear feature vectors. Those generated feature vectors are:
+  - The leaves it falls into. It's a binary vector with ones at the indexes of the reached leaves,
+ - The paths that the input vector passes before hitting the leaves, and
+ - The reached leaves values.
+
+ Here are two references.
+ - [p. 9](https://www.csie.ntu.edu.tw/~r01922136/kaggle-2014-criteo.pdf) (a Kaggle solution adopted by FB below).
+ - [Section 3](http://www.quinonero.net/Publications/predicting-clicks-facebook.pdf). (Facebook)
+ - [Section of Entity-level personalization with GLMix](https://engineering.linkedin.com/blog/2019/04/ai-behind-linkedin-recruiter-search-and-recommendation-systems). (LinkedIn)
+
+ [Samples](https://github.com/dotnet/machinelearning/tree/master/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TreeFeaturization)
+
+- **Microsoft.Extensions.ML integration package.** ([#3827](https://github.com/dotnet/machinelearning/pull/3827))
+
+  This package makes it easier to use [ML.NET](http://dot.net/ml) with app models that support Microsoft.Extensions - i.e. ASP.NET and Azure Functions.
+
+ Specifically it contains functionality for:
+ - Dependency Injection
+ - Pooling PredictionEngines
+ - Reloading models when the file or URI has changed
+ - Hooking ML.NET logging to Microsoft.Extensions.Logging
+
+## **Bug Fixes**
+### Serious
+- **Time series Sequential Transform needs to have a binding mechanism:** This bug made it impossible to use time series in NimbusML. ([#3875](https://github.com/dotnet/machinelearning/pull/3875))
+
+- **Build errors resulting from upgrading to VS2019 compilers:** The default CMAKE_C_FLAG for debug configuration sets /ZI to generate a PDB capable of edit and continue. In the new compilers, this is incompatible with /guard:cf which we set for security reasons. ([#3894](https://github.com/dotnet/machinelearning/pull/3894))
+
+- **LightGBM Evaluation metric parameters:** In LightGbm EvaluateMetricType where if a user specified EvaluateMetricType.Default, the metric would not get added to the options Dictionary, and LightGbmWrappedTraining would throw because of that. ([#3815](https://github.com/dotnet/machinelearning/pull/3815))
+
+- **Change default EvaluationMetric for LightGbm:** In [ML.NET](http://dot.net/ml), the default EvaluationMetric for LightGbm is set to EvaluateMetricType.Error for multiclass, EvaluationMetricType.LogLoss for binary etc. This leads to inconsistent behavior from the user's perspective. ([#3859](https://github.com/dotnet/machinelearning/pull/3859))
+### Other
+- CustomGains should allow multiple values in argument attribute. ([#3854](https://github.com/dotnet/machinelearning/pull/3854))
+
+## **Breaking Changes**
+None
+
+## **Enhancements**
+- Fixes the Hardcoded Sigmoid value from -0.5 to the value specified during training. ([#3850](https://github.com/dotnet/machinelearning/pull/3850))
+- Fix TextLoader constructor and add exception message. ([#3788](https://github.com/dotnet/machinelearning/pull/3788))
+- Introduce the `FixZero` argument to the LogMeanVariance normalizer. ([#3916](https://github.com/dotnet/machinelearning/pull/3916))
+- Ensembles trainer now work with ITrainerEstimators instead of ITrainers. ([#3796](https://github.com/dotnet/machinelearning/pull/3796))
+- LightGBM Unbalanced Data Argument. ([#3925](https://github.com/dotnet/machinelearning/pull/3925))
+- Tree based trainers implement ICanGetSummaryAsIDataView. ([#3892](https://github.com/dotnet/machinelearning/pull/3892))
+
+- **CLI and AutoML API**
+  - Internationalization fixes to generate proper [ML.NET](http://dot.net/ml) C# code. ([#3725](https://github.com/dotnet/machinelearning/pull/3725))
+ - Automatic Cross Validation for small datasets, and CV stability fixes. ([#3794](https://github.com/dotnet/machinelearning/pull/3794))
+ - Code cleanup to match .NET style. ([#3823](https://github.com/dotnet/machinelearning/pull/3823))
+
+
+## **Documentation and Samples**
+- Samples for applying ONNX model to in-memory images. ([#3851](https://github.com/dotnet/machinelearning/pull/3851))
+- Reformatted all ~200 samples to 85 character width so the horizontal scrollbar does not appear on docs webpage. ([#3930](https://github.com/dotnet/machinelearning/pull/3930), [3941](https://github.com/dotnet/machinelearning/pull/3941), [3949](https://github.com/dotnet/machinelearning/pull/3949), [3950](https://github.com/dotnet/machinelearning/pull/3950), [3947](https://github.com/dotnet/machinelearning/pull/3947), [3943](https://github.com/dotnet/machinelearning/pull/3943), [3942](https://github.com/dotnet/machinelearning/pull/3942), [3946](https://github.com/dotnet/machinelearning/pull/3946), [3948](https://github.com/dotnet/machinelearning/pull/3948))
+
+## **Remarks**
+- Roughly 200 Github issues were closed, the count decreased from **~550 to 351**. Most of the issues got resolved due to the release of stable API and availability of samples.
\ No newline at end of file
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/BootstrapSample.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/BootstrapSample.cs
index 33413352de..be93d56fae 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/BootstrapSample.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/BootstrapSample.cs
@@ -7,8 +7,9 @@ public static class BootstrapSample
{
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
+ // Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging, as a catalog of available operations
+ // and as the source of randomness.
var mlContext = new MLContext();
// Get a small dataset as an IEnumerable.
@@ -23,20 +24,27 @@ public static void Example()
var data = mlContext.Data.LoadFromEnumerable(rawData);
- // Now take a bootstrap sample of this dataset to create a new dataset. The bootstrap is a resampling technique that
- // creates a training set of the same size by picking with replacement from the original dataset. With the bootstrap,
- // we expect that the resampled dataset will have about 63% of the rows of the original dataset (i.e. 1-e^-1), with some
- // rows represented more than once.
- // BootstrapSample is a streaming implementation of the boostrap that enables sampling from a dataset too large to hold in memory.
- // To enable streaming, BootstrapSample approximates the bootstrap by sampling each row according to a Poisson(1) distribution.
- // Note that this streaming approximation treats each row independently, thus the resampled dataset is not guaranteed to be the
- // same length as the input dataset.
- // Let's take a look at the behavior of the BootstrapSample by examining a few draws:
+ // Now take a bootstrap sample of this dataset to create a new dataset.
+ // The bootstrap is a resampling technique that creates a training set
+ // of the same size by picking with replacement from the original
+ // dataset. With the bootstrap, we expect that the resampled dataset
+ // will have about 63% of the rows of the original dataset
+ // (i.e. 1-e^-1), with some rows represented more than once.
+ // BootstrapSample is a streaming implementation of the bootstrap that
+ // enables sampling from a dataset too large to hold in memory. To
+ // enable streaming, BootstrapSample approximates the bootstrap by
+ // sampling each row according to a Poisson(1) distribution. Note that
+ // this streaming approximation treats each row independently, thus the
+ // resampled dataset is not guaranteed to be the same length as the
+ // input dataset. Let's take a look at the behavior of the
+ // BootstrapSample by examining a few draws:
for (int i = 0; i < 3; i++)
{
var resample = mlContext.Data.BootstrapSample(data, seed: i);
- var enumerable = mlContext.Data.CreateEnumerable(resample, reuseRowObject: false);
+ var enumerable = mlContext.Data
+ .CreateEnumerable(resample, reuseRowObject: false);
+
Console.WriteLine($"Label\tFeature");
foreach (var row in enumerable)
{
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/Cache.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/Cache.cs
index 4ea7c33723..09855b2d07 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/Cache.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/Cache.cs
@@ -8,56 +8,78 @@ public static class Cache
{
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
+ // Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging, as a catalog of available operations
+ // and as the source of randomness.
var mlContext = new MLContext();
var data = DatasetUtils.LoadHousingRegressionDataset(mlContext);
// Time how long it takes to page through the records if we don't cache.
- (int lines, double columnAverage, double elapsedSeconds) = TimeToScanIDataView(mlContext, data);
- Console.WriteLine($"Lines={lines}, averageOfColumn0={columnAverage:0.00} and took {elapsedSeconds} seconds.");
+ (int lines, double columnAverage, double elapsedSeconds) =
+ TimeToScanIDataView(mlContext, data);
+
+ Console.WriteLine($"Lines={lines}," +
+ $"averageOfColumn0={columnAverage:0.00} and took {elapsedSeconds}" +
+ $"seconds.");
// Expected output (time is approximate):
// Lines=506, averageOfColumn0=564.17 and took 0.314 seconds.
// Now create a cached view of the data.
var cachedData = mlContext.Data.Cache(data);
- // Time how long it takes to page through the records the first time they're accessed after a cache is applied.
- // This iteration will be longer than subsequent calls, as the dataset is being accessed and stored for later.
- // Note that this operation may be relatively quick, as the system may have cached the file.
- (lines, columnAverage, elapsedSeconds) = TimeToScanIDataView(mlContext, cachedData);
- Console.WriteLine($"Lines={lines}, averageOfColumn0={columnAverage:0.00} and took {elapsedSeconds} seconds.");
+ // Time how long it takes to page through the records the first time
+ // they're accessed after a cache is applied. This iteration will be
+ // longer than subsequent calls, as the dataset is being accessed and
+ // stored for later. Note that this operation may be relatively quick,
+ // as the system may have cached the file.
+ (lines, columnAverage, elapsedSeconds) = TimeToScanIDataView(mlContext,
+ cachedData);
+
+ Console.WriteLine($"Lines={lines}," +
+ $"averageOfColumn0={columnAverage:0.00} and took {elapsedSeconds}" +
+ $"seconds.");
// Expected output (time is approximate):
// Lines=506, averageOfColumn0=564.17 and took 0.056 seconds.
- // Time how long it takes to page through the records now that the data is cached. After the first iteration that caches the IDataView,
- // future iterations, like this one, are faster because they are pulling from data cached in memory.
- (lines, columnAverage, elapsedSeconds) = TimeToScanIDataView(mlContext, cachedData);
- Console.WriteLine($"Lines={lines}, averageOfColumn0={columnAverage:0.00} and took {elapsedSeconds} seconds.");
+ // Time how long it takes to page through the records now that the data
+ // is cached. After the first iteration that caches the IDataView,
+ // future iterations, like this one, are faster because they are pulling
+ // from data cached in memory.
+ (lines, columnAverage, elapsedSeconds) = TimeToScanIDataView(mlContext,
+ cachedData);
+
+ Console.WriteLine(
+ $"Lines={lines}, averageOfColumn0={columnAverage:0.00} and took " +
+ $"{elapsedSeconds} seconds.");
// Expected output (time is approximate):
// Lines=506, averageOfColumn0=564.17 and took 0.006 seconds.
}
- private static (int lines, double columnAverage, double elapsedSeconds) TimeToScanIDataView(MLContext mlContext, IDataView data)
+ private static (int lines, double columnAverage, double elapsedSeconds)
+ TimeToScanIDataView(MLContext mlContext, IDataView data)
{
int lines = 0;
double columnAverage = 0.0;
- var enumerable = mlContext.Data.CreateEnumerable(data, reuseRowObject: true);
+ var enumerable = mlContext.Data
+ .CreateEnumerable(data, reuseRowObject: true);
+
var watch = System.Diagnostics.Stopwatch.StartNew();
foreach (var row in enumerable)
{
lines++;
- columnAverage += row.MedianHomeValue + row.CrimesPerCapita + row.PercentResidental + row.PercentNonRetail + row.CharlesRiver
- + row.NitricOxides + row.RoomsPerDwelling + row.PercentPre40s + row.EmploymentDistance
- + row.HighwayDistance + row.TaxRate + row.TeacherRatio;
+ columnAverage += row.MedianHomeValue + row.CrimesPerCapita +
+ row.PercentResidental + row.PercentNonRetail + row.CharlesRiver
+ + row.NitricOxides + row.RoomsPerDwelling + row.PercentPre40s +
+ row.EmploymentDistance + row.HighwayDistance + row.TaxRate +
+ row.TeacherRatio;
}
watch.Stop();
columnAverage /= lines;
var elapsed = watch.Elapsed;
return (lines, columnAverage, elapsed.Seconds);
- }
+ }
///
/// A class to hold the raw housing regression rows.
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/CrossValidationSplit.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/CrossValidationSplit.cs
index d7f3faad94..cbe68ba117 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/CrossValidationSplit.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/CrossValidationSplit.cs
@@ -17,16 +17,28 @@ public static void Example()
// Generate some data points.
var examples = GenerateRandomDataPoints(10);
- // Convert the examples list to an IDataView object, which is consumable by ML.NET API.
+ // Convert the examples list to an IDataView object, which is consumable
+ // by ML.NET API.
var dataview = mlContext.Data.LoadFromEnumerable(examples);
- // Cross validation splits your data randomly into set of "folds", and creates groups of Train and Test sets,
- // where for each group, one fold is the Test and the rest of the folds the Train.
- // So below, we specify Group column as the column containing the sampling keys.
- // If we pass that column to cross validation it would be used to break data into certain chunks.
- var folds = mlContext.Data.CrossValidationSplit(dataview, numberOfFolds: 3, samplingKeyColumnName: "Group");
- var trainSet = mlContext.Data.CreateEnumerable(folds[0].TrainSet, reuseRowObject: false);
- var testSet = mlContext.Data.CreateEnumerable(folds[0].TestSet, reuseRowObject: false);
+ // Cross validation splits your data randomly into set of "folds", and
+ // creates groups of Train and Test sets, where for each group, one fold
+ // is the Test and the rest of the folds the Train. So below, we specify
+ // Group column as the column containing the sampling keys. If we pass
+ // that column to cross validation it would be used to break data into
+ // certain chunks.
+ var folds = mlContext.Data
+ .CrossValidationSplit(dataview, numberOfFolds:3,
+ samplingKeyColumnName: "Group");
+
+ var trainSet = mlContext.Data
+ .CreateEnumerable(folds[0].TrainSet,
+ reuseRowObject: false);
+
+ var testSet = mlContext.Data
+ .CreateEnumerable(folds[0].TestSet,
+ reuseRowObject: false);
+
PrintPreviewRows(trainSet, testSet);
// The data in the Train split.
@@ -43,8 +55,14 @@ public static void Example()
// [Group, 0], [Features, 0.9060271]
// [Group, 0], [Features, 0.2737045]
- trainSet = mlContext.Data.CreateEnumerable(folds[1].TrainSet, reuseRowObject: false);
- testSet = mlContext.Data.CreateEnumerable(folds[1].TestSet, reuseRowObject: false);
+ trainSet = mlContext.Data
+ .CreateEnumerable(folds[1].TrainSet,
+ reuseRowObject: false);
+
+ testSet = mlContext.Data
+ .CreateEnumerable(folds[1].TestSet,
+ reuseRowObject: false);
+
PrintPreviewRows(trainSet, testSet);
// The data in the Train split.
// [Group, 0], [Features, 0.7262433]
@@ -60,8 +78,14 @@ public static void Example()
// [Group, 1], [Features, 0.2060332]
// [Group, 1], [Features, 0.4421779]
- trainSet = mlContext.Data.CreateEnumerable(folds[2].TrainSet, reuseRowObject: false);
- testSet = mlContext.Data.CreateEnumerable(folds[2].TestSet, reuseRowObject: false);
+ trainSet = mlContext.Data
+ .CreateEnumerable(folds[2].TrainSet,
+ reuseRowObject: false);
+
+ testSet = mlContext.Data
+ .CreateEnumerable(folds[2].TestSet,
+ reuseRowObject: false);
+
PrintPreviewRows(trainSet, testSet);
// The data in the Train split.
// [Group, 0], [Features, 0.7262433]
@@ -79,8 +103,14 @@ public static void Example()
// Example of a split without specifying a sampling key column.
folds = mlContext.Data.CrossValidationSplit(dataview, numberOfFolds: 3);
- trainSet = mlContext.Data.CreateEnumerable(folds[0].TrainSet, reuseRowObject: false);
- testSet = mlContext.Data.CreateEnumerable(folds[0].TestSet, reuseRowObject: false);
+ trainSet = mlContext.Data
+ .CreateEnumerable(folds[0].TrainSet,
+ reuseRowObject: false);
+
+ testSet = mlContext.Data
+ .CreateEnumerable(folds[0].TestSet,
+ reuseRowObject: false);
+
PrintPreviewRows(trainSet, testSet);
// The data in the Train split.
// [Group, 0], [Features, 0.7262433]
@@ -96,8 +126,14 @@ public static void Example()
// [Group, 2], [Features, 0.5588848]
// [Group, 0], [Features, 0.9060271]
- trainSet = mlContext.Data.CreateEnumerable(folds[1].TrainSet, reuseRowObject: false);
- testSet = mlContext.Data.CreateEnumerable(folds[1].TestSet, reuseRowObject: false);
+ trainSet = mlContext.Data
+ .CreateEnumerable(folds[1].TrainSet,
+ reuseRowObject: false);
+
+ testSet = mlContext.Data
+ .CreateEnumerable(folds[1].TestSet,
+ reuseRowObject: false);
+
PrintPreviewRows(trainSet, testSet);
// The data in the Train split.
// [Group, 2], [Features, 0.7680227]
@@ -113,8 +149,13 @@ public static void Example()
// [Group, 2], [Features, 0.9775497]
// [Group, 0], [Features, 0.2737045]
- trainSet = mlContext.Data.CreateEnumerable(folds[2].TrainSet, reuseRowObject: false);
- testSet = mlContext.Data.CreateEnumerable(folds[2].TestSet, reuseRowObject: false);
+ trainSet = mlContext.Data
+ .CreateEnumerable(folds[2].TrainSet,
+ reuseRowObject: false);
+
+ testSet = mlContext.Data.CreateEnumerable(folds[2].TestSet,
+ reuseRowObject: false);
+
PrintPreviewRows(trainSet, testSet);
// The data in the Train split.
// [Group, 0], [Features, 0.7262433]
@@ -131,7 +172,9 @@ public static void Example()
// [Group, 1], [Features, 0.4421779]
}
- private static IEnumerable GenerateRandomDataPoints(int count, int seed = 0)
+ private static IEnumerable GenerateRandomDataPoints(int count,
+ int seed = 0)
+
{
var random = new Random(seed);
for (int i = 0; i < count; i++)
@@ -146,7 +189,8 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se
}
}
- // Example with features and group column. A data set is a collection of such examples.
+ // Example with features and group column. A data set is a collection of
+ // such examples.
private class DataPoint
{
public float Group { get; set; }
@@ -155,7 +199,9 @@ private class DataPoint
}
// print helper
- private static void PrintPreviewRows(IEnumerable trainSet, IEnumerable testSet)
+ private static void PrintPreviewRows(IEnumerable trainSet,
+ IEnumerable testSet)
+
{
Console.WriteLine($"The data in the Train split.");
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/DataViewEnumerable.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/DataViewEnumerable.cs
index 667bd39758..aaefc85597 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/DataViewEnumerable.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/DataViewEnumerable.cs
@@ -6,31 +6,41 @@ namespace Samples.Dynamic
{
public static class DataViewEnumerable
{
- // A simple case of creating IDataView from IEnumerable.
+ // A simple case of creating IDataView from
+ //IEnumerable.
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
+ // Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging,
// as a catalog of available operations and as the source of randomness.
var mlContext = new MLContext();
// Get a small dataset as an IEnumerable.
- IEnumerable enumerableOfData = GetSampleTemperatureData(5);
+ IEnumerable enumerableOfData =
+ GetSampleTemperatureData(5);
// Load dataset into an IDataView.
IDataView data = mlContext.Data.LoadFromEnumerable(enumerableOfData);
- // We can now examine the records in the IDataView. We first create an enumerable of rows in the IDataView.
- var rowEnumerable = mlContext.Data.CreateEnumerable(data, reuseRowObject: true);
+ // We can now examine the records in the IDataView. We first create an
+ // enumerable of rows in the IDataView.
+ var rowEnumerable = mlContext.Data
+ .CreateEnumerable(data,
+ reuseRowObject: true);
- // SampleTemperatureDataWithLatitude has the definition of a Latitude column of type float.
- // We can use the parameter ignoreMissingColumns to true to ignore any missing columns in the IDataView.
- // The produced enumerable will have the Latitude field set to the default for the data type, in this case 0.
- var rowEnumerableIgnoreMissing = mlContext.Data.CreateEnumerable(data,
- reuseRowObject: true, ignoreMissingColumns: true);
+ // SampleTemperatureDataWithLatitude has the definition of a Latitude
+ // column of type float. We can use the parameter ignoreMissingColumns
+ // to true to ignore any missing columns in the IDataView. The produced
+ // enumerable will have the Latitude field set to the default for the
+ // data type, in this case 0.
+ var rowEnumerableIgnoreMissing = mlContext.Data
+ .CreateEnumerable(data,
+ reuseRowObject: true, ignoreMissingColumns: true);
Console.WriteLine($"Date\tTemperature");
foreach (var row in rowEnumerable)
- Console.WriteLine($"{row.Date.ToString("d")}\t{row.Temperature}");
+ Console.WriteLine(
+ $"{row.Date.ToString("d")}\t{row.Temperature}");
// Expected output:
// Date Temperature
@@ -42,7 +52,8 @@ public static void Example()
Console.WriteLine($"Date\tTemperature\tLatitude");
foreach (var row in rowEnumerableIgnoreMissing)
- Console.WriteLine($"{row.Date.ToString("d")}\t{row.Temperature}\t{row.Latitude}");
+ Console.WriteLine($"{row.Date.ToString("d")}\t{row.Temperature}"
+ + $"\t{row.Latitude}");
// Expected output:
// Date Temperature Latitude
@@ -71,7 +82,9 @@ private class SampleTemperatureDataWithLatitude
///
/// The number of examples to return.
/// An enumerable of .
- private static IEnumerable GetSampleTemperatureData(int exampleCount)
+ private static IEnumerable GetSampleTemperatureData(
+ int exampleCount)
+
{
var rng = new Random(1234321);
var date = new DateTime(2012, 1, 1);
@@ -81,7 +94,9 @@ private static IEnumerable GetSampleTemperatureData(int e
{
date = date.AddDays(1);
temperature += rng.Next(-5, 5);
- yield return new SampleTemperatureData { Date = date, Temperature = temperature };
+ yield return new SampleTemperatureData { Date = date, Temperature =
+ temperature };
+
}
}
}
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/DataViewEnumerable.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/DataViewEnumerable.tt
index 680ef76b66..ebcce161ff 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/DataViewEnumerable.tt
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/DataViewEnumerable.tt
@@ -3,29 +3,39 @@
string NameSpace = "Samples.Dynamic";
string ClassName="DataViewEnumerable";
string AddExtraClass = "true";
-string ExampleShortDoc = @"// A simple case of creating IDataView from IEnumerable.";
-string Example = @"// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
+string ExampleShortDoc = @"// A simple case of creating IDataView from
+ //IEnumerable.";
+string Example = @"// Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging,
// as a catalog of available operations and as the source of randomness.
var mlContext = new MLContext();
// Get a small dataset as an IEnumerable.
- IEnumerable enumerableOfData = GetSampleTemperatureData(5);
+ IEnumerable enumerableOfData =
+ GetSampleTemperatureData(5);
// Load dataset into an IDataView.
IDataView data = mlContext.Data.LoadFromEnumerable(enumerableOfData);
- // We can now examine the records in the IDataView. We first create an enumerable of rows in the IDataView.
- var rowEnumerable = mlContext.Data.CreateEnumerable(data, reuseRowObject: true);
-
- // SampleTemperatureDataWithLatitude has the definition of a Latitude column of type float.
- // We can use the parameter ignoreMissingColumns to true to ignore any missing columns in the IDataView.
- // The produced enumerable will have the Latitude field set to the default for the data type, in this case 0.
- var rowEnumerableIgnoreMissing = mlContext.Data.CreateEnumerable(data,
- reuseRowObject: true, ignoreMissingColumns: true);
+ // We can now examine the records in the IDataView. We first create an
+ // enumerable of rows in the IDataView.
+ var rowEnumerable = mlContext.Data
+ .CreateEnumerable(data,
+ reuseRowObject: true);
+
+ // SampleTemperatureDataWithLatitude has the definition of a Latitude
+ // column of type float. We can use the parameter ignoreMissingColumns
+ // to true to ignore any missing columns in the IDataView. The produced
+ // enumerable will have the Latitude field set to the default for the
+ // data type, in this case 0.
+ var rowEnumerableIgnoreMissing = mlContext.Data
+ .CreateEnumerable(data,
+ reuseRowObject: true, ignoreMissingColumns: true);
Console.WriteLine($""Date\tTemperature"");
foreach (var row in rowEnumerable)
- Console.WriteLine($""{row.Date.ToString(""d"")}\t{row.Temperature}"");
+ Console.WriteLine(
+ $""{row.Date.ToString(""d"")}\t{row.Temperature}"");
// Expected output:
// Date Temperature
@@ -37,7 +47,8 @@ string Example = @"// Create a new context for ML.NET operations. It can be used
Console.WriteLine($""Date\tTemperature\tLatitude"");
foreach (var row in rowEnumerableIgnoreMissing)
- Console.WriteLine($""{row.Date.ToString(""d"")}\t{row.Temperature}\t{row.Latitude}"");
+ Console.WriteLine($""{row.Date.ToString(""d"")}\t{row.Temperature}""
+ + $""\t{row.Latitude}"");
// Expected output:
// Date Temperature Latitude
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/FilterRowsByColumn.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/FilterRowsByColumn.cs
index bddc168e78..af6df27200 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/FilterRowsByColumn.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/FilterRowsByColumn.cs
@@ -6,11 +6,13 @@ namespace Samples.Dynamic
{
public static class FilterRowsByColumn
{
- // // Sample class showing how to filter out some rows in IDataView.
+ // // Sample class showing how to filter out some rows in
+ // IDataView.
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
+ // Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging, as a catalog of available
+ // operations and as the source of randomness.
var mlContext = new MLContext();
// Get a small dataset as an IEnumerable.
@@ -21,7 +23,9 @@ public static void Example()
Console.WriteLine($"Date\tTemperature");
foreach (var row in enumerableOfData)
{
- Console.WriteLine($"{row.Date.ToString("d")}\t{row.Temperature}");
+ Console.WriteLine(
+ $"{row.Date.ToString("d")}\t{row.Temperature}");
+
}
Console.WriteLine();
// Expected output:
@@ -37,15 +41,24 @@ public static void Example()
// 1/10/2012 30
// 1/11/2012 29
- // Filter the data by the values of the temperature. The lower bound is inclusive, the upper exclusive.
- var filteredData = mlContext.Data.FilterRowsByColumn(data, columnName: "Temperature", lowerBound: 34, upperBound: 37);
+ // Filter the data by the values of the temperature. The lower bound is
+ // inclusive, the upper exclusive.
+ var filteredData = mlContext.Data
+ .FilterRowsByColumn(data, columnName: "Temperature",
+ lowerBound: 34, upperBound: 37);
+
+ // Look at the filtered data and observe that values outside [34,37)
+ // have been dropped.
+ var enumerable = mlContext.Data
+ .CreateEnumerable(filteredData,
+ reuseRowObject: true);
- // Look at the filtered data and observe that values outside [34,37) have been dropped.
- var enumerable = mlContext.Data.CreateEnumerable(filteredData, reuseRowObject: true);
Console.WriteLine($"Date\tTemperature");
foreach (var row in enumerable)
{
- Console.WriteLine($"{row.Date.ToString("d")}\t{row.Temperature}");
+ Console.WriteLine(
+ $"{row.Date.ToString("d")}\t{row.Temperature}");
+
}
// Expected output:
@@ -69,7 +82,9 @@ private class SampleTemperatureData
///
/// The number of examples to return.
/// An enumerable of .
- private static IEnumerable GetSampleTemperatureData(int exampleCount)
+ private static IEnumerable GetSampleTemperatureData(
+ int exampleCount)
+
{
var rng = new Random(1234321);
var date = new DateTime(2012, 1, 1);
@@ -79,7 +94,9 @@ private static IEnumerable GetSampleTemperatureData(int e
{
date = date.AddDays(1);
temperature += rng.Next(-5, 5);
- yield return new SampleTemperatureData { Date = date, Temperature = temperature };
+ yield return new SampleTemperatureData { Date = date, Temperature =
+ temperature };
+
}
}
}
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/FilterRowsByColumn.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/FilterRowsByColumn.tt
index 4b8ed39faf..878cc2eb9e 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/FilterRowsByColumn.tt
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/FilterRowsByColumn.tt
@@ -3,9 +3,11 @@
string NameSpace = "Samples.Dynamic";
string ClassName="FilterRowsByColumn";
string AddExtraClass = null;
-string ExampleShortDoc = @"// // Sample class showing how to filter out some rows in IDataView.";
-string Example = @"// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
+string ExampleShortDoc = @"// // Sample class showing how to filter out some rows in
+ // IDataView.";
+string Example = @"// Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging, as a catalog of available
+ // operations and as the source of randomness.
var mlContext = new MLContext();
// Get a small dataset as an IEnumerable.
@@ -17,6 +19,7 @@ string Example = @"// Create a new context for ML.NET operations. It can be used
foreach (var row in enumerableOfData)
{
Console.WriteLine($""{row.Date.ToString(""d"")}\t{row.Temperature}"");
+
}
Console.WriteLine();
// Expected output:
@@ -32,15 +35,24 @@ string Example = @"// Create a new context for ML.NET operations. It can be used
// 1/10/2012 30
// 1/11/2012 29
- // Filter the data by the values of the temperature. The lower bound is inclusive, the upper exclusive.
- var filteredData = mlContext.Data.FilterRowsByColumn(data, columnName: ""Temperature"", lowerBound: 34, upperBound: 37);
+ // Filter the data by the values of the temperature. The lower bound is
+ // inclusive, the upper exclusive.
+ var filteredData = mlContext.Data
+ .FilterRowsByColumn(data, columnName: ""Temperature"",
+ lowerBound: 34, upperBound: 37);
+
+ // Look at the filtered data and observe that values outside [34,37)
+ // have been dropped.
+ var enumerable = mlContext.Data
+ .CreateEnumerable(filteredData,
+ reuseRowObject: true);
- // Look at the filtered data and observe that values outside [34,37) have been dropped.
- var enumerable = mlContext.Data.CreateEnumerable(filteredData, reuseRowObject: true);
Console.WriteLine($""Date\tTemperature"");
foreach (var row in enumerable)
{
- Console.WriteLine($""{row.Date.ToString(""d"")}\t{row.Temperature}"");
+ Console.WriteLine(
+ $""{row.Date.ToString(""d"")}\t{row.Temperature}"");
+
}
// Expected output:
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/FilterRowsByKeyColumnFraction.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/FilterRowsByKeyColumnFraction.cs
index 4074579614..6b95bdf05e 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/FilterRowsByKeyColumnFraction.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/FilterRowsByKeyColumnFraction.cs
@@ -11,7 +11,8 @@ public static class FilterRowsByKeyColumnFraction
{
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
+ // Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging,
// as a catalog of available operations and as the source of randomness.
var mlContext = new MLContext();
@@ -36,7 +37,9 @@ public static void Example()
var transformedData = pipeline.Fit(data).Transform(data);
// Before we apply a filter, examine all the records in the dataset.
- var enumerable = mlContext.Data.CreateEnumerable(transformedData, reuseRowObject: true);
+ var enumerable = mlContext.Data
+ .CreateEnumerable(transformedData, reuseRowObject: true);
+
Console.WriteLine($"Age");
foreach (var row in enumerable)
{
@@ -56,12 +59,20 @@ public static void Example()
// Now filter down to half the keys, choosing the lower half of values.
// For the keys we have the sorted values: 1 1 2 2 2 3 4 4.
- // Projected in the [0, 1[ interval as per: (key - 0.5)/(Count of Keys) the values of the keys for our data would be:
+ // Projected in the [0, 1[ interval as per: (key - 0.5)/(Count of Keys)
+ // the values of the keys for our data would be:
// 0.125 0.125 0.375 0.375 0.375 0.625 0.875 0.875
- // so the keys resulting from filtering in the [0, 0.5 [ interval are the ones with normalized values 0.125 and 0.375, respectively keys
+ // so the keys resulting from filtering in the [0, 0.5 [ interval are
+ // the ones with normalized values 0.125 and 0.375, respectively keys
// with values 1 and 2.
- var filteredHalfData = mlContext.Data.FilterRowsByKeyColumnFraction(transformedData, columnName: "Age", lowerBound: 0, upperBound: 0.5);
- var filteredHalfEnumerable = mlContext.Data.CreateEnumerable(filteredHalfData, reuseRowObject: true);
+ var filteredHalfData = mlContext.Data
+ .FilterRowsByKeyColumnFraction(transformedData, columnName: "Age",
+ lowerBound: 0, upperBound: 0.5);
+
+ var filteredHalfEnumerable = mlContext.Data
+ .CreateEnumerable(filteredHalfData,
+ reuseRowObject: true);
+
Console.WriteLine($"Age");
foreach (var row in filteredHalfEnumerable)
{
@@ -76,12 +87,21 @@ public static void Example()
// 2
// 1
- // As mentioned above, the normalized keys are: 0.125 0.125 0.375 0.375 0.375 0.625 0.875 0.875
- // so the keys resulting from filtering in the [0.3, 0.6 [ interval are the ones with normalized value 0.375, respectively key
- // with value = 2.
- var filteredMiddleData = mlContext.Data.FilterRowsByKeyColumnFraction(transformedData, columnName: "Age", lowerBound: 0.3, upperBound: 0.6);
- // Look at the data and observe that values above 2 have been filtered out
- var filteredMiddleEnumerable = mlContext.Data.CreateEnumerable(filteredMiddleData, reuseRowObject: true);
+ // As mentioned above, the normalized keys are:
+ // 0.125 0.125 0.375 0.375 0.375 0.625 0.875 0.875
+ // so the keys resulting from filtering in the [0.3, 0.6 [ interval are
+ // the ones with normalized value 0.375, respectively key with
+ // value = 2.
+ var filteredMiddleData = mlContext.Data
+ .FilterRowsByKeyColumnFraction(transformedData, columnName: "Age",
+ lowerBound: 0.3, upperBound: 0.6);
+
+ // Look at the data and observe that values above 2 have been filtered
+ // out
+ var filteredMiddleEnumerable = mlContext.Data
+ .CreateEnumerable(filteredMiddleData,
+ reuseRowObject: true);
+
Console.WriteLine($"Age");
foreach (var row in filteredMiddleEnumerable)
{
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/FilterRowsByMissingValues.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/FilterRowsByMissingValues.cs
index 306f4f5185..02f40a6d2c 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/FilterRowsByMissingValues.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/FilterRowsByMissingValues.cs
@@ -12,32 +12,40 @@ public class FilterRowsByMissingValues
///
public static void Example()
{
- // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
- // as well as the source of randomness.
+ // Create a new ML context, for ML.NET operations. It can be used for
+ // exception tracking and logging, as well as the source of randomness.
var mlContext = new MLContext();
// Create a small dataset as an IEnumerable.
var samples = new List()
{
- new DataPoint(){ Feature1 = 21, Feature2 = new [] { 1, 2, float.NaN} },
+ new DataPoint(){ Feature1 = 21, Feature2 = new [] { 1, 2, float.NaN}
+ },
+
new DataPoint(){ Feature1 = 40, Feature2 = new [] { 1f, 2f, 3f} },
- new DataPoint(){ Feature1 = float.NaN, Feature2 = new [] { 1, 2, float.NaN} }
+ new DataPoint(){ Feature1 = float.NaN, Feature2 = new [] { 1, 2,
+ float.NaN} }
+
};
// Convert training data to IDataView.
var data = mlContext.Data.LoadFromEnumerable(samples);
// Filter out any row with an NaN values in either column
- var filteredData = mlContext.Data.FilterRowsByMissingValues(data, new[] { "Feature1", "Feature2" });
+ var filteredData = mlContext.Data
+ .FilterRowsByMissingValues(data, new[] { "Feature1", "Feature2" });
+
+ // Take a look at the resulting dataset and note that rows with NaNs are
+ // filtered out. Only the second data point is left
+ var enumerable = mlContext.Data
+ .CreateEnumerable(filteredData, reuseRowObject: true);
- // Take a look at the resulting dataset and note that rows with NaNs are filtered out.
- // Only the second data point is left
- var enumerable = mlContext.Data.CreateEnumerable(filteredData, reuseRowObject: true);
Console.WriteLine($"Feature1 Feature2");
foreach (var row in enumerable)
{
- Console.WriteLine($"{row.Feature1}\t({string.Join(", ", row.Feature2)})");
+ Console.WriteLine($"{row.Feature1}" +
+ $"\t({string.Join(", ", row.Feature2)})");
}
// Feature1 Feature2
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/LoadFromEnumerable.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/LoadFromEnumerable.cs
index 64bc929850..0bc83d8831 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/LoadFromEnumerable.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/LoadFromEnumerable.cs
@@ -7,37 +7,48 @@ namespace Samples.Dynamic
{
public static class LoadFromEnumerable
{
- // Creating IDataView from IEnumerable, and setting the size of the vector at runtime.
- // When the data model is defined through types, setting the size of the vector is done through the VectorType
- // annotation. When the size of the data is not known at compile time, the Schema can be directly modified at runtime
- // and the size of the vector set there.
- // This is important, because most of the ML.NET trainers require the Features vector to be of known size.
+ // Creating IDataView from IEnumerable, and setting the size of the vector
+ // at runtime. When the data model is defined through types, setting the
+ // size of the vector is done through the VectorType annotation. When the
+ // size of the data is not known at compile time, the Schema can be directly
+ // modified at runtime and the size of the vector set there. This is
+ // important, because most of the ML.NET trainers require the Features
+ // vector to be of known size.
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
+ // Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging, as a catalog of available operations
+ // and as the source of randomness.
var mlContext = new MLContext();
// Get a small dataset as an IEnumerable.
IEnumerable enumerableKnownSize = new DataPointVector[]
{
- new DataPointVector{ Features = new float[]{ 1.2f, 3.4f, 4.5f, 3.2f, 7,5f } },
- new DataPointVector{ Features = new float[]{ 4.2f, 3.4f, 14.65f, 3.2f, 3,5f } },
- new DataPointVector{ Features = new float[]{ 1.6f, 3.5f, 4.5f, 6.2f, 3,5f } },
+ new DataPointVector{ Features = new float[]{ 1.2f, 3.4f, 4.5f, 3.2f,
+ 7,5f } },
+
+ new DataPointVector{ Features = new float[]{ 4.2f, 3.4f, 14.65f,
+ 3.2f, 3,5f } },
+
+ new DataPointVector{ Features = new float[]{ 1.6f, 3.5f, 4.5f, 6.2f,
+ 3,5f } },
+
};
// Load dataset into an IDataView.
IDataView data = mlContext.Data.LoadFromEnumerable(enumerableKnownSize);
var featureColumn = data.Schema["Features"].Type as VectorDataViewType;
// Inspecting the schema
- Console.WriteLine($"Is the size of the Features column known: {featureColumn.IsKnownSize}.\nSize: {featureColumn.Size}");
+ Console.WriteLine($"Is the size of the Features column known: " +
+ $"{featureColumn.IsKnownSize}.\nSize: {featureColumn.Size}");
// Preview
//
// Is the size of the Features column known? True.
// Size: 5.
- // If the size of the vector is unknown at compile time, it can be set at runtime.
+ // If the size of the vector is unknown at compile time, it can be set
+ // at runtime.
IEnumerable enumerableUnknownSize = new DataPoint[]
{
new DataPoint{ Features = new float[]{ 1.2f, 3.4f, 4.5f } },
@@ -45,12 +56,15 @@ public static void Example()
new DataPoint{ Features = new float[]{ 1.6f, 3.5f, 4.5f } },
};
- // The feature dimension (typically this will be the Count of the array of the features vector
- // known at runtime).
+ // The feature dimension (typically this will be the Count of the array
+ // of the features vector known at runtime).
int featureDimension = 3;
var definedSchema = SchemaDefinition.Create(typeof(DataPoint));
- featureColumn = definedSchema["Features"].ColumnType as VectorDataViewType;
- Console.WriteLine($"Is the size of the Features column known: {featureColumn.IsKnownSize}.\nSize: {featureColumn.Size}");
+ featureColumn = definedSchema["Features"]
+ .ColumnType as VectorDataViewType;
+
+ Console.WriteLine($"Is the size of the Features column known: " +
+ $"{featureColumn.IsKnownSize}.\nSize: {featureColumn.Size}");
// Preview
//
@@ -58,15 +72,19 @@ public static void Example()
// Size: 0.
// Set the column type to be a known-size vector.
- var vectorItemType = ((VectorDataViewType)definedSchema[0].ColumnType).ItemType;
- definedSchema[0].ColumnType = new VectorDataViewType(vectorItemType, featureDimension);
+ var vectorItemType = ((VectorDataViewType)definedSchema[0].ColumnType)
+ .ItemType;
+ definedSchema[0].ColumnType = new VectorDataViewType(vectorItemType,
+ featureDimension);
// Read the data into an IDataView with the modified schema supplied in
- IDataView data2 = mlContext.Data.LoadFromEnumerable(enumerableUnknownSize, definedSchema);
+ IDataView data2 = mlContext.Data
+ .LoadFromEnumerable(enumerableUnknownSize, definedSchema);
featureColumn = data2.Schema["Features"].Type as VectorDataViewType;
// Inspecting the schema
- Console.WriteLine($"Is the size of the Features column known: {featureColumn.IsKnownSize}.\nSize: {featureColumn.Size}");
+ Console.WriteLine($"Is the size of the Features column known: " +
+ $"{featureColumn.IsKnownSize}.\nSize: {featureColumn.Size}");
// Preview
//
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SaveAndLoadFromBinary.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SaveAndLoadFromBinary.cs
index b6448ec857..9fa1253a62 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SaveAndLoadFromBinary.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SaveAndLoadFromBinary.cs
@@ -9,9 +9,10 @@ public static class SaveAndLoadFromBinary
{
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
- // Setting the seed to a fixed number in this example to make outputs deterministic.
+ // Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging, as a catalog of available operations
+ // and as the source of randomness. Setting the seed to a fixed number
+ // in this example to make outputs deterministic.
var mlContext = new MLContext(seed: 0);
// Create a list of training data points.
@@ -24,18 +25,22 @@ public static void Example()
new DataPoint(){ Label = 1, Features = 9},
};
- // Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
+ // Convert the list of data points to an IDataView object, which is
+ // consumable by ML.NET API.
IDataView data = mlContext.Data.LoadFromEnumerable(dataPoints);
- // Create a FileStream object and write the IDataView to it as a binary IDV file.
+ // Create a FileStream object and write the IDataView to it as a binary
+ // IDV file.
using (FileStream stream = new FileStream("data.idv", FileMode.Create))
mlContext.Data.SaveAsBinary(data, stream);
// Create an IDataView object by loading the binary IDV file.
IDataView loadedData = mlContext.Data.LoadFromBinary("data.idv");
- // Inspect the data that is loaded from the previously saved binary file.
- var loadedDataEnumerable = mlContext.Data.CreateEnumerable(loadedData, reuseRowObject: false);
+ // Inspect the data that is loaded from the previously saved binary file
+ var loadedDataEnumerable = mlContext.Data
+ .CreateEnumerable(loadedData, reuseRowObject: false);
+
foreach (DataPoint row in loadedDataEnumerable)
Console.WriteLine($"{row.Label}, {row.Features}");
@@ -47,7 +52,8 @@ public static void Example()
// 1, 9
}
- // Example with label and feature values. A data set is a collection of such examples.
+ // Example with label and feature values. A data set is a collection of such
+ // examples.
private class DataPoint
{
public float Label { get; set; }
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SaveAndLoadFromText.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SaveAndLoadFromText.cs
index 9918b736ce..c61e9cff94 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SaveAndLoadFromText.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SaveAndLoadFromText.cs
@@ -9,9 +9,10 @@ public static class SaveAndLoadFromText
{
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
- // Setting the seed to a fixed number in this example to make outputs deterministic.
+ // Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging, as a catalog of available operations
+ // and as the source of randomness. Setting the seed to a fixed number
+ // in this example to make outputs deterministic.
var mlContext = new MLContext(seed: 0);
// Create a list of training data points.
@@ -24,10 +25,12 @@ public static void Example()
new DataPoint(){ Label = 1, Features = 9},
};
- // Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
+ // Convert the list of data points to an IDataView object, which is
+ // consumable by ML.NET API.
IDataView data = mlContext.Data.LoadFromEnumerable(dataPoints);
- // Create a FileStream object and write the IDataView to it as a text file.
+ // Create a FileStream object and write the IDataView to it as a text
+ // file.
using (FileStream stream = new FileStream("data.tsv", FileMode.Create))
mlContext.Data.SaveAsText(data, stream);
@@ -35,7 +38,9 @@ public static void Example()
IDataView loadedData = mlContext.Data.LoadFromTextFile("data.tsv");
// Inspect the data that is loaded from the previously saved text file.
- var loadedDataEnumerable = mlContext.Data.CreateEnumerable(loadedData, reuseRowObject: false);
+ var loadedDataEnumerable = mlContext.Data
+ .CreateEnumerable(loadedData, reuseRowObject: false);
+
foreach (DataPoint row in loadedDataEnumerable)
Console.WriteLine($"{row.Label}, {row.Features}");
@@ -47,7 +52,8 @@ public static void Example()
// 1, 9
}
- // Example with label and feature values. A data set is a collection of such examples.
+ // Example with label and feature values. A data set is a collection of such
+ // examples.
private class DataPoint
{
public float Label { get; set; }
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/ShuffleRows.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/ShuffleRows.cs
index 2b786a7da4..73ef6657e1 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/ShuffleRows.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/ShuffleRows.cs
@@ -6,11 +6,13 @@ namespace Samples.Dynamic
{
public static class ShuffleRows
{
- // Sample class showing how to shuffle rows in IDataView.
+ // Sample class showing how to shuffle rows in
+ // IDataView.
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
+ // Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging, as a catalog of available operations
+ // and as the source of randomness.
var mlContext = new MLContext();
// Get a small dataset as an IEnumerable.
@@ -21,7 +23,8 @@ public static void Example()
Console.WriteLine($"Date\tTemperature");
foreach (var row in enumerableOfData)
{
- Console.WriteLine($"{row.Date.ToString("d")}\t{row.Temperature}");
+ Console.WriteLine($"{row.Date.ToString("d")}" +
+ $"\t{row.Temperature}");
}
Console.WriteLine();
// Expected output:
@@ -35,12 +38,17 @@ public static void Example()
// Shuffle the dataset.
var shuffledData = mlContext.Data.ShuffleRows(data, seed: 123);
- // Look at the shuffled data and observe that the rows are in a randomized order.
- var enumerable = mlContext.Data.CreateEnumerable(shuffledData, reuseRowObject: true);
+ // Look at the shuffled data and observe that the rows are in a
+ // randomized order.
+ var enumerable = mlContext.Data
+ .CreateEnumerable(shuffledData,
+ reuseRowObject: true);
+
Console.WriteLine($"Date\tTemperature");
foreach (var row in enumerable)
{
- Console.WriteLine($"{row.Date.ToString("d")}\t{row.Temperature}");
+ Console.WriteLine($"{row.Date.ToString("d")}" +
+ $"\t{row.Temperature}");
}
// Expected output:
// Date Temperature
@@ -62,7 +70,9 @@ private class SampleTemperatureData
///
/// The number of examples to return.
/// An enumerable of .
- private static IEnumerable GetSampleTemperatureData(int exampleCount)
+ private static IEnumerable GetSampleTemperatureData(
+ int exampleCount)
+
{
var rng = new Random(1234321);
var date = new DateTime(2012, 1, 1);
@@ -72,7 +82,9 @@ private static IEnumerable GetSampleTemperatureData(int e
{
date = date.AddDays(1);
temperature += rng.Next(-5, 5);
- yield return new SampleTemperatureData { Date = date, Temperature = temperature };
+ yield return new SampleTemperatureData { Date = date, Temperature =
+ temperature };
+
}
}
}
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/ShuffleRows.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/ShuffleRows.tt
index fa2adb3e9a..4c77c521a9 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/ShuffleRows.tt
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/ShuffleRows.tt
@@ -3,9 +3,11 @@
string NameSpace = "Samples.Dynamic";
string ClassName="ShuffleRows";
string AddExtraClass = null;
-string ExampleShortDoc = @"// Sample class showing how to shuffle rows in IDataView.";
-string Example = @"// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
+string ExampleShortDoc = @"// Sample class showing how to shuffle rows in
+ // IDataView.";
+string Example = @"// Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging, as a catalog of available operations
+ // and as the source of randomness.
var mlContext = new MLContext();
// Get a small dataset as an IEnumerable.
@@ -16,7 +18,8 @@ string Example = @"// Create a new context for ML.NET operations. It can be used
Console.WriteLine($""Date\tTemperature"");
foreach (var row in enumerableOfData)
{
- Console.WriteLine($""{row.Date.ToString(""d"")}\t{row.Temperature}"");
+ Console.WriteLine($""{row.Date.ToString(""d"")}"" +
+ $""\t{row.Temperature}"");
}
Console.WriteLine();
// Expected output:
@@ -30,12 +33,17 @@ string Example = @"// Create a new context for ML.NET operations. It can be used
// Shuffle the dataset.
var shuffledData = mlContext.Data.ShuffleRows(data, seed: 123);
- // Look at the shuffled data and observe that the rows are in a randomized order.
- var enumerable = mlContext.Data.CreateEnumerable(shuffledData, reuseRowObject: true);
+ // Look at the shuffled data and observe that the rows are in a
+ // randomized order.
+ var enumerable = mlContext.Data
+ .CreateEnumerable(shuffledData,
+ reuseRowObject: true);
+
Console.WriteLine($""Date\tTemperature"");
foreach (var row in enumerable)
{
- Console.WriteLine($""{row.Date.ToString(""d"")}\t{row.Temperature}"");
+ Console.WriteLine($""{row.Date.ToString(""d"")}"" +
+ $""\t{row.Temperature}"");
}
// Expected output:
// Date Temperature
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SkipRows.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SkipRows.cs
index cd1744a0f6..006082f238 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SkipRows.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SkipRows.cs
@@ -9,8 +9,9 @@ public static class SkipRows
// Sample class showing how to skip rows in IDataView.
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
+ // Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging, as a catalog of available operations
+ // and as the source of randomness.
var mlContext = new MLContext();
// Get a small dataset as an IEnumerable.
@@ -21,7 +22,8 @@ public static void Example()
Console.WriteLine($"Date\tTemperature");
foreach (var row in enumerableOfData)
{
- Console.WriteLine($"{row.Date.ToString("d")}\t{row.Temperature}");
+ Console.WriteLine($"{row.Date.ToString("d")}" +
+ $"\t{row.Temperature}");
}
Console.WriteLine();
// Expected output:
@@ -40,12 +42,17 @@ public static void Example()
// Skip the first 5 rows in the dataset
var filteredData = mlContext.Data.SkipRows(data, 5);
- // Look at the filtered data and observe that the first 5 rows have been dropped
- var enumerable = mlContext.Data.CreateEnumerable(filteredData, reuseRowObject: true);
+ // Look at the filtered data and observe that the first 5 rows have been
+ // dropped
+ var enumerable = mlContext.Data
+ .CreateEnumerable(filteredData,
+ reuseRowObject: true);
+
Console.WriteLine($"Date\tTemperature");
foreach (var row in enumerable)
{
- Console.WriteLine($"{row.Date.ToString("d")}\t{row.Temperature}");
+ Console.WriteLine($"{row.Date.ToString("d")}" +
+ $"\t{row.Temperature}");
}
// Expected output:
// Date Temperature
@@ -67,7 +74,9 @@ private class SampleTemperatureData
///
/// The number of examples to return.
/// An enumerable of .
- private static IEnumerable GetSampleTemperatureData(int exampleCount)
+ private static IEnumerable GetSampleTemperatureData(
+ int exampleCount)
+
{
var rng = new Random(1234321);
var date = new DateTime(2012, 1, 1);
@@ -77,7 +86,9 @@ private static IEnumerable GetSampleTemperatureData(int e
{
date = date.AddDays(1);
temperature += rng.Next(-5, 5);
- yield return new SampleTemperatureData { Date = date, Temperature = temperature };
+ yield return new SampleTemperatureData { Date = date, Temperature =
+ temperature };
+
}
}
}
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SkipRows.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SkipRows.tt
index 411b4cf85a..59bd97c443 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SkipRows.tt
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SkipRows.tt
@@ -4,8 +4,9 @@ string NameSpace = "Samples.Dynamic";
string ClassName="SkipRows";
string AddExtraClass = null;
string ExampleShortDoc = @"// Sample class showing how to skip rows in IDataView.";
-string Example = @"// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
+string Example = @"// Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging, as a catalog of available operations
+ // and as the source of randomness.
var mlContext = new MLContext();
// Get a small dataset as an IEnumerable.
@@ -16,7 +17,8 @@ string Example = @"// Create a new context for ML.NET operations. It can be used
Console.WriteLine($""Date\tTemperature"");
foreach (var row in enumerableOfData)
{
- Console.WriteLine($""{row.Date.ToString(""d"")}\t{row.Temperature}"");
+ Console.WriteLine($""{row.Date.ToString(""d"")}"" +
+ $""\t{row.Temperature}"");
}
Console.WriteLine();
// Expected output:
@@ -35,12 +37,17 @@ string Example = @"// Create a new context for ML.NET operations. It can be used
// Skip the first 5 rows in the dataset
var filteredData = mlContext.Data.SkipRows(data, 5);
- // Look at the filtered data and observe that the first 5 rows have been dropped
- var enumerable = mlContext.Data.CreateEnumerable(filteredData, reuseRowObject: true);
+ // Look at the filtered data and observe that the first 5 rows have been
+ // dropped
+ var enumerable = mlContext.Data
+ .CreateEnumerable(filteredData,
+ reuseRowObject: true);
+
Console.WriteLine($""Date\tTemperature"");
foreach (var row in enumerable)
{
- Console.WriteLine($""{row.Date.ToString(""d"")}\t{row.Temperature}"");
+ Console.WriteLine($""{row.Date.ToString(""d"")}"" +
+ $""\t{row.Temperature}"");
}
// Expected output:
// Date Temperature
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/TakeRows.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/TakeRows.cs
index 26a489f5e8..5ad6f13f7f 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/TakeRows.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/TakeRows.cs
@@ -6,10 +6,12 @@ namespace Samples.Dynamic
{
public static class TakeRows
{
- // Sample class showing how to take some rows from the beginning of IDataView.
+ // Sample class showing how to take some rows from the
+ // beginning of IDataView.
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
+ // Create a new context for ML.NET operations. It can be used for
+ //exception tracking and logging,
// as a catalog of available operations and as the source of randomness.
var mlContext = new MLContext();
@@ -21,7 +23,8 @@ public static void Example()
Console.WriteLine($"Date\tTemperature");
foreach (var row in enumerableOfData)
{
- Console.WriteLine($"{row.Date.ToString("d")}\t{row.Temperature}");
+ Console.WriteLine($"{row.Date.ToString("d")}" +
+ $"\t{row.Temperature}");
}
Console.WriteLine();
// Expected output:
@@ -40,12 +43,17 @@ public static void Example()
// Take the first 5 rows in the dataset
var filteredData = mlContext.Data.TakeRows(data, 5);
- // Look at the filtered data and observe that only the first 5 rows are in the resulting dataset.
- var enumerable = mlContext.Data.CreateEnumerable(filteredData, reuseRowObject: true);
+ // Look at the filtered data and observe that only the first 5 rows are
+ // in the resulting dataset.
+ var enumerable = mlContext.Data
+ .CreateEnumerable(filteredData,
+ reuseRowObject: true);
+
Console.WriteLine($"Date\tTemperature");
foreach (var row in enumerable)
{
- Console.WriteLine($"{row.Date.ToString("d")}\t{row.Temperature}");
+ Console.WriteLine($"{row.Date.ToString("d")}" +
+ $"\t{row.Temperature}");
}
// Expected output:
// Date Temperature
@@ -67,7 +75,9 @@ private class SampleTemperatureData
///
/// The number of examples to return.
/// An enumerable of .
- private static IEnumerable GetSampleTemperatureData(int exampleCount)
+ private static IEnumerable GetSampleTemperatureData(
+ int exampleCount)
+
{
var rng = new Random(1234321);
var date = new DateTime(2012, 1, 1);
@@ -77,7 +87,9 @@ private static IEnumerable GetSampleTemperatureData(int e
{
date = date.AddDays(1);
temperature += rng.Next(-5, 5);
- yield return new SampleTemperatureData { Date = date, Temperature = temperature };
+ yield return new SampleTemperatureData { Date = date, Temperature =
+ temperature };
+
}
}
}
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/TakeRows.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/TakeRows.tt
index 76f1edeb80..1e482591db 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/TakeRows.tt
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/TakeRows.tt
@@ -3,20 +3,23 @@
string NameSpace = "Samples.Dynamic";
string ClassName="TakeRows";
string AddExtraClass = null;
-string ExampleShortDoc = @"// Sample class showing how to take some rows from the beginning of IDataView.";
-string Example = @"// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
+string ExampleShortDoc = @"// Sample class showing how to take some rows from the
+ // beginning of IDataView.";
+string Example = @"// Create a new context for ML.NET operations. It can be used for
+ //exception tracking and logging,
// as a catalog of available operations and as the source of randomness.
var mlContext = new MLContext();
// Get a small dataset as an IEnumerable.
- var enumerableOfData = Microsoft.ML.SamplesUtils.DatasetUtils.GetSampleTemperatureData(10);
+ var enumerableOfData = GetSampleTemperatureData(10);
var data = mlContext.Data.LoadFromEnumerable(enumerableOfData);
// Before we apply a filter, examine all the records in the dataset.
Console.WriteLine($""Date\tTemperature"");
foreach (var row in enumerableOfData)
{
- Console.WriteLine($""{row.Date.ToString(""d"")}\t{row.Temperature}"");
+ Console.WriteLine($""{row.Date.ToString(""d"")}"" +
+ $""\t{row.Temperature}"");
}
Console.WriteLine();
// Expected output:
@@ -35,12 +38,17 @@ string Example = @"// Create a new context for ML.NET operations. It can be used
// Take the first 5 rows in the dataset
var filteredData = mlContext.Data.TakeRows(data, 5);
- // Look at the filtered data and observe that only the first 5 rows are in the resulting dataset.
- var enumerable = mlContext.Data.CreateEnumerable(filteredData, reuseRowObject: true);
+ // Look at the filtered data and observe that only the first 5 rows are
+ // in the resulting dataset.
+ var enumerable = mlContext.Data
+ .CreateEnumerable(filteredData,
+ reuseRowObject: true);
+
Console.WriteLine($""Date\tTemperature"");
foreach (var row in enumerable)
{
- Console.WriteLine($""{row.Date.ToString(""d"")}\t{row.Temperature}"");
+ Console.WriteLine($""{row.Date.ToString(""d"")}"" +
+ $""\t{row.Temperature}"");
}
// Expected output:
// Date Temperature
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/TemperatureAndLatitude.ttinclude b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/TemperatureAndLatitude.ttinclude
index 448e743791..07ecca7ddf 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/TemperatureAndLatitude.ttinclude
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/TemperatureAndLatitude.ttinclude
@@ -32,7 +32,9 @@ namespace <#=NameSpace#>
///
/// The number of examples to return.
/// An enumerable of .
- private static IEnumerable GetSampleTemperatureData(int exampleCount)
+ private static IEnumerable GetSampleTemperatureData(
+ int exampleCount)
+
{
var rng = new Random(1234321);
var date = new DateTime(2012, 1, 1);
@@ -42,7 +44,9 @@ namespace <#=NameSpace#>
{
date = date.AddDays(1);
temperature += rng.Next(-5, 5);
- yield return new SampleTemperatureData { Date = date, Temperature = temperature };
+ yield return new SampleTemperatureData { Date = date,
+ Temperature = temperature };
+
}
}
}
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/TrainTestSplit.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/TrainTestSplit.cs
index 47c56266bc..e4fc26296b 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/TrainTestSplit.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/TrainTestSplit.cs
@@ -17,16 +17,26 @@ public static void Example()
// Generate some data points.
var examples = GenerateRandomDataPoints(10);
- // Convert the examples list to an IDataView object, which is consumable by ML.NET API.
+ // Convert the examples list to an IDataView object, which is consumable
+ // by ML.NET API.
var dataview = mlContext.Data.LoadFromEnumerable(examples);
- // Leave out 10% of the dataset for testing.For some types of problems, for example for ranking or anomaly detection,
- // we must ensure that the split leaves the rows with the same value in a particular column, in one of the splits.
- // So below, we specify Group column as the column containing the sampling keys.
- // Notice how keeping the rows with the same value in the Group column overrides the testFraction definition.
- var split = mlContext.Data.TrainTestSplit(dataview, testFraction: 0.1, samplingKeyColumnName: "Group");
- var trainSet = mlContext.Data.CreateEnumerable(split.TrainSet, reuseRowObject: false);
- var testSet = mlContext.Data.CreateEnumerable(split.TestSet, reuseRowObject: false);
+ // Leave out 10% of the dataset for testing.For some types of problems,
+ // for example for ranking or anomaly detection, we must ensure that the
+ // split leaves the rows with the same value in a particular column, in
+ // one of the splits. So below, we specify Group column as the column
+ // containing the sampling keys. Notice how keeping the rows with the
+ // same value in the Group column overrides the testFraction definition.
+ var split = mlContext.Data
+ .TrainTestSplit(dataview, testFraction: 0.1,
+ samplingKeyColumnName: "Group");
+
+ var trainSet = mlContext.Data
+ .CreateEnumerable(split.TrainSet, reuseRowObject: false);
+
+ var testSet = mlContext.Data
+ .CreateEnumerable(split.TestSet,reuseRowObject: false);
+
PrintPreviewRows(trainSet, testSet);
// The data in the Train split.
@@ -45,8 +55,12 @@ public static void Example()
// Example of a split without specifying a sampling key column.
split = mlContext.Data.TrainTestSplit(dataview, testFraction: 0.2);
- trainSet = mlContext.Data.CreateEnumerable(split.TrainSet, reuseRowObject: false);
- testSet = mlContext.Data.CreateEnumerable(split.TestSet, reuseRowObject: false);
+ trainSet = mlContext.Data
+ .CreateEnumerable(split.TrainSet,reuseRowObject: false);
+
+ testSet = mlContext.Data
+ .CreateEnumerable(split.TestSet,reuseRowObject: false);
+
PrintPreviewRows(trainSet, testSet);
// The data in the Train split.
@@ -65,7 +79,9 @@ public static void Example()
}
- private static IEnumerable GenerateRandomDataPoints(int count, int seed = 0)
+ private static IEnumerable GenerateRandomDataPoints(int count,
+ int seed = 0)
+
{
var random = new Random(seed);
for (int i = 0; i < count; i++)
@@ -80,7 +96,8 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se
}
}
- // Example with label and group column. A data set is a collection of such examples.
+ // Example with label and group column. A data set is a collection of such
+ // examples.
private class DataPoint
{
public float Group { get; set; }
@@ -89,7 +106,9 @@ private class DataPoint
}
// print helper
- private static void PrintPreviewRows(IEnumerable trainSet, IEnumerable testSet)
+ private static void PrintPreviewRows(IEnumerable trainSet,
+ IEnumerable testSet)
+
{
Console.WriteLine($"The data in the Train split.");
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ModelOperations/SaveLoadModel.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ModelOperations/SaveLoadModel.cs
index a5f70802bb..67e9226ad6 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/ModelOperations/SaveLoadModel.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ModelOperations/SaveLoadModel.cs
@@ -9,8 +9,8 @@ public class SaveLoadModel
{
public static void Example()
{
- // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
- // as well as the source of randomness.
+ // Create a new ML context, for ML.NET operations. It can be used for
+ // exception tracking and logging, as well as the source of randomness.
var mlContext = new MLContext();
// Generate sample data.
@@ -25,7 +25,8 @@ public static void Example()
var outputColumnName = nameof(Transformation.Key);
// Transform.
- ITransformer model = mlContext.Transforms.Conversion.MapValueToKey(outputColumnName, inputColumnName).Fit(dataView);
+ ITransformer model = mlContext.Transforms.Conversion
+ .MapValueToKey(outputColumnName, inputColumnName).Fit(dataView);
// Save model.
mlContext.Model.Save(model, dataView.Schema, "model.zip");
@@ -35,11 +36,15 @@ public static void Example()
model = mlContext.Model.Load(file, out DataViewSchema schema);
// Create a prediction engine from the model for feeding new data.
- var engine = mlContext.Model.CreatePredictionEngine(model);
+ var engine = mlContext.Model
+ .CreatePredictionEngine(model);
+
var transformation = engine.Predict(new Data() { Value = "abc" });
// Print transformation to console.
- Console.WriteLine("Value: {0}\t Key:{1}", transformation.Value, transformation.Key);
+ Console.WriteLine("Value: {0}\t Key:{1}", transformation.Value,
+ transformation.Key);
+
// Value: abc Key:1
}
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ModelOperations/SaveLoadModelFile.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ModelOperations/SaveLoadModelFile.cs
index 9884f8c8e9..88192cb3d3 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/ModelOperations/SaveLoadModelFile.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ModelOperations/SaveLoadModelFile.cs
@@ -9,8 +9,8 @@ public class SaveLoadModelFile
{
public static void Example()
{
- // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
- // as well as the source of randomness.
+ // Create a new ML context, for ML.NET operations. It can be used for
+ // exception tracking and logging, as well as the source of randomness.
var mlContext = new MLContext();
// Generate sample data.
@@ -25,7 +25,8 @@ public static void Example()
var outputColumnName = nameof(Transformation.Key);
// Transform.
- ITransformer model = mlContext.Transforms.Conversion.MapValueToKey(outputColumnName, inputColumnName).Fit(dataView);
+ ITransformer model = mlContext.Transforms.Conversion
+ .MapValueToKey(outputColumnName, inputColumnName).Fit(dataView);
// Save model.
mlContext.Model.Save(model, dataView.Schema, "model.zip");
@@ -34,11 +35,15 @@ public static void Example()
model = mlContext.Model.Load("model.zip", out DataViewSchema schema);
// Create a prediction engine from the model for feeding new data.
- var engine = mlContext.Model.CreatePredictionEngine(model);
+ var engine = mlContext.Model
+ .CreatePredictionEngine(model);
+
var transformation = engine.Predict(new Data() { Value = "abc" });
// Print transformation to console.
- Console.WriteLine("Value: {0}\t Key:{1}", transformation.Value, transformation.Key);
+ Console.WriteLine("Value: {0}\t Key:{1}", transformation.Value,
+ transformation.Key);
+
// Value: abc Key:1
}
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs
index 1fe2f70325..9f607a9c9e 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs
@@ -9,15 +9,21 @@ public static partial class TransformSamples
{
public static void Example()
{
- // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
- // as well as the source of randomness.
+ // Create a new ML context, for ML.NET operations. It can be used for
+ // exception tracking and logging, as well as the source of randomness.
var ml = new MLContext();
// Get a small dataset as an IEnumerable and convert to IDataView.
var data = new List() {
- new SampleSentimentData { Sentiment = true, SentimentText = "Best game I've ever played." },
- new SampleSentimentData { Sentiment = false, SentimentText = "==RUDE== Dude, 2" },
- new SampleSentimentData { Sentiment = true, SentimentText = "Until the next game, this is the best Xbox game!" } };
+ new SampleSentimentData { Sentiment = true,
+ SentimentText = "Best game I've ever played." },
+
+ new SampleSentimentData { Sentiment = false,
+ SentimentText = "==RUDE== Dude, 2" },
+
+ new SampleSentimentData { Sentiment = true,
+ SentimentText = "Until the next game," +
+ "this is the best Xbox game!" } };
// Convert IEnumerable to IDataView.
var trainData = ml.Data.LoadFromEnumerable(data);
@@ -29,23 +35,42 @@ public static void Example()
// false ==RUDE== Dude, 2.
// true Until the next game, this is the best Xbox game!
- // A pipeline to tokenize text as characters and then combine them together into n-grams
- // The pipeline uses the default settings to featurize.
+ // A pipeline to tokenize text as characters and then combine them
+ // together into n-grams. The pipeline uses the default settings to
+ // featurize.
+
+ var charsPipeline = ml.Transforms.Text
+ .TokenizeIntoCharactersAsKeys("Chars", "SentimentText",
+ useMarkerCharacters: false);
+
+ var ngramOnePipeline = ml.Transforms.Text
+ .ProduceNgrams("CharsUnigrams", "Chars", ngramLength: 1);
- var charsPipeline = ml.Transforms.Text.TokenizeIntoCharactersAsKeys("Chars", "SentimentText", useMarkerCharacters: false);
- var ngramOnePipeline = ml.Transforms.Text.ProduceNgrams("CharsUnigrams", "Chars", ngramLength: 1);
- var ngramTwpPipeline = ml.Transforms.Text.ProduceNgrams("CharsTwograms", "Chars");
- var oneCharsPipeline = charsPipeline.Append(ngramOnePipeline);
- var twoCharsPipeline = charsPipeline.Append(ngramTwpPipeline);
+ var ngramTwpPipeline = ml.Transforms.Text
+ .ProduceNgrams("CharsTwograms", "Chars");
+
+ var oneCharsPipeline = charsPipeline
+ .Append(ngramOnePipeline);
+
+ var twoCharsPipeline = charsPipeline
+ .Append(ngramTwpPipeline);
// The transformed data for pipelines.
- var transformedData_onechars = oneCharsPipeline.Fit(trainData).Transform(trainData);
- var transformedData_twochars = twoCharsPipeline.Fit(trainData).Transform(trainData);
+ var transformedData_onechars = oneCharsPipeline.Fit(trainData)
+ .Transform(trainData);
+
+ var transformedData_twochars = twoCharsPipeline.Fit(trainData)
+ .Transform(trainData);
// Small helper to print the text inside the columns, in the console.
- Action>, VBuffer>> printHelper = (columnName, column, names) =>
+ Action>,
+ VBuffer>>
+ printHelper = (columnName, column, names) =>
+
{
- Console.WriteLine($"{columnName} column obtained post-transformation.");
+ Console.WriteLine(
+ $"{columnName} column obtained post-transformation.");
+
var slots = names.GetValues();
foreach (var featureRow in column)
{
@@ -54,12 +79,19 @@ public static void Example()
Console.WriteLine("");
}
- Console.WriteLine("===================================================");
+ Console.WriteLine(
+ "===================================================");
};
- // Preview of the CharsUnigrams column obtained after processing the input.
+ // Preview of the CharsUnigrams column obtained after processing the
+ // input.
VBuffer> slotNames = default;
- transformedData_onechars.Schema["CharsUnigrams"].GetSlotNames(ref slotNames);
- var charsOneGramColumn = transformedData_onechars.GetColumn>(transformedData_onechars.Schema["CharsUnigrams"]);
+ transformedData_onechars.Schema["CharsUnigrams"]
+ .GetSlotNames(ref slotNames);
+
+ var charsOneGramColumn = transformedData_onechars
+ .GetColumn>(transformedData_onechars
+ .Schema["CharsUnigrams"]);
+
printHelper("CharsUnigrams", charsOneGramColumn, slotNames);
// CharsUnigrams column obtained post-transformation.
@@ -67,8 +99,13 @@ public static void Example()
// 'e' - 1 '>' - 2 'd' - 1 '=' - 4 'R' - 1 'U' - 1 'D' - 2 'E' - 1 'u' - 1 ',' - 1 '2' - 1
// 'B' - 0 'e' - 6 's' - 3 't' - 6 '>' - 9 'g' - 2 'a' - 2 'm' - 2 'I' - 0 ''' - 0 'v' - 0 ...
// Preview of the CharsTwoGrams column obtained after processing the input.
- var charsTwoGramColumn = transformedData_twochars.GetColumn>(transformedData_twochars.Schema["CharsTwograms"]);
- transformedData_twochars.Schema["CharsTwograms"].GetSlotNames(ref slotNames);
+ var charsTwoGramColumn = transformedData_twochars
+ .GetColumn>(transformedData_twochars
+ .Schema["CharsTwograms"]);
+
+ transformedData_twochars.Schema["CharsTwograms"]
+ .GetSlotNames(ref slotNames);
+
printHelper("CharsTwograms", charsTwoGramColumn, slotNames);
// CharsTwograms column obtained post-transformation.
@@ -78,7 +115,8 @@ public static void Example()
}
///
- /// A dataset that contains a tweet and the sentiment assigned to that tweet: 0 - negative and 1 - positive sentiment.
+ /// A dataset that contains a tweet and the sentiment assigned to that
+ /// tweet: 0 - negative and 1 - positive sentiment.
///
public class SampleSentimentData
{
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/SimpleDataViewImplementation.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/SimpleDataViewImplementation.cs
index b1a134b192..f19f42348d 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/SimpleDataViewImplementation.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/SimpleDataViewImplementation.cs
@@ -7,17 +7,21 @@
namespace Samples.Dynamic
{
///
- /// The interface is the central concept of "data" in ML.NET. While many conveniences exist
- /// to create pre-baked implementations, it is also useful to know how to create one completely from scratch. We also
- /// take this opportunity to illustrate and motivate the basic principles of how the IDataView system is architected,
- /// since people interested in implementing need at least some knowledge of those principles.
+ /// The interface is the central concept of "data" in
+ /// ML.NET. While many conveniences exist to create pre-baked implementations,
+ /// it is also useful to know how to create one completely from scratch. We also
+ /// take this opportunity to illustrate and motivate the basic principles of how
+ /// the IDataView system is architected, since people interested in
+ /// implementing need at least some knowledge of those
+ /// principles.
///
public static class SimpleDataViewImplementation
{
public static void Example()
{
- // First we create an array of these objects, which we "present" as this IDataView implementation so that it
- // can be used in a simple ML.NET pipeline.
+ // First we create an array of these objects, which we "present" as this
+ // IDataView implementation so that it can be used in a simple ML.NET
+ // pipeline.
var inputArray = new[]
{
new InputObject(false, "Hello my friend."),
@@ -26,8 +30,8 @@ public static void Example()
};
var dataView = new InputObjectDataView(inputArray);
- // So, this is a very simple pipeline: a transformer that tokenizes Text, does nothing with the Label column
- // at all.
+ // So, this is a very simple pipeline: a transformer that tokenizes
+ // Text, does nothing with the Label column at all.
var mlContext = new MLContext();
var transformedDataView = mlContext.Transforms.Text.TokenizeIntoWords(
"TokenizedText", "Text").Fit(dataView).Transform(dataView);
@@ -35,22 +39,31 @@ public static void Example()
var textColumn = transformedDataView.Schema["Text"];
var tokensColumn = transformedDataView.Schema["TokenizedText"];
- using (var cursor = transformedDataView.GetRowCursor(new[] { textColumn, tokensColumn }))
+ using (var cursor = transformedDataView.GetRowCursor(
+ new[] { textColumn, tokensColumn }))
+
{
- // Note that it is best to get the getters and values *before* iteration, so as to faciliate buffer
- // sharing (if applicable), and column-type validation once, rather than many times.
+ // Note that it is best to get the getters and values *before*
+ // iteration, so as to faciliate buffer sharing (if applicable),
+ // and column-type validation once, rather than many times.
ReadOnlyMemory textValue = default;
VBuffer> tokensValue = default;
- var textGetter = cursor.GetGetter>(textColumn);
- var tokensGetter = cursor.GetGetter>>(tokensColumn);
+ var textGetter = cursor
+ .GetGetter>(textColumn);
+
+ var tokensGetter = cursor
+ .GetGetter>>(tokensColumn);
while (cursor.MoveNext())
{
textGetter(ref textValue);
tokensGetter(ref tokensValue);
- Console.WriteLine($"{textValue} => {string.Join(", ", tokensValue.DenseValues())}");
+ Console.WriteLine(
+ $"{textValue} => " +
+ $"{string.Join(", ", tokensValue.DenseValues())}");
+
}
// The output to console is this:
@@ -59,12 +72,15 @@ public static void Example()
// Stay awhile and listen. => Stay, awhile, and, listen.
// Masterfully done hero! => Masterfully, done, hero!
- // Note that it may be interesting to set a breakpoint on the Console.WriteLine, and explore
- // what is going on with the cursor, and the buffers. In particular, on the third iteration,
- // while `tokensValue` is logically presented as a three element array, internally you will
- // see that the arrays internal to that structure have (at least) four items, specifically:
- // `Masterfully`, `done`, `hero!`, `listen.`. In this way we see a simple example of the details
- // of how buffer sharing from one iteration to the next actually works.
+ // Note that it may be interesting to set a breakpoint on the
+ // Console.WriteLine, and explore what is going on with the cursor,
+ // and the buffers. In particular, on the third iteration, while
+ // `tokensValue` is logically presented as a three element array,
+ // internally you will see that the arrays internal to that
+ // structure have (at least) four items, specifically:
+ // `Masterfully`, `done`, `hero!`, `listen.`. In this way we see a
+ // simple example of the details of how buffer sharing from one
+ // iteration to the next actually works.
}
}
@@ -81,37 +97,51 @@ public InputObject(bool label, string text)
}
///
- /// This is an implementation of that wraps an
- /// of the above . Note that normally under these circumstances, the first
- /// recommendation would be to use a convenience like
- ///
- /// or something like that, rather than implementing outright. However, sometimes when
- /// code generation is impossible on some situations, like Unity or other similar platforms, implementing
+ /// This is an implementation of that wraps an
+ /// of the above .
+ /// Note that normally under these circumstances, the first recommendation
+ /// would be to use a convenience like
+ ///
+ /// or something like that, rather than implementing
+ /// outright. However, sometimes when code generation is impossible on some
+ /// situations, like Unity or other similar platforms, implementing
/// something even closely resembling this may become necessary.
///
- /// This implementation of , being didactic, is much simpler than practically
- /// anything one would find in the ML.NET codebase. In this case we have a completely fixed schema (the two
- /// fields of ), with fixed types.
+ /// This implementation of , being didactic, is much
+ /// simpler than practically anything one would find in the ML.NET codebase.
+ /// In this case we have a completely fixed schema (the two fields of
+ /// ), with fixed types.
///
- /// For , note that we keep a very simple schema based off the members of the object. You
- /// may in fact note that it is possible in this specific case, this implementation of
- /// could share the same object across all instances of this
- /// object, but since this is almost never the case, I do not take advantage of that.
+ /// For , note that we keep a very simple schema based
+ /// off the members of the object. You may in fact note that it is possible
+ /// in this specific case, this implementation of
+ /// could share the same object across all
+ /// instances of this object, but since this is almost never the case, I do
+ /// not take advantage of that.
///
- /// We have chosen to wrap an , so in fact only a very simple implementation is
- /// possible. Specifically: we cannot meaningfully shuffle (so is
- /// , and even if a parameter were passed to
- /// , we could not make use of it), we do
- /// not know the count of the item right away without counting (so, it is most correct for
- /// to return , even after we might hypothetically know after
- /// the first pass, given the immutability principle of ), and the
- /// method returns a single item.
+ /// We have chosen to wrap an , so in fact only
+ /// a very simple implementation is possible. Specifically: we cannot
+ /// meaningfully shuffle (so is
+ /// , and even if a
+ /// parameter were passed to
+ /// ,
+ /// we could not make use of it), we do not know the count of the item right
+ /// away without counting (so, it is most correct for
+ /// to return , even after
+ /// we might hypothetically know after the first pass, given the
+ /// immutability principle of ), and the
+ /// method returns a
+ /// single item.
///
- /// The derived class has more documentation specific to its behavior.
+ /// The derived class has more documentation
+ /// specific to its behavior.
///
- /// Note that this implementation, as well as the nested derived class, does
- /// almost no validation of parameters or guard against misuse than we would like from, say, implementations of
- /// the same classes within the ML.NET codebase.
+ /// Note that this implementation, as well as the nested
+ /// derived class, does almost no validation
+ /// of parameters or guard against misuse than we would like from, say,
+ /// implementations of the same classes within the ML.NET codebase.
///
private sealed class InputObjectDataView : IDataView
{
@@ -131,52 +161,76 @@ public InputObjectDataView(IEnumerable data)
public long? GetRowCount() => null;
- public DataViewRowCursor GetRowCursor(IEnumerable columnsNeeded, Random rand = null)
- => new Cursor(this, columnsNeeded.Any(c => c.Index == 0), columnsNeeded.Any(c => c.Index == 1));
+ public DataViewRowCursor GetRowCursor(
+ IEnumerable columnsNeeded,
+ Random rand = null)
+
+ => new Cursor(this, columnsNeeded.Any(c => c.Index == 0),
+ columnsNeeded.Any(c => c.Index == 1));
+
+ public DataViewRowCursor[] GetRowCursorSet(
+ IEnumerable columnsNeeded, int n,
+ Random rand = null)
- public DataViewRowCursor[] GetRowCursorSet(IEnumerable columnsNeeded, int n, Random rand = null)
=> new[] { GetRowCursor(columnsNeeded, rand) };
///
- /// Having this be a private sealed nested class follows the typical pattern: in most
- /// implementations, the cursor instance is almost always that. The only "common"
- /// exceptions to this tendency are those implementations that are such thin wrappings of existing
- /// without even bothering to change the schema.
+ /// Having this be a private sealed nested class follows the typical
+ /// pattern: in most implementations, the cursor
+ /// instance is almost always that. The only "common" exceptions to this
+ /// tendency are those implementations that are such thin wrappings of
+ /// existing without even bothering to change
+ /// the schema.
///
- /// On the subject of schema, note that there is an expectation that the object is
- /// reference equal to the object that created this cursor, as we see here.
+ /// On the subject of schema, note that there is an expectation that
+ /// the object is reference equal to the
+ /// object that created this cursor, as
+ /// we see here.
///
- /// Note that returns 0. As described in the documentation of that property, that
- /// is meant to facilitate the reconciliation of the partitioning of the data in the case where multiple
- /// cursors are returned from
- /// , but since only one is
- /// ever returned from the implementation, this behavior is appropriate.
+ /// Note that returns 0. As described in the
+ /// documentation of that property, that is meant to facilitate the
+ /// reconciliation of the partitioning of the data in the case where
+ /// multiple cursors are returned from
+ /// ,
+ /// but since only one is ever returned from the implementation, this
+ /// behavior is appropriate.
///
- /// Similarly, since it is impossible to have a shuffled cursor or a cursor set, it is sufficient for the
- /// implementation to return a simple ID based on the position. If, however, this
- /// had been something built on, hypothetically, an or some other such structure, and
- /// shuffling and partitioning was available, an ID based on the index of whatever item was being returned
- /// would be appropriate.
+ /// Similarly, since it is impossible to have a shuffled cursor or a
+ /// cursor set, it is sufficient for the
+ /// implementation to return a simple ID based on the position. If,
+ /// however, this had been something built on, hypothetically, an
+ /// or some other such structure, and shuffling
+ /// and partitioning was available, an ID based on the index of whatever
+ /// item was being returned would be appropriate.
///
- /// Note the usage of the parameters on the
- /// implementations. This is most valuable in the case of buffer sharing for , but
- /// we still of course have to deal with it here.
+ /// Note the usage of the parameters on the
+ /// implementations. This is most
+ /// valuable in the case of buffer sharing for ,
+ /// but we still of course have to deal with it here.
///
- /// Note also that we spend a considerable amount of effort to not make the
- /// and
- /// methods correctly reflect what was asked for from
- /// the
- /// method that was used to create this method. In this particular case, the point is somewhat moot: this
- /// mechanism exists to enable lazy evaluation, but since this cursor is implemented to wrap an
- /// which has no concept of lazy evaluation, there is no real practical benefit
- /// to doing this. However, it is best of course to illustrate the general principle for the sake of the
- /// example.
+ /// Note also that we spend a considerable amount of effort to not make
+ /// the and
+ /// methods
+ /// correctly reflect what was asked for from the
+ /// method that was used
+ /// to create this method. In this particular case, the point is
+ /// somewhat moot: this mechanism exists to enable lazy evaluation,
+ /// but since this cursor is implemented to wrap an
+ /// which has no concept of lazy
+ /// evaluation, there is no real practical benefit to doing this.
+ /// However, it is best of course to illustrate the general principle
+ /// for the sake of the example.
///
- /// Even in this simple form, we see the reason why
- /// is beneficial: the implementations themselves are simple to the point
- /// where their operation is dwarfed by the simple acts of casting and validation checking one sees in
- /// . In this way we only pay the cost of validation
- /// and casting once, not every time we get a value.
+ /// Even in this simple form, we see the reason why
+ /// is
+ /// beneficial: the implementations
+ /// themselves are simple to the point where their operation is dwarfed
+ /// by the simple acts of casting and validation checking one sees in
+ /// . In this way
+ /// we only pay the cost of validation and casting once, not every time
+ /// we get a value.
///
private sealed class Cursor : DataViewRowCursor
{
@@ -189,15 +243,22 @@ private sealed class Cursor : DataViewRowCursor
public override long Batch => 0;
public override DataViewSchema Schema { get; }
- public Cursor(InputObjectDataView parent, bool wantsLabel, bool wantsText)
+ public Cursor(InputObjectDataView parent, bool wantsLabel,
+ bool wantsText)
+
{
Schema = parent.Schema;
_position = -1;
_enumerator = parent._data.GetEnumerator();
_getters = new Delegate[]
{
- wantsLabel ? (ValueGetter)LabelGetterImplementation : null,
- wantsText ? (ValueGetter>)TextGetterImplementation : null
+ wantsLabel ?
+ (ValueGetter)LabelGetterImplementation : null,
+
+ wantsText ?
+ (ValueGetter>)
+ TextGetterImplementation : null
+
};
}
@@ -217,13 +278,17 @@ protected override void Dispose(bool disposing)
private void LabelGetterImplementation(ref bool value)
=> value = _enumerator.Current.Label;
- private void TextGetterImplementation(ref ReadOnlyMemory value)
+ private void TextGetterImplementation(
+ ref ReadOnlyMemory value)
+
=> value = _enumerator.Current.Text.AsMemory();
private void IdGetterImplementation(ref DataViewRowId id)
=> id = new DataViewRowId((ulong)_position, 0);
- public override ValueGetter GetGetter(DataViewSchema.Column column)
+ public override ValueGetter GetGetter(
+ DataViewSchema.Column column)
+
{
if (!IsColumnActive(column))
throw new ArgumentOutOfRangeException(nameof(column));
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/ImageClassification.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/ImageClassification.cs
index 3fa852fb44..611eb501c1 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/ImageClassification.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/ImageClassification.cs
@@ -23,7 +23,9 @@ public static void Example()
if (!File.Exists(modelLocation))
{
modelLocation = Download(@"https://storage.googleapis.com/download.tensorflow.org/models/tflite_11_05_08/resnet_v2_101.tgz", @"resnet_v2_101_299_frozen.tgz");
- Unzip(Path.Join(Directory.GetCurrentDirectory(), modelLocation), Directory.GetCurrentDirectory());
+ Unzip(Path.Join(Directory.GetCurrentDirectory(), modelLocation),
+ Directory.GetCurrentDirectory());
+
modelLocation = "resnet_v2_101_299_frozen.pb";
}
@@ -32,7 +34,8 @@ public static void Example()
var idv = mlContext.Data.LoadFromEnumerable(data);
// Create a ML pipeline.
- var pipeline = mlContext.Model.LoadTensorFlowModel(modelLocation).ScoreTensorFlowModel(
+ var pipeline = mlContext.Model.LoadTensorFlowModel(modelLocation)
+ .ScoreTensorFlowModel(
new[] { nameof(OutputScores.output) },
new[] { nameof(TensorData.input) }, addBatchDimensionInput: true);
@@ -41,15 +44,18 @@ public static void Example()
var transformedValues = estimator.Transform(idv);
// Retrieve model scores.
- var outScores = mlContext.Data.CreateEnumerable(transformedValues, reuseRowObject: false);
+ var outScores = mlContext.Data.CreateEnumerable(
+ transformedValues, reuseRowObject: false);
- // Display scores. (for the sake of brevity we display scores of the first 3 classes)
+ // Display scores. (for the sake of brevity we display scores of the
+ // first 3 classes)
foreach (var prediction in outScores)
{
int numClasses = 0;
foreach (var classScore in prediction.output.Take(3))
{
- Console.WriteLine($"Class #{numClasses++} score = {classScore}");
+ Console.WriteLine(
+ $"Class #{numClasses++} score = {classScore}");
}
Console.WriteLine(new string('-', 10));
}
@@ -72,7 +78,8 @@ public static void Example()
///
/// A class to hold sample tensor data.
- /// Member name should match the inputs that the model expects (in this case, input).
+ /// Member name should match the inputs that the model expects (in this
+ /// case, input).
///
public class TensorData
{
@@ -86,9 +93,13 @@ public class TensorData
public static TensorData[] GetTensorData()
{
// This can be any numerical data. Assume image pixel values.
- var image1 = Enumerable.Range(0, inputSize).Select(x => (float)x / inputSize).ToArray();
- var image2 = Enumerable.Range(0, inputSize).Select(x => (float)(x + 10000) / inputSize).ToArray();
- return new TensorData[] { new TensorData() { input = image1 }, new TensorData() { input = image2 } };
+ var image1 = Enumerable.Range(0, inputSize).Select(
+ x => (float)x / inputSize).ToArray();
+
+ var image2 = Enumerable.Range(0, inputSize).Select(
+ x => (float)(x + 10000) / inputSize).ToArray();
+ return new TensorData[] { new TensorData() { input = image1 },
+ new TensorData() { input = image2 } };
}
///
@@ -110,7 +121,8 @@ private static string Download(string baseGitPath, string dataFile)
}
///
- /// Taken from https://github.com/icsharpcode/SharpZipLib/wiki/GZip-and-Tar-Samples.
+ /// Taken from
+ /// https://github.com/icsharpcode/SharpZipLib/wiki/GZip-and-Tar-Samples.
///
private static void Unzip(string path, string targetDir)
{
@@ -125,4 +137,4 @@ private static void Unzip(string path, string targetDir)
inStream.Close();
}
}
-}
\ No newline at end of file
+}
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs
index 808a8eb347..4f09cf55db 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs
@@ -13,27 +13,36 @@ public static class TextClassification
///
public static void Example()
{
- string modelLocation = Microsoft.ML.SamplesUtils.DatasetUtils.DownloadTensorFlowSentimentModel();
+ string modelLocation = Microsoft.ML.SamplesUtils.DatasetUtils
+ .DownloadTensorFlowSentimentModel();
var mlContext = new MLContext();
var data = new[] { new IMDBSentiment() {
- Sentiment_Text = "this film was just brilliant casting location scenery story direction " +
- "everyone's really suited the part they played and you could just imagine being there robert " +
- "is an amazing actor and now the same being director father came from the same scottish " +
- "island as myself so i loved the fact there was a real connection with this film the witty " +
- "remarks throughout the film were great it was just brilliant so much that i bought the " +
- "film as soon as it was released for and would recommend it to everyone to watch and the " +
- "fly fishing was amazing really cried at the end it was so sad and you know what they say " +
- "if you cry at a film it must have been good and this definitely was also to the two " +
- "little boy's that played the of norman and paul they were just brilliant children are " +
- "often left out of the list i think because the stars that play them all grown up are " +
- "such a big profile for the whole film but these children are amazing and should be praised " +
- "for what they have done don't you think the whole story was so lovely because it was true " +
- "and was someone's life after all that was shared with us all" } };
+ Sentiment_Text = "this film was just brilliant casting location " +
+ "scenery story direction everyone's really suited the part they " +
+ "played and you could just imagine being there robert is an " +
+ "amazing actor and now the same being director father came from " +
+ "the same scottish island as myself so i loved the fact there " +
+ "was a real connection with this film the witty remarks " +
+ "throughout the film were great it was just brilliant so much " +
+ "that i bought the film as soon as it was released for and " +
+ "would recommend it to everyone to watch and the fly fishing was " +
+ "amazing really cried at the end it was so sad and you know what " +
+ "they say if you cry at a film it must have been good and this " +
+ "definitely was also to the two little boy's that played the of " +
+ "norman and paul they were just brilliant children are often " +
+ "left out of the list i think because the stars that play them " +
+ "all grown up are such a big profile for the whole film but " +
+ "these children are amazing and should be praised for what " +
+ "they have done don't you think the whole story was so lovely" +
+ "because it was true and was someone's life after all that was" +
+ "shared with us all" } };
var dataView = mlContext.Data.LoadFromEnumerable(data);
// This is the dictionary to convert words into the integer indexes.
- var lookupMap = mlContext.Data.LoadFromTextFile(Path.Combine(modelLocation, "imdb_word_index.csv"),
+ var lookupMap = mlContext.Data.LoadFromTextFile(Path.Combine(
+ modelLocation, "imdb_word_index.csv"),
+
columns: new[]
{
new TextLoader.Column("Words", DataKind.String, 0),
@@ -43,25 +52,38 @@ public static void Example()
);
// Load the TensorFlow model once.
- // - Use it for quering the schema for input and output in the model
+ // - Use it for quering the schema for input and output in the
+ // model
// - Use it for prediction in the pipeline.
- var tensorFlowModel = mlContext.Model.LoadTensorFlowModel(modelLocation);
+ var tensorFlowModel = mlContext.Model.LoadTensorFlowModel(
+ modelLocation);
var schema = tensorFlowModel.GetModelSchema();
var featuresType = (VectorDataViewType)schema["Features"].Type;
- Console.WriteLine("Name: {0}, Type: {1}, Shape: (-1, {2})", "Features", featuresType.ItemType.RawType, featuresType.Dimensions[0]);
- var predictionType = (VectorDataViewType)schema["Prediction/Softmax"].Type;
- Console.WriteLine("Name: {0}, Type: {1}, Shape: (-1, {2})", "Prediction/Softmax", predictionType.ItemType.RawType, predictionType.Dimensions[0]);
-
- // The model expects the input feature vector to be a fixed length vector.
- // In this sample, CustomMappingEstimator is used to resize variable length vector to fixed length vector.
+ Console.WriteLine("Name: {0}, Type: {1}, Shape: (-1, {2})", "Features",
+ featuresType.ItemType.RawType, featuresType.Dimensions[0]);
+
+ var predictionType = (VectorDataViewType)schema["Prediction/Softmax"]
+ .Type;
+ Console.WriteLine("Name: {0}, Type: {1}, Shape: (-1, {2})",
+ "Prediction/Softmax", predictionType.ItemType.RawType,
+ predictionType.Dimensions[0]);
+
+ // The model expects the input feature vector to be a fixed length
+ // vector.
+ // In this sample, CustomMappingEstimator is used to resize variable
+ // length vector to fixed length vector.
// The following ML.NET pipeline
// 1. tokenzies the string into words,
- // 2. maps each word to an integer which is an index in the dictionary ('lookupMap'),
- // 3. Resizes the integer vector to a fixed length vector using CustomMappingEstimator ('ResizeFeaturesAction')
+ // 2. maps each word to an integer which is an index in the
+ // dictionary ('lookupMap'),
+ // 3. Resizes the integer vector to a fixed length vector using
+ // CustomMappingEstimator ('ResizeFeaturesAction')
// 4. Passes the data to TensorFlow for scoring.
- // 5. Retreives the 'Prediction' from TensorFlow and put it into ML.NET Pipeline
+ // 5. Retreives the 'Prediction' from TensorFlow and put it into
+ // ML.NET Pipeline
- Action ResizeFeaturesAction = (i, j) =>
+ Action ResizeFeaturesAction =
+ (i, j) =>
{
j.Sentiment_Text = i.Sentiment_Text;
var features = i.VariableLengthFeatures;
@@ -69,23 +91,40 @@ public static void Example()
j.Features = features;
};
- var model = mlContext.Transforms.Text.TokenizeIntoWords("TokenizedWords", "Sentiment_Text")
- .Append(mlContext.Transforms.Conversion.MapValue("VariableLengthFeatures", lookupMap,
- lookupMap.Schema["Words"], lookupMap.Schema["Ids"], "TokenizedWords"))
- .Append(mlContext.Transforms.CustomMapping(ResizeFeaturesAction, "Resize"))
- .Append(tensorFlowModel.ScoreTensorFlowModel("Prediction/Softmax", "Features"))
- .Append(mlContext.Transforms.CopyColumns("Prediction", "Prediction/Softmax"))
+ var model =
+ mlContext.Transforms.Text.TokenizeIntoWords(
+ "TokenizedWords",
+ "Sentiment_Text")
+ .Append(mlContext.Transforms.Conversion.MapValue(
+ "VariableLengthFeatures",
+ lookupMap,
+ lookupMap.Schema["Words"],
+ lookupMap.Schema["Ids"],
+ "TokenizedWords"))
+ .Append(mlContext.Transforms.CustomMapping(
+ ResizeFeaturesAction,
+ "Resize"))
+ .Append(tensorFlowModel.ScoreTensorFlowModel(
+ "Prediction/Softmax",
+ "Features"))
+ .Append(mlContext.Transforms.CopyColumns(
+ "Prediction",
+ "Prediction/Softmax"))
.Fit(dataView);
- var engine = mlContext.Model.CreatePredictionEngine(model);
+ var engine = mlContext.Model.CreatePredictionEngine(model);
// Predict with TensorFlow pipeline.
var prediction = engine.Predict(data[0]);
- Console.WriteLine("Number of classes: {0}", prediction.Prediction.Length);
- Console.WriteLine("Is sentiment/review positive? {0}", prediction.Prediction[1] > 0.5 ? "Yes." : "No.");
- Console.WriteLine("Prediction Confidence: {0}", prediction.Prediction[1].ToString("0.00"));
+ Console.WriteLine("Number of classes: {0}", prediction.Prediction
+ .Length);
+ Console.WriteLine("Is sentiment/review positive? {0}", prediction
+ .Prediction[1] > 0.5 ? "Yes." : "No.");
+ Console.WriteLine("Prediction Confidence: {0}", prediction.Prediction[1]
+ .ToString("0.00"));
- /////////////////////////////////// Expected output ///////////////////////////////////
+ ///////////////////////////// Expected output //////////////////////////
//
// Name: Features, Type: System.Int32, Shape: (-1, 600)
// Name: Prediction/Softmax, Type: System.Single, Shape: (-1, 2)
@@ -105,8 +144,9 @@ public class IMDBSentiment
///
/// This is a variable length vector designated by VectorType attribute.
- /// Variable length vectors are produced by applying operations such as 'TokenizeWords' on strings
- /// resulting in vectors of tokens of variable lengths.
+ /// Variable length vectors are produced by applying operations such as
+ /// 'TokenizeWords' on strings resulting in vectors of tokens of
+ /// variable lengths.
///
[VectorType]
public int[] VariableLengthFeatures { get; set; }
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/TextTransform.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/TextTransform.cs
index 5c49ac6bbd..db8f7c4961 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/TextTransform.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/TextTransform.cs
@@ -10,16 +10,22 @@ public static class TextTransform
{
public static void Example()
{
- // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
- // as well as the source of randomness.
+ // Create a new ML context, for ML.NET operations. It can be used for
+ // exception tracking and logging, as well as the source of randomness.
var ml = new MLContext();
// Get a small dataset as an IEnumerable and convert to IDataView.
// Get a small dataset as an IEnumerable and convert to IDataView.
var data = new List() {
- new SampleSentimentData { Sentiment = true, SentimentText = "Best game I've ever played." },
- new SampleSentimentData { Sentiment = false, SentimentText = "==RUDE== Dude, 2" },
- new SampleSentimentData { Sentiment = true, SentimentText = "Until the next game, this is the best Xbox game!" } };
+ new SampleSentimentData { Sentiment = true,
+ SentimentText = "Best game I've ever played." },
+
+ new SampleSentimentData { Sentiment = false,
+ SentimentText = "==RUDE== Dude, 2" },
+
+ new SampleSentimentData { Sentiment = true,
+ SentimentText = "Until the next game," +
+ "this is the best Xbox game!" } };
// Convert IEnumerable to IDataView.
var trainData = ml.Data.LoadFromEnumerable(data);
@@ -31,29 +37,47 @@ public static void Example()
// false ==RUDE== Dude, 2.
// true Until the next game, this is the best Xbox game!
- // A pipeline for featurization of the "SentimentText" column, and placing the output in a new column named "DefaultTextFeatures"
- // The pipeline uses the default settings to featurize.
+ // A pipeline for featurization of the "SentimentText" column, and
+ // placing the output in a new column named "DefaultTextFeatures". The
+ // pipeline uses the default settings to featurize.
string defaultColumnName = "DefaultTextFeatures";
- var default_pipeline = ml.Transforms.Text.FeaturizeText(defaultColumnName , "SentimentText");
+ var default_pipeline = ml.Transforms.Text
+ .FeaturizeText(defaultColumnName , "SentimentText");
- // Another pipeline, that customizes the advanced settings of the FeaturizeText transformer.
+ // Another pipeline, that customizes the advanced settings of the
+ // FeaturizeText transformer.
string customizedColumnName = "CustomizedTextFeatures";
- var customized_pipeline = ml.Transforms.Text.FeaturizeText(customizedColumnName, new TextFeaturizingEstimator.Options
+ var customized_pipeline = ml.Transforms.Text
+ .FeaturizeText(customizedColumnName,
+ new TextFeaturizingEstimator.Options
+
{
KeepPunctuations = false,
KeepNumbers = false,
OutputTokensColumnName = "OutputTokens",
- StopWordsRemoverOptions = new StopWordsRemovingEstimator.Options() { Language = TextFeaturizingEstimator.Language.English }, // supports English, French, German, Dutch, Italian, Spanish, Japanese
+ StopWordsRemoverOptions =
+ new StopWordsRemovingEstimator.Options() {
+ Language = TextFeaturizingEstimator.Language.English },
+ // supports English, French, German, Dutch, Italian, Spanish,
+ // Japanese
+
}, "SentimentText");
// The transformed data for both pipelines.
- var transformedData_default = default_pipeline.Fit(trainData).Transform(trainData);
- var transformedData_customized = customized_pipeline.Fit(trainData).Transform(trainData);
+ var transformedData_default = default_pipeline.Fit(trainData)
+ .Transform(trainData);
+
+ var transformedData_customized = customized_pipeline.Fit(trainData)
+ .Transform(trainData);
// Small helper to print the text inside the columns, in the console.
- Action>> printHelper = (columnName, column) =>
+ Action>> printHelper = (columnName,
+ column) =>
+
{
- Console.WriteLine($"{columnName} column obtained post-transformation.");
+ Console.WriteLine(
+ $"{columnName} column obtained post-transformation.");
+
foreach (var featureRow in column)
{
foreach (var value in featureRow.GetValues())
@@ -61,11 +85,17 @@ public static void Example()
Console.WriteLine("");
}
- Console.WriteLine("===================================================");
+ Console.WriteLine(
+ "===================================================");
+
};
- // Preview of the DefaultTextFeatures column obtained after processing the input.
- var defaultColumn = transformedData_default.GetColumn>(transformedData_default.Schema[defaultColumnName]);
+ // Preview of the DefaultTextFeatures column obtained after processing
+ // the input.
+ var defaultColumn = transformedData_default
+ .GetColumn>(transformedData_default
+ .Schema[defaultColumnName]);
+
printHelper(defaultColumnName, defaultColumn);
// DefaultTextFeatures column obtained post-transformation.
@@ -74,8 +104,12 @@ public static void Example()
// 0.2357023 0.2357023 0.2357023 0.2357023 0.4714046 0.2357023 0.2357023 0.2357023 0.2357023 0.2357023 0.2357023 0.2357023 0.2357023 0.2357023 0.2357023 0.5773503 0.5773503 0.5773503 0.1924501 0.1924501 0.1924501 0.1924501 0.1924501 0.1924501 0.1924501 0.1924501 0.1924501 0.4472136 0.4472136 0.4472136 0.4472136 0.4472136
// 0 0.1230915 0.1230915 0.1230915 0.1230915 0.246183 0.246183 0.246183 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.1230915 0 0 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.3692745 0.246183 0.246183 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.246183 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.2886751 0 0 0 0 0 0 0 0.2886751 0.5773503 0.2886751 0.2886751 0.2886751 0.2886751 0.2886751 0.2886751
- // Preview of the CustomizedTextFeatures column obtained after processing the input.
- var customizedColumn = transformedData_customized.GetColumn>(transformedData_customized.Schema[customizedColumnName]);
+ // Preview of the CustomizedTextFeatures column obtained after
+ // processing the input.
+ var customizedColumn = transformedData_customized
+ .GetColumn>(transformedData_customized
+ .Schema[customizedColumnName]);
+
printHelper(customizedColumnName, customizedColumn);
// CustomizedTextFeatures column obtained post-transformation.
@@ -86,7 +120,8 @@ public static void Example()
}
///
- /// A dataset that contains a tweet and the sentiment assigned to that tweet: 0 - negative and 1 - positive sentiment.
+ /// A dataset that contains a tweet and the sentiment assigned to that
+ /// tweet: 0 - negative and 1 - positive sentiment.
///
public class SampleSentimentData
{
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/AnomalyDetection/RandomizedPcaSample.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/AnomalyDetection/RandomizedPcaSample.cs
index 62287c4ba1..adccee9e81 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/AnomalyDetection/RandomizedPcaSample.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/AnomalyDetection/RandomizedPcaSample.cs
@@ -10,9 +10,10 @@ public static class RandomizedPcaSample
{
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
- // Setting the seed to a fixed number in this example to make outputs deterministic.
+ // Create a new context for ML.NET operations. It can be used for except
+ // ion tracking and logging, as a catalog of available operations and as
+ // the source of randomness. Setting the seed to a fixed number in this
+ // example to make outputs deterministic.
var mlContext = new MLContext(seed: 0);
// Training data.
@@ -26,11 +27,15 @@ public static void Example()
new DataPoint(){ Features = new float[3] {-100, 50, -100} }
};
- // Convert the List to IDataView, a consumble format to ML.NET functions.
+ // Convert the List to IDataView, a consumble format to
+ // ML.NET functions.
var data = mlContext.Data.LoadFromEnumerable(samples);
- // Create an anomaly detector. Its underlying algorithm is randomized PCA.
- var pipeline = mlContext.AnomalyDetection.Trainers.RandomizedPca(featureColumnName: nameof(DataPoint.Features), rank: 1, ensureZeroMean: false);
+ // Create an anomaly detector. Its underlying algorithm is randomized
+ // PCA.
+ var pipeline = mlContext.AnomalyDetection.Trainers.RandomizedPca(
+ featureColumnName: nameof(DataPoint.Features), rank: 1,
+ ensureZeroMean: false);
// Train the anomaly detector.
var model = pipeline.Fit(data);
@@ -39,7 +44,8 @@ public static void Example()
var transformed = model.Transform(data);
// Read ML.NET predictions into IEnumerable.
- var results = mlContext.Data.CreateEnumerable(transformed, reuseRowObject: false).ToList();
+ var results = mlContext.Data.CreateEnumerable(transformed,
+ reuseRowObject: false).ToList();
// Let's go through all predictions.
for (int i = 0; i < samples.Count; ++i)
@@ -52,12 +58,14 @@ public static void Example()
if (result.PredictedLabel)
// The i-th sample is predicted as an inlier.
- Console.WriteLine("The {0}-th example with features [{1}] is an inlier with a score of being inlier {2}",
- i, featuresInText, result.Score);
+ Console.WriteLine("The {0}-th example with features [{1}]" +
+ "is an inlier with a score of being inlier {2}", i,
+ featuresInText, result.Score);
else
// The i-th sample is predicted as an outlier.
- Console.WriteLine("The {0}-th example with features [{1}] is an outlier with a score of being inlier {2}",
- i, featuresInText, result.Score);
+ Console.WriteLine("The {0}-th example with features [{1}] is" +
+ "an outlier with a score of being inlier {2}", i,
+ featuresInText, result.Score);
}
// Lines printed out should be
// The 0 - th example with features[1, 0, 0] is an inlier with a score of being inlier 0.7453707
@@ -68,7 +76,8 @@ public static void Example()
// The 5 - th example with features[-100, 50, -100] is an outlier with a score of being inlier 0
}
- // Example with 3 feature values. A training data set is a collection of such examples.
+ // Example with 3 feature values. A training data set is a collection of
+ // such examples.
private class DataPoint
{
[VectorType(3)]
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/AnomalyDetection/RandomizedPcaSampleWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/AnomalyDetection/RandomizedPcaSampleWithOptions.cs
index 02b725f7ce..7a281880b7 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/AnomalyDetection/RandomizedPcaSampleWithOptions.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/AnomalyDetection/RandomizedPcaSampleWithOptions.cs
@@ -10,9 +10,10 @@ public static class RandomizedPcaSampleWithOptions
{
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
- // Setting the seed to a fixed number in this example to make outputs deterministic.
+ // Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging, as a catalog of available operations
+ // and as the source of randomness. Setting the seed to a fixed number
+ // in this example to make outputs deterministic.
var mlContext = new MLContext(seed: 0);
// Training data.
@@ -26,7 +27,8 @@ public static void Example()
new DataPoint(){ Features = new float[3] {-100, 50, -100} }
};
- // Convert the List to IDataView, a consumble format to ML.NET functions.
+ // Convert the List to IDataView, a consumble format to
+ // ML.NET functions.
var data = mlContext.Data.LoadFromEnumerable(samples);
var options = new Microsoft.ML.Trainers.RandomizedPcaTrainer.Options()
@@ -36,8 +38,10 @@ public static void Example()
Seed = 10,
};
- // Create an anomaly detector. Its underlying algorithm is randomized PCA.
- var pipeline = mlContext.AnomalyDetection.Trainers.RandomizedPca(options);
+ // Create an anomaly detector. Its underlying algorithm is randomized
+ // PCA.
+ var pipeline = mlContext.AnomalyDetection.Trainers.RandomizedPca(
+ options);
// Train the anomaly detector.
var model = pipeline.Fit(data);
@@ -46,7 +50,8 @@ public static void Example()
var transformed = model.Transform(data);
// Read ML.NET predictions into IEnumerable.
- var results = mlContext.Data.CreateEnumerable(transformed, reuseRowObject: false).ToList();
+ var results = mlContext.Data.CreateEnumerable(transformed,
+ reuseRowObject: false).ToList();
// Let's go through all predictions.
for (int i = 0; i < samples.Count; ++i)
@@ -59,11 +64,13 @@ public static void Example()
if (result.PredictedLabel)
// The i-th sample is predicted as an inlier.
- Console.WriteLine("The {0}-th example with features [{1}] is an inlier with a score of being inlier {2}",
- i, featuresInText, result.Score);
+ Console.WriteLine("The {0}-th example with features [{1}] is" +
+ "an inlier with a score of being inlier {2}", i,
+ featuresInText, result.Score);
else
// The i-th sample is predicted as an outlier.
- Console.WriteLine("The {0}-th example with features [{1}] is an outlier with a score of being inlier {2}",
+ Console.WriteLine("The {0}-th example with features [{1}] is" +
+ "an outlier with a score of being inlier {2}",
i, featuresInText, result.Score);
}
// Lines printed out should be
@@ -75,7 +82,8 @@ public static void Example()
// The 5 - th example with features[-100, 50, -100] is an outlier with a score of being inlier 0
}
- // Example with 3 feature values. A training data set is a collection of such examples.
+ // Example with 3 feature values. A training data set is a collection of
+ // such examples.
private class DataPoint
{
[VectorType(3)]
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs
index 1506fc1abd..fd24a32b39 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs
@@ -10,35 +10,43 @@ public static class AveragedPerceptron
{
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
- // Setting the seed to a fixed number in this example to make outputs deterministic.
+ // Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging, as a catalog of available operations
+ // and as the source of randomness. Setting the seed to a fixed number
+ // in this example to make outputs deterministic.
var mlContext = new MLContext(seed: 0);
// Create a list of training data points.
var dataPoints = GenerateRandomDataPoints(1000);
- // Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
+ // Convert the list of data points to an IDataView object, which is
+ // consumable by ML.NET API.
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);
// Define the trainer.
- var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron();
+ var pipeline = mlContext.BinaryClassification.Trainers
+ .AveragedPerceptron();
// Train the model.
var model = pipeline.Fit(trainingData);
- // Create testing data. Use different random seed to make it different from training data.
- var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
+ // Create testing data. Use different random seed to make it different
+ // from training data.
+ var testData = mlContext.Data
+ .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
// Run the model on test data set.
var transformedTestData = model.Transform(testData);
// Convert IDataView object to a list.
- var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList();
+ var predictions = mlContext.Data
+ .CreateEnumerable(transformedTestData,
+ reuseRowObject: false).ToList();
// Print 5 predictions.
foreach (var p in predictions.Take(5))
- Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
+ Console.WriteLine($"Label: {p.Label}, "
+ + $"Prediction: {p.PredictedLabel}");
// Expected output:
// Label: True, Prediction: True
@@ -48,7 +56,9 @@ public static void Example()
// Label: False, Prediction: False
// Evaluate the overall metrics.
- var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(transformedTestData);
+ var metrics = mlContext.BinaryClassification
+ .EvaluateNonCalibrated(transformedTestData);
+
PrintMetrics(metrics);
// Expected output:
@@ -71,7 +81,9 @@ public static void Example()
// Precision || 0.7402 | 0.7061 |
}
- private static IEnumerable GenerateRandomDataPoints(int count, int seed=0)
+ private static IEnumerable GenerateRandomDataPoints(int count,
+ int seed=0)
+
{
var random = new Random(seed);
float randomFloat() => (float)random.NextDouble();
@@ -82,13 +94,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se
{
Label = label,
// Create random features that are correlated with the label.
- // For data points with false label, the feature values are slightly increased by adding a constant.
- Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.1f).ToArray()
+ // For data points with false label, the feature values are
+ // slightly increased by adding a constant.
+ Features = Enumerable.Repeat(label, 50)
+ .Select(x => x ? randomFloat() : randomFloat() +
+ 0.1f).ToArray()
+
};
}
}
- // Example with label and 50 feature values. A data set is a collection of such examples.
+ // Example with label and 50 feature values. A data set is a collection of
+ // such examples.
private class DataPoint
{
public bool Label { get; set; }
@@ -111,11 +128,16 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics)
Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}");
Console.WriteLine($"F1 Score: {metrics.F1Score:F2}");
- Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}");
+ Console.WriteLine($"Negative Precision: " +
+ $"{metrics.NegativePrecision:F2}");
+
Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}");
- Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}");
+ Console.WriteLine($"Positive Precision: " +
+ $"{metrics.PositivePrecision:F2}");
+
Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n");
Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable());
}
}
}
+
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs
index 748e37be8e..8f31f474e4 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs
@@ -11,15 +11,17 @@ public static class AveragedPerceptronWithOptions
{
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
- // Setting the seed to a fixed number in this example to make outputs deterministic.
+ // Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging, as a catalog of available operations
+ // and as the source of randomness. Setting the seed to a fixed number
+ // in this example to make outputs deterministic.
var mlContext = new MLContext(seed: 0);
// Create a list of training data points.
var dataPoints = GenerateRandomDataPoints(1000);
- // Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
+ // Convert the list of data points to an IDataView object, which is
+ // consumable by ML.NET API.
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);
// Define trainer options.
@@ -33,23 +35,29 @@ public static void Example()
};
// Define the trainer.
- var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron(options);
+ var pipeline = mlContext.BinaryClassification.Trainers
+ .AveragedPerceptron(options);
// Train the model.
var model = pipeline.Fit(trainingData);
- // Create testing data. Use different random seed to make it different from training data.
- var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
+ // Create testing data. Use different random seed to make it different
+ // from training data.
+ var testData = mlContext.Data
+ .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
// Run the model on test data set.
var transformedTestData = model.Transform(testData);
// Convert IDataView object to a list.
- var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList();
+ var predictions = mlContext.Data
+ .CreateEnumerable(transformedTestData,
+ reuseRowObject: false).ToList();
// Print 5 predictions.
foreach (var p in predictions.Take(5))
- Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
+ Console.WriteLine($"Label: {p.Label}, "
+ + $"Prediction: {p.PredictedLabel}");
// Expected output:
// Label: True, Prediction: True
@@ -59,7 +67,9 @@ public static void Example()
// Label: False, Prediction: False
// Evaluate the overall metrics.
- var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(transformedTestData);
+ var metrics = mlContext.BinaryClassification
+ .EvaluateNonCalibrated(transformedTestData);
+
PrintMetrics(metrics);
// Expected output:
@@ -82,7 +92,9 @@ public static void Example()
// Precision || 0.7402 | 0.7061 |
}
- private static IEnumerable GenerateRandomDataPoints(int count, int seed=0)
+ private static IEnumerable GenerateRandomDataPoints(int count,
+ int seed=0)
+
{
var random = new Random(seed);
float randomFloat() => (float)random.NextDouble();
@@ -93,13 +105,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se
{
Label = label,
// Create random features that are correlated with the label.
- // For data points with false label, the feature values are slightly increased by adding a constant.
- Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.1f).ToArray()
+ // For data points with false label, the feature values are
+ // slightly increased by adding a constant.
+ Features = Enumerable.Repeat(label, 50)
+ .Select(x => x ? randomFloat() : randomFloat() +
+ 0.1f).ToArray()
+
};
}
}
- // Example with label and 50 feature values. A data set is a collection of such examples.
+ // Example with label and 50 feature values. A data set is a collection of
+ // such examples.
private class DataPoint
{
public bool Label { get; set; }
@@ -122,11 +139,16 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics)
Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}");
Console.WriteLine($"F1 Score: {metrics.F1Score:F2}");
- Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}");
+ Console.WriteLine($"Negative Precision: " +
+ $"{metrics.NegativePrecision:F2}");
+
Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}");
- Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}");
+ Console.WriteLine($"Positive Precision: " +
+ $"{metrics.PositivePrecision:F2}");
+
Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n");
Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable());
}
}
}
+
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/BinaryClassification.ttinclude b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/BinaryClassification.ttinclude
index 6bc5660d46..72dc7cc111 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/BinaryClassification.ttinclude
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/BinaryClassification.ttinclude
@@ -13,63 +13,79 @@ namespace Samples.Dynamic.Trainers.BinaryClassification
{<#=Comments#>
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
- // Setting the seed to a fixed number in this example to make outputs deterministic.
+ // Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging, as a catalog of available operations
+ // and as the source of randomness. Setting the seed to a fixed number
+ // in this example to make outputs deterministic.
var mlContext = new MLContext(seed: 0);
// Create a list of training data points.
var dataPoints = GenerateRandomDataPoints(1000);
- // Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
+ // Convert the list of data points to an IDataView object, which is
+ // consumable by ML.NET API.
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);
<# if (CacheData) { #>
- // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times,
- // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory,
- // a solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms
+ // ML.NET doesn't cache data set by default. Therefore, if one reads a
+ // data set from a file and accesses it many times, it can be slow due
+ // to expensive featurization and disk operations. When the considered
+ // data can fit into memory, a solution is to cache the data in memory.
+ // Caching is especially helpful when working with iterative algorithms
// which needs many data passes.
trainingData = mlContext.Data.Cache(trainingData);
<# } #>
<# if (TrainerOptions == null) { #>
// Define the trainer.
- var pipeline = mlContext.BinaryClassification.Trainers.<#=Trainer#>();
+ var pipeline = mlContext.BinaryClassification.Trainers
+ .<#=Trainer#>();
<# } else { #>
// Define trainer options.
var options = new <#=TrainerOptions#>;
// Define the trainer.
- var pipeline = mlContext.BinaryClassification.Trainers.<#=Trainer#>(options);
+ var pipeline = mlContext.BinaryClassification.Trainers
+ .<#=Trainer#>(options);
<# } #>
// Train the model.
var model = pipeline.Fit(trainingData);
- // Create testing data. Use different random seed to make it different from training data.
- var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
+ // Create testing data. Use different random seed to make it different
+ // from training data.
+ var testData = mlContext.Data
+ .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
// Run the model on test data set.
var transformedTestData = model.Transform(testData);
// Convert IDataView object to a list.
- var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList();
+ var predictions = mlContext.Data
+ .CreateEnumerable(transformedTestData,
+ reuseRowObject: false).ToList();
// Print 5 predictions.
foreach (var p in predictions.Take(5))
- Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
+ Console.WriteLine($"Label: {p.Label}, "
+ + $"Prediction: {p.PredictedLabel}");
<#=ExpectedOutputPerInstance#>
- <# string Evaluator = IsCalibrated ? "Evaluate" : "EvaluateNonCalibrated"; #>
+ <# string Evaluator = IsCalibrated ? "Evaluate" :
+ "EvaluateNonCalibrated"; #>
// Evaluate the overall metrics.
- var metrics = mlContext.BinaryClassification.<#=Evaluator#>(transformedTestData);
+ var metrics = mlContext.BinaryClassification
+ .<#=Evaluator#>(transformedTestData);
+
PrintMetrics(metrics);
<#=ExpectedOutput#>
}
- private static IEnumerable GenerateRandomDataPoints(int count, int seed=0)
+ private static IEnumerable GenerateRandomDataPoints(int count,
+ int seed=0)
+
{
var random = new Random(seed);
float randomFloat() => (float)random.NextDouble();
@@ -80,13 +96,18 @@ namespace Samples.Dynamic.Trainers.BinaryClassification
{
Label = label,
// Create random features that are correlated with the label.
- // For data points with false label, the feature values are slightly increased by adding a constant.
- Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + <#=DataSepValue#>).ToArray()
+ // For data points with false label, the feature values are
+ // slightly increased by adding a constant.
+ Features = Enumerable.Repeat(label, 50)
+ .Select(x => x ? randomFloat() : randomFloat() +
+ <#=DataSepValue#>).ToArray()
+
};
}
}
- // Example with label and 50 feature values. A data set is a collection of such examples.
+ // Example with label and 50 feature values. A data set is a collection of
+ // such examples.
private class DataPoint
{
public bool Label { get; set; }
@@ -109,11 +130,15 @@ namespace Samples.Dynamic.Trainers.BinaryClassification
Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}");
Console.WriteLine($"F1 Score: {metrics.F1Score:F2}");
- Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}");
+ Console.WriteLine($"Negative Precision: " +
+ $"{metrics.NegativePrecision:F2}");
+
Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}");
- Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}");
+ Console.WriteLine($"Positive Precision: " +
+ $"{metrics.PositivePrecision:F2}");
+
Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n");
Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable());
}
}
-}
\ No newline at end of file
+}
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/FixedPlatt.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/FixedPlatt.cs
index 1da43a7790..2b56ef63f0 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/FixedPlatt.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/FixedPlatt.cs
@@ -9,26 +9,36 @@ public static class FixedPlatt
{
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
- // Setting the seed to a fixed number in this example to make outputs deterministic.
+ // Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging, as a catalog of available operations
+ // and as the source of randomness. Setting the seed to a fixed number
+ // in this example to make outputs deterministic.
var mlContext = new MLContext(seed: 0);
// Download and featurize the dataset.
- var data = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);
+ var data = Microsoft.ML.SamplesUtils.DatasetUtils
+ .LoadFeaturizedAdultDataset(mlContext);
+
// Leave out 10% of data for testing.
- var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.3);
+ var trainTestData = mlContext.Data
+ .TrainTestSplit(data, testFraction: 0.3);
- // Create data training pipeline for non calibrated trainer and train Naive calibrator on top of it.
- var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron();
+ // Create data training pipeline for non calibrated trainer and train
+ // Naive calibrator on top of it.
+ var pipeline = mlContext.BinaryClassification.Trainers
+ .AveragedPerceptron();
- // Fit the pipeline, and get a transformer that knows how to score new data.
+ // Fit the pipeline, and get a transformer that knows how to score new
+ // data.
var transformer = pipeline.Fit(trainTestData.TrainSet);
// Fit this pipeline to the training data.
- // Let's score the new data. The score will give us a numerical estimation of the chance that the particular sample
- // bears positive sentiment. This estimate is relative to the numbers obtained.
+ // Let's score the new data. The score will give us a numerical
+ // estimation of the chance that the particular sample bears positive
+ // sentiment. This estimate is relative to the numbers obtained.
var scoredData = transformer.Transform(trainTestData.TestSet);
- var outScores = mlContext.Data.CreateEnumerable(scoredData, reuseRowObject: false);
+ var outScores = mlContext.Data
+ .CreateEnumerable(scoredData, reuseRowObject: false);
+
PrintScore(outScores, 5);
// Preview of scoredDataPreview.RowView
// Score 4.18144
@@ -37,16 +47,24 @@ public static void Example()
// Score -2.554229
// Score 5.36571
- // Let's train a calibrator estimator on this scored dataset. The trained calibrator estimator produces a transformer
- // that can transform the scored data by adding a new column names "Probability".
- var calibratorEstimator = mlContext.BinaryClassification.Calibrators.Platt(slope: -1f, offset: -0.05f);
+ // Let's train a calibrator estimator on this scored dataset. The
+ // trained calibrator estimator produces a transformer that can
+ // transform the scored data by adding a new column names "Probability".
+ var calibratorEstimator = mlContext.BinaryClassification.Calibrators
+ .Platt(slope: -1f, offset: -0.05f);
+
var calibratorTransformer = calibratorEstimator.Fit(scoredData);
- // Transform the scored data with a calibrator transfomer by adding a new column names "Probability".
- // This column is a calibrated version of the "Score" column, meaning its values are a valid probability value in the [0, 1] interval
- // representing the chance that the respective sample bears positive sentiment.
+ // Transform the scored data with a calibrator transfomer by adding a
+ // new column names "Probability". This column is a calibrated version
+ // of the "Score" column, meaning its values are a valid probability
+ // value in the [0, 1] interval representing the chance that the
+ // respective sample bears positive sentiment.
var finalData = calibratorTransformer.Transform(scoredData);
- var outScoresAndProbabilities = mlContext.Data.CreateEnumerable(finalData, reuseRowObject: false);
+ var outScoresAndProbabilities = mlContext.Data
+ .CreateEnumerable(finalData,
+ reuseRowObject: false);
+
PrintScoreAndProbability(outScoresAndProbabilities, 5);
// Score 4.18144 Probability 0.9856767
// Score -14.10248 Probability 7.890148E-07
@@ -61,10 +79,13 @@ private static void PrintScore(IEnumerable values, int numRows)
Console.WriteLine("{0, -10} {1, -10}", "Score", value.Score);
}
- private static void PrintScoreAndProbability(IEnumerable values, int numRows)
+ private static void PrintScoreAndProbability(
+ IEnumerable values, int numRows)
+
{
foreach (var value in values.Take(numRows))
- Console.WriteLine("{0, -10} {1, -10} {2, -10} {3, -10}", "Score", value.Score, "Probability", value.Probability);
+ Console.WriteLine("{0, -10} {1, -10} {2, -10} {3, -10}", "Score",
+ value.Score, "Probability", value.Probability);
}
private class ScoreValue
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Isotonic.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Isotonic.cs
index 15a3162d4e..1b1b63139e 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Isotonic.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Isotonic.cs
@@ -9,26 +9,35 @@ public static class Isotonic
{
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
- // Setting the seed to a fixed number in this example to make outputs deterministic.
+ // Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging, as a catalog of available operations
+ // and as the source of randomness. Setting the seed to a fixed number
+ // in this example to make outputs deterministic.
var mlContext = new MLContext(seed: 0);
// Download and featurize the dataset.
- var data = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);
+ var data = Microsoft.ML.SamplesUtils.DatasetUtils
+ .LoadFeaturizedAdultDataset(mlContext);
+
// Leave out 30% of data for testing.
- var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.3);
+ var trainTestData = mlContext.Data
+ .TrainTestSplit(data, testFraction: 0.3);
- // Create data training pipeline for non calibrated trainer and train Naive calibrator on top of it.
- var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron();
+ // Create data training pipeline for non calibrated trainer and train
+ // Isotonic calibrator on top of it.
+ var pipeline = mlContext.BinaryClassification.Trainers
+ .AveragedPerceptron();
- // Fit the pipeline, and get a transformer that knows how to score new data.
+ // Fit the pipeline, and get a transformer that knows how to score new
+ // data.
var transformer = pipeline.Fit(trainTestData.TrainSet);
// Fit this pipeline to the training data.
- // Let's score the new data. The score will give us a numerical estimation of the chance that the particular sample
- // bears positive sentiment. This estimate is relative to the numbers obtained.
+ // Let's score the new data. The score will give us a numerical
+ // estimation of the chance that the particular sample bears positive
+ // sentiment. This estimate is relative to the numbers obtained.
var scoredData = transformer.Transform(trainTestData.TestSet);
- var outScores = mlContext.Data.CreateEnumerable(scoredData, reuseRowObject: false);
+ var outScores = mlContext.Data
+ .CreateEnumerable(scoredData, reuseRowObject: false);
PrintScore(outScores, 5);
// Preview of scoredDataPreview.RowView
@@ -38,16 +47,24 @@ public static void Example()
// Score -2.554229
// Score 5.36571
- // Let's train a calibrator estimator on this scored dataset. The trained calibrator estimator produces a transformer
- // that can transform the scored data by adding a new column names "Probability".
- var calibratorEstimator = mlContext.BinaryClassification.Calibrators.Isotonic();
+ // Let's train a calibrator estimator on this scored dataset. The
+ // trained calibrator estimator produces a transformer that can
+ // transform the scored data by adding a new column named "Probability".
+ var calibratorEstimator = mlContext.BinaryClassification.Calibrators
+ .Isotonic();
+
var calibratorTransformer = calibratorEstimator.Fit(scoredData);
- // Transform the scored data with a calibrator transfomer by adding a new column names "Probability".
- // This column is a calibrated version of the "Score" column, meaning its values are a valid probability value in the [0, 1] interval
- // representing the chance that the respective sample bears positive sentiment.
+ // Transform the scored data with a calibrator transformer by adding a
+ // new column named "Probability". This column is a calibrated version
+ // of the "Score" column, meaning its values are a valid probability
+ // value in the [0, 1] interval representing the chance that the
+ // respective sample bears positive sentiment.
var finalData = calibratorTransformer.Transform(scoredData);
- var outScoresAndProbabilities = mlContext.Data.CreateEnumerable(finalData, reuseRowObject: false);
+ var outScoresAndProbabilities = mlContext.Data
+ .CreateEnumerable(finalData,
+ reuseRowObject: false);
+
PrintScoreAndProbability(outScoresAndProbabilities, 5);
// Score 4.18144 Probability 0.8
// Score -14.10248 Probability 1E-15
@@ -62,10 +79,14 @@ private static void PrintScore(IEnumerable values, int numRows)
Console.WriteLine("{0, -10} {1, -10}", "Score", value.Score);
}
- private static void PrintScoreAndProbability(IEnumerable values, int numRows)
+ private static void PrintScoreAndProbability(
+ IEnumerable values, int numRows)
+
{
foreach (var value in values.Take(numRows))
- Console.WriteLine("{0, -10} {1, -10} {2, -10} {3, -10}", "Score", value.Score, "Probability", value.Probability);
+ Console.WriteLine("{0, -10} {1, -10} {2, -10} {3, -10}", "Score",
+ value.Score, "Probability", value.Probability);
+
}
private class ScoreValue
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Naive.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Naive.cs
index 84a004b1c5..054d4f9e31 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Naive.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Naive.cs
@@ -9,26 +9,36 @@ public static class Naive
{
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
- // Setting the seed to a fixed number in this example to make outputs deterministic.
+ // Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging, as a catalog of available operations
+ // and as the source of randomness. Setting the seed to a fixed number
+ // in this example to make outputs deterministic.
var mlContext = new MLContext(seed: 0);
// Download and featurize the dataset.
- var data = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);
+ var data = Microsoft.ML.SamplesUtils.DatasetUtils
+ .LoadFeaturizedAdultDataset(mlContext);
+
// Leave out 30% of data for testing.
- var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.3);
+ var trainTestData = mlContext.Data
+ .TrainTestSplit(data, testFraction: 0.3);
- // Create data training pipeline for non calibrated trainer and train Naive calibrator on top of it.
- var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron();
+ // Create data training pipeline for non calibrated trainer and train
+ // Naive calibrator on top of it.
+ var pipeline = mlContext.BinaryClassification.Trainers
+ .AveragedPerceptron();
- // Fit the pipeline, and get a transformer that knows how to score new data.
+ // Fit the pipeline, and get a transformer that knows how to score new
+ // data.
var transformer = pipeline.Fit(trainTestData.TrainSet);
// Fit this pipeline to the training data.
- // Let's score the new data. The score will give us a numerical estimation of the chance that the particular sample
- // bears positive sentiment. This estimate is relative to the numbers obtained.
+ // Let's score the new data. The score will give us a numerical
+ // estimation of the chance that the particular sample bears positive
+ // sentiment. This estimate is relative to the numbers obtained.
var scoredData = transformer.Transform(trainTestData.TestSet);
- var outScores = mlContext.Data.CreateEnumerable(scoredData, reuseRowObject: false);
+ var outScores = mlContext.Data
+ .CreateEnumerable(scoredData, reuseRowObject: false);
+
PrintScore(outScores, 5);
// Preview of scoredDataPreview.RowView
// Score 4.18144
@@ -37,16 +47,24 @@ public static void Example()
// Score -2.554229
// Score 5.36571
- // Let's train a calibrator estimator on this scored dataset. The trained calibrator estimator produces a transformer
- // that can transform the scored data by adding a new column names "Probability".
- var calibratorEstimator = mlContext.BinaryClassification.Calibrators.Naive();
+ // Let's train a calibrator estimator on this scored dataset. The
+ // trained calibrator estimator produces a transformer that can
+ // transform the scored data by adding a new column named "Probability".
+ var calibratorEstimator = mlContext.BinaryClassification.Calibrators
+ .Naive();
+
var calibratorTransformer = calibratorEstimator.Fit(scoredData);
- // Transform the scored data with a calibrator transfomer by adding a new column names "Probability".
- // This column is a calibrated version of the "Score" column, meaning its values are a valid probability value in the [0, 1] interval
- // representing the chance that the respective sample bears positive sentiment.
+ // Transform the scored data with a calibrator transformer by adding a
+ // new column named "Probability". This column is a calibrated version
+ // of the "Score" column, meaning its values are a valid probability
+ // value in the [0, 1] interval representing the chance that the
+ // respective sample bears positive sentiment.
var finalData = calibratorTransformer.Transform(scoredData);
- var outScoresAndProbabilities = mlContext.Data.CreateEnumerable(finalData, reuseRowObject: false);
+ var outScoresAndProbabilities = mlContext.Data
+ .CreateEnumerable(finalData,
+ reuseRowObject: false);
+
PrintScoreAndProbability(outScoresAndProbabilities, 5);
// Score 4.18144 Probability 0.775
// Score -14.10248 Probability 0.01923077
@@ -61,10 +79,14 @@ private static void PrintScore(IEnumerable values, int numRows)
Console.WriteLine("{0, -10} {1, -10}", "Score", value.Score);
}
- private static void PrintScoreAndProbability(IEnumerable values, int numRows)
+ private static void PrintScoreAndProbability(
+ IEnumerable values, int numRows)
+
{
foreach (var value in values.Take(numRows))
- Console.WriteLine("{0, -10} {1, -10} {2, -10} {3, -10}", "Score", value.Score, "Probability", value.Probability);
+ Console.WriteLine("{0, -10} {1, -10} {2, -10} {3, -10}", "Score",
+ value.Score, "Probability", value.Probability);
+
}
private class ScoreValue
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Platt.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Platt.cs
index aa0d7d0798..709db362e6 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Platt.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Platt.cs
@@ -9,26 +9,36 @@ public static class Platt
{
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
- // Setting the seed to a fixed number in this example to make outputs deterministic.
+ // Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging, as a catalog of available operations
+ // and as the source of randomness. Setting the seed to a fixed number
+ // in this example to make outputs deterministic.
var mlContext = new MLContext(seed: 0);
// Download and featurize the dataset.
- var data = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);
+ var data = Microsoft.ML.SamplesUtils.DatasetUtils
+ .LoadFeaturizedAdultDataset(mlContext);
+
// Leave out 30% of data for testing.
- var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.3);
+ var trainTestData = mlContext.Data
+ .TrainTestSplit(data, testFraction: 0.3);
- // Create data training pipeline for non calibrated trainer and train Naive calibrator on top of it.
- var pipeline = mlContext.BinaryClassification.Trainers.AveragedPerceptron();
+ // Create data training pipeline for non calibrated trainer and train
+ // Platt calibrator on top of it.
+ var pipeline = mlContext.BinaryClassification.Trainers
+ .AveragedPerceptron();
- // Fit the pipeline, and get a transformer that knows how to score new data.
+ // Fit the pipeline, and get a transformer that knows how to score new
+ // data.
var transformer = pipeline.Fit(trainTestData.TrainSet);
// Fit this pipeline to the training data.
- // Let's score the new data. The score will give us a numerical estimation of the chance that the particular sample
- // bears positive sentiment. This estimate is relative to the numbers obtained.
+ // Let's score the new data. The score will give us a numerical
+ // estimation of the chance that the particular sample bears positive
+ // sentiment. This estimate is relative to the numbers obtained.
var scoredData = transformer.Transform(trainTestData.TestSet);
- var outScores = mlContext.Data.CreateEnumerable(scoredData, reuseRowObject: false);
+ var outScores = mlContext.Data
+ .CreateEnumerable(scoredData, reuseRowObject: false);
+
PrintScore(outScores, 5);
// Preview of scoredDataPreview.RowView
// Score 4.18144
@@ -37,16 +47,24 @@ public static void Example()
// Score -2.554229
// Score 5.36571
- // Let's train a calibrator estimator on this scored dataset. The trained calibrator estimator produces a transformer
- // that can transform the scored data by adding a new column names "Probability".
- var calibratorEstimator = mlContext.BinaryClassification.Calibrators.Platt();
+ // Let's train a calibrator estimator on this scored dataset. The
+ // trained calibrator estimator produces a transformer that can
+ // transform the scored data by adding a new column named "Probability".
+ var calibratorEstimator = mlContext.BinaryClassification.Calibrators
+ .Platt();
+
var calibratorTransformer = calibratorEstimator.Fit(scoredData);
- // Transform the scored data with a calibrator transfomer by adding a new column names "Probability".
- // This column is a calibrated version of the "Score" column, meaning its values are a valid probability value in the [0, 1] interval
- // representing the chance that the respective sample bears positive sentiment.
+ // Transform the scored data with a calibrator transformer by adding a
+ // new column named "Probability". This column is a calibrated version
+ // of the "Score" column, meaning its values are a valid probability
+ // value in the [0, 1] interval representing the chance that the
+ // respective sample bears positive sentiment.
var finalData = calibratorTransformer.Transform(scoredData);
- var outScoresAndProbabilities = mlContext.Data.CreateEnumerable(finalData, reuseRowObject: false);
+ var outScoresAndProbabilities = mlContext.Data
+ .CreateEnumerable(finalData,
+ reuseRowObject: false);
+
PrintScoreAndProbability(outScoresAndProbabilities, 5);
// Score 4.18144 Probability 0.8511352
// Score -14.10248 Probability 0.001633563
@@ -61,10 +79,14 @@ private static void PrintScore(IEnumerable values, int numRows)
Console.WriteLine("{0, -10} {1, -10}", "Score", value.Score);
}
- private static void PrintScoreAndProbability(IEnumerable values, int numRows)
+ private static void PrintScoreAndProbability(
+ IEnumerable values, int numRows)
+
{
foreach (var value in values.Take(numRows))
- Console.WriteLine("{0, -10} {1, -10} {2, -10} {3, -10}", "Score", value.Score, "Probability", value.Probability);
+ Console.WriteLine("{0, -10} {1, -10} {2, -10} {3, -10}", "Score",
+ value.Score, "Probability", value.Probability);
+
}
private class ScoreValue
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FactorizationMachine.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FactorizationMachine.cs
index afc9231814..4e8bec4f4e 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FactorizationMachine.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FactorizationMachine.cs
@@ -10,41 +10,51 @@ public static class FactorizationMachine
{
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
- // Setting the seed to a fixed number in this example to make outputs deterministic.
+ // Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging, as a catalog of available operations
+ // and as the source of randomness. Setting the seed to a fixed number
+ // in this example to make outputs deterministic.
var mlContext = new MLContext(seed: 0);
// Create a list of training data points.
var dataPoints = GenerateRandomDataPoints(1000);
- // Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
+ // Convert the list of data points to an IDataView object, which is
+ // consumable by ML.NET API.
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);
- // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times,
- // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory,
- // a solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms
+ // ML.NET doesn't cache data set by default. Therefore, if one reads a
+ // data set from a file and accesses it many times, it can be slow due
+ // to expensive featurization and disk operations. When the considered
+ // data can fit into memory, a solution is to cache the data in memory.
+ // Caching is especially helpful when working with iterative algorithms
// which need many data passes.
trainingData = mlContext.Data.Cache(trainingData);
// Define the trainer.
- var pipeline = mlContext.BinaryClassification.Trainers.FieldAwareFactorizationMachine();
+ var pipeline = mlContext.BinaryClassification.Trainers
+ .FieldAwareFactorizationMachine();
// Train the model.
var model = pipeline.Fit(trainingData);
- // Create testing data. Use different random seed to make it different from training data.
- var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
+ // Create testing data. Use different random seed to make it different
+ // from training data.
+ var testData = mlContext.Data
+ .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
// Run the model on test data set.
var transformedTestData = model.Transform(testData);
// Convert IDataView object to a list.
- var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList();
+ var predictions = mlContext.Data
+ .CreateEnumerable(transformedTestData,
+ reuseRowObject: false).ToList();
// Print 5 predictions.
foreach (var p in predictions.Take(5))
- Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
+ Console.WriteLine($"Label: {p.Label}, "
+ + $"Prediction: {p.PredictedLabel}");
// Expected output:
// Label: True, Prediction: False
@@ -54,7 +64,9 @@ public static void Example()
// Label: False, Prediction: False
// Evaluate the overall metrics.
- var metrics = mlContext.BinaryClassification.Evaluate(transformedTestData);
+ var metrics = mlContext.BinaryClassification
+ .Evaluate(transformedTestData);
+
PrintMetrics(metrics);
// Expected output:
@@ -77,7 +89,9 @@ public static void Example()
// Precision || 0.9063 | 0.8732 |
}
- private static IEnumerable GenerateRandomDataPoints(int count, int seed=0)
+ private static IEnumerable GenerateRandomDataPoints(int count,
+ int seed=0)
+
{
var random = new Random(seed);
float randomFloat() => (float)random.NextDouble();
@@ -88,13 +102,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se
{
Label = label,
// Create random features that are correlated with the label.
- // For data points with false label, the feature values are slightly increased by adding a constant.
- Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.1f).ToArray()
+ // For data points with false label, the feature values are
+ // slightly increased by adding a constant.
+ Features = Enumerable.Repeat(label, 50)
+ .Select(x => x ? randomFloat() : randomFloat() +
+ 0.1f).ToArray()
+
};
}
}
- // Example with label and 50 feature values. A data set is a collection of such examples.
+ // Example with label and 50 feature values. A data set is a collection of
+ // such examples.
private class DataPoint
{
public bool Label { get; set; }
@@ -117,11 +136,16 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics)
Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}");
Console.WriteLine($"F1 Score: {metrics.F1Score:F2}");
- Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}");
+ Console.WriteLine($"Negative Precision: " +
+ $"{metrics.NegativePrecision:F2}");
+
Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}");
- Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}");
+ Console.WriteLine($"Positive Precision: " +
+ $"{metrics.PositivePrecision:F2}");
+
Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n");
Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable());
}
}
}
+
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastForest.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastForest.cs
index b415ae5dc9..f33feb7063 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastForest.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastForest.cs
@@ -8,39 +8,48 @@ namespace Samples.Dynamic.Trainers.BinaryClassification
{
public static class FastForest
{
- // This example requires installation of additional NuGet package
- // Microsoft.ML.FastTree.
+ // This example requires installation of additional NuGet package for
+ // Microsoft.ML.FastTree at
+ // https://www.nuget.org/packages/Microsoft.ML.FastTree/
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
- // Setting the seed to a fixed number in this example to make outputs deterministic.
+ // Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging, as a catalog of available operations
+ // and as the source of randomness. Setting the seed to a fixed number
+ // in this example to make outputs deterministic.
var mlContext = new MLContext(seed: 0);
// Create a list of training data points.
var dataPoints = GenerateRandomDataPoints(1000);
- // Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
+ // Convert the list of data points to an IDataView object, which is
+ // consumable by ML.NET API.
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);
// Define the trainer.
- var pipeline = mlContext.BinaryClassification.Trainers.FastForest();
+ var pipeline = mlContext.BinaryClassification.Trainers
+ .FastForest();
// Train the model.
var model = pipeline.Fit(trainingData);
- // Create testing data. Use different random seed to make it different from training data.
- var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
+ // Create testing data. Use different random seed to make it different
+ // from training data.
+ var testData = mlContext.Data
+ .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
// Run the model on test data set.
var transformedTestData = model.Transform(testData);
// Convert IDataView object to a list.
- var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList();
+ var predictions = mlContext.Data
+ .CreateEnumerable(transformedTestData,
+ reuseRowObject: false).ToList();
// Print 5 predictions.
foreach (var p in predictions.Take(5))
- Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
+ Console.WriteLine($"Label: {p.Label}, "
+ + $"Prediction: {p.PredictedLabel}");
// Expected output:
// Label: True, Prediction: True
@@ -50,7 +59,9 @@ public static void Example()
// Label: False, Prediction: False
// Evaluate the overall metrics.
- var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(transformedTestData);
+ var metrics = mlContext.BinaryClassification
+ .EvaluateNonCalibrated(transformedTestData);
+
PrintMetrics(metrics);
// Expected output:
@@ -73,7 +84,9 @@ public static void Example()
// Precision || 0.6182 | 0.5416 |
}
- private static IEnumerable GenerateRandomDataPoints(int count, int seed=0)
+ private static IEnumerable GenerateRandomDataPoints(int count,
+ int seed=0)
+
{
var random = new Random(seed);
float randomFloat() => (float)random.NextDouble();
@@ -84,13 +97,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se
{
Label = label,
// Create random features that are correlated with the label.
- // For data points with false label, the feature values are slightly increased by adding a constant.
- Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.03f).ToArray()
+ // For data points with false label, the feature values are
+ // slightly increased by adding a constant.
+ Features = Enumerable.Repeat(label, 50)
+ .Select(x => x ? randomFloat() : randomFloat() +
+ 0.03f).ToArray()
+
};
}
}
- // Example with label and 50 feature values. A data set is a collection of such examples.
+ // Example with label and 50 feature values. A data set is a collection of
+ // such examples.
private class DataPoint
{
public bool Label { get; set; }
@@ -113,12 +131,17 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics)
Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}");
Console.WriteLine($"F1 Score: {metrics.F1Score:F2}");
- Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}");
+ Console.WriteLine($"Negative Precision: " +
+ $"{metrics.NegativePrecision:F2}");
+
Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}");
- Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}");
+ Console.WriteLine($"Positive Precision: " +
+ $"{metrics.PositivePrecision:F2}");
+
Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n");
Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable());
}
}
}
+
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastForestWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastForestWithOptions.cs
index 372be0f1fa..c5bf4d5366 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastForestWithOptions.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastForestWithOptions.cs
@@ -9,19 +9,22 @@ namespace Samples.Dynamic.Trainers.BinaryClassification
{
public static class FastForestWithOptions
{
- // This example requires installation of additional NuGet package
- // Microsoft.ML.FastTree.
+ // This example requires installation of additional NuGet package for
+ // Microsoft.ML.FastTree at
+ // https://www.nuget.org/packages/Microsoft.ML.FastTree/
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
- // Setting the seed to a fixed number in this example to make outputs deterministic.
+ // Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging, as a catalog of available operations
+ // and as the source of randomness. Setting the seed to a fixed number
+ // in this example to make outputs deterministic.
var mlContext = new MLContext(seed: 0);
// Create a list of training data points.
var dataPoints = GenerateRandomDataPoints(1000);
- // Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
+ // Convert the list of data points to an IDataView object, which is
+ // consumable by ML.NET API.
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);
// Define trainer options.
@@ -36,23 +39,29 @@ public static void Example()
};
// Define the trainer.
- var pipeline = mlContext.BinaryClassification.Trainers.FastForest(options);
+ var pipeline = mlContext.BinaryClassification.Trainers
+ .FastForest(options);
// Train the model.
var model = pipeline.Fit(trainingData);
- // Create testing data. Use different random seed to make it different from training data.
- var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
+ // Create testing data. Use different random seed to make it different
+ // from training data.
+ var testData = mlContext.Data
+ .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
// Run the model on test data set.
var transformedTestData = model.Transform(testData);
// Convert IDataView object to a list.
- var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList();
+ var predictions = mlContext.Data
+ .CreateEnumerable(transformedTestData,
+ reuseRowObject: false).ToList();
// Print 5 predictions.
foreach (var p in predictions.Take(5))
- Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
+ Console.WriteLine($"Label: {p.Label}, "
+ + $"Prediction: {p.PredictedLabel}");
// Expected output:
// Label: True, Prediction: True
@@ -62,7 +71,9 @@ public static void Example()
// Label: False, Prediction: True
// Evaluate the overall metrics.
- var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(transformedTestData);
+ var metrics = mlContext.BinaryClassification
+ .EvaluateNonCalibrated(transformedTestData);
+
PrintMetrics(metrics);
// Expected output:
@@ -85,7 +96,9 @@ public static void Example()
// Precision || 0.7072 | 0.7806 |
}
- private static IEnumerable GenerateRandomDataPoints(int count, int seed=0)
+ private static IEnumerable GenerateRandomDataPoints(int count,
+ int seed=0)
+
{
var random = new Random(seed);
float randomFloat() => (float)random.NextDouble();
@@ -96,13 +109,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se
{
Label = label,
// Create random features that are correlated with the label.
- // For data points with false label, the feature values are slightly increased by adding a constant.
- Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.03f).ToArray()
+ // For data points with false label, the feature values are
+ // slightly increased by adding a constant.
+ Features = Enumerable.Repeat(label, 50)
+ .Select(x => x ? randomFloat() : randomFloat() +
+ 0.03f).ToArray()
+
};
}
}
- // Example with label and 50 feature values. A data set is a collection of such examples.
+ // Example with label and 50 feature values. A data set is a collection of
+ // such examples.
private class DataPoint
{
public bool Label { get; set; }
@@ -125,12 +143,17 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics)
Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}");
Console.WriteLine($"F1 Score: {metrics.F1Score:F2}");
- Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}");
+ Console.WriteLine($"Negative Precision: " +
+ $"{metrics.NegativePrecision:F2}");
+
Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}");
- Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}");
+ Console.WriteLine($"Positive Precision: " +
+ $"{metrics.PositivePrecision:F2}");
+
Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n");
Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable());
}
}
}
+
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastTree.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastTree.cs
index d260ec8666..f50b8f9732 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastTree.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastTree.cs
@@ -8,39 +8,48 @@ namespace Samples.Dynamic.Trainers.BinaryClassification
{
public static class FastTree
{
- // This example requires installation of additional NuGet package
- // Microsoft.ML.FastTree.
+ // This example requires installation of additional NuGet package for
+ // Microsoft.ML.FastTree at
+ // https://www.nuget.org/packages/Microsoft.ML.FastTree/
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
- // Setting the seed to a fixed number in this example to make outputs deterministic.
+ // Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging, as a catalog of available operations
+ // and as the source of randomness. Setting the seed to a fixed number
+ // in this example to make outputs deterministic.
var mlContext = new MLContext(seed: 0);
// Create a list of training data points.
var dataPoints = GenerateRandomDataPoints(1000);
- // Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
+ // Convert the list of data points to an IDataView object, which is
+ // consumable by ML.NET API.
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);
// Define the trainer.
- var pipeline = mlContext.BinaryClassification.Trainers.FastTree();
+ var pipeline = mlContext.BinaryClassification.Trainers
+ .FastTree();
// Train the model.
var model = pipeline.Fit(trainingData);
- // Create testing data. Use different random seed to make it different from training data.
- var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
+ // Create testing data. Use different random seed to make it different
+ // from training data.
+ var testData = mlContext.Data
+ .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
// Run the model on test data set.
var transformedTestData = model.Transform(testData);
// Convert IDataView object to a list.
- var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList();
+ var predictions = mlContext.Data
+ .CreateEnumerable(transformedTestData,
+ reuseRowObject: false).ToList();
// Print 5 predictions.
foreach (var p in predictions.Take(5))
- Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
+ Console.WriteLine($"Label: {p.Label}, "
+ + $"Prediction: {p.PredictedLabel}");
// Expected output:
// Label: True, Prediction: True
@@ -50,7 +59,9 @@ public static void Example()
// Label: False, Prediction: False
// Evaluate the overall metrics.
- var metrics = mlContext.BinaryClassification.Evaluate(transformedTestData);
+ var metrics = mlContext.BinaryClassification
+ .Evaluate(transformedTestData);
+
PrintMetrics(metrics);
// Expected output:
@@ -76,7 +87,9 @@ public static void Example()
// Precision || 0.6903 | 0.7716 |
}
- private static IEnumerable GenerateRandomDataPoints(int count, int seed=0)
+ private static IEnumerable GenerateRandomDataPoints(int count,
+ int seed=0)
+
{
var random = new Random(seed);
float randomFloat() => (float)random.NextDouble();
@@ -87,13 +100,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se
{
Label = label,
// Create random features that are correlated with the label.
- // For data points with false label, the feature values are slightly increased by adding a constant.
- Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.03f).ToArray()
+ // For data points with false label, the feature values are
+ // slightly increased by adding a constant.
+ Features = Enumerable.Repeat(label, 50)
+ .Select(x => x ? randomFloat() : randomFloat() +
+ 0.03f).ToArray()
+
};
}
}
- // Example with label and 50 feature values. A data set is a collection of such examples.
+ // Example with label and 50 feature values. A data set is a collection of
+ // such examples.
private class DataPoint
{
public bool Label { get; set; }
@@ -116,12 +134,17 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics)
Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}");
Console.WriteLine($"F1 Score: {metrics.F1Score:F2}");
- Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}");
+ Console.WriteLine($"Negative Precision: " +
+ $"{metrics.NegativePrecision:F2}");
+
Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}");
- Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}");
+ Console.WriteLine($"Positive Precision: " +
+ $"{metrics.PositivePrecision:F2}");
+
Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n");
Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable());
}
}
}
+
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastTreeWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastTreeWithOptions.cs
index b02e9b977b..87e894d903 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastTreeWithOptions.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastTreeWithOptions.cs
@@ -9,19 +9,22 @@ namespace Samples.Dynamic.Trainers.BinaryClassification
{
public static class FastTreeWithOptions
{
- // This example requires installation of additional NuGet package
- // Microsoft.ML.FastTree.
+ // This example requires installation of additional NuGet package for
+ // Microsoft.ML.FastTree at
+ // https://www.nuget.org/packages/Microsoft.ML.FastTree/
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
- // Setting the seed to a fixed number in this example to make outputs deterministic.
+ // Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging, as a catalog of available operations
+ // and as the source of randomness. Setting the seed to a fixed number
+ // in this example to make outputs deterministic.
var mlContext = new MLContext(seed: 0);
// Create a list of training data points.
var dataPoints = GenerateRandomDataPoints(1000);
- // Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
+ // Convert the list of data points to an IDataView object, which is
+ // consumable by ML.NET API.
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);
// Define trainer options.
@@ -36,23 +39,29 @@ public static void Example()
};
// Define the trainer.
- var pipeline = mlContext.BinaryClassification.Trainers.FastTree(options);
+ var pipeline = mlContext.BinaryClassification.Trainers
+ .FastTree(options);
// Train the model.
var model = pipeline.Fit(trainingData);
- // Create testing data. Use different random seed to make it different from training data.
- var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
+ // Create testing data. Use different random seed to make it different
+ // from training data.
+ var testData = mlContext.Data
+ .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
// Run the model on test data set.
var transformedTestData = model.Transform(testData);
// Convert IDataView object to a list.
- var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList();
+ var predictions = mlContext.Data
+ .CreateEnumerable(transformedTestData,
+ reuseRowObject: false).ToList();
// Print 5 predictions.
foreach (var p in predictions.Take(5))
- Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
+ Console.WriteLine($"Label: {p.Label}, "
+ + $"Prediction: {p.PredictedLabel}");
// Expected output:
// Label: True, Prediction: True
@@ -62,7 +71,9 @@ public static void Example()
// Label: False, Prediction: False
// Evaluate the overall metrics.
- var metrics = mlContext.BinaryClassification.Evaluate(transformedTestData);
+ var metrics = mlContext.BinaryClassification
+ .Evaluate(transformedTestData);
+
PrintMetrics(metrics);
// Expected output:
@@ -88,7 +99,9 @@ public static void Example()
// Precision || 0.6903 | 0.7716 |
}
- private static IEnumerable GenerateRandomDataPoints(int count, int seed=0)
+ private static IEnumerable GenerateRandomDataPoints(int count,
+ int seed=0)
+
{
var random = new Random(seed);
float randomFloat() => (float)random.NextDouble();
@@ -99,13 +112,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se
{
Label = label,
// Create random features that are correlated with the label.
- // For data points with false label, the feature values are slightly increased by adding a constant.
- Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.03f).ToArray()
+ // For data points with false label, the feature values are
+ // slightly increased by adding a constant.
+ Features = Enumerable.Repeat(label, 50)
+ .Select(x => x ? randomFloat() : randomFloat() +
+ 0.03f).ToArray()
+
};
}
}
- // Example with label and 50 feature values. A data set is a collection of such examples.
+ // Example with label and 50 feature values. A data set is a collection of
+ // such examples.
private class DataPoint
{
public bool Label { get; set; }
@@ -128,12 +146,17 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics)
Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}");
Console.WriteLine($"F1 Score: {metrics.F1Score:F2}");
- Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}");
+ Console.WriteLine($"Negative Precision: " +
+ $"{metrics.NegativePrecision:F2}");
+
Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}");
- Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}");
+ Console.WriteLine($"Positive Precision: " +
+ $"{metrics.PositivePrecision:F2}");
+
Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n");
Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable());
}
}
}
+
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.cs
index bc2eaef343..bddc926ccf 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.cs
@@ -8,28 +8,36 @@ namespace Samples.Dynamic.Trainers.BinaryClassification
{
public static class FieldAwareFactorizationMachine
{
- // This example first train a field-aware factorization to binary classification, measure the trained model's quality, and finally
+ // This example first train a field-aware factorization to binary
+ // classification, measure the trained model's quality, and finally
// use the trained model to make prediction.
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
- // Setting the seed to a fixed number in this example to make outputs deterministic.
+ // Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging, as a catalog of available operations
+ // and as the source of randomness. Setting the seed to a fixed number
+ // in this example to make outputs deterministic.
var mlContext = new MLContext(seed: 0);
// Create a list of training data points.
IEnumerable data = GenerateRandomDataPoints(500);
- // Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
+ // Convert the list of data points to an IDataView object, which is
+ // consumable by ML.NET API.
var trainingData = mlContext.Data.LoadFromEnumerable(data);
// Define the trainer.
- // This trainer trains field-aware factorization (FFM) for binary classification. See https://www.csie.ntu.edu.tw/~cjlin/papers/ffm.pdf
- // for the theory behind and https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf for the training
- // algorithm implemented in ML.NET.
- var pipeline = mlContext.BinaryClassification.Trainers.FieldAwareFactorizationMachine(
+ // This trainer trains field-aware factorization (FFM)
+ // for binary classification.
+ // See https://www.csie.ntu.edu.tw/~cjlin/papers/ffm.pdf for the theory
+ // behind and
+ // https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf for the
+ // training algorithm implemented in ML.NET.
+ var pipeline = mlContext.BinaryClassification.Trainers
+ .FieldAwareFactorizationMachine(
// Specify three feature columns!
- new[] {nameof(DataPoint.Field0), nameof(DataPoint.Field1), nameof(DataPoint.Field2) },
+ new[] {nameof(DataPoint.Field0), nameof(DataPoint.Field1),
+ nameof(DataPoint.Field2) },
// Specify binary label's column name.
nameof(DataPoint.Label) );
@@ -40,7 +48,8 @@ public static void Example()
var transformedTrainingData = model.Transform(trainingData);
// Measure the quality of the trained model.
- var metrics = mlContext.BinaryClassification.Evaluate(transformedTrainingData);
+ var metrics = mlContext.BinaryClassification
+ .Evaluate(transformedTrainingData);
// Show the quality metrics.
PrintMetrics(metrics);
@@ -68,14 +77,19 @@ public static void Example()
// Precision || 0.7878 | 0.8235 |
// Create prediction function from the trained model.
- var engine = mlContext.Model.CreatePredictionEngine(model);
+ var engine = mlContext.Model
+ .CreatePredictionEngine(model);
// Make some predictions.
foreach(var dataPoint in data.Take(5))
{
var result = engine.Predict(dataPoint);
- Console.WriteLine($"Actual label: {dataPoint.Label}, predicted label: {result.PredictedLabel}, " +
- $"score of being positive class: {result.Score}, and probability of beling positive class: {result.Probability}.");
+ Console.WriteLine($"Actual label: {dataPoint.Label}, "
+ + $"predicted label: {result.PredictedLabel}, "
+ + $"score of being positive class: {result.Score}, "
+ + $"and probability of beling positive class: "
+ + $"{result.Probability}.");
+
}
// Expected output:
@@ -95,7 +109,8 @@ private class DataPoint
// Label.
public bool Label { get; set; }
- // Features from the first field. Note that different fields can have different numbers of features.
+ // Features from the first field. Note that different fields can have
+ // different numbers of features.
[VectorType(featureLength)]
public float[] Field0 { get; set; }
@@ -108,8 +123,8 @@ private class DataPoint
public float[] Field2 { get; set; }
}
- // This class defines objects produced by trained model. The trained model maps
- // a DataPoint to a Result.
+ // This class defines objects produced by trained model. The trained model
+ // maps a DataPoint to a Result.
public class Result
{
// Label.
@@ -123,13 +138,16 @@ public class Result
}
// Function used to create toy data sets.
- private static IEnumerable GenerateRandomDataPoints(int exampleCount, int seed = 0)
+ private static IEnumerable GenerateRandomDataPoints(
+ int exampleCount, int seed = 0)
+
{
var rnd = new Random(seed);
var data = new List();
for (int i = 0; i < exampleCount; ++i)
{
- // Initialize an example with a random label and an empty feature vector.
+ // Initialize an example with a random label and an empty feature
+ // vector.
var sample = new DataPoint()
{
Label = rnd.Next() % 2 == 0,
@@ -139,9 +157,10 @@ private static IEnumerable GenerateRandomDataPoints(int exampleCount,
};
// Fill feature vectors according the assigned label.
- // Notice that features from different fields have different biases and therefore different distributions.
- // In practices such as game recommendation, one may use one field to store features from user profile and
- // another field to store features from game profile.
+ // Notice that features from different fields have different biases
+ // and therefore different distributions. In practices such as game
+ // recommendation, one may use one field to store features from user
+ // profile and another field to store features from game profile.
for (int j = 0; j < featureLength; ++j)
{
var value0 = (float)rnd.NextDouble();
@@ -169,14 +188,20 @@ private static IEnumerable GenerateRandomDataPoints(int exampleCount,
}
// Function used to show evaluation metrics such as accuracy of predictions.
- private static void PrintMetrics(CalibratedBinaryClassificationMetrics metrics)
+ private static void PrintMetrics(
+ CalibratedBinaryClassificationMetrics metrics)
+
{
Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}");
Console.WriteLine($"F1 Score: {metrics.F1Score:F2}");
- Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}");
+ Console.WriteLine($"Negative Precision: " +
+ $"{metrics.NegativePrecision:F2}");
+
Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}");
- Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}");
+ Console.WriteLine($"Positive Precision: " +
+ $"{metrics.PositivePrecision:F2}");
+
Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}");
Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}");
Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}");
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.tt
index 3f41aa671f..22cdd45721 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.tt
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.tt
@@ -3,19 +3,24 @@
string ClassName="FieldAwareFactorizationMachine";
string Trainer = @"FieldAwareFactorizationMachine(
// Specify three feature columns!
- new[] {nameof(DataPoint.Field0), nameof(DataPoint.Field1), nameof(DataPoint.Field2) },
+ new[] {nameof(DataPoint.Field0), nameof(DataPoint.Field1),
+ nameof(DataPoint.Field2) },
// Specify binary label's column name.
nameof(DataPoint.Label) )";
string OptionsInclude = null;
string Comments = @"
- // This example first train a field-aware factorization to binary classification, measure the trained model's quality, and finally
+ // This example first train a field-aware factorization to binary
+ // classification, measure the trained model's quality, and finally
// use the trained model to make prediction.";
-string TrainerDescription = @"// This trainer trains field-aware factorization (FFM) for binary classification. See https://www.csie.ntu.edu.tw/~cjlin/papers/ffm.pdf
- // for the theory behind and https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf for the training
- // algorithm implemented in ML.NET.";
+string TrainerDescription = @"// This trainer trains field-aware factorization (FFM)
+ // for binary classification.
+ // See https://www.csie.ntu.edu.tw/~cjlin/papers/ffm.pdf for the theory
+ // behind and
+ // https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf for the
+ // training algorithm implemented in ML.NET.";
string TrainerOptions = null;
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachineWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachineWithOptions.cs
index 47e53dfdc3..08b48a9e74 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachineWithOptions.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachineWithOptions.cs
@@ -9,26 +9,31 @@ namespace Samples.Dynamic.Trainers.BinaryClassification
{
public static class FieldAwareFactorizationMachineWithOptions
{
- // This example first train a field-aware factorization to binary classification, measure the trained model's quality, and finally
+ // This example first train a field-aware factorization to binary
+ // classification, measure the trained model's quality, and finally
// use the trained model to make prediction.
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
- // Setting the seed to a fixed number in this example to make outputs deterministic.
+ // Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging, as a catalog of available operations
+ // and as the source of randomness. Setting the seed to a fixed number
+ // in this example to make outputs deterministic.
var mlContext = new MLContext(seed: 0);
// Create a list of training data points.
IEnumerable data = GenerateRandomDataPoints(500);
- // Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
+ // Convert the list of data points to an IDataView object, which is
+ // consumable by ML.NET API.
var trainingData = mlContext.Data.LoadFromEnumerable(data);
// Define trainer options.
var options = new FieldAwareFactorizationMachineTrainer.Options
{
FeatureColumnName = nameof(DataPoint.Field0),
- ExtraFeatureColumns = new[] { nameof(DataPoint.Field1), nameof(DataPoint.Field2) },
+ ExtraFeatureColumns =
+ new[] { nameof(DataPoint.Field1), nameof(DataPoint.Field2) },
+
LabelColumnName = nameof(DataPoint.Label),
LambdaLatent = 0.01f,
LambdaLinear = 0.001f,
@@ -38,10 +43,14 @@ public static void Example()
};
// Define the trainer.
- // This trainer trains field-aware factorization (FFM) for binary classification. See https://www.csie.ntu.edu.tw/~cjlin/papers/ffm.pdf
- // for the theory behind and https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf for the training
- // algorithm implemented in ML.NET.
- var pipeline = mlContext.BinaryClassification.Trainers.FieldAwareFactorizationMachine(options);
+ // This trainer trains field-aware factorization (FFM)
+ // for binary classification.
+ // See https://www.csie.ntu.edu.tw/~cjlin/papers/ffm.pdf for the theory
+ // behind and
+ // https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf for the
+ // training algorithm implemented in ML.NET.
+ var pipeline = mlContext.BinaryClassification.Trainers
+ .FieldAwareFactorizationMachine(options);
// Train the model.
var model = pipeline.Fit(trainingData);
@@ -50,7 +59,8 @@ public static void Example()
var transformedTrainingData = model.Transform(trainingData);
// Measure the quality of the trained model.
- var metrics = mlContext.BinaryClassification.Evaluate(transformedTrainingData);
+ var metrics = mlContext.BinaryClassification
+ .Evaluate(transformedTrainingData);
// Show the quality metrics.
PrintMetrics(metrics);
@@ -78,14 +88,19 @@ public static void Example()
// Precision || 0.7425 | 0.8319 |
// Create prediction function from the trained model.
- var engine = mlContext.Model.CreatePredictionEngine(model);
+ var engine = mlContext.Model
+ .CreatePredictionEngine(model);
// Make some predictions.
foreach(var dataPoint in data.Take(5))
{
var result = engine.Predict(dataPoint);
- Console.WriteLine($"Actual label: {dataPoint.Label}, predicted label: {result.PredictedLabel}, " +
- $"score of being positive class: {result.Score}, and probability of beling positive class: {result.Probability}.");
+ Console.WriteLine($"Actual label: {dataPoint.Label}, "
+ + $"predicted label: {result.PredictedLabel}, "
+ + $"score of being positive class: {result.Score}, "
+ + $"and probability of beling positive class: "
+ + $"{result.Probability}.");
+
}
// Expected output:
@@ -105,7 +120,8 @@ private class DataPoint
// Label.
public bool Label { get; set; }
- // Features from the first field. Note that different fields can have different numbers of features.
+ // Features from the first field. Note that different fields can have
+ // different numbers of features.
[VectorType(featureLength)]
public float[] Field0 { get; set; }
@@ -118,8 +134,8 @@ private class DataPoint
public float[] Field2 { get; set; }
}
- // This class defines objects produced by trained model. The trained model maps
- // a DataPoint to a Result.
+ // This class defines objects produced by trained model. The trained model
+ // maps a DataPoint to a Result.
public class Result
{
// Label.
@@ -133,13 +149,16 @@ public class Result
}
// Function used to create toy data sets.
- private static IEnumerable GenerateRandomDataPoints(int exampleCount, int seed = 0)
+ private static IEnumerable GenerateRandomDataPoints(
+ int exampleCount, int seed = 0)
+
{
var rnd = new Random(seed);
var data = new List();
for (int i = 0; i < exampleCount; ++i)
{
- // Initialize an example with a random label and an empty feature vector.
+ // Initialize an example with a random label and an empty feature
+ // vector.
var sample = new DataPoint()
{
Label = rnd.Next() % 2 == 0,
@@ -149,9 +168,10 @@ private static IEnumerable GenerateRandomDataPoints(int exampleCount,
};
// Fill feature vectors according the assigned label.
- // Notice that features from different fields have different biases and therefore different distributions.
- // In practices such as game recommendation, one may use one field to store features from user profile and
- // another field to store features from game profile.
+ // Notice that features from different fields have different biases
+ // and therefore different distributions. In practices such as game
+ // recommendation, one may use one field to store features from user
+ // profile and another field to store features from game profile.
for (int j = 0; j < featureLength; ++j)
{
var value0 = (float)rnd.NextDouble();
@@ -179,14 +199,20 @@ private static IEnumerable GenerateRandomDataPoints(int exampleCount,
}
// Function used to show evaluation metrics such as accuracy of predictions.
- private static void PrintMetrics(CalibratedBinaryClassificationMetrics metrics)
+ private static void PrintMetrics(
+ CalibratedBinaryClassificationMetrics metrics)
+
{
Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}");
Console.WriteLine($"F1 Score: {metrics.F1Score:F2}");
- Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}");
+ Console.WriteLine($"Negative Precision: " +
+ $"{metrics.NegativePrecision:F2}");
+
Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}");
- Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}");
+ Console.WriteLine($"Positive Precision: " +
+ $"{metrics.PositivePrecision:F2}");
+
Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}");
Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}");
Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}");
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachineWithOptions.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachineWithOptions.tt
index c3de9d1882..0c7e32048d 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachineWithOptions.tt
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachineWithOptions.tt
@@ -6,17 +6,23 @@ string Trainer = "FieldAwareFactorizationMachine";
string OptionsInclude = @"using Microsoft.ML.Trainers;";
string Comments = @"
- // This example first train a field-aware factorization to binary classification, measure the trained model's quality, and finally
+ // This example first train a field-aware factorization to binary
+ // classification, measure the trained model's quality, and finally
// use the trained model to make prediction.";
-string TrainerDescription = @"// This trainer trains field-aware factorization (FFM) for binary classification. See https://www.csie.ntu.edu.tw/~cjlin/papers/ffm.pdf
- // for the theory behind and https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf for the training
- // algorithm implemented in ML.NET.";
+string TrainerDescription = @"// This trainer trains field-aware factorization (FFM)
+ // for binary classification.
+ // See https://www.csie.ntu.edu.tw/~cjlin/papers/ffm.pdf for the theory
+ // behind and
+ // https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf for the
+ // training algorithm implemented in ML.NET.";
string TrainerOptions = @"FieldAwareFactorizationMachineTrainer.Options
{
FeatureColumnName = nameof(DataPoint.Field0),
- ExtraFeatureColumns = new[] { nameof(DataPoint.Field1), nameof(DataPoint.Field2) },
+ ExtraFeatureColumns =
+ new[] { nameof(DataPoint.Field1), nameof(DataPoint.Field2) },
+
LabelColumnName = nameof(DataPoint.Label),
LambdaLatent = 0.01f,
LambdaLinear = 0.001f,
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs
index 3b9c36644f..d4ce855e3c 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Gam.cs
@@ -7,12 +7,14 @@ namespace Samples.Dynamic.Trainers.BinaryClassification
{
public static class Gam
{
- // This example requires installation of additional NuGet package
- // Microsoft.ML.FastTree.
+ // This example requires installation of additional NuGet package for
+ // Microsoft.ML.FastTree at
+ // https://www.nuget.org/packages/Microsoft.ML.FastTree/
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
+ // Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging, as a catalog of available operations
+ // and as the source of randomness.
var mlContext = new MLContext();
// Create the dataset.
@@ -27,30 +29,36 @@ public static void Example()
var validSet = dataSets.TestSet;
// Create a GAM trainer.
- // Use a small number of bins for this example. The setting below means for each feature,
- // we divide its range into 16 discrete regions for the training process. Note that these
- // regions are not evenly spaced, and that the final model may contain fewer bins, as
- // neighboring bins with identical values will be combined. In general, we recommend using
- // at least the default number of bins, as a small number of bins limits the capacity of
- // the model.
- var trainer = mlContext.BinaryClassification.Trainers.Gam(maximumBinCountPerFeature: 16);
-
- // Fit the model using both of training and validation sets. GAM can use a technique called
- // pruning to tune the model to the validation set after training to improve generalization.
+ // Use a small number of bins for this example. The setting below means
+ // for each feature, we divide its range into 16 discrete regions for
+ // the training process. Note that these regions are not evenly spaced,
+ // and that the final model may contain fewer bins, as neighboring bins
+ // with identical values will be combined. In general, we recommend
+ // using at least the default number of bins, as a small number of bins
+ // limits the capacity of the model.
+ var trainer = mlContext.BinaryClassification.Trainers
+ .Gam(maximumBinCountPerFeature: 16);
+
+ // Fit the model using both of training and validation sets. GAM can use
+ // a technique called pruning to tune the model to the validation set
+ // after training to improve generalization.
var model = trainer.Fit(trainSet, validSet);
// Extract the model parameters.
var gam = model.Model.SubModel;
- // Now we can inspect the parameters of the Generalized Additive Model to understand the fit
- // and potentially learn about our dataset.
- // First, we will look at the bias; the bias represents the average prediction for the training data.
+ // Now we can inspect the parameters of the Generalized Additive Model
+ // to understand the fit and potentially learn about our dataset. First,
+ // we will look at the bias; the bias represents the average prediction
+ // for the training data.
Console.WriteLine($"Average prediction: {gam.Bias:0.00}");
- // Now look at the shape functions that the model has learned. Similar to a linear model, we have
- // one response per feature, and they are independent. Unlike a linear model, this response is a
- // generic function instead of a line. Because we have included a bias term, each feature response
- // represents the deviation from the average prediction as a function of the feature value.
+ // Now look at the shape functions that the model has learned. Similar
+ // to a linear model, we have one response per feature, and they are
+ // independent. Unlike a linear model, this response is a generic
+ // function instead of a line. Because we have included a bias term,
+ // each feature response represents the deviation from the average
+ // prediction as a function of the feature value.
for (int i = 0; i < gam.NumberOfShapeFunctions; i++)
{
// Break a line.
@@ -62,11 +70,14 @@ public static void Example()
// Get the bin effects; these are the function values for each bin.
var binEffects = gam.GetBinEffects(i);
- // Now, write the function to the console. The function is a set of bins, and the corresponding
- // function values. You can think of GAMs as building a bar-chart or lookup table for each feature.
+ // Now, write the function to the console. The function is a set of
+ // bins, and the corresponding function values. You can think of
+ // GAMs as building a bar-chart or lookup table for each feature.
Console.WriteLine($"Feature{i}");
for (int j = 0; j < binUpperBounds.Count; j++)
- Console.WriteLine($"x < {binUpperBounds[j]:0.00} => {binEffects[j]:0.000}");
+ Console.WriteLine(
+ $"x < {binUpperBounds[j]:0.00} => {binEffects[j]:0.000}");
+
}
// Expected output:
@@ -91,18 +102,23 @@ public static void Example()
// x < 0.31 => -0.138
// x < ∞ => -0.188
- // Let's consider this output. To score a given example, we look up the first bin where the inequality
- // is satisfied for the feature value. We can look at the whole function to get a sense for how the
- // model responds to the variable on a global level.
- // The model can be seen to reconstruct the parabolic and step-wise function, shifted with respect to the average
- // expected output over the training set. Very few bins are used to model the second feature because the GAM model
- // discards unchanged bins to create smaller models.
- // One last thing to notice is that these feature functions can be noisy. While we know that Feature1 should be
- // symmetric, this is not captured in the model. This is due to noise in the data. Common practice is to use
- // resampling methods to estimate a confidence interval at each bin. This will help to determine if the effect is
- // real or just sampling noise. See for example:
- // Tan, Caruana, Hooker, and Lou. "Distill-and-Compare: Auditing Black-Box Models Using Transparent Model
- // Distillation." arXiv:1710.06169."
+ // Let's consider this output. To score a given example, we look up the
+ // first bin where the inequality is satisfied for the feature value.
+ // We can look at the whole function to get a sense for how the model
+ // responds to the variable on a global level. The model can be seen to
+ // reconstruct the parabolic and step-wise function, shifted with
+ // respect to the average expected output over the training set.
+ // Very few bins are used to model the second feature because the GAM
+ // model discards unchanged bins to create smaller models. One last
+ // thing to notice is that these feature functions can be noisy. While
+ // we know that Feature1 should be symmetric, this is not captured in
+ // the model. This is due to noise in the data. Common practice is to
+ // use resampling methods to estimate a confidence interval at each bin.
+ // This will help to determine if the effect is real or just sampling
+ // noise. See for example: Tan, Caruana, Hooker, and Lou.
+ // "Distill-and-Compare: Auditing Black-Box Models Using Transparent
+ // Model Distillation."
+ // arXiv:1710.06169."
}
private class Data
@@ -114,13 +130,17 @@ private class Data
}
///
- /// Creates a dataset, an IEnumerable of Data objects, for a GAM sample. Feature1 is a parabola centered around 0,
- /// while Feature2 is a simple piecewise function.
+ /// Creates a dataset, an IEnumerable of Data objects, for a GAM sample.
+ /// Feature1 is a parabola centered around 0, while Feature2 is a simple
+ /// piecewise function.
///
/// The number of examples to generate.
- /// The seed for the random number generator used to produce data.
+ /// The seed for the random number generator used to
+ /// produce data.
///
- private static IEnumerable GenerateData(int numExamples = 25000, int seed = 1)
+ private static IEnumerable GenerateData(int numExamples = 25000,
+ int seed = 1)
+
{
var rng = new Random(seed);
float centeredFloat() => (float)(rng.NextDouble() - 0.5);
@@ -131,7 +151,8 @@ private static IEnumerable GenerateData(int numExamples = 25000, int seed
Features = new float[2] { centeredFloat(), centeredFloat() }
};
// Compute the label from the shape functions and add noise.
- data.Label = Sigmoid(Parabola(data.Features[0]) + SimplePiecewise(data.Features[1]) + centeredFloat()) > 0.5;
+ data.Label = Sigmoid(Parabola(data.Features[0])
+ + SimplePiecewise(data.Features[1]) + centeredFloat()) > 0.5;
yield return data;
}
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/GamWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/GamWithOptions.cs
index e4a408a3ae..cdacd51b93 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/GamWithOptions.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/GamWithOptions.cs
@@ -8,12 +8,14 @@ namespace Samples.Dynamic.Trainers.BinaryClassification
{
public static class GamWithOptions
{
- // This example requires installation of additional NuGet package
- // Microsoft.ML.FastTree.
+ // This example requires installation of additional NuGet package for
+ // Microsoft.ML.FastTree at
+ // https://www.nuget.org/packages/Microsoft.ML.FastTree/
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
+ // Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging, as a catalog of available operations
+ // and as the source of randomness.
var mlContext = new MLContext();
// Create the dataset.
@@ -28,14 +30,15 @@ public static void Example()
var validSet = dataSets.TestSet;
// Create a GAM trainer.
- // Use a small number of bins for this example. The setting below means for each feature,
- // we divide its range into 16 discrete regions for the training process. Note that these
- // regions are not evenly spaced, and that the final model may contain fewer bins, as
- // neighboring bins with identical values will be combined. In general, we recommend using
- // at least the default number of bins, as a small number of bins limits the capacity of
- // the model.
- // Also, set the learning rate to half the default to slow down the gradient descent, and
- // double the number of iterations to compensate.
+ // Use a small number of bins for this example. The setting below means
+ // for each feature, we divide its range into 16 discrete regions for
+ // the training process. Note that these regions are not evenly spaced,
+ // and that the final model may contain fewer bins, as neighboring bins
+ // with identical values will be combined. In general, we recommend
+ // using at least the default number of bins, as a small number of bins
+ // limits the capacity of the model. Also, set the learning rate to half
+ // the default to slow down the gradient descent, and double the number
+ // of iterations to compensate.
var trainer = mlContext.BinaryClassification.Trainers.Gam(
new GamBinaryTrainer.Options {
NumberOfIterations = 19000,
@@ -43,22 +46,26 @@ public static void Example()
LearningRate = 0.001
});
- // Fit the model using both of training and validation sets. GAM can use a technique called
- // pruning to tune the model to the validation set after training to improve generalization.
+ // Fit the model using both of training and validation sets. GAM can use
+ // a technique called pruning to tune the model to the validation set
+ // after training to improve generalization.
var model = trainer.Fit(trainSet, validSet);
// Extract the model parameters.
var gam = model.Model.SubModel;
- // Now we can inspect the parameters of the Generalized Additive Model to understand the fit
- // and potentially learn about our dataset.
- // First, we will look at the bias; the bias represents the average prediction for the training data.
+ // Now we can inspect the parameters of the Generalized Additive Model
+ // to understand the fit and potentially learn about our dataset. First,
+ // we will look at the bias; the bias represents the average prediction
+ // for the training data.
Console.WriteLine($"Average prediction: {gam.Bias:0.00}");
- // Now look at the shape functions that the model has learned. Similar to a linear model, we have
- // one response per feature, and they are independent. Unlike a linear model, this response is a
- // generic function instead of a line. Because we have included a bias term, each feature response
- // represents the deviation from the average prediction as a function of the feature value.
+ // Now look at the shape functions that the model has learned. Similar
+ // to a linear model, we have one response per feature, and they are
+ // independent. Unlike a linear model, this response is a generic
+ // function instead of a line. Because we have included a bias term,
+ // each feature response represents the deviation from the average
+ // prediction as a function of the feature value.
for (int i = 0; i < gam.NumberOfShapeFunctions; i++)
{
// Break a line.
@@ -70,11 +77,13 @@ public static void Example()
// Get the bin effects; these are the function values for each bin.
var binEffects = gam.GetBinEffects(i);
- // Now, write the function to the console. The function is a set of bins, and the corresponding
- // function values. You can think of GAMs as building a bar-chart or lookup table for each feature.
+ // Now, write the function to the console. The function is a set of
+ // bins, and the corresponding function values. You can think of
+ // GAMs as building a bar-chart or lookup table for each feature.
Console.WriteLine($"Feature{i}");
for (int j = 0; j < binUpperBounds.Count; j++)
- Console.WriteLine($"x < {binUpperBounds[j]:0.00} => {binEffects[j]:0.000}");
+ Console.WriteLine(
+ $"x < {binUpperBounds[j]:0.00} => {binEffects[j]:0.000}");
}
// Expected output:
@@ -99,18 +108,23 @@ public static void Example()
// x < 0.31 => -0.138
// x < ∞ => -0.188
- // Let's consider this output. To score a given example, we look up the first bin where the inequality
- // is satisfied for the feature value. We can look at the whole function to get a sense for how the
- // model responds to the variable on a global level.
- // The model can be seen to reconstruct the parabolic and step-wise function, shifted with respect to the average
- // expected output over the training set. Very few bins are used to model the second feature because the GAM model
- // discards unchanged bins to create smaller models.
- // One last thing to notice is that these feature functions can be noisy. While we know that Feature1 should be
- // symmetric, this is not captured in the model. This is due to noise in the data. Common practice is to use
- // resampling methods to estimate a confidence interval at each bin. This will help to determine if the effect is
- // real or just sampling noise. See for example:
- // Tan, Caruana, Hooker, and Lou. "Distill-and-Compare: Auditing Black-Box Models Using Transparent Model
- // Distillation." arXiv:1710.06169."
+ // Let's consider this output. To score a given example, we look up the
+ // first bin where the inequality is satisfied for the feature value.
+ // We can look at the whole function to get a sense for how the model
+ // responds to the variable on a global level. The model can be seen to
+ // reconstruct the parabolic and step-wise function, shifted with
+ // respect to the average expected output over the training set. Very
+ // few bins are used to model the second feature because the GAM model
+ // discards unchanged bins to create smaller models. One last thing to
+ // notice is that these feature functions can be noisy. While we know
+ // that Feature1 should be symmetric, this is not captured in the model.
+ // This is due to noise in the data. Common practice is to use
+ // resampling methods to estimate a confidence interval at each bin.
+ // This will help to determine if the effect is real or just sampling
+ // noise. See for example: Tan, Caruana, Hooker, and Lou.
+ // "Distill-and-Compare: Auditing Black-Box Models Using Transparent
+ // Model Distillation."
+ // arXiv:1710.06169."
}
private class Data
@@ -122,13 +136,17 @@ private class Data
}
///
- /// Creates a dataset, an IEnumerable of Data objects, for a GAM sample. Feature1 is a parabola centered around 0,
- /// while Feature2 is a simple piecewise function.
+ /// Creates a dataset, an IEnumerable of Data objects, for a GAM sample.
+ /// Feature1 is a parabola centered around 0, while Feature2 is a simple
+ /// piecewise function.
///
/// The number of examples to generate.
- /// The seed for the random number generator used to produce data.
+ /// The seed for the random number generator used to
+ /// produce data.
///
- private static IEnumerable GenerateData(int numExamples = 25000, int seed = 1)
+ private static IEnumerable GenerateData(int numExamples = 25000,
+ int seed = 1)
+
{
var rng = new Random(seed);
float centeredFloat() => (float)(rng.NextDouble() - 0.5);
@@ -140,7 +158,8 @@ private static IEnumerable GenerateData(int numExamples = 25000, int seed
Features = new float[2] { centeredFloat(), centeredFloat() }
};
// Compute the label from the shape functions and add noise.
- data.Label = Sigmoid(Parabola(data.Features[0]) + SimplePiecewise(data.Features[1]) + centeredFloat()) > 0.5;
+ data.Label = Sigmoid(Parabola(data.Features[0]) +
+ SimplePiecewise(data.Features[1]) + centeredFloat()) > 0.5;
yield return data;
}
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LbfgsLogisticRegression.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LbfgsLogisticRegression.cs
index e762da494b..5aece5a264 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LbfgsLogisticRegression.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LbfgsLogisticRegression.cs
@@ -10,35 +10,43 @@ public static class LbfgsLogisticRegression
{
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
- // Setting the seed to a fixed number in this example to make outputs deterministic.
+ // Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging, as a catalog of available operations
+ // and as the source of randomness. Setting the seed to a fixed number
+ // in this example to make outputs deterministic.
var mlContext = new MLContext(seed: 0);
// Create a list of training data points.
var dataPoints = GenerateRandomDataPoints(1000);
- // Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
+ // Convert the list of data points to an IDataView object, which is
+ // consumable by ML.NET API.
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);
// Define the trainer.
- var pipeline = mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression();
+ var pipeline = mlContext.BinaryClassification.Trainers
+ .LbfgsLogisticRegression();
// Train the model.
var model = pipeline.Fit(trainingData);
- // Create testing data. Use different random seed to make it different from training data.
- var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
+ // Create testing data. Use different random seed to make it different
+ // from training data.
+ var testData = mlContext.Data
+ .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
// Run the model on test data set.
var transformedTestData = model.Transform(testData);
// Convert IDataView object to a list.
- var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList();
+ var predictions = mlContext.Data
+ .CreateEnumerable(transformedTestData,
+ reuseRowObject: false).ToList();
// Print 5 predictions.
foreach (var p in predictions.Take(5))
- Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
+ Console.WriteLine($"Label: {p.Label}, "
+ + $"Prediction: {p.PredictedLabel}");
// Expected output:
// Label: True, Prediction: True
@@ -48,7 +56,9 @@ public static void Example()
// Label: False, Prediction: False
// Evaluate the overall metrics.
- var metrics = mlContext.BinaryClassification.Evaluate(transformedTestData);
+ var metrics = mlContext.BinaryClassification
+ .Evaluate(transformedTestData);
+
PrintMetrics(metrics);
// Expected output:
@@ -74,7 +84,9 @@ public static void Example()
// Precision || 0.8583 | 0.8972 |
}
- private static IEnumerable GenerateRandomDataPoints(int count, int seed=0)
+ private static IEnumerable GenerateRandomDataPoints(int count,
+ int seed=0)
+
{
var random = new Random(seed);
float randomFloat() => (float)random.NextDouble();
@@ -85,13 +97,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se
{
Label = label,
// Create random features that are correlated with the label.
- // For data points with false label, the feature values are slightly increased by adding a constant.
- Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.1f).ToArray()
+ // For data points with false label, the feature values are
+ // slightly increased by adding a constant.
+ Features = Enumerable.Repeat(label, 50)
+ .Select(x => x ? randomFloat() : randomFloat() +
+ 0.1f).ToArray()
+
};
}
}
- // Example with label and 50 feature values. A data set is a collection of such examples.
+ // Example with label and 50 feature values. A data set is a collection of
+ // such examples.
private class DataPoint
{
public bool Label { get; set; }
@@ -114,11 +131,16 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics)
Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}");
Console.WriteLine($"F1 Score: {metrics.F1Score:F2}");
- Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}");
+ Console.WriteLine($"Negative Precision: " +
+ $"{metrics.NegativePrecision:F2}");
+
Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}");
- Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}");
+ Console.WriteLine($"Positive Precision: " +
+ $"{metrics.PositivePrecision:F2}");
+
Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n");
Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable());
}
}
}
+
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LbfgsLogisticRegressionWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LbfgsLogisticRegressionWithOptions.cs
index 4204f0c4c2..fe2fcb14ab 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LbfgsLogisticRegressionWithOptions.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LbfgsLogisticRegressionWithOptions.cs
@@ -11,15 +11,17 @@ public static class LbfgsLogisticRegressionWithOptions
{
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
- // Setting the seed to a fixed number in this example to make outputs deterministic.
+ // Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging, as a catalog of available operations
+ // and as the source of randomness. Setting the seed to a fixed number
+ // in this example to make outputs deterministic.
var mlContext = new MLContext(seed: 0);
// Create a list of training data points.
var dataPoints = GenerateRandomDataPoints(1000);
- // Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
+ // Convert the list of data points to an IDataView object, which is
+ // consumable by ML.NET API.
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);
// Define trainer options.
@@ -31,23 +33,29 @@ public static void Example()
};
// Define the trainer.
- var pipeline = mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression(options);
+ var pipeline = mlContext.BinaryClassification.Trainers
+ .LbfgsLogisticRegression(options);
// Train the model.
var model = pipeline.Fit(trainingData);
- // Create testing data. Use different random seed to make it different from training data.
- var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
+ // Create testing data. Use different random seed to make it different
+ // from training data.
+ var testData = mlContext.Data
+ .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
// Run the model on test data set.
var transformedTestData = model.Transform(testData);
// Convert IDataView object to a list.
- var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList();
+ var predictions = mlContext.Data
+ .CreateEnumerable(transformedTestData,
+ reuseRowObject: false).ToList();
// Print 5 predictions.
foreach (var p in predictions.Take(5))
- Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
+ Console.WriteLine($"Label: {p.Label}, "
+ + $"Prediction: {p.PredictedLabel}");
// Expected output:
// Label: True, Prediction: True
@@ -57,7 +65,9 @@ public static void Example()
// Label: False, Prediction: False
// Evaluate the overall metrics.
- var metrics = mlContext.BinaryClassification.Evaluate(transformedTestData);
+ var metrics = mlContext.BinaryClassification
+ .Evaluate(transformedTestData);
+
PrintMetrics(metrics);
// Expected output:
@@ -83,7 +93,9 @@ public static void Example()
// Precision || 0.8571 | 0.8902 |
}
- private static IEnumerable GenerateRandomDataPoints(int count, int seed=0)
+ private static IEnumerable GenerateRandomDataPoints(int count,
+ int seed=0)
+
{
var random = new Random(seed);
float randomFloat() => (float)random.NextDouble();
@@ -94,13 +106,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se
{
Label = label,
// Create random features that are correlated with the label.
- // For data points with false label, the feature values are slightly increased by adding a constant.
- Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.1f).ToArray()
+ // For data points with false label, the feature values are
+ // slightly increased by adding a constant.
+ Features = Enumerable.Repeat(label, 50)
+ .Select(x => x ? randomFloat() : randomFloat() +
+ 0.1f).ToArray()
+
};
}
}
- // Example with label and 50 feature values. A data set is a collection of such examples.
+ // Example with label and 50 feature values. A data set is a collection of
+ // such examples.
private class DataPoint
{
public bool Label { get; set; }
@@ -123,11 +140,16 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics)
Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}");
Console.WriteLine($"F1 Score: {metrics.F1Score:F2}");
- Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}");
+ Console.WriteLine($"Negative Precision: " +
+ $"{metrics.NegativePrecision:F2}");
+
Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}");
- Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}");
+ Console.WriteLine($"Positive Precision: " +
+ $"{metrics.PositivePrecision:F2}");
+
Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n");
Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable());
}
}
}
+
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbm.cs
index 4bf27d017a..c89a4b2b2c 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbm.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbm.cs
@@ -8,39 +8,48 @@ namespace Samples.Dynamic.Trainers.BinaryClassification
{
public static class LightGbm
{
- // This example requires installation of additional nuget package
- // Microsoft.ML.LightGbm.
+ // This example requires installation of additional NuGet package for
+ // Microsoft.ML.LightGbm at
+ // https://www.nuget.org/packages/Microsoft.ML.LightGbm/
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
- // Setting the seed to a fixed number in this example to make outputs deterministic.
+ // Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging, as a catalog of available operations
+ // and as the source of randomness. Setting the seed to a fixed number
+ // in this example to make outputs deterministic.
var mlContext = new MLContext(seed: 0);
// Create a list of training data points.
var dataPoints = GenerateRandomDataPoints(1000);
- // Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
+ // Convert the list of data points to an IDataView object, which is
+ // consumable by ML.NET API.
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);
// Define the trainer.
- var pipeline = mlContext.BinaryClassification.Trainers.LightGbm();
+ var pipeline = mlContext.BinaryClassification.Trainers
+ .LightGbm();
// Train the model.
var model = pipeline.Fit(trainingData);
- // Create testing data. Use different random seed to make it different from training data.
- var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
+ // Create testing data. Use different random seed to make it different
+ // from training data.
+ var testData = mlContext.Data
+ .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
// Run the model on test data set.
var transformedTestData = model.Transform(testData);
// Convert IDataView object to a list.
- var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList();
+ var predictions = mlContext.Data
+ .CreateEnumerable(transformedTestData,
+ reuseRowObject: false).ToList();
// Print 5 predictions.
foreach (var p in predictions.Take(5))
- Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
+ Console.WriteLine($"Label: {p.Label}, "
+ + $"Prediction: {p.PredictedLabel}");
// Expected output:
// Label: True, Prediction: True
@@ -50,7 +59,9 @@ public static void Example()
// Label: False, Prediction: False
// Evaluate the overall metrics.
- var metrics = mlContext.BinaryClassification.Evaluate(transformedTestData);
+ var metrics = mlContext.BinaryClassification
+ .Evaluate(transformedTestData);
+
PrintMetrics(metrics);
// Expected output:
@@ -73,7 +84,9 @@ public static void Example()
// Precision || 0.7531 | 0.7860 |
}
- private static IEnumerable GenerateRandomDataPoints(int count, int seed=0)
+ private static IEnumerable GenerateRandomDataPoints(int count,
+ int seed=0)
+
{
var random = new Random(seed);
float randomFloat() => (float)random.NextDouble();
@@ -84,13 +97,18 @@ private static IEnumerable GenerateRandomDataPoints(int count, int se
{
Label = label,
// Create random features that are correlated with the label.
- // For data points with false label, the feature values are slightly increased by adding a constant.
- Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.03f).ToArray()
+ // For data points with false label, the feature values are
+ // slightly increased by adding a constant.
+ Features = Enumerable.Repeat(label, 50)
+ .Select(x => x ? randomFloat() : randomFloat() +
+ 0.03f).ToArray()
+
};
}
}
- // Example with label and 50 feature values. A data set is a collection of such examples.
+ // Example with label and 50 feature values. A data set is a collection of
+ // such examples.
private class DataPoint
{
public bool Label { get; set; }
@@ -113,11 +131,16 @@ private static void PrintMetrics(BinaryClassificationMetrics metrics)
Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}");
Console.WriteLine($"F1 Score: {metrics.F1Score:F2}");
- Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}");
+ Console.WriteLine($"Negative Precision: " +
+ $"{metrics.NegativePrecision:F2}");
+
Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}");
- Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}");
+ Console.WriteLine($"Positive Precision: " +
+ $"{metrics.PositivePrecision:F2}");
+
Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}\n");
Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable());
}
}
}
+
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbm.tt b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbm.tt
index 8bef899d83..2de4012354 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbm.tt
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbm.tt
@@ -10,8 +10,9 @@ string LabelThreshold = "0.5f";
string DataSepValue = "0.03f";
string OptionsInclude = "";
string Comments= @"
- // This example requires installation of additional nuget package
- // Microsoft.ML.LightGbm.";
+ // This example requires installation of additional NuGet package for
+ // Microsoft.ML.LightGbm at
+ // https://www.nuget.org/packages/Microsoft.ML.LightGbm/";
string ExpectedOutputPerInstance = @"// Expected output:
// Label: True, Prediction: True
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbmWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbmWithOptions.cs
index 82bdace764..5df0a59ee5 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbmWithOptions.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbmWithOptions.cs
@@ -9,19 +9,22 @@ namespace Samples.Dynamic.Trainers.BinaryClassification
{
public static class LightGbmWithOptions
{
- // This example requires installation of additional nuget package
- // Microsoft.ML.LightGbm.
+ // This example requires installation of additional NuGet package for
+ // Microsoft.ML.LightGbm at
+ // https://www.nuget.org/packages/Microsoft.ML.LightGbm/
public static void Example()
{
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
- // as a catalog of available operations and as the source of randomness.
- // Setting the seed to a fixed number in this example to make outputs deterministic.
+ // Create a new context for ML.NET operations. It can be used for
+ // exception tracking and logging, as a catalog of available operations
+ // and as the source of randomness. Setting the seed to a fixed number
+ // in this example to make outputs deterministic.
var mlContext = new MLContext(seed: 0);
// Create a list of training data points.
var dataPoints = GenerateRandomDataPoints(1000);
- // Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
+ // Convert the list of data points to an IDataView object, which is
+ // consumable by ML.NET API.
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);
// Define trainer options.
@@ -35,23 +38,29 @@ public static void Example()
};
// Define the trainer.
- var pipeline = mlContext.BinaryClassification.Trainers.LightGbm(options);
+ var pipeline = mlContext.BinaryClassification.Trainers
+ .LightGbm(options);
// Train the model.
var model = pipeline.Fit(trainingData);
- // Create testing data. Use different random seed to make it different from training data.
- var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
+ // Create testing data. Use different random seed to make it different
+ // from training data.
+ var testData = mlContext.Data
+ .LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123));
// Run the model on test data set.
var transformedTestData = model.Transform(testData);
// Convert IDataView object to a list.
- var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList();
+ var predictions = mlContext.Data
+ .CreateEnumerable