Skip to content

Commit 9ca5a5a

Browse files
authored
Projection documentation (#3232)
1 parent e37e5b1 commit 9ca5a5a

File tree

7 files changed

+164
-101
lines changed

7 files changed

+164
-101
lines changed

docs/samples/Microsoft.ML.Samples/Dynamic/ProjectionTransforms.cs

Lines changed: 0 additions & 97 deletions
This file was deleted.
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using Microsoft.ML;
5+
using Microsoft.ML.Data;
6+
using Microsoft.ML.Transforms;
7+
8+
namespace Samples.Dynamic
9+
{
10+
public static class ApproximatedKernelMap
11+
{
12+
// Transform feature vector to another non-linear space. See https://people.eecs.berkeley.edu/~brecht/papers/07.rah.rec.nips.pdf.
13+
public static void Example()
14+
{
15+
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
16+
// as well as the source of randomness.
17+
var mlContext = new MLContext();
18+
var samples = new List<DataPoint>()
19+
{
20+
new DataPoint(){ Features = new float[7] { 1, 1, 0, 0, 1, 0, 1} },
21+
new DataPoint(){ Features = new float[7] { 0, 0, 1, 0, 0, 1, 1} },
22+
new DataPoint(){ Features = new float[7] {-1, 1, 0,-1,-1, 0,-1} },
23+
new DataPoint(){ Features = new float[7] { 0,-1, 0, 1, 0,-1,-1} }
24+
};
25+
// Convert training data to IDataView, the general data type used in ML.NET.
26+
var data = mlContext.Data.LoadFromEnumerable(samples);
27+
// ApproximatedKernelMap takes the data and maps it to a random low-dimensional space.
28+
var approximation = mlContext.Transforms.ApproximatedKernelMap("Features", rank: 4, generator: new GaussianKernel(gamma: 0.7f), seed: 1);
29+
30+
// Now we can transform the data and look at the output to confirm the behavior of the estimator.
31+
// This operation doesn't actually evaluate data until we read the data below.
32+
var tansformer = approximation.Fit(data);
33+
var transformedData = tansformer.Transform(data);
34+
35+
var column = transformedData.GetColumn<float[]>("Features").ToArray();
36+
foreach (var row in column)
37+
Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4"))));
38+
// Expected output:
39+
// -0.0119, 0.5867, 0.4942, 0.7041
40+
// 0.4720, 0.5639, 0.4346, 0.2671
41+
// -0.2243, 0.7071, 0.7053, -0.1681
42+
// 0.0846, 0.5836, 0.6575, 0.0581
43+
}
44+
45+
private class DataPoint
46+
{
47+
[VectorType(7)]
48+
public float[] Features { get; set; }
49+
}
50+
51+
}
52+
}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using Microsoft.ML;
5+
using Microsoft.ML.Data;
6+
7+
namespace Samples.Dynamic
8+
{
9+
class NormalizeGlobalContrast
10+
{
11+
public static void Example()
12+
{
13+
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
14+
// as well as the source of randomness.
15+
var mlContext = new MLContext();
16+
var samples = new List<DataPoint>()
17+
{
18+
new DataPoint(){ Features = new float[4] { 1, 1, 0, 0} },
19+
new DataPoint(){ Features = new float[4] { 2, 2, 0, 0} },
20+
new DataPoint(){ Features = new float[4] { 1, 0, 1, 0} },
21+
new DataPoint(){ Features = new float[4] { 0, 1, 0, 1} }
22+
};
23+
// Convert training data to IDataView, the general data type used in ML.NET.
24+
var data = mlContext.Data.LoadFromEnumerable(samples);
25+
var approximation = mlContext.Transforms.NormalizeGlobalContrast("Features", ensureZeroMean: false, scale:2, ensureUnitStandardDeviation:true);
26+
27+
// Now we can transform the data and look at the output to confirm the behavior of the estimator.
28+
// This operation doesn't actually evaluate data until we read the data below.
29+
var tansformer = approximation.Fit(data);
30+
var transformedData = tansformer.Transform(data);
31+
32+
var column = transformedData.GetColumn<float[]>("Features").ToArray();
33+
foreach (var row in column)
34+
Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4"))));
35+
// Expected output:
36+
// 2.0000, 2.0000,-2.0000,-2.0000
37+
// 2.0000, 2.0000,-2.0000,-2.0000
38+
// 2.0000,-2.0000, 2.0000,-2.0000
39+
// -2.0000, 2.0000,-2.0000, 2.0000
40+
}
41+
42+
private class DataPoint
43+
{
44+
[VectorType(4)]
45+
public float[] Features { get; set; }
46+
}
47+
}
48+
}
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using Microsoft.ML;
5+
using Microsoft.ML.Data;
6+
using Microsoft.ML.Transforms;
7+
8+
namespace Samples.Dynamic
9+
{
10+
class NormalizeLpNorm
11+
{
12+
public static void Example()
13+
{
14+
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
15+
// as well as the source of randomness.
16+
var mlContext = new MLContext();
17+
var samples = new List<DataPoint>()
18+
{
19+
new DataPoint(){ Features = new float[4] { 1, 1, 0, 0} },
20+
new DataPoint(){ Features = new float[4] { 2, 2, 0, 0} },
21+
new DataPoint(){ Features = new float[4] { 1, 0, 1, 0} },
22+
new DataPoint(){ Features = new float[4] { 0, 1, 0, 1} }
23+
};
24+
// Convert training data to IDataView, the general data type used in ML.NET.
25+
var data = mlContext.Data.LoadFromEnumerable(samples);
26+
var approximation = mlContext.Transforms.NormalizeLpNorm("Features", norm: LpNormNormalizingEstimatorBase.NormFunction.L1, ensureZeroMean: true);
27+
28+
// Now we can transform the data and look at the output to confirm the behavior of the estimator.
29+
// This operation doesn't actually evaluate data until we read the data below.
30+
var tansformer = approximation.Fit(data);
31+
var transformedData = tansformer.Transform(data);
32+
33+
var column = transformedData.GetColumn<float[]>("Features").ToArray();
34+
foreach (var row in column)
35+
Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4"))));
36+
// Expected output:
37+
// 0.2500, 0.2500, -0.2500, -0.2500
38+
// 0.2500, 0.2500, -0.2500, -0.2500
39+
// 0.2500, -0.2500, 0.2500, -0.2500
40+
// -0.2500, 0.2500, -0.2500, 0.2500
41+
}
42+
43+
private class DataPoint
44+
{
45+
[VectorType(4)]
46+
public float[] Features { get; set; }
47+
}
48+
}
49+
}

src/Microsoft.ML.Transforms/FourierDistributionSampler.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ internal sealed class Options : IComponentFactory<KernelBase>
203203
/// <summary>
204204
/// Create a new instance of a LaplacianKernel.
205205
/// </summary>
206-
/// <param name="a">The coefficient in the exponent of the kernel function</param>
206+
/// <param name="a">The coefficient in the exponent of the kernel function.</param>
207207
public LaplacianKernel(float a = 1)
208208
{
209209
Contracts.CheckParam(a > 0, nameof(a));

src/Microsoft.ML.Transforms/KernelCatalog.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ public static class KernelExpansionCatalog
2626
/// <example>
2727
/// <format type="text/markdown">
2828
/// <![CDATA[
29-
/// [!code-csharp[CreateRandomFourierFeatures](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ProjectionTransforms.cs?range=1-6,12-112)]
29+
/// [!code-csharp[ApproximatedKernelMap](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApproximatedKernelMap.cs)]
3030
/// ]]>
3131
/// </format>
3232
/// </example>

src/Microsoft.ML.Transforms/NormalizerCatalog.cs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -246,10 +246,15 @@ internal static NormalizingEstimator Normalize(this TransformsCatalog catalog,
246246
/// <param name="inputColumnName">Name of column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
247247
/// <param name="norm">Type of norm to use to normalize each sample. The indicated norm of the resulting vector will be normalized to one.</param>
248248
/// <param name="ensureZeroMean">If <see langword="true"/>, subtract mean from each value before normalizing and use the raw input otherwise.</param>
249+
/// <remarks>
250+
/// This transform performs the following operation on each row X: Y = (X - M(X)) / D(X)
251+
/// where M(X) is the scalar mean of all elements in the current row if <paramref name="ensureZeroMean"/> is set to <see langword="true"/>, or <c>0</c> otherwise,
252+
/// and D(X) is the scalar value of the selected <paramref name="norm"/>.
253+
/// </remarks>
249254
/// <example>
250255
/// <format type="text/markdown">
251256
/// <![CDATA[
252-
/// [!code-csharp[LpNormalize](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ProjectionTransforms.cs?range=1-6,12-112)]
257+
/// [!code-csharp[NormalizeLpNorm](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs)]
253258
/// ]]>
254259
/// </format>
255260
/// </example>
@@ -276,10 +281,16 @@ internal static LpNormNormalizingEstimator NormalizeLpNorm(this TransformsCatalo
276281
/// <param name="ensureZeroMean">If <see langword="true"/>, subtract mean from each value before normalizing and use the raw input otherwise.</param>
277282
/// <param name="ensureUnitStandardDeviation">If <see langword="true"/>, the resulting vector's standard deviation will be one. Otherwise, the resulting vector's L2-norm will be one.</param>
278283
/// <param name="scale">Scale features by this value.</param>
284+
/// <remarks>
285+
/// This transform performs the following operation on a row X: Y = scale * (X - M(X)) / D(X)
286+
/// where M(X) is the scalar mean of all elements in the current row if <paramref name="ensureZeroMean"/> is set to <see langword="true"/>, or <c>0</c> otherwise,
287+
/// and D(X) is the scalar standard deviation of the row if <paramref name="ensureUnitStandardDeviation"/> is set to <see langword="true"/>, or
288+
/// the L2 norm of the row vector if <paramref name="ensureUnitStandardDeviation"/> is set to <see langword="false"/>; scale is the value of <paramref name="scale"/>.
289+
/// </remarks>
279290
/// <example>
280291
/// <format type="text/markdown">
281292
/// <![CDATA[
282-
/// [!code-csharp[GlobalContrastNormalize](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ProjectionTransforms.cs?range=1-6,12-112)]
293+
/// [!code-csharp[NormalizeGlobalContrast](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs)]
283294
/// ]]>
284295
/// </format>
285296
/// </example>

0 commit comments

Comments
 (0)