Commit 057c4b9

Convert Gcn and LpNorm to estimators (#1582)
1 parent 9e8b041 commit 057c4b9

16 files changed (+1259, -774 lines)
Lines changed: 118 additions & 0 deletions
@@ -0,0 +1,118 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Microsoft.ML.Runtime.Api;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Data;
using System;
using System.Collections.Generic;
using System.Linq;

namespace Microsoft.ML.Samples.Dynamic
{
    public partial class TransformSamples
    {
        public static void ProjectionTransforms()
        {
            // Create a new ML context for ML.NET operations. It can be used for exception tracking and logging,
            // as well as the source of randomness.
            var ml = new MLContext();

            // Get a small dataset as an IEnumerable and convert it to an IDataView.
            IEnumerable<SamplesUtils.DatasetUtils.SampleVectorOfNumbersData> data = SamplesUtils.DatasetUtils.GetVectorOfNumbersData();
            var trainData = ml.CreateStreamingDataView(data);

            // Preview of the data.
            //
            // Features
            // 0 1 2 3 4 5 6 7 8 9
            // 1 2 3 4 5 6 7 8 9 0
            // 2 3 4 5 6 7 8 9 0 1
            // 3 4 5 6 7 8 9 0 1 2
            // 4 5 6 7 8 9 0 1 2 3
            // 5 6 7 8 9 0 1 2 3 4
            // 6 7 8 9 0 1 2 3 4 5

            // A small printing utility.
            Action<string, IEnumerable<VBuffer<float>>> printHelper = (colName, column) =>
            {
                Console.WriteLine($"{colName} column obtained post-transformation.");
                foreach (var row in column)
                    Console.WriteLine($"{string.Join(" ", row.DenseValues().Select(x => x.ToString("f3")))} ");
            };

            // A pipeline to project the Features column into a random Fourier feature space.
            var rffPipeline = ml.Transforms.Projection.CreateRandomFourierFeatures(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), newDim: 4);
            // The transformed (projected) data.
            var transformedData = rffPipeline.Fit(trainData).Transform(trainData);
            // Getting the data of the newly created column, so we can preview it.
            var randomFourier = transformedData.GetColumn<VBuffer<float>>(ml, nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features));

            printHelper(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), randomFourier);

            // Features column obtained post-transformation.
            //
            //  0.634  0.628 -0.705 -0.337
            //  0.704  0.683 -0.555 -0.422
            //  0.407  0.542 -0.707 -0.616
            //  0.473  0.331 -0.400 -0.699
            //  0.181  0.361 -0.335 -0.157
            //  0.165  0.117 -0.547  0.014

            // A pipeline to whiten the Features column (ZCA whitening).
            var whiteningPipeline = ml.Transforms.Projection.VectorWhiten(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), kind: Transforms.Projections.WhiteningKind.Zca);
            // The transformed (projected) data.
            transformedData = whiteningPipeline.Fit(trainData).Transform(trainData);
            // Getting the data of the newly created column, so we can preview it.
            var whitening = transformedData.GetColumn<VBuffer<float>>(ml, nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features));

            printHelper(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), whitening);

            // Features column obtained post-transformation.
            //
            // -0.394 -0.318 -0.243 -0.168  0.209  0.358  0.433  0.589  0.873  2.047
            // -0.034  0.030  0.094  0.159  0.298  0.427  0.492  0.760  1.855 -1.197
            //  0.099  0.161  0.223  0.286  0.412  0.603  0.665  1.797 -1.265 -0.172
            //  0.211  0.277  0.344  0.410  0.606  1.267  1.333 -1.340 -0.205  0.065
            //  0.454  0.523  0.593  0.664  1.886 -0.757 -0.687 -0.022  0.176  0.310
            //  0.863  0.938  1.016  1.093 -1.326 -0.096 -0.019  0.189  0.330  0.483

            // A pipeline to L-p normalize the Features column.
            var lpNormalizePipeline = ml.Transforms.Projection.LpNormalize(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), normKind: Transforms.Projections.LpNormalizingEstimatorBase.NormalizerKind.L1Norm);
            // The transformed (projected) data.
            transformedData = lpNormalizePipeline.Fit(trainData).Transform(trainData);
            // Getting the data of the newly created column, so we can preview it.
            var lpNormalize = transformedData.GetColumn<VBuffer<float>>(ml, nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features));

            printHelper(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), lpNormalize);

            // Features column obtained post-transformation.
            //
            // 0.000 0.022 0.044 0.067 0.089 0.111 0.133 0.156 0.178 0.200
            // 0.022 0.044 0.067 0.089 0.111 0.133 0.156 0.178 0.200 0.000
            // 0.044 0.067 0.089 0.111 0.133 0.156 0.178 0.200 0.000 0.022
            // 0.067 0.089 0.111 0.133 0.156 0.178 0.200 0.000 0.022 0.044
            // 0.111 0.133 0.156 0.178 0.200 0.000 0.022 0.044 0.067 0.089
            // 0.133 0.156 0.178 0.200 0.000 0.022 0.044 0.067 0.089 0.111

            // A pipeline to apply global contrast normalization to the Features column.
            var gcNormalizePipeline = ml.Transforms.Projection.GlobalContrastNormalize(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), substractMean: false);
            // The transformed (projected) data.
            transformedData = gcNormalizePipeline.Fit(trainData).Transform(trainData);
            // Getting the data of the newly created column, so we can preview it.
            var gcNormalize = transformedData.GetColumn<VBuffer<float>>(ml, nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features));

            printHelper(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), gcNormalize);

            // Features column obtained post-transformation.
            //
            // 0.000 0.059 0.118 0.178 0.237 0.296 0.355 0.415 0.474 0.533
            // 0.059 0.118 0.178 0.237 0.296 0.355 0.415 0.474 0.533 0.000
            // 0.118 0.178 0.237 0.296 0.355 0.415 0.474 0.533 0.000 0.059
            // 0.178 0.237 0.296 0.355 0.415 0.474 0.533 0.000 0.059 0.118
            // 0.296 0.355 0.415 0.474 0.533 0.000 0.059 0.118 0.178 0.237
            // 0.355 0.415 0.474 0.533 0.000 0.059 0.118 0.178 0.237 0.296
        }
    }
}
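
For orientation, a sample method like this is normally invoked from a small console driver in the samples project; a minimal sketch, assuming a hypothetical Program entry point that is not part of this commit:

using Microsoft.ML.Samples.Dynamic;

internal static class Program
{
    private static void Main()
    {
        // Runs the projection sample added above: random Fourier features, ZCA whitening,
        // L-p normalization and global contrast normalization, printing each result.
        TransformSamples.ProjectionTransforms();
    }
}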

src/Microsoft.ML.Legacy/CSharpApi.cs

Lines changed: 15 additions & 15 deletions
@@ -12690,7 +12690,7 @@ public FeatureSelectorByMutualInformationPipelineStep(Output output)
namespace Legacy.Transforms
{

- public sealed partial class LpNormNormalizerTransformGcnColumn : OneToOneColumn<LpNormNormalizerTransformGcnColumn>, IOneToOneColumn
+ public sealed partial class LpNormalizingTransformerGcnColumn : OneToOneColumn<LpNormalizingTransformerGcnColumn>, IOneToOneColumn
{
/// <summary>
/// Normalize by standard deviation rather than L2 norm

@@ -12751,23 +12751,23 @@ public GlobalContrastNormalizer(params (string inputColumn, string outputColumn)

public void AddColumn(string inputColumn)
{
- var list = Column == null ? new List<Microsoft.ML.Legacy.Transforms.LpNormNormalizerTransformGcnColumn>() : new List<Microsoft.ML.Legacy.Transforms.LpNormNormalizerTransformGcnColumn>(Column);
- list.Add(OneToOneColumn<Microsoft.ML.Legacy.Transforms.LpNormNormalizerTransformGcnColumn>.Create(inputColumn));
+ var list = Column == null ? new List<Microsoft.ML.Legacy.Transforms.LpNormalizingTransformerGcnColumn>() : new List<Microsoft.ML.Legacy.Transforms.LpNormalizingTransformerGcnColumn>(Column);
+ list.Add(OneToOneColumn<Microsoft.ML.Legacy.Transforms.LpNormalizingTransformerGcnColumn>.Create(inputColumn));
Column = list.ToArray();
}

public void AddColumn(string outputColumn, string inputColumn)
{
- var list = Column == null ? new List<Microsoft.ML.Legacy.Transforms.LpNormNormalizerTransformGcnColumn>() : new List<Microsoft.ML.Legacy.Transforms.LpNormNormalizerTransformGcnColumn>(Column);
- list.Add(OneToOneColumn<Microsoft.ML.Legacy.Transforms.LpNormNormalizerTransformGcnColumn>.Create(outputColumn, inputColumn));
+ var list = Column == null ? new List<Microsoft.ML.Legacy.Transforms.LpNormalizingTransformerGcnColumn>() : new List<Microsoft.ML.Legacy.Transforms.LpNormalizingTransformerGcnColumn>(Column);
+ list.Add(OneToOneColumn<Microsoft.ML.Legacy.Transforms.LpNormalizingTransformerGcnColumn>.Create(outputColumn, inputColumn));
Column = list.ToArray();
}

/// <summary>
/// New column definition(s) (optional form: name:src)
/// </summary>
- public LpNormNormalizerTransformGcnColumn[] Column { get; set; }
+ public LpNormalizingTransformerGcnColumn[] Column { get; set; }

/// <summary>
/// Subtract mean from each value before normalizing

@@ -14369,7 +14369,7 @@ public LogMeanVarianceNormalizerPipelineStep(Output output)

namespace Legacy.Transforms
{
- public enum LpNormNormalizerTransformNormalizerKind : byte
+ public enum LpNormalizingEstimatorBaseNormalizerKind : byte
{
L2Norm = 0,
StdDev = 1,

@@ -14378,12 +14378,12 @@ public enum LpNormNormalizerTransformNormalizerKind : byte
}

- public sealed partial class LpNormNormalizerTransformColumn : OneToOneColumn<LpNormNormalizerTransformColumn>, IOneToOneColumn
+ public sealed partial class LpNormalizingTransformerColumn : OneToOneColumn<LpNormalizingTransformerColumn>, IOneToOneColumn
{
/// <summary>
/// The norm to use to normalize each sample
/// </summary>
- public LpNormNormalizerTransformNormalizerKind? NormKind { get; set; }
+ public LpNormalizingEstimatorBaseNormalizerKind? NormKind { get; set; }

/// <summary>
/// Subtract mean from each value before normalizing

@@ -14434,28 +14434,28 @@ public LpNormalizer(params (string inputColumn, string outputColumn)[] inputOutp

public void AddColumn(string inputColumn)
{
- var list = Column == null ? new List<Microsoft.ML.Legacy.Transforms.LpNormNormalizerTransformColumn>() : new List<Microsoft.ML.Legacy.Transforms.LpNormNormalizerTransformColumn>(Column);
- list.Add(OneToOneColumn<Microsoft.ML.Legacy.Transforms.LpNormNormalizerTransformColumn>.Create(inputColumn));
+ var list = Column == null ? new List<Microsoft.ML.Legacy.Transforms.LpNormalizingTransformerColumn>() : new List<Microsoft.ML.Legacy.Transforms.LpNormalizingTransformerColumn>(Column);
+ list.Add(OneToOneColumn<Microsoft.ML.Legacy.Transforms.LpNormalizingTransformerColumn>.Create(inputColumn));
Column = list.ToArray();
}

public void AddColumn(string outputColumn, string inputColumn)
{
- var list = Column == null ? new List<Microsoft.ML.Legacy.Transforms.LpNormNormalizerTransformColumn>() : new List<Microsoft.ML.Legacy.Transforms.LpNormNormalizerTransformColumn>(Column);
- list.Add(OneToOneColumn<Microsoft.ML.Legacy.Transforms.LpNormNormalizerTransformColumn>.Create(outputColumn, inputColumn));
+ var list = Column == null ? new List<Microsoft.ML.Legacy.Transforms.LpNormalizingTransformerColumn>() : new List<Microsoft.ML.Legacy.Transforms.LpNormalizingTransformerColumn>(Column);
+ list.Add(OneToOneColumn<Microsoft.ML.Legacy.Transforms.LpNormalizingTransformerColumn>.Create(outputColumn, inputColumn));
Column = list.ToArray();
}

/// <summary>
/// New column definition(s) (optional form: name:src)
/// </summary>
- public LpNormNormalizerTransformColumn[] Column { get; set; }
+ public LpNormalizingTransformerColumn[] Column { get; set; }

/// <summary>
/// The norm to use to normalize each sample
/// </summary>
- public LpNormNormalizerTransformNormalizerKind NormKind { get; set; } = LpNormNormalizerTransformNormalizerKind.L2Norm;
+ public LpNormalizingEstimatorBaseNormalizerKind NormKind { get; set; } = LpNormalizingEstimatorBaseNormalizerKind.L2Norm;

/// <summary>
/// Subtract mean from each value before normalizing
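
For legacy (entry-point) consumers, the change above is a pure type rename: the generated column and enum types move from the LpNormNormalizerTransform* names to LpNormalizingTransformer* / LpNormalizingEstimatorBase*, while the LpNormalizer and GlobalContrastNormalizer wrappers keep their shape. A minimal sketch using only members visible in this diff (the "Features" column name is illustrative):

// Configure the legacy LpNormalizer; only the enum's type name changes with this commit.
var lpNormalizer = new Microsoft.ML.Legacy.Transforms.LpNormalizer(("Features", "Features"));
// Previously spelled LpNormNormalizerTransformNormalizerKind.StdDev.
lpNormalizer.NormKind = Microsoft.ML.Legacy.Transforms.LpNormalizingEstimatorBaseNormalizerKind.StdDev;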

src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj

Lines changed: 4 additions & 0 deletions
@@ -5,4 +5,8 @@
    <IncludeInPackage>Microsoft.ML</IncludeInPackage>
  </PropertyGroup>

+ <ItemGroup>
+   <ProjectReference Include="..\Microsoft.ML.Api\Microsoft.ML.Api.csproj" />
+ </ItemGroup>
+
</Project>
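
Presumably the new ProjectReference is needed because SamplesDatasetUtils.cs (next file) now uses the VectorType attribute from the Microsoft.ML.Runtime.Api namespace, which ships in the Microsoft.ML.Api assembly; a minimal illustration of that dependency, mirroring the type added below:

using Microsoft.ML.Runtime.Api;   // VectorTypeAttribute comes from Microsoft.ML.Api

public class SampleVectorOfNumbersData
{
    // Maps the float[] property to a fixed-size vector column of length 10
    // when this type is converted to an IDataView.
    [VectorType(10)]
    public float[] Features { get; set; }
}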

src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs

Lines changed: 96 additions & 11 deletions
@@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

+ using Microsoft.ML.Runtime.Api;
using System;
using System.Collections.Generic;
using System.Net;

@@ -105,7 +106,7 @@ public static IEnumerable<SampleTopicsData> GetTopicsData()
var data = new List<SampleTopicsData>();
data.Add(new SampleTopicsData { Review = "animals birds cats dogs fish horse", ReviewReverse = "radiation galaxy universe duck", Label = true });
data.Add(new SampleTopicsData { Review = "horse birds house fish duck cats", ReviewReverse = "space galaxy universe radiation", Label = false });
- data.Add(new SampleTopicsData { Review = "car truck driver bus pickup", ReviewReverse = "bus pickup", Label = true});
+ data.Add(new SampleTopicsData { Review = "car truck driver bus pickup", ReviewReverse = "bus pickup", Label = true });
data.Add(new SampleTopicsData { Review = "car truck driver bus pickup horse", ReviewReverse = "car truck", Label = false });

return data;

@@ -134,16 +135,100 @@ public class SampleInfertData
public static IEnumerable<SampleInfertData> GetInfertData()
{
var data = new List<SampleInfertData>();
- data.Add(new SampleInfertData {
-     RowNum = 0, Education = "0-5yrs", Age = 26, Parity = 6, Induced = 1, Case = 1, Spontaneous = 2, Stratum = 1, PooledStratum = 3 });
- data.Add(new SampleInfertData {
-     RowNum = 1, Education = "0-5yrs", Age = 42, Parity = 1, Induced = 1, Case = 1, Spontaneous = 0, Stratum = 2, PooledStratum = 1 });
- data.Add(new SampleInfertData {
-     RowNum = 2, Education = "0-5yrs", Age = 39, Parity = 6, Induced = 2, Case = 1, Spontaneous = 0, Stratum = 3, PooledStratum = 4 });
- data.Add(new SampleInfertData {
-     RowNum = 3, Education = "0-5yrs", Age = 34, Parity = 4, Induced = 2, Case = 1, Spontaneous = 0, Stratum = 4, PooledStratum = 2 });
- data.Add(new SampleInfertData {
-     RowNum = 4, Education = "6-11yrs", Age = 35, Parity = 3, Induced = 1, Case = 1, Spontaneous = 1, Stratum = 5, PooledStratum = 32 });
+ data.Add(new SampleInfertData
+ {
+     RowNum = 0,
+     Education = "0-5yrs",
+     Age = 26,
+     Parity = 6,
+     Induced = 1,
+     Case = 1,
+     Spontaneous = 2,
+     Stratum = 1,
+     PooledStratum = 3
+ });
+ data.Add(new SampleInfertData
+ {
+     RowNum = 1,
+     Education = "0-5yrs",
+     Age = 42,
+     Parity = 1,
+     Induced = 1,
+     Case = 1,
+     Spontaneous = 0,
+     Stratum = 2,
+     PooledStratum = 1
+ });
+ data.Add(new SampleInfertData
+ {
+     RowNum = 2,
+     Education = "0-5yrs",
+     Age = 39,
+     Parity = 6,
+     Induced = 2,
+     Case = 1,
+     Spontaneous = 0,
+     Stratum = 3,
+     PooledStratum = 4
+ });
+ data.Add(new SampleInfertData
+ {
+     RowNum = 3,
+     Education = "0-5yrs",
+     Age = 34,
+     Parity = 4,
+     Induced = 2,
+     Case = 1,
+     Spontaneous = 0,
+     Stratum = 4,
+     PooledStratum = 2
+ });
+ data.Add(new SampleInfertData
+ {
+     RowNum = 4,
+     Education = "6-11yrs",
+     Age = 35,
+     Parity = 3,
+     Induced = 1,
+     Case = 1,
+     Spontaneous = 1,
+     Stratum = 5,
+     PooledStratum = 32
+ });
+ return data;
+ }
+
+ public class SampleVectorOfNumbersData
+ {
+     [VectorType(10)]
+     public float[] Features { get; set; }
+ }
+
+ /// <summary>
+ /// Returns a few rows of data, each carrying a ten-element vector of numbers.
+ /// </summary>
+ public static IEnumerable<SampleVectorOfNumbersData> GetVectorOfNumbersData()
+ {
+     var data = new List<SampleVectorOfNumbersData>();
+     data.Add(new SampleVectorOfNumbersData { Features = new float[10] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 } });
+     data.Add(new SampleVectorOfNumbersData { Features = new float[10] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 0 } });
+     data.Add(new SampleVectorOfNumbersData
+     {
+         Features = new float[10] { 2, 3, 4, 5, 6, 7, 8, 9, 0, 1 }
+     });
+     data.Add(new SampleVectorOfNumbersData
+     {
+         Features = new float[10] { 3, 4, 5, 6, 7, 8, 9, 0, 1, 2 }
+     });
+     data.Add(new SampleVectorOfNumbersData
+     {
+         Features = new float[10] { 5, 6, 7, 8, 9, 0, 1, 2, 3, 4 }
+     });
+     data.Add(new SampleVectorOfNumbersData
+     {
+         Features = new float[10] { 6, 7, 8, 9, 0, 1, 2, 3, 4, 5 }
+     });
return data;
}
}
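
As the sample at the top of this commit shows, the new helper feeds the projection estimators once it is converted to an IDataView; a minimal consumption sketch using the same calls (variable names are illustrative):

var ml = new MLContext();
IEnumerable<SamplesUtils.DatasetUtils.SampleVectorOfNumbersData> rows = SamplesUtils.DatasetUtils.GetVectorOfNumbersData();
// The [VectorType(10)] annotation is what exposes Features as a fixed-size
// vector column of length 10 when the enumerable becomes an IDataView.
var dataView = ml.CreateStreamingDataView(rows);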
