Skip to content

Commit 93a994f

Browse files
committed
DateTimeTransformer is done.
ToStringTransformer is done. CatagoryImputer is done. TimeSeriesImputer is done. RobustScaler is done. Adding in samples and documentation. General code cleanup. Made the RowToRowMapperTransform create a new mapper if possible for each cursor.
1 parent dfa7a52 commit 93a994f

40 files changed

+10545
-13
lines changed

Microsoft.ML.sln

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,14 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.AutoML.Samples
273273
EndProject
274274
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Samples.GPU", "docs\samples\Microsoft.ML.Samples.GPU\Microsoft.ML.Samples.GPU.csproj", "{3C8F910B-7F23-4D25-B521-6D5AC9570ADD}"
275275
EndProject
276+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Featurizers", "src\Microsoft.ML.Featurizers\Microsoft.ML.Featurizers.csproj", "{E2DD0721-5B0F-4606-8182-4C7EFB834518}"
277+
EndProject
278+
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Microsoft.ML.Featurizers", "Microsoft.ML.Featurizers", "{1BA5C784-52E8-4A87-8525-26B2452F2882}"
279+
ProjectSection(SolutionItems) = preProject
280+
pkg\Microsoft.ML.Featurizers\Microsoft.ML.Featurizers.nupkgproj = pkg\Microsoft.ML.Featurizers\Microsoft.ML.Featurizers.nupkgproj
281+
pkg\Microsoft.ML.Featurizers\Microsoft.ML.Featurizers.symbols.nupkgproj = pkg\Microsoft.ML.Featurizers\Microsoft.ML.Featurizers.symbols.nupkgproj
282+
EndProjectSection
283+
EndProject
276284
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.CodeGenerator", "src\Microsoft.ML.CodeGenerator\Microsoft.ML.CodeGenerator.csproj", "{56CB0850-7341-4D71-9AE4-9EFC472D93DD}"
277285
EndProject
278286
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.CodeGenerator.Tests", "test\Microsoft.ML.CodeGenerator.Tests\Microsoft.ML.CodeGenerator.Tests.csproj", "{46CC5637-3DDF-4100-93FC-44BB87B2DB81}"
@@ -1690,6 +1698,30 @@ Global
16901698
{46CC5637-3DDF-4100-93FC-44BB87B2DB81}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU
16911699
{46CC5637-3DDF-4100-93FC-44BB87B2DB81}.Release-netfx|x64.ActiveCfg = Release-netfx|Any CPU
16921700
{46CC5637-3DDF-4100-93FC-44BB87B2DB81}.Release-netfx|x64.Build.0 = Release-netfx|Any CPU
1701+
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
1702+
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug|Any CPU.Build.0 = Debug|Any CPU
1703+
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug|x64.ActiveCfg = Debug|Any CPU
1704+
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug|x64.Build.0 = Debug|Any CPU
1705+
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netcoreapp3_0|Any CPU.ActiveCfg = Debug-netcoreapp3_0|Any CPU
1706+
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netcoreapp3_0|Any CPU.Build.0 = Debug-netcoreapp3_0|Any CPU
1707+
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netcoreapp3_0|x64.ActiveCfg = Debug-netcoreapp3_0|Any CPU
1708+
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netcoreapp3_0|x64.Build.0 = Debug-netcoreapp3_0|Any CPU
1709+
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU
1710+
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU
1711+
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netfx|x64.ActiveCfg = Debug-netfx|Any CPU
1712+
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netfx|x64.Build.0 = Debug-netfx|Any CPU
1713+
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release|Any CPU.ActiveCfg = Release|Any CPU
1714+
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release|Any CPU.Build.0 = Release|Any CPU
1715+
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release|x64.ActiveCfg = Release|Any CPU
1716+
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release|x64.Build.0 = Release|Any CPU
1717+
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netcoreapp3_0|Any CPU.ActiveCfg = Release-netcoreapp3_0|Any CPU
1718+
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netcoreapp3_0|Any CPU.Build.0 = Release-netcoreapp3_0|Any CPU
1719+
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netcoreapp3_0|x64.ActiveCfg = Release-netcoreapp3_0|Any CPU
1720+
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netcoreapp3_0|x64.Build.0 = Release-netcoreapp3_0|Any CPU
1721+
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU
1722+
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU
1723+
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netfx|x64.ActiveCfg = Release-netfx|Any CPU
1724+
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netfx|x64.Build.0 = Release-netfx|Any CPU
16931725
EndGlobalSection
16941726
GlobalSection(SolutionProperties) = preSolution
16951727
HideSolutionNode = FALSE
@@ -1779,6 +1811,8 @@ Global
17791811
{56CB0850-7341-4D71-9AE4-9EFC472D93DD} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
17801812
{46CC5637-3DDF-4100-93FC-44BB87B2DB81} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
17811813
{3817A875-278C-4140-BF66-3C4A8CA55F0D} = {D3D38B03-B557-484D-8348-8BADEE4DF592}
1814+
{E2DD0721-5B0F-4606-8182-4C7EFB834518} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
1815+
{1BA5C784-52E8-4A87-8525-26B2452F2882} = {D3D38B03-B557-484D-8348-8BADEE4DF592}
17821816
EndGlobalSection
17831817
GlobalSection(ExtensibilityGlobals) = postSolution
17841818
SolutionGuid = {41165AF1-35BB-4832-A189-73060F82B01D}

build/BranchInfo.props

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,12 @@
3030
<MajorVersion>1</MajorVersion>
3131
<MinorVersion>4</MinorVersion>
3232
<PatchVersion>0</PatchVersion>
33-
<PreReleaseLabel>preview3</PreReleaseLabel>
33+
<PreReleaseLabel>preview2</PreReleaseLabel>
3434
</PropertyGroup>
3535
<PropertyGroup Condition="'$(IsStableProject)' != 'true'">
3636
<MajorVersion>0</MajorVersion>
3737
<MinorVersion>16</MinorVersion>
3838
<PatchVersion>0</PatchVersion>
39-
<PreReleaseLabel>preview3</PreReleaseLabel>
39+
<PreReleaseLabel>preview2</PreReleaseLabel>
4040
</PropertyGroup>
4141
</Project>
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using Microsoft.ML;
4+
using Microsoft.ML.Data;
5+
using Microsoft.ML.Featurizers;
6+
7+
namespace Samples.Dynamic
8+
{
9+
/// <summary>
/// Sample demonstrating the category imputer on a single float column
/// that contains missing (NaN) entries.
/// </summary>
public static class CategoryImputer
{
    // Shape of a row fed into the pipeline.
    private class InputData
    {
        public float Feature1;
    }

    // Shape of a row read back out of the transformed data view.
    private sealed class TransformedData
    {
        public float Feature1 { get; set; }
    }

    /// <summary>
    /// Loads five in-memory rows, fits the imputer on the "Feature1" column,
    /// applies it to the same rows and writes every resulting value to the console.
    /// </summary>
    public static void Example()
    {
        // The MLContext is the entry point for all ML.NET operations
        // (data loading, transforms, logging, randomness).
        var context = new MLContext();

        // Five rows; two of them carry float.NaN as the value to be imputed.
        var rows = new List<InputData>
        {
            new InputData { Feature1 = 1f },
            new InputData { Feature1 = float.NaN },
            new InputData { Feature1 = 1f },
            new InputData { Feature1 = float.NaN },
            new InputData { Feature1 = 9f },
        };

        // Wrap the in-memory rows in an IDataView.
        var trainingData = context.Data.LoadFromEnumerable(rows);

        // Estimator that fills in missing values of "Feature1".
        // (The API name is spelled "Catagory" by the featurizers package.)
        var estimator = context.Transforms.CatagoryImputerTransformer("Feature1");

        // Fit on the data, then apply the fitted transformer to that same data.
        var fitted = estimator.Fit(trainingData);
        var imputed = fitted.Transform(trainingData);

        // Materialize the result as strongly typed rows so it can be printed.
        var outputRows = context.Data.CreateEnumerable<TransformedData>(
            imputed, reuseRowObject: false);

        Console.WriteLine("Features column obtained post-transformation.");
        foreach (var row in outputRows)
        {
            Console.WriteLine(row.Feature1);
        }

        // Expected output (NaN entries replaced by the most frequent value, 1):
        //  Features column obtained post-transformation.
        //  1
        //  1
        //  1
        //  1
        //  9
    }
}
69+
}
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using Microsoft.ML;
4+
using Microsoft.ML.Data;
5+
using Microsoft.ML.Featurizers;
6+
7+
namespace Samples.Dynamic
8+
{
9+
/// <summary>
/// Sample demonstrating the DateTimeTransformer, which expands one timestamp
/// column into a set of date/time component columns sharing a chosen prefix.
/// </summary>
public static class DateTimeTransformer
{
    // Shape of a row fed into the pipeline.
    private class DateTimeInput
    {
        public long Date;
    }

    // Shape of a row read back after the transform. Every generated column
    // starts with "DTC" because that is the prefix passed to the transformer.
    private sealed class TransformedData
    {
        public long Date { get; set; }
        public int DTCYear { get; set; }
        public byte DTCMonth { get; set; }
        public byte DTCDay { get; set; }
        public byte DTCHour { get; set; }
        public byte DTCMinute { get; set; }
        public byte DTCSecond { get; set; }
        public byte DTCAmPm { get; set; }
        public byte DTCHour12 { get; set; }
        public byte DTCDayOfWeek { get; set; }
        public byte DTCDayOfQuarter { get; set; }
        public ushort DTCDayOfYear { get; set; }
        public ushort DTCWeekOfMonth { get; set; }
        public byte DTCQuarterOfYear { get; set; }
        public byte DTCHalfOfYear { get; set; }
        public byte DTCWeekIso { get; set; }
        public int DTCYearIso { get; set; }
        public string DTCMonthLabel { get; set; }
        public string DTCAmPmLabel { get; set; }
        public string DTCDayOfWeekLabel { get; set; }
        public string DTCHolidayName { get; set; }
        public byte DTCIsPaidTimeOff { get; set; }
    }

    /// <summary>
    /// Loads a single timestamp row, runs it through the DateTimeTransformer with
    /// the "DTC" prefix and prints the original value followed by all 21 generated columns.
    /// </summary>
    public static void Example()
    {
        // The MLContext is the entry point for all ML.NET operations
        // (data loading, transforms, logging, randomness).
        var context = new MLContext();

        // One-row dataset. The value is a future date: 2025 June 30.
        var rows = new[] { new DateTimeInput() { Date = 1751241600 } };

        // Wrap the in-memory rows in an IDataView.
        var input = context.Data.LoadFromEnumerable(rows);

        // Estimator that splits "Date" into individual columns prefixed with "DTC".
        var estimator = context.Transforms.DateTimeTransformer("Date", "DTC");

        // Fit on the data, then apply the fitted transformer to that same data.
        var fitted = estimator.Fit(input);
        var expanded = fitted.Transform(input);

        // Materialize the result as strongly typed rows so it can be printed.
        var outputRows = context.Data.CreateEnumerable<TransformedData>(
            expanded, reuseRowObject: false);

        Console.WriteLine("Features column obtained post-transformation.");
        foreach (var row in outputRows)
        {
            // Same values, same order and same ", " separator as a manual concatenation.
            var cells = new object[]
            {
                row.Date,
                row.DTCYear,
                row.DTCMonth,
                row.DTCDay,
                row.DTCHour,
                row.DTCMinute,
                row.DTCSecond,
                row.DTCAmPm,
                row.DTCHour12,
                row.DTCDayOfWeek,
                row.DTCDayOfQuarter,
                row.DTCDayOfYear,
                row.DTCWeekOfMonth,
                row.DTCQuarterOfYear,
                row.DTCHalfOfYear,
                row.DTCWeekIso,
                row.DTCYearIso,
                row.DTCMonthLabel,
                row.DTCAmPmLabel,
                row.DTCDayOfWeekLabel,
                row.DTCHolidayName,
                row.DTCIsPaidTimeOff,
            };
            Console.WriteLine(string.Join(", ", cells));
        }

        // Expected output:
        //  Features column obtained post-transformation.
        //  1751241600, 2025, 6, 30, 0, 0, 0, 0, 0, 1, 91, 180, 4, 2, 1, 27, 2025, June, am, Monday, , 0
    }
}
84+
}
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using Microsoft.ML;
4+
using Microsoft.ML.Data;
5+
using Microsoft.ML.Featurizers;
6+
7+
namespace Samples.Dynamic
8+
{
9+
/// <summary>
/// Sample demonstrating the DateTimeTransformer when some of the columns it can
/// produce are explicitly dropped.
/// </summary>
public static class DateTimeTransformerDropColumns
{
    // Shape of a row fed into the pipeline.
    private class DateTimeInput
    {
        public long Date;
    }

    /// <summary>
    /// Loads a single timestamp row, runs it through the DateTimeTransformer with the
    /// "DTC" prefix while dropping five of the producible columns, and prints the
    /// original value followed by the 16 columns that remain.
    /// </summary>
    public static void Example()
    {
        // Create a new ML context, for ML.NET operations. It can be used for
        // exception tracking and logging, as well as the source of randomness.
        var mlContext = new MLContext();

        // Create a small dataset as an IEnumerable.
        // Future Date - 2025 June 30
        var samples = new[] { new DateTimeInput() { Date = 1751241600 } };

        // Convert training data to IDataView.
        var dataview = mlContext.Data.LoadFromEnumerable(samples);

        // A pipeline for splitting the time features into individual columns.
        // All the columns listed here will be dropped.
        var pipeline = mlContext.Transforms.DateTimeTransformer("Date", "DTC",
            DateTimeTransformerEstimator.ColumnsProduced.IsPaidTimeOff,
            DateTimeTransformerEstimator.ColumnsProduced.Day,
            DateTimeTransformerEstimator.ColumnsProduced.QuarterOfYear,
            DateTimeTransformerEstimator.ColumnsProduced.AmPm,
            DateTimeTransformerEstimator.ColumnsProduced.HolidayName);

        // The transformed data.
        var transformedData = pipeline.Fit(dataview).Transform(dataview);

        // Now let's take a look at what this did. We should have created 16 more columns with
        // the DateTime information split into its own columns (21 producible minus the 5 dropped).
        var featuresColumn = mlContext.Data.CreateEnumerable<TransformedData>(
            transformedData, reuseRowObject: false);

        // And we can write out a few rows.
        Console.WriteLine("Features column obtained post-transformation.");
        foreach (var featureRow in featuresColumn)
        {
            // Date first, then the remaining columns in the order the transformer lists them.
            Console.WriteLine(string.Join(", ",
                featureRow.Date,
                featureRow.DTCYear,
                featureRow.DTCMonth,
                featureRow.DTCHour,
                featureRow.DTCMinute,
                featureRow.DTCSecond,
                featureRow.DTCHour12,
                featureRow.DTCDayOfWeek,
                featureRow.DTCDayOfQuarter,
                featureRow.DTCDayOfYear,
                featureRow.DTCWeekOfMonth,
                featureRow.DTCHalfOfYear,
                featureRow.DTCWeekIso,
                featureRow.DTCYearIso,
                featureRow.DTCMonthLabel,
                featureRow.DTCAmPmLabel,
                featureRow.DTCDayOfWeekLabel));
        }

        // Expected output (17 values: Date plus the 16 retained columns; the values for the
        // dropped Day, AmPm and QuarterOfYear columns do not appear):
        //  Features column obtained post-transformation.
        //  1751241600, 2025, 6, 0, 0, 0, 0, 1, 91, 180, 4, 1, 27, 2025, June, am, Monday
    }

    // These columns start with DTC because that is the prefix we picked.
    // Only the columns that were not dropped are declared here.
    private sealed class TransformedData
    {
        public long Date { get; set; }
        public int DTCYear { get; set; }
        public byte DTCMonth { get; set; }
        public byte DTCHour { get; set; }
        public byte DTCMinute { get; set; }
        public byte DTCSecond { get; set; }
        public byte DTCHour12 { get; set; }
        public byte DTCDayOfWeek { get; set; }
        public byte DTCDayOfQuarter { get; set; }
        public ushort DTCDayOfYear { get; set; }
        public ushort DTCWeekOfMonth { get; set; }
        public byte DTCHalfOfYear { get; set; }
        public byte DTCWeekIso { get; set; }
        public int DTCYearIso { get; set; }
        public string DTCMonthLabel { get; set; }
        public string DTCAmPmLabel { get; set; }
        public string DTCDayOfWeekLabel { get; set; }
    }
}
80+
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using Microsoft.ML;
4+
using Microsoft.ML.Data;
5+
using Microsoft.ML.Featurizers;
6+
7+
namespace Samples.Dynamic
8+
{
9+
/// <summary>
/// Sample demonstrating the robust scaler on a single float column.
/// </summary>
public static class RobustScaler
{
    // Shape of a row fed into the pipeline.
    private class InputData
    {
        public float Feature1;
    }

    // Shape of a row read back out of the transformed data view.
    private sealed class TransformedData
    {
        public float Feature1 { get; set; }
    }

    /// <summary>
    /// Loads five in-memory rows, fits the robust scaler on the "Feature1" column,
    /// applies it to the same rows and writes every resulting value to the console.
    /// </summary>
    public static void Example()
    {
        // The MLContext is the entry point for all ML.NET operations
        // (data loading, transforms, logging, randomness).
        var context = new MLContext();

        // Build the dataset: one row per raw feature value, in this order.
        var rawValues = new[] { 1f, 3f, 5f, 7f, 9f };
        var rows = new List<InputData>();
        foreach (var value in rawValues)
        {
            rows.Add(new InputData { Feature1 = value });
        }

        // Wrap the in-memory rows in an IDataView.
        var trainingData = context.Data.LoadFromEnumerable(rows);

        // Estimator that centers and scales the "Feature1" column.
        var estimator = context.Transforms.RobustScalerTransformer("Feature1");

        // Fit on the data, then apply the fitted transformer to that same data.
        var fitted = estimator.Fit(trainingData);
        var scaled = fitted.Transform(trainingData);

        // Materialize the result as strongly typed rows so it can be printed.
        var outputRows = context.Data.CreateEnumerable<TransformedData>(
            scaled, reuseRowObject: false);

        Console.WriteLine("Features column obtained post-transformation.");
        foreach (var row in outputRows)
        {
            Console.WriteLine(row.Feature1);
        }

        // Expected output (values centered around 0 and scaled):
        //  Features column obtained post-transformation.
        //  -1
        //  -0.5
        //  0
        //  0.5
        //  1
    }
}
69+
}

0 commit comments

Comments
 (0)