Skip to content

Commit 882a6d9

Browse files
author
Rayan-Krishnan
authored
Reformatting Conversion, FeatureSelection and Image Analytics of Transform to Width 85 (#3943)
* samples/dynamic/transforms/conversions formatted to 85 char * samples/dynamic/transforms/featureselection formatted to 85 char * ImageAnalytics * ImageAnalytics * small corrections to formatting * minor tab and empty line changes
1 parent 1288d1d commit 882a6d9

23 files changed

+570
-311
lines changed

docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertType.cs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,17 +20,22 @@ public static void Example()
2020
var data = mlContext.Data.LoadFromEnumerable(rawData);
2121

2222
// Construct the pipeline.
23-
var pipeline = mlContext.Transforms.Conversion.ConvertType("SurvivedInt32", "Survived", DataKind.Int32);
23+
var pipeline = mlContext.Transforms.Conversion.ConvertType(
24+
"SurvivedInt32", "Survived", DataKind.Int32);
2425

2526
// Let's train our pipeline, and then apply it to the same data.
2627
var transformer = pipeline.Fit(data);
2728
var transformedData = transformer.Transform(data);
2829

29-
// Display original column 'Survived' (boolean) and converted column 'SurvivedInt32' (Int32)
30-
var convertedData = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, true);
30+
// Display original column 'Survived' (boolean) and converted column
31+
// 'SurvivedInt32' (Int32)
32+
var convertedData = mlContext.Data.CreateEnumerable<TransformedData>(
33+
transformedData, true);
34+
3135
foreach (var item in convertedData)
3236
{
33-
Console.WriteLine("A:{0,-10} Aconv:{1}", item.Survived, item.SurvivedInt32);
37+
Console.WriteLine("A:{0,-10} Aconv:{1}", item.Survived,
38+
item.SurvivedInt32);
3439
}
3540

3641
// Output

docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertTypeMultiColumn.cs

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,22 +4,33 @@
44

55
namespace Samples.Dynamic
66
{
7-
// This example illustrates how to convert multiple columns of different types to one type, in this case System.Single.
8-
// This is often a useful data transformation before concatenating the features together and passing them to a particular estimator.
7+
// This example illustrates how to convert multiple columns of different types
8+
// to one type, in this case System.Single.
9+
// This is often a useful data transformation before concatenating the features
10+
// together and passing them to a particular estimator.
911
public static class ConvertTypeMultiColumn
1012
{
1113
public static void Example()
1214
{
13-
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
14-
// as well as the source of randomness.
15+
// Create a new ML context, for ML.NET operations. It can be used for
16+
// exception tracking and logging, as well as the source of randomness.
1517
var mlContext = new MLContext(seed: 1);
1618

1719
var rawData = new[] {
18-
new InputData() { Feature1 = true, Feature2 = "0.4", Feature3 = DateTime.Now, Feature4 = 0.145},
19-
new InputData() { Feature1 = false, Feature2 = "0.5", Feature3 = DateTime.Today, Feature4 = 3.14},
20-
new InputData() { Feature1 = false, Feature2 = "14", Feature3 = DateTime.Today, Feature4 = 0.2046},
21-
new InputData() { Feature1 = false, Feature2 = "23", Feature3 = DateTime.Now, Feature4 = 0.1206},
22-
new InputData() { Feature1 = true, Feature2 = "8904", Feature3 = DateTime.UtcNow, Feature4 = 8.09},
20+
new InputData() { Feature1 = true, Feature2 = "0.4",
21+
Feature3 = DateTime.Now, Feature4 = 0.145},
22+
23+
new InputData() { Feature1 = false, Feature2 = "0.5",
24+
Feature3 = DateTime.Today, Feature4 = 3.14},
25+
26+
new InputData() { Feature1 = false, Feature2 = "14",
27+
Feature3 = DateTime.Today, Feature4 = 0.2046},
28+
29+
new InputData() { Feature1 = false, Feature2 = "23",
30+
Feature3 = DateTime.Now, Feature4 = 0.1206},
31+
32+
new InputData() { Feature1 = true, Feature2 = "8904",
33+
Feature3 = DateTime.UtcNow, Feature4 = 8.09},
2334
};
2435

2536
// Convert the data to an IDataView.
@@ -37,17 +48,20 @@ public static void Example()
3748

3849
// Let's fit our pipeline to the data.
3950
var transformer = pipeline.Fit(data);
40-
// Transforming the same data. This will add the 4 columns defined in the pipeline, containing the converted
51+
// Transforming the same data. This will add the 4 columns defined in
52+
// the pipeline, containing the converted
4153
// values of the initial columns.
4254
var transformedData = transformer.Transform(data);
4355

4456
// Shape the transformed data as a strongly typed IEnumerable.
45-
var convertedData = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, true);
57+
var convertedData = mlContext.Data.CreateEnumerable<TransformedData>(
58+
transformedData, true);
4659

4760
// Printing the results.
4861
Console.WriteLine("Converted1\t Converted2\t Converted3\t Converted4");
4962
foreach (var item in convertedData)
50-
Console.WriteLine($"\t{item.Converted1}\t {item.Converted2}\t\t {item.Converted3}\t {item.Converted4}");
63+
Console.WriteLine($"\t{item.Converted1}\t {item.Converted2}\t\t " +
64+
$"{item.Converted3}\t {item.Converted4}");
5165

5266
// Transformed data.
5367
//

docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/Hash.cs

Lines changed: 37 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@ public static class Hash
99
{
1010
public static void Example()
1111
{
12-
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
13-
// as well as the source of randomness.
12+
// Create a new ML context, for ML.NET operations. It can be used for
13+
// exception tracking and logging, as well as the source of randomness.
1414
var mlContext = new MLContext(seed: 1);
1515

1616
// Get a small dataset as an IEnumerable.
@@ -24,30 +24,40 @@ public static void Example()
2424

2525
var data = mlContext.Data.LoadFromEnumerable(rawData);
2626

27-
// Construct the pipeline that would hash the two columns and store the results in new columns.
28-
// The first transform hashes the string column and the second transform hashes the integer column.
27+
// Construct the pipeline that would hash the two columns and store the
28+
// results in new columns. The first transform hashes the string column
29+
// and the second transform hashes the integer column.
2930
//
30-
// Hashing is not a reversible operation, so there is no way to retrive the original value from the hashed value.
31-
// Sometimes, for debugging, or model explainability, users will need to know what values in the original columns generated
32-
// the values in the hashed columns, since the algorithms will mostly use the hashed values for further computations.
33-
// The Hash method will preserve the mapping from the original values to the hashed values in the Annotations of the
34-
// newly created column (column populated with the hashed values).
35-
//
36-
// Setting the maximumNumberOfInverts parameters to -1 will preserve the full map.
37-
// If that parameter is left to the default 0 value, the mapping is not preserved.
38-
var pipeline = mlContext.Transforms.Conversion.Hash("CategoryHashed", "Category", numberOfBits: 16, maximumNumberOfInverts: -1)
39-
.Append(mlContext.Transforms.Conversion.Hash("AgeHashed", "Age", numberOfBits: 8));
31+
// Hashing is not a reversible operation, so there is no way to retrieve
32+
// the original value from the hashed value. Sometimes, for debugging,
33+
// or model explainability, users will need to know what values in the
34+
// original columns generated the values in the hashed columns, since
35+
// the algorithms will mostly use the hashed values for further
36+
// computations. The Hash method will preserve the mapping from the
37+
// original values to the hashed values in the Annotations of the newly
38+
// created column (column populated with the hashed values).
39+
//
40+
// Setting the maximumNumberOfInverts parameter to -1 will preserve the
41+
// full map. If that parameter is left to the default 0 value, the
42+
// mapping is not preserved.
43+
var pipeline = mlContext.Transforms.Conversion.Hash("CategoryHashed",
44+
"Category", numberOfBits: 16, maximumNumberOfInverts: -1)
45+
.Append(mlContext.Transforms.Conversion.Hash("AgeHashed", "Age",
46+
numberOfBits: 8));
4047

4148
// Let's fit our pipeline, and then apply it to the same data.
4249
var transformer = pipeline.Fit(data);
4350
var transformedData = transformer.Transform(data);
4451

45-
// Convert the post transformation from the IDataView format to an IEnumerable<TransformedData> for easy consumption.
46-
var convertedData = mlContext.Data.CreateEnumerable<TransformedDataPoint>(transformedData, true);
52+
// Convert the post transformation from the IDataView format to an
53+
// IEnumerable<TransformedDataPoint> for easy consumption.
54+
var convertedData = mlContext.Data.CreateEnumerable<
55+
TransformedDataPoint>(transformedData, true);
4756

4857
Console.WriteLine("Category CategoryHashed\t Age\t AgeHashed");
4958
foreach (var item in convertedData)
50-
Console.WriteLine($"{item.Category}\t {item.CategoryHashed}\t\t {item.Age}\t {item.AgeHashed}");
59+
Console.WriteLine($"{item.Category}\t {item.CategoryHashed}\t\t " +
60+
$"{item.Age}\t {item.AgeHashed}");
5161

5262
// Expected data after the transformation.
5363
//
@@ -58,20 +68,24 @@ public static void Example()
5868
// MLB 36206 18 127
5969
// MLS 6013 14 62
6070

61-
// For the Category column, where we set the maximumNumberOfInverts parameter, the names of the original categories,
62-
// and their correspondance with the generated hash values is preserved in the Annotations in the format of indices and values.
63-
// the indices array will have the hashed values, and the corresponding element, position-wise, in the values array will
64-
// contain the original value.
71+
// For the Category column, where we set the maximumNumberOfInverts
72+
// parameter, the names of the original categories, and their
73+
// correspondence with the generated hash values is preserved in the
74+
// Annotations in the format of indices and values. The indices array
75+
// will have the hashed values, and the corresponding element,
76+
// position-wise, in the values array will contain the original value.
6577
//
6678
// See below for an example on how to retrieve the mapping.
6779
var slotNames = new VBuffer<ReadOnlyMemory<char>>();
68-
transformedData.Schema["CategoryHashed"].Annotations.GetValue("KeyValues", ref slotNames);
80+
transformedData.Schema["CategoryHashed"].Annotations.GetValue(
81+
"KeyValues", ref slotNames);
6982

7083
var indices = slotNames.GetIndices();
7184
var categoryNames = slotNames.GetValues();
7285

7386
for (int i = 0; i < indices.Length; i++)
74-
Console.WriteLine($"The original value of the {indices[i]} category is {categoryNames[i]}");
87+
Console.WriteLine($"The original value of the {indices[i]} " +
88+
$"category is {categoryNames[i]}");
7589

7690
// Output Data
7791
//

docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/KeyToValueToKey.cs

Lines changed: 38 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ public class KeyToValueToKey
1111
{
1212
public static void Example()
1313
{
14-
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
15-
// as well as the source of randomness.
14+
// Create a new ML context, for ML.NET operations. It can be used for
15+
// exception tracking and logging, as well as the source of randomness.
1616
var mlContext = new MLContext();
1717

1818
// Get a small dataset as an IEnumerable.
@@ -27,25 +27,40 @@ public static void Example()
2727

2828
// A pipeline to convert the terms of the 'Review' column into keys,
2929
// making use of default settings.
30-
var defaultPipeline = mlContext.Transforms.Text.TokenizeIntoWords("TokenizedText", nameof(DataPoint.Review))
31-
.Append(mlContext.Transforms.Conversion.MapValueToKey(nameof(TransformedData.Keys), "TokenizedText"));
30+
var defaultPipeline = mlContext.Transforms.Text.TokenizeIntoWords(
31+
"TokenizedText", nameof(DataPoint.Review)).Append(mlContext
32+
.Transforms.Conversion.MapValueToKey(nameof(TransformedData.Keys),
33+
"TokenizedText"));
3234

33-
// Another pipeline, that customizes the advanced settings of the ValueToKeyMappingEstimator.
34-
// We can change the maximumNumberOfKeys to limit how many keys will get generated out of the set of words,
35-
// and condition the order in which they get evaluated by changing keyOrdinality from the default ByOccurence (order in which they get encountered)
36-
// to value/alphabetically.
37-
var customizedPipeline = mlContext.Transforms.Text.TokenizeIntoWords("TokenizedText", nameof(DataPoint.Review))
38-
.Append(mlContext.Transforms.Conversion.MapValueToKey(nameof(TransformedData.Keys), "TokenizedText", maximumNumberOfKeys: 10,
39-
keyOrdinality: ValueToKeyMappingEstimator.KeyOrdinality.ByValue));
35+
// Another pipeline, that customizes the advanced settings of the
36+
// ValueToKeyMappingEstimator. We can change the maximumNumberOfKeys to
37+
// limit how many keys will get generated out of the set of words, and
38+
// condition the order in which they get evaluated by changing
39+
// keyOrdinality from the default ByOccurrence (order in which they get
40+
// encountered) to value/alphabetically.
41+
var customizedPipeline = mlContext.Transforms.Text.TokenizeIntoWords(
42+
"TokenizedText", nameof(DataPoint.Review)).Append(mlContext
43+
.Transforms.Conversion.MapValueToKey(nameof(TransformedData.Keys),
44+
"TokenizedText", maximumNumberOfKeys: 10, keyOrdinality:
45+
ValueToKeyMappingEstimator.KeyOrdinality.ByValue));
4046

4147
// The transformed data.
42-
var transformedDataDefault = defaultPipeline.Fit(trainData).Transform(trainData);
43-
var transformedDataCustomized = customizedPipeline.Fit(trainData).Transform(trainData);
48+
var transformedDataDefault = defaultPipeline.Fit(trainData).Transform(
49+
trainData);
50+
51+
var transformedDataCustomized = customizedPipeline.Fit(trainData)
52+
.Transform(trainData);
4453

4554
// Getting the resulting data as an IEnumerable.
4655
// This will contain the newly created columns.
47-
IEnumerable<TransformedData> defaultData = mlContext.Data.CreateEnumerable<TransformedData>(transformedDataDefault, reuseRowObject: false);
48-
IEnumerable<TransformedData> customizedData = mlContext.Data.CreateEnumerable<TransformedData>(transformedDataCustomized, reuseRowObject: false);
56+
IEnumerable<TransformedData> defaultData = mlContext.Data.
57+
CreateEnumerable<TransformedData>(transformedDataDefault,
58+
reuseRowObject: false);
59+
60+
IEnumerable<TransformedData> customizedData = mlContext.Data.
61+
CreateEnumerable<TransformedData>(transformedDataCustomized,
62+
reuseRowObject: false);
63+
4964
Console.WriteLine($"Keys");
5065
foreach (var dataRow in defaultData)
5166
Console.WriteLine($"{string.Join(',', dataRow.Keys)}");
@@ -65,13 +80,17 @@ public static void Example()
6580
// 8,2,9,7,6,4
6681
// 3,10,0,0,0
6782
// 3,10,0,0,0,8
68-
// Retrieve the original values, by appending the KeyToValue etimator to the existing pipelines
69-
// to convert the keys back to the strings.
70-
var pipeline = defaultPipeline.Append(mlContext.Transforms.Conversion.MapKeyToValue(nameof(TransformedData.Keys)));
83+
// Retrieve the original values, by appending the KeyToValue estimator to
84+
// the existing pipelines to convert the keys back to the strings.
85+
var pipeline = defaultPipeline.Append(mlContext.Transforms.Conversion
86+
.MapKeyToValue(nameof(TransformedData.Keys)));
87+
7188
transformedDataDefault = pipeline.Fit(trainData).Transform(trainData);
7289

7390
// Preview of the DefaultColumnName column obtained.
74-
var originalColumnBack = transformedDataDefault.GetColumn<VBuffer<ReadOnlyMemory<char>>>(transformedDataDefault.Schema[nameof(TransformedData.Keys)]);
91+
var originalColumnBack = transformedDataDefault.GetColumn<VBuffer<
92+
ReadOnlyMemory<char>>>(transformedDataDefault.Schema[nameof(
93+
TransformedData.Keys)]);
7594

7695
foreach (var row in originalColumnBack)
7796
{

docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapKeyToBinaryVector.cs

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,16 @@ namespace Samples.Dynamic
77
{
88
class MapKeyToBinaryVector
99
{
10-
/// This example demonstrates the use of MapKeyToVector by mapping keys to floats[] of 0 and 1, representing the number in binary format.
11-
/// Because the ML.NET KeyType maps the missing value to zero, counting starts at 1, so the uint values
12-
/// converted to KeyTypes will appear skewed by one.
10+
/// This example demonstrates the use of MapKeyToBinaryVector by mapping keys to
11+
/// floats[] of 0 and 1, representing the number in binary format.
12+
/// Because the ML.NET KeyType maps the missing value to zero, counting
13+
/// starts at 1, so the uint values converted to KeyTypes will appear
14+
/// skewed by one.
1315
/// See https://github.com/dotnet/machinelearning/blob/master/docs/code/IDataViewTypeSystem.md#key-types
1416
public static void Example()
1517
{
16-
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
17-
// as well as the source of randomness.
18+
// Create a new ML context, for ML.NET operations. It can be used for
19+
// exception tracking and logging, as well as the source of randomness.
1820
var mlContext = new MLContext();
1921

2022
// Get a small dataset as an IEnumerable.
@@ -30,18 +32,21 @@ public static void Example()
3032
var data = mlContext.Data.LoadFromEnumerable(rawData);
3133

3234
// Constructs the ML.net pipeline
33-
var pipeline = mlContext.Transforms.Conversion.MapKeyToBinaryVector("TimeframeVector", "Timeframe");
35+
var pipeline = mlContext.Transforms.Conversion.MapKeyToBinaryVector(
36+
"TimeframeVector", "Timeframe");
3437

3538
// Fits the pipeline to the data.
3639
IDataView transformedData = pipeline.Fit(data).Transform(data);
3740

3841
// Getting the resulting data as an IEnumerable.
3942
// This will contain the newly created columns.
40-
IEnumerable<TransformedData> features = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, reuseRowObject: false);
43+
IEnumerable<TransformedData> features = mlContext.Data.CreateEnumerable<
44+
TransformedData>(transformedData, reuseRowObject: false);
4145

4246
Console.WriteLine($" Timeframe TimeframeVector");
4347
foreach (var featureRow in features)
44-
Console.WriteLine($"{featureRow.Timeframe}\t\t\t{string.Join(',', featureRow.TimeframeVector)}");
48+
Console.WriteLine($"{featureRow.Timeframe}\t\t\t" +
49+
$"{string.Join(',', featureRow.TimeframeVector)}");
4550

4651
// Timeframe TimeframeVector
4752
// 10 0,1,0,0,1 //binary representation of 9, the original value

0 commit comments

Comments
 (0)