Skip to content

Commit 14c7a47

Browse files
authored
Clean up of TextLoader constructor (#1784)
1 parent 2c87b19 commit 14c7a47

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+344
-407
lines changed

docs/code/MlNetCookBook.md

Lines changed: 46 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ This is how you can read this data:
9595
var mlContext = new MLContext();
9696

9797
// Create the reader: define the data columns and where to find them in the text file.
98-
var reader = mlContext.Data.TextReader(ctx => (
98+
var reader = mlContext.Data.CreateTextReader(ctx => (
9999
// A boolean column depicting the 'target label'.
100100
IsOver50K: ctx.LoadBool(0),
101101
// Three text columns.
@@ -115,9 +115,7 @@ If the schema of the data is not known at compile time, or too cumbersome, you c
115115
var mlContext = new MLContext();
116116

117117
// Create the reader: define the data columns and where to find them in the text file.
118-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
119-
{
120-
Column = new[] {
118+
var reader = mlContext.Data.CreateTextReader(new[] {
121119
// A boolean column depicting the 'label'.
122120
new TextLoader.Column("IsOver50K", DataKind.BL, 0),
123121
// Three text columns.
@@ -126,8 +124,8 @@ var reader = mlContext.Data.TextReader(new TextLoader.Arguments
126124
new TextLoader.Column("MaritalStatus", DataKind.TX, 3)
127125
},
128126
// First line of the file is a header, not a data row.
129-
HasHeader = true
130-
});
127+
hasHeader: true
128+
);
131129

132130
// Now read the file (remember though, readers are lazy, so the actual reading will happen when the data is accessed).
133131
var data = reader.Read(dataPath);
@@ -155,7 +153,7 @@ This is how you can read this data:
155153
var mlContext = new MLContext();
156154

157155
// Create the reader: define the data columns and where to find them in the text file.
158-
var reader = mlContext.Data.TextReader(ctx => (
156+
var reader = mlContext.Data.CreateTextReader(ctx => (
159157
// A boolean column depicting the 'target label'.
160158
IsOver50K: ctx.LoadBool(14),
161159
// Three text columns.
@@ -175,19 +173,17 @@ The code is very similar using the dynamic API:
175173
var mlContext = new MLContext();
176174

177175
// Create the reader: define the data columns and where to find them in the text file.
178-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
179-
{
180-
Column = new[] {
176+
var reader = mlContext.Data.CreateTextReader(new[] {
181177
// A boolean column depicting the 'label'.
182-
new TextLoader.Column("IsOver50k", DataKind.BL, 0),
178+
new TextLoader.Column("IsOver50K", DataKind.BL, 0),
183179
// Three text columns.
184180
new TextLoader.Column("Workclass", DataKind.TX, 1),
185181
new TextLoader.Column("Education", DataKind.TX, 2),
186182
new TextLoader.Column("MaritalStatus", DataKind.TX, 3)
187183
},
188184
// First line of the file is a header, not a data row.
189-
HasHeader = true
190-
});
185+
hasHeader: true
186+
);
191187

192188
var data = reader.Read(exampleFile1, exampleFile2);
193189
```
@@ -211,7 +207,7 @@ Reading this file using `TextLoader`:
211207
var mlContext = new MLContext();
212208

213209
// Create the reader: define the data columns and where to find them in the text file.
214-
var reader = mlContext.Data.TextReader(ctx => (
210+
var reader = mlContext.Data.CreateTextReader(ctx => (
215211
// We read the first 11 values as a single float vector.
216212
FeatureVector: ctx.LoadFloat(0, 10),
217213
// Separately, read the target variable.
@@ -233,7 +229,7 @@ If the schema of the data is not known at compile time, or too cumbersome, you c
233229
var mlContext = new MLContext();
234230

235231
// Create the reader: define the data columns and where to find them in the text file.
236-
var reader = mlContext.Data.TextReader(new[] {
232+
var reader = mlContext.Data.CreateTextReader(new[] {
237233
// We read the first 10 values as a single float vector.
238234
new TextLoader.Column("FeatureVector", DataKind.R4, new[] {new TextLoader.Range(0, 9)}),
239235
// Separately, read the target variable.
@@ -302,7 +298,7 @@ Label Workclass education marital-status
302298
var mlContext = new MLContext();
303299

304300
// Create the reader: define the data columns and where to find them in the text file.
305-
var reader = mlContext.Data.TextReader(ctx => (
301+
var reader = mlContext.Data.CreateTextReader(ctx => (
306302
// A boolean column depicting the 'target label'.
307303
IsOver50K: ctx.LoadBool(0),
308304
// Three text columns.
@@ -365,19 +361,17 @@ You can also use the dynamic API to create the equivalent of the previous pipeli
365361
var mlContext = new MLContext();
366362

367363
// Create the reader: define the data columns and where to find them in the text file.
368-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
369-
{
370-
Column = new[] {
364+
var reader = mlContext.Data.CreateTextReader(new[] {
371365
// A boolean column depicting the 'label'.
372-
new TextLoader.Column("IsOver50k", DataKind.BL, 0),
366+
new TextLoader.Column("IsOver50K", DataKind.BL, 0),
373367
// Three text columns.
374368
new TextLoader.Column("Workclass", DataKind.TX, 1),
375369
new TextLoader.Column("Education", DataKind.TX, 2),
376370
new TextLoader.Column("MaritalStatus", DataKind.TX, 3)
377371
},
378372
// First line of the file is a header, not a data row.
379-
HasHeader = true
380-
});
373+
hasHeader: true
374+
);
381375

382376
// Start creating our processing pipeline. For now, let's just concatenate all the text columns
383377
// together into one.
@@ -428,7 +422,7 @@ var mlContext = new MLContext();
428422

429423
// Step one: read the data as an IDataView.
430424
// First, we define the reader: specify the data columns and where to find them in the text file.
431-
var reader = mlContext.Data.TextReader(ctx => (
425+
var reader = mlContext.Data.CreateTextReader(ctx => (
432426
// We read the first 11 values as a single float vector.
433427
FeatureVector: ctx.LoadFloat(0, 10),
434428
// Separately, read the target variable.
@@ -482,20 +476,18 @@ var mlContext = new MLContext();
482476

483477
// Step one: read the data as an IDataView.
484478
// First, we define the reader: specify the data columns and where to find them in the text file.
485-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
486-
{
487-
Column = new[] {
479+
var reader = mlContext.Data.CreateTextReader(new[] {
488480
// We read the first 11 values as a single float vector.
489481
new TextLoader.Column("FeatureVector", DataKind.R4, 0, 10),
490482

491483
// Separately, read the target variable.
492484
new TextLoader.Column("Target", DataKind.R4, 11),
493485
},
494486
// First line of the file is a header, not a data row.
495-
HasHeader = true,
487+
hasHeader: true,
496488
// Default separator is tab, but we need a semicolon.
497-
Separator = ";"
498-
});
489+
separatorChar: ';'
490+
);
499491

500492
// Now read the file (remember though, readers are lazy, so the actual reading will happen when the data is accessed).
501493
var trainData = reader.Read(trainDataPath);
@@ -603,7 +595,7 @@ var mlContext = new MLContext();
603595

604596
// Step one: read the data as an IDataView.
605597
// First, we define the reader: specify the data columns and where to find them in the text file.
606-
var reader = mlContext.Data.TextReader(ctx => (
598+
var reader = mlContext.Data.CreateTextReader(ctx => (
607599
// The four features of the Iris dataset.
608600
SepalLength: ctx.LoadFloat(0),
609601
SepalWidth: ctx.LoadFloat(1),
@@ -653,9 +645,7 @@ var mlContext = new MLContext();
653645

654646
// Step one: read the data as an IDataView.
655647
// First, we define the reader: specify the data columns and where to find them in the text file.
656-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
657-
{
658-
Column = new[] {
648+
var reader = mlContext.Data.CreateTextReader(new[] {
659649
new TextLoader.Column("SepalLength", DataKind.R4, 0),
660650
new TextLoader.Column("SepalWidth", DataKind.R4, 1),
661651
new TextLoader.Column("PetalLength", DataKind.R4, 2),
@@ -664,8 +654,8 @@ var reader = mlContext.Data.TextReader(new TextLoader.Arguments
664654
new TextLoader.Column("Label", DataKind.TX, 4),
665655
},
666656
// Default separator is tab, but the dataset uses commas.
667-
Separator = ","
668-
});
657+
separatorChar: ','
658+
);
669659

670660
// Retrieve the training data.
671661
var trainData = reader.Read(irisDataPath);
@@ -821,7 +811,7 @@ var mlContext = new MLContext();
821811

822812
// Step one: read the data as an IDataView.
823813
// First, we define the reader: specify the data columns and where to find them in the text file.
824-
var reader = mlContext.Data.TextReader(ctx => (
814+
var reader = mlContext.Data.CreateTextReader(ctx => (
825815
// The four features of the Iris dataset.
826816
SepalLength: ctx.LoadFloat(0),
827817
SepalWidth: ctx.LoadFloat(1),
@@ -917,7 +907,7 @@ Here's a snippet of code that demonstrates normalization in learning pipelines.
917907
var mlContext = new MLContext();
918908

919909
// Define the reader: specify the data columns and where to find them in the text file.
920-
var reader = mlContext.Data.TextReader(ctx => (
910+
var reader = mlContext.Data.CreateTextReader(ctx => (
921911
// The four features of the Iris dataset will be grouped together as one Features column.
922912
Features: ctx.LoadFloat(0, 3),
923913
// Label: kind of iris.
@@ -952,17 +942,15 @@ You can achieve the same results using the dynamic API.
952942
var mlContext = new MLContext();
953943

954944
// Define the reader: specify the data columns and where to find them in the text file.
955-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
956-
{
957-
Column = new[] {
945+
var reader = mlContext.Data.CreateTextReader(new[] {
958946
// The four features of the Iris dataset will be grouped together as one Features column.
959947
new TextLoader.Column("Features", DataKind.R4, 0, 3),
960948
// Label: kind of iris.
961949
new TextLoader.Column("Label", DataKind.TX, 4),
962950
},
963951
// Default separator is tab, but the dataset uses commas.
964-
Separator = ","
965-
});
952+
separatorChar: ','
953+
);
966954

967955
// Read the training data.
968956
var trainData = reader.Read(dataPath);
@@ -1011,7 +999,7 @@ Label Workclass education marital-status occupation relationship ethnicity sex n
1011999
var mlContext = new MLContext();
10121000

10131001
// Define the reader: specify the data columns and where to find them in the text file.
1014-
var reader = mlContext.Data.TextReader(ctx => (
1002+
var reader = mlContext.Data.CreateTextReader(ctx => (
10151003
Label: ctx.LoadBool(0),
10161004
// We will load all the categorical features into one vector column of size 8.
10171005
CategoricalFeatures: ctx.LoadText(1, 8),
@@ -1073,9 +1061,8 @@ You can achieve the same results using the dynamic API.
10731061
var mlContext = new MLContext();
10741062

10751063
// Define the reader: specify the data columns and where to find them in the text file.
1076-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
1077-
{
1078-
Column = new[] {
1064+
var reader = mlContext.Data.CreateTextReader(new[]
1065+
{
10791066
new TextLoader.Column("Label", DataKind.BL, 0),
10801067
// We will load all the categorical features into one vector column of size 8.
10811068
new TextLoader.Column("CategoricalFeatures", DataKind.TX, 1, 8),
@@ -1084,8 +1071,8 @@ var reader = mlContext.Data.TextReader(new TextLoader.Arguments
10841071
// Let's also separately load the 'Workclass' column.
10851072
new TextLoader.Column("Workclass", DataKind.TX, 1),
10861073
},
1087-
HasHeader = true
1088-
});
1074+
hasHeader: true
1075+
);
10891076

10901077
// Read the data.
10911078
var data = reader.Read(dataPath);
@@ -1157,7 +1144,7 @@ Sentiment SentimentText
11571144
var mlContext = new MLContext();
11581145

11591146
// Define the reader: specify the data columns and where to find them in the text file.
1160-
var reader = mlContext.Data.TextReader(ctx => (
1147+
var reader = mlContext.Data.CreateTextReader(ctx => (
11611148
IsToxic: ctx.LoadBool(0),
11621149
Message: ctx.LoadText(1)
11631150
), hasHeader: true);
@@ -1207,14 +1194,13 @@ You can achieve the same results using the dynamic API.
12071194
var mlContext = new MLContext();
12081195

12091196
// Define the reader: specify the data columns and where to find them in the text file.
1210-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
1211-
{
1212-
Column = new[] {
1197+
var reader = mlContext.Data.CreateTextReader(new[]
1198+
{
12131199
new TextLoader.Column("IsToxic", DataKind.BL, 0),
12141200
new TextLoader.Column("Message", DataKind.TX, 1),
12151201
},
1216-
HasHeader = true
1217-
});
1202+
hasHeader: true
1203+
);
12181204

12191205
// Read the data.
12201206
var data = reader.Read(dataPath);
@@ -1274,7 +1260,7 @@ var mlContext = new MLContext();
12741260

12751261
// Step one: read the data as an IDataView.
12761262
// First, we define the reader: specify the data columns and where to find them in the text file.
1277-
var reader = mlContext.Data.TextReader(ctx => (
1263+
var reader = mlContext.Data.CreateTextReader(ctx => (
12781264
// The four features of the Iris dataset.
12791265
SepalLength: ctx.LoadFloat(0),
12801266
SepalWidth: ctx.LoadFloat(1),
@@ -1330,9 +1316,8 @@ var mlContext = new MLContext();
13301316

13311317
// Step one: read the data as an IDataView.
13321318
// First, we define the reader: specify the data columns and where to find them in the text file.
1333-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
1334-
{
1335-
Column = new[] {
1319+
var reader = mlContext.Data.CreateTextReader(new[]
1320+
{
13361321
// The four features of the Iris dataset.
13371322
new TextLoader.Column("SepalLength", DataKind.R4, 0),
13381323
new TextLoader.Column("SepalWidth", DataKind.R4, 1),
@@ -1342,8 +1327,8 @@ var reader = mlContext.Data.TextReader(new TextLoader.Arguments
13421327
new TextLoader.Column("Label", DataKind.TX, 4),
13431328
},
13441329
// Default separator is tab, but the dataset uses commas.
1345-
Separator = ","
1346-
});
1330+
separatorChar: ','
1331+
);
13471332

13481333
// Read the data.
13491334
var data = reader.Read(dataPath);
@@ -1395,7 +1380,7 @@ var mlContext = new MLContext();
13951380

13961381
// Read the data as an IDataView.
13971382
// First, we define the reader: specify the data columns and where to find them in the text file.
1398-
var reader = mlContext.Data.TextReader(ctx => (
1383+
var reader = mlContext.Data.CreateTextReader(ctx => (
13991384
// The four features of the Iris dataset.
14001385
SepalLength: ctx.LoadFloat(0),
14011386
SepalWidth: ctx.LoadFloat(1),

docs/samples/Microsoft.ML.Samples/Dynamic/FeatureContributionCalculationTransform.cs

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,8 @@ public static void FeatureContributionCalculationTransform_Regression()
1919

2020
// Step 1: Read the data as an IDataView.
2121
// First, we define the reader: specify the data columns and where to find them in the text file.
22-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments()
23-
{
24-
Separator = "tab",
25-
HasHeader = true,
26-
Column = new[]
22+
var reader = mlContext.Data.CreateTextReader(
23+
columns: new[]
2724
{
2825
new TextLoader.Column("MedianHomeValue", DataKind.R4, 0),
2926
new TextLoader.Column("CrimesPerCapita", DataKind.R4, 1),
@@ -37,8 +34,9 @@ public static void FeatureContributionCalculationTransform_Regression()
3734
new TextLoader.Column("HighwayDistance", DataKind.R4, 9),
3835
new TextLoader.Column("TaxRate", DataKind.R4, 10),
3936
new TextLoader.Column("TeacherRatio", DataKind.R4, 11),
40-
}
41-
});
37+
},
38+
hasHeader: true
39+
);
4240

4341
// Read the data
4442
var data = reader.Read(dataFile);

docs/samples/Microsoft.ML.Samples/Dynamic/FeatureSelectionTransform.cs

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,16 +31,14 @@ public static void FeatureSelectionTransform()
3131

3232
// First, we define the reader: specify the data columns and where to find them in the text file. Notice that we combine entries from
3333
// all the feature columns into entries of a vector of a single column named "Features".
34-
var reader = ml.Data.TextReader(new TextLoader.Arguments()
35-
{
36-
Separator = "tab",
37-
HasHeader = true,
38-
Column = new[]
34+
var reader = ml.Data.CreateTextReader(
35+
columns: new[]
3936
{
4037
new TextLoader.Column("Label", DataKind.BL, 0),
4138
new TextLoader.Column("Features", DataKind.Num, new [] { new TextLoader.Range(1, 9) })
42-
}
43-
});
39+
},
40+
hasHeader: true
41+
);
4442

4543
// Then, we use the reader to read the data as an IDataView.
4644
var data = reader.Read(dataFilePath);

docs/samples/Microsoft.ML.Samples/Dynamic/GeneralizedAdditiveModels.cs

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,8 @@ public static void RunExample()
1919

2020
// Step 1: Read the data as an IDataView.
2121
// First, we define the reader: specify the data columns and where to find them in the text file.
22-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments()
23-
{
24-
Separator = "tab",
25-
HasHeader = true,
26-
Column = new[]
22+
var reader = mlContext.Data.CreateTextReader(
23+
columns: new[]
2724
{
2825
new TextLoader.Column("MedianHomeValue", DataKind.R4, 0),
2926
new TextLoader.Column("CrimesPerCapita", DataKind.R4, 1),
@@ -37,8 +34,9 @@ public static void RunExample()
3734
new TextLoader.Column("HighwayDistance", DataKind.R4, 9),
3835
new TextLoader.Column("TaxRate", DataKind.R4, 10),
3936
new TextLoader.Column("TeacherRatio", DataKind.R4, 11),
40-
}
41-
});
37+
},
38+
hasHeader: true
39+
);
4240

4341
// Read the data
4442
var data = reader.Read(dataFile);

0 commit comments

Comments
 (0)