Skip to content

Commit 1e22d1d

Browse files
authored
Removed extraSettings param in unit test datasets file (#4808)
* Update Datasets.cs * Update Datasets.cs
1 parent c9d950e commit 1e22d1d

File tree

1 file changed

+34
-55
lines changed

1 file changed

+34
-55
lines changed

test/Microsoft.ML.TestFrameworkCommon/Datasets.cs

Lines changed: 34 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ public class TestDataset
2121
// REVIEW: Replace these with appropriate SubComponents!
2222
public string settings;
2323
public string testSettings;
24-
public string extraSettings;
2524
// REVIEW: Remove the three above setting strings once conversion work is complete.
2625
public string loaderSettings;
2726
public string[] mamlExtraSettings;
@@ -38,7 +37,6 @@ public TestDataset Clone()
3837
labelFilename = labelFilename,
3938
settings = settings,
4039
testSettings = testSettings,
41-
extraSettings = extraSettings,
4240
loaderSettings = loaderSettings,
4341
mamlExtraSettings = mamlExtraSettings
4442
};
@@ -80,8 +78,7 @@ public static class TestDatasets
8078
{
8179
name = "breast-cancer",
8280
trainFilename = "breast-cancer.txt",
83-
testFilename = "breast-cancer.txt",
84-
extraSettings = "cacheinst- inst=Text{label=0 attr=1-9}",
81+
testFilename = "breast-cancer.txt"
8582
};
8683

8784
/// <summary>
@@ -94,8 +91,7 @@ public static class TestDatasets
9491
testFilename = "breast-cancer.txt",
9592
// Using "col=Features:1-5,6,7-9" improves code coverage. Same with "col=Attr:TX:6".
9693
loaderSettings = "loader=Text{sparse- col=Attr:TX:6 col=Label:0 col=Features:1-5,6,7-9}",
97-
mamlExtraSettings = new[] { "cache-" },
98-
extraSettings = "/cacheinst- /inst Pipe{loader=Text{sparse- col=Attr:TX:6 col=Label:0 col=Features:1-5,6,7-9} lab=Label feat=Features}"
94+
mamlExtraSettings = new[] { "cache-" }
9995
};
10096

10197
/// <summary>
@@ -108,7 +104,7 @@ public static class TestDatasets
108104
testFilename = "breast-cancer.txt",
109105
// Note that More and More_Cleansed are not really needed (duplicate info), but improve code coverage.
110106
loaderSettings = "loader=Text{col=Label:0 col=Good:1-5,7-9 col=Mixed:6 col=More:4-6}",
111-
mamlExtraSettings = new[] { "cache-", "xf=NAHandle{col=Fixed:Mixed col=More}", "xf=Concat{col=Features:Good,Fixed,More}" },
107+
mamlExtraSettings = new[] { "cache-", "xf=NAHandle{col=Fixed:Mixed col=More}", "xf=Concat{col=Features:Good,Fixed,More}" }
112108
};
113109

114110
/// <summary>
@@ -121,7 +117,7 @@ public static class TestDatasets
121117
testFilename = "breast-cancer.txt",
122118
// Note that More and More_Cleansed are not really needed (duplicate info), but improve code coverage.
123119
loaderSettings = "loader=Text{col=Label:0 col=Good:1-5,7-9 col=Mixed:6 col=More:4-6}",
124-
mamlExtraSettings = new[] { "cache-", "xf=MissingFilter{col=Mixed col=More}", "xf=Concat{col=Features:Good,Mixed,More}" },
120+
mamlExtraSettings = new[] { "cache-", "xf=MissingFilter{col=Mixed col=More}", "xf=Concat{col=Features:Good,Mixed,More}" }
125121
};
126122

127123
public static TestDataset breastCancerOneClass = new TestDataset
@@ -146,7 +142,7 @@ public static class TestDatasets
146142
public static TestDataset Digits = new TestDataset
147143
{
148144
name = "Digits",
149-
trainFilename = @"external/digits.csv",
145+
trainFilename = @"external/digits.csv"
150146
};
151147

152148
public static TestDataset vw = new TestDataset
@@ -239,7 +235,7 @@ public static class TestDatasets
239235
trainFilename = "MSM-sparse-sample-train.txt",
240236
testFilename = "MSM-sparse-sample-test.txt",
241237
loaderSettings = "loader=Text{col=Name:TX:0 col=Label:Num:1 col=Features:Num:~}",
242-
mamlExtraSettings = new[] { "xf=Expr{col=Name expr={x=>right(x, 1)}}" },
238+
mamlExtraSettings = new[] { "xf=Expr{col=Name expr={x=>right(x, 1)}}" }
243239
};
244240

245241
public static TestDataset msmNamesHeader = new TestDataset
@@ -269,8 +265,7 @@ public static class TestDatasets
269265
{
270266
name = "breast-cancer-bing",
271267
trainFilename = "breast-cancer-bing.txt",
272-
testFilename = "breast-cancer-bing.txt",
273-
extraSettings = "/inst ExtractInstances{}"
268+
testFilename = "breast-cancer-bing.txt"
274269
};
275270

276271
public static TestDataset adult = new TestDataset
@@ -281,8 +276,7 @@ public static class TestDatasets
281276
fileHasHeader = true,
282277
fileSeparator = '\t',
283278
loaderSettings = "loader=Text{header+ col=Label:0 col=Num:9-14 col=Cat:TX:1-8}",
284-
mamlExtraSettings = new[] { "xf=Cat{col=Cat}", "xf=Concat{col=Features:Num,Cat}" },
285-
extraSettings = @"/inst Text{header+ sep=, label=14 handler=Categorical{cols=5-9,1,13,3}}",
279+
mamlExtraSettings = new[] { "xf=Cat{col=Cat}", "xf=Concat{col=Features:Num,Cat}" }
286280
};
287281

288282
public static TestDataset adultOnlyCat = new TestDataset
@@ -291,8 +285,7 @@ public static class TestDatasets
291285
trainFilename = "adult.tiny.with-schema.txt",
292286
testFilename = "adult.tiny.with-schema.txt",
293287
loaderSettings = "loader=Text{header+ col=Label:0 col=Cat:TX:1-8}",
294-
mamlExtraSettings = new[] { "xf=Cat{col=Cat}", "xf=Concat{col=Features:Cat}" },
295-
extraSettings = @"/inst Text{header+ sep=, label=14 handler=Categorical{cols=5-9,1,13,3}}",
288+
mamlExtraSettings = new[] { "xf=Cat{col=Cat}", "xf=Concat{col=Features:Cat}" }
296289
};
297290

298291
public static TestDataset adultHash = new TestDataset
@@ -301,8 +294,7 @@ public static class TestDatasets
301294
trainFilename = "adult.tiny.with-schema.txt",
302295
testFilename = "adult.tiny.with-schema.txt",
303296
loaderSettings = "loader=Text{header+ col=Label:0 col=Num:9-14 col=Cat:TX:1-8}",
304-
mamlExtraSettings = new[] { "xf=CatHash{col=Cat bits=5}", "xf=Concat{col=Features:Num,Cat}" },
305-
extraSettings = @"/inst Text{header+ sep=, label=14 handler=CatHash{cols=1,3,5-9,13 bits=5}}"
297+
mamlExtraSettings = new[] { "xf=CatHash{col=Cat bits=5}", "xf=Concat{col=Features:Num,Cat}" }
306298
};
307299

308300
public static TestDataset adultHashWithDataPipe = new TestDataset
@@ -320,8 +312,7 @@ public static class TestDatasets
320312
trainFilename = "adult.tiny.with-schema.txt",
321313
testFilename = "adult.tiny.with-schema.txt",
322314
loaderSettings = "loader=Text{header+ col=Label:0 col=Word:TX:1-8 col=Num:~}",
323-
mamlExtraSettings = new[] { "xf=WordBag{col=Word}", "xf=Concat{col=Features:Num,Word}" },
324-
extraSettings = @"/inst Text{header+ sep=, label=14 handler=WordBag{cols=1,3,5-9,13}}"
315+
mamlExtraSettings = new[] { "xf=WordBag{col=Word}", "xf=Concat{col=Features:Num,Word}" }
325316
};
326317

327318
public static TestDataset adultTextHash = new TestDataset
@@ -330,15 +321,14 @@ public static class TestDatasets
330321
trainFilename = "adult.tiny.with-schema.txt",
331322
testFilename = "adult.tiny.with-schema.txt",
332323
loaderSettings = "loader=Text{header+ col=Label:0 col=Word:TX:1-8 col=Num:~}",
333-
mamlExtraSettings = new[] { "xf=WordHashBag{col=Word bits=8}", "xf=Concat{col=Features:Num,Word}" },
334-
extraSettings = @"/inst Text{header+ sep=, label=14 handler=WordHashBag{cols=1,3,5-9,13 sep=, bits=8}}"
324+
mamlExtraSettings = new[] { "xf=WordHashBag{col=Word bits=8}", "xf=Concat{col=Features:Num,Word}" }
335325
};
336326

337327
public static TestDataset adultRanking = new TestDataset
338328
{
339329
name = "adultRanking",
340330
trainFilename = "adult.tiny.with-schema.txt",
341-
loaderSettings = "loader=Text{header+ sep=tab, col=Label:R4:0 col=Workclass:TX:1 col=Categories:TX:2-8 col=NumericFeatures:R4:9-14}",
331+
loaderSettings = "loader=Text{header+ sep=tab, col=Label:R4:0 col=Workclass:TX:1 col=Categories:TX:2-8 col=NumericFeatures:R4:9-14}"
342332
};
343333

344334
public static TestDataset displayPoisson = new TestDataset
@@ -365,7 +355,7 @@ public static class TestDatasets
365355
trainFilename = @"..\children\children.txt",
366356
testFilename = @"..\children\children.txt",
367357
loaderSettings = "loader=Text{header+ sep=space col=Cat1:TX:1 col=Cat2:TX:2 col=Cat3:TX:3 col=Label:4 col=Ignore:TX:0,5-7 col=Features:8-*}",
368-
mamlExtraSettings = new[] { "xf=Cat{col=Cat1 col=Cat2 col=Cat3}", "xf=Concat{col=Features:Features,Cat1,Cat2,Cat3}" },
358+
mamlExtraSettings = new[] { "xf=Cat{col=Cat1 col=Cat2 col=Cat3}", "xf=Concat{col=Features:Features,Cat1,Cat2,Cat3}" }
369359
// settings = "header+;sep:space;cat:1,2,3;label:4;attr:0,5,6,7",
370360
};
371361

@@ -377,8 +367,7 @@ public static class TestDatasets
377367
loaderSettings = "loader=Text{col=Label:0 col=Cat3:TX:3 col=Cat4:TX:4 col=Cat5:TX:5 col=Cat6:TX:6 col=Cat7:TX:7 col=Cat8:TX:8 col=Cat9:TX:9 col=Cat15:TX:15 col=Cat16:TX:16 col=Cat18:TX:18 col=Features:~}",
378368
mamlExtraSettings = new[] {
379369
"xf=Cat{col=Cat3 col=Cat4 col=Cat5 col=Cat6 col=Cat7 col=Cat8 col=Cat9 col=Cat15 col=Cat16 col=Cat18}",
380-
"xf=Concat{col=Features:Features,Cat3,Cat4,Cat5,Cat6,Cat7,Cat8,Cat9,Cat15,Cat16,Cat18}" },
381-
// extraSettings = "inst=Text{cat=3,4,5,6,7,8,9,15,16,18 label=0 maxBad=100}"
370+
"xf=Concat{col=Features:Features,Cat3,Cat4,Cat5,Cat6,Cat7,Cat8,Cat9,Cat15,Cat16,Cat18}" }
382371
};
383372

384373
public static TestDataset reutersMaxDim = new TestDataset()
@@ -396,7 +385,7 @@ public static class TestDatasets
396385
trainFilename = @"iris.txt",
397386
testFilename = @"iris.txt",
398387
loaderSettings = "loader=Text{col=Label:TX:0 col=Features:1-*}",
399-
mamlExtraSettings = new[] { "xf=Term{col=Label}" },
388+
mamlExtraSettings = new[] { "xf=Term{col=Label}" }
400389
};
401390

402391
public static TestDataset irisData = new TestDataset()
@@ -423,7 +412,7 @@ public static class TestDatasets
423412
trainFilename = @"iris-label-name.txt",
424413
testFilename = @"iris-label-name.txt",
425414
loaderSettings = "loader=Text{header+ col=Label:TX:0 col=Features:1-*}",
426-
mamlExtraSettings = new[] { "xf=Term{col=Label}" },
415+
mamlExtraSettings = new[] { "xf=Term{col=Label}" }
427416
};
428417

429418
public static TestDataset irisTreeFeaturized = new TestDataset()
@@ -432,7 +421,7 @@ public static class TestDatasets
432421
trainFilename = @"iris.txt",
433422
testFilename = @"iris.txt",
434423
loaderSettings = "loader=Text{col=Label:U4[0-2]:0 col=Features:1-*}",
435-
mamlExtraSettings = new[] { "xf=TreeFeat{lps=0 trainer=ftr{iter=3}} xf=copy{col=Features:Leaves}" },
424+
mamlExtraSettings = new[] { "xf=TreeFeat{lps=0 trainer=ftr{iter=3}} xf=copy{col=Features:Leaves}" }
436425
};
437426

438427
public static TestDataset irisTreeFeaturizedPermuted = new TestDataset()
@@ -441,15 +430,15 @@ public static class TestDatasets
441430
trainFilename = @"iris.txt",
442431
testFilename = @"iris.txt",
443432
loaderSettings = "loader=Text{col=Label:U4[0-2]:0 col=Features:1-*}",
444-
mamlExtraSettings = new[] { "xf=TreeFeat{lps=2 trainer=ftr{iter=3}} xf=copy{col=Features:Leaves}" },
433+
mamlExtraSettings = new[] { "xf=TreeFeat{lps=2 trainer=ftr{iter=3}} xf=copy{col=Features:Leaves}" }
445434
};
446435

447436
public static TestDataset irisLoaderU404 = new TestDataset()
448437
{
449438
name = "iris",
450439
trainFilename = @"iris.txt",
451440
testFilename = @"iris.txt",
452-
loaderSettings = "loader=Text{col=Label:U4[0-2]:0 col=Features:1-4}",
441+
loaderSettings = "loader=Text{col=Label:U4[0-2]:0 col=Features:1-4}"
453442
};
454443

455444
public static TestDataset iris = new TestDataset()
@@ -466,7 +455,7 @@ public static class TestDatasets
466455
{
467456
name = "iris",
468457
trainFilename = @"iris-train",
469-
testFilename = @"iris-test",
458+
testFilename = @"iris-test"
470459
};
471460

472461
public static TestDataset irisMissing = new TestDataset()
@@ -492,53 +481,47 @@ public static class TestDatasets
492481
name = "LM",
493482
trainFilename = @"..\LM\Local.source_features.de-de.txt",
494483
testFilename = @"..\LM\Local.validate_features.de-de.txt",
495-
labelFilename = @"..\LM\Mapping.de-de.txt",
496-
extraSettings = @"/inst Text{header+ attr=1,2 handler=WordHashBag{cols=3,4}}"
484+
labelFilename = @"..\LM\Mapping.de-de.txt"
497485
};
498486

499487
public static TestDataset LMCharGrams = new TestDataset()
500488
{
501489
name = "LMCharGrams",
502490
trainFilename = @"..\LM\Local.source_features.de-de.txt",
503491
testFilename = @"..\LM\Local.validate_features.de-de.txt",
504-
labelFilename = @"..\LM\Mapping.de-de.txt",
505-
extraSettings = @"/inst Text{header+ attr=1,2 handler=CharGram{cols=3,4 len=3}}"
492+
labelFilename = @"..\LM\Mapping.de-de.txt"
506493
};
507494

508495
public static TestDataset LMBigrams = new TestDataset()
509496
{
510497
name = "LMBigrams",
511498
trainFilename = @"..\LM\Local.source_features.de-de.txt",
512499
testFilename = @"..\LM\Local.validate_features.de-de.txt",
513-
labelFilename = @"..\LM\Mapping.de-de.txt",
514-
extraSettings = @"/inst Text{header+ attr=1,2 handler=WordBag{cols=3,4 ngram=2}}"
500+
labelFilename = @"..\LM\Mapping.de-de.txt"
515501
};
516502

517503
public static TestDataset LMNgrams = new TestDataset()
518504
{
519505
name = "LMNgrams",
520506
trainFilename = @"..\LM\Local.source_features.de-de.txt",
521507
testFilename = @"..\LM\Local.validate_features.de-de.txt",
522-
labelFilename = @"..\LM\Mapping.de-de.txt",
523-
extraSettings = @"/inst Text{header+ attr=1,2 handler=WordBag{cols=3,4 ngram=5 max=200000}}"
508+
labelFilename = @"..\LM\Mapping.de-de.txt"
524509
};
525510

526511
public static TestDataset LMSkipNgrams = new TestDataset()
527512
{
528513
name = "LMSkipNgrams",
529514
trainFilename = @"..\LM\Local.source_features.de-de.txt",
530515
testFilename = @"..\LM\Local.validate_features.de-de.txt",
531-
labelFilename = @"..\LM\Mapping.de-de.txt",
532-
extraSettings = @"/inst Text{header+ attr=1,2 handler=WordBag{cols=3,4 ngram=4 skips=1 max=200000}}"
516+
labelFilename = @"..\LM\Mapping.de-de.txt"
533517
};
534518

535519
public static TestDataset LMNgramsHashing = new TestDataset()
536520
{
537521
name = "LMNgramsHashing",
538522
trainFilename = @"..\LM\Local.source_features.de-de.txt",
539523
testFilename = @"..\LM\Local.validate_features.de-de.txt",
540-
labelFilename = @"..\LM\Mapping.de-de.txt",
541-
extraSettings = @"/inst Text{header+ attr=1,2 handler=WordHashBag{cols=3,4 ngram=10}}"
524+
labelFilename = @"..\LM\Mapping.de-de.txt"
542525
};
543526

544527
public static TestDataset rankingText = new TestDataset()
@@ -547,17 +530,15 @@ public static class TestDatasets
547530
trainFilename = @"ranking-sample.txt",
548531
testFilename = @"ranking-sample.txt",
549532
labelFilename = @"ranking-sample.txt.labels",
550-
loaderSettings = "loader=Text{header+ col=Label:TX:0 col=GroupId:U4[0-*]:1 col=Name:TX:1-2 col=Features:3-*}",
551-
extraSettings = @"/inst Text{header+ name=1-2 groupKey=n0}"
533+
loaderSettings = "loader=Text{header+ col=Label:TX:0 col=GroupId:U4[0-*]:1 col=Name:TX:1-2 col=Features:3-*}"
552534
};
553535

554536
public static TestDataset rankingExtract = new TestDataset()
555537
{
556538
name = "ranking",
557539
trainFilename = @"ranking-sample.txt",
558540
testFilename = @"ranking-sample.txt",
559-
labelFilename = @"ranking-sample.txt.labels",
560-
extraSettings = @"/inst ExtractInstances{header+ name=1-2 groupKey=n0}"
541+
labelFilename = @"ranking-sample.txt.labels"
561542
};
562543

563544
public static TestDataset breastCancerWeighted = new TestDataset
@@ -574,15 +555,15 @@ public static class TestDatasets
574555
name = "breast-cancer-weighted",
575556
trainFilename = @"ArtificiallyWeighted\breast-cancer-weights-quarter.txt",
576557
testFilename = @"ArtificiallyWeighted\breast-cancer-weights-quarter.txt",
577-
loaderSettings = "loader=Text{col=Label:Num:1 col=Weight:Num:4 col=Features:Num:~}",
558+
loaderSettings = "loader=Text{col=Label:Num:1 col=Weight:Num:4 col=Features:Num:~}"
578559
};
579560

580561
public static TestDataset housingWeightedRep = new TestDataset
581562
{
582563
name = "housing-weighted",
583564
trainFilename = @"ArtificiallyWeighted\housing-weights-quarter-rep.txt",
584565
testFilename = @"ArtificiallyWeighted\housing-weights-quarter-rep.txt",
585-
loaderSettings = "loader=Text{col=Weight:0 col=Label:1 col=Features:~}",
566+
loaderSettings = "loader=Text{col=Weight:0 col=Label:1 col=Features:~}"
586567
};
587568

588569
public static TestDataset housingDifferentlyWeightedRep = new TestDataset
@@ -600,7 +581,7 @@ public static class TestDatasets
600581
trainFilename = @"ArtificiallyWeighted\ranking-sample-weights-one.txt",
601582
testFilename = @"ArtificiallyWeighted\ranking-sample-weights-one.txt",
602583
labelFilename = @"ranking-sample.txt.labels",
603-
loaderSettings = "loader=Text{header+ col=Weight:0 col=Label:TX:1 col=Name:TX:2-3 col=GroupId:U4[0-*]:2 col=Features:~}",
584+
loaderSettings = "loader=Text{header+ col=Weight:0 col=Label:TX:1 col=Name:TX:2-3 col=GroupId:U4[0-*]:2 col=Features:~}"
604585
};
605586

606587
public static TestDataset adultSparseWithCategory = new TestDataset()
@@ -617,16 +598,14 @@ public static class TestDatasets
617598
trainFilename = @"adult.SparseWithCat.txt",
618599
testFilename = @"adult.SparseWithCat.txt",
619600
settings = "",
620-
extraSettings = @"/inst TextInstances { handler=CatHashHandler{cols=0 bits=6} }"
621601
};
622602

623603
public static TestDataset adultSparseWithCatAsAtt = new TestDataset()
624604
{
625605
name = "adult-sparseWithCat",
626606
trainFilename = @"adult.SparseWithCat.txt",
627607
testFilename = @"adult.SparseWithCat.txt",
628-
settings = "",
629-
extraSettings = @"/inst TextInstances { attr=0 threads-}"
608+
settings = ""
630609
};
631610

632611
public static TestDataset pClick = new TestDataset()
@@ -667,7 +646,7 @@ public static class TestDatasets
667646
trainFilename = @"..\Bing\SampleInputExtraction.bin",
668647
testFilename = @"..\Bing\SampleInputExtraction.bin",
669648
labelFilename = @"..\Bing\labelmap.txt",
670-
loaderSettings = "loader=Text{header+ col=Label:0 col=Features:1-84 rows=3000}",
649+
loaderSettings = "loader=Text{header+ col=Label:0 col=Features:1-84 rows=3000}"
671650
};
672651

673652
public static TestDataset mnistOneClass = new TestDataset()

0 commit comments

Comments
 (0)