@@ -21,7 +21,6 @@ public class TestDataset
2121 // REVIEW: Replace these with appropriate SubComponents!
2222 public string settings ;
2323 public string testSettings ;
24- public string extraSettings ;
2524 // REVIEW: Remove the two above setting strings once conversion work is complete.
2625 public string loaderSettings ;
2726 public string [ ] mamlExtraSettings ;
@@ -38,7 +37,6 @@ public TestDataset Clone()
3837 labelFilename = labelFilename ,
3938 settings = settings ,
4039 testSettings = testSettings ,
41- extraSettings = extraSettings ,
4240 loaderSettings = loaderSettings ,
4341 mamlExtraSettings = mamlExtraSettings
4442 } ;
@@ -80,8 +78,7 @@ public static class TestDatasets
8078 {
8179 name = "breast-cancer" ,
8280 trainFilename = "breast-cancer.txt" ,
83- testFilename = "breast-cancer.txt" ,
84- extraSettings = "cacheinst- inst=Text{label=0 attr=1-9}" ,
81+ testFilename = "breast-cancer.txt"
8582 } ;
8683
8784 /// <summary>
@@ -94,8 +91,7 @@ public static class TestDatasets
9491 testFilename = "breast-cancer.txt" ,
9592 // Using "col=Features:1-5,6,7-9" improves code coverage. Same with "col=Attr:TX:6".
9693 loaderSettings = "loader=Text{sparse- col=Attr:TX:6 col=Label:0 col=Features:1-5,6,7-9}" ,
97- mamlExtraSettings = new [ ] { "cache-" } ,
98- extraSettings = "/cacheinst- /inst Pipe{loader=Text{sparse- col=Attr:TX:6 col=Label:0 col=Features:1-5,6,7-9} lab=Label feat=Features}"
94+ mamlExtraSettings = new [ ] { "cache-" }
9995 } ;
10096
10197 /// <summary>
@@ -108,7 +104,7 @@ public static class TestDatasets
108104 testFilename = "breast-cancer.txt" ,
109105 // Note that More and More_Cleansed are not really needed (duplicate info), but improve code coverage.
110106 loaderSettings = "loader=Text{col=Label:0 col=Good:1-5,7-9 col=Mixed:6 col=More:4-6}" ,
111- mamlExtraSettings = new [ ] { "cache-" , "xf=NAHandle{col=Fixed:Mixed col=More}" , "xf=Concat{col=Features:Good,Fixed,More}" } ,
107+ mamlExtraSettings = new [ ] { "cache-" , "xf=NAHandle{col=Fixed:Mixed col=More}" , "xf=Concat{col=Features:Good,Fixed,More}" }
112108 } ;
113109
114110 /// <summary>
@@ -121,7 +117,7 @@ public static class TestDatasets
121117 testFilename = "breast-cancer.txt" ,
122118 // Note that More and More_Cleansed are not really needed (duplicate info), but improve code coverage.
123119 loaderSettings = "loader=Text{col=Label:0 col=Good:1-5,7-9 col=Mixed:6 col=More:4-6}" ,
124- mamlExtraSettings = new [ ] { "cache-" , "xf=MissingFilter{col=Mixed col=More}" , "xf=Concat{col=Features:Good,Mixed,More}" } ,
120+ mamlExtraSettings = new [ ] { "cache-" , "xf=MissingFilter{col=Mixed col=More}" , "xf=Concat{col=Features:Good,Mixed,More}" }
125121 } ;
126122
127123 public static TestDataset breastCancerOneClass = new TestDataset
@@ -146,7 +142,7 @@ public static class TestDatasets
146142 public static TestDataset Digits = new TestDataset
147143 {
148144 name = "Digits" ,
149- trainFilename = @"external/digits.csv" ,
145+ trainFilename = @"external/digits.csv"
150146 } ;
151147
152148 public static TestDataset vw = new TestDataset
@@ -239,7 +235,7 @@ public static class TestDatasets
239235 trainFilename = "MSM-sparse-sample-train.txt" ,
240236 testFilename = "MSM-sparse-sample-test.txt" ,
241237 loaderSettings = "loader=Text{col=Name:TX:0 col=Label:Num:1 col=Features:Num:~}" ,
242- mamlExtraSettings = new [ ] { "xf=Expr{col=Name expr={x=>right(x, 1)}}" } ,
238+ mamlExtraSettings = new [ ] { "xf=Expr{col=Name expr={x=>right(x, 1)}}" }
243239 } ;
244240
245241 public static TestDataset msmNamesHeader = new TestDataset
@@ -269,8 +265,7 @@ public static class TestDatasets
269265 {
270266 name = "breast-cancer-bing" ,
271267 trainFilename = "breast-cancer-bing.txt" ,
272- testFilename = "breast-cancer-bing.txt" ,
273- extraSettings = "/inst ExtractInstances{}"
268+ testFilename = "breast-cancer-bing.txt"
274269 } ;
275270
276271 public static TestDataset adult = new TestDataset
@@ -281,8 +276,7 @@ public static class TestDatasets
281276 fileHasHeader = true ,
282277 fileSeparator = '\t ' ,
283278 loaderSettings = "loader=Text{header+ col=Label:0 col=Num:9-14 col=Cat:TX:1-8}" ,
284- mamlExtraSettings = new [ ] { "xf=Cat{col=Cat}" , "xf=Concat{col=Features:Num,Cat}" } ,
285- extraSettings = @"/inst Text{header+ sep=, label=14 handler=Categorical{cols=5-9,1,13,3}}" ,
279+ mamlExtraSettings = new [ ] { "xf=Cat{col=Cat}" , "xf=Concat{col=Features:Num,Cat}" }
286280 } ;
287281
288282 public static TestDataset adultOnlyCat = new TestDataset
@@ -291,8 +285,7 @@ public static class TestDatasets
291285 trainFilename = "adult.tiny.with-schema.txt" ,
292286 testFilename = "adult.tiny.with-schema.txt" ,
293287 loaderSettings = "loader=Text{header+ col=Label:0 col=Cat:TX:1-8}" ,
294- mamlExtraSettings = new [ ] { "xf=Cat{col=Cat}" , "xf=Concat{col=Features:Cat}" } ,
295- extraSettings = @"/inst Text{header+ sep=, label=14 handler=Categorical{cols=5-9,1,13,3}}" ,
288+ mamlExtraSettings = new [ ] { "xf=Cat{col=Cat}" , "xf=Concat{col=Features:Cat}" }
296289 } ;
297290
298291 public static TestDataset adultHash = new TestDataset
@@ -301,8 +294,7 @@ public static class TestDatasets
301294 trainFilename = "adult.tiny.with-schema.txt" ,
302295 testFilename = "adult.tiny.with-schema.txt" ,
303296 loaderSettings = "loader=Text{header+ col=Label:0 col=Num:9-14 col=Cat:TX:1-8}" ,
304- mamlExtraSettings = new [ ] { "xf=CatHash{col=Cat bits=5}" , "xf=Concat{col=Features:Num,Cat}" } ,
305- extraSettings = @"/inst Text{header+ sep=, label=14 handler=CatHash{cols=1,3,5-9,13 bits=5}}"
297+ mamlExtraSettings = new [ ] { "xf=CatHash{col=Cat bits=5}" , "xf=Concat{col=Features:Num,Cat}" }
306298 } ;
307299
308300 public static TestDataset adultHashWithDataPipe = new TestDataset
@@ -320,8 +312,7 @@ public static class TestDatasets
320312 trainFilename = "adult.tiny.with-schema.txt" ,
321313 testFilename = "adult.tiny.with-schema.txt" ,
322314 loaderSettings = "loader=Text{header+ col=Label:0 col=Word:TX:1-8 col=Num:~}" ,
323- mamlExtraSettings = new [ ] { "xf=WordBag{col=Word}" , "xf=Concat{col=Features:Num,Word}" } ,
324- extraSettings = @"/inst Text{header+ sep=, label=14 handler=WordBag{cols=1,3,5-9,13}}"
315+ mamlExtraSettings = new [ ] { "xf=WordBag{col=Word}" , "xf=Concat{col=Features:Num,Word}" }
325316 } ;
326317
327318 public static TestDataset adultTextHash = new TestDataset
@@ -330,15 +321,14 @@ public static class TestDatasets
330321 trainFilename = "adult.tiny.with-schema.txt" ,
331322 testFilename = "adult.tiny.with-schema.txt" ,
332323 loaderSettings = "loader=Text{header+ col=Label:0 col=Word:TX:1-8 col=Num:~}" ,
333- mamlExtraSettings = new [ ] { "xf=WordHashBag{col=Word bits=8}" , "xf=Concat{col=Features:Num,Word}" } ,
334- extraSettings = @"/inst Text{header+ sep=, label=14 handler=WordHashBag{cols=1,3,5-9,13 sep=, bits=8}}"
324+ mamlExtraSettings = new [ ] { "xf=WordHashBag{col=Word bits=8}" , "xf=Concat{col=Features:Num,Word}" }
335325 } ;
336326
337327 public static TestDataset adultRanking = new TestDataset
338328 {
339329 name = "adultRanking" ,
340330 trainFilename = "adult.tiny.with-schema.txt" ,
341- loaderSettings = "loader=Text{header+ sep=tab, col=Label:R4:0 col=Workclass:TX:1 col=Categories:TX:2-8 col=NumericFeatures:R4:9-14}" ,
331+ loaderSettings = "loader=Text{header+ sep=tab, col=Label:R4:0 col=Workclass:TX:1 col=Categories:TX:2-8 col=NumericFeatures:R4:9-14}"
342332 } ;
343333
344334 public static TestDataset displayPoisson = new TestDataset
@@ -365,7 +355,7 @@ public static class TestDatasets
365355 trainFilename = @"..\children\children.txt" ,
366356 testFilename = @"..\children\children.txt" ,
367357 loaderSettings = "loader=Text{header+ sep=space col=Cat1:TX:1 col=Cat2:TX:2 col=Cat3:TX:3 col=Label:4 col=Ignore:TX:0,5-7 col=Features:8-*}" ,
368- mamlExtraSettings = new [ ] { "xf=Cat{col=Cat1 col=Cat2 col=Cat3}" , "xf=Concat{col=Features:Features,Cat1,Cat2,Cat3}" } ,
358+ mamlExtraSettings = new [ ] { "xf=Cat{col=Cat1 col=Cat2 col=Cat3}" , "xf=Concat{col=Features:Features,Cat1,Cat2,Cat3}" }
369359 // settings = "header+;sep:space;cat:1,2,3;label:4;attr:0,5,6,7",
370360 } ;
371361
@@ -377,8 +367,7 @@ public static class TestDatasets
377367 loaderSettings = "loader=Text{col=Label:0 col=Cat3:TX:3 col=Cat4:TX:4 col=Cat5:TX:5 col=Cat6:TX:6 col=Cat7:TX:7 col=Cat8:TX:8 col=Cat9:TX:9 col=Cat15:TX:15 col=Cat16:TX:16 col=Cat18:TX:18 col=Features:~}" ,
378368 mamlExtraSettings = new [ ] {
379369 "xf=Cat{col=Cat3 col=Cat4 col=Cat5 col=Cat6 col=Cat7 col=Cat8 col=Cat9 col=Cat15 col=Cat16 col=Cat18}" ,
380- "xf=Concat{col=Features:Features,Cat3,Cat4,Cat5,Cat6,Cat7,Cat8,Cat9,Cat15,Cat16,Cat18}" } ,
381- // extraSettings = "inst=Text{cat=3,4,5,6,7,8,9,15,16,18 label=0 maxBad=100}"
370+ "xf=Concat{col=Features:Features,Cat3,Cat4,Cat5,Cat6,Cat7,Cat8,Cat9,Cat15,Cat16,Cat18}" }
382371 } ;
383372
384373 public static TestDataset reutersMaxDim = new TestDataset ( )
@@ -396,7 +385,7 @@ public static class TestDatasets
396385 trainFilename = @"iris.txt" ,
397386 testFilename = @"iris.txt" ,
398387 loaderSettings = "loader=Text{col=Label:TX:0 col=Features:1-*}" ,
399- mamlExtraSettings = new [ ] { "xf=Term{col=Label}" } ,
388+ mamlExtraSettings = new [ ] { "xf=Term{col=Label}" }
400389 } ;
401390
402391 public static TestDataset irisData = new TestDataset ( )
@@ -423,7 +412,7 @@ public static class TestDatasets
423412 trainFilename = @"iris-label-name.txt" ,
424413 testFilename = @"iris-label-name.txt" ,
425414 loaderSettings = "loader=Text{header+ col=Label:TX:0 col=Features:1-*}" ,
426- mamlExtraSettings = new [ ] { "xf=Term{col=Label}" } ,
415+ mamlExtraSettings = new [ ] { "xf=Term{col=Label}" }
427416 } ;
428417
429418 public static TestDataset irisTreeFeaturized = new TestDataset ( )
@@ -432,7 +421,7 @@ public static class TestDatasets
432421 trainFilename = @"iris.txt" ,
433422 testFilename = @"iris.txt" ,
434423 loaderSettings = "loader=Text{col=Label:U4[0-2]:0 col=Features:1-*}" ,
435- mamlExtraSettings = new [ ] { "xf=TreeFeat{lps=0 trainer=ftr{iter=3}} xf=copy{col=Features:Leaves}" } ,
424+ mamlExtraSettings = new [ ] { "xf=TreeFeat{lps=0 trainer=ftr{iter=3}} xf=copy{col=Features:Leaves}" }
436425 } ;
437426
438427 public static TestDataset irisTreeFeaturizedPermuted = new TestDataset ( )
@@ -441,15 +430,15 @@ public static class TestDatasets
441430 trainFilename = @"iris.txt" ,
442431 testFilename = @"iris.txt" ,
443432 loaderSettings = "loader=Text{col=Label:U4[0-2]:0 col=Features:1-*}" ,
444- mamlExtraSettings = new [ ] { "xf=TreeFeat{lps=2 trainer=ftr{iter=3}} xf=copy{col=Features:Leaves}" } ,
433+ mamlExtraSettings = new [ ] { "xf=TreeFeat{lps=2 trainer=ftr{iter=3}} xf=copy{col=Features:Leaves}" }
445434 } ;
446435
447436 public static TestDataset irisLoaderU404 = new TestDataset ( )
448437 {
449438 name = "iris" ,
450439 trainFilename = @"iris.txt" ,
451440 testFilename = @"iris.txt" ,
452- loaderSettings = "loader=Text{col=Label:U4[0-2]:0 col=Features:1-4}" ,
441+ loaderSettings = "loader=Text{col=Label:U4[0-2]:0 col=Features:1-4}"
453442 } ;
454443
455444 public static TestDataset iris = new TestDataset ( )
@@ -466,7 +455,7 @@ public static class TestDatasets
466455 {
467456 name = "iris" ,
468457 trainFilename = @"iris-train" ,
469- testFilename = @"iris-test" ,
458+ testFilename = @"iris-test"
470459 } ;
471460
472461 public static TestDataset irisMissing = new TestDataset ( )
@@ -492,53 +481,47 @@ public static class TestDatasets
492481 name = "LM" ,
493482 trainFilename = @"..\LM\Local.source_features.de-de.txt" ,
494483 testFilename = @"..\LM\Local.validate_features.de-de.txt" ,
495- labelFilename = @"..\LM\Mapping.de-de.txt" ,
496- extraSettings = @"/inst Text{header+ attr=1,2 handler=WordHashBag{cols=3,4}}"
484+ labelFilename = @"..\LM\Mapping.de-de.txt"
497485 } ;
498486
499487 public static TestDataset LMCharGrams = new TestDataset ( )
500488 {
501489 name = "LMCharGrams" ,
502490 trainFilename = @"..\LM\Local.source_features.de-de.txt" ,
503491 testFilename = @"..\LM\Local.validate_features.de-de.txt" ,
504- labelFilename = @"..\LM\Mapping.de-de.txt" ,
505- extraSettings = @"/inst Text{header+ attr=1,2 handler=CharGram{cols=3,4 len=3}}"
492+ labelFilename = @"..\LM\Mapping.de-de.txt"
506493 } ;
507494
508495 public static TestDataset LMBigrams = new TestDataset ( )
509496 {
510497 name = "LMBigrams" ,
511498 trainFilename = @"..\LM\Local.source_features.de-de.txt" ,
512499 testFilename = @"..\LM\Local.validate_features.de-de.txt" ,
513- labelFilename = @"..\LM\Mapping.de-de.txt" ,
514- extraSettings = @"/inst Text{header+ attr=1,2 handler=WordBag{cols=3,4 ngram=2}}"
500+ labelFilename = @"..\LM\Mapping.de-de.txt"
515501 } ;
516502
517503 public static TestDataset LMNgrams = new TestDataset ( )
518504 {
519505 name = "LMNgrams" ,
520506 trainFilename = @"..\LM\Local.source_features.de-de.txt" ,
521507 testFilename = @"..\LM\Local.validate_features.de-de.txt" ,
522- labelFilename = @"..\LM\Mapping.de-de.txt" ,
523- extraSettings = @"/inst Text{header+ attr=1,2 handler=WordBag{cols=3,4 ngram=5 max=200000}}"
508+ labelFilename = @"..\LM\Mapping.de-de.txt"
524509 } ;
525510
526511 public static TestDataset LMSkipNgrams = new TestDataset ( )
527512 {
528513 name = "LMSkipNgrams" ,
529514 trainFilename = @"..\LM\Local.source_features.de-de.txt" ,
530515 testFilename = @"..\LM\Local.validate_features.de-de.txt" ,
531- labelFilename = @"..\LM\Mapping.de-de.txt" ,
532- extraSettings = @"/inst Text{header+ attr=1,2 handler=WordBag{cols=3,4 ngram=4 skips=1 max=200000}}"
516+ labelFilename = @"..\LM\Mapping.de-de.txt"
533517 } ;
534518
535519 public static TestDataset LMNgramsHashing = new TestDataset ( )
536520 {
537521 name = "LMNgramsHashing" ,
538522 trainFilename = @"..\LM\Local.source_features.de-de.txt" ,
539523 testFilename = @"..\LM\Local.validate_features.de-de.txt" ,
540- labelFilename = @"..\LM\Mapping.de-de.txt" ,
541- extraSettings = @"/inst Text{header+ attr=1,2 handler=WordHashBag{cols=3,4 ngram=10}}"
524+ labelFilename = @"..\LM\Mapping.de-de.txt"
542525 } ;
543526
544527 public static TestDataset rankingText = new TestDataset ( )
@@ -547,17 +530,15 @@ public static class TestDatasets
547530 trainFilename = @"ranking-sample.txt" ,
548531 testFilename = @"ranking-sample.txt" ,
549532 labelFilename = @"ranking-sample.txt.labels" ,
550- loaderSettings = "loader=Text{header+ col=Label:TX:0 col=GroupId:U4[0-*]:1 col=Name:TX:1-2 col=Features:3-*}" ,
551- extraSettings = @"/inst Text{header+ name=1-2 groupKey=n0}"
533+ loaderSettings = "loader=Text{header+ col=Label:TX:0 col=GroupId:U4[0-*]:1 col=Name:TX:1-2 col=Features:3-*}"
552534 } ;
553535
554536 public static TestDataset rankingExtract = new TestDataset ( )
555537 {
556538 name = "ranking" ,
557539 trainFilename = @"ranking-sample.txt" ,
558540 testFilename = @"ranking-sample.txt" ,
559- labelFilename = @"ranking-sample.txt.labels" ,
560- extraSettings = @"/inst ExtractInstances{header+ name=1-2 groupKey=n0}"
541+ labelFilename = @"ranking-sample.txt.labels"
561542 } ;
562543
563544 public static TestDataset breastCancerWeighted = new TestDataset
@@ -574,15 +555,15 @@ public static class TestDatasets
574555 name = "breast-cancer-weighted" ,
575556 trainFilename = @"ArtificiallyWeighted\breast-cancer-weights-quarter.txt" ,
576557 testFilename = @"ArtificiallyWeighted\breast-cancer-weights-quarter.txt" ,
577- loaderSettings = "loader=Text{col=Label:Num:1 col=Weight:Num:4 col=Features:Num:~}" ,
558+ loaderSettings = "loader=Text{col=Label:Num:1 col=Weight:Num:4 col=Features:Num:~}"
578559 } ;
579560
580561 public static TestDataset housingWeightedRep = new TestDataset
581562 {
582563 name = "housing-weighted" ,
583564 trainFilename = @"ArtificiallyWeighted\housing-weights-quarter-rep.txt" ,
584565 testFilename = @"ArtificiallyWeighted\housing-weights-quarter-rep.txt" ,
585- loaderSettings = "loader=Text{col=Weight:0 col=Label:1 col=Features:~}" ,
566+ loaderSettings = "loader=Text{col=Weight:0 col=Label:1 col=Features:~}"
586567 } ;
587568
588569 public static TestDataset housingDifferentlyWeightedRep = new TestDataset
@@ -600,7 +581,7 @@ public static class TestDatasets
600581 trainFilename = @"ArtificiallyWeighted\ranking-sample-weights-one.txt" ,
601582 testFilename = @"ArtificiallyWeighted\ranking-sample-weights-one.txt" ,
602583 labelFilename = @"ranking-sample.txt.labels" ,
603- loaderSettings = "loader=Text{header+ col=Weight:0 col=Label:TX:1 col=Name:TX:2-3 col=GroupId:U4[0-*]:2 col=Features:~}" ,
584+ loaderSettings = "loader=Text{header+ col=Weight:0 col=Label:TX:1 col=Name:TX:2-3 col=GroupId:U4[0-*]:2 col=Features:~}"
604585 } ;
605586
606587 public static TestDataset adultSparseWithCategory = new TestDataset ( )
@@ -617,16 +598,14 @@ public static class TestDatasets
617598 trainFilename = @"adult.SparseWithCat.txt" ,
618599 testFilename = @"adult.SparseWithCat.txt" ,
619600 settings = "" ,
620- extraSettings = @"/inst TextInstances { handler=CatHashHandler{cols=0 bits=6} }"
621601 } ;
622602
623603 public static TestDataset adultSparseWithCatAsAtt = new TestDataset ( )
624604 {
625605 name = "adult-sparseWithCat" ,
626606 trainFilename = @"adult.SparseWithCat.txt" ,
627607 testFilename = @"adult.SparseWithCat.txt" ,
628- settings = "" ,
629- extraSettings = @"/inst TextInstances { attr=0 threads-}"
608+ settings = ""
630609 } ;
631610
632611 public static TestDataset pClick = new TestDataset ( )
@@ -667,7 +646,7 @@ public static class TestDatasets
667646 trainFilename = @"..\Bing\SampleInputExtraction.bin" ,
668647 testFilename = @"..\Bing\SampleInputExtraction.bin" ,
669648 labelFilename = @"..\Bing\labelmap.txt" ,
670- loaderSettings = "loader=Text{header+ col=Label:0 col=Features:1-84 rows=3000}" ,
649+ loaderSettings = "loader=Text{header+ col=Label:0 col=Features:1-84 rows=3000}"
671650 } ;
672651
673652 public static TestDataset mnistOneClass = new TestDataset ( )
0 commit comments