Skip to content

Commit f222025

Browse files
authored
Fix ConvertingTransform bug (#1545)
* Fix ConvertingTransform bug
* Enable NgramHash unit test
* Address PR comment.
1 parent 18f7acc commit f222025

File tree

7 files changed

+934
-105
lines changed

7 files changed

+934
-105
lines changed

src/Microsoft.ML.Data/Transforms/ConvertTransform.cs

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,22 @@ internal static IDataTransform Create(IHostEnvironment env, Arguments args, IDat
316316
var item = args.Column[i];
317317
var tempResultType = item.ResultType ?? args.ResultType;
318318
DataKind kind;
319-
KeyRange range = item.KeyRange ?? (item.Range != null ? KeyRange.Parse(item.Range) : null) ?? args.KeyRange ?? (args.Range != null ? KeyRange.Parse(args.Range) : null);
319+
KeyRange range = null;
320+
// If KeyRange or Range is defined on this column, set range to the appropriate value.
321+
if (item.KeyRange != null)
322+
range = item.KeyRange;
323+
else if (item.Range != null)
324+
range = KeyRange.Parse(item.Range);
325+
// If KeyRange and Range are not defined for this column, we set range to the value
326+
// defined in the Arguments object only in case the ResultType is not defined on the column.
327+
else if (item.ResultType == null)
328+
{
329+
if (args.KeyRange != null)
330+
range = args.KeyRange;
331+
else if (args.Range != null)
332+
range = KeyRange.Parse(args.Range);
333+
}
334+
320335
if (tempResultType == null)
321336
{
322337
if (range == null)

test/BaselineOutput/Common/SavePipe/SavePipeNgramHash-Data.txt

Lines changed: 101 additions & 100 deletions
Large diffs are not rendered by default.

test/BaselineOutput/Common/SavePipe/SavePipeNgramHash-Schema.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
TextFeatures: Vec<Text, 2>
88
Metadata 'SlotNames': Vec<Text, 2>: Length=2, Count=2
99
[0] 'weg fuer milliardenhilfe frei', [1] 'vor dem parlamentsgebaeude toben strassenkaempfe zwischen demonstranten drinnen haben die griechischen abgeordneten das drastische sparpaket am abend endgueltig beschlossen die entscheidung ist eine wichtige voraussetzung fuer die auszahlung von weiteren acht milliarden euro hilfsgeldern athen das griechische parlament hat einem umfassenden sparpaket endgueltig zugestimmt'
10-
---- DelimitedTokenizeTransform ----
10+
---- RowToRowMapperTransform ----
1111
4 columns:
1212
Label: Text
1313
Attrs: Vec<Text, 2>
@@ -164,7 +164,7 @@
164164
HashNgram7: Vec<R4, 64>
165165
HashNgram8: Vec<R4, 64>
166166
---- SelectColumnsDataTransform ----
167-
8 columns:
167+
9 columns:
168168
NgramHashOne: Vec<R4, 16>
169169
HashNgram1: Vec<R4, 1024>
170170
HashNgram2: Vec<R4, 256>
@@ -173,3 +173,4 @@
173173
HashNgram5: Vec<R4, 8>
174174
HashNgram6: Vec<R4, 8>
175175
HashNgram7: Vec<R4, 64>
176+
HashNgram8: Vec<R4, 64>
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
Bad value at line 19 in column B
2+
Bad value at line 19 in column E
3+
Bad value at line 21 in column B
4+
Bad value at line 21 in column E
5+
Bad value at line 22 in column A
6+
Bad value at line 22 in column C
7+
Bad value at line 23 in column B
8+
Bad value at line 23 in column E
9+
Bad value at line 24 in column A
10+
Bad value at line 24 in column B
11+
Suppressing further bad value messages
12+
Processed 683 rows with 1245 bad values and 0 format errors
13+
Cursored through 683 rows

test/BaselineOutput/Common/SavePipe/SavePipeWithKey-Data.txt

Lines changed: 698 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
---- BoundLoader ----
2+
8 columns:
3+
Label: Key<U1, 0-1>
4+
Features: Vec<U2, 9>
5+
Metadata 'SlotNames': Vec<Text, 9>: Length=9, Count=9
6+
[0] 'thickness', [1] 'uniform_size', [2] 'uniform_shape', [3] 'adhesion', [4] 'epit_size', [5] 'bare_nuclei', [6] 'bland_chromatin', [7] 'normal_nucleoli', [8] 'mitoses'
7+
A: Key<U1, 1-5>
8+
B: Key<U1, 3-8>
9+
C: Key<U4, 0-5>
10+
D: Key<U1, 1-*>
11+
E: Key<U4, 3-*>
12+
F: Key<U1, 0-*>
13+
---- RowToRowMapperTransform ----
14+
10 columns:
15+
Label: Key<U1, 0-1>
16+
Features: Vec<U2, 9>
17+
Metadata 'SlotNames': Vec<Text, 9>: Length=9, Count=9
18+
[0] 'thickness', [1] 'uniform_size', [2] 'uniform_shape', [3] 'adhesion', [4] 'epit_size', [5] 'bare_nuclei', [6] 'bland_chromatin', [7] 'normal_nucleoli', [8] 'mitoses'
19+
A: Key<U1, 1-5>
20+
B: Key<U1, 3-8>
21+
C: Key<U4, 0-5>
22+
D: Key<U1, 1-*>
23+
E: Key<U4, 3-*>
24+
F: Key<U1, 0-*>
25+
Label2: Key<U2, 0-1>
26+
Features2: Vec<R4, 9>
27+
Metadata 'SlotNames': Vec<Text, 9>: Length=9, Count=9
28+
[0] 'thickness', [1] 'uniform_size', [2] 'uniform_shape', [3] 'adhesion', [4] 'epit_size', [5] 'bare_nuclei', [6] 'bland_chromatin', [7] 'normal_nucleoli', [8] 'mitoses'

test/Microsoft.ML.TestFramework/DataPipe/TestDataPipe.cs

Lines changed: 75 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33
// See the LICENSE file in the project root for more information.
44

5+
using Microsoft.ML.Runtime.CommandLine;
56
using Microsoft.ML.Runtime.Data;
67
using Microsoft.ML.Runtime.Internal.Utilities;
8+
using Microsoft.ML.Transforms;
79
using Microsoft.ML.Transforms.Conversions;
810
using Microsoft.ML.Transforms.Text;
911
using System;
@@ -506,7 +508,7 @@ private bool VerifyMatch<TSrc, TDst>(TSrc src, TDst dst, ValueMapper<TSrc, TDst>
506508
return false;
507509
}
508510

509-
[Fact(Skip = "Fails until issue #1342 is resolved.")]
511+
[Fact]
510512
public void SavePipeNgramHash()
511513
{
512514
string pathData = GetDataPath("lm.sample.txt");
@@ -522,7 +524,7 @@ public void SavePipeNgramHash()
522524
"xf=NgramHash{bits=6 col=HashNgram4:HashBig,Hash rehash+}",
523525
"xf=NgramHash{bits=3 ngram=1 col={name=HashNgram5 src=Hash src=Hash} col={name=HashNgram6 src=Hash ord-}}",
524526
"xf=NgramHash{bits=6 col=HashNgram7:HashBig,Hash rehash+ all- col={name=HashNgram8 src=Hash all+ ord-}}",
525-
"xf=SelectColumns{keepcol=NgramHashOne keepcol=HashNgram1 keepcol=HashNgram2 keepcol=HashNgram3 keepcol=HashNgram4 keepcol=HashNgram5 keepcol=HashNgram6 keepcol=HashNgram7 keepcol=HashNgram8, hidden=-}",
527+
"xf=SelectColumns{keepcol=NgramHashOne keepcol=HashNgram1 keepcol=HashNgram2 keepcol=HashNgram3 keepcol=HashNgram4 keepcol=HashNgram5 keepcol=HashNgram6 keepcol=HashNgram7 keepcol=HashNgram8 hidden=-}",
526528
});
527529

528530
TestCore(null, true,
@@ -600,6 +602,77 @@ public void SavePipeWordHash()
600602
Done();
601603
}
602604

605+
[Fact]
606+
public void SavePipeWithKey()
607+
{
608+
var dataPath = GetDataPath("breast-cancer-withheader.txt");
609+
TestCore(dataPath, true,
610+
new[] {
611+
"loader=Text{header=+",
612+
" col=Label:U1[0-1]:0",
613+
" col=Features:U2:1-*",
614+
" col=A:U1[1-5]:1",
615+
" col=B:U1[3-8]:2",
616+
" col=C:U4[0-5]:3",
617+
" col=D:U1[1-*]:4",
618+
" col=E:[3-*]:5",
619+
" col=F:U1[0-*]:6",
620+
"}",
621+
"xf=Convert{col=Label2:U2[0-1]:Label col=Features2:Features type=Num}",
622+
},
623+
624+
pipe =>
625+
{
626+
var argsText = new TextLoader.Arguments();
627+
bool tmp = CmdParser.ParseArguments(Env,
628+
" header=+" +
629+
" col=Label:TX:0" +
630+
" col=Features:TX:1-*" +
631+
" col=A:TX:1" +
632+
" col=B:TX:2" +
633+
" col=C:TX:3" +
634+
" col=D:TX:4" +
635+
" col=E:TX:5" +
636+
" col=F:TX:6",
637+
argsText);
638+
Check(tmp, "Parsing argsText failed!");
639+
IDataView view2 = TextLoader.Create(Env, argsText, new MultiFileSource(dataPath));
640+
641+
var argsConv = new ConvertingTransform.Arguments();
642+
tmp = CmdParser.ParseArguments(Env,
643+
" col=Label:U1[0-1]:Label" +
644+
" col=Features:U2:Features" +
645+
" col=A:U1[1-5]:A" +
646+
" col=B:U1[3-8]:B" +
647+
" col=C:[0-5]:C" +
648+
" col=D:U1[1-*]:D" +
649+
" col=E" +
650+
" col=F:U1[0-*]:F" +
651+
" key={min=3}",
652+
argsConv);
653+
Check(tmp, "Parsing argsConv failed!");
654+
view2 = ConvertingTransform.Create(Env, argsConv, view2);
655+
656+
argsConv = new ConvertingTransform.Arguments();
657+
tmp = CmdParser.ParseArguments(Env,
658+
" col=Label2:U2:Label col=Features2:Num:Features",
659+
argsConv);
660+
Check(tmp, "Parsing argsConv(2) failed!");
661+
view2 = ConvertingTransform.Create(Env, argsConv, view2);
662+
663+
var colsChoose = new[] { "Label", "Features", "Label2", "Features2", "A", "B", "C", "D", "E", "F" };
664+
665+
IDataView view1 = SelectColumnsTransform.CreateKeep(Env, pipe, colsChoose);
666+
view2 = SelectColumnsTransform.CreateKeep(Env, view2, colsChoose);
667+
668+
CheckSameValues(view1, view2);
669+
},
670+
671+
logCurs: true);
672+
673+
Done();
674+
}
675+
603676
[Fact]
604677
public void TestHashTransformFloat()
605678
{

0 commit comments

Comments
 (0)