55using System ;
66using System . IO ;
77using System . Text ;
8+ using Apache . Arrow ;
89using Xunit ;
910
1011namespace Microsoft . Data . Analysis . Tests
1112{
1213 public partial class DataFrameTests
1314 {
/// <summary>
/// Asserts that every column of <paramref name="df"/> is an instance of the concrete
/// column class that corresponds to the column's <c>DataType</c>.
/// </summary>
/// <param name="df">The DataFrame whose columns are inspected.</param>
/// <param name="testArrowStringColumn">
/// When <c>true</c>, columns whose <c>DataType</c> is <see cref="string"/> are expected to be
/// <c>ArrowStringDataFrameColumn</c>; otherwise they are expected to be <c>StringDataFrameColumn</c>.
/// </param>
/// <exception cref="NotImplementedException">
/// A column reports a <c>DataType</c> that this helper has no mapping for.
/// </exception>
internal static void VerifyColumnTypes(DataFrame df, bool testArrowStringColumn = false)
{
    // Resolves the column class expected for a given element type.
    Type ExpectedColumnType(Type elementType)
    {
        if (elementType == typeof(bool))
        {
            return typeof(BooleanDataFrameColumn);
        }
        if (elementType == typeof(decimal))
        {
            return typeof(DecimalDataFrameColumn);
        }
        if (elementType == typeof(byte))
        {
            return typeof(ByteDataFrameColumn);
        }
        if (elementType == typeof(char))
        {
            return typeof(CharDataFrameColumn);
        }
        if (elementType == typeof(double))
        {
            return typeof(DoubleDataFrameColumn);
        }
        if (elementType == typeof(float))
        {
            return typeof(SingleDataFrameColumn);
        }
        if (elementType == typeof(int))
        {
            return typeof(Int32DataFrameColumn);
        }
        if (elementType == typeof(long))
        {
            return typeof(Int64DataFrameColumn);
        }
        if (elementType == typeof(sbyte))
        {
            return typeof(SByteDataFrameColumn);
        }
        if (elementType == typeof(short))
        {
            return typeof(Int16DataFrameColumn);
        }
        if (elementType == typeof(uint))
        {
            return typeof(UInt32DataFrameColumn);
        }
        if (elementType == typeof(ulong))
        {
            return typeof(UInt64DataFrameColumn);
        }
        if (elementType == typeof(ushort))
        {
            return typeof(UInt16DataFrameColumn);
        }
        if (elementType == typeof(string))
        {
            // String data has two possible backing column classes; the caller picks which one to expect.
            return testArrowStringColumn
                ? typeof(ArrowStringDataFrameColumn)
                : typeof(StringDataFrameColumn);
        }

        // An unmapped element type means this helper needs a new entry.
        throw new NotImplementedException("Unit test has to be updated");
    }

    foreach (DataFrameColumn current in df.Columns)
    {
        // Exact-type check (not assignability), same as Assert.IsType<T>.
        Assert.IsType(ExpectedColumnType(current.DataType), current);
    }
}
102+
14103 [ Fact ]
15104 public void TestReadCsvWithHeader ( )
16105 {
@@ -28,11 +117,13 @@ Stream GetStream(string streamData)
28117 Assert . Equal ( 4 , df . Rows . Count ) ;
29118 Assert . Equal ( 7 , df . Columns . Count ) ;
30119 Assert . Equal ( "CMT" , df . Columns [ "vendor_id" ] [ 3 ] ) ;
120+ VerifyColumnTypes ( df ) ;
31121
32122 DataFrame reducedRows = DataFrame . LoadCsv ( GetStream ( data ) , numberOfRowsToRead : 3 ) ;
33123 Assert . Equal ( 3 , reducedRows . Rows . Count ) ;
34124 Assert . Equal ( 7 , reducedRows . Columns . Count ) ;
35125 Assert . Equal ( "CMT" , reducedRows . Columns [ "vendor_id" ] [ 2 ] ) ;
126+ VerifyColumnTypes ( df ) ;
36127 }
37128
38129 [ Fact ]
@@ -51,11 +142,13 @@ Stream GetStream(string streamData)
51142 Assert . Equal ( 4 , df . Rows . Count ) ;
52143 Assert . Equal ( 7 , df . Columns . Count ) ;
53144 Assert . Equal ( "CMT" , df . Columns [ "Column0" ] [ 3 ] ) ;
145+ VerifyColumnTypes ( df ) ;
54146
55147 DataFrame reducedRows = DataFrame . LoadCsv ( GetStream ( data ) , header : false , numberOfRowsToRead : 3 ) ;
56148 Assert . Equal ( 3 , reducedRows . Rows . Count ) ;
57149 Assert . Equal ( 7 , reducedRows . Columns . Count ) ;
58150 Assert . Equal ( "CMT" , reducedRows . Columns [ "Column0" ] [ 2 ] ) ;
151+ VerifyColumnTypes ( df ) ;
59152 }
60153
61154 [ Fact ]
@@ -83,6 +176,7 @@ Stream GetStream(string streamData)
83176 Assert . True ( typeof ( float ) == df . Columns [ 4 ] . DataType ) ;
84177 Assert . True ( typeof ( string ) == df . Columns [ 5 ] . DataType ) ;
85178 Assert . True ( typeof ( double ) == df . Columns [ 6 ] . DataType ) ;
179+ VerifyColumnTypes ( df ) ;
86180
87181 foreach ( var column in df . Columns )
88182 {
@@ -124,11 +218,13 @@ Stream GetStream(string streamData)
124218 Assert . Equal ( 5 , df . Rows . Count ) ;
125219 Assert . Equal ( 7 , df . Columns . Count ) ;
126220 Assert . Equal ( "CMT" , df . Columns [ "vendor_id" ] [ 4 ] ) ;
221+ VerifyColumnTypes ( df ) ;
127222
128223 DataFrame reducedRows = DataFrame . LoadCsv ( GetStream ( data ) , separator : '|' , numberOfRowsToRead : 3 ) ;
129224 Assert . Equal ( 3 , reducedRows . Rows . Count ) ;
130225 Assert . Equal ( 7 , reducedRows . Columns . Count ) ;
131226 Assert . Equal ( "CMT" , reducedRows . Columns [ "vendor_id" ] [ 2 ] ) ;
227+ VerifyColumnTypes ( df ) ;
132228
133229 var nullRow = df . Rows [ 3 ] ;
134230 Assert . Equal ( "" , nullRow [ 0 ] ) ;
@@ -159,11 +255,13 @@ Stream GetStream(string streamData)
159255 Assert . Equal ( 5 , df . Rows . Count ) ;
160256 Assert . Equal ( 7 , df . Columns . Count ) ;
161257 Assert . Equal ( "CMT" , df . Columns [ "vendor_id" ] [ 4 ] ) ;
258+ VerifyColumnTypes ( df ) ;
162259
163260 DataFrame reducedRows = DataFrame . LoadCsv ( GetStream ( data ) , separator : ';' , numberOfRowsToRead : 3 ) ;
164261 Assert . Equal ( 3 , reducedRows . Rows . Count ) ;
165262 Assert . Equal ( 7 , reducedRows . Columns . Count ) ;
166263 Assert . Equal ( "CMT" , reducedRows . Columns [ "vendor_id" ] [ 2 ] ) ;
264+ VerifyColumnTypes ( df ) ;
167265
168266 var nullRow = df . Rows [ 3 ] ;
169267 Assert . Equal ( "" , nullRow [ 0 ] ) ;
@@ -193,11 +291,13 @@ Stream GetStream(string streamData)
193291 Assert . Equal ( 4 , df . Rows . Count ) ;
194292 Assert . Equal ( 7 , df . Columns . Count ) ;
195293 Assert . Equal ( "CMT" , df . Columns [ "vendor_id" ] [ 3 ] ) ;
294+ VerifyColumnTypes ( df ) ;
196295
197296 DataFrame reducedRows = DataFrame . LoadCsv ( GetStream ( data ) , numberOfRowsToRead : 3 ) ;
198297 Assert . Equal ( 3 , reducedRows . Rows . Count ) ;
199298 Assert . Equal ( 7 , reducedRows . Columns . Count ) ;
200299 Assert . Equal ( "CMT" , reducedRows . Columns [ "vendor_id" ] [ 2 ] ) ;
300+ VerifyColumnTypes ( df ) ;
201301 }
202302
203303 [ Fact ]
@@ -235,11 +335,13 @@ Stream GetStream(string streamData)
235335 Assert . Equal ( 4 , df . Rows . Count ) ;
236336 Assert . Equal ( 6 , df . Columns . Count ) ;
237337 Assert . Equal ( "CMT" , df . Columns [ "vendor_id" ] [ 3 ] ) ;
338+ VerifyColumnTypes ( df ) ;
238339
239340 DataFrame reducedRows = DataFrame . LoadCsv ( GetStream ( data ) , numberOfRowsToRead : 3 ) ;
240341 Assert . Equal ( 3 , reducedRows . Rows . Count ) ;
241342 Assert . Equal ( 6 , reducedRows . Columns . Count ) ;
242343 Assert . Equal ( "CMT" , reducedRows . Columns [ "vendor_id" ] [ 2 ] ) ;
344+ VerifyColumnTypes ( df ) ;
243345
244346 }
245347 }
0 commit comments