@@ -136,7 +136,7 @@ def test_concat_compat() -> None:
136
136
137
137
138
138
def test_concat_missing_var () -> None :
139
- datasets = create_concat_datasets (2 , 123 )
139
+ datasets = create_concat_datasets (2 , seed = 123 )
140
140
expected = concat (datasets , dim = "day" )
141
141
vars_to_drop = ["humidity" , "precipitation" , "cloud_cover" ]
142
142
@@ -152,7 +152,7 @@ def test_concat_missing_var() -> None:
152
152
153
153
154
154
def test_concat_missing_multiple_consecutive_var () -> None :
155
- datasets = create_concat_datasets (3 , 123 )
155
+ datasets = create_concat_datasets (3 , seed = 123 )
156
156
expected = concat (datasets , dim = "day" )
157
157
vars_to_drop = ["humidity" , "pressure" ]
158
158
@@ -191,9 +191,16 @@ def test_concat_second_empty() -> None:
191
191
192
192
assert_identical (actual , expected )
193
193
194
+ expected = Dataset (
195
+ data_vars = {"a" : ("y" , [0.1 , np .nan ])}, coords = {"x" : ("y" , [0.1 , 0.1 ])}
196
+ )
197
+ actual = concat ([ds1 , ds2 ], dim = "y" , coords = "all" )
198
+
199
+ assert_identical (actual , expected )
194
200
195
- def test_multiple_missing_variables () -> None :
196
- datasets = create_concat_datasets (2 , 123 )
201
+
202
+ def test_concat_multiple_missing_variables () -> None :
203
+ datasets = create_concat_datasets (2 , seed = 123 )
197
204
expected = concat (datasets , dim = "day" )
198
205
vars_to_drop = ["pressure" , "cloud_cover" ]
199
206
@@ -216,7 +223,7 @@ def test_multiple_missing_variables() -> None:
216
223
217
224
218
225
@pytest .mark .parametrize ("include_day" , [True , False ])
219
- def test_concat_multiple_datasets_missing_vars_and_new_dim (include_day : bool ) -> None :
226
+ def test_concat_multiple_datasets_missing_vars (include_day : bool ) -> None :
220
227
vars_to_drop = [
221
228
"temperature" ,
222
229
"pressure" ,
@@ -225,7 +232,9 @@ def test_concat_multiple_datasets_missing_vars_and_new_dim(include_day: bool) ->
225
232
"cloud_cover" ,
226
233
]
227
234
228
- datasets = create_concat_datasets (len (vars_to_drop ), 123 , include_day = include_day )
235
+ datasets = create_concat_datasets (
236
+ len (vars_to_drop ), seed = 123 , include_day = include_day
237
+ )
229
238
expected = concat (datasets , dim = "day" )
230
239
231
240
for i , name in enumerate (vars_to_drop ):
@@ -235,36 +244,8 @@ def test_concat_multiple_datasets_missing_vars_and_new_dim(include_day: bool) ->
235
244
expected [name ][i : i + 1 , ...] = np .nan
236
245
237
246
# set up the test data
238
- datasets = [datasets [i ].drop_vars (vars_to_drop [i ]) for i in range (len (datasets ))]
239
-
240
- actual = concat (datasets , dim = "day" )
241
-
242
- assert list (actual .data_vars .keys ()) == [
243
- "pressure" ,
244
- "humidity" ,
245
- "precipitation" ,
246
- "cloud_cover" ,
247
- "temperature" ,
248
- ]
249
- assert_identical (actual , expected )
247
+ datasets = [ds .drop_vars (varname ) for ds , varname in zip (datasets , vars_to_drop )]
250
248
251
-
252
- def test_multiple_datasets_with_missing_variables () -> None :
253
- vars_to_drop = [
254
- "temperature" ,
255
- "pressure" ,
256
- "humidity" ,
257
- "precipitation" ,
258
- "cloud_cover" ,
259
- ]
260
- datasets = create_concat_datasets (len (vars_to_drop ), 123 )
261
-
262
- expected = concat (datasets , dim = "day" )
263
- for i , name in enumerate (vars_to_drop ):
264
- expected [name ][..., i * 2 : (i + 1 ) * 2 ] = np .nan
265
-
266
- # set up the test data
267
- datasets = [datasets [i ].drop_vars (vars_to_drop [i ]) for i in range (len (datasets ))]
268
249
actual = concat (datasets , dim = "day" )
269
250
270
251
assert list (actual .data_vars .keys ()) == [
@@ -277,10 +258,10 @@ def test_multiple_datasets_with_missing_variables() -> None:
277
258
assert_identical (actual , expected )
278
259
279
260
280
- def test_multiple_datasets_with_multiple_missing_variables () -> None :
261
+ def test_concat_multiple_datasets_with_multiple_missing_variables () -> None :
281
262
vars_to_drop_in_first = ["temperature" , "pressure" ]
282
263
vars_to_drop_in_second = ["humidity" , "precipitation" , "cloud_cover" ]
283
- datasets = create_concat_datasets (2 , 123 )
264
+ datasets = create_concat_datasets (2 , seed = 123 )
284
265
expected = concat (datasets , dim = "day" )
285
266
for name in vars_to_drop_in_first :
286
267
expected [name ][..., :2 ] = np .nan
@@ -303,8 +284,8 @@ def test_multiple_datasets_with_multiple_missing_variables() -> None:
303
284
assert_identical (actual , expected )
304
285
305
286
306
- def test_type_of_missing_fill () -> None :
307
- datasets = create_typed_datasets (2 , 123 )
287
+ def test_concat_type_of_missing_fill () -> None :
288
+ datasets = create_typed_datasets (2 , seed = 123 )
308
289
expected1 = concat (datasets , dim = "day" , fill_value = dtypes .NA )
309
290
expected2 = concat (datasets [::- 1 ], dim = "day" , fill_value = dtypes .NA )
310
291
vars = ["float" , "float2" , "string" , "int" , "datetime64" , "timedelta64" ]
@@ -334,11 +315,11 @@ def test_type_of_missing_fill() -> None:
334
315
assert_identical (actual , expected [0 ])
335
316
336
317
337
- def test_order_when_filling_missing () -> None :
318
+ def test_concat_order_when_filling_missing () -> None :
338
319
vars_to_drop_in_first : list [str ] = []
339
320
# drop middle
340
321
vars_to_drop_in_second = ["humidity" ]
341
- datasets = create_concat_datasets (2 , 123 )
322
+ datasets = create_concat_datasets (2 , seed = 123 )
342
323
expected1 = concat (datasets , dim = "day" )
343
324
for name in vars_to_drop_in_second :
344
325
expected1 [name ][..., 2 :] = np .nan
@@ -423,11 +404,9 @@ def create_ds(
423
404
def test_concat_fill_missing_variables (
424
405
concat_var_names , create_concat_ds , dim : bool , coord : bool
425
406
) -> None :
426
- # random single variables missing in each dataset
427
407
var_names = concat_var_names ()
408
+ drop_idx = [0 , 7 , 6 , 4 , 4 , 8 , 0 , 6 , 2 , 0 ]
428
409
429
- rng = np .random .default_rng (seed = 42 )
430
- drop_idx = [rng .integers (len (vlist )) for vlist in var_names ]
431
410
expected = concat (
432
411
create_concat_ds (var_names , dim = dim , coord = coord ), dim = "time" , data_vars = "all"
433
412
)
0 commit comments