|
2 | 2 | from __future__ import annotations |
3 | 3 |
|
4 | 4 | from importlib.util import find_spec |
| 5 | +from pathlib import Path |
5 | 6 | from typing import TYPE_CHECKING, cast |
6 | 7 |
|
7 | 8 | import numpy as np |
8 | 9 | import pytest |
| 10 | +import scipy.sparse as sps |
9 | 11 | from numpy.exceptions import AxisError |
10 | 12 |
|
11 | 13 | from fast_array_utils import stats, types |
12 | 14 | from testing.fast_array_utils import SUPPORTED_TYPES, Flags |
13 | 15 |
|
14 | 16 |
|
# Directory with binary test fixtures, located next to this test module.
DATA_DIR = Path(__file__).parent.joinpath("data")
| 18 | + |
| 19 | + |
15 | 20 | if TYPE_CHECKING: |
16 | 21 | from collections.abc import Callable |
17 | 22 | from typing import Any, Literal, Protocol, TypeAlias |
@@ -126,6 +131,21 @@ def to_np_dense_checked( |
126 | 131 | return stat |
127 | 132 |
|
128 | 133 |
|
@pytest.fixture(scope="session")
def pbmc64k_reduced_raw() -> sps.csr_array[np.float32]:
    """Load the raw matrix of Scanpy’s pbmc68k_reduced dataset from ``DATA_DIR``.

    The fixture is session-scoped, so the file is read once per test session.

    The stored file was generated using:
    >>> if not find_spec("scanpy"):
    ...     pytest.skip()
    >>> import scanpy as sc
    >>> import scipy.sparse as sps
    >>> arr = sps.csr_array(sc.datasets.pbmc68k_reduced().raw.X)
    >>> sps.save_npz("pbmc68k_reduced_raw_csr.npz", arr)
    """
    npz_path = DATA_DIR / "pbmc68k_reduced_raw_csr.npz"
    loaded = sps.load_npz(npz_path)
    # The cast only narrows the static type; nothing is converted at runtime.
    return cast("sps.csr_array[np.float32]", loaded)
| 147 | + |
| 148 | + |
129 | 149 | @pytest.mark.array_type(skip={*ATS_SPARSE_DS, Flags.Matrix}) |
130 | 150 | @pytest.mark.parametrize("func", STAT_FUNCS) |
131 | 151 | @pytest.mark.parametrize(("ndim", "axis"), [(1, 0), (2, 3), (2, -1)], ids=["1d-ax0", "2d-ax3", "2d-axneg"]) |
@@ -273,6 +293,23 @@ def test_mean_var_sparse_32(array_type: ArrayType[types.CSArray]) -> None: |
273 | 293 | assert resid_fau < resid_skl |
274 | 294 |
|
275 | 295 |
|
@pytest.mark.array_type({at for at in SUPPORTED_TYPES if at.flags & Flags.Sparse and at.flags & Flags.Dask})
def test_mean_var_pbmc_dask(array_type: ArrayType[types.DaskArray], pbmc64k_reduced_raw: sps.csr_array[np.float32]) -> None:
    """Check float32 precision of ``stats.mean_var`` on a bigger dataset.

    The statistics computed on the array built by ``array_type`` are compared
    against those computed directly on the sparse matrix from the fixture.

    This test is known to be flaky for sparse-in-dask; the cause is not yet understood.
    """
    reference = pbmc64k_reduced_raw
    wrapped = array_type(reference)

    expected_mean, expected_var = stats.mean_var(reference, axis=0, correction=1)
    wrapped_stats = stats.mean_var(wrapped, axis=0, correction=1)
    actual_mean, actual_var = [to_np_dense_checked(stat, 0, wrapped) for stat in wrapped_stats]

    # GPU-backed array types are compared with a looser relative tolerance.
    if array_type.flags & Flags.Gpu:
        tolerance = 1.0e-5
    else:
        tolerance = 1.0e-7
    np.testing.assert_allclose(actual_mean, expected_mean, rtol=tolerance)
    np.testing.assert_allclose(actual_var, expected_var, rtol=tolerance)
| 311 | + |
| 312 | + |
276 | 313 | @pytest.mark.array_type(skip={Flags.Disk, *ATS_CUPY_SPARSE}) |
277 | 314 | @pytest.mark.parametrize( |
278 | 315 | ("axis", "expected"), |
|
0 commit comments