Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/release-notes/1.10.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
* {func}`scanpy.experimental.pp.highly_variable_genes` using `flavor='pearson_residuals'`
now uses numba for variance computation {pr}`2612` {smaller}`S Dicks & P Angerer`
* {func}`scanpy.external.pp.harmony_integrate` now runs with 64 bit floats improving reproducibility {pr}`2655` {smaller}`S Dicks`
* Enhanced dask support for some internal utilities, paving the way for more extensive dask support {pr}`2696` {smaller}`P Angerer`

```{rubric} Docs
```
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ xfail_strict = true
nunit_attach_on = "fail"
markers = [
"internet: tests which rely on internet resources (enable with `--internet-tests`)",
"gpu: tests that use a GPU (currently unused, but needs to be specified here as we import anndata.tests.helpers, which uses it)",
]

[tool.coverage.run]
Expand Down
64 changes: 53 additions & 11 deletions scanpy/_utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
This file largely consists of the old _utils.py file. Over time, these functions
should be moved out of this file.
"""
from __future__ import annotations

import sys
import inspect
import warnings
Expand All @@ -11,21 +13,22 @@
from pathlib import Path
from weakref import WeakSet
from collections import namedtuple
from functools import partial, wraps
from functools import partial, singledispatch, wraps
from types import ModuleType, MethodType
from typing import Union, Callable, Optional, Mapping, Any, Dict, Tuple, Literal

import numpy as np
from numpy import random
from numpy.typing import NDArray
from scipy import sparse
from anndata import AnnData, __version__ as anndata_version
from textwrap import dedent
from packaging import version

from .._settings import settings
from .. import logging as logg

from .compute.is_constant import is_constant
from .._compat import DaskArray
from .compute.is_constant import is_constant # noqa: F401


class Empty(Enum):
Expand Down Expand Up @@ -400,12 +403,12 @@ def identify_groups(ref_labels, pred_labels, return_overlaps=False):


# backwards compat... remove this in the future
def sanitize_anndata(adata):
def sanitize_anndata(adata: AnnData) -> None:
    """Transform string annotations to categoricals."""
    # Thin backwards-compat wrapper (see note above its definition):
    # delegates to AnnData's private sanitizer, mutating `adata` in place.
    adata._sanitize()


def view_to_actual(adata):
def view_to_actual(adata: AnnData) -> None:
if adata.is_view:
warnings.warn(
"Received a view of an AnnData. Making a copy.",
Expand Down Expand Up @@ -483,8 +486,41 @@ def update_params(
# --------------------------------------------------------------------------------


def check_nonnegative_integers(X: Union[np.ndarray, sparse.spmatrix]):
_SparseMatrix = Union[sparse.csr_matrix, sparse.csc_matrix]
_MemoryArray = Union[NDArray, _SparseMatrix]
_SupportedArray = Union[_MemoryArray, DaskArray]


@singledispatch
def elem_mul(x: _SupportedArray, y: _SupportedArray) -> _SupportedArray:
    """Element-wise multiplication, dispatching on the type of ``x``."""
    raise NotImplementedError


@elem_mul.register(np.ndarray)
@elem_mul.register(sparse.spmatrix)
def _elem_mul_in_mem(x: _MemoryArray, y: _MemoryArray) -> _MemoryArray:
    """In-memory (numpy / scipy sparse) implementation of :func:`elem_mul`."""
    if not isinstance(x, sparse.spmatrix):
        # Dense case: numpy broadcasting handles it directly.
        return x * y
    # `spmatrix.multiply` returns a coo_matrix, so cast back to the input
    # type to give callers the same sparse format they passed in.
    return type(x)(x.multiply(y))


@elem_mul.register(DaskArray)
def _elem_mul_dask(x: DaskArray, y: DaskArray) -> DaskArray:
    """Dask implementation: apply :func:`elem_mul` chunk-wise.

    Each chunk is an in-memory array, so the recursive call re-dispatches
    to the numpy/scipy implementation per chunk.
    """
    import dask.array as da

    return da.map_blocks(elem_mul, x, y)


@singledispatch
def check_nonnegative_integers(X: _SupportedArray) -> bool | DaskArray:
    """Checks values of X to ensure it is count data"""
    # Fallback for unregistered array types; concrete implementations are
    # registered below for numpy/scipy (in-memory) and dask arrays.
    raise NotImplementedError


@check_nonnegative_integers.register(np.ndarray)
@check_nonnegative_integers.register(sparse.spmatrix)
def _check_nonnegative_integers_in_mem(X: _MemoryArray) -> bool:
from numbers import Integral

data = X if isinstance(X, np.ndarray) else X.data
Expand All @@ -494,13 +530,19 @@ def check_nonnegative_integers(X: Union[np.ndarray, sparse.spmatrix]):
# Check all are integers
elif issubclass(data.dtype.type, Integral):
return True
elif np.any(~np.equal(np.mod(data, 1), 0)):
return False
else:
return True
return not np.any((data % 1) != 0)


@check_nonnegative_integers.register(DaskArray)
def _check_nonnegative_integers_dask(X: DaskArray) -> DaskArray:
    """Dask implementation: returns a lazy boolean result, not a plain bool."""
    # map_blocks re-dispatches to the in-memory check per chunk;
    # drop_axis=(0, 1) collapses both axes of each chunk's result.
    # NOTE(review): presumably callers reduce/compute the result before
    # branching on it (cf. the `not check_nonnegative_integers(X)` caller
    # in _highly_variable_genes.py) — confirm chunked inputs behave here.
    return X.map_blocks(check_nonnegative_integers, dtype=bool, drop_axis=(0, 1))

def select_groups(adata, groups_order_subset="all", key="groups"):

def select_groups(
adata: AnnData,
groups_order_subset: list[str] | Literal["all"] = "all",
key: str = "groups",
) -> tuple[list[str], NDArray[np.bool_]]:
"""Get subset of groups in adata.obs[key]."""
groups_order = adata.obs[key].cat.categories
if key + "_masks" in adata.uns:
Expand Down
2 changes: 1 addition & 1 deletion scanpy/experimental/pp/_highly_variable_genes.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def _highly_variable_pearson_residuals(
computed_on = layer if layer else "adata.X"

# Check for raw counts
if check_values and (check_nonnegative_integers(X) is False):
if check_values and not check_nonnegative_integers(X):
warnings.warn(
"`flavor='pearson_residuals'` expects raw count data, but non-integers were found.",
UserWarning,
Expand Down
13 changes: 8 additions & 5 deletions scanpy/preprocessing/_utils.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
from typing import Literal
import numpy as np
from scipy import sparse
import numba
from scipy import sparse

from .._utils import _SupportedArray, elem_mul


def _get_mean_var(X, *, axis=0):
if sparse.issparse(X):
def _get_mean_var(X: _SupportedArray, *, axis: Literal[0, 1] = 0) -> _SupportedArray:
if isinstance(X, sparse.spmatrix):
mean, var = sparse_mean_variance_axis(X, axis=axis)
else:
mean = np.mean(X, axis=axis, dtype=np.float64)
mean_sq = np.multiply(X, X).mean(axis=axis, dtype=np.float64)
mean = X.mean(axis=axis, dtype=np.float64)
mean_sq = elem_mul(X, X).mean(axis=axis, dtype=np.float64)
var = mean_sq - mean**2
# enforce R convention (unbiased estimator) for variance
var *= X.shape[axis] / (X.shape[axis] - 1)
Expand Down
30 changes: 0 additions & 30 deletions scanpy/testing/_pytest/fixtures/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,9 @@
from __future__ import annotations

from pathlib import Path
from collections.abc import Callable

import pytest
import numpy as np
from numpy.typing import ArrayLike
from scipy import sparse
from anndata.tests.helpers import asarray

from ...._compat import DaskArray
from ..._pytest.marks import needs
from .data import (
_pbmc3ks_parametrized_session,
pbmc3k_parametrized,
Expand All @@ -23,7 +16,6 @@


__all__ = [
"array_type",
"float_dtype",
"doctest_env",
"_pbmc3ks_parametrized_session",
Expand All @@ -32,28 +24,6 @@
]


def _as_dense_dask_array(x: ArrayLike) -> DaskArray:
    """Convert *x* to a dense in-memory array wrapped in a dask array."""
    import dask.array as da

    return da.from_array(asarray(x))


# Parametrized over the common array container types; the dask param is
# skipped when dask is not installed (via the `needs` mark).
@pytest.fixture(
    params=[
        pytest.param(asarray, id="numpy-ndarray"),
        pytest.param(sparse.csr_matrix, id="scipy-csr"),
        pytest.param(sparse.csc_matrix, id="scipy-csc"),
        # Dask doesn’t support scipy sparse matrices, so only dense here
        pytest.param(_as_dense_dask_array, marks=[needs("dask")], id="dask-array"),
    ]
)
def array_type(
    request,
) -> Callable[[ArrayLike], DaskArray | np.ndarray | sparse.spmatrix]:
    """Function which converts passed array to one of the common array types."""
    return request.param


@pytest.fixture(params=[np.float64, np.float32])
def float_dtype(request):
    """Parametrized fixture yielding each common float dtype in turn."""
    return request.param
return request.param
Expand Down
75 changes: 75 additions & 0 deletions scanpy/testing/_pytest/params.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
"""Like fixtures, but more flexible"""

from __future__ import annotations

from collections.abc import Iterable
from typing import TYPE_CHECKING, Literal

import pytest
from scipy import sparse
from anndata.tests.helpers import asarray, as_dense_dask_array, as_sparse_dask_array

from .._pytest.marks import needs

if TYPE_CHECKING:
from _pytest.mark.structures import ParameterSet


def param_with(
    at: ParameterSet,
    *,
    marks: Iterable[pytest.Mark | pytest.MarkDecorator] = (),
    id: str | None = None,
) -> ParameterSet:
    """Derive a new param from *at*, appending *marks* and optionally replacing its id."""
    combined_marks = [*at.marks, *marks]
    param_id = id if id is not None and id else at.id
    return pytest.param(*at.values, marks=combined_marks, id=param_id)


# Maps (storage, sparsity) -> pytest params that construct that array type.
# The ARRAY_TYPES_* tuples below are filtered views of this mapping.
MAP_ARRAY_TYPES: dict[
    tuple[Literal["mem", "dask"], Literal["dense", "sparse"]],
    tuple[ParameterSet, ...],
] = {
    ("mem", "dense"): (pytest.param(asarray, id="numpy_ndarray"),),
    ("mem", "sparse"): (
        pytest.param(sparse.csr_matrix, id="scipy_csr"),
        pytest.param(sparse.csc_matrix, id="scipy_csc"),
    ),
    ("dask", "dense"): (
        pytest.param(as_dense_dask_array, marks=[needs("dask")], id="dask_array_dense"),
    ),
    ("dask", "sparse"): (
        pytest.param(
            as_sparse_dask_array, marks=[needs("dask")], id="dask_array_sparse"
        ),
        # probably not necessary to also do csc
    ),
}

# All in-memory array types (numpy dense + scipy sparse).
ARRAY_TYPES_MEM = tuple(
    at for (strg, _), ats in MAP_ARRAY_TYPES.items() if strg == "mem" for at in ats
)
# All dask-backed array types.
ARRAY_TYPES_DASK = tuple(
    at for (strg, _), ats in MAP_ARRAY_TYPES.items() if strg == "dask" for at in ats
)

# All dense array types (in-memory and dask).
ARRAY_TYPES_DENSE = tuple(
    at for (_, spsty), ats in MAP_ARRAY_TYPES.items() if spsty == "dense" for at in ats
)
# All sparse array types (in-memory and dask).
# Bug fix: this previously filtered on "dense", making ARRAY_TYPES_SPARSE an
# exact duplicate of ARRAY_TYPES_DENSE instead of selecting the sparse params.
ARRAY_TYPES_SPARSE = tuple(
    at for (_, spsty), ats in MAP_ARRAY_TYPES.items() if spsty == "sparse" for at in ats
)

# Every array type, with the sparse-in-dask combination marked xfail.
ARRAY_TYPES_SUPPORTED = tuple(
    (
        param_with(at, marks=[pytest.mark.xfail(reason="sparse-in-dask not supported")])
        if attrs == ("dask", "sparse")
        else at
    )
    for attrs, ats in MAP_ARRAY_TYPES.items()
    for at in ats
)
"""
Sparse matrices in dask arrays aren’t officially supported upstream,
so add xfail to them.
"""

# Every array type, unmodified.
ARRAY_TYPES = tuple(at for ats in MAP_ARRAY_TYPES.values() for at in ats)
4 changes: 3 additions & 1 deletion scanpy/tests/test_highly_variable_genes.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from pathlib import Path

import pytest
import pandas as pd
import numpy as np
import scanpy as sc
from pathlib import Path

from scanpy.testing._helpers import _check_check_values_warnings
from scanpy.testing._helpers.data import pbmc3k, pbmc68k_reduced
from scanpy.testing._pytest.marks import needs
Expand Down
2 changes: 2 additions & 0 deletions scanpy/tests/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

from scanpy._compat import DaskArray
from scanpy.testing._helpers.data import pbmc68k_reduced
from scanpy.testing._pytest.params import ARRAY_TYPES


mark_flaky = pytest.mark.xfail(
Expand Down Expand Up @@ -94,6 +95,7 @@ def test_correctness(metric, size, expected, assert_equal):
assert metric(adata, vals=connected) == expected


@pytest.mark.parametrize("array_type", ARRAY_TYPES)
def test_graph_metrics_w_constant_values(metric, array_type, assert_equal):
# https://github.com/scverse/scanpy/issues/1806
pbmc = pbmc68k_reduced()
Expand Down
22 changes: 10 additions & 12 deletions scanpy/tests/test_normalization.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,28 @@
from __future__ import annotations

from collections.abc import Callable

import pytest
import numpy as np
from anndata import AnnData
from scipy.sparse import csr_matrix
from scipy import sparse

from scanpy.testing._pytest.marks import needs

try:
import dask.array as da
except ImportError:
da = None
from anndata.tests.helpers import assert_equal

import scanpy as sc
from scanpy.testing._helpers import (
check_rep_mutation,
check_rep_results,
_check_check_values_warnings,
)
from anndata.tests.helpers import assert_equal, asarray

# TODO: Add support for sparse-in-dask
from scanpy.testing._pytest.params import ARRAY_TYPES_SUPPORTED


X_total = [[1, 0], [3, 0], [5, 6]]
X_frac = [[1, 0, 1], [3, 0, 1], [5, 6, 1]]
# Fixture matrices for the normalization tests below (now ndarrays so the
# parametrized array_type converters can consume them uniformly).
X_total = np.array([[1, 0], [3, 0], [5, 6]])
X_frac = np.array([[1, 0, 1], [3, 0, 1], [5, 6, 1]])


@pytest.mark.parametrize("array_type", ARRAY_TYPES_SUPPORTED)
@pytest.mark.parametrize("dtype", ["float32", "int64"])
def test_normalize_total(array_type, dtype):
adata = AnnData(array_type(X_total).astype(dtype))
Expand All @@ -41,6 +36,7 @@ def test_normalize_total(array_type, dtype):
assert np.allclose(np.ravel(adata.X[:, 1:3].sum(axis=1)), [1.0, 1.0, 1.0])


@pytest.mark.parametrize("array_type", ARRAY_TYPES_SUPPORTED)
@pytest.mark.parametrize("dtype", ["float32", "int64"])
def test_normalize_total_rep(array_type, dtype):
# Test that layer kwarg works
Expand All @@ -49,6 +45,7 @@ def test_normalize_total_rep(array_type, dtype):
check_rep_results(sc.pp.normalize_total, X, fields=["layer"])


@pytest.mark.parametrize("array_type", ARRAY_TYPES_SUPPORTED)
@pytest.mark.parametrize("dtype", ["float32", "int64"])
def test_normalize_total_layers(array_type, dtype):
adata = AnnData(array_type(X_total).astype(dtype))
Expand All @@ -58,6 +55,7 @@ def test_normalize_total_layers(array_type, dtype):
assert np.allclose(adata.layers["layer"].sum(axis=1), [3.0, 3.0, 3.0])


@pytest.mark.parametrize("array_type", ARRAY_TYPES_SUPPORTED)
@pytest.mark.parametrize("dtype", ["float32", "int64"])
def test_normalize_total_view(array_type, dtype):
adata = AnnData(array_type(X_total).astype(dtype))
Expand Down
Loading