All arrays must be instances of SparseArray #194

@ealsop

Description
I am running de.test.wald on an un-normalized adata.X matrix with only 2 samples and ~5000 cells to test differential expression in scanpy. All libraries (numpy, scanpy, diffxpy, sparse, and dask) are updated to their most recent versions.

test = de.test.wald(data=adata, formula_loc="~ 1 + Group", factor_loc_totest="Group")

To make this run at all, I initially needed to include:

%env SPARSE_AUTO_DENSIFY=1

Otherwise I would receive the error mentioned in other issues: "RuntimeError: Cannot convert a sparse array to dense automatically. To manually densify, use the todense method."
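For reference, a sketch of the same workaround in plain Python (the `%env` magic is IPython-only; my assumption is that sparse reads this variable once at import time, so it has to be set before sparse or anything that imports it):

import os

# Must be set before sparse is imported, since (to my understanding)
# SPARSE_AUTO_DENSIFY is read at import time.
os.environ["SPARSE_AUTO_DENSIFY"] = "1"

import diffxpy.api as de  # imports sparse transitively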

With this solved, I am now running through several iterations:

training location model: False
training scale model: True
iter 0: ll=74052044.544684
iter 1: ll=74052044.544684, converged: 0.00% (loc: 100.00%, scale update: False), in 0.00sec
iter 2: ll=53700205.003682, converged: 16.18% (loc: 16.18%, scale update: True), in 883.76sec
iter 3: ll=53700205.003682, converged: 16.18% (loc: 100.00%, scale update: False), in 0.01sec
iter 4: ll=53407203.078969, converged: 84.27% (loc: 84.27%, scale update: True), in 692.52sec
iter 5: ll=53407203.078969, converged: 84.27% (loc: 100.00%, scale update: False), in 0.00sec
iter 6: ll=53376103.605985, converged: 94.97% (loc: 94.97%, scale update: True), in 178.97sec
iter 7: ll=53376103.605985, converged: 94.97% (loc: 100.00%, scale update: False), in 0.00sec
Fitting 1842 dispersion models: (progress not available with multiprocessing)

But I am getting this error / traceback:

ValueError Traceback (most recent call last)
&lt;ipython-input&gt; in &lt;module&gt;
----> 1 test = de.test.wald(data=adata, formula_loc="~ 1 + Group", factor_loc_totest="Group")

~/anaconda3/envs/scanpy/lib/python3.8/site-packages/diffxpy/testing/tests.py in wald(data, factor_loc_totest, coef_to_test, formula_loc, formula_scale, as_numeric, init_a, init_b, gene_names, sample_description, dmat_loc, dmat_scale, constraints_loc, constraints_scale, noise_model, size_factors, batch_size, backend, train_args, training_strategy, quick_scale, dtype, **kwargs)
719
720 # Fit model.
--> 721 model = _fit(
722 noise_model=noise_model,
723 data=data,

~/anaconda3/envs/scanpy/lib/python3.8/site-packages/diffxpy/testing/tests.py in _fit(noise_model, data, design_loc, design_scale, design_loc_names, design_scale_names, constraints_loc, constraints_scale, init_model, init_a, init_b, gene_names, size_factors, batch_size, backend, training_strategy, quick_scale, train_args, close_session, dtype)
242 pass
243
--> 244 estim.train_sequence(
245 training_strategy=training_strategy,
246 **train_args

~/anaconda3/envs/scanpy/lib/python3.8/site-packages/batchglm/models/base/estimator.py in train_sequence(self, training_strategy, **kwargs)
122 (x, str(d[x]), str(kwargs[x]))
123 )
--> 124 self.train(**d, **kwargs)
125 logger.debug("Training sequence #%d complete", idx + 1)
126

~/anaconda3/envs/scanpy/lib/python3.8/site-packages/batchglm/train/numpy/base_glm/estimator.py in train(self, max_steps, method_b, update_b_freq, ftol_b, lr_b, max_iter_b, nproc, **kwargs)
115 self.model.b_var = self.model.b_var + b_step
116 # Reverse update by feature if update leads to worse loss:
--> 117 ll_proposal = - self.model.ll_byfeature_j(j=idx_update).compute()
118 idx_bad_step = idx_update[np.where(ll_proposal > ll_current[idx_update])[0]]
119 if isinstance(self.model.b_var, dask.array.core.Array):

~/anaconda3/envs/scanpy/lib/python3.8/site-packages/dask/base.py in compute(self, **kwargs)
281 dask.base.compute
282 """
--> 283 (result,) = compute(self, traverse=False, **kwargs)
284 return result
285

~/anaconda3/envs/scanpy/lib/python3.8/site-packages/dask/base.py in compute(*args, **kwargs)
564
565 results = schedule(dsk, keys, **kwargs)
--> 566 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
567
568

~/anaconda3/envs/scanpy/lib/python3.8/site-packages/dask/base.py in &lt;listcomp&gt;(.0)
564
565 results = schedule(dsk, keys, **kwargs)
--> 566 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
567
568

~/anaconda3/envs/scanpy/lib/python3.8/site-packages/dask/array/core.py in finalize(results)
1081 while isinstance(results2, (tuple, list)):
1082 if len(results2) > 1:
-> 1083 return concatenate3(results)
1084 else:
1085 results2 = results2[0]

~/anaconda3/envs/scanpy/lib/python3.8/site-packages/dask/array/core.py in concatenate3(arrays)
4747 try:
4748 x = unpack_singleton(arrays)
-> 4749 return _concatenate2(arrays, axes=tuple(range(x.ndim)))
4750 except TypeError:
4751 pass

~/anaconda3/envs/scanpy/lib/python3.8/site-packages/dask/array/core.py in _concatenate2(arrays, axes)
348 type(max(arrays, key=lambda x: getattr(x, "array_priority", 0)))
349 )
--> 350 return concatenate(arrays, axis=axes[0])
351
352

~/anaconda3/envs/scanpy/lib/python3.8/site-packages/sparse/_common.py in concatenate(arrays, axis, compressed_axes)
1253 from ._coo import concatenate as coo_concat
1254
-> 1255 return coo_concat(arrays, axis)
1256 else:
1257 from ._compressed import concatenate as gcxs_concat

~/anaconda3/envs/scanpy/lib/python3.8/site-packages/sparse/_coo/common.py in concatenate(arrays, axis)
159 from .core import COO
160
--> 161 check_consistent_fill_value(arrays)
162
163 arrays = [x if isinstance(x, COO) else COO(x) for x in arrays]

~/anaconda3/envs/scanpy/lib/python3.8/site-packages/sparse/_utils.py in check_consistent_fill_value(arrays)
468
469 if not all(isinstance(s, SparseArray) for s in arrays):
--> 470 raise ValueError("All arrays must be instances of SparseArray.")
471 if len(arrays) == 0:
472 raise ValueError("At least one array required.")

ValueError: All arrays must be instances of SparseArray.
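Looking at the final frames: check_consistent_fill_value raises as soon as any element of arrays is not a SparseArray, so my guess at the root cause is that the dask graph here mixes sparse.COO chunks with dense numpy chunks (possibly a side effect of SPARSE_AUTO_DENSIFY=1 densifying some intermediate results but not others). A minimal sketch that reproduces the same ValueError directly:

import numpy as np
import sparse

coo = sparse.COO.from_numpy(np.eye(3))  # sparse chunk
dense = np.ones((3, 3))                 # dense chunk

# sparse's concatenate checks fill values before converting its inputs,
# so mixing array types fails exactly like the traceback above:
sparse.concatenate([coo, dense], axis=0)
# ValueError: All arrays must be instances of SparseArray.

As a possible workaround (untested, just an assumption on my part), forcing adata.X to a single consistent array type before calling de.test.wald might avoid the mixed concatenation entirely, e.g.:

import scipy.sparse

# Hypothetical workaround: make adata.X fully dense so no sparse/dense
# mixing can occur inside the dask graph.
if scipy.sparse.issparse(adata.X):
    adata.X = adata.X.toarray()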
