Description
Running de.test.wald on an un-normalized adata.X matrix with only 2 samples and ~5000 cells to test differential expression in scanpy. All libraries (numpy, scanpy, diffxpy, sparse, and dask) are updated to their most recent versions.
test = de.test.wald(data=adata, formula_loc="~ 1 + Group", factor_loc_totest="Group")
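For context, here is a minimal, self-contained sketch of the setup, with a toy count matrix standing in for the real data (the cell/gene counts and the "sample1"/"sample2" labels are made-up placeholders; the real adata.X is un-normalized counts as described above):

import anndata
import numpy as np
import scipy.sparse
import diffxpy.api as de

# Toy stand-in: ~5000 cells x 1842 genes of raw (un-normalized) counts,
# split across 2 samples via the "Group" column in adata.obs.
rng = np.random.default_rng(0)
counts = scipy.sparse.csr_matrix(
    rng.negative_binomial(2, 0.3, size=(5000, 1842)).astype(np.float64)
)
adata = anndata.AnnData(X=counts)
adata.obs["Group"] = np.repeat(["sample1", "sample2"], 2500)

test = de.test.wald(data=adata, formula_loc="~ 1 + Group", factor_loc_totest="Group")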
To make this run at all, I initially needed to include:
%env SPARSE_AUTO_DENSIFY=1
Otherwise I would receive the error "RuntimeError: Cannot convert a sparse array to dense automatically. To manually densify, use the todense method." mentioned in other issues.
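(The %env magic works in Jupyter; in a plain script the equivalent, as a sketch, is to set the variable via os.environ before the first import, since sparse appears to read SPARSE_AUTO_DENSIFY once at import time:)

import os

# Must be set before the first import of sparse (diffxpy imports it
# transitively), because sparse reads SPARSE_AUTO_DENSIFY at import time.
os.environ["SPARSE_AUTO_DENSIFY"] = "1"

import diffxpy.api as de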
With this solved, the run now proceeds through several iterations:
training location model: False
training scale model: True
iter 0: ll=74052044.544684
iter 1: ll=74052044.544684, converged: 0.00% (loc: 100.00%, scale update: False), in 0.00sec
iter 2: ll=53700205.003682, converged: 16.18% (loc: 16.18%, scale update: True), in 883.76sec
iter 3: ll=53700205.003682, converged: 16.18% (loc: 100.00%, scale update: False), in 0.01sec
iter 4: ll=53407203.078969, converged: 84.27% (loc: 84.27%, scale update: True), in 692.52sec
iter 5: ll=53407203.078969, converged: 84.27% (loc: 100.00%, scale update: False), in 0.00sec
iter 6: ll=53376103.605985, converged: 94.97% (loc: 94.97%, scale update: True), in 178.97sec
iter 7: ll=53376103.605985, converged: 94.97% (loc: 100.00%, scale update: False), in 0.00sec
Fitting 1842 dispersion models: (progress not available with multiprocessing)
But then it fails with this error and traceback:
ValueError Traceback (most recent call last)
in <module>
----> 1 test = de.test.wald(data=adata, formula_loc="~ 1 + Group", factor_loc_totest="Group")
~/anaconda3/envs/scanpy/lib/python3.8/site-packages/diffxpy/testing/tests.py in wald(data, factor_loc_totest, coef_to_test, formula_loc, formula_scale, as_numeric, init_a, init_b, gene_names, sample_description, dmat_loc, dmat_scale, constraints_loc, constraints_scale, noise_model, size_factors, batch_size, backend, train_args, training_strategy, quick_scale, dtype, **kwargs)
719
720 # Fit model.
--> 721 model = _fit(
722 noise_model=noise_model,
723 data=data,
~/anaconda3/envs/scanpy/lib/python3.8/site-packages/diffxpy/testing/tests.py in _fit(noise_model, data, design_loc, design_scale, design_loc_names, design_scale_names, constraints_loc, constraints_scale, init_model, init_a, init_b, gene_names, size_factors, batch_size, backend, training_strategy, quick_scale, train_args, close_session, dtype)
242 pass
243
--> 244 estim.train_sequence(
245 training_strategy=training_strategy,
246 **train_args
~/anaconda3/envs/scanpy/lib/python3.8/site-packages/batchglm/models/base/estimator.py in train_sequence(self, training_strategy, **kwargs)
122 (x, str(d[x]), str(kwargs[x]))
123 )
--> 124 self.train(**d, **kwargs)
125 logger.debug("Training sequence #%d complete", idx + 1)
126
~/anaconda3/envs/scanpy/lib/python3.8/site-packages/batchglm/train/numpy/base_glm/estimator.py in train(self, max_steps, method_b, update_b_freq, ftol_b, lr_b, max_iter_b, nproc, **kwargs)
115 self.model.b_var = self.model.b_var + b_step
116 # Reverse update by feature if update leads to worse loss:
--> 117 ll_proposal = - self.model.ll_byfeature_j(j=idx_update).compute()
118 idx_bad_step = idx_update[np.where(ll_proposal > ll_current[idx_update])[0]]
119 if isinstance(self.model.b_var, dask.array.core.Array):
~/anaconda3/envs/scanpy/lib/python3.8/site-packages/dask/base.py in compute(self, **kwargs)
281 dask.base.compute
282 """
--> 283 (result,) = compute(self, traverse=False, **kwargs)
284 return result
285
~/anaconda3/envs/scanpy/lib/python3.8/site-packages/dask/base.py in compute(*args, **kwargs)
564
565 results = schedule(dsk, keys, **kwargs)
--> 566 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
567
568
~/anaconda3/envs/scanpy/lib/python3.8/site-packages/dask/base.py in <listcomp>(.0)
564
565 results = schedule(dsk, keys, **kwargs)
--> 566 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
567
568
~/anaconda3/envs/scanpy/lib/python3.8/site-packages/dask/array/core.py in finalize(results)
1081 while isinstance(results2, (tuple, list)):
1082 if len(results2) > 1:
-> 1083 return concatenate3(results)
1084 else:
1085 results2 = results2[0]
~/anaconda3/envs/scanpy/lib/python3.8/site-packages/dask/array/core.py in concatenate3(arrays)
4747 try:
4748 x = unpack_singleton(arrays)
-> 4749 return _concatenate2(arrays, axes=tuple(range(x.ndim)))
4750 except TypeError:
4751 pass
~/anaconda3/envs/scanpy/lib/python3.8/site-packages/dask/array/core.py in _concatenate2(arrays, axes)
348 type(max(arrays, key=lambda x: getattr(x, "array_priority", 0)))
349 )
--> 350 return concatenate(arrays, axis=axes[0])
351
352
~/anaconda3/envs/scanpy/lib/python3.8/site-packages/sparse/_common.py in concatenate(arrays, axis, compressed_axes)
1253 from ._coo import concatenate as coo_concat
1254
-> 1255 return coo_concat(arrays, axis)
1256 else:
1257 from ._compressed import concatenate as gcxs_concat
~/anaconda3/envs/scanpy/lib/python3.8/site-packages/sparse/_coo/common.py in concatenate(arrays, axis)
159 from .core import COO
160
--> 161 check_consistent_fill_value(arrays)
162
163 arrays = [x if isinstance(x, COO) else COO(x) for x in arrays]
~/anaconda3/envs/scanpy/lib/python3.8/site-packages/sparse/_utils.py in check_consistent_fill_value(arrays)
468
469 if not all(isinstance(s, SparseArray) for s in arrays):
--> 470 raise ValueError("All arrays must be instances of SparseArray.")
471 if len(arrays) == 0:
472 raise ValueError("At least one array required.")
ValueError: All arrays must be instances of SparseArray.
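From the traceback, my guess is that the failure happens in sparse's check_consistent_fill_value when dask concatenates the per-chunk results of ll_byfeature_j: some chunks are sparse.COO while others have apparently been densified to plain numpy arrays (possibly a side effect of SPARSE_AUTO_DENSIFY=1), and sparse.concatenate refuses the mixture. Densifying adata.X before the call should avoid the mixed chunk types entirely. This is only a workaround sketch, not a fix for the underlying chunk-type mismatch, and it is only practical if the dense matrix fits in memory:

import numpy as np
import scipy.sparse

# Workaround sketch: densify the count matrix up front so dask never has to
# concatenate a mix of sparse.COO chunks and dense numpy chunks.
# Feasible here since a dense ~5000 x 1842 float64 matrix is small.
if scipy.sparse.issparse(adata.X):
    adata.X = np.asarray(adata.X.todense())

test = de.test.wald(data=adata, formula_loc="~ 1 + Group", factor_loc_totest="Group")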