Skip to content

Commit 5db51f9

Browse files
authored
Merge pull request #436 from lamblin/help_debug
Use "right" ld* (the lda/ldb/ldc leading dimensions) with 1D matrices
2 parents 65ed476 + f82d5be commit 5db51f9

File tree

2 files changed: 25 additions and 13 deletions.

.travis.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ before_install:
2323
- export PREFIX=$HOME/.local
2424
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update && brew install doxygen; fi
2525
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export PYTHONUSERBASE=$PREFIX; fi
26-
- pip install --user breathe sphinx sphinx_rtd_theme cython numpy 'mako>=0.7' six
26+
- pip install --user breathe sphinx==1.5.1 sphinx_rtd_theme cython numpy 'mako>=0.7' six
2727
- export PATH=$PATH:$PREFIX/bin
2828
- export CPATH=$CPATH:$PREFIX/include
2929
- export LIBRARY_PATH=$LIBRARY_PATH:$PREFIX/lib

src/gpuarray_array_blas.c

Lines changed: 24 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -254,7 +254,7 @@ int GpuArray_rgemm(cb_transpose transA, cb_transpose transB, double alpha,
254254
else {
255255
err = GpuArray_copy(&copyA, A, GA_F_ORDER);
256256
if (err != GA_NO_ERROR)
257-
goto cleanup;
257+
goto cleanup;
258258
Ap = &copyA;
259259
}
260260
}
@@ -264,7 +264,7 @@ int GpuArray_rgemm(cb_transpose transA, cb_transpose transB, double alpha,
264264
else {
265265
err = GpuArray_copy(&copyB, B, GA_F_ORDER);
266266
if (err != GA_NO_ERROR)
267-
goto cleanup;
267+
goto cleanup;
268268
Bp = &copyB;
269269
}
270270
}
@@ -388,7 +388,7 @@ int GpuArray_rger(double alpha, GpuArray *X, GpuArray *Y, GpuArray *A,
388388
else {
389389
err = GpuArray_copy(&copyX, X, GA_ANY_ORDER);
390390
if (err != GA_NO_ERROR)
391-
goto cleanup;
391+
goto cleanup;
392392
Xp = &copyX;
393393
}
394394
}
@@ -398,7 +398,7 @@ int GpuArray_rger(double alpha, GpuArray *X, GpuArray *Y, GpuArray *A,
398398
else {
399399
err = GpuArray_copy(&copyY, Y, GA_ANY_ORDER);
400400
if (err != GA_NO_ERROR)
401-
goto cleanup;
401+
goto cleanup;
402402
Yp = &copyY;
403403
}
404404
}
@@ -526,7 +526,7 @@ int GpuArray_rgemmBatch_3d(cb_transpose transA, cb_transpose transB, double alph
526526
err = GpuArray_copy(&copyA, A, GA_C_ORDER);
527527
cA = 1;
528528
if (err != GA_NO_ERROR)
529-
goto cleanup;
529+
goto cleanup;
530530
Ap = &copyA;
531531
}
532532
}
@@ -538,7 +538,7 @@ int GpuArray_rgemmBatch_3d(cb_transpose transA, cb_transpose transB, double alph
538538
err = GpuArray_copy(&copyB, B, GA_C_ORDER);
539539
cB = 1;
540540
if (err != GA_NO_ERROR)
541-
goto cleanup;
541+
goto cleanup;
542542
Bp = &copyB;
543543
}
544544
}
@@ -550,24 +550,32 @@ int GpuArray_rgemmBatch_3d(cb_transpose transA, cb_transpose transB, double alph
550550

551551
if (cC == 2) {
552552
o = cb_fortran;
553-
ldc = Cp->strides[2] / elsize;
553+
ldc = Cp->dimensions[2] > 1
554+
? Cp->strides[2] / elsize
555+
: Cp->dimensions[1];
554556
} else if (cC == 1) {
555557
o = cb_c;
556-
ldc = Cp->strides[1] / elsize;
558+
ldc = Cp->dimensions[1] > 1
559+
? Cp->strides[1] / elsize
560+
: Cp->dimensions[2];
557561
} else {
558562
err = GA_VALUE_ERROR;
559563
goto cleanup;
560564
}
561565
if (cA == 2) {
562-
lda = Ap->strides[2] / elsize;
566+
lda = Ap->dimensions[2] > 1
567+
? Ap->strides[2] / elsize
568+
: Ap->dimensions[1];
563569
if (o == cb_c) {
564570
if (transA == cb_no_trans)
565571
transA = cb_trans;
566572
else
567573
transA = cb_no_trans;
568574
}
569575
} else if (cA == 1) {
570-
lda = Ap->strides[1] / elsize;
576+
lda = Ap->dimensions[1] > 1
577+
? Ap->strides[1] / elsize
578+
: Ap->dimensions[2];
571579
if (o == cb_fortran) {
572580
if (transA == cb_no_trans)
573581
transA = cb_trans;
@@ -579,15 +587,19 @@ int GpuArray_rgemmBatch_3d(cb_transpose transA, cb_transpose transB, double alph
579587
goto cleanup;
580588
}
581589
if (cB == 2) {
582-
ldb = Bp->strides[2] / elsize;
590+
ldb = Bp->dimensions[2] > 1
591+
? Bp->strides[2] / elsize
592+
: Bp->dimensions[1];
583593
if (o == cb_c) {
584594
if (transB == cb_no_trans)
585595
transB = cb_trans;
586596
else
587597
transB = cb_no_trans;
588598
}
589599
} else if (cB == 1) {
590-
ldb = Bp->strides[1] / elsize;
600+
ldb = Bp->dimensions[1] > 1
601+
? Bp->strides[1] / elsize
602+
: Bp->dimensions[2];
591603
if (o == cb_fortran) {
592604
if (transB == cb_no_trans)
593605
transB = cb_trans;

0 commit comments

Comments
 (0)