@@ -254,7 +254,7 @@ int GpuArray_rgemm(cb_transpose transA, cb_transpose transB, double alpha,
254254 else {
255255 err = GpuArray_copy (& copyA , A , GA_F_ORDER );
256256 if (err != GA_NO_ERROR )
257- goto cleanup ;
257+ goto cleanup ;
258258 Ap = & copyA ;
259259 }
260260 }
@@ -264,7 +264,7 @@ int GpuArray_rgemm(cb_transpose transA, cb_transpose transB, double alpha,
264264 else {
265265 err = GpuArray_copy (& copyB , B , GA_F_ORDER );
266266 if (err != GA_NO_ERROR )
267- goto cleanup ;
267+ goto cleanup ;
268268 Bp = & copyB ;
269269 }
270270 }
@@ -388,7 +388,7 @@ int GpuArray_rger(double alpha, GpuArray *X, GpuArray *Y, GpuArray *A,
388388 else {
389389 err = GpuArray_copy (& copyX , X , GA_ANY_ORDER );
390390 if (err != GA_NO_ERROR )
391- goto cleanup ;
391+ goto cleanup ;
392392 Xp = & copyX ;
393393 }
394394 }
@@ -398,7 +398,7 @@ int GpuArray_rger(double alpha, GpuArray *X, GpuArray *Y, GpuArray *A,
398398 else {
399399 err = GpuArray_copy (& copyY , Y , GA_ANY_ORDER );
400400 if (err != GA_NO_ERROR )
401- goto cleanup ;
401+ goto cleanup ;
402402 Yp = & copyY ;
403403 }
404404 }
@@ -526,7 +526,7 @@ int GpuArray_rgemmBatch_3d(cb_transpose transA, cb_transpose transB, double alph
526526 err = GpuArray_copy (& copyA , A , GA_C_ORDER );
527527 cA = 1 ;
528528 if (err != GA_NO_ERROR )
529- goto cleanup ;
529+ goto cleanup ;
530530 Ap = & copyA ;
531531 }
532532 }
@@ -538,7 +538,7 @@ int GpuArray_rgemmBatch_3d(cb_transpose transA, cb_transpose transB, double alph
538538 err = GpuArray_copy (& copyB , B , GA_C_ORDER );
539539 cB = 1 ;
540540 if (err != GA_NO_ERROR )
541- goto cleanup ;
541+ goto cleanup ;
542542 Bp = & copyB ;
543543 }
544544 }
@@ -550,24 +550,32 @@ int GpuArray_rgemmBatch_3d(cb_transpose transA, cb_transpose transB, double alph
550550
551551 if (cC == 2 ) {
552552 o = cb_fortran ;
553- ldc = Cp -> strides [2 ] / elsize ;
553+ ldc = Cp -> dimensions [2 ] > 1
554+ ? Cp -> strides [2 ] / elsize
555+ : Cp -> dimensions [1 ];
554556 } else if (cC == 1 ) {
555557 o = cb_c ;
556- ldc = Cp -> strides [1 ] / elsize ;
558+ ldc = Cp -> dimensions [1 ] > 1
559+ ? Cp -> strides [1 ] / elsize
560+ : Cp -> dimensions [2 ];
557561 } else {
558562 err = GA_VALUE_ERROR ;
559563 goto cleanup ;
560564 }
561565 if (cA == 2 ) {
562- lda = Ap -> strides [2 ] / elsize ;
566+ lda = Ap -> dimensions [2 ] > 1
567+ ? Ap -> strides [2 ] / elsize
568+ : Ap -> dimensions [1 ];
563569 if (o == cb_c ) {
564570 if (transA == cb_no_trans )
565571 transA = cb_trans ;
566572 else
567573 transA = cb_no_trans ;
568574 }
569575 } else if (cA == 1 ) {
570- lda = Ap -> strides [1 ] / elsize ;
576+ lda = Ap -> dimensions [1 ] > 1
577+ ? Ap -> strides [1 ] / elsize
578+ : Ap -> dimensions [2 ];
571579 if (o == cb_fortran ) {
572580 if (transA == cb_no_trans )
573581 transA = cb_trans ;
@@ -579,15 +587,19 @@ int GpuArray_rgemmBatch_3d(cb_transpose transA, cb_transpose transB, double alph
579587 goto cleanup ;
580588 }
581589 if (cB == 2 ) {
582- ldb = Bp -> strides [2 ] / elsize ;
590+ ldb = Bp -> dimensions [2 ] > 1
591+ ? Bp -> strides [2 ] / elsize
592+ : Bp -> dimensions [1 ];
583593 if (o == cb_c ) {
584594 if (transB == cb_no_trans )
585595 transB = cb_trans ;
586596 else
587597 transB = cb_no_trans ;
588598 }
589599 } else if (cB == 1 ) {
590- ldb = Bp -> strides [1 ] / elsize ;
600+ ldb = Bp -> dimensions [1 ] > 1
601+ ? Bp -> strides [1 ] / elsize
602+ : Bp -> dimensions [2 ];
591603 if (o == cb_fortran ) {
592604 if (transB == cb_no_trans )
593605 transB = cb_trans ;
0 commit comments