From 8a338cf2bae25dba394660484f2a3de44930dbf2 Mon Sep 17 00:00:00 2001 From: Johnathan Rhyne Date: Wed, 29 May 2024 17:28:45 -0400 Subject: [PATCH 01/10] a --- SRC/la_constants.mod | Bin 0 -> 1563 bytes SRC/la_xisnan.mod | Bin 0 -> 321 bytes 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 SRC/la_constants.mod create mode 100644 SRC/la_xisnan.mod diff --git a/SRC/la_constants.mod b/SRC/la_constants.mod new file mode 100644 index 0000000000000000000000000000000000000000..b8006a566979124de13911e5efcec9baedca2f6f GIT binary patch literal 1563 zcmV+$2ITo4iwFP!000001La%abD~HPe&?^4H_U@u12dq$t!3Cf>k>>Eb9Z|$78@{2 zoxvQC+b#e4>lqk8;-Hp5q>}7bXyr$L-80kQbT?nW%;t;v*hbGA_jT=||GY5rHvv*L zLq(6F=fs|iR^jFut)1oLCWvAuh@(%d5mtJ=D^$lnMI2lyaJc8cj?^W4Hs}^FeYy;( ziqxJT#GxNV{^J!$D$-D|cQ>C+tm}ulg&yqR?b)Ape`+r*d!Y!Coglu;{$lw3@?cLE z)0vH|8|xOX7~h~fxQ{ZP&&S`EvxkK>UtU{xmVIs6lW&Q>a|_O*{Bn%A01&v40Mc`i zSix5cDZ?0E4ve831}VXyNPYDDwI4j4s6K)}^QnD5op1n3C4gfMuk6v77Bohnu>l(Y zaMml(usDcwg~#*ptp!giD_pd9rtb9i?q)ifF8cT5KP>(L{;bIiKDYkL2857{swtD1 zeZLsPoe!L@;!i`&e?DWCNjR6-JmJW8L7D(^6L_7%sHLd1BQXvD<4fqR{9nz&{0;6h zVK>o)=GiB`UXD*;_x=hWirFLN6rjd|6l_y4P4N**`4P!A8d4|x4rq@lhG2@V7j8P` zZ+zmI9wF8J?mq>u&yB*YUI7dKmYoP-C@%hd)+v#?PJ;_v&wu(6Up%VeE-FoqNn@B^ zqOKw8YZZP1-r=q`ui?dmqDQ+4IEdSLC~Uw_SPlvJi0eQ39K=P^5v|NeG$15y)3Le^ zCum2HD_|e)OQU#1Y!$&}H`cz1Mb)RPLphWIgYuC`#stevKvn9in!_rb%9dK6(R$n&^IWER7B4D?jL!}Wa zN5u$6^t|pMW$)`$zGmo*Wkdk%;!%|sLQ*jV84rm&I0UO!cb!p@*t`72!a0^)>5P%Y zKSJ;F2_rcg6Jr)}2j>%n;DH-VZZqzflnAIlD60>#7j{!!QGZxgAA)S?xc7kM5F=^N#S(BJpxXj$k%X#(S&wNC3SHgOB+g)D zp`Fr5UF3Qs2gB$>J0+32sE?d57>qEqQVKZ}i*u0187$tnR0BB^3^@qKWL%(~;zwQc zUz5*&lW~DoiXJtAsc;PA0j*R!>hep4TNnvwr^<0An8W1_69~YzwGMu zL%h&}ky&`}1EWFq>H|i@!W#k@jk3miFnQ8u&l06@tYxj-Am~{WGzdiW;=-|!bg{yb znKyy5TMhC)NRWnk{~$=CLfaiiSr=aNZPLBQ`6NzTzEb87-;u1R2n3q-;(%a?9u_z@ z^X?Cj28!sL0AVQF4d8er3iTNAqP?&Xt9I`WAo$Es^87wVJl&t>$3fCXf*FoUmY@ZJ zW*JTpj5I|E=Hcl={^TUvmU)V2tTMZa6i8wjosSB) literal 0 HcmV?d00001 diff --git a/SRC/la_xisnan.mod b/SRC/la_xisnan.mod new file mode 100644 index 0000000000000000000000000000000000000000..1b5610476a459fda31282807dce61ffd51c6d396 GIT binary patch literal 321 zcmV-H0lxkpiwFP!000001I3fcYQr!PhVOccxye4HC|(lx=z0mJwp4`O(o-?D3xUK2 zyDfSAYKfPmY3U(QB!Ncr!+iQR+Qf|K+^4W_tB+j`pLN%7+XnE`#qiqI<$GPhTi5Pk zSH2v!eN#3!@hw4yVZt>g8a5^pTd@o*3aL66@K_Ur1@@>JmIb)FAjKG#U>Faa1yNY= z2nu>n*%MtPG>>Qw84HUvTF5+vQIMiz9*7umL4!PDRAZ%VlLUSWj>-`jV!^A<3|Hx_ z=`uoPqSYzScQ%c14mO06Y=oM@X{I@?978ObWUL(}lf??j2HgLjS8K`RDV=@hl?(Ww zIK*@W%A?N{|L%8l;DR|QEVArbqwq-7^^P1W_{kj2;LRA;*JN=1&LA6lY-b``YW_?F TR@3_t@fFY)*n0O@xdQ+IFY=W< literal 0 HcmV?d00001 From 212270836bd59a8b179d6c19d3c5095246491107 Mon Sep 17 00:00:00 2001 From: Johnathan Rhyne Date: Mon, 14 Oct 2024 11:16:34 -0600 Subject: [PATCH 02/10] DO NOT MERGE: demonstrating changes work --- SRC/Makefile | 1 + SRC/dgelqf.f | 6 +- SRC/dgeqlf.f | 7 +- SRC/dgeqrf.f | 4 +- SRC/dgeqrfp.f | 4 +- SRC/dgerqf.f | 7 +- SRC/dlarft.f | 2 + SRC/dorglq.f | 6 +- SRC/dorgql.f | 7 +- SRC/dorgqr.f | 4 +- SRC/dorgrq.f | 7 +- SRC/dormlq.f | 6 +- SRC/dormql.f | 6 +- SRC/dormqr.f | 6 +- SRC/dormrq.f | 4 +- SRC/my_dlarft_rec.f | 239 ++++++++++++++++++++++++++++++++++++++++++++ st7lLjwJ | 1 + 17 files changed, 282 insertions(+), 35 deletions(-) create mode 100644 SRC/my_dlarft_rec.f create mode 100644 st7lLjwJ diff --git a/SRC/Makefile b/SRC/Makefile index 0191626f0e..7d1ca17981 100644 --- a/SRC/Makefile +++ b/SRC/Makefile @@ -340,6 +340,7 @@ DLASRC = \ dlaqr0.o dlaqr1.o dlaqr2.o dlaqr3.o dlaqr4.o dlaqr5.o \ dlaqtr.o dlar1v.o dlar2v.o iladlr.o iladlc.o \ dlarf.o dlarfb.o dlarfb_gett.o dlarfg.o dlarfgp.o dlarft.o dlarfx.o dlarfy.o dlarf1f.o dlarf1l.o\ + my_dlarft_rec.o \ dlargv.o dlarmm.o dlarrv.o dlartv.o \ dlarz.o dlarzb.o dlarzt.o dlaswp.o dlasy2.o \ dlasyf.o dlasyf_rook.o dlasyf_rk.o \ diff --git a/SRC/dgelqf.f b/SRC/dgelqf.f index 03bbb8e1e3..254bd1b68e 100644 --- a/SRC/dgelqf.f +++ b/SRC/dgelqf.f @@ -161,7 +161,7 @@ SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) $ NBMIN, NX * .. * .. External Subroutines .. - EXTERNAL DGELQ2, DLARFB, DLARFT, XERBLA + EXTERNAL DGELQ2, DLARFB, MY_DLARFT_REC, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -251,8 +251,8 @@ SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * - CALL DLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, - $ I ), + CALL MY_DLARFT_REC( 'Forward', 'Rowwise', N-I+1, IB, + $ A( I, I ), $ LDA, TAU( I ), WORK, LDWORK ) * * Apply H to A(i+ib:m,i:n) from the right diff --git a/SRC/dgeqlf.f b/SRC/dgeqlf.f index d472e3365e..e1287116be 100644 --- a/SRC/dgeqlf.f +++ b/SRC/dgeqlf.f @@ -156,7 +156,7 @@ SUBROUTINE DGEQLF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) $ MU, NB, NBMIN, NU, NX * .. * .. External Subroutines .. - EXTERNAL DGEQL2, DLARFB, DLARFT, XERBLA + EXTERNAL DGEQL2, DLARFB, MY_DLARFT_REC, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -256,8 +256,9 @@ SUBROUTINE DGEQLF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * - CALL DLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB, - $ A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK ) + CALL MY_DLARFT_REC( 'Backward', 'Columnwise', M-K+I+IB-1, + $ IB, A( 1, N-K+I ), LDA, TAU( I ), WORK, + $ LDWORK ) * * Apply H**T to A(1:m-k+i+ib-1,1:n-k+i-1) from the left * diff --git a/SRC/dgeqrf.f b/SRC/dgeqrf.f index c005d47af5..8e02d6bc24 100644 --- a/SRC/dgeqrf.f +++ b/SRC/dgeqrf.f @@ -163,7 +163,7 @@ SUBROUTINE DGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) $ NBMIN, NX * .. * .. External Subroutines .. - EXTERNAL DGEQR2, DLARFB, DLARFT, XERBLA + EXTERNAL DGEQR2, DLARFB, MY_DLARFT_REC, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -253,7 +253,7 @@ SUBROUTINE DGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * - CALL DLARFT( 'Forward', 'Columnwise', M-I+1, IB, + CALL MY_DLARFT_REC( 'Forward', 'Columnwise', M-I+1, IB, $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) * * Apply H**T to A(i:m,i+ib:n) from the left diff --git a/SRC/dgeqrfp.f b/SRC/dgeqrfp.f index aa757e96cf..65e5d51009 100644 --- a/SRC/dgeqrfp.f +++ b/SRC/dgeqrfp.f @@ -167,7 +167,7 @@ SUBROUTINE DGEQRFP( M, N, A, LDA, TAU, WORK, LWORK, INFO ) $ NB, NBMIN, NX * .. * .. External Subroutines .. - EXTERNAL DGEQR2P, DLARFB, DLARFT, XERBLA + EXTERNAL DGEQR2P, DLARFB, MY_DLARFT_REC, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -259,7 +259,7 @@ SUBROUTINE DGEQRFP( M, N, A, LDA, TAU, WORK, LWORK, INFO ) * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * - CALL DLARFT( 'Forward', 'Columnwise', M-I+1, IB, + CALL MY_DLARFT_REC( 'Forward', 'Columnwise', M-I+1, IB, $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) * * Apply H**T to A(i:m,i+ib:n) from the left diff --git a/SRC/dgerqf.f b/SRC/dgerqf.f index 8cabdc36ee..048483c807 100644 --- a/SRC/dgerqf.f +++ b/SRC/dgerqf.f @@ -156,7 +156,7 @@ SUBROUTINE DGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) $ MU, NB, NBMIN, NU, NX * .. * .. External Subroutines .. - EXTERNAL DGERQ2, DLARFB, DLARFT, XERBLA + EXTERNAL DGERQ2, DLARFB, MY_DLARFT_REC, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -256,8 +256,9 @@ SUBROUTINE DGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * - CALL DLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB, - $ A( M-K+I, 1 ), LDA, TAU( I ), WORK, LDWORK ) + CALL MY_DLARFT_REC( 'Backward', 'Rowwise', N-K+I+IB-1, + $ IB, A( M-K+I, 1 ), LDA, TAU( I ), WORK, + $ LDWORK ) * * Apply H to A(1:m-k+i-1,1:n-k+i+ib-1) from the right * diff --git a/SRC/dlarft.f b/SRC/dlarft.f index d9ef2f77b6..81ffb39857 100644 --- a/SRC/dlarft.f +++ b/SRC/dlarft.f @@ -196,6 +196,8 @@ SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) IF( N.EQ.0 ) $ RETURN * + WRITE(*,*) "in dlarft, n = ", N, " k = ", K, "flags: ", DIRECT, + $ " ", STOREV IF( LSAME( DIRECT, 'F' ) ) THEN PREVLASTV = N DO I = 1, K diff --git a/SRC/dorglq.f b/SRC/dorglq.f index c41367ced4..6057a96338 100644 --- a/SRC/dorglq.f +++ b/SRC/dorglq.f @@ -148,7 +148,7 @@ SUBROUTINE DORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) $ LWKOPT, NB, NBMIN, NX * .. * .. External Subroutines .. - EXTERNAL DLARFB, DLARFT, DORGL2, XERBLA + EXTERNAL DLARFB, MY_DLARFT_REC, DORGL2, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -253,8 +253,8 @@ SUBROUTINE DORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * - CALL DLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, - $ I ), + CALL MY_DLARFT_REC( 'Forward', 'Rowwise', N-I+1, IB, + $ A( I, I ), $ LDA, TAU( I ), WORK, LDWORK ) * * Apply H**T to A(i+ib:m,i:n) from the right diff --git a/SRC/dorgql.f b/SRC/dorgql.f index f931f5a9c8..4920a705bf 100644 --- a/SRC/dorgql.f +++ b/SRC/dorgql.f @@ -149,7 +149,7 @@ SUBROUTINE DORGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) $ NB, NBMIN, NX * .. * .. External Subroutines .. - EXTERNAL DLARFB, DLARFT, DORG2L, XERBLA + EXTERNAL DLARFB, MY_DLARFT_REC, DORG2L, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -260,8 +260,9 @@ SUBROUTINE DORGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * - CALL DLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB, - $ A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK ) + CALL MY_DLARFT_REC( 'Backward', 'Columnwise', M-K+I+IB-1, + $ IB, A( 1, N-K+I ), LDA, TAU( I ), WORK, + $ LDWORK ) * * Apply H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left * diff --git a/SRC/dorgqr.f b/SRC/dorgqr.f index fd88519871..5823cf8ae4 100644 --- a/SRC/dorgqr.f +++ b/SRC/dorgqr.f @@ -149,7 +149,7 @@ SUBROUTINE DORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) $ LWKOPT, NB, NBMIN, NX * .. * .. External Subroutines .. - EXTERNAL DLARFB, DLARFT, DORG2R, XERBLA + EXTERNAL DLARFB, MY_DLARFT_REC, DORG2R, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -254,7 +254,7 @@ SUBROUTINE DORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * - CALL DLARFT( 'Forward', 'Columnwise', M-I+1, IB, + CALL MY_DLARFT_REC( 'Forward', 'Columnwise', M-I+1, IB, $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) * * Apply H to A(i:m,i+ib:n) from the left diff --git a/SRC/dorgrq.f b/SRC/dorgrq.f index c805484578..2f2c84d9c3 100644 --- a/SRC/dorgrq.f +++ b/SRC/dorgrq.f @@ -149,7 +149,7 @@ SUBROUTINE DORGRQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) $ LWKOPT, NB, NBMIN, NX * .. * .. External Subroutines .. - EXTERNAL DLARFB, DLARFT, DORGR2, XERBLA + EXTERNAL DLARFB, MY_DLARFT_REC, DORGR2, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -261,8 +261,9 @@ SUBROUTINE DORGRQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * - CALL DLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB, - $ A( II, 1 ), LDA, TAU( I ), WORK, LDWORK ) + CALL MY_DLARFT_REC( 'Backward', 'Rowwise', N-K+I+IB-1, + $ IB, A( II, 1 ), LDA, TAU( I ), WORK, + $ LDWORK) * * Apply H**T to A(1:m-k+i-1,1:n-k+i+ib-1) from the right * diff --git a/SRC/dormlq.f b/SRC/dormlq.f index 85ca134737..cb68138cb6 100644 --- a/SRC/dormlq.f +++ b/SRC/dormlq.f @@ -196,7 +196,7 @@ SUBROUTINE DORMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, EXTERNAL LSAME, ILAENV * .. * .. External Subroutines .. - EXTERNAL DLARFB, DLARFT, DORML2, XERBLA + EXTERNAL DLARFB, MY_DLARFT_REC, DORML2, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -316,8 +316,8 @@ SUBROUTINE DORMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * - CALL DLARFT( 'Forward', 'Rowwise', NQ-I+1, IB, A( I, I ), - $ LDA, TAU( I ), WORK( IWT ), LDT ) + CALL MY_DLARFT_REC( 'Forward', 'Rowwise', NQ-I+1, IB, + $ A( I, I ), LDA, TAU( I ), WORK( IWT ), LDT ) IF( LEFT ) THEN * * H or H**T is applied to C(i:m,1:n) diff --git a/SRC/dormql.f b/SRC/dormql.f index 11022d78c6..c28e0d0e70 100644 --- a/SRC/dormql.f +++ b/SRC/dormql.f @@ -195,7 +195,7 @@ SUBROUTINE DORMQL( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, EXTERNAL LSAME, ILAENV * .. * .. External Subroutines .. - EXTERNAL DLARFB, DLARFT, DORM2L, XERBLA + EXTERNAL DLARFB, MY_DLARFT_REC, DORM2L, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -310,8 +310,8 @@ SUBROUTINE DORMQL( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * - CALL DLARFT( 'Backward', 'Columnwise', NQ-K+I+IB-1, IB, - $ A( 1, I ), LDA, TAU( I ), WORK( IWT ), LDT ) + CALL MY_DLARFT_REC( 'Backward', 'Columnwise', NQ-K+I+IB-1, + $ IB, A( 1, I ), LDA, TAU( I ), WORK( IWT ), LDT) IF( LEFT ) THEN * * H or H**T is applied to C(1:m-k+i+ib-1,1:n) diff --git a/SRC/dormqr.f b/SRC/dormqr.f index a9f8ba2279..e35534067f 100644 --- a/SRC/dormqr.f +++ b/SRC/dormqr.f @@ -195,7 +195,7 @@ SUBROUTINE DORMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, EXTERNAL LSAME, ILAENV * .. * .. External Subroutines .. - EXTERNAL DLARFB, DLARFT, DORM2R, XERBLA + EXTERNAL DLARFB, MY_DLARFT_REC, DORM2R, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -309,8 +309,8 @@ SUBROUTINE DORMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * - CALL DLARFT( 'Forward', 'Columnwise', NQ-I+1, IB, A( I, - $ I ), + CALL MY_DLARFT_REC( 'Forward', 'Columnwise', NQ-I+1, IB, + $ A(I, I ), $ LDA, TAU( I ), WORK( IWT ), LDT ) IF( LEFT ) THEN * diff --git a/SRC/dormrq.f b/SRC/dormrq.f index 03159e4961..dcefe8d1df 100644 --- a/SRC/dormrq.f +++ b/SRC/dormrq.f @@ -196,7 +196,7 @@ SUBROUTINE DORMRQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, EXTERNAL LSAME, ILAENV * .. * .. External Subroutines .. - EXTERNAL DLARFB, DLARFT, DORMR2, XERBLA + EXTERNAL DLARFB, MY_DLARFT_REC, DORMR2, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -317,7 +317,7 @@ SUBROUTINE DORMRQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * - CALL DLARFT( 'Backward', 'Rowwise', NQ-K+I+IB-1, IB, + CALL MY_DLARFT_REC( 'Backward', 'Rowwise', NQ-K+I+IB-1, IB, $ A( I, 1 ), LDA, TAU( I ), WORK( IWT ), LDT ) IF( LEFT ) THEN * diff --git a/SRC/my_dlarft_rec.f b/SRC/my_dlarft_rec.f new file mode 100644 index 0000000000..15dd59dee1 --- /dev/null +++ b/SRC/my_dlarft_rec.f @@ -0,0 +1,239 @@ +c Cost: n > k: 1/6 * (k^2-1)(2n+k) +c n = k: 1/2 * (n^3-n) + RECURSIVE SUBROUTINE MY_DLARFT_REC( DIRECT, STOREV, N, K, V, LDV, + $ TAU, T, LDT) + IMPLICIT NONE + ! Arguemnts + ! Scalars + INTEGER N, K, LDV, LDT + CHARACTER DIRECT, STOREV + ! Matrix + DOUBLE PRECISION V(LDV,*), T(LDT,*), TAU(N) + + ! Local variables + INTEGER I,J,L,MINNK + LOGICAL QR,LQ,QL,DIRF,COLV + ! Parameters + DOUBLE PRECISION ONE, NEG_ONE, ZERO + PARAMETER(ONE=1.0D+0, ZERO = 0.0, NEG_ONE=-1.0D+0) + ! External functions + LOGICAL LSAME + EXTERNAL LSAME + ! External subroutines + EXTERNAL DTRMM,DGEMM,DLACPY + + ! Break V apart into 6 components + ! V = |---------------| + ! |V_{1,1} 0 | + ! |V_{2,1} V_{2,2}| + ! |V_{3,1} V_{3,2}| + ! |---------------| + ! V_{1,1}\in\R^{k,k} unit lower triangular + ! V_{2,1}\in\R^{n-k,k} rectangular + ! V_{3,1}\in\R^{m-n,k} rectangular + ! + ! V_{2,2}\in\R^{n-k,n-k} unit upper triangular + ! V_{3,2}\in\R^{m-n,n-k} rectangular + + ! We will construct the T matrix + ! T = |---------------| = |--------| + ! |T_{1,1} T_{1,2}| |T_1 T_3| + ! |0 T_{2,2}| |0 T_2| + ! |---------------| |--------| + + ! T is the triangular factor attained from block reflectors. + ! To motivate the structure, consider the product + ! + ! (I - V_1T_1V_1^\top)(I - V_2T_2V_2^\top) + ! = I - V_1T_1V_1^\top - V_2T_2V_2^\top + V_1T_1V_1^\topV_2T_2V_2^\top + ! + ! Define T_3 = -T_1V_1^\topV_2T_2 + ! + ! Then, we can define the matrix V as + ! V = |-------| + ! |V_1 V_2| + ! |-------| + ! + ! So, our product is equivalent to the matrix product + ! I - VTV^\top + ! So, we compute T_1, then T_2, then use these values to get T_3 + ! + ! The general scheme used is inspired by the approach inside DGEQRT3 + ! which was (at the time of writing this code): + ! Based on the algorithm of Elmroth and Gustavson, + ! IBM J. Res. Develop. Vol 44 No. 4 July 2000. + + IF(K.EQ.0.OR.N.EQ.0) THEN + RETURN + END IF + ! Base case + IF(K.EQ.1.OR.N.EQ.1) THEN + T(1,1) = TAU(1) + RETURN + END IF + + ! Beginning of executable statements +! MINNK = MIN(N,K) +! L = MINNK / 2 + L = K / 2 + ! Determine what kind of Q we need to compute + ! We assume that if the user doesn't provide 'F' for DIRECT, + ! then they meant to provide 'B' and if they don't provide + ! 'C' for STOREV, then they meant to provide 'R' + DIRF = LSAME(DIRECT,'F') + COLV = LSAME(STOREV,'C') + ! QR happens when we have forward direction in column storage + QR = DIRF.AND.COLV + ! LQ happens when we have Forward direction in row storage + LQ = DIRF.AND.(.NOT.COLV) + ! QL happens when we have backward direction in column storage + QL = (.NOT.DIRF).AND.COLV + ! The last case is RQ. Due to how we strucutured this, if the + ! above 3 are false, then RQ must be true, so we never store + ! this + ! RQ happens when we have backward direction in row storage + !RQ = (.NOT.DIRF).AND.(.NOT.COLV) + + + ! Compute T3 + IF(QR) THEN + ! If we are wide, then our + ! Compute T_1 + CALL MY_DLARFT_REC(DIRECT, STOREV, N, L, V, LDV, TAU, T, + $ LDT) + ! Compute T_2 + CALL MY_DLARFT_REC(DIRECT, STOREV, N-L, K-L, V(L+1,L+1), + $ LDV, TAU(L+1), T(L+1,L+1), LDT) + ! Compute T_3 + ! T_3 = V_{2,1}^\top + DO J = 1, L + DO I = 1, K-L + T(J,L+I) = V(L+I,J) + END DO + END DO + ! T_3 = V_{2,1}^\top * V_{2,2} + CALL DTRMM('Right', 'Lower', 'No transpose', 'Unit', + $ L, K - L, ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) + + IF(N.GT.K) THEN + ! T_3 = T_3 + V_{3,1}^\topV_{3,2} + CALL DGEMM('Transpose', 'No transpose', L, K-L, N-K, + $ ONE, V(K+1, 1), LDV, V(K+1,L+1), LDV, ONE, + $ T(1, L+1), LDT) + END IF + + ! At this point, we have that T_3 = V_1^\top *V_2 + ! All that is left is to pre and post multiply by -T_1 and T_2 + ! respectively. + + ! T_3 = -T_1*T_3 + CALL DTRMM('Left', 'Upper', 'No transpose', 'Non-unit', + $ L, K - L, NEG_ONE, T, LDT, T(1, L+1), LDT) + ! T_3 = T_3*T_2 + CALL DTRMM('Right', 'Upper', 'No transpose', 'Non-unit', + $ L, K - L, ONE, T(L+1,L+1), LDT, T(1, L+1), LDT) + + ELSE IF(LQ) THEN + ! Compute T_1 + CALL MY_DLARFT_REC(DIRECT, STOREV, N, L, V, LDV, TAU, T, + $ LDT) + ! Compute T_2 + CALL MY_DLARFT_REC(DIRECT, STOREV, N-L, K-L, V(L+1,L+1), + $ LDV, TAU(L+1), T(L+1,L+1), LDT) + + ! Begin computing T_3 + ! First, T_3 = V_1V_2^\top + ! T_3 = V_{12} + CALL DLACPY('All', L, K - L, V(1,L+1), LDV, T(1, L+1), LDT) + + ! T_3 = V_{12}V_{22}^\top = T_3V_{22}^\top + CALL DTRMM('Right', 'Upper', 'Transpose', 'Unit', L, K-L, + $ ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) + + ! If needed, use the trailing components + IF(N.GT.K) THEN + CALL DGEMM('No transpose', 'Transpose', L, K-L, N-K, + $ ONE, V(1, K+1), LDV, V(L+1, K+1), LDV, ONE, + $ T(1, L+1), LDT) + END IF + + ! T_3 = -T_1T_3 + CALL DTRMM('Left', 'Upper', 'No transpose', 'Non-unit', + $ L, K - L, NEG_ONE, T, LDT, T(1, L+1), LDT) + + ! T_3 = T_3T_1 + CALL DTRMM('Right', 'Upper', 'No transpose', 'Non-unit', + $ L, K - L, ONE, T(L+1,L+1), LDT, T(1, L+1), LDT) + ELSE IF(QL) THEN + ! Compute T_1 + CALL MY_DLARFT_REC(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, + $ T, LDT) + ! Compute T_2 + CALL MY_DLARFT_REC(DIRECT, STOREV, N, L, V(1, K-L+1), LDV, + $ TAU(K-L+1), T(K-L+1,K-L+1), LDT) + + ! Begin computing T_3 = T_2V_2^\topV_1T_1 + + ! T_3 = V_2^\top V_1 + + ! T_3 = V_{2,2}^\top + DO J = 1, K-L + DO I = 1, L + T(K-L+I,J) = V(N-K+J, K-L+I) + END DO + END DO + + ! T_3 = V_{2,2}^\topV_{2,1} = T_3V_{2,1} + CALL DTRMM('Right', 'Upper', 'No transpose', 'Unit', + $ L, K - L, ONE, V(N-K+1,1), LDV, T(K-L+1,1), LDT) + + ! If needed, T_3 = V_{1,2}^\topV_{1,1} + T_3 + IF(N.GT.K) THEN + CALL DGEMM('Transpose', 'No transpose', L, K-L, N-K, + $ ONE, V(1,K-L+1), LDV, V, LDV, ONE, T(K-L+1,1), LDT) + END IF + + ! T_3 = -T_2T_3 + CALL DTRMM('Left', 'Lower', 'No transpose', 'Non-unit', + $ L, K-L, NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) + ! T_3 = T_3T_1 + CALL DTRMM('Right', 'Lower', 'No transpose', 'Non-unit', + $ L, K-L, ONE, T, LDT, T(K-L+1,1), LDT) + ELSE + ! Else means RQ + ! Compute T_1 + CALL MY_DLARFT_REC(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, + $ T, LDT) + ! Compute T_2 + CALL MY_DLARFT_REC(DIRECT, STOREV, N, L, V(K-L+1,1), LDV, + $ TAU(K-L+1), T(K-L+1,K-L+1), LDT) + + ! Begin computing T_3 = T_2V_2V_1^\topT_1 + + ! T_3 = V_2V_1^\top + + ! T_3 = V_{2,2} + CALL DLACPY('All', L, K-L, V(K-L+1,N-K+1), LDV, + $ T(K-L+1,1), LDT) + + ! T_3 = T_3V_{1,2}^\top + CALL DTRMM('Right', 'Lower', 'Transpose', 'Unit', + $ L, K-L, ONE, V(1, N-K+1), LDV, T(K-L+1,1), LDT) + + ! If needed, T_3 = V_{2,1}V_{1,1}^\top + T_3 + IF(N.GT.K) THEN + CALL DGEMM('No transpose', 'Transpose', L, K-L, N-K, + $ ONE, V(K-L+1,1), LDV, V, LDV, ONE, T(K-L+1,1), LDT) + END IF + + ! T_3 = -T_2T_3 + CALL DTRMM('Left', 'Lower', 'No tranpose', 'Non-unit', + $ L, K-L, NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) + + ! T_3 = T_3T_1 + CALL DTRMM('Right', 'Lower', 'No tranpose', 'Non-unit', + $ L, K-L, ONE, T, LDT, T(K-L+1,1), LDT) + END IF + + ! Now, we have T in the correct form! + END SUBROUTINE diff --git a/st7lLjwJ b/st7lLjwJ new file mode 100644 index 0000000000..8b277f0dd5 --- /dev/null +++ b/st7lLjwJ @@ -0,0 +1 @@ +! From 54956283e297afe6cc8efb77d1ee5a1cca3c1a2a Mon Sep 17 00:00:00 2001 From: Johnathan Rhyne Date: Wed, 16 Oct 2024 11:08:37 -0600 Subject: [PATCH 03/10] CAN MERGE: Implemented my version of xlarft with comments added, and moved the previous version into VARIANTS --- SRC/Makefile | 1 - SRC/VARIANTS/larft/LL-LVL2/clarft.f | 328 +++++++++++++++ SRC/VARIANTS/larft/LL-LVL2/dlarft.f | 326 +++++++++++++++ SRC/VARIANTS/larft/LL-LVL2/slarft.f | 326 +++++++++++++++ SRC/VARIANTS/larft/LL-LVL2/zlarft.f | 327 +++++++++++++++ SRC/clarft.f | 592 +++++++++++++++++++++------- SRC/dgelqf.f | 5 +- SRC/dgeqlf.f | 4 +- SRC/dgeqrf.f | 4 +- SRC/dgeqrfp.f | 4 +- SRC/dgerqf.f | 6 +- SRC/dlarft.f | 582 ++++++++++++++++++++------- SRC/dorglq.f | 5 +- SRC/dorgql.f | 7 +- SRC/dorgqr.f | 4 +- SRC/dorgrq.f | 7 +- SRC/dormlq.f | 6 +- SRC/dormql.f | 6 +- SRC/dormqr.f | 5 +- SRC/dormrq.f | 4 +- SRC/my_dlarft_rec.f | 239 ----------- SRC/slarft.f | 576 ++++++++++++++++++++------- SRC/zlarft.f | 591 ++++++++++++++++++++------- st7lLjwJ | 1 - 24 files changed, 3102 insertions(+), 854 deletions(-) create mode 100644 SRC/VARIANTS/larft/LL-LVL2/clarft.f create mode 100644 SRC/VARIANTS/larft/LL-LVL2/dlarft.f create mode 100644 SRC/VARIANTS/larft/LL-LVL2/slarft.f create mode 100644 SRC/VARIANTS/larft/LL-LVL2/zlarft.f delete mode 100644 SRC/my_dlarft_rec.f delete mode 100644 st7lLjwJ diff --git a/SRC/Makefile b/SRC/Makefile index 7d1ca17981..0191626f0e 100644 --- a/SRC/Makefile +++ b/SRC/Makefile @@ -340,7 +340,6 @@ DLASRC = \ dlaqr0.o dlaqr1.o dlaqr2.o dlaqr3.o dlaqr4.o dlaqr5.o \ dlaqtr.o dlar1v.o dlar2v.o iladlr.o iladlc.o \ dlarf.o dlarfb.o dlarfb_gett.o dlarfg.o dlarfgp.o dlarft.o dlarfx.o dlarfy.o dlarf1f.o dlarf1l.o\ - my_dlarft_rec.o \ dlargv.o dlarmm.o dlarrv.o dlartv.o \ dlarz.o dlarzb.o dlarzt.o dlaswp.o dlasy2.o \ dlasyf.o dlasyf_rook.o dlasyf_rk.o \ diff --git a/SRC/VARIANTS/larft/LL-LVL2/clarft.f b/SRC/VARIANTS/larft/LL-LVL2/clarft.f new file mode 100644 index 0000000000..9e2e4503e3 --- /dev/null +++ b/SRC/VARIANTS/larft/LL-LVL2/clarft.f @@ -0,0 +1,328 @@ +*> \brief \b CLARFT forms the triangular factor T of a block reflector H = I - vtvH +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLARFT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, STOREV +* INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. +* COMPLEX T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLARFT forms the triangular factor T of a complex block reflector H +*> of order n, which is defined as a product of k elementary reflectors. +*> +*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +*> +*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +*> +*> If STOREV = 'C', the vector which defines the elementary reflector +*> H(i) is stored in the i-th column of the array V, and +*> +*> H = I - V * T * V**H +*> +*> If STOREV = 'R', the vector which defines the elementary reflector +*> H(i) is stored in the i-th row of the array V, and +*> +*> H = I - V**H * T * V +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies the order in which the elementary reflectors are +*> multiplied to form the block reflector: +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Specifies how the vectors which define the elementary +*> reflectors are stored (see also Further Details): +*> = 'C': columnwise +*> = 'R': rowwise +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the block reflector H. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the triangular factor T (= the number of +*> elementary reflectors). K >= 1. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,N) if STOREV = 'R' +*> The matrix V. See further details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,K) +*> The k by k triangular factor T of the block reflector. +*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +*> lower triangular. The rest of the array is not used. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup larft +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK auxiliary routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + COMPLEX T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J, PREVLASTV, LASTV +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CGEMV, CTRMV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( DIRECT, 'F' ) ) THEN + PREVLASTV = N + DO I = 1, K + PREVLASTV = MAX( PREVLASTV, I ) + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = 1, I + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * CONJG( V( I , J ) ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i) +* + CALL CGEMV( 'Conjugate transpose', J-I, I-1, + $ -TAU( I ), V( I+1, 1 ), LDV, + $ V( I+1, I ), 1, + $ ONE, T( 1, I ), 1 ) + ELSE +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( J , I ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H +* + CALL CGEMM( 'N', 'C', I-1, 1, J-I, -TAU( I ), + $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, + $ ONE, T( 1, I ), LDT ) + END IF +* +* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) +* + CALL CTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, + $ T, + $ LDT, T( 1, I ), 1 ) + T( I, I ) = TAU( I ) + IF( I.GT.1 ) THEN + PREVLASTV = MAX( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + END DO + ELSE + PREVLASTV = 1 + DO I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = I, K + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( I.LT.K ) THEN + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * CONJG( V( N-K+I , J ) ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i) +* + CALL CGEMV( 'Conjugate transpose', N-K+I-J, K-I, + $ -TAU( I ), V( J, I+1 ), LDV, V( J, I ), + $ 1, ONE, T( I+1, I ), 1 ) + ELSE +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( J, N-K+I ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H +* + CALL CGEMM( 'N', 'C', K-I, 1, N-K+I-J, + $ -TAU( I ), + $ V( I+1, J ), LDV, V( I, J ), LDV, + $ ONE, T( I+1, I ), LDT ) + END IF +* +* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL CTRMV( 'Lower', 'No transpose', 'Non-unit', + $ K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + IF( I.GT.1 ) THEN + PREVLASTV = MIN( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + T( I, I ) = TAU( I ) + END IF + END DO + END IF + RETURN +* +* End of CLARFT +* + END diff --git a/SRC/VARIANTS/larft/LL-LVL2/dlarft.f b/SRC/VARIANTS/larft/LL-LVL2/dlarft.f new file mode 100644 index 0000000000..d9ef2f77b6 --- /dev/null +++ b/SRC/VARIANTS/larft/LL-LVL2/dlarft.f @@ -0,0 +1,326 @@ +*> \brief \b DLARFT forms the triangular factor T of a block reflector H = I - vtvH +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARFT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, STOREV +* INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARFT forms the triangular factor T of a real block reflector H +*> of order n, which is defined as a product of k elementary reflectors. +*> +*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +*> +*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +*> +*> If STOREV = 'C', the vector which defines the elementary reflector +*> H(i) is stored in the i-th column of the array V, and +*> +*> H = I - V * T * V**T +*> +*> If STOREV = 'R', the vector which defines the elementary reflector +*> H(i) is stored in the i-th row of the array V, and +*> +*> H = I - V**T * T * V +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies the order in which the elementary reflectors are +*> multiplied to form the block reflector: +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Specifies how the vectors which define the elementary +*> reflectors are stored (see also Further Details): +*> = 'C': columnwise +*> = 'R': rowwise +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the block reflector H. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the triangular factor T (= the number of +*> elementary reflectors). K >= 1. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,N) if STOREV = 'R' +*> The matrix V. See further details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,K) +*> The k by k triangular factor T of the block reflector. +*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +*> lower triangular. The rest of the array is not used. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup larft +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK auxiliary routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, PREVLASTV, LASTV +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DTRMV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( DIRECT, 'F' ) ) THEN + PREVLASTV = N + DO I = 1, K + PREVLASTV = MAX( I, PREVLASTV ) + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = 1, I + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( I , J ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i) +* + CALL DGEMV( 'Transpose', J-I, I-1, -TAU( I ), + $ V( I+1, 1 ), LDV, V( I+1, I ), 1, ONE, + $ T( 1, I ), 1 ) + ELSE +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( J , I ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T +* + CALL DGEMV( 'No transpose', I-1, J-I, -TAU( I ), + $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, ONE, + $ T( 1, I ), 1 ) + END IF +* +* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) +* + CALL DTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, + $ T, + $ LDT, T( 1, I ), 1 ) + T( I, I ) = TAU( I ) + IF( I.GT.1 ) THEN + PREVLASTV = MAX( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + END DO + ELSE + PREVLASTV = 1 + DO I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = I, K + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( I.LT.K ) THEN + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( N-K+I , J ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i) +* + CALL DGEMV( 'Transpose', N-K+I-J, K-I, + $ -TAU( I ), + $ V( J, I+1 ), LDV, V( J, I ), 1, ONE, + $ T( I+1, I ), 1 ) + ELSE +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( J, N-K+I ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T +* + CALL DGEMV( 'No transpose', K-I, N-K+I-J, + $ -TAU( I ), V( I+1, J ), LDV, V( I, J ), LDV, + $ ONE, T( I+1, I ), 1 ) + END IF +* +* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL DTRMV( 'Lower', 'No transpose', 'Non-unit', + $ K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + IF( I.GT.1 ) THEN + PREVLASTV = MIN( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + T( I, I ) = TAU( I ) + END IF + END DO + END IF + RETURN +* +* End of DLARFT +* + END diff --git a/SRC/VARIANTS/larft/LL-LVL2/slarft.f b/SRC/VARIANTS/larft/LL-LVL2/slarft.f new file mode 100644 index 0000000000..31b7951819 --- /dev/null +++ b/SRC/VARIANTS/larft/LL-LVL2/slarft.f @@ -0,0 +1,326 @@ +*> \brief \b SLARFT forms the triangular factor T of a block reflector H = I - vtvH +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARFT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, STOREV +* INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. +* REAL T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLARFT forms the triangular factor T of a real block reflector H +*> of order n, which is defined as a product of k elementary reflectors. +*> +*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +*> +*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +*> +*> If STOREV = 'C', the vector which defines the elementary reflector +*> H(i) is stored in the i-th column of the array V, and +*> +*> H = I - V * T * V**T +*> +*> If STOREV = 'R', the vector which defines the elementary reflector +*> H(i) is stored in the i-th row of the array V, and +*> +*> H = I - V**T * T * V +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies the order in which the elementary reflectors are +*> multiplied to form the block reflector: +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Specifies how the vectors which define the elementary +*> reflectors are stored (see also Further Details): +*> = 'C': columnwise +*> = 'R': rowwise +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the block reflector H. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the triangular factor T (= the number of +*> elementary reflectors). K >= 1. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is REAL array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,N) if STOREV = 'R' +*> The matrix V. See further details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is REAL array, dimension (LDT,K) +*> The k by k triangular factor T of the block reflector. +*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +*> lower triangular. The rest of the array is not used. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup larft +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK auxiliary routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + REAL T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, PREVLASTV, LASTV +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, STRMV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( DIRECT, 'F' ) ) THEN + PREVLASTV = N + DO I = 1, K + PREVLASTV = MAX( I, PREVLASTV ) + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = 1, I + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( I , J ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i) +* + CALL SGEMV( 'Transpose', J-I, I-1, -TAU( I ), + $ V( I+1, 1 ), LDV, V( I+1, I ), 1, ONE, + $ T( 1, I ), 1 ) + ELSE +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( J , I ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T +* + CALL SGEMV( 'No transpose', I-1, J-I, -TAU( I ), + $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, + $ ONE, T( 1, I ), 1 ) + END IF +* +* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) +* + CALL STRMV( 'Upper', 'No transpose', 'Non-unit', I-1, + $ T, + $ LDT, T( 1, I ), 1 ) + T( I, I ) = TAU( I ) + IF( I.GT.1 ) THEN + PREVLASTV = MAX( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + END DO + ELSE + PREVLASTV = 1 + DO I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = I, K + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( I.LT.K ) THEN + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( N-K+I , J ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i) +* + CALL SGEMV( 'Transpose', N-K+I-J, K-I, + $ -TAU( I ), + $ V( J, I+1 ), LDV, V( J, I ), 1, ONE, + $ T( I+1, I ), 1 ) + ELSE +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( J, N-K+I ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T +* + CALL SGEMV( 'No transpose', K-I, N-K+I-J, + $ -TAU( I ), V( I+1, J ), LDV, V( I, J ), LDV, + $ ONE, T( I+1, I ), 1 ) + END IF +* +* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL STRMV( 'Lower', 'No transpose', 'Non-unit', + $ K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + IF( I.GT.1 ) THEN + PREVLASTV = MIN( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + T( I, I ) = TAU( I ) + END IF + END DO + END IF + RETURN +* +* End of SLARFT +* + END diff --git a/SRC/VARIANTS/larft/LL-LVL2/zlarft.f b/SRC/VARIANTS/larft/LL-LVL2/zlarft.f new file mode 100644 index 0000000000..be773becc2 --- /dev/null +++ b/SRC/VARIANTS/larft/LL-LVL2/zlarft.f @@ -0,0 +1,327 @@ +*> \brief \b ZLARFT forms the triangular factor T of a block reflector H = I - vtvH +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLARFT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, STOREV +* INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. +* COMPLEX*16 T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARFT forms the triangular factor T of a complex block reflector H +*> of order n, which is defined as a product of k elementary reflectors. +*> +*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +*> +*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +*> +*> If STOREV = 'C', the vector which defines the elementary reflector +*> H(i) is stored in the i-th column of the array V, and +*> +*> H = I - V * T * V**H +*> +*> If STOREV = 'R', the vector which defines the elementary reflector +*> H(i) is stored in the i-th row of the array V, and +*> +*> H = I - V**H * T * V +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies the order in which the elementary reflectors are +*> multiplied to form the block reflector: +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Specifies how the vectors which define the elementary +*> reflectors are stored (see also Further Details): +*> = 'C': columnwise +*> = 'R': rowwise +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the block reflector H. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the triangular factor T (= the number of +*> elementary reflectors). K >= 1. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX*16 array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,N) if STOREV = 'R' +*> The matrix V. See further details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,K) +*> The k by k triangular factor T of the block reflector. +*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +*> lower triangular. The rest of the array is not used. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup larft +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK auxiliary routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + COMPLEX*16 T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J, PREVLASTV, LASTV +* .. +* .. External Subroutines .. + EXTERNAL ZGEMV, ZTRMV, ZGEMM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( DIRECT, 'F' ) ) THEN + PREVLASTV = N + DO I = 1, K + PREVLASTV = MAX( PREVLASTV, I ) + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = 1, I + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * CONJG( V( I , J ) ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i) +* + CALL ZGEMV( 'Conjugate transpose', J-I, I-1, + $ -TAU( I ), V( I+1, 1 ), LDV, + $ V( I+1, I ), 1, ONE, T( 1, I ), 1 ) + ELSE +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( J , I ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H +* + CALL ZGEMM( 'N', 'C', I-1, 1, J-I, -TAU( I ), + $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, + $ ONE, T( 1, I ), LDT ) + END IF +* +* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) +* + CALL ZTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, + $ T, + $ LDT, T( 1, I ), 1 ) + T( I, I ) = TAU( I ) + IF( I.GT.1 ) THEN + PREVLASTV = MAX( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + END DO + ELSE + PREVLASTV = 1 + DO I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = I, K + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( I.LT.K ) THEN + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * CONJG( V( N-K+I , J ) ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i) +* + CALL ZGEMV( 'Conjugate transpose', N-K+I-J, K-I, + $ -TAU( I ), V( J, I+1 ), LDV, V( J, I ), + $ 1, ONE, T( I+1, I ), 1 ) + ELSE +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( J, N-K+I ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H +* + CALL ZGEMM( 'N', 'C', K-I, 1, N-K+I-J, + $ -TAU( I ), + $ V( I+1, J ), LDV, V( I, J ), LDV, + $ ONE, T( I+1, I ), LDT ) + END IF +* +* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL ZTRMV( 'Lower', 'No transpose', 'Non-unit', + $ K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + IF( I.GT.1 ) THEN + PREVLASTV = MIN( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + T( I, I ) = TAU( I ) + END IF + END DO + END IF + RETURN +* +* End of ZLARFT +* + END diff --git a/SRC/clarft.f b/SRC/clarft.f index 9e2e4503e3..4517bb9b31 100644 --- a/SRC/clarft.f +++ b/SRC/clarft.f @@ -18,7 +18,7 @@ * Definition: * =========== * -* SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) * * .. Scalar Arguments .. * CHARACTER DIRECT, STOREV @@ -159,170 +159,468 @@ *> \endverbatim *> * ===================================================================== - SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) + RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, + $ LDT ) * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * -* .. Scalar Arguments .. - CHARACTER DIRECT, STOREV - INTEGER K, LDT, LDV, N +* .. Scalar Arguments +* + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N * .. * .. Array Arguments .. - COMPLEX T( LDT, * ), TAU( * ), V( LDV, * ) -* .. * -* ===================================================================== + COMPLEX T( LDT, * ), TAU( * ), V( LDV, * ) +* .. * * .. Parameters .. - COMPLEX ONE, ZERO - PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), - $ ZERO = ( 0.0E+0, 0.0E+0 ) ) -* .. +* + COMPLEX ONE, NEG_ONE, ZERO + PARAMETER(ONE=1.0D+0, ZERO = 0.0D+0, NEG_ONE=-1.0D+0) +* * .. Local Scalars .. - INTEGER I, J, PREVLASTV, LASTV -* .. +* + INTEGER I,J,L + LOGICAL QR,LQ,QL,DIRF,COLV +* * .. External Subroutines .. - EXTERNAL CGEMM, CGEMV, CTRMV -* .. -* .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME +* + EXTERNAL CTRMM,CGEMM,CLACPY +* +* .. External Functions.. +* + LOGICAL LSAME + EXTERNAL LSAME +* +* .. Intrinsic Functions.. +* + INTRINSIC CONJG +* +* The general scheme used is inspired by the approach inside DGEQRT3 +* which was (at the time of writing this code): +* Based on the algorithm of Elmroth and Gustavson, +* IBM J. Res. Develop. Vol 44 No. 4 July 2000. * .. * .. Executable Statements .. * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN -* - IF( LSAME( DIRECT, 'F' ) ) THEN - PREVLASTV = N - DO I = 1, K - PREVLASTV = MAX( PREVLASTV, I ) - IF( TAU( I ).EQ.ZERO ) THEN -* -* H(i) = I -* - DO J = 1, I - T( J, I ) = ZERO - END DO - ELSE -* -* general case -* - IF( LSAME( STOREV, 'C' ) ) THEN -* Skip any trailing zeros. - DO LASTV = N, I+1, -1 - IF( V( LASTV, I ).NE.ZERO ) EXIT - END DO - DO J = 1, I-1 - T( J, I ) = -TAU( I ) * CONJG( V( I , J ) ) - END DO - J = MIN( LASTV, PREVLASTV ) -* -* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i) -* - CALL CGEMV( 'Conjugate transpose', J-I, I-1, - $ -TAU( I ), V( I+1, 1 ), LDV, - $ V( I+1, I ), 1, - $ ONE, T( 1, I ), 1 ) - ELSE -* Skip any trailing zeros. - DO LASTV = N, I+1, -1 - IF( V( I, LASTV ).NE.ZERO ) EXIT - END DO - DO J = 1, I-1 - T( J, I ) = -TAU( I ) * V( J , I ) - END DO - J = MIN( LASTV, PREVLASTV ) -* -* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H -* - CALL CGEMM( 'N', 'C', I-1, 1, J-I, -TAU( I ), - $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, - $ ONE, T( 1, I ), LDT ) - END IF -* -* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) -* - CALL CTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, - $ T, - $ LDT, T( 1, I ), 1 ) - T( I, I ) = TAU( I ) - IF( I.GT.1 ) THEN - PREVLASTV = MAX( PREVLASTV, LASTV ) - ELSE - PREVLASTV = LASTV - END IF - END IF + IF(N.EQ.0.OR.K.EQ.0) THEN + RETURN + END IF +* +* Base case +* + IF(N.EQ.1.OR.K.EQ.1) THEN + T(1,1) = TAU(1) + RETURN + END IF +* +* Beginning of executable statements +* + L = K / 2 +* +* Determine what kind of Q we need to compute +* We assume that if the user doesn't provide 'F' for DIRECT, +* then they meant to provide 'B' and if they don't provide +* 'C' for STOREV, then they meant to provide 'R' +* + DIRF = LSAME(DIRECT,'F') + COLV = LSAME(STOREV,'C') +* +* QR happens when we have forward direction in column storage +* + QR = DIRF.AND.COLV +* +* LQ happens when we have Forward direction in row storage +* + LQ = DIRF.AND.(.NOT.COLV) +* +* QL happens when we have backward direction in column storage +* + QL = (.NOT.DIRF).AND.COLV +* +* The last case is RQ. Due to how we structured this, if the +* above 3 are false, then RQ must be true, so we never store +* this +* RQ happens when we have backward direction in row storage +* RQ = (.NOT.DIRF).AND.(.NOT.COLV) +* + IF(QR) THEN +* +* Break V apart into 6 components +* +* V = |---------------| +* |V_{1,1} 0 | +* |V_{2,1} V_{2,2}| +* |V_{3,1} V_{3,2}| +* |---------------| +* +* V_{1,1}\in\C^{l,l} unit lower triangular +* V_{2,1}\in\C^{k-l,l} rectangular +* V_{3,1}\in\C^{n-k,l} rectangular +* +* V_{2,2}\in\C^{k-l,k-l} unit lower triangular +* V_{3,2}\in\C^{n-k,k-l} rectangular +* +* We will construct the T matrix +* T = |---------------| = |--------| +* |T_{1,1} T_{1,2}| |T_1 T_3| +* |0 T_{2,2}| |0 T_2| +* |---------------| |--------| +* +* T is the triangular factor attained from block reflectors. +* To motivate the structure, assume we have already computed T_1 +* and T_2. Then collect the associated reflectors in V_1 and V_2 +* +* T_1\in\C^{l, l} upper triangular +* T_2\in\C^{k-l, k-l} upper triangular +* T_3\in\C^{l, k-l} rectangular +* +* Where l = floor(k/2) +* +* Then, consider the product: +* +* (I - V_1T_1V_1')(I - V_2T_2V_2') +* = I - V_1T_1V_1' - V_2T_2V_2' + V_1T_1V_1'V_2T_2V_2' +* +* Define T_3 = -T_1V_1'V_2T_2 +* +* Then, we can define the matrix V as +* V = |-------| +* |V_1 V_2| +* |-------| +* +* So, our product is equivalent to the matrix product +* I - VTV' +* This means, we can compute T_1 and T_2, then use this information +* to compute T_3 +* +* Compute T_1 recursively +* + CALL CLARFT(DIRECT, STOREV, N, L, V, LDV, TAU, T, LDT) +* +* Compute T_2 recursively +* + CALL CLARFT(DIRECT, STOREV, N-L, K-L, V(L+1,L+1), LDV, + $ TAU(L+1), T(L+1,L+1), LDT) +* +* Compute T_3 +* T_3 = V_{2,1}' +* + DO J = 1, L + DO I = 1, K-L + T(J,L+I) = CONJG(V(L+I,J)) + END DO END DO - ELSE - PREVLASTV = 1 - DO I = K, 1, -1 - IF( TAU( I ).EQ.ZERO ) THEN -* -* H(i) = I -* - DO J = I, K - T( J, I ) = ZERO - END DO - ELSE -* -* general case -* - IF( I.LT.K ) THEN - IF( LSAME( STOREV, 'C' ) ) THEN -* Skip any leading zeros. - DO LASTV = 1, I-1 - IF( V( LASTV, I ).NE.ZERO ) EXIT - END DO - DO J = I+1, K - T( J, I ) = -TAU( I ) * CONJG( V( N-K+I , J ) ) - END DO - J = MAX( LASTV, PREVLASTV ) -* -* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i) -* - CALL CGEMV( 'Conjugate transpose', N-K+I-J, K-I, - $ -TAU( I ), V( J, I+1 ), LDV, V( J, I ), - $ 1, ONE, T( I+1, I ), 1 ) - ELSE -* Skip any leading zeros. - DO LASTV = 1, I-1 - IF( V( I, LASTV ).NE.ZERO ) EXIT - END DO - DO J = I+1, K - T( J, I ) = -TAU( I ) * V( J, N-K+I ) - END DO - J = MAX( LASTV, PREVLASTV ) -* -* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H -* - CALL CGEMM( 'N', 'C', K-I, 1, N-K+I-J, - $ -TAU( I ), - $ V( I+1, J ), LDV, V( I, J ), LDV, - $ ONE, T( I+1, I ), LDT ) - END IF -* -* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) -* - CALL CTRMV( 'Lower', 'No transpose', 'Non-unit', - $ K-I, - $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) - IF( I.GT.1 ) THEN - PREVLASTV = MIN( PREVLASTV, LASTV ) - ELSE - PREVLASTV = LASTV - END IF - END IF - T( I, I ) = TAU( I ) - END IF +* +* T_3 = T_3V_{2,2} +* + CALL CTRMM('Right', 'Lower', 'No transpose', 'Unit', L, K-L, + $ ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) + +* +* T_3 = V_{3,1}'V_{3,2} + T_3 +* Note: We assume K <= N, and GEMM will do nothing if N=K +* + CALL CGEMM('Conjugate', 'No transpose', L, K-L, N-K, ONE, + $ V(K+1, 1), LDV, V(K+1,L+1), LDV, ONE, T(1, L+1), LDT) +* +* At this point, we have that T_3 = V_1'V_2 +* All that is left is to pre and post multiply by -T_1 and T_2 +* respectively. +* +* T_3 = -T_1T_3 +* + CALL CTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, + $ K-L, NEG_ONE, T, LDT, T(1, L+1), LDT) +* +* T_3 = T_3T_2 +* + CALL CTRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, + $ K-L, ONE, T(L+1,L+1), LDT, T(1, L+1), LDT) + + ELSE IF(LQ) THEN +* +* Break V apart into 6 components +* +* V = |----------------------| +* |V_{1,1} V_{1,2} V{1,3}| +* |0 V_{2,2} V{2,3}| +* |----------------------| +* +* V_{1,1}\in\C^{l,l} unit upper triangular +* V_{1,2}\in\C^{l,k-l} rectangular +* V_{1,3}\in\C^{l,n-k} rectangular +* +* V_{2,2}\in\C^{k-l,k-l} unit upper triangular +* V_{2,3}\in\C^{k-l,n-k} rectangular +* +* Where l = floor(k/2) +* +* We will construct the T matrix +* T = |---------------| = |--------| +* |T_{1,1} T_{1,2}| |T_1 T_3| +* |0 T_{2,2}| |0 T_2| +* |---------------| |--------| +* +* T is the triangular factor attained from block reflectors. +* To motivate the structure, assume we have already computed T_1 +* and T_2. Then collect the associated reflectors in V_1 and V_2 +* +* T_1\in\C^{l, l} upper triangular +* T_2\in\C^{k-l, k-l} upper triangular +* T_3\in\C^{l, k-l} rectangular +* +* Then, consider the product: +* +* (I - V_1'T_1V_1)(I - V_2'T_2V_2) +* = I - V_1'T_1V_1 - V_2'T_2V_2 + V_1'T_1V_1V_2'T_2V_2 +* +* Define T_3 = -T_1V_1V_2'T_2 +* +* Then, we can define the matrix V as +* V = |---| +* |V_1| +* |V_2| +* |---| +* +* So, our product is equivalent to the matrix product +* I - V'TV +* This means, we can compute T_1 and T_2, then use this information +* to compute T_3 +* +* Compute T_1 recursively +* + CALL CLARFT(DIRECT, STOREV, N, L, V, LDV, TAU, T, LDT) +* +* Compute T_2 recursively +* + CALL CLARFT(DIRECT, STOREV, N-L, K-L, V(L+1,L+1), LDV, + $ TAU(L+1), T(L+1,L+1), LDT) + +* +* Compute T_3 +* T_3 = V_{1,2} +* + CALL CLACPY('All', L, K - L, V(1,L+1), LDV, T(1, L+1), LDT) +* +* T_3 = T_3V_{2,2}' +* + CALL CTRMM('Right', 'Upper', 'Conjugate', 'Unit', L, K-L, ONE, + $ V(L+1, L+1), LDV, T(1, L+1), LDT) + +* +* T_3 = V_{1,3}V_{2,3}' + T_3 +* Note: We assume K <= N, and GEMM will do nothing if N=K +* + CALL CGEMM('No transpose', 'Conjugate', L, K-L, N-K, ONE, + $ V(1, K+1), LDV, V(L+1, K+1), LDV, ONE, T(1, L+1), LDT) +* +* At this point, we have that T_3 = V_1V_2' +* All that is left is to pre and post multiply by -T_1 and T_2 +* respectively. +* +* T_3 = -T_1T_3 +* + CALL CTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, K-L, + $ NEG_ONE, T, LDT, T(1, L+1), LDT) + +* +* T_3 = T_3T_2 +* + CALL CTRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, + $ K-L, ONE, T(L+1,L+1), LDT, T(1, L+1), LDT) + ELSE IF(QL) THEN +* +* Break V apart into 6 components +* +* V = |---------------| +* |V_{1,1} V_{1,2}| +* |V_{2,1} V_{2,2}| +* |0 V_{3,2}| +* |---------------| +* +* V_{1,1}\in\C^{n-k,k-l} rectangular +* V_{2,1}\in\C^{k-l,k-l} unit upper triangular +* +* V_{1,2}\in\C^{n-k,l} rectangular +* V_{2,2}\in\C^{k-l,l} rectangular +* V_{3,2}\in\C^{l,l} unit upper triangular +* +* We will construct the T matrix +* T = |---------------| = |--------| +* |T_{1,1} 0 | |T_1 0 | +* |T_{2,1} T_{2,2}| |T_3 T_2| +* |---------------| |--------| +* +* T is the triangular factor attained from block reflectors. +* To motivate the structure, assume we have already computed T_1 +* and T_2. Then collect the associated reflectors in V_1 and V_2 +* +* T_1\in\C^{k-l, k-l} non-unit lower triangular +* T_2\in\C^{l, l} non-unit lower triangular +* T_3\in\C^{k-l, l} rectangular +* +* Where l = floor(k/2) +* +* Then, consider the product: +* +* (I - V_2T_2V_2')(I - V_1T_1V_1') +* = I - V_2T_2V_2' - V_1T_1V_1' + V_2T_2V_2'V_1T_1V_1' +* +* Define T_3 = -T_2V_2'V_1T_1 +* +* Then, we can define the matrix V as +* V = |-------| +* |V_1 V_2| +* |-------| +* +* So, our product is equivalent to the matrix product +* I - VTV' +* This means, we can compute T_1 and T_2, then use this information +* to compute T_3 +* +* Compute T_1 recursively +* + CALL CLARFT(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, T, LDT) +* +* Compute T_2 recursively +* + CALL CLARFT(DIRECT, STOREV, N, L, V(1, K-L+1), LDV, TAU(K-L+1), + $ T(K-L+1,K-L+1), LDT) +* +* Compute T_3 +* T_3 = V_{2,2}' +* + DO J = 1, K-L + DO I = 1, L + T(K-L+I,J) = CONJG(V(N-K+J, K-L+I)) + END DO END DO - END IF - RETURN * -* End of CLARFT +* T_3 = T_3V_{2,1} +* + CALL CTRMM('Right', 'Upper', 'No transpose', 'Unit', L, K-L, + $ ONE, V(N-K+1,1), LDV, T(K-L+1,1), LDT) + +* +* T_3 = V_{2,2}'V_{2,1} + T_3 +* Note: We assume K <= N, and GEMM will do nothing if N=K +* + CALL CGEMM('Conjugate', 'No transpose', L, K-L, N-K, ONE, + $ V(1,K-L+1), LDV, V, LDV, ONE, T(K-L+1,1), LDT) +* +* At this point, we have that T_3 = V_2'V_1 +* All that is left is to pre and post multiply by -T_2 and T_1 +* respectively. +* +* T_3 = -T_2T_3 * - END + CALL CTRMM('Left', 'Lower', 'No transpose', 'Non-unit', L, K-L, + $ NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) +* +* T_3 = T_3T_1 +* + CALL CTRMM('Right', 'Lower', 'No transpose', 'Non-unit', L, + $ K-L, ONE, T, LDT, T(K-L+1,1), LDT) + ELSE +* +* Else means RQ case +* +* Break V apart into 6 components +* +* V = |-----------------------| +* |V_{1,1} V_{1,2} 0 | +* |V_{2,1} V_{2,2} V_{2,3}| +* |-----------------------| +* +* V_{1,1}\in\C^{k-l,n-k} rectangular +* V_{1,2}\in\C^{k-l,k-l} unit lower triangular +* +* V_{2,1}\in\C^{l,n-k} rectangular +* V_{2,2}\in\C^{l,k-l} rectangular +* V_{2,3}\in\C^{l,l} unit lower triangular +* +* We will construct the T matrix +* T = |---------------| = |--------| +* |T_{1,1} 0 | |T_1 0 | +* |T_{2,1} T_{2,2}| |T_3 T_2| +* |---------------| |--------| +* +* T is the triangular factor attained from block reflectors. +* To motivate the structure, assume we have already computed T_1 +* and T_2. Then collect the associated reflectors in V_1 and V_2 +* +* T_1\in\C^{k-l, k-l} non-unit lower triangular +* T_2\in\C^{l, l} non-unit lower triangular +* T_3\in\C^{k-l, l} rectangular +* +* Where l = floor(k/2) +* +* Then, consider the product: +* +* (I - V_2'T_2V_2)(I - V_1'T_1V_1) +* = I - V_2'T_2V_2 - V_1'T_1V_1 + V_2'T_2V_2V_1'T_1V_1 +* +* Define T_3 = -T_2V_2V_1'T_1 +* +* Then, we can define the matrix V as +* V = |---| +* |V_1| +* |V_2| +* |---| +* +* So, our product is equivalent to the matrix product +* I - V'TV +* This means, we can compute T_1 and T_2, then use this information +* to compute T_3 +* +* Compute T_1 recursively +* + CALL CLARFT(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, T, LDT) +* +* Compute T_2 recursively +* + CALL CLARFT(DIRECT, STOREV, N, L, V(K-L+1,1), LDV, TAU(K-L+1), + $ T(K-L+1,K-L+1), LDT) +* +* Compute T_3 +* T_3 = V_{2,2} +* + CALL CLACPY('All', L, K-L, V(K-L+1,N-K+1), LDV, T(K-L+1,1), + $ LDT) + +* +* T_3 = T_3V_{1,2}' +* + CALL CTRMM('Right', 'Lower', 'Conjugate', 'Unit', L, K-L, ONE, + $ V(1, N-K+1), LDV, T(K-L+1,1), LDT) + +* +* T_3 = V_{2,1}V_{1,1}' + T_3 +* Note: We assume K <= N, and GEMM will do nothing if N=K +* + CALL CGEMM('No transpose', 'Conjugate', L, K-L, N-K, ONE, + $ V(K-L+1,1), LDV, V, LDV, ONE, T(K-L+1,1), LDT) + +* +* At this point, we have that T_3 = V_2V_1' +* All that is left is to pre and post multiply by -T_2 and T_1 +* respectively. +* +* T_3 = -T_2T_3 +* + CALL CTRMM('Left', 'Lower', 'No tranpose', 'Non-unit', L, K-L, + $ NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) + +* +* T_3 = T_3T_1 +* + CALL CTRMM('Right', 'Lower', 'No tranpose', 'Non-unit', L, K-L, + $ ONE, T, LDT, T(K-L+1,1), LDT) + END IF + END SUBROUTINE diff --git a/SRC/dgelqf.f b/SRC/dgelqf.f index 254bd1b68e..f0eb00a55d 100644 --- a/SRC/dgelqf.f +++ b/SRC/dgelqf.f @@ -161,7 +161,7 @@ SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) $ NBMIN, NX * .. * .. External Subroutines .. - EXTERNAL DGELQ2, DLARFB, MY_DLARFT_REC, XERBLA + EXTERNAL DGELQ2, DLARFB, DLARFT, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -251,8 +251,7 @@ SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * - CALL MY_DLARFT_REC( 'Forward', 'Rowwise', N-I+1, IB, - $ A( I, I ), + CALL DLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ), $ LDA, TAU( I ), WORK, LDWORK ) * * Apply H to A(i+ib:m,i:n) from the right diff --git a/SRC/dgeqlf.f b/SRC/dgeqlf.f index e1287116be..7da963aeaf 100644 --- a/SRC/dgeqlf.f +++ b/SRC/dgeqlf.f @@ -156,7 +156,7 @@ SUBROUTINE DGEQLF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) $ MU, NB, NBMIN, NU, NX * .. * .. External Subroutines .. - EXTERNAL DGEQL2, DLARFB, MY_DLARFT_REC, XERBLA + EXTERNAL DGEQL2, DLARFB, DLARFT, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -256,7 +256,7 @@ SUBROUTINE DGEQLF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * - CALL MY_DLARFT_REC( 'Backward', 'Columnwise', M-K+I+IB-1, + CALL DLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, $ IB, A( 1, N-K+I ), LDA, TAU( I ), WORK, $ LDWORK ) * diff --git a/SRC/dgeqrf.f b/SRC/dgeqrf.f index 8e02d6bc24..c005d47af5 100644 --- a/SRC/dgeqrf.f +++ b/SRC/dgeqrf.f @@ -163,7 +163,7 @@ SUBROUTINE DGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) $ NBMIN, NX * .. * .. External Subroutines .. - EXTERNAL DGEQR2, DLARFB, MY_DLARFT_REC, XERBLA + EXTERNAL DGEQR2, DLARFB, DLARFT, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -253,7 +253,7 @@ SUBROUTINE DGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * - CALL MY_DLARFT_REC( 'Forward', 'Columnwise', M-I+1, IB, + CALL DLARFT( 'Forward', 'Columnwise', M-I+1, IB, $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) * * Apply H**T to A(i:m,i+ib:n) from the left diff --git a/SRC/dgeqrfp.f b/SRC/dgeqrfp.f index 65e5d51009..aa757e96cf 100644 --- a/SRC/dgeqrfp.f +++ b/SRC/dgeqrfp.f @@ -167,7 +167,7 @@ SUBROUTINE DGEQRFP( M, N, A, LDA, TAU, WORK, LWORK, INFO ) $ NB, NBMIN, NX * .. * .. External Subroutines .. - EXTERNAL DGEQR2P, DLARFB, MY_DLARFT_REC, XERBLA + EXTERNAL DGEQR2P, DLARFB, DLARFT, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -259,7 +259,7 @@ SUBROUTINE DGEQRFP( M, N, A, LDA, TAU, WORK, LWORK, INFO ) * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * - CALL MY_DLARFT_REC( 'Forward', 'Columnwise', M-I+1, IB, + CALL DLARFT( 'Forward', 'Columnwise', M-I+1, IB, $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) * * Apply H**T to A(i:m,i+ib:n) from the left diff --git a/SRC/dgerqf.f b/SRC/dgerqf.f index 048483c807..8760ee04b3 100644 --- a/SRC/dgerqf.f +++ b/SRC/dgerqf.f @@ -156,7 +156,7 @@ SUBROUTINE DGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) $ MU, NB, NBMIN, NU, NX * .. * .. External Subroutines .. - EXTERNAL DGERQ2, DLARFB, MY_DLARFT_REC, XERBLA + EXTERNAL DGERQ2, DLARFB, DLARFT, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -256,8 +256,8 @@ SUBROUTINE DGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * - CALL MY_DLARFT_REC( 'Backward', 'Rowwise', N-K+I+IB-1, - $ IB, A( M-K+I, 1 ), LDA, TAU( I ), WORK, + CALL DLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB, + $ A( M-K+I, 1 ), LDA, TAU( I ), WORK, $ LDWORK ) * * Apply H to A(1:m-k+i-1,1:n-k+i+ib-1) from the right diff --git a/SRC/dlarft.f b/SRC/dlarft.f index 81ffb39857..d3f0b87454 100644 --- a/SRC/dlarft.f +++ b/SRC/dlarft.f @@ -18,7 +18,7 @@ * Definition: * =========== * -* SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) * * .. Scalar Arguments .. * CHARACTER DIRECT, STOREV @@ -159,170 +159,464 @@ *> \endverbatim *> * ===================================================================== - SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) + RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, + $ LDT ) * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * -* .. Scalar Arguments .. +* .. Scalar Arguments +* CHARACTER DIRECT, STOREV INTEGER K, LDT, LDV, N * .. * .. Array Arguments .. +* DOUBLE PRECISION T( LDT, * ), TAU( * ), V( LDV, * ) * .. * -* ===================================================================== -* * .. Parameters .. - DOUBLE PRECISION ONE, ZERO - PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) -* .. +* + DOUBLE PRECISION ONE, NEG_ONE, ZERO + PARAMETER(ONE=1.0D+0, ZERO = 0.0D+0, NEG_ONE=-1.0D+0) +* * .. Local Scalars .. - INTEGER I, J, PREVLASTV, LASTV -* .. +* + INTEGER I,J,L + LOGICAL QR,LQ,QL,DIRF,COLV +* * .. External Subroutines .. - EXTERNAL DGEMV, DTRMV -* .. -* .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME +* + EXTERNAL DTRMM,DGEMM,DLACPY +* +* .. External Functions.. +* + LOGICAL LSAME + EXTERNAL LSAME +* +* The general scheme used is inspired by the approach inside DGEQRT3 +* which was (at the time of writing this code): +* Based on the algorithm of Elmroth and Gustavson, +* IBM J. Res. Develop. Vol 44 No. 4 July 2000. * .. * .. Executable Statements .. * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN -* - WRITE(*,*) "in dlarft, n = ", N, " k = ", K, "flags: ", DIRECT, - $ " ", STOREV - IF( LSAME( DIRECT, 'F' ) ) THEN - PREVLASTV = N - DO I = 1, K - PREVLASTV = MAX( I, PREVLASTV ) - IF( TAU( I ).EQ.ZERO ) THEN -* -* H(i) = I -* - DO J = 1, I - T( J, I ) = ZERO - END DO - ELSE -* -* general case -* - IF( LSAME( STOREV, 'C' ) ) THEN -* Skip any trailing zeros. - DO LASTV = N, I+1, -1 - IF( V( LASTV, I ).NE.ZERO ) EXIT - END DO - DO J = 1, I-1 - T( J, I ) = -TAU( I ) * V( I , J ) - END DO - J = MIN( LASTV, PREVLASTV ) -* -* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i) -* - CALL DGEMV( 'Transpose', J-I, I-1, -TAU( I ), - $ V( I+1, 1 ), LDV, V( I+1, I ), 1, ONE, - $ T( 1, I ), 1 ) - ELSE -* Skip any trailing zeros. - DO LASTV = N, I+1, -1 - IF( V( I, LASTV ).NE.ZERO ) EXIT - END DO - DO J = 1, I-1 - T( J, I ) = -TAU( I ) * V( J , I ) - END DO - J = MIN( LASTV, PREVLASTV ) -* -* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T -* - CALL DGEMV( 'No transpose', I-1, J-I, -TAU( I ), - $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, ONE, - $ T( 1, I ), 1 ) - END IF -* -* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) -* - CALL DTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, - $ T, - $ LDT, T( 1, I ), 1 ) - T( I, I ) = TAU( I ) - IF( I.GT.1 ) THEN - PREVLASTV = MAX( PREVLASTV, LASTV ) - ELSE - PREVLASTV = LASTV - END IF - END IF + IF(N.EQ.0.OR.K.EQ.0) THEN + RETURN + END IF +* +* Base case +* + IF(N.EQ.1.OR.K.EQ.1) THEN + T(1,1) = TAU(1) + RETURN + END IF +* +* Beginning of executable statements +* + L = K / 2 +* +* Determine what kind of Q we need to compute +* We assume that if the user doesn't provide 'F' for DIRECT, +* then they meant to provide 'B' and if they don't provide +* 'C' for STOREV, then they meant to provide 'R' +* + DIRF = LSAME(DIRECT,'F') + COLV = LSAME(STOREV,'C') +* +* QR happens when we have forward direction in column storage +* + QR = DIRF.AND.COLV +* +* LQ happens when we have Forward direction in row storage +* + LQ = DIRF.AND.(.NOT.COLV) +* +* QL happens when we have backward direction in column storage +* + QL = (.NOT.DIRF).AND.COLV +* +* The last case is RQ. Due to how we structured this, if the +* above 3 are false, then RQ must be true, so we never store +* this +* RQ happens when we have backward direction in row storage +* RQ = (.NOT.DIRF).AND.(.NOT.COLV) +* + IF(QR) THEN +* +* Break V apart into 6 components +* +* V = |---------------| +* |V_{1,1} 0 | +* |V_{2,1} V_{2,2}| +* |V_{3,1} V_{3,2}| +* |---------------| +* +* V_{1,1}\in\R^{l,l} unit lower triangular +* V_{2,1}\in\R^{k-l,l} rectangular +* V_{3,1}\in\R^{n-k,l} rectangular +* +* V_{2,2}\in\R^{k-l,k-l} unit lower triangular +* V_{3,2}\in\R^{n-k,k-l} rectangular +* +* We will construct the T matrix +* T = |---------------| = |--------| +* |T_{1,1} T_{1,2}| |T_1 T_3| +* |0 T_{2,2}| |0 T_2| +* |---------------| |--------| +* +* T is the triangular factor attained from block reflectors. +* To motivate the structure, assume we have already computed T_1 +* and T_2. Then collect the associated reflectors in V_1 and V_2 +* +* T_1\in\R^{l, l} upper triangular +* T_2\in\R^{k-l, k-l} upper triangular +* T_3\in\R^{l, k-l} rectangular +* +* Where l = floor(k/2) +* +* Then, consider the product: +* +* (I - V_1T_1V_1')(I - V_2T_2V_2') +* = I - V_1T_1V_1' - V_2T_2V_2' + V_1T_1V_1'V_2T_2V_2' +* +* Define T_3 = -T_1V_1'V_2T_2 +* +* Then, we can define the matrix V as +* V = |-------| +* |V_1 V_2| +* |-------| +* +* So, our product is equivalent to the matrix product +* I - VTV' +* This means, we can compute T_1 and T_2, then use this information +* to compute T_3 +* +* Compute T_1 recursively +* + CALL DLARFT(DIRECT, STOREV, N, L, V, LDV, TAU, T, LDT) +* +* Compute T_2 recursively +* + CALL DLARFT(DIRECT, STOREV, N-L, K-L, V(L+1,L+1), LDV, + $ TAU(L+1), T(L+1,L+1), LDT) +* +* Compute T_3 +* T_3 = V_{2,1}' +* + DO J = 1, L + DO I = 1, K-L + T(J,L+I) = V(L+I,J) + END DO END DO - ELSE - PREVLASTV = 1 - DO I = K, 1, -1 - IF( TAU( I ).EQ.ZERO ) THEN -* -* H(i) = I -* - DO J = I, K - T( J, I ) = ZERO - END DO - ELSE -* -* general case -* - IF( I.LT.K ) THEN - IF( LSAME( STOREV, 'C' ) ) THEN -* Skip any leading zeros. - DO LASTV = 1, I-1 - IF( V( LASTV, I ).NE.ZERO ) EXIT - END DO - DO J = I+1, K - T( J, I ) = -TAU( I ) * V( N-K+I , J ) - END DO - J = MAX( LASTV, PREVLASTV ) -* -* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i) -* - CALL DGEMV( 'Transpose', N-K+I-J, K-I, - $ -TAU( I ), - $ V( J, I+1 ), LDV, V( J, I ), 1, ONE, - $ T( I+1, I ), 1 ) - ELSE -* Skip any leading zeros. - DO LASTV = 1, I-1 - IF( V( I, LASTV ).NE.ZERO ) EXIT - END DO - DO J = I+1, K - T( J, I ) = -TAU( I ) * V( J, N-K+I ) - END DO - J = MAX( LASTV, PREVLASTV ) -* -* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T -* - CALL DGEMV( 'No transpose', K-I, N-K+I-J, - $ -TAU( I ), V( I+1, J ), LDV, V( I, J ), LDV, - $ ONE, T( I+1, I ), 1 ) - END IF -* -* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) -* - CALL DTRMV( 'Lower', 'No transpose', 'Non-unit', - $ K-I, - $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) - IF( I.GT.1 ) THEN - PREVLASTV = MIN( PREVLASTV, LASTV ) - ELSE - PREVLASTV = LASTV - END IF - END IF - T( I, I ) = TAU( I ) - END IF +* +* T_3 = T_3V_{2,2} +* + CALL DTRMM('Right', 'Lower', 'No transpose', 'Unit', L, K-L, + $ ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) + +* +* T_3 = V_{3,1}'V_{3,2} + T_3 +* Note: We assume K <= N, and GEMM will do nothing if N=K +* + CALL DGEMM('Transpose', 'No transpose', L, K-L, N-K, ONE, + $ V(K+1, 1), LDV, V(K+1,L+1), LDV, ONE, T(1, L+1), LDT) +* +* At this point, we have that T_3 = V_1'V_2 +* All that is left is to pre and post multiply by -T_1 and T_2 +* respectively. +* +* T_3 = -T_1T_3 +* + CALL DTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, + $ K-L, NEG_ONE, T, LDT, T(1, L+1), LDT) +* +* T_3 = T_3T_2 +* + CALL DTRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, + $ K-L, ONE, T(L+1,L+1), LDT, T(1, L+1), LDT) + + ELSE IF(LQ) THEN +* +* Break V apart into 6 components +* +* V = |----------------------| +* |V_{1,1} V_{1,2} V{1,3}| +* |0 V_{2,2} V{2,3}| +* |----------------------| +* +* V_{1,1}\in\R^{l,l} unit upper triangular +* V_{1,2}\in\R^{l,k-l} rectangular +* V_{1,3}\in\R^{l,n-k} rectangular +* +* V_{2,2}\in\R^{k-l,k-l} unit upper triangular +* V_{2,3}\in\R^{k-l,n-k} rectangular +* +* Where l = floor(k/2) +* +* We will construct the T matrix +* T = |---------------| = |--------| +* |T_{1,1} T_{1,2}| |T_1 T_3| +* |0 T_{2,2}| |0 T_2| +* |---------------| |--------| +* +* T is the triangular factor attained from block reflectors. +* To motivate the structure, assume we have already computed T_1 +* and T_2. Then collect the associated reflectors in V_1 and V_2 +* +* T_1\in\R^{l, l} upper triangular +* T_2\in\R^{k-l, k-l} upper triangular +* T_3\in\R^{l, k-l} rectangular +* +* Then, consider the product: +* +* (I - V_1'T_1V_1)(I - V_2'T_2V_2) +* = I - V_1'T_1V_1 - V_2'T_2V_2 + V_1'T_1V_1V_2'T_2V_2 +* +* Define T_3 = -T_1V_1V_2'T_2 +* +* Then, we can define the matrix V as +* V = |---| +* |V_1| +* |V_2| +* |---| +* +* So, our product is equivalent to the matrix product +* I - V'TV +* This means, we can compute T_1 and T_2, then use this information +* to compute T_3 +* +* Compute T_1 recursively +* + CALL DLARFT(DIRECT, STOREV, N, L, V, LDV, TAU, T, LDT) +* +* Compute T_2 recursively +* + CALL DLARFT(DIRECT, STOREV, N-L, K-L, V(L+1,L+1), LDV, + $ TAU(L+1), T(L+1,L+1), LDT) + +* +* Compute T_3 +* T_3 = V_{1,2} +* + CALL DLACPY('All', L, K - L, V(1,L+1), LDV, T(1, L+1), LDT) +* +* T_3 = T_3V_{2,2}' +* + CALL DTRMM('Right', 'Upper', 'Transpose', 'Unit', L, K-L, ONE, + $ V(L+1, L+1), LDV, T(1, L+1), LDT) + +* +* T_3 = V_{1,3}V_{2,3}' + T_3 +* Note: We assume K <= N, and GEMM will do nothing if N=K +* + CALL DGEMM('No transpose', 'Transpose', L, K-L, N-K, ONE, + $ V(1, K+1), LDV, V(L+1, K+1), LDV, ONE, T(1, L+1), LDT) +* +* At this point, we have that T_3 = V_1V_2' +* All that is left is to pre and post multiply by -T_1 and T_2 +* respectively. +* +* T_3 = -T_1T_3 +* + CALL DTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, K-L, + $ NEG_ONE, T, LDT, T(1, L+1), LDT) + +* +* T_3 = T_3T_2 +* + CALL DTRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, + $ K-L, ONE, T(L+1,L+1), LDT, T(1, L+1), LDT) + ELSE IF(QL) THEN +* +* Break V apart into 6 components +* +* V = |---------------| +* |V_{1,1} V_{1,2}| +* |V_{2,1} V_{2,2}| +* |0 V_{3,2}| +* |---------------| +* +* V_{1,1}\in\R^{n-k,k-l} rectangular +* V_{2,1}\in\R^{k-l,k-l} unit upper triangular +* +* V_{1,2}\in\R^{n-k,l} rectangular +* V_{2,2}\in\R^{k-l,l} rectangular +* V_{3,2}\in\R^{l,l} unit upper triangular +* +* We will construct the T matrix +* T = |---------------| = |--------| +* |T_{1,1} 0 | |T_1 0 | +* |T_{2,1} T_{2,2}| |T_3 T_2| +* |---------------| |--------| +* +* T is the triangular factor attained from block reflectors. +* To motivate the structure, assume we have already computed T_1 +* and T_2. Then collect the associated reflectors in V_1 and V_2 +* +* T_1\in\R^{k-l, k-l} non-unit lower triangular +* T_2\in\R^{l, l} non-unit lower triangular +* T_3\in\R^{k-l, l} rectangular +* +* Where l = floor(k/2) +* +* Then, consider the product: +* +* (I - V_2T_2V_2')(I - V_1T_1V_1') +* = I - V_2T_2V_2' - V_1T_1V_1' + V_2T_2V_2'V_1T_1V_1' +* +* Define T_3 = -T_2V_2'V_1T_1 +* +* Then, we can define the matrix V as +* V = |-------| +* |V_1 V_2| +* |-------| +* +* So, our product is equivalent to the matrix product +* I - VTV' +* This means, we can compute T_1 and T_2, then use this information +* to compute T_3 +* +* Compute T_1 recursively +* + CALL DLARFT(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, T, LDT) +* +* Compute T_2 recursively +* + CALL DLARFT(DIRECT, STOREV, N, L, V(1, K-L+1), LDV, TAU(K-L+1), + $ T(K-L+1,K-L+1), LDT) +* +* Compute T_3 +* T_3 = V_{2,2}' +* + DO J = 1, K-L + DO I = 1, L + T(K-L+I,J) = V(N-K+J, K-L+I) + END DO END DO - END IF - RETURN * -* End of DLARFT +* T_3 = T_3V_{2,1} +* + CALL DTRMM('Right', 'Upper', 'No transpose', 'Unit', L, K-L, + $ ONE, V(N-K+1,1), LDV, T(K-L+1,1), LDT) + +* +* T_3 = V_{2,2}'V_{2,1} + T_3 +* Note: We assume K <= N, and GEMM will do nothing if N=K +* + CALL DGEMM('Transpose', 'No transpose', L, K-L, N-K, ONE, + $ V(1,K-L+1), LDV, V, LDV, ONE, T(K-L+1,1), LDT) +* +* At this point, we have that T_3 = V_2'V_1 +* All that is left is to pre and post multiply by -T_2 and T_1 +* respectively. +* +* T_3 = -T_2T_3 +* + CALL DTRMM('Left', 'Lower', 'No transpose', 'Non-unit', L, K-L, + $ NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) * - END +* T_3 = T_3T_1 +* + CALL DTRMM('Right', 'Lower', 'No transpose', 'Non-unit', L, + $ K-L, ONE, T, LDT, T(K-L+1,1), LDT) + ELSE +* +* Else means RQ case +* +* Break V apart into 6 components +* +* V = |-----------------------| +* |V_{1,1} V_{1,2} 0 | +* |V_{2,1} V_{2,2} V_{2,3}| +* |-----------------------| +* +* V_{1,1}\in\R^{k-l,n-k} rectangular +* V_{1,2}\in\R^{k-l,k-l} unit lower triangular +* +* V_{2,1}\in\R^{l,n-k} rectangular +* V_{2,2}\in\R^{l,k-l} rectangular +* V_{2,3}\in\R^{l,l} unit lower triangular +* +* We will construct the T matrix +* T = |---------------| = |--------| +* |T_{1,1} 0 | |T_1 0 | +* |T_{2,1} T_{2,2}| |T_3 T_2| +* |---------------| |--------| +* +* T is the triangular factor attained from block reflectors. +* To motivate the structure, assume we have already computed T_1 +* and T_2. Then collect the associated reflectors in V_1 and V_2 +* +* T_1\in\R^{k-l, k-l} non-unit lower triangular +* T_2\in\R^{l, l} non-unit lower triangular +* T_3\in\R^{k-l, l} rectangular +* +* Where l = floor(k/2) +* +* Then, consider the product: +* +* (I - V_2'T_2V_2)(I - V_1'T_1V_1) +* = I - V_2'T_2V_2 - V_1'T_1V_1 + V_2'T_2V_2V_1'T_1V_1 +* +* Define T_3 = -T_2V_2V_1'T_1 +* +* Then, we can define the matrix V as +* V = |---| +* |V_1| +* |V_2| +* |---| +* +* So, our product is equivalent to the matrix product +* I - V'TV +* This means, we can compute T_1 and T_2, then use this information +* to compute T_3 +* +* Compute T_1 recursively +* + CALL DLARFT(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, T, LDT) +* +* Compute T_2 recursively +* + CALL DLARFT(DIRECT, STOREV, N, L, V(K-L+1,1), LDV, TAU(K-L+1), + $ T(K-L+1,K-L+1), LDT) +* +* Compute T_3 +* T_3 = V_{2,2} +* + CALL DLACPY('All', L, K-L, V(K-L+1,N-K+1), LDV, T(K-L+1,1), + $ LDT) + +* +* T_3 = T_3V_{1,2}' +* + CALL DTRMM('Right', 'Lower', 'Transpose', 'Unit', L, K-L, ONE, + $ V(1, N-K+1), LDV, T(K-L+1,1), LDT) + +* +* T_3 = V_{2,1}V_{1,1}' + T_3 +* Note: We assume K <= N, and GEMM will do nothing if N=K +* + CALL DGEMM('No transpose', 'Transpose', L, K-L, N-K, ONE, + $ V(K-L+1,1), LDV, V, LDV, ONE, T(K-L+1,1), LDT) + +* +* At this point, we have that T_3 = V_2V_1' +* All that is left is to pre and post multiply by -T_2 and T_1 +* respectively. +* +* T_3 = -T_2T_3 +* + CALL DTRMM('Left', 'Lower', 'No tranpose', 'Non-unit', L, K-L, + $ NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) + +* +* T_3 = T_3T_1 +* + CALL DTRMM('Right', 'Lower', 'No tranpose', 'Non-unit', L, K-L, + $ ONE, T, LDT, T(K-L+1,1), LDT) + END IF + END SUBROUTINE diff --git a/SRC/dorglq.f b/SRC/dorglq.f index 6057a96338..47edfe4eda 100644 --- a/SRC/dorglq.f +++ b/SRC/dorglq.f @@ -148,7 +148,7 @@ SUBROUTINE DORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) $ LWKOPT, NB, NBMIN, NX * .. * .. External Subroutines .. - EXTERNAL DLARFB, MY_DLARFT_REC, DORGL2, XERBLA + EXTERNAL DLARFB, DLARFT, DORGL2, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -253,8 +253,7 @@ SUBROUTINE DORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * - CALL MY_DLARFT_REC( 'Forward', 'Rowwise', N-I+1, IB, - $ A( I, I ), + CALL DLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ), $ LDA, TAU( I ), WORK, LDWORK ) * * Apply H**T to A(i+ib:m,i:n) from the right diff --git a/SRC/dorgql.f b/SRC/dorgql.f index 4920a705bf..8ac4cbf003 100644 --- a/SRC/dorgql.f +++ b/SRC/dorgql.f @@ -149,7 +149,7 @@ SUBROUTINE DORGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) $ NB, NBMIN, NX * .. * .. External Subroutines .. - EXTERNAL DLARFB, MY_DLARFT_REC, DORG2L, XERBLA + EXTERNAL DLARFB, DLARFT, DORG2L, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -260,9 +260,8 @@ SUBROUTINE DORGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * - CALL MY_DLARFT_REC( 'Backward', 'Columnwise', M-K+I+IB-1, - $ IB, A( 1, N-K+I ), LDA, TAU( I ), WORK, - $ LDWORK ) + CALL DLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB, + $ A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK ) * * Apply H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left * diff --git a/SRC/dorgqr.f b/SRC/dorgqr.f index 5823cf8ae4..fd88519871 100644 --- a/SRC/dorgqr.f +++ b/SRC/dorgqr.f @@ -149,7 +149,7 @@ SUBROUTINE DORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) $ LWKOPT, NB, NBMIN, NX * .. * .. External Subroutines .. - EXTERNAL DLARFB, MY_DLARFT_REC, DORG2R, XERBLA + EXTERNAL DLARFB, DLARFT, DORG2R, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -254,7 +254,7 @@ SUBROUTINE DORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * - CALL MY_DLARFT_REC( 'Forward', 'Columnwise', M-I+1, IB, + CALL DLARFT( 'Forward', 'Columnwise', M-I+1, IB, $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) * * Apply H to A(i:m,i+ib:n) from the left diff --git a/SRC/dorgrq.f b/SRC/dorgrq.f index 2f2c84d9c3..54e109b492 100644 --- a/SRC/dorgrq.f +++ b/SRC/dorgrq.f @@ -149,7 +149,7 @@ SUBROUTINE DORGRQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) $ LWKOPT, NB, NBMIN, NX * .. * .. External Subroutines .. - EXTERNAL DLARFB, MY_DLARFT_REC, DORGR2, XERBLA + EXTERNAL DLARFB, DLARFT, DORGR2, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -261,9 +261,8 @@ SUBROUTINE DORGRQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * - CALL MY_DLARFT_REC( 'Backward', 'Rowwise', N-K+I+IB-1, - $ IB, A( II, 1 ), LDA, TAU( I ), WORK, - $ LDWORK) + CALL DLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB, + $ A( II, 1 ), LDA, TAU( I ), WORK, LDWORK) * * Apply H**T to A(1:m-k+i-1,1:n-k+i+ib-1) from the right * diff --git a/SRC/dormlq.f b/SRC/dormlq.f index cb68138cb6..ac6f931047 100644 --- a/SRC/dormlq.f +++ b/SRC/dormlq.f @@ -196,7 +196,7 @@ SUBROUTINE DORMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, EXTERNAL LSAME, ILAENV * .. * .. External Subroutines .. - EXTERNAL DLARFB, MY_DLARFT_REC, DORML2, XERBLA + EXTERNAL DLARFB, DLARFT, DORML2, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -316,8 +316,8 @@ SUBROUTINE DORMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * - CALL MY_DLARFT_REC( 'Forward', 'Rowwise', NQ-I+1, IB, - $ A( I, I ), LDA, TAU( I ), WORK( IWT ), LDT ) + CALL DLARFT( 'Forward', 'Rowwise', NQ-I+1, IB, A( I, I ), + $ LDA, TAU( I ), WORK( IWT ), LDT ) IF( LEFT ) THEN * * H or H**T is applied to C(i:m,1:n) diff --git a/SRC/dormql.f b/SRC/dormql.f index c28e0d0e70..9020c6abd9 100644 --- a/SRC/dormql.f +++ b/SRC/dormql.f @@ -195,7 +195,7 @@ SUBROUTINE DORMQL( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, EXTERNAL LSAME, ILAENV * .. * .. External Subroutines .. - EXTERNAL DLARFB, MY_DLARFT_REC, DORM2L, XERBLA + EXTERNAL DLARFB, DLARFT, DORM2L, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -310,8 +310,8 @@ SUBROUTINE DORMQL( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * - CALL MY_DLARFT_REC( 'Backward', 'Columnwise', NQ-K+I+IB-1, - $ IB, A( 1, I ), LDA, TAU( I ), WORK( IWT ), LDT) + CALL DLARFT( 'Backward', 'Columnwise', NQ-K+I+IB-1, IB, + $ A( 1, I ), LDA, TAU( I ), WORK( IWT ), LDT) IF( LEFT ) THEN * * H or H**T is applied to C(1:m-k+i+ib-1,1:n) diff --git a/SRC/dormqr.f b/SRC/dormqr.f index e35534067f..7793103b33 100644 --- a/SRC/dormqr.f +++ b/SRC/dormqr.f @@ -195,7 +195,7 @@ SUBROUTINE DORMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, EXTERNAL LSAME, ILAENV * .. * .. External Subroutines .. - EXTERNAL DLARFB, MY_DLARFT_REC, DORM2R, XERBLA + EXTERNAL DLARFB, DLARFT, DORM2R, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -309,8 +309,7 @@ SUBROUTINE DORMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * - CALL MY_DLARFT_REC( 'Forward', 'Columnwise', NQ-I+1, IB, - $ A(I, I ), + CALL DLARFT( 'Forward', 'Columnwise', NQ-I+1, IB, A( I, I ), $ LDA, TAU( I ), WORK( IWT ), LDT ) IF( LEFT ) THEN * diff --git a/SRC/dormrq.f b/SRC/dormrq.f index dcefe8d1df..03159e4961 100644 --- a/SRC/dormrq.f +++ b/SRC/dormrq.f @@ -196,7 +196,7 @@ SUBROUTINE DORMRQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, EXTERNAL LSAME, ILAENV * .. * .. External Subroutines .. - EXTERNAL DLARFB, MY_DLARFT_REC, DORMR2, XERBLA + EXTERNAL DLARFB, DLARFT, DORMR2, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -317,7 +317,7 @@ SUBROUTINE DORMRQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * - CALL MY_DLARFT_REC( 'Backward', 'Rowwise', NQ-K+I+IB-1, IB, + CALL DLARFT( 'Backward', 'Rowwise', NQ-K+I+IB-1, IB, $ A( I, 1 ), LDA, TAU( I ), WORK( IWT ), LDT ) IF( LEFT ) THEN * diff --git a/SRC/my_dlarft_rec.f b/SRC/my_dlarft_rec.f deleted file mode 100644 index 15dd59dee1..0000000000 --- a/SRC/my_dlarft_rec.f +++ /dev/null @@ -1,239 +0,0 @@ -c Cost: n > k: 1/6 * (k^2-1)(2n+k) -c n = k: 1/2 * (n^3-n) - RECURSIVE SUBROUTINE MY_DLARFT_REC( DIRECT, STOREV, N, K, V, LDV, - $ TAU, T, LDT) - IMPLICIT NONE - ! Arguemnts - ! Scalars - INTEGER N, K, LDV, LDT - CHARACTER DIRECT, STOREV - ! Matrix - DOUBLE PRECISION V(LDV,*), T(LDT,*), TAU(N) - - ! Local variables - INTEGER I,J,L,MINNK - LOGICAL QR,LQ,QL,DIRF,COLV - ! Parameters - DOUBLE PRECISION ONE, NEG_ONE, ZERO - PARAMETER(ONE=1.0D+0, ZERO = 0.0, NEG_ONE=-1.0D+0) - ! External functions - LOGICAL LSAME - EXTERNAL LSAME - ! External subroutines - EXTERNAL DTRMM,DGEMM,DLACPY - - ! Break V apart into 6 components - ! V = |---------------| - ! |V_{1,1} 0 | - ! |V_{2,1} V_{2,2}| - ! |V_{3,1} V_{3,2}| - ! |---------------| - ! V_{1,1}\in\R^{k,k} unit lower triangular - ! V_{2,1}\in\R^{n-k,k} rectangular - ! V_{3,1}\in\R^{m-n,k} rectangular - ! - ! V_{2,2}\in\R^{n-k,n-k} unit upper triangular - ! V_{3,2}\in\R^{m-n,n-k} rectangular - - ! We will construct the T matrix - ! T = |---------------| = |--------| - ! |T_{1,1} T_{1,2}| |T_1 T_3| - ! |0 T_{2,2}| |0 T_2| - ! |---------------| |--------| - - ! T is the triangular factor attained from block reflectors. - ! To motivate the structure, consider the product - ! - ! (I - V_1T_1V_1^\top)(I - V_2T_2V_2^\top) - ! = I - V_1T_1V_1^\top - V_2T_2V_2^\top + V_1T_1V_1^\topV_2T_2V_2^\top - ! - ! Define T_3 = -T_1V_1^\topV_2T_2 - ! - ! Then, we can define the matrix V as - ! V = |-------| - ! |V_1 V_2| - ! |-------| - ! - ! So, our product is equivalent to the matrix product - ! I - VTV^\top - ! So, we compute T_1, then T_2, then use these values to get T_3 - ! - ! The general scheme used is inspired by the approach inside DGEQRT3 - ! which was (at the time of writing this code): - ! Based on the algorithm of Elmroth and Gustavson, - ! IBM J. Res. Develop. Vol 44 No. 4 July 2000. - - IF(K.EQ.0.OR.N.EQ.0) THEN - RETURN - END IF - ! Base case - IF(K.EQ.1.OR.N.EQ.1) THEN - T(1,1) = TAU(1) - RETURN - END IF - - ! Beginning of executable statements -! MINNK = MIN(N,K) -! L = MINNK / 2 - L = K / 2 - ! Determine what kind of Q we need to compute - ! We assume that if the user doesn't provide 'F' for DIRECT, - ! then they meant to provide 'B' and if they don't provide - ! 'C' for STOREV, then they meant to provide 'R' - DIRF = LSAME(DIRECT,'F') - COLV = LSAME(STOREV,'C') - ! QR happens when we have forward direction in column storage - QR = DIRF.AND.COLV - ! LQ happens when we have Forward direction in row storage - LQ = DIRF.AND.(.NOT.COLV) - ! QL happens when we have backward direction in column storage - QL = (.NOT.DIRF).AND.COLV - ! The last case is RQ. Due to how we strucutured this, if the - ! above 3 are false, then RQ must be true, so we never store - ! this - ! RQ happens when we have backward direction in row storage - !RQ = (.NOT.DIRF).AND.(.NOT.COLV) - - - ! Compute T3 - IF(QR) THEN - ! If we are wide, then our - ! Compute T_1 - CALL MY_DLARFT_REC(DIRECT, STOREV, N, L, V, LDV, TAU, T, - $ LDT) - ! Compute T_2 - CALL MY_DLARFT_REC(DIRECT, STOREV, N-L, K-L, V(L+1,L+1), - $ LDV, TAU(L+1), T(L+1,L+1), LDT) - ! Compute T_3 - ! T_3 = V_{2,1}^\top - DO J = 1, L - DO I = 1, K-L - T(J,L+I) = V(L+I,J) - END DO - END DO - ! T_3 = V_{2,1}^\top * V_{2,2} - CALL DTRMM('Right', 'Lower', 'No transpose', 'Unit', - $ L, K - L, ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) - - IF(N.GT.K) THEN - ! T_3 = T_3 + V_{3,1}^\topV_{3,2} - CALL DGEMM('Transpose', 'No transpose', L, K-L, N-K, - $ ONE, V(K+1, 1), LDV, V(K+1,L+1), LDV, ONE, - $ T(1, L+1), LDT) - END IF - - ! At this point, we have that T_3 = V_1^\top *V_2 - ! All that is left is to pre and post multiply by -T_1 and T_2 - ! respectively. - - ! T_3 = -T_1*T_3 - CALL DTRMM('Left', 'Upper', 'No transpose', 'Non-unit', - $ L, K - L, NEG_ONE, T, LDT, T(1, L+1), LDT) - ! T_3 = T_3*T_2 - CALL DTRMM('Right', 'Upper', 'No transpose', 'Non-unit', - $ L, K - L, ONE, T(L+1,L+1), LDT, T(1, L+1), LDT) - - ELSE IF(LQ) THEN - ! Compute T_1 - CALL MY_DLARFT_REC(DIRECT, STOREV, N, L, V, LDV, TAU, T, - $ LDT) - ! Compute T_2 - CALL MY_DLARFT_REC(DIRECT, STOREV, N-L, K-L, V(L+1,L+1), - $ LDV, TAU(L+1), T(L+1,L+1), LDT) - - ! Begin computing T_3 - ! First, T_3 = V_1V_2^\top - ! T_3 = V_{12} - CALL DLACPY('All', L, K - L, V(1,L+1), LDV, T(1, L+1), LDT) - - ! T_3 = V_{12}V_{22}^\top = T_3V_{22}^\top - CALL DTRMM('Right', 'Upper', 'Transpose', 'Unit', L, K-L, - $ ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) - - ! If needed, use the trailing components - IF(N.GT.K) THEN - CALL DGEMM('No transpose', 'Transpose', L, K-L, N-K, - $ ONE, V(1, K+1), LDV, V(L+1, K+1), LDV, ONE, - $ T(1, L+1), LDT) - END IF - - ! T_3 = -T_1T_3 - CALL DTRMM('Left', 'Upper', 'No transpose', 'Non-unit', - $ L, K - L, NEG_ONE, T, LDT, T(1, L+1), LDT) - - ! T_3 = T_3T_1 - CALL DTRMM('Right', 'Upper', 'No transpose', 'Non-unit', - $ L, K - L, ONE, T(L+1,L+1), LDT, T(1, L+1), LDT) - ELSE IF(QL) THEN - ! Compute T_1 - CALL MY_DLARFT_REC(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, - $ T, LDT) - ! Compute T_2 - CALL MY_DLARFT_REC(DIRECT, STOREV, N, L, V(1, K-L+1), LDV, - $ TAU(K-L+1), T(K-L+1,K-L+1), LDT) - - ! Begin computing T_3 = T_2V_2^\topV_1T_1 - - ! T_3 = V_2^\top V_1 - - ! T_3 = V_{2,2}^\top - DO J = 1, K-L - DO I = 1, L - T(K-L+I,J) = V(N-K+J, K-L+I) - END DO - END DO - - ! T_3 = V_{2,2}^\topV_{2,1} = T_3V_{2,1} - CALL DTRMM('Right', 'Upper', 'No transpose', 'Unit', - $ L, K - L, ONE, V(N-K+1,1), LDV, T(K-L+1,1), LDT) - - ! If needed, T_3 = V_{1,2}^\topV_{1,1} + T_3 - IF(N.GT.K) THEN - CALL DGEMM('Transpose', 'No transpose', L, K-L, N-K, - $ ONE, V(1,K-L+1), LDV, V, LDV, ONE, T(K-L+1,1), LDT) - END IF - - ! T_3 = -T_2T_3 - CALL DTRMM('Left', 'Lower', 'No transpose', 'Non-unit', - $ L, K-L, NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) - ! T_3 = T_3T_1 - CALL DTRMM('Right', 'Lower', 'No transpose', 'Non-unit', - $ L, K-L, ONE, T, LDT, T(K-L+1,1), LDT) - ELSE - ! Else means RQ - ! Compute T_1 - CALL MY_DLARFT_REC(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, - $ T, LDT) - ! Compute T_2 - CALL MY_DLARFT_REC(DIRECT, STOREV, N, L, V(K-L+1,1), LDV, - $ TAU(K-L+1), T(K-L+1,K-L+1), LDT) - - ! Begin computing T_3 = T_2V_2V_1^\topT_1 - - ! T_3 = V_2V_1^\top - - ! T_3 = V_{2,2} - CALL DLACPY('All', L, K-L, V(K-L+1,N-K+1), LDV, - $ T(K-L+1,1), LDT) - - ! T_3 = T_3V_{1,2}^\top - CALL DTRMM('Right', 'Lower', 'Transpose', 'Unit', - $ L, K-L, ONE, V(1, N-K+1), LDV, T(K-L+1,1), LDT) - - ! If needed, T_3 = V_{2,1}V_{1,1}^\top + T_3 - IF(N.GT.K) THEN - CALL DGEMM('No transpose', 'Transpose', L, K-L, N-K, - $ ONE, V(K-L+1,1), LDV, V, LDV, ONE, T(K-L+1,1), LDT) - END IF - - ! T_3 = -T_2T_3 - CALL DTRMM('Left', 'Lower', 'No tranpose', 'Non-unit', - $ L, K-L, NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) - - ! T_3 = T_3T_1 - CALL DTRMM('Right', 'Lower', 'No tranpose', 'Non-unit', - $ L, K-L, ONE, T, LDT, T(K-L+1,1), LDT) - END IF - - ! Now, we have T in the correct form! - END SUBROUTINE diff --git a/SRC/slarft.f b/SRC/slarft.f index 31b7951819..f6e647e470 100644 --- a/SRC/slarft.f +++ b/SRC/slarft.f @@ -18,7 +18,7 @@ * Definition: * =========== * -* SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) * * .. Scalar Arguments .. * CHARACTER DIRECT, STOREV @@ -159,168 +159,464 @@ *> \endverbatim *> * ===================================================================== - SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) + RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, + $ LDT ) * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * -* .. Scalar Arguments .. +* .. Scalar Arguments +* CHARACTER DIRECT, STOREV INTEGER K, LDT, LDV, N * .. * .. Array Arguments .. +* REAL T( LDT, * ), TAU( * ), V( LDV, * ) * .. * -* ===================================================================== -* * .. Parameters .. - REAL ONE, ZERO - PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) -* .. +* + REAL ONE, NEG_ONE, ZERO + PARAMETER(ONE=1.0E+0, ZERO = 0.0E+0, NEG_ONE=-1.0E+0) +* * .. Local Scalars .. - INTEGER I, J, PREVLASTV, LASTV -* .. +* + INTEGER I,J,L + LOGICAL QR,LQ,QL,DIRF,COLV +* * .. External Subroutines .. - EXTERNAL SGEMV, STRMV -* .. -* .. External Functions .. +* + EXTERNAL STRMM,SGEMM,SLACPY +* +* .. External Functions.. +* LOGICAL LSAME EXTERNAL LSAME +* +* The general scheme used is inspired by the approach inside DGEQRT3 +* which was (at the time of writing this code): +* Based on the algorithm of Elmroth and Gustavson, +* IBM J. Res. Develop. Vol 44 No. 4 July 2000. * .. * .. Executable Statements .. * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN -* - IF( LSAME( DIRECT, 'F' ) ) THEN - PREVLASTV = N - DO I = 1, K - PREVLASTV = MAX( I, PREVLASTV ) - IF( TAU( I ).EQ.ZERO ) THEN -* -* H(i) = I -* - DO J = 1, I - T( J, I ) = ZERO - END DO - ELSE -* -* general case -* - IF( LSAME( STOREV, 'C' ) ) THEN -* Skip any trailing zeros. - DO LASTV = N, I+1, -1 - IF( V( LASTV, I ).NE.ZERO ) EXIT - END DO - DO J = 1, I-1 - T( J, I ) = -TAU( I ) * V( I , J ) - END DO - J = MIN( LASTV, PREVLASTV ) -* -* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i) -* - CALL SGEMV( 'Transpose', J-I, I-1, -TAU( I ), - $ V( I+1, 1 ), LDV, V( I+1, I ), 1, ONE, - $ T( 1, I ), 1 ) - ELSE -* Skip any trailing zeros. - DO LASTV = N, I+1, -1 - IF( V( I, LASTV ).NE.ZERO ) EXIT - END DO - DO J = 1, I-1 - T( J, I ) = -TAU( I ) * V( J , I ) - END DO - J = MIN( LASTV, PREVLASTV ) -* -* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T -* - CALL SGEMV( 'No transpose', I-1, J-I, -TAU( I ), - $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, - $ ONE, T( 1, I ), 1 ) - END IF -* -* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) -* - CALL STRMV( 'Upper', 'No transpose', 'Non-unit', I-1, - $ T, - $ LDT, T( 1, I ), 1 ) - T( I, I ) = TAU( I ) - IF( I.GT.1 ) THEN - PREVLASTV = MAX( PREVLASTV, LASTV ) - ELSE - PREVLASTV = LASTV - END IF - END IF + IF(N.EQ.0.OR.K.EQ.0) THEN + RETURN + END IF +* +* Base case +* + IF(N.EQ.1.OR.K.EQ.1) THEN + T(1,1) = TAU(1) + RETURN + END IF +* +* Beginning of executable statements +* + L = K / 2 +* +* Determine what kind of Q we need to compute +* We assume that if the user doesn't provide 'F' for DIRECT, +* then they meant to provide 'B' and if they don't provide +* 'C' for STOREV, then they meant to provide 'R' +* + DIRF = LSAME(DIRECT,'F') + COLV = LSAME(STOREV,'C') +* +* QR happens when we have forward direction in column storage +* + QR = DIRF.AND.COLV +* +* LQ happens when we have Forward direction in row storage +* + LQ = DIRF.AND.(.NOT.COLV) +* +* QL happens when we have backward direction in column storage +* + QL = (.NOT.DIRF).AND.COLV +* +* The last case is RQ. Due to how we structured this, if the +* above 3 are false, then RQ must be true, so we never store +* this +* RQ happens when we have backward direction in row storage +* RQ = (.NOT.DIRF).AND.(.NOT.COLV) +* + IF(QR) THEN +* +* Break V apart into 6 components +* +* V = |---------------| +* |V_{1,1} 0 | +* |V_{2,1} V_{2,2}| +* |V_{3,1} V_{3,2}| +* |---------------| +* +* V_{1,1}\in\R^{l,l} unit lower triangular +* V_{2,1}\in\R^{k-l,l} rectangular +* V_{3,1}\in\R^{n-k,l} rectangular +* +* V_{2,2}\in\R^{k-l,k-l} unit lower triangular +* V_{3,2}\in\R^{n-k,k-l} rectangular +* +* We will construct the T matrix +* T = |---------------| = |--------| +* |T_{1,1} T_{1,2}| |T_1 T_3| +* |0 T_{2,2}| |0 T_2| +* |---------------| |--------| +* +* T is the triangular factor attained from block reflectors. +* To motivate the structure, assume we have already computed T_1 +* and T_2. Then collect the associated reflectors in V_1 and V_2 +* +* T_1\in\R^{l, l} upper triangular +* T_2\in\R^{k-l, k-l} upper triangular +* T_3\in\R^{l, k-l} rectangular +* +* Where l = floor(k/2) +* +* Then, consider the product: +* +* (I - V_1T_1V_1')(I - V_2T_2V_2') +* = I - V_1T_1V_1' - V_2T_2V_2' + V_1T_1V_1'V_2T_2V_2' +* +* Define T_3 = -T_1V_1'V_2T_2 +* +* Then, we can define the matrix V as +* V = |-------| +* |V_1 V_2| +* |-------| +* +* So, our product is equivalent to the matrix product +* I - VTV' +* This means, we can compute T_1 and T_2, then use this information +* to compute T_3 +* +* Compute T_1 recursively +* + CALL SLARFT(DIRECT, STOREV, N, L, V, LDV, TAU, T, LDT) +* +* Compute T_2 recursively +* + CALL SLARFT(DIRECT, STOREV, N-L, K-L, V(L+1,L+1), LDV, + $ TAU(L+1), T(L+1,L+1), LDT) +* +* Compute T_3 +* T_3 = V_{2,1}' +* + DO J = 1, L + DO I = 1, K-L + T(J,L+I) = V(L+I,J) + END DO END DO - ELSE - PREVLASTV = 1 - DO I = K, 1, -1 - IF( TAU( I ).EQ.ZERO ) THEN -* -* H(i) = I -* - DO J = I, K - T( J, I ) = ZERO - END DO - ELSE -* -* general case -* - IF( I.LT.K ) THEN - IF( LSAME( STOREV, 'C' ) ) THEN -* Skip any leading zeros. - DO LASTV = 1, I-1 - IF( V( LASTV, I ).NE.ZERO ) EXIT - END DO - DO J = I+1, K - T( J, I ) = -TAU( I ) * V( N-K+I , J ) - END DO - J = MAX( LASTV, PREVLASTV ) -* -* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i) -* - CALL SGEMV( 'Transpose', N-K+I-J, K-I, - $ -TAU( I ), - $ V( J, I+1 ), LDV, V( J, I ), 1, ONE, - $ T( I+1, I ), 1 ) - ELSE -* Skip any leading zeros. - DO LASTV = 1, I-1 - IF( V( I, LASTV ).NE.ZERO ) EXIT - END DO - DO J = I+1, K - T( J, I ) = -TAU( I ) * V( J, N-K+I ) - END DO - J = MAX( LASTV, PREVLASTV ) -* -* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T -* - CALL SGEMV( 'No transpose', K-I, N-K+I-J, - $ -TAU( I ), V( I+1, J ), LDV, V( I, J ), LDV, - $ ONE, T( I+1, I ), 1 ) - END IF -* -* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) -* - CALL STRMV( 'Lower', 'No transpose', 'Non-unit', - $ K-I, - $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) - IF( I.GT.1 ) THEN - PREVLASTV = MIN( PREVLASTV, LASTV ) - ELSE - PREVLASTV = LASTV - END IF - END IF - T( I, I ) = TAU( I ) - END IF +* +* T_3 = T_3V_{2,2} +* + CALL STRMM('Right', 'Lower', 'No transpose', 'Unit', L, K-L, + $ ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) + +* +* T_3 = V_{3,1}'V_{3,2} + T_3 +* Note: We assume K <= N, and GEMM will do nothing if N=K +* + CALL SGEMM('Transpose', 'No transpose', L, K-L, N-K, ONE, + $ V(K+1, 1), LDV, V(K+1,L+1), LDV, ONE, T(1, L+1), LDT) +* +* At this point, we have that T_3 = V_1'V_2 +* All that is left is to pre and post multiply by -T_1 and T_2 +* respectively. +* +* T_3 = -T_1T_3 +* + CALL STRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, + $ K-L, NEG_ONE, T, LDT, T(1, L+1), LDT) +* +* T_3 = T_3T_2 +* + CALL STRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, + $ K-L, ONE, T(L+1,L+1), LDT, T(1, L+1), LDT) + + ELSE IF(LQ) THEN +* +* Break V apart into 6 components +* +* V = |----------------------| +* |V_{1,1} V_{1,2} V{1,3}| +* |0 V_{2,2} V{2,3}| +* |----------------------| +* +* V_{1,1}\in\R^{l,l} unit upper triangular +* V_{1,2}\in\R^{l,k-l} rectangular +* V_{1,3}\in\R^{l,n-k} rectangular +* +* V_{2,2}\in\R^{k-l,k-l} unit upper triangular +* V_{2,3}\in\R^{k-l,n-k} rectangular +* +* Where l = floor(k/2) +* +* We will construct the T matrix +* T = |---------------| = |--------| +* |T_{1,1} T_{1,2}| |T_1 T_3| +* |0 T_{2,2}| |0 T_2| +* |---------------| |--------| +* +* T is the triangular factor attained from block reflectors. +* To motivate the structure, assume we have already computed T_1 +* and T_2. Then collect the associated reflectors in V_1 and V_2 +* +* T_1\in\R^{l, l} upper triangular +* T_2\in\R^{k-l, k-l} upper triangular +* T_3\in\R^{l, k-l} rectangular +* +* Then, consider the product: +* +* (I - V_1'T_1V_1)(I - V_2'T_2V_2) +* = I - V_1'T_1V_1 - V_2'T_2V_2 + V_1'T_1V_1V_2'T_2V_2 +* +* Define T_3 = -T_1V_1V_2'T_2 +* +* Then, we can define the matrix V as +* V = |---| +* |V_1| +* |V_2| +* |---| +* +* So, our product is equivalent to the matrix product +* I - V'TV +* This means, we can compute T_1 and T_2, then use this information +* to compute T_3 +* +* Compute T_1 recursively +* + CALL SLARFT(DIRECT, STOREV, N, L, V, LDV, TAU, T, LDT) +* +* Compute T_2 recursively +* + CALL SLARFT(DIRECT, STOREV, N-L, K-L, V(L+1,L+1), LDV, + $ TAU(L+1), T(L+1,L+1), LDT) + +* +* Compute T_3 +* T_3 = V_{1,2} +* + CALL SLACPY('All', L, K - L, V(1,L+1), LDV, T(1, L+1), LDT) +* +* T_3 = T_3V_{2,2}' +* + CALL STRMM('Right', 'Upper', 'Transpose', 'Unit', L, K-L, ONE, + $ V(L+1, L+1), LDV, T(1, L+1), LDT) + +* +* T_3 = V_{1,3}V_{2,3}' + T_3 +* Note: We assume K <= N, and GEMM will do nothing if N=K +* + CALL SGEMM('No transpose', 'Transpose', L, K-L, N-K, ONE, + $ V(1, K+1), LDV, V(L+1, K+1), LDV, ONE, T(1, L+1), LDT) +* +* At this point, we have that T_3 = V_1V_2' +* All that is left is to pre and post multiply by -T_1 and T_2 +* respectively. +* +* T_3 = -T_1T_3 +* + CALL STRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, K-L, + $ NEG_ONE, T, LDT, T(1, L+1), LDT) + +* +* T_3 = T_3T_2 +* + CALL STRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, + $ K-L, ONE, T(L+1,L+1), LDT, T(1, L+1), LDT) + ELSE IF(QL) THEN +* +* Break V apart into 6 components +* +* V = |---------------| +* |V_{1,1} V_{1,2}| +* |V_{2,1} V_{2,2}| +* |0 V_{3,2}| +* |---------------| +* +* V_{1,1}\in\R^{n-k,k-l} rectangular +* V_{2,1}\in\R^{k-l,k-l} unit upper triangular +* +* V_{1,2}\in\R^{n-k,l} rectangular +* V_{2,2}\in\R^{k-l,l} rectangular +* V_{3,2}\in\R^{l,l} unit upper triangular +* +* We will construct the T matrix +* T = |---------------| = |--------| +* |T_{1,1} 0 | |T_1 0 | +* |T_{2,1} T_{2,2}| |T_3 T_2| +* |---------------| |--------| +* +* T is the triangular factor attained from block reflectors. +* To motivate the structure, assume we have already computed T_1 +* and T_2. Then collect the associated reflectors in V_1 and V_2 +* +* T_1\in\R^{k-l, k-l} non-unit lower triangular +* T_2\in\R^{l, l} non-unit lower triangular +* T_3\in\R^{k-l, l} rectangular +* +* Where l = floor(k/2) +* +* Then, consider the product: +* +* (I - V_2T_2V_2')(I - V_1T_1V_1') +* = I - V_2T_2V_2' - V_1T_1V_1' + V_2T_2V_2'V_1T_1V_1' +* +* Define T_3 = -T_2V_2'V_1T_1 +* +* Then, we can define the matrix V as +* V = |-------| +* |V_1 V_2| +* |-------| +* +* So, our product is equivalent to the matrix product +* I - VTV' +* This means, we can compute T_1 and T_2, then use this information +* to compute T_3 +* +* Compute T_1 recursively +* + CALL SLARFT(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, T, LDT) +* +* Compute T_2 recursively +* + CALL SLARFT(DIRECT, STOREV, N, L, V(1, K-L+1), LDV, TAU(K-L+1), + $ T(K-L+1,K-L+1), LDT) +* +* Compute T_3 +* T_3 = V_{2,2}' +* + DO J = 1, K-L + DO I = 1, L + T(K-L+I,J) = V(N-K+J, K-L+I) + END DO END DO - END IF - RETURN * -* End of SLARFT +* T_3 = T_3V_{2,1} +* + CALL STRMM('Right', 'Upper', 'No transpose', 'Unit', L, K-L, + $ ONE, V(N-K+1,1), LDV, T(K-L+1,1), LDT) + +* +* T_3 = V_{2,2}'V_{2,1} + T_3 +* Note: We assume K <= N, and GEMM will do nothing if N=K +* + CALL SGEMM('Transpose', 'No transpose', L, K-L, N-K, ONE, + $ V(1,K-L+1), LDV, V, LDV, ONE, T(K-L+1,1), LDT) +* +* At this point, we have that T_3 = V_2'V_1 +* All that is left is to pre and post multiply by -T_2 and T_1 +* respectively. +* +* T_3 = -T_2T_3 +* + CALL STRMM('Left', 'Lower', 'No transpose', 'Non-unit', L, K-L, + $ NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) * - END +* T_3 = T_3T_1 +* + CALL STRMM('Right', 'Lower', 'No transpose', 'Non-unit', L, + $ K-L, ONE, T, LDT, T(K-L+1,1), LDT) + ELSE +* +* Else means RQ case +* +* Break V apart into 6 components +* +* V = |-----------------------| +* |V_{1,1} V_{1,2} 0 | +* |V_{2,1} V_{2,2} V_{2,3}| +* |-----------------------| +* +* V_{1,1}\in\R^{k-l,n-k} rectangular +* V_{1,2}\in\R^{k-l,k-l} unit lower triangular +* +* V_{2,1}\in\R^{l,n-k} rectangular +* V_{2,2}\in\R^{l,k-l} rectangular +* V_{2,3}\in\R^{l,l} unit lower triangular +* +* We will construct the T matrix +* T = |---------------| = |--------| +* |T_{1,1} 0 | |T_1 0 | +* |T_{2,1} T_{2,2}| |T_3 T_2| +* |---------------| |--------| +* +* T is the triangular factor attained from block reflectors. +* To motivate the structure, assume we have already computed T_1 +* and T_2. Then collect the associated reflectors in V_1 and V_2 +* +* T_1\in\R^{k-l, k-l} non-unit lower triangular +* T_2\in\R^{l, l} non-unit lower triangular +* T_3\in\R^{k-l, l} rectangular +* +* Where l = floor(k/2) +* +* Then, consider the product: +* +* (I - V_2'T_2V_2)(I - V_1'T_1V_1) +* = I - V_2'T_2V_2 - V_1'T_1V_1 + V_2'T_2V_2V_1'T_1V_1 +* +* Define T_3 = -T_2V_2V_1'T_1 +* +* Then, we can define the matrix V as +* V = |---| +* |V_1| +* |V_2| +* |---| +* +* So, our product is equivalent to the matrix product +* I - V'TV +* This means, we can compute T_1 and T_2, then use this information +* to compute T_3 +* +* Compute T_1 recursively +* + CALL SLARFT(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, T, LDT) +* +* Compute T_2 recursively +* + CALL SLARFT(DIRECT, STOREV, N, L, V(K-L+1,1), LDV, TAU(K-L+1), + $ T(K-L+1,K-L+1), LDT) +* +* Compute T_3 +* T_3 = V_{2,2} +* + CALL SLACPY('All', L, K-L, V(K-L+1,N-K+1), LDV, T(K-L+1,1), + $ LDT) + +* +* T_3 = T_3V_{1,2}' +* + CALL STRMM('Right', 'Lower', 'Transpose', 'Unit', L, K-L, ONE, + $ V(1, N-K+1), LDV, T(K-L+1,1), LDT) + +* +* T_3 = V_{2,1}V_{1,1}' + T_3 +* Note: We assume K <= N, and GEMM will do nothing if N=K +* + CALL SGEMM('No transpose', 'Transpose', L, K-L, N-K, ONE, + $ V(K-L+1,1), LDV, V, LDV, ONE, T(K-L+1,1), LDT) + +* +* At this point, we have that T_3 = V_2V_1' +* All that is left is to pre and post multiply by -T_2 and T_1 +* respectively. +* +* T_3 = -T_2T_3 +* + CALL STRMM('Left', 'Lower', 'No tranpose', 'Non-unit', L, K-L, + $ NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) + +* +* T_3 = T_3T_1 +* + CALL STRMM('Right', 'Lower', 'No tranpose', 'Non-unit', L, K-L, + $ ONE, T, LDT, T(K-L+1,1), LDT) + END IF + END SUBROUTINE diff --git a/SRC/zlarft.f b/SRC/zlarft.f index be773becc2..eaada56253 100644 --- a/SRC/zlarft.f +++ b/SRC/zlarft.f @@ -18,7 +18,7 @@ * Definition: * =========== * -* SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) * * .. Scalar Arguments .. * CHARACTER DIRECT, STOREV @@ -159,169 +159,468 @@ *> \endverbatim *> * ===================================================================== - SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) + RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, + $ LDT ) * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * -* .. Scalar Arguments .. - CHARACTER DIRECT, STOREV - INTEGER K, LDT, LDV, N +* .. Scalar Arguments +* + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N * .. * .. Array Arguments .. - COMPLEX*16 T( LDT, * ), TAU( * ), V( LDV, * ) -* .. * -* ===================================================================== + COMPLEX*16 T( LDT, * ), TAU( * ), V( LDV, * ) +* .. * * .. Parameters .. - COMPLEX*16 ONE, ZERO - PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), - $ ZERO = ( 0.0D+0, 0.0D+0 ) ) -* .. +* + COMPLEX*16 ONE, NEG_ONE, ZERO + PARAMETER(ONE=1.0D+0, ZERO = 0.0D+0, NEG_ONE=-1.0D+0) +* * .. Local Scalars .. - INTEGER I, J, PREVLASTV, LASTV -* .. +* + INTEGER I,J,L + LOGICAL QR,LQ,QL,DIRF,COLV +* * .. External Subroutines .. - EXTERNAL ZGEMV, ZTRMV, ZGEMM -* .. -* .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME +* + EXTERNAL ZTRMM,ZGEMM,ZLACPY +* +* .. External Functions.. +* + LOGICAL LSAME + EXTERNAL LSAME +* +* .. Intrinsic Functions.. +* + INTRINSIC CONJG +* +* The general scheme used is inspired by the approach inside DGEQRT3 +* which was (at the time of writing this code): +* Based on the algorithm of Elmroth and Gustavson, +* IBM J. Res. Develop. Vol 44 No. 4 July 2000. * .. * .. Executable Statements .. * * Quick return if possible * - IF( N.EQ.0 ) - $ RETURN -* - IF( LSAME( DIRECT, 'F' ) ) THEN - PREVLASTV = N - DO I = 1, K - PREVLASTV = MAX( PREVLASTV, I ) - IF( TAU( I ).EQ.ZERO ) THEN -* -* H(i) = I -* - DO J = 1, I - T( J, I ) = ZERO - END DO - ELSE -* -* general case -* - IF( LSAME( STOREV, 'C' ) ) THEN -* Skip any trailing zeros. - DO LASTV = N, I+1, -1 - IF( V( LASTV, I ).NE.ZERO ) EXIT - END DO - DO J = 1, I-1 - T( J, I ) = -TAU( I ) * CONJG( V( I , J ) ) - END DO - J = MIN( LASTV, PREVLASTV ) -* -* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i) -* - CALL ZGEMV( 'Conjugate transpose', J-I, I-1, - $ -TAU( I ), V( I+1, 1 ), LDV, - $ V( I+1, I ), 1, ONE, T( 1, I ), 1 ) - ELSE -* Skip any trailing zeros. - DO LASTV = N, I+1, -1 - IF( V( I, LASTV ).NE.ZERO ) EXIT - END DO - DO J = 1, I-1 - T( J, I ) = -TAU( I ) * V( J , I ) - END DO - J = MIN( LASTV, PREVLASTV ) -* -* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H -* - CALL ZGEMM( 'N', 'C', I-1, 1, J-I, -TAU( I ), - $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, - $ ONE, T( 1, I ), LDT ) - END IF -* -* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) -* - CALL ZTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, - $ T, - $ LDT, T( 1, I ), 1 ) - T( I, I ) = TAU( I ) - IF( I.GT.1 ) THEN - PREVLASTV = MAX( PREVLASTV, LASTV ) - ELSE - PREVLASTV = LASTV - END IF - END IF + IF(N.EQ.0.OR.K.EQ.0) THEN + RETURN + END IF +* +* Base case +* + IF(N.EQ.1.OR.K.EQ.1) THEN + T(1,1) = TAU(1) + RETURN + END IF +* +* Beginning of executable statements +* + L = K / 2 +* +* Determine what kind of Q we need to compute +* We assume that if the user doesn't provide 'F' for DIRECT, +* then they meant to provide 'B' and if they don't provide +* 'C' for STOREV, then they meant to provide 'R' +* + DIRF = LSAME(DIRECT,'F') + COLV = LSAME(STOREV,'C') +* +* QR happens when we have forward direction in column storage +* + QR = DIRF.AND.COLV +* +* LQ happens when we have Forward direction in row storage +* + LQ = DIRF.AND.(.NOT.COLV) +* +* QL happens when we have backward direction in column storage +* + QL = (.NOT.DIRF).AND.COLV +* +* The last case is RQ. Due to how we structured this, if the +* above 3 are false, then RQ must be true, so we never store +* this +* RQ happens when we have backward direction in row storage +* RQ = (.NOT.DIRF).AND.(.NOT.COLV) +* + IF(QR) THEN +* +* Break V apart into 6 components +* +* V = |---------------| +* |V_{1,1} 0 | +* |V_{2,1} V_{2,2}| +* |V_{3,1} V_{3,2}| +* |---------------| +* +* V_{1,1}\in\C^{l,l} unit lower triangular +* V_{2,1}\in\C^{k-l,l} rectangular +* V_{3,1}\in\C^{n-k,l} rectangular +* +* V_{2,2}\in\C^{k-l,k-l} unit lower triangular +* V_{3,2}\in\C^{n-k,k-l} rectangular +* +* We will construct the T matrix +* T = |---------------| = |--------| +* |T_{1,1} T_{1,2}| |T_1 T_3| +* |0 T_{2,2}| |0 T_2| +* |---------------| |--------| +* +* T is the triangular factor attained from block reflectors. +* To motivate the structure, assume we have already computed T_1 +* and T_2. Then collect the associated reflectors in V_1 and V_2 +* +* T_1\in\C^{l, l} upper triangular +* T_2\in\C^{k-l, k-l} upper triangular +* T_3\in\C^{l, k-l} rectangular +* +* Where l = floor(k/2) +* +* Then, consider the product: +* +* (I - V_1T_1V_1')(I - V_2T_2V_2') +* = I - V_1T_1V_1' - V_2T_2V_2' + V_1T_1V_1'V_2T_2V_2' +* +* Define T_3 = -T_1V_1'V_2T_2 +* +* Then, we can define the matrix V as +* V = |-------| +* |V_1 V_2| +* |-------| +* +* So, our product is equivalent to the matrix product +* I - VTV' +* This means, we can compute T_1 and T_2, then use this information +* to compute T_3 +* +* Compute T_1 recursively +* + CALL ZLARFT(DIRECT, STOREV, N, L, V, LDV, TAU, T, LDT) +* +* Compute T_2 recursively +* + CALL ZLARFT(DIRECT, STOREV, N-L, K-L, V(L+1,L+1), LDV, + $ TAU(L+1), T(L+1,L+1), LDT) +* +* Compute T_3 +* T_3 = V_{2,1}' +* + DO J = 1, L + DO I = 1, K-L + T(J,L+I) = CONJG(V(L+I,J)) + END DO END DO - ELSE - PREVLASTV = 1 - DO I = K, 1, -1 - IF( TAU( I ).EQ.ZERO ) THEN -* -* H(i) = I -* - DO J = I, K - T( J, I ) = ZERO - END DO - ELSE -* -* general case -* - IF( I.LT.K ) THEN - IF( LSAME( STOREV, 'C' ) ) THEN -* Skip any leading zeros. - DO LASTV = 1, I-1 - IF( V( LASTV, I ).NE.ZERO ) EXIT - END DO - DO J = I+1, K - T( J, I ) = -TAU( I ) * CONJG( V( N-K+I , J ) ) - END DO - J = MAX( LASTV, PREVLASTV ) -* -* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i) -* - CALL ZGEMV( 'Conjugate transpose', N-K+I-J, K-I, - $ -TAU( I ), V( J, I+1 ), LDV, V( J, I ), - $ 1, ONE, T( I+1, I ), 1 ) - ELSE -* Skip any leading zeros. - DO LASTV = 1, I-1 - IF( V( I, LASTV ).NE.ZERO ) EXIT - END DO - DO J = I+1, K - T( J, I ) = -TAU( I ) * V( J, N-K+I ) - END DO - J = MAX( LASTV, PREVLASTV ) -* -* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H -* - CALL ZGEMM( 'N', 'C', K-I, 1, N-K+I-J, - $ -TAU( I ), - $ V( I+1, J ), LDV, V( I, J ), LDV, - $ ONE, T( I+1, I ), LDT ) - END IF -* -* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) -* - CALL ZTRMV( 'Lower', 'No transpose', 'Non-unit', - $ K-I, - $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) - IF( I.GT.1 ) THEN - PREVLASTV = MIN( PREVLASTV, LASTV ) - ELSE - PREVLASTV = LASTV - END IF - END IF - T( I, I ) = TAU( I ) - END IF +* +* T_3 = T_3V_{2,2} +* + CALL ZTRMM('Right', 'Lower', 'No transpose', 'Unit', L, K-L, + $ ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) + +* +* T_3 = V_{3,1}'V_{3,2} + T_3 +* Note: We assume K <= N, and GEMM will do nothing if N=K +* + CALL ZGEMM('Conjugate', 'No transpose', L, K-L, N-K, ONE, + $ V(K+1, 1), LDV, V(K+1,L+1), LDV, ONE, T(1, L+1), LDT) +* +* At this point, we have that T_3 = V_1'V_2 +* All that is left is to pre and post multiply by -T_1 and T_2 +* respectively. +* +* T_3 = -T_1T_3 +* + CALL ZTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, + $ K-L, NEG_ONE, T, LDT, T(1, L+1), LDT) +* +* T_3 = T_3T_2 +* + CALL ZTRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, + $ K-L, ONE, T(L+1,L+1), LDT, T(1, L+1), LDT) + + ELSE IF(LQ) THEN +* +* Break V apart into 6 components +* +* V = |----------------------| +* |V_{1,1} V_{1,2} V{1,3}| +* |0 V_{2,2} V{2,3}| +* |----------------------| +* +* V_{1,1}\in\C^{l,l} unit upper triangular +* V_{1,2}\in\C^{l,k-l} rectangular +* V_{1,3}\in\C^{l,n-k} rectangular +* +* V_{2,2}\in\C^{k-l,k-l} unit upper triangular +* V_{2,3}\in\C^{k-l,n-k} rectangular +* +* Where l = floor(k/2) +* +* We will construct the T matrix +* T = |---------------| = |--------| +* |T_{1,1} T_{1,2}| |T_1 T_3| +* |0 T_{2,2}| |0 T_2| +* |---------------| |--------| +* +* T is the triangular factor attained from block reflectors. +* To motivate the structure, assume we have already computed T_1 +* and T_2. Then collect the associated reflectors in V_1 and V_2 +* +* T_1\in\C^{l, l} upper triangular +* T_2\in\C^{k-l, k-l} upper triangular +* T_3\in\C^{l, k-l} rectangular +* +* Then, consider the product: +* +* (I - V_1'T_1V_1)(I - V_2'T_2V_2) +* = I - V_1'T_1V_1 - V_2'T_2V_2 + V_1'T_1V_1V_2'T_2V_2 +* +* Define T_3 = -T_1V_1V_2'T_2 +* +* Then, we can define the matrix V as +* V = |---| +* |V_1| +* |V_2| +* |---| +* +* So, our product is equivalent to the matrix product +* I - V'TV +* This means, we can compute T_1 and T_2, then use this information +* to compute T_3 +* +* Compute T_1 recursively +* + CALL ZLARFT(DIRECT, STOREV, N, L, V, LDV, TAU, T, LDT) +* +* Compute T_2 recursively +* + CALL ZLARFT(DIRECT, STOREV, N-L, K-L, V(L+1,L+1), LDV, + $ TAU(L+1), T(L+1,L+1), LDT) + +* +* Compute T_3 +* T_3 = V_{1,2} +* + CALL ZLACPY('All', L, K - L, V(1,L+1), LDV, T(1, L+1), LDT) +* +* T_3 = T_3V_{2,2}' +* + CALL ZTRMM('Right', 'Upper', 'Conjugate', 'Unit', L, K-L, ONE, + $ V(L+1, L+1), LDV, T(1, L+1), LDT) + +* +* T_3 = V_{1,3}V_{2,3}' + T_3 +* Note: We assume K <= N, and GEMM will do nothing if N=K +* + CALL ZGEMM('No transpose', 'Conjugate', L, K-L, N-K, ONE, + $ V(1, K+1), LDV, V(L+1, K+1), LDV, ONE, T(1, L+1), LDT) +* +* At this point, we have that T_3 = V_1V_2' +* All that is left is to pre and post multiply by -T_1 and T_2 +* respectively. +* +* T_3 = -T_1T_3 +* + CALL ZTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, K-L, + $ NEG_ONE, T, LDT, T(1, L+1), LDT) + +* +* T_3 = T_3T_2 +* + CALL ZTRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, + $ K-L, ONE, T(L+1,L+1), LDT, T(1, L+1), LDT) + ELSE IF(QL) THEN +* +* Break V apart into 6 components +* +* V = |---------------| +* |V_{1,1} V_{1,2}| +* |V_{2,1} V_{2,2}| +* |0 V_{3,2}| +* |---------------| +* +* V_{1,1}\in\C^{n-k,k-l} rectangular +* V_{2,1}\in\C^{k-l,k-l} unit upper triangular +* +* V_{1,2}\in\C^{n-k,l} rectangular +* V_{2,2}\in\C^{k-l,l} rectangular +* V_{3,2}\in\C^{l,l} unit upper triangular +* +* We will construct the T matrix +* T = |---------------| = |--------| +* |T_{1,1} 0 | |T_1 0 | +* |T_{2,1} T_{2,2}| |T_3 T_2| +* |---------------| |--------| +* +* T is the triangular factor attained from block reflectors. +* To motivate the structure, assume we have already computed T_1 +* and T_2. Then collect the associated reflectors in V_1 and V_2 +* +* T_1\in\C^{k-l, k-l} non-unit lower triangular +* T_2\in\C^{l, l} non-unit lower triangular +* T_3\in\C^{k-l, l} rectangular +* +* Where l = floor(k/2) +* +* Then, consider the product: +* +* (I - V_2T_2V_2')(I - V_1T_1V_1') +* = I - V_2T_2V_2' - V_1T_1V_1' + V_2T_2V_2'V_1T_1V_1' +* +* Define T_3 = -T_2V_2'V_1T_1 +* +* Then, we can define the matrix V as +* V = |-------| +* |V_1 V_2| +* |-------| +* +* So, our product is equivalent to the matrix product +* I - VTV' +* This means, we can compute T_1 and T_2, then use this information +* to compute T_3 +* +* Compute T_1 recursively +* + CALL ZLARFT(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, T, LDT) +* +* Compute T_2 recursively +* + CALL ZLARFT(DIRECT, STOREV, N, L, V(1, K-L+1), LDV, TAU(K-L+1), + $ T(K-L+1,K-L+1), LDT) +* +* Compute T_3 +* T_3 = V_{2,2}' +* + DO J = 1, K-L + DO I = 1, L + T(K-L+I,J) = CONJG(V(N-K+J, K-L+I)) + END DO END DO - END IF - RETURN * -* End of ZLARFT +* T_3 = T_3V_{2,1} +* + CALL ZTRMM('Right', 'Upper', 'No transpose', 'Unit', L, K-L, + $ ONE, V(N-K+1,1), LDV, T(K-L+1,1), LDT) + +* +* T_3 = V_{2,2}'V_{2,1} + T_3 +* Note: We assume K <= N, and GEMM will do nothing if N=K +* + CALL ZGEMM('Conjugate', 'No transpose', L, K-L, N-K, ONE, + $ V(1,K-L+1), LDV, V, LDV, ONE, T(K-L+1,1), LDT) +* +* At this point, we have that T_3 = V_2'V_1 +* All that is left is to pre and post multiply by -T_2 and T_1 +* respectively. +* +* T_3 = -T_2T_3 * - END + CALL ZTRMM('Left', 'Lower', 'No transpose', 'Non-unit', L, K-L, + $ NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) +* +* T_3 = T_3T_1 +* + CALL ZTRMM('Right', 'Lower', 'No transpose', 'Non-unit', L, + $ K-L, ONE, T, LDT, T(K-L+1,1), LDT) + ELSE +* +* Else means RQ case +* +* Break V apart into 6 components +* +* V = |-----------------------| +* |V_{1,1} V_{1,2} 0 | +* |V_{2,1} V_{2,2} V_{2,3}| +* |-----------------------| +* +* V_{1,1}\in\C^{k-l,n-k} rectangular +* V_{1,2}\in\C^{k-l,k-l} unit lower triangular +* +* V_{2,1}\in\C^{l,n-k} rectangular +* V_{2,2}\in\C^{l,k-l} rectangular +* V_{2,3}\in\C^{l,l} unit lower triangular +* +* We will construct the T matrix +* T = |---------------| = |--------| +* |T_{1,1} 0 | |T_1 0 | +* |T_{2,1} T_{2,2}| |T_3 T_2| +* |---------------| |--------| +* +* T is the triangular factor attained from block reflectors. +* To motivate the structure, assume we have already computed T_1 +* and T_2. Then collect the associated reflectors in V_1 and V_2 +* +* T_1\in\C^{k-l, k-l} non-unit lower triangular +* T_2\in\C^{l, l} non-unit lower triangular +* T_3\in\C^{k-l, l} rectangular +* +* Where l = floor(k/2) +* +* Then, consider the product: +* +* (I - V_2'T_2V_2)(I - V_1'T_1V_1) +* = I - V_2'T_2V_2 - V_1'T_1V_1 + V_2'T_2V_2V_1'T_1V_1 +* +* Define T_3 = -T_2V_2V_1'T_1 +* +* Then, we can define the matrix V as +* V = |---| +* |V_1| +* |V_2| +* |---| +* +* So, our product is equivalent to the matrix product +* I - V'TV +* This means, we can compute T_1 and T_2, then use this information +* to compute T_3 +* +* Compute T_1 recursively +* + CALL ZLARFT(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, T, LDT) +* +* Compute T_2 recursively +* + CALL ZLARFT(DIRECT, STOREV, N, L, V(K-L+1,1), LDV, TAU(K-L+1), + $ T(K-L+1,K-L+1), LDT) +* +* Compute T_3 +* T_3 = V_{2,2} +* + CALL ZLACPY('All', L, K-L, V(K-L+1,N-K+1), LDV, T(K-L+1,1), + $ LDT) + +* +* T_3 = T_3V_{1,2}' +* + CALL ZTRMM('Right', 'Lower', 'Conjugate', 'Unit', L, K-L, ONE, + $ V(1, N-K+1), LDV, T(K-L+1,1), LDT) + +* +* T_3 = V_{2,1}V_{1,1}' + T_3 +* Note: We assume K <= N, and GEMM will do nothing if N=K +* + CALL ZGEMM('No transpose', 'Conjugate', L, K-L, N-K, ONE, + $ V(K-L+1,1), LDV, V, LDV, ONE, T(K-L+1,1), LDT) + +* +* At this point, we have that T_3 = V_2V_1' +* All that is left is to pre and post multiply by -T_2 and T_1 +* respectively. +* +* T_3 = -T_2T_3 +* + CALL ZTRMM('Left', 'Lower', 'No tranpose', 'Non-unit', L, K-L, + $ NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) + +* +* T_3 = T_3T_1 +* + CALL ZTRMM('Right', 'Lower', 'No tranpose', 'Non-unit', L, K-L, + $ ONE, T, LDT, T(K-L+1,1), LDT) + END IF + END SUBROUTINE diff --git a/st7lLjwJ b/st7lLjwJ deleted file mode 100644 index 8b277f0dd5..0000000000 --- a/st7lLjwJ +++ /dev/null @@ -1 +0,0 @@ -! From 1ba075ccd8bf4d1c4029699d817882839d98bbe5 Mon Sep 17 00:00:00 2001 From: Johnathan Rhyne Date: Wed, 16 Oct 2024 11:40:20 -0600 Subject: [PATCH 04/10] updating parameter definition in the single complex version --- SRC/clarft.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SRC/clarft.f b/SRC/clarft.f index 4517bb9b31..f1af5d3b33 100644 --- a/SRC/clarft.f +++ b/SRC/clarft.f @@ -179,7 +179,7 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * .. Parameters .. * COMPLEX ONE, NEG_ONE, ZERO - PARAMETER(ONE=1.0D+0, ZERO = 0.0D+0, NEG_ONE=-1.0D+0) + PARAMETER(ONE=1.0E+0, ZERO = 0.0E+0, NEG_ONE=-1.0E+0) * * .. Local Scalars .. * From 2534b59e3128640ec5ff0d0053423458d99dd0f6 Mon Sep 17 00:00:00 2001 From: Johnathan Rhyne Date: Fri, 22 Nov 2024 16:08:21 -0700 Subject: [PATCH 05/10] updating documentation to be more descriptive --- SRC/clarft.f | 210 +++++++++++++++++++++++++-------------------------- SRC/dlarft.f | 210 +++++++++++++++++++++++++-------------------------- SRC/slarft.f | 210 +++++++++++++++++++++++++-------------------------- SRC/zlarft.f | 210 +++++++++++++++++++++++++-------------------------- 4 files changed, 420 insertions(+), 420 deletions(-) diff --git a/SRC/clarft.f b/SRC/clarft.f index f1af5d3b33..08ef9cc224 100644 --- a/SRC/clarft.f +++ b/SRC/clarft.f @@ -235,7 +235,7 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * QR = DIRF.AND.COLV * -* LQ happens when we have Forward direction in row storage +* LQ happens when we have forward direction in row storage * LQ = DIRF.AND.(.NOT.COLV) * @@ -267,27 +267,27 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * V_{3,2}\in\C^{n-k,k-l} rectangular * * We will construct the T matrix -* T = |---------------| = |--------| -* |T_{1,1} T_{1,2}| |T_1 T_3| -* |0 T_{2,2}| |0 T_2| -* |---------------| |--------| +* T = |---------------| +* |T_{1,1} T_{1,2}| +* |0 T_{2,2}| +* |---------------| * -* T is the triangular factor attained from block reflectors. -* To motivate the structure, assume we have already computed T_1 -* and T_2. Then collect the associated reflectors in V_1 and V_2 +* T is the triangular factor obtained from block reflectors. +* To motivate the structure, assume we have already computed T_{1,1} +* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 * -* T_1\in\C^{l, l} upper triangular -* T_2\in\C^{k-l, k-l} upper triangular -* T_3\in\C^{l, k-l} rectangular +* T_{1,1}\in\C^{l, l} upper triangular +* T_{2,2}\in\C^{k-l, k-l} upper triangular +* T_{1,2}\in\C^{l, k-l} rectangular * * Where l = floor(k/2) * * Then, consider the product: * -* (I - V_1T_1V_1')(I - V_2T_2V_2') -* = I - V_1T_1V_1' - V_2T_2V_2' + V_1T_1V_1'V_2T_2V_2' +* (I - V_1*T_{1,1}*V_1')*(I - V_2*T_{2,2}*V_2') +* = I - V_1*T_{1,1}*V_1' - V_2*T_{2,2}*V_2' + V_1*T_{1,1}*V_1'*V_2*T_{2,2}*V_2' * -* Define T_3 = -T_1V_1'V_2T_2 +* Define T{1,2} = -T_{1,1}*V_1'*V_2*T_{2,2} * * Then, we can define the matrix V as * V = |-------| @@ -295,21 +295,21 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * |-------| * * So, our product is equivalent to the matrix product -* I - VTV' -* This means, we can compute T_1 and T_2, then use this information -* to compute T_3 +* I - V*T*V' +* This means, we can compute T_{1,1} and T_{2,2}, then use this information +* to compute T_{1,2} * -* Compute T_1 recursively +* Compute T_{1,1} recursively * CALL CLARFT(DIRECT, STOREV, N, L, V, LDV, TAU, T, LDT) * -* Compute T_2 recursively +* Compute T_{2,2} recursively * CALL CLARFT(DIRECT, STOREV, N-L, K-L, V(L+1,L+1), LDV, $ TAU(L+1), T(L+1,L+1), LDT) * -* Compute T_3 -* T_3 = V_{2,1}' +* Compute T_{1,2} +* T_{1,2} = V_{2,1}' * DO J = 1, L DO I = 1, K-L @@ -317,28 +317,28 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, END DO END DO * -* T_3 = T_3V_{2,2} +* T_{1,2} = T_{1,2}*V_{2,2} * CALL CTRMM('Right', 'Lower', 'No transpose', 'Unit', L, K-L, $ ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) * -* T_3 = V_{3,1}'V_{3,2} + T_3 +* T_{1,2} = V_{3,1}'*V_{3,2} + T_{1,2} * Note: We assume K <= N, and GEMM will do nothing if N=K * CALL CGEMM('Conjugate', 'No transpose', L, K-L, N-K, ONE, $ V(K+1, 1), LDV, V(K+1,L+1), LDV, ONE, T(1, L+1), LDT) * -* At this point, we have that T_3 = V_1'V_2 -* All that is left is to pre and post multiply by -T_1 and T_2 +* At this point, we have that T_{1,2} = V_1'*V_2 +* All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} * respectively. * -* T_3 = -T_1T_3 +* T_{1,2} = -T_{1,1}*T_{1,2} * CALL CTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, $ K-L, NEG_ONE, T, LDT, T(1, L+1), LDT) * -* T_3 = T_3T_2 +* T_{1,2} = T_{1,2}*T_{2,2} * CALL CTRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, $ K-L, ONE, T(L+1,L+1), LDT, T(1, L+1), LDT) @@ -362,25 +362,25 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * Where l = floor(k/2) * * We will construct the T matrix -* T = |---------------| = |--------| -* |T_{1,1} T_{1,2}| |T_1 T_3| -* |0 T_{2,2}| |0 T_2| -* |---------------| |--------| +* T = |---------------| +* |T_{1,1} T_{1,2}| +* |0 T_{2,2}| +* |---------------| * -* T is the triangular factor attained from block reflectors. -* To motivate the structure, assume we have already computed T_1 -* and T_2. Then collect the associated reflectors in V_1 and V_2 +* T is the triangular factor obtained from block reflectors. +* To motivate the structure, assume we have already computed T_{1,1} +* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 * -* T_1\in\C^{l, l} upper triangular -* T_2\in\C^{k-l, k-l} upper triangular -* T_3\in\C^{l, k-l} rectangular +* T_{1,1}\in\C^{l, l} upper triangular +* T_{2,2}\in\C^{k-l, k-l} upper triangular +* T_{1,2}\in\C^{l, k-l} rectangular * * Then, consider the product: * -* (I - V_1'T_1V_1)(I - V_2'T_2V_2) -* = I - V_1'T_1V_1 - V_2'T_2V_2 + V_1'T_1V_1V_2'T_2V_2 +* (I - V_1'*T_{1,1}*V_1)*(I - V_2'*T_{2,2}*V_2) +* = I - V_1'*T_{1,1}*V_1 - V_2'*T_{2,2}*V_2 + V_1'*T_{1,1}*V_1*V_2'*T_{2,2}*V_2 * -* Define T_3 = -T_1V_1V_2'T_2 +* Define T_{1,2} = -T_{1,1}*V_1*V_2'*T_{2,2} * * Then, we can define the matrix V as * V = |---| @@ -389,48 +389,48 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * |---| * * So, our product is equivalent to the matrix product -* I - V'TV -* This means, we can compute T_1 and T_2, then use this information -* to compute T_3 +* I - V'*T*V +* This means, we can compute T_{1,1} and T_{2,2}, then use this information +* to compute T_{1,2} * -* Compute T_1 recursively +* Compute T_{1,1} recursively * CALL CLARFT(DIRECT, STOREV, N, L, V, LDV, TAU, T, LDT) * -* Compute T_2 recursively +* Compute T_{2,2} recursively * CALL CLARFT(DIRECT, STOREV, N-L, K-L, V(L+1,L+1), LDV, $ TAU(L+1), T(L+1,L+1), LDT) * -* Compute T_3 -* T_3 = V_{1,2} +* Compute T_{1,2} +* T_{1,2} = V_{1,2} * CALL CLACPY('All', L, K - L, V(1,L+1), LDV, T(1, L+1), LDT) * -* T_3 = T_3V_{2,2}' +* T_{1,2} = T_{1,2}*V_{2,2}' * CALL CTRMM('Right', 'Upper', 'Conjugate', 'Unit', L, K-L, ONE, $ V(L+1, L+1), LDV, T(1, L+1), LDT) * -* T_3 = V_{1,3}V_{2,3}' + T_3 +* T_{1,2} = V_{1,3}*V_{2,3}' + T_{1,2} * Note: We assume K <= N, and GEMM will do nothing if N=K * CALL CGEMM('No transpose', 'Conjugate', L, K-L, N-K, ONE, $ V(1, K+1), LDV, V(L+1, K+1), LDV, ONE, T(1, L+1), LDT) * -* At this point, we have that T_3 = V_1V_2' -* All that is left is to pre and post multiply by -T_1 and T_2 +* At this point, we have that T_{1,2} = V_1*V_2' +* All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} * respectively. * -* T_3 = -T_1T_3 +* T_{1,2} = -T_{1,1}*T_{1,2} * CALL CTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, K-L, $ NEG_ONE, T, LDT, T(1, L+1), LDT) * -* T_3 = T_3T_2 +* T_{1,2} = T_{1,2}*T_{2,2} * CALL CTRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, $ K-L, ONE, T(L+1,L+1), LDT, T(1, L+1), LDT) @@ -452,27 +452,27 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * V_{3,2}\in\C^{l,l} unit upper triangular * * We will construct the T matrix -* T = |---------------| = |--------| -* |T_{1,1} 0 | |T_1 0 | -* |T_{2,1} T_{2,2}| |T_3 T_2| -* |---------------| |--------| +* T = |---------------| +* |T_{1,1} 0 | +* |T_{2,1} T_{2,2}| +* |---------------| * -* T is the triangular factor attained from block reflectors. -* To motivate the structure, assume we have already computed T_1 -* and T_2. Then collect the associated reflectors in V_1 and V_2 +* T is the triangular factor obtained from block reflectors. +* To motivate the structure, assume we have already computed T_{1,1} +* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 * -* T_1\in\C^{k-l, k-l} non-unit lower triangular -* T_2\in\C^{l, l} non-unit lower triangular -* T_3\in\C^{k-l, l} rectangular +* T_{1,1}\in\C^{k-l, k-l} non-unit lower triangular +* T_{2,2}\in\C^{l, l} non-unit lower triangular +* T_{2,1}\in\C^{k-l, l} rectangular * * Where l = floor(k/2) * * Then, consider the product: * -* (I - V_2T_2V_2')(I - V_1T_1V_1') -* = I - V_2T_2V_2' - V_1T_1V_1' + V_2T_2V_2'V_1T_1V_1' +* (I - V_2*T_{2,2}*V_2')*(I - V_1*T_{1,1}*V_1') +* = I - V_2*T_{2,2}*V_2' - V_1*T_{1,1}*V_1' + V_2*T_{2,2}*V_2'*V_1*T_{1,1}*V_1' * -* Define T_3 = -T_2V_2'V_1T_1 +* Define T_{2,1} = -T_{2,2}*V_2'*V_1*T_{1,1} * * Then, we can define the matrix V as * V = |-------| @@ -480,21 +480,21 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * |-------| * * So, our product is equivalent to the matrix product -* I - VTV' -* This means, we can compute T_1 and T_2, then use this information -* to compute T_3 +* I - V*T*V' +* This means, we can compute T_{1,1} and T_{2,2}, then use this information +* to compute T_{2,1} * -* Compute T_1 recursively +* Compute T_{1,1} recursively * CALL CLARFT(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, T, LDT) * -* Compute T_2 recursively +* Compute T_{2,2} recursively * CALL CLARFT(DIRECT, STOREV, N, L, V(1, K-L+1), LDV, TAU(K-L+1), $ T(K-L+1,K-L+1), LDT) * -* Compute T_3 -* T_3 = V_{2,2}' +* Compute T_{2,1} +* T_{2,1} = V_{2,2}' * DO J = 1, K-L DO I = 1, L @@ -502,28 +502,28 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, END DO END DO * -* T_3 = T_3V_{2,1} +* T_{2,1} = T_{2,1}*V_{2,1} * CALL CTRMM('Right', 'Upper', 'No transpose', 'Unit', L, K-L, $ ONE, V(N-K+1,1), LDV, T(K-L+1,1), LDT) * -* T_3 = V_{2,2}'V_{2,1} + T_3 +* T_{2,1} = V_{2,2}'*V_{2,1} + T_{2,1} * Note: We assume K <= N, and GEMM will do nothing if N=K * CALL CGEMM('Conjugate', 'No transpose', L, K-L, N-K, ONE, $ V(1,K-L+1), LDV, V, LDV, ONE, T(K-L+1,1), LDT) * -* At this point, we have that T_3 = V_2'V_1 -* All that is left is to pre and post multiply by -T_2 and T_1 +* At this point, we have that T_{2,1} = V_2'*V_1 +* All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} * respectively. * -* T_3 = -T_2T_3 +* T_{2,1} = -T_{2,2}*T_{2,1} * CALL CTRMM('Left', 'Lower', 'No transpose', 'Non-unit', L, K-L, $ NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) * -* T_3 = T_3T_1 +* T_{2,1} = T_{2,1}*T_{1,1} * CALL CTRMM('Right', 'Lower', 'No transpose', 'Non-unit', L, $ K-L, ONE, T, LDT, T(K-L+1,1), LDT) @@ -546,27 +546,27 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * V_{2,3}\in\C^{l,l} unit lower triangular * * We will construct the T matrix -* T = |---------------| = |--------| -* |T_{1,1} 0 | |T_1 0 | -* |T_{2,1} T_{2,2}| |T_3 T_2| -* |---------------| |--------| +* T = |---------------| +* |T_{1,1} 0 | +* |T_{2,1} T_{2,2}| +* |---------------| * -* T is the triangular factor attained from block reflectors. -* To motivate the structure, assume we have already computed T_1 -* and T_2. Then collect the associated reflectors in V_1 and V_2 +* T is the triangular factor obtained from block reflectors. +* To motivate the structure, assume we have already computed T_{1,1} +* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 * -* T_1\in\C^{k-l, k-l} non-unit lower triangular -* T_2\in\C^{l, l} non-unit lower triangular -* T_3\in\C^{k-l, l} rectangular +* T_{1,1}\in\C^{k-l, k-l} non-unit lower triangular +* T_{2,2}\in\C^{l, l} non-unit lower triangular +* T_{2,1}\in\C^{k-l, l} rectangular * * Where l = floor(k/2) * * Then, consider the product: * -* (I - V_2'T_2V_2)(I - V_1'T_1V_1) -* = I - V_2'T_2V_2 - V_1'T_1V_1 + V_2'T_2V_2V_1'T_1V_1 +* (I - V_2'*T_{2,2}*V_2)*(I - V_1'*T_{1,1}*V_1) +* = I - V_2'*T_{2,2}*V_2 - V_1'*T_{1,1}*V_1 + V_2'*T_{2,2}*V_2*V_1'*T_{1,1}*V_1 * -* Define T_3 = -T_2V_2V_1'T_1 +* Define T_{2,1} = -T_{2,2}*V_2*V_1'*T_{1,1} * * Then, we can define the matrix V as * V = |---| @@ -575,50 +575,50 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * |---| * * So, our product is equivalent to the matrix product -* I - V'TV -* This means, we can compute T_1 and T_2, then use this information -* to compute T_3 +* I - V'*T*V +* This means, we can compute T_{1,1} and T_{2,2}, then use this information +* to compute T_{2,1} * -* Compute T_1 recursively +* Compute T_{1,1} recursively * CALL CLARFT(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, T, LDT) * -* Compute T_2 recursively +* Compute T_{2,2} recursively * CALL CLARFT(DIRECT, STOREV, N, L, V(K-L+1,1), LDV, TAU(K-L+1), $ T(K-L+1,K-L+1), LDT) * -* Compute T_3 -* T_3 = V_{2,2} +* Compute T_{2,1} +* T_{2,1} = V_{2,2} * CALL CLACPY('All', L, K-L, V(K-L+1,N-K+1), LDV, T(K-L+1,1), $ LDT) * -* T_3 = T_3V_{1,2}' +* T_{2,1} = T_{2,1}*V_{1,2}' * CALL CTRMM('Right', 'Lower', 'Conjugate', 'Unit', L, K-L, ONE, $ V(1, N-K+1), LDV, T(K-L+1,1), LDT) * -* T_3 = V_{2,1}V_{1,1}' + T_3 +* T_{2,1} = V_{2,1}*V_{1,1}' + T_{2,1} * Note: We assume K <= N, and GEMM will do nothing if N=K * CALL CGEMM('No transpose', 'Conjugate', L, K-L, N-K, ONE, $ V(K-L+1,1), LDV, V, LDV, ONE, T(K-L+1,1), LDT) * -* At this point, we have that T_3 = V_2V_1' -* All that is left is to pre and post multiply by -T_2 and T_1 +* At this point, we have that T_{2,1} = V_2*V_1' +* All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} * respectively. * -* T_3 = -T_2T_3 +* T_{2,1} = -T_{2,2}*T_{2,1} * CALL CTRMM('Left', 'Lower', 'No tranpose', 'Non-unit', L, K-L, $ NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) * -* T_3 = T_3T_1 +* T_{2,1} = T_{2,1}*T_{1,1} * CALL CTRMM('Right', 'Lower', 'No tranpose', 'Non-unit', L, K-L, $ ONE, T, LDT, T(K-L+1,1), LDT) diff --git a/SRC/dlarft.f b/SRC/dlarft.f index d3f0b87454..66b8c3d922 100644 --- a/SRC/dlarft.f +++ b/SRC/dlarft.f @@ -231,7 +231,7 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * QR = DIRF.AND.COLV * -* LQ happens when we have Forward direction in row storage +* LQ happens when we have forward direction in row storage * LQ = DIRF.AND.(.NOT.COLV) * @@ -263,27 +263,27 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * V_{3,2}\in\R^{n-k,k-l} rectangular * * We will construct the T matrix -* T = |---------------| = |--------| -* |T_{1,1} T_{1,2}| |T_1 T_3| -* |0 T_{2,2}| |0 T_2| -* |---------------| |--------| +* T = |---------------| +* |T_{1,1} T_{1,2}| +* |0 T_{2,2}| +* |---------------| * -* T is the triangular factor attained from block reflectors. -* To motivate the structure, assume we have already computed T_1 -* and T_2. Then collect the associated reflectors in V_1 and V_2 +* T is the triangular factor obtained from block reflectors. +* To motivate the structure, assume we have already computed T_{1,1} +* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 * -* T_1\in\R^{l, l} upper triangular -* T_2\in\R^{k-l, k-l} upper triangular -* T_3\in\R^{l, k-l} rectangular +* T_{1,1}\in\R^{l, l} upper triangular +* T_{2,2}\in\R^{k-l, k-l} upper triangular +* T_{1,2}\in\R^{l, k-l} rectangular * * Where l = floor(k/2) * * Then, consider the product: * -* (I - V_1T_1V_1')(I - V_2T_2V_2') -* = I - V_1T_1V_1' - V_2T_2V_2' + V_1T_1V_1'V_2T_2V_2' +* (I - V_1*T_{1,1}*V_1')*(I - V_2*T_{2,2}*V_2') +* = I - V_1*T_{1,1}*V_1' - V_2*T_{2,2}*V_2' + V_1*T_{1,1}*V_1'*V_2*T_{2,2}*V_2' * -* Define T_3 = -T_1V_1'V_2T_2 +* Define T_{1,2} = -T_{1,1}*V_1'*V_2*T_{2,2} * * Then, we can define the matrix V as * V = |-------| @@ -291,21 +291,21 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * |-------| * * So, our product is equivalent to the matrix product -* I - VTV' -* This means, we can compute T_1 and T_2, then use this information -* to compute T_3 +* I - V*T*V' +* This means, we can compute T_{1,1} and T_{2,2}, then use this information +* to compute T_{1,2} * -* Compute T_1 recursively +* Compute T_{1,1} recursively * CALL DLARFT(DIRECT, STOREV, N, L, V, LDV, TAU, T, LDT) * -* Compute T_2 recursively +* Compute T_{2,2} recursively * CALL DLARFT(DIRECT, STOREV, N-L, K-L, V(L+1,L+1), LDV, $ TAU(L+1), T(L+1,L+1), LDT) * -* Compute T_3 -* T_3 = V_{2,1}' +* Compute T_{1,2} +* T_{1,2} = V_{2,1}' * DO J = 1, L DO I = 1, K-L @@ -313,28 +313,28 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, END DO END DO * -* T_3 = T_3V_{2,2} +* T_{1,2} = T_{1,2}*V_{2,2} * CALL DTRMM('Right', 'Lower', 'No transpose', 'Unit', L, K-L, $ ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) * -* T_3 = V_{3,1}'V_{3,2} + T_3 +* T_{1,2} = V_{3,1}'*V_{3,2} + T_{1,2} * Note: We assume K <= N, and GEMM will do nothing if N=K * CALL DGEMM('Transpose', 'No transpose', L, K-L, N-K, ONE, $ V(K+1, 1), LDV, V(K+1,L+1), LDV, ONE, T(1, L+1), LDT) * -* At this point, we have that T_3 = V_1'V_2 -* All that is left is to pre and post multiply by -T_1 and T_2 +* At this point, we have that T_{1,2} = V_1'*V_2 +* All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} * respectively. * -* T_3 = -T_1T_3 +* T_{1,2} = -T_{1,1}*T_{1,2} * CALL DTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, $ K-L, NEG_ONE, T, LDT, T(1, L+1), LDT) * -* T_3 = T_3T_2 +* T_{1,2} = T_{1,2}*T_{2,2} * CALL DTRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, $ K-L, ONE, T(L+1,L+1), LDT, T(1, L+1), LDT) @@ -358,25 +358,25 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * Where l = floor(k/2) * * We will construct the T matrix -* T = |---------------| = |--------| -* |T_{1,1} T_{1,2}| |T_1 T_3| -* |0 T_{2,2}| |0 T_2| -* |---------------| |--------| +* T = |---------------| +* |T_{1,1} T_{1,2}| +* |0 T_{2,2}| +* |---------------| * -* T is the triangular factor attained from block reflectors. -* To motivate the structure, assume we have already computed T_1 -* and T_2. Then collect the associated reflectors in V_1 and V_2 +* T is the triangular factor obtained from block reflectors. +* To motivate the structure, assume we have already computed T_{1,1} +* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 * -* T_1\in\R^{l, l} upper triangular -* T_2\in\R^{k-l, k-l} upper triangular -* T_3\in\R^{l, k-l} rectangular +* T_{1,1}\in\R^{l, l} upper triangular +* T_{2,2}\in\R^{k-l, k-l} upper triangular +* T_{1,2}\in\R^{l, k-l} rectangular * * Then, consider the product: * -* (I - V_1'T_1V_1)(I - V_2'T_2V_2) -* = I - V_1'T_1V_1 - V_2'T_2V_2 + V_1'T_1V_1V_2'T_2V_2 +* (I - V_1'*T_{1,1}*V_1)*(I - V_2'*T_{2,2}*V_2) +* = I - V_1'*T_{1,1}*V_1 - V_2'*T_{2,2}*V_2 + V_1'*T_{1,1}*V_1*V_2'*T_{2,2}*V_2 * -* Define T_3 = -T_1V_1V_2'T_2 +* Define T_{1,2} = -T_{1,1}*V_1*V_2'*T_{2,2} * * Then, we can define the matrix V as * V = |---| @@ -385,48 +385,48 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * |---| * * So, our product is equivalent to the matrix product -* I - V'TV -* This means, we can compute T_1 and T_2, then use this information -* to compute T_3 +* I - V'*T*V +* This means, we can compute T_{1,1} and T_{2,2}, then use this information +* to compute T_{1,2} * -* Compute T_1 recursively +* Compute T_{1,1} recursively * CALL DLARFT(DIRECT, STOREV, N, L, V, LDV, TAU, T, LDT) * -* Compute T_2 recursively +* Compute T_{2,2} recursively * CALL DLARFT(DIRECT, STOREV, N-L, K-L, V(L+1,L+1), LDV, $ TAU(L+1), T(L+1,L+1), LDT) * -* Compute T_3 -* T_3 = V_{1,2} +* Compute T_{1,2} +* T_{1,2} = V_{1,2} * CALL DLACPY('All', L, K - L, V(1,L+1), LDV, T(1, L+1), LDT) * -* T_3 = T_3V_{2,2}' +* T_{1,2} = T_{1,2}*V_{2,2}' * CALL DTRMM('Right', 'Upper', 'Transpose', 'Unit', L, K-L, ONE, $ V(L+1, L+1), LDV, T(1, L+1), LDT) * -* T_3 = V_{1,3}V_{2,3}' + T_3 +* T_{1,2} = V_{1,3}*V_{2,3}' + T_{1,2} * Note: We assume K <= N, and GEMM will do nothing if N=K * CALL DGEMM('No transpose', 'Transpose', L, K-L, N-K, ONE, $ V(1, K+1), LDV, V(L+1, K+1), LDV, ONE, T(1, L+1), LDT) * -* At this point, we have that T_3 = V_1V_2' -* All that is left is to pre and post multiply by -T_1 and T_2 +* At this point, we have that T_{1,2} = V_1*V_2' +* All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} * respectively. * -* T_3 = -T_1T_3 +* T_{1,2} = -T_{1,1}*T_{1,2} * CALL DTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, K-L, $ NEG_ONE, T, LDT, T(1, L+1), LDT) * -* T_3 = T_3T_2 +* T_{1,2} = T_{1,2}*T_{2,2} * CALL DTRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, $ K-L, ONE, T(L+1,L+1), LDT, T(1, L+1), LDT) @@ -448,27 +448,27 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * V_{3,2}\in\R^{l,l} unit upper triangular * * We will construct the T matrix -* T = |---------------| = |--------| -* |T_{1,1} 0 | |T_1 0 | -* |T_{2,1} T_{2,2}| |T_3 T_2| -* |---------------| |--------| +* T = |---------------| +* |T_{1,1} 0 | +* |T_{2,1} T_{2,2}| +* |---------------| * -* T is the triangular factor attained from block reflectors. -* To motivate the structure, assume we have already computed T_1 -* and T_2. Then collect the associated reflectors in V_1 and V_2 +* T is the triangular factor obtained from block reflectors. +* To motivate the structure, assume we have already computed T_{1,1} +* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 * -* T_1\in\R^{k-l, k-l} non-unit lower triangular -* T_2\in\R^{l, l} non-unit lower triangular -* T_3\in\R^{k-l, l} rectangular +* T_{1,1}\in\R^{k-l, k-l} non-unit lower triangular +* T_{2,2}\in\R^{l, l} non-unit lower triangular +* T_{2,1}\in\R^{k-l, l} rectangular * * Where l = floor(k/2) * * Then, consider the product: * -* (I - V_2T_2V_2')(I - V_1T_1V_1') -* = I - V_2T_2V_2' - V_1T_1V_1' + V_2T_2V_2'V_1T_1V_1' +* (I - V_2*T_{2,2}*V_2')*(I - V_1*T_{1,1}*V_1') +* = I - V_2*T_{2,2}*V_2' - V_1*T_{1,1}*V_1' + V_2*T_{2,2}*V_2'*V_1*T_{1,1}*V_1' * -* Define T_3 = -T_2V_2'V_1T_1 +* Define T_{2,1} = -T_{2,2}*V_2'*V_1*T_{1,1} * * Then, we can define the matrix V as * V = |-------| @@ -476,21 +476,21 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * |-------| * * So, our product is equivalent to the matrix product -* I - VTV' -* This means, we can compute T_1 and T_2, then use this information -* to compute T_3 +* I - V*T*V' +* This means, we can compute T_{1,1} and T_{2,2}, then use this information +* to compute T_{2,1} * -* Compute T_1 recursively +* Compute T_{1,1} recursively * CALL DLARFT(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, T, LDT) * -* Compute T_2 recursively +* Compute T_{2,2} recursively * CALL DLARFT(DIRECT, STOREV, N, L, V(1, K-L+1), LDV, TAU(K-L+1), $ T(K-L+1,K-L+1), LDT) * -* Compute T_3 -* T_3 = V_{2,2}' +* Compute T_{2,1} +* T_{2,1} = V_{2,2}' * DO J = 1, K-L DO I = 1, L @@ -498,28 +498,28 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, END DO END DO * -* T_3 = T_3V_{2,1} +* T_{2,1} = T_{2,1}*V_{2,1} * CALL DTRMM('Right', 'Upper', 'No transpose', 'Unit', L, K-L, $ ONE, V(N-K+1,1), LDV, T(K-L+1,1), LDT) * -* T_3 = V_{2,2}'V_{2,1} + T_3 +* T_{2,1} = V_{2,2}'*V_{2,1} + T_{2,1} * Note: We assume K <= N, and GEMM will do nothing if N=K * CALL DGEMM('Transpose', 'No transpose', L, K-L, N-K, ONE, $ V(1,K-L+1), LDV, V, LDV, ONE, T(K-L+1,1), LDT) * -* At this point, we have that T_3 = V_2'V_1 -* All that is left is to pre and post multiply by -T_2 and T_1 +* At this point, we have that T_{2,1} = V_2'*V_1 +* All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} * respectively. * -* T_3 = -T_2T_3 +* T_{2,1} = -T_{2,2}*T_{2,1} * CALL DTRMM('Left', 'Lower', 'No transpose', 'Non-unit', L, K-L, $ NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) * -* T_3 = T_3T_1 +* T_{2,1} = T_{2,1}*T_{1,1} * CALL DTRMM('Right', 'Lower', 'No transpose', 'Non-unit', L, $ K-L, ONE, T, LDT, T(K-L+1,1), LDT) @@ -542,27 +542,27 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * V_{2,3}\in\R^{l,l} unit lower triangular * * We will construct the T matrix -* T = |---------------| = |--------| -* |T_{1,1} 0 | |T_1 0 | -* |T_{2,1} T_{2,2}| |T_3 T_2| -* |---------------| |--------| +* T = |---------------| +* |T_{1,1} 0 | +* |T_{2,1} T_{2,2}| +* |---------------| * -* T is the triangular factor attained from block reflectors. -* To motivate the structure, assume we have already computed T_1 -* and T_2. Then collect the associated reflectors in V_1 and V_2 +* T is the triangular factor obtained from block reflectors. +* To motivate the structure, assume we have already computed T_{1,1} +* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 * -* T_1\in\R^{k-l, k-l} non-unit lower triangular -* T_2\in\R^{l, l} non-unit lower triangular -* T_3\in\R^{k-l, l} rectangular +* T_{1,1}\in\R^{k-l, k-l} non-unit lower triangular +* T_{2,2}\in\R^{l, l} non-unit lower triangular +* T_{2,1}\in\R^{k-l, l} rectangular * * Where l = floor(k/2) * * Then, consider the product: * -* (I - V_2'T_2V_2)(I - V_1'T_1V_1) -* = I - V_2'T_2V_2 - V_1'T_1V_1 + V_2'T_2V_2V_1'T_1V_1 +* (I - V_2'*T_{2,2}*V_2)*(I - V_1'*T_{1,1}*V_1) +* = I - V_2'*T_{2,2}*V_2 - V_1'*T_{1,1}*V_1 + V_2'*T_{2,2}*V_2*V_1'*T_{1,1}*V_1 * -* Define T_3 = -T_2V_2V_1'T_1 +* Define T_{2,1} = -T_{2,2}*V_2*V_1'*T_{1,1} * * Then, we can define the matrix V as * V = |---| @@ -571,50 +571,50 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * |---| * * So, our product is equivalent to the matrix product -* I - V'TV -* This means, we can compute T_1 and T_2, then use this information -* to compute T_3 +* I - V'*T*V +* This means, we can compute T_{1,1} and T_{2,2}, then use this information +* to compute T_{2,1} * -* Compute T_1 recursively +* Compute T_{1,1} recursively * CALL DLARFT(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, T, LDT) * -* Compute T_2 recursively +* Compute T_{2,2} recursively * CALL DLARFT(DIRECT, STOREV, N, L, V(K-L+1,1), LDV, TAU(K-L+1), $ T(K-L+1,K-L+1), LDT) * -* Compute T_3 -* T_3 = V_{2,2} +* Compute T_{2,1} +* T_{2,1} = V_{2,2} * CALL DLACPY('All', L, K-L, V(K-L+1,N-K+1), LDV, T(K-L+1,1), $ LDT) * -* T_3 = T_3V_{1,2}' +* T_{2,1} = T_{2,1}*V_{1,2}' * CALL DTRMM('Right', 'Lower', 'Transpose', 'Unit', L, K-L, ONE, $ V(1, N-K+1), LDV, T(K-L+1,1), LDT) * -* T_3 = V_{2,1}V_{1,1}' + T_3 +* T_{2,1} = V_{2,1}*V_{1,1}' + T_{2,1} * Note: We assume K <= N, and GEMM will do nothing if N=K * CALL DGEMM('No transpose', 'Transpose', L, K-L, N-K, ONE, $ V(K-L+1,1), LDV, V, LDV, ONE, T(K-L+1,1), LDT) * -* At this point, we have that T_3 = V_2V_1' -* All that is left is to pre and post multiply by -T_2 and T_1 +* At this point, we have that T_{2,1} = V_2*V_1' +* All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} * respectively. * -* T_3 = -T_2T_3 +* T_{2,1} = -T_{2,2}*T_{2,1} * CALL DTRMM('Left', 'Lower', 'No tranpose', 'Non-unit', L, K-L, $ NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) * -* T_3 = T_3T_1 +* T_{2,1} = T_{2,1}*T_{1,1} * CALL DTRMM('Right', 'Lower', 'No tranpose', 'Non-unit', L, K-L, $ ONE, T, LDT, T(K-L+1,1), LDT) diff --git a/SRC/slarft.f b/SRC/slarft.f index f6e647e470..449c4b5a75 100644 --- a/SRC/slarft.f +++ b/SRC/slarft.f @@ -127,7 +127,7 @@ * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley -*> \author Univ. of Colorado Denver +*> \author Johnathan Rhyne, Univ. of Colorado Denver (original author, 2024) *> \author NAG Ltd. * *> \ingroup larft @@ -231,7 +231,7 @@ RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * QR = DIRF.AND.COLV * -* LQ happens when we have Forward direction in row storage +* LQ happens when we have forward direction in row storage * LQ = DIRF.AND.(.NOT.COLV) * @@ -263,27 +263,27 @@ RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * V_{3,2}\in\R^{n-k,k-l} rectangular * * We will construct the T matrix -* T = |---------------| = |--------| -* |T_{1,1} T_{1,2}| |T_1 T_3| -* |0 T_{2,2}| |0 T_2| -* |---------------| |--------| +* T = |---------------| +* |T_{1,1} T_{1,2}| +* |0 T_{2,2}| +* |---------------| * -* T is the triangular factor attained from block reflectors. -* To motivate the structure, assume we have already computed T_1 -* and T_2. Then collect the associated reflectors in V_1 and V_2 +* T is the triangular factor obtained from block reflectors. +* To motivate the structure, assume we have already computed T_{1,1} +* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 * -* T_1\in\R^{l, l} upper triangular -* T_2\in\R^{k-l, k-l} upper triangular -* T_3\in\R^{l, k-l} rectangular +* T_{1,1}\in\R^{l, l} upper triangular +* T_{2,2}\in\R^{k-l, k-l} upper triangular +* T_{1,2}\in\R^{l, k-l} rectangular * * Where l = floor(k/2) * * Then, consider the product: * -* (I - V_1T_1V_1')(I - V_2T_2V_2') -* = I - V_1T_1V_1' - V_2T_2V_2' + V_1T_1V_1'V_2T_2V_2' +* (I - V_1*T_{1,1}*V_1')*(I - V_2*T_{2,2}*V_2') +* = I - V_1*T_{1,1}*V_1' - V_2*T_{2,2}*V_2' + V_1*T_{1,1}*V_1'*V_2*T_{2,2}*V_2' * -* Define T_3 = -T_1V_1'V_2T_2 +* Define T_{1,2} = -T_{1,1}*V_1'*V_2*T_{2,2} * * Then, we can define the matrix V as * V = |-------| @@ -291,21 +291,21 @@ RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * |-------| * * So, our product is equivalent to the matrix product -* I - VTV' -* This means, we can compute T_1 and T_2, then use this information -* to compute T_3 +* I - V*T*V' +* This means, we can compute T_{1,1} and T_{2,2}, then use this information +* to compute T_{1,2} * -* Compute T_1 recursively +* Compute T_{1,1} recursively * CALL SLARFT(DIRECT, STOREV, N, L, V, LDV, TAU, T, LDT) * -* Compute T_2 recursively +* Compute T_{2,2} recursively * CALL SLARFT(DIRECT, STOREV, N-L, K-L, V(L+1,L+1), LDV, $ TAU(L+1), T(L+1,L+1), LDT) * -* Compute T_3 -* T_3 = V_{2,1}' +* Compute T_{1,2} +* T_{1,2} = V_{2,1}' * DO J = 1, L DO I = 1, K-L @@ -313,28 +313,28 @@ RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, END DO END DO * -* T_3 = T_3V_{2,2} +* T_{1,2} = T_{1,2}*V_{2,2} * CALL STRMM('Right', 'Lower', 'No transpose', 'Unit', L, K-L, $ ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) * -* T_3 = V_{3,1}'V_{3,2} + T_3 +* T_{1,2} = V_{3,1}'*V_{3,2} + T_{1,2} * Note: We assume K <= N, and GEMM will do nothing if N=K * CALL SGEMM('Transpose', 'No transpose', L, K-L, N-K, ONE, $ V(K+1, 1), LDV, V(K+1,L+1), LDV, ONE, T(1, L+1), LDT) * -* At this point, we have that T_3 = V_1'V_2 -* All that is left is to pre and post multiply by -T_1 and T_2 +* At this point, we have that T_{1,2} = V_1'*V_2 +* All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} * respectively. * -* T_3 = -T_1T_3 +* T_{1,2} = -T_{1,1}*T_{1,2} * CALL STRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, $ K-L, NEG_ONE, T, LDT, T(1, L+1), LDT) * -* T_3 = T_3T_2 +* T_{1,2} = T_{1,2}*T_{2,2} * CALL STRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, $ K-L, ONE, T(L+1,L+1), LDT, T(1, L+1), LDT) @@ -358,25 +358,25 @@ RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * Where l = floor(k/2) * * We will construct the T matrix -* T = |---------------| = |--------| -* |T_{1,1} T_{1,2}| |T_1 T_3| -* |0 T_{2,2}| |0 T_2| -* |---------------| |--------| +* T = |---------------| +* |T_{1,1} T_{1,2}| +* |0 T_{2,2}| +* |---------------| * -* T is the triangular factor attained from block reflectors. -* To motivate the structure, assume we have already computed T_1 -* and T_2. Then collect the associated reflectors in V_1 and V_2 +* T is the triangular factor obtained from block reflectors. +* To motivate the structure, assume we have already computed T_{1,1} +* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 * -* T_1\in\R^{l, l} upper triangular -* T_2\in\R^{k-l, k-l} upper triangular -* T_3\in\R^{l, k-l} rectangular +* T_{1,1}\in\R^{l, l} upper triangular +* T_{2,2}\in\R^{k-l, k-l} upper triangular +* T_{1,2}\in\R^{l, k-l} rectangular * * Then, consider the product: * -* (I - V_1'T_1V_1)(I - V_2'T_2V_2) -* = I - V_1'T_1V_1 - V_2'T_2V_2 + V_1'T_1V_1V_2'T_2V_2 +* (I - V_1'*T_{1,1}*V_1)*(I - V_2'*T_{2,2}*V_2) +* = I - V_1'*T_{1,1}*V_1 - V_2'*T_{2,2}*V_2 + V_1'*T_{1,1}*V_1*V_2'*T_{2,2}*V_2 * -* Define T_3 = -T_1V_1V_2'T_2 +* Define T_{1,2} = -T_{1,1}*V_1*V_2'*T_{2,2} * * Then, we can define the matrix V as * V = |---| @@ -385,48 +385,48 @@ RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * |---| * * So, our product is equivalent to the matrix product -* I - V'TV -* This means, we can compute T_1 and T_2, then use this information -* to compute T_3 +* I - V'*T*V +* This means, we can compute T_{1,1} and T_{2,2}, then use this information +* to compute T_{1,2} * -* Compute T_1 recursively +* Compute T_{1,1} recursively * CALL SLARFT(DIRECT, STOREV, N, L, V, LDV, TAU, T, LDT) * -* Compute T_2 recursively +* Compute T_{2,2} recursively * CALL SLARFT(DIRECT, STOREV, N-L, K-L, V(L+1,L+1), LDV, $ TAU(L+1), T(L+1,L+1), LDT) * -* Compute T_3 -* T_3 = V_{1,2} +* Compute T_{1,2} +* T_{1,2} = V_{1,2} * CALL SLACPY('All', L, K - L, V(1,L+1), LDV, T(1, L+1), LDT) * -* T_3 = T_3V_{2,2}' +* T_{1,2} = T_{1,2}*V_{2,2}' * CALL STRMM('Right', 'Upper', 'Transpose', 'Unit', L, K-L, ONE, $ V(L+1, L+1), LDV, T(1, L+1), LDT) * -* T_3 = V_{1,3}V_{2,3}' + T_3 +* T_{1,2} = V_{1,3}*V_{2,3}' + T_{1,2} * Note: We assume K <= N, and GEMM will do nothing if N=K * CALL SGEMM('No transpose', 'Transpose', L, K-L, N-K, ONE, $ V(1, K+1), LDV, V(L+1, K+1), LDV, ONE, T(1, L+1), LDT) * -* At this point, we have that T_3 = V_1V_2' -* All that is left is to pre and post multiply by -T_1 and T_2 +* At this point, we have that T_{1,2} = V_1*V_2' +* All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} * respectively. * -* T_3 = -T_1T_3 +* T_{1,2} = -T_{1,1}*T_{1,2} * CALL STRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, K-L, $ NEG_ONE, T, LDT, T(1, L+1), LDT) * -* T_3 = T_3T_2 +* T_{1,2} = T_{1,2}*T_{2,2} * CALL STRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, $ K-L, ONE, T(L+1,L+1), LDT, T(1, L+1), LDT) @@ -448,27 +448,27 @@ RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * V_{3,2}\in\R^{l,l} unit upper triangular * * We will construct the T matrix -* T = |---------------| = |--------| -* |T_{1,1} 0 | |T_1 0 | -* |T_{2,1} T_{2,2}| |T_3 T_2| -* |---------------| |--------| +* T = |---------------| +* |T_{1,1} 0 | +* |T_{2,1} T_{2,2}| +* |---------------| * -* T is the triangular factor attained from block reflectors. -* To motivate the structure, assume we have already computed T_1 -* and T_2. Then collect the associated reflectors in V_1 and V_2 +* T is the triangular factor obtained from block reflectors. +* To motivate the structure, assume we have already computed T_{1,1} +* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 * -* T_1\in\R^{k-l, k-l} non-unit lower triangular -* T_2\in\R^{l, l} non-unit lower triangular -* T_3\in\R^{k-l, l} rectangular +* T_{1,1}\in\R^{k-l, k-l} non-unit lower triangular +* T_{2,2}\in\R^{l, l} non-unit lower triangular +* T_{2,1}\in\R^{k-l, l} rectangular * * Where l = floor(k/2) * * Then, consider the product: * -* (I - V_2T_2V_2')(I - V_1T_1V_1') -* = I - V_2T_2V_2' - V_1T_1V_1' + V_2T_2V_2'V_1T_1V_1' +* (I - V_2*T_{2,2}*V_2')*(I - V_1*T_{1,1}*V_1') +* = I - V_2*T_{2,2}*V_2' - V_1*T_{1,1}*V_1' + V_2*T_{2,2}*V_2'*V_1*T_{1,1}*V_1' * -* Define T_3 = -T_2V_2'V_1T_1 +* Define T_{2,1} = -T_{2,2}*V_2'*V_1*T_{1,1} * * Then, we can define the matrix V as * V = |-------| @@ -476,21 +476,21 @@ RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * |-------| * * So, our product is equivalent to the matrix product -* I - VTV' -* This means, we can compute T_1 and T_2, then use this information -* to compute T_3 +* I - V*T*V' +* This means, we can compute T_{1,1} and T_{2,2}, then use this information +* to compute T_{2,1} * -* Compute T_1 recursively +* Compute T_{1,1} recursively * CALL SLARFT(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, T, LDT) * -* Compute T_2 recursively +* Compute T_{2,2} recursively * CALL SLARFT(DIRECT, STOREV, N, L, V(1, K-L+1), LDV, TAU(K-L+1), $ T(K-L+1,K-L+1), LDT) * -* Compute T_3 -* T_3 = V_{2,2}' +* Compute T_{2,1} +* T_{2,1} = V_{2,2}' * DO J = 1, K-L DO I = 1, L @@ -498,28 +498,28 @@ RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, END DO END DO * -* T_3 = T_3V_{2,1} +* T_{2,1} = T_{2,1}*V_{2,1} * CALL STRMM('Right', 'Upper', 'No transpose', 'Unit', L, K-L, $ ONE, V(N-K+1,1), LDV, T(K-L+1,1), LDT) * -* T_3 = V_{2,2}'V_{2,1} + T_3 +* T_{2,1} = V_{2,2}'*V_{2,1} + T_{2,1} * Note: We assume K <= N, and GEMM will do nothing if N=K * CALL SGEMM('Transpose', 'No transpose', L, K-L, N-K, ONE, $ V(1,K-L+1), LDV, V, LDV, ONE, T(K-L+1,1), LDT) * -* At this point, we have that T_3 = V_2'V_1 -* All that is left is to pre and post multiply by -T_2 and T_1 +* At this point, we have that T_{2,1} = V_2'*V_1 +* All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} * respectively. * -* T_3 = -T_2T_3 +* T_{2,1} = -T_{2,2}*T_{2,1} * CALL STRMM('Left', 'Lower', 'No transpose', 'Non-unit', L, K-L, $ NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) * -* T_3 = T_3T_1 +* T_{2,1} = T_{2,1}*T_{1,1} * CALL STRMM('Right', 'Lower', 'No transpose', 'Non-unit', L, $ K-L, ONE, T, LDT, T(K-L+1,1), LDT) @@ -542,27 +542,27 @@ RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * V_{2,3}\in\R^{l,l} unit lower triangular * * We will construct the T matrix -* T = |---------------| = |--------| -* |T_{1,1} 0 | |T_1 0 | -* |T_{2,1} T_{2,2}| |T_3 T_2| -* |---------------| |--------| +* T = |---------------| +* |T_{1,1} 0 | +* |T_{2,1} T_{2,2}| +* |---------------| * -* T is the triangular factor attained from block reflectors. -* To motivate the structure, assume we have already computed T_1 -* and T_2. Then collect the associated reflectors in V_1 and V_2 +* T is the triangular factor obtained from block reflectors. +* To motivate the structure, assume we have already computed T_{1,1} +* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 * -* T_1\in\R^{k-l, k-l} non-unit lower triangular -* T_2\in\R^{l, l} non-unit lower triangular -* T_3\in\R^{k-l, l} rectangular +* T_{1,1}\in\R^{k-l, k-l} non-unit lower triangular +* T_{2,2}\in\R^{l, l} non-unit lower triangular +* T_{2,1}\in\R^{k-l, l} rectangular * * Where l = floor(k/2) * * Then, consider the product: * -* (I - V_2'T_2V_2)(I - V_1'T_1V_1) -* = I - V_2'T_2V_2 - V_1'T_1V_1 + V_2'T_2V_2V_1'T_1V_1 +* (I - V_2'*T_{2,2}*V_2)*(I - V_1'*T_{1,1}*V_1) +* = I - V_2'*T_{2,2}*V_2 - V_1'*T_{1,1}*V_1 + V_2'*T_{2,2}*V_2*V_1'*T_{1,1}*V_1 * -* Define T_3 = -T_2V_2V_1'T_1 +* Define T_{2,1} = -T_{2,2}*V_2*V_1'*T_{1,1} * * Then, we can define the matrix V as * V = |---| @@ -572,49 +572,49 @@ RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * So, our product is equivalent to the matrix product * I - V'TV -* This means, we can compute T_1 and T_2, then use this information -* to compute T_3 +* This means, we can compute T_{1,1} and T_{2,2}, then use this information +* to compute T_{2,1} * -* Compute T_1 recursively +* Compute T_{1,1} recursively * CALL SLARFT(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, T, LDT) * -* Compute T_2 recursively +* Compute T_{2,2} recursively * CALL SLARFT(DIRECT, STOREV, N, L, V(K-L+1,1), LDV, TAU(K-L+1), $ T(K-L+1,K-L+1), LDT) * -* Compute T_3 -* T_3 = V_{2,2} +* Compute T_{2,1} +* T_{2,1} = V_{2,2} * CALL SLACPY('All', L, K-L, V(K-L+1,N-K+1), LDV, T(K-L+1,1), $ LDT) * -* T_3 = T_3V_{1,2}' +* T_{2,1} = T_{2,1}*V_{1,2}' * CALL STRMM('Right', 'Lower', 'Transpose', 'Unit', L, K-L, ONE, $ V(1, N-K+1), LDV, T(K-L+1,1), LDT) * -* T_3 = V_{2,1}V_{1,1}' + T_3 +* T_{2,1} = V_{2,1}*V_{1,1}' + T_{2,1} * Note: We assume K <= N, and GEMM will do nothing if N=K * CALL SGEMM('No transpose', 'Transpose', L, K-L, N-K, ONE, $ V(K-L+1,1), LDV, V, LDV, ONE, T(K-L+1,1), LDT) * -* At this point, we have that T_3 = V_2V_1' -* All that is left is to pre and post multiply by -T_2 and T_1 +* At this point, we have that T_{2,1} = V_2*V_1' +* All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} * respectively. * -* T_3 = -T_2T_3 +* T_{2,1} = -T_{2,2}*T_{2,1} * CALL STRMM('Left', 'Lower', 'No tranpose', 'Non-unit', L, K-L, $ NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) * -* T_3 = T_3T_1 +* T_{2,1} = T_{2,1}*T_{1,1} * CALL STRMM('Right', 'Lower', 'No tranpose', 'Non-unit', L, K-L, $ ONE, T, LDT, T(K-L+1,1), LDT) diff --git a/SRC/zlarft.f b/SRC/zlarft.f index eaada56253..bccb4325e9 100644 --- a/SRC/zlarft.f +++ b/SRC/zlarft.f @@ -235,7 +235,7 @@ RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * QR = DIRF.AND.COLV * -* LQ happens when we have Forward direction in row storage +* LQ happens when we have forward direction in row storage * LQ = DIRF.AND.(.NOT.COLV) * @@ -267,27 +267,27 @@ RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * V_{3,2}\in\C^{n-k,k-l} rectangular * * We will construct the T matrix -* T = |---------------| = |--------| -* |T_{1,1} T_{1,2}| |T_1 T_3| -* |0 T_{2,2}| |0 T_2| -* |---------------| |--------| +* T = |---------------| +* |T_{1,1} T_{1,2}| +* |0 T_{2,2}| +* |---------------| * -* T is the triangular factor attained from block reflectors. -* To motivate the structure, assume we have already computed T_1 -* and T_2. Then collect the associated reflectors in V_1 and V_2 +* T is the triangular factor obtained from block reflectors. +* To motivate the structure, assume we have already computed T_{1,1} +* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 * -* T_1\in\C^{l, l} upper triangular -* T_2\in\C^{k-l, k-l} upper triangular -* T_3\in\C^{l, k-l} rectangular +* T_{1,1}\in\C^{l, l} upper triangular +* T_{2,2}\in\C^{k-l, k-l} upper triangular +* T_{1,2}\in\C^{l, k-l} rectangular * * Where l = floor(k/2) * * Then, consider the product: * -* (I - V_1T_1V_1')(I - V_2T_2V_2') -* = I - V_1T_1V_1' - V_2T_2V_2' + V_1T_1V_1'V_2T_2V_2' +* (I - V_1*T_{1,1}*V_1')*(I - V_2*T_{2,2}*V_2') +* = I - V_1*T_{1,1}*V_1' - V_2*T_{2,2}*V_2' + V_1*T_{1,1}*V_1'*V_2*T_{2,2}*V_2' * -* Define T_3 = -T_1V_1'V_2T_2 +* Define T_{1,2} = -T_{1,1}*V_1'*V_2*T_{2,2} * * Then, we can define the matrix V as * V = |-------| @@ -295,21 +295,21 @@ RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * |-------| * * So, our product is equivalent to the matrix product -* I - VTV' -* This means, we can compute T_1 and T_2, then use this information -* to compute T_3 +* I - V*T*V' +* This means, we can compute T_{1,1} and T_{2,2}, then use this information +* to compute T_{1,2} * -* Compute T_1 recursively +* Compute T_{1,1} recursively * CALL ZLARFT(DIRECT, STOREV, N, L, V, LDV, TAU, T, LDT) * -* Compute T_2 recursively +* Compute T_{2,2} recursively * CALL ZLARFT(DIRECT, STOREV, N-L, K-L, V(L+1,L+1), LDV, $ TAU(L+1), T(L+1,L+1), LDT) * -* Compute T_3 -* T_3 = V_{2,1}' +* Compute T_{1,2} +* T_{1,2} = V_{2,1}' * DO J = 1, L DO I = 1, K-L @@ -317,28 +317,28 @@ RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, END DO END DO * -* T_3 = T_3V_{2,2} +* T_{1,2} = T_{1,2}*V_{2,2} * CALL ZTRMM('Right', 'Lower', 'No transpose', 'Unit', L, K-L, $ ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) * -* T_3 = V_{3,1}'V_{3,2} + T_3 +* T_{1,2} = V_{3,1}'*V_{3,2} + T_{1,2} * Note: We assume K <= N, and GEMM will do nothing if N=K * CALL ZGEMM('Conjugate', 'No transpose', L, K-L, N-K, ONE, $ V(K+1, 1), LDV, V(K+1,L+1), LDV, ONE, T(1, L+1), LDT) * -* At this point, we have that T_3 = V_1'V_2 -* All that is left is to pre and post multiply by -T_1 and T_2 +* At this point, we have that T_{1,2} = V_1'*V_2 +* All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} * respectively. * -* T_3 = -T_1T_3 +* T_{1,2} = -T_{1,1}*T_{1,2} * CALL ZTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, $ K-L, NEG_ONE, T, LDT, T(1, L+1), LDT) * -* T_3 = T_3T_2 +* T_{1,2} = T_{1,2}*T_{2,2} * CALL ZTRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, $ K-L, ONE, T(L+1,L+1), LDT, T(1, L+1), LDT) @@ -362,25 +362,25 @@ RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * Where l = floor(k/2) * * We will construct the T matrix -* T = |---------------| = |--------| -* |T_{1,1} T_{1,2}| |T_1 T_3| -* |0 T_{2,2}| |0 T_2| -* |---------------| |--------| +* T = |---------------| +* |T_{1,1} T_{1,2}| +* |0 T_{2,2}| +* |---------------| * -* T is the triangular factor attained from block reflectors. -* To motivate the structure, assume we have already computed T_1 -* and T_2. Then collect the associated reflectors in V_1 and V_2 +* T is the triangular factor obtained from block reflectors. +* To motivate the structure, assume we have already computed T_{1,1} +* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 * -* T_1\in\C^{l, l} upper triangular -* T_2\in\C^{k-l, k-l} upper triangular -* T_3\in\C^{l, k-l} rectangular +* T_{1,1}\in\C^{l, l} upper triangular +* T_{2,2}\in\C^{k-l, k-l} upper triangular +* T_{1,2}\in\C^{l, k-l} rectangular * * Then, consider the product: * -* (I - V_1'T_1V_1)(I - V_2'T_2V_2) -* = I - V_1'T_1V_1 - V_2'T_2V_2 + V_1'T_1V_1V_2'T_2V_2 +* (I - V_1'*T_{1,1}*V_1)*(I - V_2'*T_{2,2}*V_2) +* = I - V_1'*T_{1,1}*V_1 - V_2'*T_{2,2}*V_2 + V_1'*T_{1,1}*V_1*V_2'*T_{2,2}*V_2 * -* Define T_3 = -T_1V_1V_2'T_2 +* Define T_{1,2} = -T_{1,1}*V_1*V_2'*T_{2,2} * * Then, we can define the matrix V as * V = |---| @@ -389,48 +389,48 @@ RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * |---| * * So, our product is equivalent to the matrix product -* I - V'TV -* This means, we can compute T_1 and T_2, then use this information -* to compute T_3 +* I - V'*T*V +* This means, we can compute T_{1,1} and T_{2,2}, then use this information +* to compute T_{1,2} * -* Compute T_1 recursively +* Compute T_{1,1} recursively * CALL ZLARFT(DIRECT, STOREV, N, L, V, LDV, TAU, T, LDT) * -* Compute T_2 recursively +* Compute T_{2,2} recursively * CALL ZLARFT(DIRECT, STOREV, N-L, K-L, V(L+1,L+1), LDV, $ TAU(L+1), T(L+1,L+1), LDT) * -* Compute T_3 -* T_3 = V_{1,2} +* Compute T_{1,2} +* T_{1,2} = V_{1,2} * CALL ZLACPY('All', L, K - L, V(1,L+1), LDV, T(1, L+1), LDT) * -* T_3 = T_3V_{2,2}' +* T_{1,2} = T_{1,2}*V_{2,2}' * CALL ZTRMM('Right', 'Upper', 'Conjugate', 'Unit', L, K-L, ONE, $ V(L+1, L+1), LDV, T(1, L+1), LDT) * -* T_3 = V_{1,3}V_{2,3}' + T_3 +* T_{1,2} = V_{1,3}*V_{2,3}' + T_{1,2} * Note: We assume K <= N, and GEMM will do nothing if N=K * CALL ZGEMM('No transpose', 'Conjugate', L, K-L, N-K, ONE, $ V(1, K+1), LDV, V(L+1, K+1), LDV, ONE, T(1, L+1), LDT) * -* At this point, we have that T_3 = V_1V_2' -* All that is left is to pre and post multiply by -T_1 and T_2 +* At this point, we have that T_{1,2} = V_1*V_2' +* All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} * respectively. * -* T_3 = -T_1T_3 +* T_{1,2} = -T_{1,1}*T_{1,2} * CALL ZTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, K-L, $ NEG_ONE, T, LDT, T(1, L+1), LDT) * -* T_3 = T_3T_2 +* T_{1,2} = T_{1,2}*T_{2,2} * CALL ZTRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, $ K-L, ONE, T(L+1,L+1), LDT, T(1, L+1), LDT) @@ -452,27 +452,27 @@ RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * V_{3,2}\in\C^{l,l} unit upper triangular * * We will construct the T matrix -* T = |---------------| = |--------| -* |T_{1,1} 0 | |T_1 0 | -* |T_{2,1} T_{2,2}| |T_3 T_2| -* |---------------| |--------| +* T = |---------------| +* |T_{1,1} 0 | +* |T_{2,1} T_{2,2}| +* |---------------| * -* T is the triangular factor attained from block reflectors. -* To motivate the structure, assume we have already computed T_1 -* and T_2. Then collect the associated reflectors in V_1 and V_2 +* T is the triangular factor obtained from block reflectors. +* To motivate the structure, assume we have already computed T_{1,1} +* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 * -* T_1\in\C^{k-l, k-l} non-unit lower triangular -* T_2\in\C^{l, l} non-unit lower triangular -* T_3\in\C^{k-l, l} rectangular +* T_{1,1}\in\C^{k-l, k-l} non-unit lower triangular +* T_{2,2}\in\C^{l, l} non-unit lower triangular +* T_{2,1}\in\C^{k-l, l} rectangular * * Where l = floor(k/2) * * Then, consider the product: * -* (I - V_2T_2V_2')(I - V_1T_1V_1') -* = I - V_2T_2V_2' - V_1T_1V_1' + V_2T_2V_2'V_1T_1V_1' +* (I - V_2*T_{2,2}*V_2')*(I - V_1*T_{1,1}*V_1') +* = I - V_2*T_{2,2}*V_2' - V_1*T_{1,1}*V_1' + V_2*T_{2,2}*V_2'*V_1*T_{1,1}*V_1' * -* Define T_3 = -T_2V_2'V_1T_1 +* Define T_{2,1} = -T_{2,2}*V_2'*V_1*T_{1,1} * * Then, we can define the matrix V as * V = |-------| @@ -480,21 +480,21 @@ RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * |-------| * * So, our product is equivalent to the matrix product -* I - VTV' -* This means, we can compute T_1 and T_2, then use this information -* to compute T_3 +* I - V*T*V' +* This means, we can compute T_{1,1} and T_{2,2}, then use this information +* to compute T_{2,1} * -* Compute T_1 recursively +* Compute T_{1,1} recursively * CALL ZLARFT(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, T, LDT) * -* Compute T_2 recursively +* Compute T_{2,2} recursively * CALL ZLARFT(DIRECT, STOREV, N, L, V(1, K-L+1), LDV, TAU(K-L+1), $ T(K-L+1,K-L+1), LDT) * -* Compute T_3 -* T_3 = V_{2,2}' +* Compute T_{2,1} +* T_{2,1} = V_{2,2}' * DO J = 1, K-L DO I = 1, L @@ -502,28 +502,28 @@ RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, END DO END DO * -* T_3 = T_3V_{2,1} +* T_{2,1} = T_{2,1}*V_{2,1} * CALL ZTRMM('Right', 'Upper', 'No transpose', 'Unit', L, K-L, $ ONE, V(N-K+1,1), LDV, T(K-L+1,1), LDT) * -* T_3 = V_{2,2}'V_{2,1} + T_3 +* T_{2,1} = V_{2,2}'*V_{2,1} + T_{2,1} * Note: We assume K <= N, and GEMM will do nothing if N=K * CALL ZGEMM('Conjugate', 'No transpose', L, K-L, N-K, ONE, $ V(1,K-L+1), LDV, V, LDV, ONE, T(K-L+1,1), LDT) * -* At this point, we have that T_3 = V_2'V_1 -* All that is left is to pre and post multiply by -T_2 and T_1 +* At this point, we have that T_{2,1} = V_2'*V_1 +* All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} * respectively. * -* T_3 = -T_2T_3 +* T_{2,1} = -T_{2,2}*T_{2,1} * CALL ZTRMM('Left', 'Lower', 'No transpose', 'Non-unit', L, K-L, $ NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) * -* T_3 = T_3T_1 +* T_{2,1} = T_{2,1}*T_{1,1} * CALL ZTRMM('Right', 'Lower', 'No transpose', 'Non-unit', L, $ K-L, ONE, T, LDT, T(K-L+1,1), LDT) @@ -546,27 +546,27 @@ RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * V_{2,3}\in\C^{l,l} unit lower triangular * * We will construct the T matrix -* T = |---------------| = |--------| -* |T_{1,1} 0 | |T_1 0 | -* |T_{2,1} T_{2,2}| |T_3 T_2| -* |---------------| |--------| +* T = |---------------| +* |T_{1,1} 0 | +* |T_{2,1} T_{2,2}| +* |---------------| * -* T is the triangular factor attained from block reflectors. -* To motivate the structure, assume we have already computed T_1 -* and T_2. Then collect the associated reflectors in V_1 and V_2 +* T is the triangular factor obtained from block reflectors. +* To motivate the structure, assume we have already computed T_{1,1} +* and T_{2,2}. Then collect the associated reflectors in V_1 and V_2 * -* T_1\in\C^{k-l, k-l} non-unit lower triangular -* T_2\in\C^{l, l} non-unit lower triangular -* T_3\in\C^{k-l, l} rectangular +* T_{1,1}\in\C^{k-l, k-l} non-unit lower triangular +* T_{2,2}\in\C^{l, l} non-unit lower triangular +* T_{2,1}\in\C^{k-l, l} rectangular * * Where l = floor(k/2) * * Then, consider the product: * -* (I - V_2'T_2V_2)(I - V_1'T_1V_1) -* = I - V_2'T_2V_2 - V_1'T_1V_1 + V_2'T_2V_2V_1'T_1V_1 +* (I - V_2'*T_{2,2}*V_2)*(I - V_1'*T_{1,1}*V_1) +* = I - V_2'*T_{2,2}*V_2 - V_1'*T_{1,1}*V_1 + V_2'*T_{2,2}*V_2*V_1'*T_{1,1}*V_1 * -* Define T_3 = -T_2V_2V_1'T_1 +* Define T_{2,1} = -T_{2,2}*V_2*V_1'*T_{1,1} * * Then, we can define the matrix V as * V = |---| @@ -575,50 +575,50 @@ RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * |---| * * So, our product is equivalent to the matrix product -* I - V'TV -* This means, we can compute T_1 and T_2, then use this information -* to compute T_3 +* I - V'*T*V +* This means, we can compute T_{1,1} and T_{2,2}, then use this information +* to compute T_{2,1} * -* Compute T_1 recursively +* Compute T_{1,1} recursively * CALL ZLARFT(DIRECT, STOREV, N-L, K-L, V, LDV, TAU, T, LDT) * -* Compute T_2 recursively +* Compute T_{2,2} recursively * CALL ZLARFT(DIRECT, STOREV, N, L, V(K-L+1,1), LDV, TAU(K-L+1), $ T(K-L+1,K-L+1), LDT) * -* Compute T_3 -* T_3 = V_{2,2} +* Compute T_{2,1} +* T_{2,1} = V_{2,2} * CALL ZLACPY('All', L, K-L, V(K-L+1,N-K+1), LDV, T(K-L+1,1), $ LDT) * -* T_3 = T_3V_{1,2}' +* T_{2,1} = T_{2,1}*V_{1,2}' * CALL ZTRMM('Right', 'Lower', 'Conjugate', 'Unit', L, K-L, ONE, $ V(1, N-K+1), LDV, T(K-L+1,1), LDT) * -* T_3 = V_{2,1}V_{1,1}' + T_3 +* T_{2,1} = V_{2,1}*V_{1,1}' + T_{2,1} * Note: We assume K <= N, and GEMM will do nothing if N=K * CALL ZGEMM('No transpose', 'Conjugate', L, K-L, N-K, ONE, $ V(K-L+1,1), LDV, V, LDV, ONE, T(K-L+1,1), LDT) * -* At this point, we have that T_3 = V_2V_1' -* All that is left is to pre and post multiply by -T_2 and T_1 +* At this point, we have that T_{2,1} = V_2*V_1' +* All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} * respectively. * -* T_3 = -T_2T_3 +* T_{2,1} = -T_{2,2}*T_{2,1} * CALL ZTRMM('Left', 'Lower', 'No tranpose', 'Non-unit', L, K-L, $ NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) * -* T_3 = T_3T_1 +* T_{2,1} = T_{2,1}*T_{1,1} * CALL ZTRMM('Right', 'Lower', 'No tranpose', 'Non-unit', L, K-L, $ ONE, T, LDT, T(K-L+1,1), LDT) From 354a16f22f984ce1ff161657c4db9b7ad352b136 Mon Sep 17 00:00:00 2001 From: Johnathan Rhyne Date: Sat, 30 Nov 2024 08:49:18 -0500 Subject: [PATCH 06/10] Removed mod files and extranous file changes (hopefully) --- SRC/clarft.f | 44 +++++++++++++++++++++---------------------- SRC/dgelqf.f | 4 ++-- SRC/dgeqlf.f | 4 ++-- SRC/dgerqf.f | 3 +-- SRC/dlarft.f | 44 +++++++++++++++++++++---------------------- SRC/dorglq.f | 3 ++- SRC/dorgql.f | 2 +- SRC/dorgrq.f | 4 ++-- SRC/dormlq.f | 2 +- SRC/dormql.f | 4 ++-- SRC/dormqr.f | 3 ++- SRC/la_constants.mod | Bin 1563 -> 0 bytes SRC/la_xisnan.mod | Bin 321 -> 0 bytes SRC/slarft.f | 2 +- 14 files changed, 60 insertions(+), 59 deletions(-) delete mode 100644 SRC/la_constants.mod delete mode 100644 SRC/la_xisnan.mod diff --git a/SRC/clarft.f b/SRC/clarft.f index 08ef9cc224..204903be46 100644 --- a/SRC/clarft.f +++ b/SRC/clarft.f @@ -159,8 +159,8 @@ *> \endverbatim *> * ===================================================================== - RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, - $ LDT ) + RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, + $ TAU, T, LDT ) * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -319,8 +319,8 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * T_{1,2} = T_{1,2}*V_{2,2} * - CALL CTRMM('Right', 'Lower', 'No transpose', 'Unit', L, K-L, - $ ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) + CALL CTRMM('Right', 'Lower', 'No transpose', 'Unit', L, + $ K-L, ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) * * T_{1,2} = V_{3,1}'*V_{3,2} + T_{1,2} @@ -410,8 +410,8 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * T_{1,2} = T_{1,2}*V_{2,2}' * - CALL CTRMM('Right', 'Upper', 'Conjugate', 'Unit', L, K-L, ONE, - $ V(L+1, L+1), LDV, T(1, L+1), LDT) + CALL CTRMM('Right', 'Upper', 'Conjugate', 'Unit', L, K-L, + $ ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) * * T_{1,2} = V_{1,3}*V_{2,3}' + T_{1,2} @@ -426,8 +426,8 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * T_{1,2} = -T_{1,1}*T_{1,2} * - CALL CTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, K-L, - $ NEG_ONE, T, LDT, T(1, L+1), LDT) + CALL CTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, + $ K-L, NEG_ONE, T, LDT, T(1, L+1), LDT) * * T_{1,2} = T_{1,2}*T_{2,2} @@ -490,8 +490,8 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * Compute T_{2,2} recursively * - CALL CLARFT(DIRECT, STOREV, N, L, V(1, K-L+1), LDV, TAU(K-L+1), - $ T(K-L+1,K-L+1), LDT) + CALL CLARFT(DIRECT, STOREV, N, L, V(1, K-L+1), LDV, + $ TAU(K-L+1), T(K-L+1,K-L+1), LDT) * * Compute T_{2,1} * T_{2,1} = V_{2,2}' @@ -504,8 +504,8 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * T_{2,1} = T_{2,1}*V_{2,1} * - CALL CTRMM('Right', 'Upper', 'No transpose', 'Unit', L, K-L, - $ ONE, V(N-K+1,1), LDV, T(K-L+1,1), LDT) + CALL CTRMM('Right', 'Upper', 'No transpose', 'Unit', L, + $ K-L, ONE, V(N-K+1,1), LDV, T(K-L+1,1), LDT) * * T_{2,1} = V_{2,2}'*V_{2,1} + T_{2,1} @@ -520,8 +520,8 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * T_{2,1} = -T_{2,2}*T_{2,1} * - CALL CTRMM('Left', 'Lower', 'No transpose', 'Non-unit', L, K-L, - $ NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) + CALL CTRMM('Left', 'Lower', 'No transpose', 'Non-unit', L, + $ K-L, NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) * * T_{2,1} = T_{2,1}*T_{1,1} * @@ -585,8 +585,8 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * Compute T_{2,2} recursively * - CALL CLARFT(DIRECT, STOREV, N, L, V(K-L+1,1), LDV, TAU(K-L+1), - $ T(K-L+1,K-L+1), LDT) + CALL CLARFT(DIRECT, STOREV, N, L, V(K-L+1,1), LDV, + $ TAU(K-L+1), T(K-L+1,K-L+1), LDT) * * Compute T_{2,1} * T_{2,1} = V_{2,2} @@ -597,8 +597,8 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * T_{2,1} = T_{2,1}*V_{1,2}' * - CALL CTRMM('Right', 'Lower', 'Conjugate', 'Unit', L, K-L, ONE, - $ V(1, N-K+1), LDV, T(K-L+1,1), LDT) + CALL CTRMM('Right', 'Lower', 'Conjugate', 'Unit', L, K-L, + $ ONE, V(1, N-K+1), LDV, T(K-L+1,1), LDT) * * T_{2,1} = V_{2,1}*V_{1,1}' + T_{2,1} @@ -614,13 +614,13 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * T_{2,1} = -T_{2,2}*T_{2,1} * - CALL CTRMM('Left', 'Lower', 'No tranpose', 'Non-unit', L, K-L, - $ NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) + CALL CTRMM('Left', 'Lower', 'No tranpose', 'Non-unit', L, + $ K-L, NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) * * T_{2,1} = T_{2,1}*T_{1,1} * - CALL CTRMM('Right', 'Lower', 'No tranpose', 'Non-unit', L, K-L, - $ ONE, T, LDT, T(K-L+1,1), LDT) + CALL CTRMM('Right', 'Lower', 'No tranpose', 'Non-unit', L, + $ K-L, ONE, T, LDT, T(K-L+1,1), LDT) END IF END SUBROUTINE diff --git a/SRC/dgelqf.f b/SRC/dgelqf.f index f0eb00a55d..7d146b0e1e 100644 --- a/SRC/dgelqf.f +++ b/SRC/dgelqf.f @@ -251,8 +251,8 @@ SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * - CALL DLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ), - $ LDA, TAU( I ), WORK, LDWORK ) + CALL DLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, + $ I ), LDA, TAU( I ), WORK, LDWORK ) * * Apply H to A(i+ib:m,i:n) from the right * diff --git a/SRC/dgeqlf.f b/SRC/dgeqlf.f index 7da963aeaf..8cc69cdb1f 100644 --- a/SRC/dgeqlf.f +++ b/SRC/dgeqlf.f @@ -256,8 +256,8 @@ SUBROUTINE DGEQLF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * - CALL DLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, - $ IB, A( 1, N-K+I ), LDA, TAU( I ), WORK, + CALL DLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB, + $ A( 1, N-K+I ), LDA, TAU( I ), WORK, $ LDWORK ) * * Apply H**T to A(1:m-k+i+ib-1,1:n-k+i-1) from the left diff --git a/SRC/dgerqf.f b/SRC/dgerqf.f index 8760ee04b3..94e90bca10 100644 --- a/SRC/dgerqf.f +++ b/SRC/dgerqf.f @@ -257,8 +257,7 @@ SUBROUTINE DGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) * H = H(i+ib-1) . . . H(i+1) H(i) * CALL DLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB, - $ A( M-K+I, 1 ), LDA, TAU( I ), WORK, - $ LDWORK ) + $ A( M-K+I, 1 ), LDA, TAU( I ), WORK, LDWORK ) * * Apply H to A(1:m-k+i-1,1:n-k+i+ib-1) from the right * diff --git a/SRC/dlarft.f b/SRC/dlarft.f index 66b8c3d922..679de121bf 100644 --- a/SRC/dlarft.f +++ b/SRC/dlarft.f @@ -159,8 +159,8 @@ *> \endverbatim *> * ===================================================================== - RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, - $ LDT ) + RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, + $ TAU, T, LDT ) * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- @@ -315,8 +315,8 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * T_{1,2} = T_{1,2}*V_{2,2} * - CALL DTRMM('Right', 'Lower', 'No transpose', 'Unit', L, K-L, - $ ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) + CALL DTRMM('Right', 'Lower', 'No transpose', 'Unit', L, + $ K-L, ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) * * T_{1,2} = V_{3,1}'*V_{3,2} + T_{1,2} @@ -406,8 +406,8 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * T_{1,2} = T_{1,2}*V_{2,2}' * - CALL DTRMM('Right', 'Upper', 'Transpose', 'Unit', L, K-L, ONE, - $ V(L+1, L+1), LDV, T(1, L+1), LDT) + CALL DTRMM('Right', 'Upper', 'Transpose', 'Unit', L, K-L, + $ ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) * * T_{1,2} = V_{1,3}*V_{2,3}' + T_{1,2} @@ -422,8 +422,8 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * T_{1,2} = -T_{1,1}*T_{1,2} * - CALL DTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, K-L, - $ NEG_ONE, T, LDT, T(1, L+1), LDT) + CALL DTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, + $ K-L, NEG_ONE, T, LDT, T(1, L+1), LDT) * * T_{1,2} = T_{1,2}*T_{2,2} @@ -486,8 +486,8 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * Compute T_{2,2} recursively * - CALL DLARFT(DIRECT, STOREV, N, L, V(1, K-L+1), LDV, TAU(K-L+1), - $ T(K-L+1,K-L+1), LDT) + CALL DLARFT(DIRECT, STOREV, N, L, V(1, K-L+1), LDV, + $ TAU(K-L+1), T(K-L+1,K-L+1), LDT) * * Compute T_{2,1} * T_{2,1} = V_{2,2}' @@ -500,8 +500,8 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * T_{2,1} = T_{2,1}*V_{2,1} * - CALL DTRMM('Right', 'Upper', 'No transpose', 'Unit', L, K-L, - $ ONE, V(N-K+1,1), LDV, T(K-L+1,1), LDT) + CALL DTRMM('Right', 'Upper', 'No transpose', 'Unit', L, + $ K-L, ONE, V(N-K+1,1), LDV, T(K-L+1,1), LDT) * * T_{2,1} = V_{2,2}'*V_{2,1} + T_{2,1} @@ -516,8 +516,8 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * T_{2,1} = -T_{2,2}*T_{2,1} * - CALL DTRMM('Left', 'Lower', 'No transpose', 'Non-unit', L, K-L, - $ NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) + CALL DTRMM('Left', 'Lower', 'No transpose', 'Non-unit', L, + $ K-L, NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) * * T_{2,1} = T_{2,1}*T_{1,1} * @@ -581,8 +581,8 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * Compute T_{2,2} recursively * - CALL DLARFT(DIRECT, STOREV, N, L, V(K-L+1,1), LDV, TAU(K-L+1), - $ T(K-L+1,K-L+1), LDT) + CALL DLARFT(DIRECT, STOREV, N, L, V(K-L+1,1), LDV, + $ TAU(K-L+1), T(K-L+1,K-L+1), LDT) * * Compute T_{2,1} * T_{2,1} = V_{2,2} @@ -593,8 +593,8 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * T_{2,1} = T_{2,1}*V_{1,2}' * - CALL DTRMM('Right', 'Lower', 'Transpose', 'Unit', L, K-L, ONE, - $ V(1, N-K+1), LDV, T(K-L+1,1), LDT) + CALL DTRMM('Right', 'Lower', 'Transpose', 'Unit', L, K-L, + $ ONE, V(1, N-K+1), LDV, T(K-L+1,1), LDT) * * T_{2,1} = V_{2,1}*V_{1,1}' + T_{2,1} @@ -610,13 +610,13 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * T_{2,1} = -T_{2,2}*T_{2,1} * - CALL DTRMM('Left', 'Lower', 'No tranpose', 'Non-unit', L, K-L, - $ NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) + CALL DTRMM('Left', 'Lower', 'No tranpose', 'Non-unit', L, + $ K-L, NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) * * T_{2,1} = T_{2,1}*T_{1,1} * - CALL DTRMM('Right', 'Lower', 'No tranpose', 'Non-unit', L, K-L, - $ ONE, T, LDT, T(K-L+1,1), LDT) + CALL DTRMM('Right', 'Lower', 'No tranpose', 'Non-unit', L, + $ K-L, ONE, T, LDT, T(K-L+1,1), LDT) END IF END SUBROUTINE diff --git a/SRC/dorglq.f b/SRC/dorglq.f index 47edfe4eda..a1a1147b8d 100644 --- a/SRC/dorglq.f +++ b/SRC/dorglq.f @@ -253,7 +253,8 @@ SUBROUTINE DORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * - CALL DLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ), + CALL DLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, + $ I ), $ LDA, TAU( I ), WORK, LDWORK ) * * Apply H**T to A(i+ib:m,i:n) from the right diff --git a/SRC/dorgql.f b/SRC/dorgql.f index 8ac4cbf003..f931f5a9c8 100644 --- a/SRC/dorgql.f +++ b/SRC/dorgql.f @@ -260,7 +260,7 @@ SUBROUTINE DORGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * - CALL DLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB, + CALL DLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB, $ A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK ) * * Apply H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left diff --git a/SRC/dorgrq.f b/SRC/dorgrq.f index 54e109b492..c805484578 100644 --- a/SRC/dorgrq.f +++ b/SRC/dorgrq.f @@ -261,8 +261,8 @@ SUBROUTINE DORGRQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * - CALL DLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB, - $ A( II, 1 ), LDA, TAU( I ), WORK, LDWORK) + CALL DLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB, + $ A( II, 1 ), LDA, TAU( I ), WORK, LDWORK ) * * Apply H**T to A(1:m-k+i-1,1:n-k+i+ib-1) from the right * diff --git a/SRC/dormlq.f b/SRC/dormlq.f index ac6f931047..85ca134737 100644 --- a/SRC/dormlq.f +++ b/SRC/dormlq.f @@ -316,7 +316,7 @@ SUBROUTINE DORMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * - CALL DLARFT( 'Forward', 'Rowwise', NQ-I+1, IB, A( I, I ), + CALL DLARFT( 'Forward', 'Rowwise', NQ-I+1, IB, A( I, I ), $ LDA, TAU( I ), WORK( IWT ), LDT ) IF( LEFT ) THEN * diff --git a/SRC/dormql.f b/SRC/dormql.f index 9020c6abd9..11022d78c6 100644 --- a/SRC/dormql.f +++ b/SRC/dormql.f @@ -310,8 +310,8 @@ SUBROUTINE DORMQL( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * - CALL DLARFT( 'Backward', 'Columnwise', NQ-K+I+IB-1, IB, - $ A( 1, I ), LDA, TAU( I ), WORK( IWT ), LDT) + CALL DLARFT( 'Backward', 'Columnwise', NQ-K+I+IB-1, IB, + $ A( 1, I ), LDA, TAU( I ), WORK( IWT ), LDT ) IF( LEFT ) THEN * * H or H**T is applied to C(1:m-k+i+ib-1,1:n) diff --git a/SRC/dormqr.f b/SRC/dormqr.f index 7793103b33..a9f8ba2279 100644 --- a/SRC/dormqr.f +++ b/SRC/dormqr.f @@ -309,7 +309,8 @@ SUBROUTINE DORMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, * Form the triangular factor of the block reflector * H = H(i) H(i+1) . . . H(i+ib-1) * - CALL DLARFT( 'Forward', 'Columnwise', NQ-I+1, IB, A( I, I ), + CALL DLARFT( 'Forward', 'Columnwise', NQ-I+1, IB, A( I, + $ I ), $ LDA, TAU( I ), WORK( IWT ), LDT ) IF( LEFT ) THEN * diff --git a/SRC/la_constants.mod b/SRC/la_constants.mod deleted file mode 100644 index b8006a566979124de13911e5efcec9baedca2f6f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1563 zcmV+$2ITo4iwFP!000001La%abD~HPe&?^4H_U@u12dq$t!3Cf>k>>Eb9Z|$78@{2 zoxvQC+b#e4>lqk8;-Hp5q>}7bXyr$L-80kQbT?nW%;t;v*hbGA_jT=||GY5rHvv*L zLq(6F=fs|iR^jFut)1oLCWvAuh@(%d5mtJ=D^$lnMI2lyaJc8cj?^W4Hs}^FeYy;( ziqxJT#GxNV{^J!$D$-D|cQ>C+tm}ulg&yqR?b)Ape`+r*d!Y!Coglu;{$lw3@?cLE z)0vH|8|xOX7~h~fxQ{ZP&&S`EvxkK>UtU{xmVIs6lW&Q>a|_O*{Bn%A01&v40Mc`i zSix5cDZ?0E4ve831}VXyNPYDDwI4j4s6K)}^QnD5op1n3C4gfMuk6v77Bohnu>l(Y zaMml(usDcwg~#*ptp!giD_pd9rtb9i?q)ifF8cT5KP>(L{;bIiKDYkL2857{swtD1 zeZLsPoe!L@;!i`&e?DWCNjR6-JmJW8L7D(^6L_7%sHLd1BQXvD<4fqR{9nz&{0;6h zVK>o)=GiB`UXD*;_x=hWirFLN6rjd|6l_y4P4N**`4P!A8d4|x4rq@lhG2@V7j8P` zZ+zmI9wF8J?mq>u&yB*YUI7dKmYoP-C@%hd)+v#?PJ;_v&wu(6Up%VeE-FoqNn@B^ zqOKw8YZZP1-r=q`ui?dmqDQ+4IEdSLC~Uw_SPlvJi0eQ39K=P^5v|NeG$15y)3Le^ zCum2HD_|e)OQU#1Y!$&}H`cz1Mb)RPLphWIgYuC`#stevKvn9in!_rb%9dK6(R$n&^IWER7B4D?jL!}Wa zN5u$6^t|pMW$)`$zGmo*Wkdk%;!%|sLQ*jV84rm&I0UO!cb!p@*t`72!a0^)>5P%Y zKSJ;F2_rcg6Jr)}2j>%n;DH-VZZqzflnAIlD60>#7j{!!QGZxgAA)S?xc7kM5F=^N#S(BJpxXj$k%X#(S&wNC3SHgOB+g)D zp`Fr5UF3Qs2gB$>J0+32sE?d57>qEqQVKZ}i*u0187$tnR0BB^3^@qKWL%(~;zwQc zUz5*&lW~DoiXJtAsc;PA0j*R!>hep4TNnvwr^<0An8W1_69~YzwGMu zL%h&}ky&`}1EWFq>H|i@!W#k@jk3miFnQ8u&l06@tYxj-Am~{WGzdiW;=-|!bg{yb znKyy5TMhC)NRWnk{~$=CLfaiiSr=aNZPLBQ`6NzTzEb87-;u1R2n3q-;(%a?9u_z@ z^X?Cj28!sL0AVQF4d8er3iTNAqP?&Xt9I`WAo$Es^87wVJl&t>$3fCXf*FoUmY@ZJ zW*JTpj5I|E=Hcl={^TUvmU)V2tTMZa6i8wjosSB) diff --git a/SRC/la_xisnan.mod b/SRC/la_xisnan.mod deleted file mode 100644 index 1b5610476a459fda31282807dce61ffd51c6d396..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 321 zcmV-H0lxkpiwFP!000001I3fcYQr!PhVOccxye4HC|(lx=z0mJwp4`O(o-?D3xUK2 zyDfSAYKfPmY3U(QB!Ncr!+iQR+Qf|K+^4W_tB+j`pLN%7+XnE`#qiqI<$GPhTi5Pk zSH2v!eN#3!@hw4yVZt>g8a5^pTd@o*3aL66@K_Ur1@@>JmIb)FAjKG#U>Faa1yNY= z2nu>n*%MtPG>>Qw84HUvTF5+vQIMiz9*7umL4!PDRAZ%VlLUSWj>-`jV!^A<3|Hx_ z=`uoPqSYzScQ%c14mO06Y=oM@X{I@?978ObWUL(}lf??j2HgLjS8K`RDV=@hl?(Ww zIK*@W%A?N{|L%8l;DR|QEVArbqwq-7^^P1W_{kj2;LRA;*JN=1&LA6lY-b``YW_?F TR@3_t@fFY)*n0O@xdQ+IFY=W< diff --git a/SRC/slarft.f b/SRC/slarft.f index 449c4b5a75..aa508de2dd 100644 --- a/SRC/slarft.f +++ b/SRC/slarft.f @@ -160,7 +160,7 @@ *> * ===================================================================== RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, - $ LDT ) + $ TAU, T, LDT ) * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, -- From 273ab49035a92f06324c0cde4b2be2f725b1d15d Mon Sep 17 00:00:00 2001 From: Johnathan Rhyne Date: Sat, 30 Nov 2024 08:51:18 -0500 Subject: [PATCH 07/10] removed extranous changes (hopefully x2) --- SRC/dgelqf.f | 3 ++- SRC/dgeqlf.f | 3 +-- SRC/dgerqf.f | 2 +- SRC/dorglq.f | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/SRC/dgelqf.f b/SRC/dgelqf.f index 7d146b0e1e..77ecbfd787 100644 --- a/SRC/dgelqf.f +++ b/SRC/dgelqf.f @@ -252,7 +252,8 @@ SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) * H = H(i) H(i+1) . . . H(i+ib-1) * CALL DLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, - $ I ), LDA, TAU( I ), WORK, LDWORK ) + $ I ), + $ LDA, TAU( I ), WORK, LDWORK ) * * Apply H to A(i+ib:m,i:n) from the right * diff --git a/SRC/dgeqlf.f b/SRC/dgeqlf.f index 8cc69cdb1f..d472e3365e 100644 --- a/SRC/dgeqlf.f +++ b/SRC/dgeqlf.f @@ -257,8 +257,7 @@ SUBROUTINE DGEQLF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) * H = H(i+ib-1) . . . H(i+1) H(i) * CALL DLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB, - $ A( 1, N-K+I ), LDA, TAU( I ), WORK, - $ LDWORK ) + $ A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK ) * * Apply H**T to A(1:m-k+i+ib-1,1:n-k+i-1) from the left * diff --git a/SRC/dgerqf.f b/SRC/dgerqf.f index 94e90bca10..8cabdc36ee 100644 --- a/SRC/dgerqf.f +++ b/SRC/dgerqf.f @@ -256,7 +256,7 @@ SUBROUTINE DGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) * Form the triangular factor of the block reflector * H = H(i+ib-1) . . . H(i+1) H(i) * - CALL DLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB, + CALL DLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB, $ A( M-K+I, 1 ), LDA, TAU( I ), WORK, LDWORK ) * * Apply H to A(1:m-k+i-1,1:n-k+i+ib-1) from the right diff --git a/SRC/dorglq.f b/SRC/dorglq.f index a1a1147b8d..c41367ced4 100644 --- a/SRC/dorglq.f +++ b/SRC/dorglq.f @@ -254,7 +254,7 @@ SUBROUTINE DORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) * H = H(i) H(i+1) . . . H(i+ib-1) * CALL DLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, - $ I ), + $ I ), $ LDA, TAU( I ), WORK, LDWORK ) * * Apply H**T to A(i+ib:m,i:n) from the right From d4741c8a574386765267d69e5a77fa541a085d04 Mon Sep 17 00:00:00 2001 From: Johnathan Rhyne Date: Sat, 30 Nov 2024 08:52:03 -0500 Subject: [PATCH 08/10] removed all extranous changes --- SRC/dgelqf.f | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SRC/dgelqf.f b/SRC/dgelqf.f index 77ecbfd787..03bbb8e1e3 100644 --- a/SRC/dgelqf.f +++ b/SRC/dgelqf.f @@ -252,7 +252,7 @@ SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) * H = H(i) H(i+1) . . . H(i+ib-1) * CALL DLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, - $ I ), + $ I ), $ LDA, TAU( I ), WORK, LDWORK ) * * Apply H to A(i+ib:m,i:n) from the right From db48820da796f8c54baf375139725a8d8101fd3c Mon Sep 17 00:00:00 2001 From: Johnathan Rhyne Date: Sat, 30 Nov 2024 11:05:04 -0500 Subject: [PATCH 09/10] lowered line length to hopefully fix build failures in the CI --- SRC/clarft.f | 47 ++++++++++++++++-------------- SRC/dlarft.f | 62 +++++++++++++++++++++------------------ SRC/slarft.f | 82 ++++++++++++++++++++++++++++------------------------ SRC/zlarft.f | 80 ++++++++++++++++++++++++++------------------------ 4 files changed, 147 insertions(+), 124 deletions(-) diff --git a/SRC/clarft.f b/SRC/clarft.f index 204903be46..de8b97bf9c 100644 --- a/SRC/clarft.f +++ b/SRC/clarft.f @@ -305,15 +305,15 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, * * Compute T_{2,2} recursively * - CALL CLARFT(DIRECT, STOREV, N-L, K-L, V(L+1,L+1), LDV, - $ TAU(L+1), T(L+1,L+1), LDT) + CALL CLARFT(DIRECT, STOREV, N-L, K-L, V(L+1, L+1), LDV, + $ TAU(L+1), T(L+1, L+1), LDT) * * Compute T_{1,2} * T_{1,2} = V_{2,1}' * DO J = 1, L DO I = 1, K-L - T(J,L+I) = CONJG(V(L+I,J)) + T(J, L+I) = CONJG(V(L+I, J)) END DO END DO * @@ -327,7 +327,8 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, * Note: We assume K <= N, and GEMM will do nothing if N=K * CALL CGEMM('Conjugate', 'No transpose', L, K-L, N-K, ONE, - $ V(K+1, 1), LDV, V(K+1,L+1), LDV, ONE, T(1, L+1), LDT) + $ V(K+1, 1), LDV, V(K+1, L+1), LDV, ONE, T(1, L+1), + $ LDT) * * At this point, we have that T_{1,2} = V_1'*V_2 * All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} @@ -341,7 +342,7 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, * T_{1,2} = T_{1,2}*T_{2,2} * CALL CTRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, - $ K-L, ONE, T(L+1,L+1), LDT, T(1, L+1), LDT) + $ K-L, ONE, T(L+1, L+1), LDT, T(1, L+1), LDT) ELSE IF(LQ) THEN * @@ -399,14 +400,14 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, * * Compute T_{2,2} recursively * - CALL CLARFT(DIRECT, STOREV, N-L, K-L, V(L+1,L+1), LDV, - $ TAU(L+1), T(L+1,L+1), LDT) + CALL CLARFT(DIRECT, STOREV, N-L, K-L, V(L+1, L+1), LDV, + $ TAU(L+1), T(L+1, L+1), LDT) * * Compute T_{1,2} * T_{1,2} = V_{1,2} * - CALL CLACPY('All', L, K - L, V(1,L+1), LDV, T(1, L+1), LDT) + CALL CLACPY('All', L, K-L, V(1, L+1), LDV, T(1, L+1), LDT) * * T_{1,2} = T_{1,2}*V_{2,2}' * @@ -491,28 +492,29 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, * Compute T_{2,2} recursively * CALL CLARFT(DIRECT, STOREV, N, L, V(1, K-L+1), LDV, - $ TAU(K-L+1), T(K-L+1,K-L+1), LDT) + $ TAU(K-L+1), T(K-L+1, K-L+1), LDT) * * Compute T_{2,1} * T_{2,1} = V_{2,2}' * DO J = 1, K-L DO I = 1, L - T(K-L+I,J) = CONJG(V(N-K+J, K-L+I)) + T(K-L+I, J) = CONJG(V(N-K+J, K-L+I)) END DO END DO * * T_{2,1} = T_{2,1}*V_{2,1} * CALL CTRMM('Right', 'Upper', 'No transpose', 'Unit', L, - $ K-L, ONE, V(N-K+1,1), LDV, T(K-L+1,1), LDT) + $ K-L, ONE, V(N-K+1, 1), LDV, T(K-L+1, 1), LDT) * * T_{2,1} = V_{2,2}'*V_{2,1} + T_{2,1} * Note: We assume K <= N, and GEMM will do nothing if N=K * CALL CGEMM('Conjugate', 'No transpose', L, K-L, N-K, ONE, - $ V(1,K-L+1), LDV, V, LDV, ONE, T(K-L+1,1), LDT) + $ V(1, K-L+1), LDV, V, LDV, ONE, T(K-L+1, 1), + $ LDT) * * At this point, we have that T_{2,1} = V_2'*V_1 * All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} @@ -521,12 +523,13 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, * T_{2,1} = -T_{2,2}*T_{2,1} * CALL CTRMM('Left', 'Lower', 'No transpose', 'Non-unit', L, - $ K-L, NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) + $ K-L, NEG_ONE, T(K-L+1, K-L+1), LDT, + $ T(K-L+1, 1), LDT) * * T_{2,1} = T_{2,1}*T_{1,1} * CALL CTRMM('Right', 'Lower', 'No transpose', 'Non-unit', L, - $ K-L, ONE, T, LDT, T(K-L+1,1), LDT) + $ K-L, ONE, T, LDT, T(K-L+1, 1), LDT) ELSE * * Else means RQ case @@ -586,26 +589,27 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, * Compute T_{2,2} recursively * CALL CLARFT(DIRECT, STOREV, N, L, V(K-L+1,1), LDV, - $ TAU(K-L+1), T(K-L+1,K-L+1), LDT) + $ TAU(K-L+1), T(K-L+1, K-L+1), LDT) * * Compute T_{2,1} * T_{2,1} = V_{2,2} * - CALL CLACPY('All', L, K-L, V(K-L+1,N-K+1), LDV, T(K-L+1,1), - $ LDT) + CALL CLACPY('All', L, K-L, V(K-L+1, N-K+1), LDV, + $ T(K-L+1, 1), LDT) * * T_{2,1} = T_{2,1}*V_{1,2}' * CALL CTRMM('Right', 'Lower', 'Conjugate', 'Unit', L, K-L, - $ ONE, V(1, N-K+1), LDV, T(K-L+1,1), LDT) + $ ONE, V(1, N-K+1), LDV, T(K-L+1,1), LDT) * * T_{2,1} = V_{2,1}*V_{1,1}' + T_{2,1} * Note: We assume K <= N, and GEMM will do nothing if N=K * CALL CGEMM('No transpose', 'Conjugate', L, K-L, N-K, ONE, - $ V(K-L+1,1), LDV, V, LDV, ONE, T(K-L+1,1), LDT) + $ V(K-L+1, 1), LDV, V, LDV, ONE, T(K-L+1, 1), + $ LDT) * * At this point, we have that T_{2,1} = V_2*V_1' @@ -615,12 +619,13 @@ RECURSIVE SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, * T_{2,1} = -T_{2,2}*T_{2,1} * CALL CTRMM('Left', 'Lower', 'No tranpose', 'Non-unit', L, - $ K-L, NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) + $ K-L, NEG_ONE, T(K-L+1, K-L+1), LDT, + $ T(K-L+1, 1), LDT) * * T_{2,1} = T_{2,1}*T_{1,1} * CALL CTRMM('Right', 'Lower', 'No tranpose', 'Non-unit', L, - $ K-L, ONE, T, LDT, T(K-L+1,1), LDT) + $ K-L, ONE, T, LDT, T(K-L+1, 1), LDT) END IF END SUBROUTINE diff --git a/SRC/dlarft.f b/SRC/dlarft.f index 679de121bf..c27bb1a806 100644 --- a/SRC/dlarft.f +++ b/SRC/dlarft.f @@ -301,29 +301,30 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, * * Compute T_{2,2} recursively * - CALL DLARFT(DIRECT, STOREV, N-L, K-L, V(L+1,L+1), LDV, - $ TAU(L+1), T(L+1,L+1), LDT) + CALL DLARFT(DIRECT, STOREV, N-L, K-L, V(L+1, L+1), LDV, + $ TAU(L+1), T(L+1, L+1), LDT) * * Compute T_{1,2} * T_{1,2} = V_{2,1}' * DO J = 1, L DO I = 1, K-L - T(J,L+I) = V(L+I,J) + T(J, L+I) = V(L+I, J) END DO END DO * * T_{1,2} = T_{1,2}*V_{2,2} * CALL DTRMM('Right', 'Lower', 'No transpose', 'Unit', L, - $ K-L, ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) + $ K-L, ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) * * T_{1,2} = V_{3,1}'*V_{3,2} + T_{1,2} * Note: We assume K <= N, and GEMM will do nothing if N=K * CALL DGEMM('Transpose', 'No transpose', L, K-L, N-K, ONE, - $ V(K+1, 1), LDV, V(K+1,L+1), LDV, ONE, T(1, L+1), LDT) + $ V(K+1, 1), LDV, V(K+1, L+1), LDV, ONE, + $ T(1, L+1), LDT) * * At this point, we have that T_{1,2} = V_1'*V_2 * All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} @@ -332,12 +333,12 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, * T_{1,2} = -T_{1,1}*T_{1,2} * CALL DTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, - $ K-L, NEG_ONE, T, LDT, T(1, L+1), LDT) + $ K-L, NEG_ONE, T, LDT, T(1, L+1), LDT) * * T_{1,2} = T_{1,2}*T_{2,2} * CALL DTRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, - $ K-L, ONE, T(L+1,L+1), LDT, T(1, L+1), LDT) + $ K-L, ONE, T(L+1, L+1), LDT, T(1, L+1), LDT) ELSE IF(LQ) THEN * @@ -395,26 +396,27 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, * * Compute T_{2,2} recursively * - CALL DLARFT(DIRECT, STOREV, N-L, K-L, V(L+1,L+1), LDV, - $ TAU(L+1), T(L+1,L+1), LDT) + CALL DLARFT(DIRECT, STOREV, N-L, K-L, V(L+1, L+1), LDV, + $ TAU(L+1), T(L+1, L+1), LDT) * * Compute T_{1,2} * T_{1,2} = V_{1,2} * - CALL DLACPY('All', L, K - L, V(1,L+1), LDV, T(1, L+1), LDT) + CALL DLACPY('All', L, K-L, V(1, L+1), LDV, T(1, L+1), LDT) * * T_{1,2} = T_{1,2}*V_{2,2}' * CALL DTRMM('Right', 'Upper', 'Transpose', 'Unit', L, K-L, - $ ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) + $ ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) * * T_{1,2} = V_{1,3}*V_{2,3}' + T_{1,2} * Note: We assume K <= N, and GEMM will do nothing if N=K * CALL DGEMM('No transpose', 'Transpose', L, K-L, N-K, ONE, - $ V(1, K+1), LDV, V(L+1, K+1), LDV, ONE, T(1, L+1), LDT) + $ V(1, K+1), LDV, V(L+1, K+1), LDV, ONE, + $ T(1, L+1), LDT) * * At this point, we have that T_{1,2} = V_1*V_2' * All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} @@ -423,13 +425,13 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, * T_{1,2} = -T_{1,1}*T_{1,2} * CALL DTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, - $ K-L, NEG_ONE, T, LDT, T(1, L+1), LDT) + $ K-L, NEG_ONE, T, LDT, T(1, L+1), LDT) * * T_{1,2} = T_{1,2}*T_{2,2} * CALL DTRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, - $ K-L, ONE, T(L+1,L+1), LDT, T(1, L+1), LDT) + $ K-L, ONE, T(L+1, L+1), LDT, T(1, L+1), LDT) ELSE IF(QL) THEN * * Break V apart into 6 components @@ -487,28 +489,29 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, * Compute T_{2,2} recursively * CALL DLARFT(DIRECT, STOREV, N, L, V(1, K-L+1), LDV, - $ TAU(K-L+1), T(K-L+1,K-L+1), LDT) + $ TAU(K-L+1), T(K-L+1, K-L+1), LDT) * * Compute T_{2,1} * T_{2,1} = V_{2,2}' * DO J = 1, K-L DO I = 1, L - T(K-L+I,J) = V(N-K+J, K-L+I) + T(K-L+I, J) = V(N-K+J, K-L+I) END DO END DO * * T_{2,1} = T_{2,1}*V_{2,1} * CALL DTRMM('Right', 'Upper', 'No transpose', 'Unit', L, - $ K-L, ONE, V(N-K+1,1), LDV, T(K-L+1,1), LDT) + $ K-L, ONE, V(N-K+1, 1), LDV, T(K-L+1, 1), LDT) * * T_{2,1} = V_{2,2}'*V_{2,1} + T_{2,1} * Note: We assume K <= N, and GEMM will do nothing if N=K * CALL DGEMM('Transpose', 'No transpose', L, K-L, N-K, ONE, - $ V(1,K-L+1), LDV, V, LDV, ONE, T(K-L+1,1), LDT) + $ V(1, K-L+1), LDV, V, LDV, ONE, T(K-L+1, 1), + $ LDT) * * At this point, we have that T_{2,1} = V_2'*V_1 * All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} @@ -517,12 +520,13 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, * T_{2,1} = -T_{2,2}*T_{2,1} * CALL DTRMM('Left', 'Lower', 'No transpose', 'Non-unit', L, - $ K-L, NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) + $ K-L, NEG_ONE, T(K-L+1, K-L+1), LDT, + $ T(K-L+1, 1), LDT) * * T_{2,1} = T_{2,1}*T_{1,1} * CALL DTRMM('Right', 'Lower', 'No transpose', 'Non-unit', L, - $ K-L, ONE, T, LDT, T(K-L+1,1), LDT) + $ K-L, ONE, T, LDT, T(K-L+1, 1), LDT) ELSE * * Else means RQ case @@ -581,27 +585,28 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, * * Compute T_{2,2} recursively * - CALL DLARFT(DIRECT, STOREV, N, L, V(K-L+1,1), LDV, - $ TAU(K-L+1), T(K-L+1,K-L+1), LDT) + CALL DLARFT(DIRECT, STOREV, N, L, V(K-L+1, 1), LDV, + $ TAU(K-L+1), T(K-L+1, K-L+1), LDT) * * Compute T_{2,1} * T_{2,1} = V_{2,2} * - CALL DLACPY('All', L, K-L, V(K-L+1,N-K+1), LDV, T(K-L+1,1), - $ LDT) + CALL DLACPY('All', L, K-L, V(K-L+1, N-K+1), LDV, + $ T(K-L+1, 1), LDT) * * T_{2,1} = T_{2,1}*V_{1,2}' * CALL DTRMM('Right', 'Lower', 'Transpose', 'Unit', L, K-L, - $ ONE, V(1, N-K+1), LDV, T(K-L+1,1), LDT) + $ ONE, V(1, N-K+1), LDV, T(K-L+1, 1), LDT) * * T_{2,1} = V_{2,1}*V_{1,1}' + T_{2,1} * Note: We assume K <= N, and GEMM will do nothing if N=K * CALL DGEMM('No transpose', 'Transpose', L, K-L, N-K, ONE, - $ V(K-L+1,1), LDV, V, LDV, ONE, T(K-L+1,1), LDT) + $ V(K-L+1, 1), LDV, V, LDV, ONE, T(K-L+1, 1), + $ LDT) * * At this point, we have that T_{2,1} = V_2*V_1' @@ -611,12 +616,13 @@ RECURSIVE SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, * T_{2,1} = -T_{2,2}*T_{2,1} * CALL DTRMM('Left', 'Lower', 'No tranpose', 'Non-unit', L, - $ K-L, NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) + $ K-L, NEG_ONE, T(K-L+1, K-L+1), LDT, + $ T(K-L+1, 1), LDT) * * T_{2,1} = T_{2,1}*T_{1,1} * CALL DTRMM('Right', 'Lower', 'No tranpose', 'Non-unit', L, - $ K-L, ONE, T, LDT, T(K-L+1,1), LDT) + $ K-L, ONE, T, LDT, T(K-L+1, 1), LDT) END IF END SUBROUTINE diff --git a/SRC/slarft.f b/SRC/slarft.f index aa508de2dd..ad3a4d924c 100644 --- a/SRC/slarft.f +++ b/SRC/slarft.f @@ -159,7 +159,7 @@ *> \endverbatim *> * ===================================================================== - RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, + RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, $ TAU, T, LDT ) * * -- LAPACK auxiliary routine -- @@ -301,29 +301,30 @@ RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * Compute T_{2,2} recursively * - CALL SLARFT(DIRECT, STOREV, N-L, K-L, V(L+1,L+1), LDV, - $ TAU(L+1), T(L+1,L+1), LDT) + CALL SLARFT(DIRECT, STOREV, N-L, K-L, V(L+1, L+1), LDV, + $ TAU(L+1), T(L+1, L+1), LDT) * * Compute T_{1,2} * T_{1,2} = V_{2,1}' * DO J = 1, L DO I = 1, K-L - T(J,L+I) = V(L+I,J) + T(J, L+I) = V(L+I, J) END DO END DO * * T_{1,2} = T_{1,2}*V_{2,2} * - CALL STRMM('Right', 'Lower', 'No transpose', 'Unit', L, K-L, - $ ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) + CALL STRMM('Right', 'Lower', 'No transpose', 'Unit', L, + $ K-L, ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) * * T_{1,2} = V_{3,1}'*V_{3,2} + T_{1,2} * Note: We assume K <= N, and GEMM will do nothing if N=K * CALL SGEMM('Transpose', 'No transpose', L, K-L, N-K, ONE, - $ V(K+1, 1), LDV, V(K+1,L+1), LDV, ONE, T(1, L+1), LDT) + $ V(K+1, 1), LDV, V(K+1, L+1), LDV, ONE, + $ T(1, L+1), LDT) * * At this point, we have that T_{1,2} = V_1'*V_2 * All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} @@ -332,12 +333,12 @@ RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * T_{1,2} = -T_{1,1}*T_{1,2} * CALL STRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, - $ K-L, NEG_ONE, T, LDT, T(1, L+1), LDT) + $ K-L, NEG_ONE, T, LDT, T(1, L+1), LDT) * * T_{1,2} = T_{1,2}*T_{2,2} * CALL STRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, - $ K-L, ONE, T(L+1,L+1), LDT, T(1, L+1), LDT) + $ K-L, ONE, T(L+1, L+1), LDT, T(1, L+1), LDT) ELSE IF(LQ) THEN * @@ -395,26 +396,27 @@ RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * Compute T_{2,2} recursively * - CALL SLARFT(DIRECT, STOREV, N-L, K-L, V(L+1,L+1), LDV, - $ TAU(L+1), T(L+1,L+1), LDT) + CALL SLARFT(DIRECT, STOREV, N-L, K-L, V(L+1, L+1), LDV, + $ TAU(L+1), T(L+1, L+1), LDT) * * Compute T_{1,2} * T_{1,2} = V_{1,2} * - CALL SLACPY('All', L, K - L, V(1,L+1), LDV, T(1, L+1), LDT) + CALL SLACPY('All', L, K-L, V(1, L+1), LDV, T(1, L+1), LDT) * * T_{1,2} = T_{1,2}*V_{2,2}' * - CALL STRMM('Right', 'Upper', 'Transpose', 'Unit', L, K-L, ONE, - $ V(L+1, L+1), LDV, T(1, L+1), LDT) + CALL STRMM('Right', 'Upper', 'Transpose', 'Unit', L, K-L, + $ ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) * * T_{1,2} = V_{1,3}*V_{2,3}' + T_{1,2} * Note: We assume K <= N, and GEMM will do nothing if N=K * CALL SGEMM('No transpose', 'Transpose', L, K-L, N-K, ONE, - $ V(1, K+1), LDV, V(L+1, K+1), LDV, ONE, T(1, L+1), LDT) + $ V(1, K+1), LDV, V(L+1, K+1), LDV, ONE, + $ T(1, L+1), LDT) * * At this point, we have that T_{1,2} = V_1*V_2' * All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} @@ -422,14 +424,14 @@ RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * T_{1,2} = -T_{1,1}*T_{1,2} * - CALL STRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, K-L, - $ NEG_ONE, T, LDT, T(1, L+1), LDT) + CALL STRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, + $ K-L, NEG_ONE, T, LDT, T(1, L+1), LDT) * * T_{1,2} = T_{1,2}*T_{2,2} * CALL STRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, - $ K-L, ONE, T(L+1,L+1), LDT, T(1, L+1), LDT) + $ K-L, ONE, T(L+1, L+1), LDT, T(1, L+1), LDT) ELSE IF(QL) THEN * * Break V apart into 6 components @@ -486,29 +488,30 @@ RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * Compute T_{2,2} recursively * - CALL SLARFT(DIRECT, STOREV, N, L, V(1, K-L+1), LDV, TAU(K-L+1), - $ T(K-L+1,K-L+1), LDT) + CALL SLARFT(DIRECT, STOREV, N, L, V(1, K-L+1), LDV, + $ TAU(K-L+1), T(K-L+1, K-L+1), LDT) * * Compute T_{2,1} * T_{2,1} = V_{2,2}' * DO J = 1, K-L DO I = 1, L - T(K-L+I,J) = V(N-K+J, K-L+I) + T(K-L+I, J) = V(N-K+J, K-L+I) END DO END DO * * T_{2,1} = T_{2,1}*V_{2,1} * - CALL STRMM('Right', 'Upper', 'No transpose', 'Unit', L, K-L, - $ ONE, V(N-K+1,1), LDV, T(K-L+1,1), LDT) + CALL STRMM('Right', 'Upper', 'No transpose', 'Unit', L, + $ K-L, ONE, V(N-K+1, 1), LDV, T(K-L+1, 1), LDT) * * T_{2,1} = V_{2,2}'*V_{2,1} + T_{2,1} * Note: We assume K <= N, and GEMM will do nothing if N=K * CALL SGEMM('Transpose', 'No transpose', L, K-L, N-K, ONE, - $ V(1,K-L+1), LDV, V, LDV, ONE, T(K-L+1,1), LDT) + $ V(1, K-L+1), LDV, V, LDV, ONE, T(K-L+1, 1), + $ LDT) * * At this point, we have that T_{2,1} = V_2'*V_1 * All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} @@ -516,13 +519,14 @@ RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * T_{2,1} = -T_{2,2}*T_{2,1} * - CALL STRMM('Left', 'Lower', 'No transpose', 'Non-unit', L, K-L, - $ NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) + CALL STRMM('Left', 'Lower', 'No transpose', 'Non-unit', L, + $ K-L, NEG_ONE, T(K-L+1, K-L+1), LDT, + $ T(K-L+1, 1), LDT) * * T_{2,1} = T_{2,1}*T_{1,1} * CALL STRMM('Right', 'Lower', 'No transpose', 'Non-unit', L, - $ K-L, ONE, T, LDT, T(K-L+1,1), LDT) + $ K-L, ONE, T, LDT, T(K-L+1, 1), LDT) ELSE * * Else means RQ case @@ -581,27 +585,28 @@ RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * Compute T_{2,2} recursively * - CALL SLARFT(DIRECT, STOREV, N, L, V(K-L+1,1), LDV, TAU(K-L+1), - $ T(K-L+1,K-L+1), LDT) + CALL SLARFT(DIRECT, STOREV, N, L, V(K-L+1, 1), LDV, + $ TAU(K-L+1), T(K-L+1, K-L+1), LDT) * * Compute T_{2,1} * T_{2,1} = V_{2,2} * - CALL SLACPY('All', L, K-L, V(K-L+1,N-K+1), LDV, T(K-L+1,1), - $ LDT) + CALL SLACPY('All', L, K-L, V(K-L+1, N-K+1), LDV, + $ T(K-L+1, 1), LDT) * * T_{2,1} = T_{2,1}*V_{1,2}' * - CALL STRMM('Right', 'Lower', 'Transpose', 'Unit', L, K-L, ONE, - $ V(1, N-K+1), LDV, T(K-L+1,1), LDT) + CALL STRMM('Right', 'Lower', 'Transpose', 'Unit', L, K-L, + $ ONE, V(1, N-K+1), LDV, T(K-L+1, 1), LDT) * * T_{2,1} = V_{2,1}*V_{1,1}' + T_{2,1} * Note: We assume K <= N, and GEMM will do nothing if N=K * CALL SGEMM('No transpose', 'Transpose', L, K-L, N-K, ONE, - $ V(K-L+1,1), LDV, V, LDV, ONE, T(K-L+1,1), LDT) + $ V(K-L+1, 1), LDV, V, LDV, ONE, T(K-L+1, 1), + $ LDT) * * At this point, we have that T_{2,1} = V_2*V_1' @@ -610,13 +615,14 @@ RECURSIVE SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * T_{2,1} = -T_{2,2}*T_{2,1} * - CALL STRMM('Left', 'Lower', 'No tranpose', 'Non-unit', L, K-L, - $ NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) + CALL STRMM('Left', 'Lower', 'No tranpose', 'Non-unit', L, + $ K-L, NEG_ONE, T(K-L+1, K-L+1), LDT, + $ T(K-L+1, 1), LDT) * * T_{2,1} = T_{2,1}*T_{1,1} * - CALL STRMM('Right', 'Lower', 'No tranpose', 'Non-unit', L, K-L, - $ ONE, T, LDT, T(K-L+1,1), LDT) + CALL STRMM('Right', 'Lower', 'No tranpose', 'Non-unit', L, + $ K-L, ONE, T, LDT, T(K-L+1, 1), LDT) END IF END SUBROUTINE diff --git a/SRC/zlarft.f b/SRC/zlarft.f index bccb4325e9..4d98b7f154 100644 --- a/SRC/zlarft.f +++ b/SRC/zlarft.f @@ -305,29 +305,30 @@ RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * Compute T_{2,2} recursively * - CALL ZLARFT(DIRECT, STOREV, N-L, K-L, V(L+1,L+1), LDV, - $ TAU(L+1), T(L+1,L+1), LDT) + CALL ZLARFT(DIRECT, STOREV, N-L, K-L, V(L+1, L+1), LDV, + $ TAU(L+1), T(L+1, L+1), LDT) * * Compute T_{1,2} * T_{1,2} = V_{2,1}' * DO J = 1, L DO I = 1, K-L - T(J,L+I) = CONJG(V(L+I,J)) + T(J, L+I) = CONJG(V(L+I, J)) END DO END DO * * T_{1,2} = T_{1,2}*V_{2,2} * - CALL ZTRMM('Right', 'Lower', 'No transpose', 'Unit', L, K-L, - $ ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) + CALL ZTRMM('Right', 'Lower', 'No transpose', 'Unit', L, + $ K-L, ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) * * T_{1,2} = V_{3,1}'*V_{3,2} + T_{1,2} * Note: We assume K <= N, and GEMM will do nothing if N=K * CALL ZGEMM('Conjugate', 'No transpose', L, K-L, N-K, ONE, - $ V(K+1, 1), LDV, V(K+1,L+1), LDV, ONE, T(1, L+1), LDT) + $ V(K+1, 1), LDV, V(K+1, L+1), LDV, ONE, + $ T(1, L+1), LDT) * * At this point, we have that T_{1,2} = V_1'*V_2 * All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} @@ -336,12 +337,12 @@ RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * T_{1,2} = -T_{1,1}*T_{1,2} * CALL ZTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, - $ K-L, NEG_ONE, T, LDT, T(1, L+1), LDT) + $ K-L, NEG_ONE, T, LDT, T(1, L+1), LDT) * * T_{1,2} = T_{1,2}*T_{2,2} * CALL ZTRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, - $ K-L, ONE, T(L+1,L+1), LDT, T(1, L+1), LDT) + $ K-L, ONE, T(L+1, L+1), LDT, T(1, L+1), LDT) ELSE IF(LQ) THEN * @@ -399,26 +400,27 @@ RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * Compute T_{2,2} recursively * - CALL ZLARFT(DIRECT, STOREV, N-L, K-L, V(L+1,L+1), LDV, - $ TAU(L+1), T(L+1,L+1), LDT) + CALL ZLARFT(DIRECT, STOREV, N-L, K-L, V(L+1, L+1), LDV, + $ TAU(L+1), T(L+1, L+1), LDT) * * Compute T_{1,2} * T_{1,2} = V_{1,2} * - CALL ZLACPY('All', L, K - L, V(1,L+1), LDV, T(1, L+1), LDT) + CALL ZLACPY('All', L, K-L, V(1, L+1), LDV, T(1, L+1), LDT) * * T_{1,2} = T_{1,2}*V_{2,2}' * - CALL ZTRMM('Right', 'Upper', 'Conjugate', 'Unit', L, K-L, ONE, - $ V(L+1, L+1), LDV, T(1, L+1), LDT) + CALL ZTRMM('Right', 'Upper', 'Conjugate', 'Unit', L, K-L, + $ ONE, V(L+1, L+1), LDV, T(1, L+1), LDT) * * T_{1,2} = V_{1,3}*V_{2,3}' + T_{1,2} * Note: We assume K <= N, and GEMM will do nothing if N=K * CALL ZGEMM('No transpose', 'Conjugate', L, K-L, N-K, ONE, - $ V(1, K+1), LDV, V(L+1, K+1), LDV, ONE, T(1, L+1), LDT) + $ V(1, K+1), LDV, V(L+1, K+1), LDV, ONE, + $ T(1, L+1), LDT) * * At this point, we have that T_{1,2} = V_1*V_2' * All that is left is to pre and post multiply by -T_{1,1} and T_{2,2} @@ -426,14 +428,14 @@ RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * T_{1,2} = -T_{1,1}*T_{1,2} * - CALL ZTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, K-L, - $ NEG_ONE, T, LDT, T(1, L+1), LDT) + CALL ZTRMM('Left', 'Upper', 'No transpose', 'Non-unit', L, + $ K-L, NEG_ONE, T, LDT, T(1, L+1), LDT) * * T_{1,2} = T_{1,2}*T_{2,2} * CALL ZTRMM('Right', 'Upper', 'No transpose', 'Non-unit', L, - $ K-L, ONE, T(L+1,L+1), LDT, T(1, L+1), LDT) + $ K-L, ONE, T(L+1, L+1), LDT, T(1, L+1), LDT) ELSE IF(QL) THEN * * Break V apart into 6 components @@ -490,29 +492,30 @@ RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * Compute T_{2,2} recursively * - CALL ZLARFT(DIRECT, STOREV, N, L, V(1, K-L+1), LDV, TAU(K-L+1), - $ T(K-L+1,K-L+1), LDT) + CALL ZLARFT(DIRECT, STOREV, N, L, V(1, K-L+1), LDV, + $ TAU(K-L+1), T(K-L+1, K-L+1), LDT) * * Compute T_{2,1} * T_{2,1} = V_{2,2}' * DO J = 1, K-L DO I = 1, L - T(K-L+I,J) = CONJG(V(N-K+J, K-L+I)) + T(K-L+I, J) = CONJG(V(N-K+J, K-L+I)) END DO END DO * * T_{2,1} = T_{2,1}*V_{2,1} * - CALL ZTRMM('Right', 'Upper', 'No transpose', 'Unit', L, K-L, - $ ONE, V(N-K+1,1), LDV, T(K-L+1,1), LDT) + CALL ZTRMM('Right', 'Upper', 'No transpose', 'Unit', L, + $ K-L, ONE, V(N-K+1, 1), LDV, T(K-L+1, 1), LDT) * * T_{2,1} = V_{2,2}'*V_{2,1} + T_{2,1} * Note: We assume K <= N, and GEMM will do nothing if N=K * CALL ZGEMM('Conjugate', 'No transpose', L, K-L, N-K, ONE, - $ V(1,K-L+1), LDV, V, LDV, ONE, T(K-L+1,1), LDT) + $ V(1, K-L+1), LDV, V, LDV, ONE, T(K-L+1, 1), + $ LDT) * * At this point, we have that T_{2,1} = V_2'*V_1 * All that is left is to pre and post multiply by -T_{2,2} and T_{1,1} @@ -520,13 +523,14 @@ RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * T_{2,1} = -T_{2,2}*T_{2,1} * - CALL ZTRMM('Left', 'Lower', 'No transpose', 'Non-unit', L, K-L, - $ NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) + CALL ZTRMM('Left', 'Lower', 'No transpose', 'Non-unit', L, + $ K-L, NEG_ONE, T(K-L+1, K-L+1), LDT, + $ T(K-L+1, 1), LDT) * * T_{2,1} = T_{2,1}*T_{1,1} * CALL ZTRMM('Right', 'Lower', 'No transpose', 'Non-unit', L, - $ K-L, ONE, T, LDT, T(K-L+1,1), LDT) + $ K-L, ONE, T, LDT, T(K-L+1, 1), LDT) ELSE * * Else means RQ case @@ -585,27 +589,28 @@ RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * Compute T_{2,2} recursively * - CALL ZLARFT(DIRECT, STOREV, N, L, V(K-L+1,1), LDV, TAU(K-L+1), - $ T(K-L+1,K-L+1), LDT) + CALL ZLARFT(DIRECT, STOREV, N, L, V(K-L+1, 1), LDV, + $ TAU(K-L+1), T(K-L+1, K-L+1), LDT) * * Compute T_{2,1} * T_{2,1} = V_{2,2} * - CALL ZLACPY('All', L, K-L, V(K-L+1,N-K+1), LDV, T(K-L+1,1), - $ LDT) + CALL ZLACPY('All', L, K-L, V(K-L+1, N-K+1), LDV, + $ T(K-L+1, 1), LDT) * * T_{2,1} = T_{2,1}*V_{1,2}' * - CALL ZTRMM('Right', 'Lower', 'Conjugate', 'Unit', L, K-L, ONE, - $ V(1, N-K+1), LDV, T(K-L+1,1), LDT) + CALL ZTRMM('Right', 'Lower', 'Conjugate', 'Unit', L, K-L, + $ ONE, V(1, N-K+1), LDV, T(K-L+1, 1), LDT) * * T_{2,1} = V_{2,1}*V_{1,1}' + T_{2,1} * Note: We assume K <= N, and GEMM will do nothing if N=K * CALL ZGEMM('No transpose', 'Conjugate', L, K-L, N-K, ONE, - $ V(K-L+1,1), LDV, V, LDV, ONE, T(K-L+1,1), LDT) + $ V(K-L+1, 1), LDV, V, LDV, ONE, T(K-L+1, 1), + $ LDT) * * At this point, we have that T_{2,1} = V_2*V_1' @@ -614,13 +619,14 @@ RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, * * T_{2,1} = -T_{2,2}*T_{2,1} * - CALL ZTRMM('Left', 'Lower', 'No tranpose', 'Non-unit', L, K-L, - $ NEG_ONE, T(K-L+1,K-L+1), LDT, T(K-L+1,1), LDT) + CALL ZTRMM('Left', 'Lower', 'No tranpose', 'Non-unit', L, + $ K-L, NEG_ONE, T(K-L+1, K-L+1), LDT, + $ T(K-L+1, 1), LDT) * * T_{2,1} = T_{2,1}*T_{1,1} * - CALL ZTRMM('Right', 'Lower', 'No tranpose', 'Non-unit', L, K-L, - $ ONE, T, LDT, T(K-L+1,1), LDT) + CALL ZTRMM('Right', 'Lower', 'No tranpose', 'Non-unit', L, + $ K-L, ONE, T, LDT, T(K-L+1, 1), LDT) END IF END SUBROUTINE From e9b05ef6e96673b875089b899188cf227d8767c8 Mon Sep 17 00:00:00 2001 From: Johnathan Rhyne Date: Sat, 30 Nov 2024 11:19:15 -0500 Subject: [PATCH 10/10] Updated variants information as well as fixed trailing line in zlarft --- SRC/VARIANTS/Makefile | 10 ++++++++-- SRC/VARIANTS/README | 2 ++ SRC/VARIANTS/larft/LL-LVL2/clarft.f | 2 +- SRC/VARIANTS/larft/LL-LVL2/dlarft.f | 2 +- SRC/VARIANTS/larft/LL-LVL2/slarft.f | 2 +- SRC/VARIANTS/larft/LL-LVL2/zlarft.f | 2 +- SRC/zlarft.f | 4 ++-- 7 files changed, 16 insertions(+), 8 deletions(-) diff --git a/SRC/VARIANTS/Makefile b/SRC/VARIANTS/Makefile index 35e50cbc2c..4b0575cc6f 100644 --- a/SRC/VARIANTS/Makefile +++ b/SRC/VARIANTS/Makefile @@ -30,9 +30,11 @@ LUREC = lu/REC/cgetrf.o lu/REC/dgetrf.o lu/REC/sgetrf.o lu/REC/zgetrf.o QRLL = qr/LL/cgeqrf.o qr/LL/dgeqrf.o qr/LL/sgeqrf.o qr/LL/zgeqrf.o +LARFTL2 = larft/LL-LVL2/clarft.o larft/LL-LVL2/dlarft.o larft/LL-LVL2/slarft.o larft/LL-LVL2/zlarft.o + .PHONY: all -all: cholrl.a choltop.a lucr.a lull.a lurec.a qrll.a +all: cholrl.a choltop.a lucr.a lull.a lurec.a qrll.a larftl2.a cholrl.a: $(CHOLRL) $(AR) $(ARFLAGS) $@ $^ @@ -58,9 +60,13 @@ qrll.a: $(QRLL) $(AR) $(ARFLAGS) $@ $^ $(RANLIB) $@ +larftl2.a: $(LARFTL2) + $(AR) $(ARFLAGS) $@ $^ + $(RANLIB) $@ + .PHONY: clean cleanobj cleanlib clean: cleanobj cleanlib cleanobj: - rm -f $(CHOLRL) $(CHOLTOP) $(LUCR) $(LULL) $(LUREC) $(QRLL) + rm -f $(CHOLRL) $(CHOLTOP) $(LUCR) $(LULL) $(LUREC) $(QRLL) $(LARFTL2) cleanlib: rm -f *.a diff --git a/SRC/VARIANTS/README b/SRC/VARIANTS/README index ef7626debe..217cfa3e01 100644 --- a/SRC/VARIANTS/README +++ b/SRC/VARIANTS/README @@ -23,6 +23,7 @@ This directory contains several variants of LAPACK routines in single/double/com - [sdcz]geqrf with QR Left Looking Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/qr/LL - [sdcz]potrf with Cholesky Right Looking Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/cholesky/RL - [sdcz]potrf with Cholesky Top Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/cholesky/TOP + - [sdcz]larft using a Left Looking Level 2 BLAS version algorithm - Directory: SRC/VARIANTS/larft/LL-LVL2 References:For a more detailed description please refer to - [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997), @@ -44,6 +45,7 @@ Corresponding libraries created in SRC/VARIANTS: - QR Left Looking : qrll.a - Cholesky Right Looking : cholrl.a - Cholesky Top : choltop.a + - LARFT Level 2: larftl2.a =========== diff --git a/SRC/VARIANTS/larft/LL-LVL2/clarft.f b/SRC/VARIANTS/larft/LL-LVL2/clarft.f index 9e2e4503e3..9a7000eff3 100644 --- a/SRC/VARIANTS/larft/LL-LVL2/clarft.f +++ b/SRC/VARIANTS/larft/LL-LVL2/clarft.f @@ -1,4 +1,4 @@ -*> \brief \b CLARFT forms the triangular factor T of a block reflector H = I - vtvH +*> \brief \b CLARFT VARIANT: left-looking Level 2 BLAS version of the algorithm * * =========== DOCUMENTATION =========== * diff --git a/SRC/VARIANTS/larft/LL-LVL2/dlarft.f b/SRC/VARIANTS/larft/LL-LVL2/dlarft.f index d9ef2f77b6..19b7c7b1b2 100644 --- a/SRC/VARIANTS/larft/LL-LVL2/dlarft.f +++ b/SRC/VARIANTS/larft/LL-LVL2/dlarft.f @@ -1,4 +1,4 @@ -*> \brief \b DLARFT forms the triangular factor T of a block reflector H = I - vtvH +*> \brief \b DLARFT VARIANT: left-looking Level 2 BLAS version of the algorithm * * =========== DOCUMENTATION =========== * diff --git a/SRC/VARIANTS/larft/LL-LVL2/slarft.f b/SRC/VARIANTS/larft/LL-LVL2/slarft.f index 31b7951819..e1578e2587 100644 --- a/SRC/VARIANTS/larft/LL-LVL2/slarft.f +++ b/SRC/VARIANTS/larft/LL-LVL2/slarft.f @@ -1,4 +1,4 @@ -*> \brief \b SLARFT forms the triangular factor T of a block reflector H = I - vtvH +*> \brief \b SLARFT VARIANT: left-looking Level 2 BLAS version of the algorithm. * * =========== DOCUMENTATION =========== * diff --git a/SRC/VARIANTS/larft/LL-LVL2/zlarft.f b/SRC/VARIANTS/larft/LL-LVL2/zlarft.f index be773becc2..6abadd501e 100644 --- a/SRC/VARIANTS/larft/LL-LVL2/zlarft.f +++ b/SRC/VARIANTS/larft/LL-LVL2/zlarft.f @@ -1,4 +1,4 @@ -*> \brief \b ZLARFT forms the triangular factor T of a block reflector H = I - vtvH +*> \brief \b ZLARFT VARIANT: left-looking Level 2 BLAS version of the algorithm. * * =========== DOCUMENTATION =========== * diff --git a/SRC/zlarft.f b/SRC/zlarft.f index 4d98b7f154..900795afad 100644 --- a/SRC/zlarft.f +++ b/SRC/zlarft.f @@ -159,8 +159,8 @@ *> \endverbatim *> * ===================================================================== - RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, - $ LDT ) + RECURSIVE SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, + $ TAU, T, LDT ) * * -- LAPACK auxiliary routine -- * -- LAPACK is a software package provided by Univ. of Tennessee, --