From bc2b37472593ae055d74f5f047649f2c75f01700 Mon Sep 17 00:00:00 2001 From: Christoph Conrads Date: Tue, 7 Nov 2023 17:26:55 +0100 Subject: [PATCH 1/6] xORBDB6/xUNBDB6: do not require unit-norm vector Do not assume that the vector x passed to these subroutines has Euclidean norm one (the norm is almost surely smaller than one when xORBDB5/xUNBDB5 calls this subroutine for the first time). This commit fixes the occasional inaccurate results computed by xORCSD2BY1/xUNCSD2BY1. --- SRC/cunbdb6.f | 19 ++++++++++--------- SRC/dorbdb6.f | 19 ++++++++++--------- SRC/sorbdb6.f | 19 ++++++++++--------- SRC/zunbdb6.f | 19 ++++++++++--------- 4 files changed, 40 insertions(+), 36 deletions(-) diff --git a/SRC/cunbdb6.f b/SRC/cunbdb6.f index cd14d9295c..f63992a16f 100644 --- a/SRC/cunbdb6.f +++ b/SRC/cunbdb6.f @@ -41,9 +41,8 @@ *> with respect to the columns of *> Q = [ Q1 ] . *> [ Q2 ] -*> The Euclidean norm of X must be one and the columns of Q must be -*> orthonormal. The orthogonalized vector will be zero if and only if it -*> lies entirely in the range of Q. +*> The columns of Q must be orthonormal. The orthogonalized vector will +*> be zero if and only if it lies entirely in the range of Q. *> *> The projection is computed with at most two iterations of the *> classical Gram-Schmidt algorithm, see @@ -223,14 +222,16 @@ SUBROUTINE CUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, * EPS = SLAMCH( 'Precision' ) * +* Compute the Euclidean norm of X +* + SCL = REALZERO + SSQ = REALZERO + CALL CLASSQ( M1, X1, INCX1, SCL, SSQ ) + CALL CLASSQ( M2, X2, INCX2, SCL, SSQ ) + NORM = SCL * SQRT( SSQ ) +* * First, project X onto the orthogonal complement of Q's column * space -* -* Christoph Conrads: In debugging mode the norm should be computed -* and an assertion added comparing the norm with one. Alas, Fortran -* never made it into 1989 when assert() was introduced into the C -* programming language. - NORM = REALONE * IF( M1 .EQ. 0 ) THEN DO I = 1, N diff --git a/SRC/dorbdb6.f b/SRC/dorbdb6.f index 1428876847..ecc0db1dc6 100644 --- a/SRC/dorbdb6.f +++ b/SRC/dorbdb6.f @@ -41,9 +41,8 @@ *> with respect to the columns of *> Q = [ Q1 ] . *> [ Q2 ] -*> The Euclidean norm of X must be one and the columns of Q must be -*> orthonormal. The orthogonalized vector will be zero if and only if it -*> lies entirely in the range of Q. +*> The columns of Q must be orthonormal. The orthogonalized vector will +*> be zero if and only if it lies entirely in the range of Q. *> *> The projection is computed with at most two iterations of the *> classical Gram-Schmidt algorithm, see @@ -222,14 +221,16 @@ SUBROUTINE DORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, * EPS = DLAMCH( 'Precision' ) * +* Compute the Euclidean norm of X +* + SCL = REALZERO + SSQ = REALZERO + CALL DLASSQ( M1, X1, INCX1, SCL, SSQ ) + CALL DLASSQ( M2, X2, INCX2, SCL, SSQ ) + NORM = SCL * SQRT( SSQ ) +* * First, project X onto the orthogonal complement of Q's column * space -* -* Christoph Conrads: In debugging mode the norm should be computed -* and an assertion added comparing the norm with one. Alas, Fortran -* never made it into 1989 when assert() was introduced into the C -* programming language. - NORM = REALONE * IF( M1 .EQ. 0 ) THEN DO I = 1, N diff --git a/SRC/sorbdb6.f b/SRC/sorbdb6.f index d320c9e464..646fc1ef23 100644 --- a/SRC/sorbdb6.f +++ b/SRC/sorbdb6.f @@ -41,9 +41,8 @@ *> with respect to the columns of *> Q = [ Q1 ] . *> [ Q2 ] -*> The Euclidean norm of X must be one and the columns of Q must be -*> orthonormal. The orthogonalized vector will be zero if and only if it -*> lies entirely in the range of Q. +*> The columns of Q must be orthonormal. The orthogonalized vector will +*> be zero if and only if it lies entirely in the range of Q. *> *> The projection is computed with at most two iterations of the *> classical Gram-Schmidt algorithm, see @@ -222,14 +221,16 @@ SUBROUTINE SORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, * EPS = SLAMCH( 'Precision' ) * +* Compute the Euclidean norm of X +* + SCL = REALZERO + SSQ = REALZERO + CALL SLASSQ( M1, X1, INCX1, SCL, SSQ ) + CALL SLASSQ( M2, X2, INCX2, SCL, SSQ ) + NORM = SCL * SQRT( SSQ ) +* * First, project X onto the orthogonal complement of Q's column * space -* -* Christoph Conrads: In debugging mode the norm should be computed -* and an assertion added comparing the norm with one. Alas, Fortran -* never made it into 1989 when assert() was introduced into the C -* programming language. - NORM = REALONE * IF( M1 .EQ. 0 ) THEN DO I = 1, N diff --git a/SRC/zunbdb6.f b/SRC/zunbdb6.f index ac7fa4be3b..bcc1aa6f16 100644 --- a/SRC/zunbdb6.f +++ b/SRC/zunbdb6.f @@ -41,9 +41,8 @@ *> with respect to the columns of *> Q = [ Q1 ] . *> [ Q2 ] -*> The Euclidean norm of X must be one and the columns of Q must be -*> orthonormal. The orthogonalized vector will be zero if and only if it -*> lies entirely in the range of Q. +*> The columns of Q must be orthonormal. The orthogonalized vector will +*> be zero if and only if it lies entirely in the range of Q. *> *> The projection is computed with at most two iterations of the *> classical Gram-Schmidt algorithm, see @@ -223,14 +222,16 @@ SUBROUTINE ZUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, * EPS = DLAMCH( 'Precision' ) * +* Compute the Euclidean norm of X +* + SCL = REALZERO + SSQ = REALZERO + CALL ZLASSQ( M1, X1, INCX1, SCL, SSQ ) + CALL ZLASSQ( M2, X2, INCX2, SCL, SSQ ) + NORM = SCL * SQRT( SSQ ) +* * First, project X onto the orthogonal complement of Q's column * space -* -* Christoph Conrads: In debugging mode the norm should be computed -* and an assertion added comparing the norm with one. Alas, Fortran -* never made it into 1989 when assert() was introduced into the C -* programming language. - NORM = REALONE * IF( M1 .EQ. 0 ) THEN DO I = 1, N From ad3d9b8fc4de79737b4ec69adc1d91515360618e Mon Sep 17 00:00:00 2001 From: Christoph Conrads Date: Thu, 9 Nov 2023 12:07:25 +0100 Subject: [PATCH 2/6] xORBDB6/xUNBDB6: use safe termination criterion With a modest amount of random testing on a work station, it is possible to detect the loss of orthogonality caused by the old early termination criterion. --- SRC/cunbdb6.f | 2 +- SRC/dorbdb6.f | 2 +- SRC/sorbdb6.f | 2 +- SRC/zunbdb6.f | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/SRC/cunbdb6.f b/SRC/cunbdb6.f index f63992a16f..566fd76b7c 100644 --- a/SRC/cunbdb6.f +++ b/SRC/cunbdb6.f @@ -173,7 +173,7 @@ SUBROUTINE CUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, * * .. Parameters .. REAL ALPHA, REALONE, REALZERO - PARAMETER ( ALPHA = 0.1E0, REALONE = 1.0E0, + PARAMETER ( ALPHA = 0.83E0, REALONE = 1.0E0, $ REALZERO = 0.0E0 ) COMPLEX NEGONE, ONE, ZERO PARAMETER ( NEGONE = (-1.0E0,0.0E0), ONE = (1.0E0,0.0E0), diff --git a/SRC/dorbdb6.f b/SRC/dorbdb6.f index ecc0db1dc6..3e356d0010 100644 --- a/SRC/dorbdb6.f +++ b/SRC/dorbdb6.f @@ -173,7 +173,7 @@ SUBROUTINE DORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, * * .. Parameters .. DOUBLE PRECISION ALPHA, REALONE, REALZERO - PARAMETER ( ALPHA = 0.1D0, REALONE = 1.0D0, + PARAMETER ( ALPHA = 0.83D0, REALONE = 1.0D0, $ REALZERO = 0.0D0 ) DOUBLE PRECISION NEGONE, ONE, ZERO PARAMETER ( NEGONE = -1.0D0, ONE = 1.0D0, ZERO = 0.0D0 ) diff --git a/SRC/sorbdb6.f b/SRC/sorbdb6.f index 646fc1ef23..eac1777225 100644 --- a/SRC/sorbdb6.f +++ b/SRC/sorbdb6.f @@ -173,7 +173,7 @@ SUBROUTINE SORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, * * .. Parameters .. REAL ALPHA, REALONE, REALZERO - PARAMETER ( ALPHA = 0.1E0, REALONE = 1.0E0, + PARAMETER ( ALPHA = 0.83E0, REALONE = 1.0E0, $ REALZERO = 0.0E0 ) REAL NEGONE, ONE, ZERO PARAMETER ( NEGONE = -1.0E0, ONE = 1.0E0, ZERO = 0.0E0 ) diff --git a/SRC/zunbdb6.f b/SRC/zunbdb6.f index bcc1aa6f16..ddc9dfc61f 100644 --- a/SRC/zunbdb6.f +++ b/SRC/zunbdb6.f @@ -173,7 +173,7 @@ SUBROUTINE ZUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, * * .. Parameters .. DOUBLE PRECISION ALPHA, REALONE, REALZERO - PARAMETER ( ALPHA = 0.1D0, REALONE = 1.0D0, + PARAMETER ( ALPHA = 0.83D0, REALONE = 1.0D0, $ REALZERO = 0.0D0 ) COMPLEX*16 NEGONE, ONE, ZERO PARAMETER ( NEGONE = (-1.0D0,0.0D0), ONE = (1.0D0,0.0D0), From 80d7c69a523931c175c4968678b06d1fe78598d3 Mon Sep 17 00:00:00 2001 From: Christoph Conrads Date: Thu, 9 Nov 2023 14:40:44 +0100 Subject: [PATCH 3/6] xORBDB5/xUNBDB5: introduce real zero parameter * introduce a parameter representing a real zero for consistency * stop comparing real values with complex zero --- SRC/cunbdb5.f | 14 ++++++++------ SRC/dorbdb5.f | 14 ++++++++------ SRC/sorbdb5.f | 14 ++++++++------ SRC/zunbdb5.f | 14 ++++++++------ 4 files changed, 32 insertions(+), 24 deletions(-) diff --git a/SRC/cunbdb5.f b/SRC/cunbdb5.f index bbe2c4846e..574edb1127 100644 --- a/SRC/cunbdb5.f +++ b/SRC/cunbdb5.f @@ -169,6 +169,8 @@ SUBROUTINE CUNBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, * ===================================================================== * * .. Parameters .. + REAL REALZERO + PARAMETER ( REALZERO = 0.0E0 ) COMPLEX ONE, ZERO PARAMETER ( ONE = (1.0E0,0.0E0), ZERO = (0.0E0,0.0E0) ) * .. @@ -220,8 +222,8 @@ SUBROUTINE CUNBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, * * If the projection is nonzero, then return * - IF( SCNRM2(M1,X1,INCX1) .NE. ZERO - $ .OR. SCNRM2(M2,X2,INCX2) .NE. ZERO ) THEN + IF( SCNRM2(M1,X1,INCX1) .NE. REALZERO + $ .OR. SCNRM2(M2,X2,INCX2) .NE. REALZERO ) THEN RETURN END IF * @@ -238,8 +240,8 @@ SUBROUTINE CUNBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, END DO CALL CUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, $ LDQ2, WORK, LWORK, CHILDINFO ) - IF( SCNRM2(M1,X1,INCX1) .NE. ZERO - $ .OR. SCNRM2(M2,X2,INCX2) .NE. ZERO ) THEN + IF( SCNRM2(M1,X1,INCX1) .NE. REALZERO + $ .OR. SCNRM2(M2,X2,INCX2) .NE. REALZERO ) THEN RETURN END IF END DO @@ -257,8 +259,8 @@ SUBROUTINE CUNBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, X2(I) = ONE CALL CUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, $ LDQ2, WORK, LWORK, CHILDINFO ) - IF( SCNRM2(M1,X1,INCX1) .NE. ZERO - $ .OR. SCNRM2(M2,X2,INCX2) .NE. ZERO ) THEN + IF( SCNRM2(M1,X1,INCX1) .NE. REALZERO + $ .OR. SCNRM2(M2,X2,INCX2) .NE. REALZERO ) THEN RETURN END IF END DO diff --git a/SRC/dorbdb5.f b/SRC/dorbdb5.f index 07b7a024d3..ccf2e9e938 100644 --- a/SRC/dorbdb5.f +++ b/SRC/dorbdb5.f @@ -169,6 +169,8 @@ SUBROUTINE DORBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, * ===================================================================== * * .. Parameters .. + DOUBLE PRECISION REALZERO + PARAMETER ( REALZERO = 0.0D0 ) DOUBLE PRECISION ONE, ZERO PARAMETER ( ONE = 1.0D0, ZERO = 0.0D0 ) * .. @@ -220,8 +222,8 @@ SUBROUTINE DORBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, * * If the projection is nonzero, then return * - IF( DNRM2(M1,X1,INCX1) .NE. ZERO - $ .OR. DNRM2(M2,X2,INCX2) .NE. ZERO ) THEN + IF( DNRM2(M1,X1,INCX1) .NE. REALZERO + $ .OR. DNRM2(M2,X2,INCX2) .NE. REALZERO ) THEN RETURN END IF * @@ -238,8 +240,8 @@ SUBROUTINE DORBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, END DO CALL DORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, $ LDQ2, WORK, LWORK, CHILDINFO ) - IF( DNRM2(M1,X1,INCX1) .NE. ZERO - $ .OR. DNRM2(M2,X2,INCX2) .NE. ZERO ) THEN + IF( DNRM2(M1,X1,INCX1) .NE. REALZERO + $ .OR. DNRM2(M2,X2,INCX2) .NE. REALZERO ) THEN RETURN END IF END DO @@ -257,8 +259,8 @@ SUBROUTINE DORBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, X2(I) = ONE CALL DORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, $ LDQ2, WORK, LWORK, CHILDINFO ) - IF( DNRM2(M1,X1,INCX1) .NE. ZERO - $ .OR. DNRM2(M2,X2,INCX2) .NE. ZERO ) THEN + IF( DNRM2(M1,X1,INCX1) .NE. REALZERO + $ .OR. DNRM2(M2,X2,INCX2) .NE. REALZERO ) THEN RETURN END IF END DO diff --git a/SRC/sorbdb5.f b/SRC/sorbdb5.f index 50dadbe21a..dba68918f0 100644 --- a/SRC/sorbdb5.f +++ b/SRC/sorbdb5.f @@ -169,6 +169,8 @@ SUBROUTINE SORBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, * ===================================================================== * * .. Parameters .. + REAL REALZERO + PARAMETER ( REALZERO = 0.0E0 ) REAL ONE, ZERO PARAMETER ( ONE = 1.0E0, ZERO = 0.0E0 ) * .. @@ -220,8 +222,8 @@ SUBROUTINE SORBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, * * If the projection is nonzero, then return * - IF( SNRM2(M1,X1,INCX1) .NE. ZERO - $ .OR. SNRM2(M2,X2,INCX2) .NE. ZERO ) THEN + IF( SNRM2(M1,X1,INCX1) .NE. REALZERO + $ .OR. SNRM2(M2,X2,INCX2) .NE. REALZERO ) THEN RETURN END IF * @@ -238,8 +240,8 @@ SUBROUTINE SORBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, END DO CALL SORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, $ LDQ2, WORK, LWORK, CHILDINFO ) - IF( SNRM2(M1,X1,INCX1) .NE. ZERO - $ .OR. SNRM2(M2,X2,INCX2) .NE. ZERO ) THEN + IF( SNRM2(M1,X1,INCX1) .NE. REALZERO + $ .OR. SNRM2(M2,X2,INCX2) .NE. REALZERO ) THEN RETURN END IF END DO @@ -257,8 +259,8 @@ SUBROUTINE SORBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, X2(I) = ONE CALL SORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, $ LDQ2, WORK, LWORK, CHILDINFO ) - IF( SNRM2(M1,X1,INCX1) .NE. ZERO - $ .OR. SNRM2(M2,X2,INCX2) .NE. ZERO ) THEN + IF( SNRM2(M1,X1,INCX1) .NE. REALZERO + $ .OR. SNRM2(M2,X2,INCX2) .NE. REALZERO ) THEN RETURN END IF END DO diff --git a/SRC/zunbdb5.f b/SRC/zunbdb5.f index 6ccdde1ce4..72a00e6b50 100644 --- a/SRC/zunbdb5.f +++ b/SRC/zunbdb5.f @@ -169,6 +169,8 @@ SUBROUTINE ZUNBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, * ===================================================================== * * .. Parameters .. + DOUBLE PRECISION REALZERO + PARAMETER ( REALZERO = 0.0D0 ) COMPLEX*16 ONE, ZERO PARAMETER ( ONE = (1.0D0,0.0D0), ZERO = (0.0D0,0.0D0) ) * .. @@ -220,8 +222,8 @@ SUBROUTINE ZUNBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, * * If the projection is nonzero, then return * - IF( DZNRM2(M1,X1,INCX1) .NE. ZERO - $ .OR. DZNRM2(M2,X2,INCX2) .NE. ZERO ) THEN + IF( DZNRM2(M1,X1,INCX1) .NE. REALZERO + $ .OR. DZNRM2(M2,X2,INCX2) .NE. REALZERO ) THEN RETURN END IF * @@ -238,8 +240,8 @@ SUBROUTINE ZUNBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, END DO CALL ZUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, $ LDQ2, WORK, LWORK, CHILDINFO ) - IF( DZNRM2(M1,X1,INCX1) .NE. ZERO - $ .OR. DZNRM2(M2,X2,INCX2) .NE. ZERO ) THEN + IF( DZNRM2(M1,X1,INCX1) .NE. REALZERO + $ .OR. DZNRM2(M2,X2,INCX2) .NE. REALZERO ) THEN RETURN END IF END DO @@ -257,8 +259,8 @@ SUBROUTINE ZUNBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, X2(I) = ONE CALL ZUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, $ LDQ2, WORK, LWORK, CHILDINFO ) - IF( DZNRM2(M1,X1,INCX1) .NE. ZERO - $ .OR. DZNRM2(M2,X2,INCX2) .NE. ZERO ) THEN + IF( DZNRM2(M1,X1,INCX1) .NE. REALZERO + $ .OR. DZNRM2(M2,X2,INCX2) .NE. REALZERO ) THEN RETURN END IF END DO From 7eb4057d6dd012aa35c2022f55bf81c6ff95050d Mon Sep 17 00:00:00 2001 From: Christoph Conrads Date: Thu, 9 Nov 2023 15:02:38 +0100 Subject: [PATCH 4/6] xORBDB5/xUNBDB5: detect numerically zero vectors Do not call xORBDB6/xUNBDB6 with input vectors x that are numerically zero because this can cause problems at the call sites (xLARFGP computations might underflow) leading to, e.g., the computation of nonunitary matrices in the xORCSD2BY1/xUNCSD2BY1 output. --- SRC/cunbdb5.f | 31 +++++++++++++++++++++---------- SRC/dorbdb5.f | 31 +++++++++++++++++++++---------- SRC/sorbdb5.f | 31 +++++++++++++++++++++---------- SRC/zunbdb5.f | 31 +++++++++++++++++++++---------- 4 files changed, 84 insertions(+), 40 deletions(-) diff --git a/SRC/cunbdb5.f b/SRC/cunbdb5.f index 574edb1127..184e445c8e 100644 --- a/SRC/cunbdb5.f +++ b/SRC/cunbdb5.f @@ -176,13 +176,14 @@ SUBROUTINE CUNBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, * .. * .. Local Scalars .. INTEGER CHILDINFO, I, J + REAL EPS, NORM, SCL, SSQ * .. * .. External Subroutines .. - EXTERNAL CUNBDB6, XERBLA + EXTERNAL CLASSQ, CUNBDB6, XERBLA * .. * .. External Functions .. - REAL SCNRM2 - EXTERNAL SCNRM2 + REAL SLAMCH, SCNRM2 + EXTERNAL SLAMCH, SCNRM2 * .. * .. Intrinsic Function .. INTRINSIC MAX @@ -215,16 +216,26 @@ SUBROUTINE CUNBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, RETURN END IF * -* Project X onto the orthogonal complement of Q + EPS = SLAMCH( 'Precision' ) * - CALL CUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, LDQ2, - $ WORK, LWORK, CHILDINFO ) +* Project X onto the orthogonal complement of Q if X is nonzero * -* If the projection is nonzero, then return + SCL = REALZERO + SSQ = REALZERO + CALL CLASSQ( M1, X1, INCX1, SCL, SSQ ) + CALL CLASSQ( M2, X2, INCX2, SCL, SSQ ) + NORM = SCL * SQRT( SSQ ) * - IF( SCNRM2(M1,X1,INCX1) .NE. REALZERO - $ .OR. SCNRM2(M2,X2,INCX2) .NE. REALZERO ) THEN - RETURN + IF( NORM .GT. N * EPS ) THEN + CALL CUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, + $ LDQ2, WORK, LWORK, CHILDINFO ) +* +* If the projection is nonzero, then return +* + IF( SCNRM2(M1,X1,INCX1) .NE. REALZERO + $ .OR. SCNRM2(M2,X2,INCX2) .NE. REALZERO ) THEN + RETURN + END IF END IF * * Project each standard basis vector e_1,...,e_M1 in turn, stopping diff --git a/SRC/dorbdb5.f b/SRC/dorbdb5.f index ccf2e9e938..5a52107b77 100644 --- a/SRC/dorbdb5.f +++ b/SRC/dorbdb5.f @@ -176,13 +176,14 @@ SUBROUTINE DORBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, * .. * .. Local Scalars .. INTEGER CHILDINFO, I, J + DOUBLE PRECISION EPS, NORM, SCL, SSQ * .. * .. External Subroutines .. - EXTERNAL DORBDB6, XERBLA + EXTERNAL DLASSQ, DORBDB6, XERBLA * .. * .. External Functions .. - DOUBLE PRECISION DNRM2 - EXTERNAL DNRM2 + DOUBLE PRECISION DLAMCH, DNRM2 + EXTERNAL DLAMCH, DNRM2 * .. * .. Intrinsic Function .. INTRINSIC MAX @@ -215,16 +216,26 @@ SUBROUTINE DORBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, RETURN END IF * -* Project X onto the orthogonal complement of Q + EPS = DLAMCH( 'Precision' ) * - CALL DORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, LDQ2, - $ WORK, LWORK, CHILDINFO ) +* Project X onto the orthogonal complement of Q if X is nonzero * -* If the projection is nonzero, then return + SCL = REALZERO + SSQ = REALZERO + CALL DLASSQ( M1, X1, INCX1, SCL, SSQ ) + CALL DLASSQ( M2, X2, INCX2, SCL, SSQ ) + NORM = SCL * SQRT( SSQ ) * - IF( DNRM2(M1,X1,INCX1) .NE. REALZERO - $ .OR. DNRM2(M2,X2,INCX2) .NE. REALZERO ) THEN - RETURN + IF( NORM .GT. N * EPS ) THEN + CALL DORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, + $ LDQ2, WORK, LWORK, CHILDINFO ) +* +* If the projection is nonzero, then return +* + IF( DNRM2(M1,X1,INCX1) .NE. REALZERO + $ .OR. DNRM2(M2,X2,INCX2) .NE. REALZERO ) THEN + RETURN + END IF END IF * * Project each standard basis vector e_1,...,e_M1 in turn, stopping diff --git a/SRC/sorbdb5.f b/SRC/sorbdb5.f index dba68918f0..6aa166e7b1 100644 --- a/SRC/sorbdb5.f +++ b/SRC/sorbdb5.f @@ -176,13 +176,14 @@ SUBROUTINE SORBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, * .. * .. Local Scalars .. INTEGER CHILDINFO, I, J + REAL EPS, NORM, SCL, SSQ * .. * .. External Subroutines .. - EXTERNAL SORBDB6, XERBLA + EXTERNAL SLASSQ, SORBDB6, XERBLA * .. * .. External Functions .. - REAL SNRM2 - EXTERNAL SNRM2 + REAL SLAMCH, SNRM2 + EXTERNAL SLAMCH, SNRM2 * .. * .. Intrinsic Function .. INTRINSIC MAX @@ -215,16 +216,26 @@ SUBROUTINE SORBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, RETURN END IF * -* Project X onto the orthogonal complement of Q + EPS = SLAMCH( 'Precision' ) * - CALL SORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, LDQ2, - $ WORK, LWORK, CHILDINFO ) +* Project X onto the orthogonal complement of Q if X is nonzero * -* If the projection is nonzero, then return + SCL = REALZERO + SSQ = REALZERO + CALL SLASSQ( M1, X1, INCX1, SCL, SSQ ) + CALL SLASSQ( M2, X2, INCX2, SCL, SSQ ) + NORM = SCL * SQRT( SSQ ) * - IF( SNRM2(M1,X1,INCX1) .NE. REALZERO - $ .OR. SNRM2(M2,X2,INCX2) .NE. REALZERO ) THEN - RETURN + IF( NORM .GT. N * EPS ) THEN + CALL SORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, + $ LDQ2, WORK, LWORK, CHILDINFO ) +* +* If the projection is nonzero, then return +* + IF( SNRM2(M1,X1,INCX1) .NE. REALZERO + $ .OR. SNRM2(M2,X2,INCX2) .NE. REALZERO ) THEN + RETURN + END IF END IF * * Project each standard basis vector e_1,...,e_M1 in turn, stopping diff --git a/SRC/zunbdb5.f b/SRC/zunbdb5.f index 72a00e6b50..b867ec14b4 100644 --- a/SRC/zunbdb5.f +++ b/SRC/zunbdb5.f @@ -176,13 +176,14 @@ SUBROUTINE ZUNBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, * .. * .. Local Scalars .. INTEGER CHILDINFO, I, J + DOUBLE PRECISION EPS, NORM, SCL, SSQ * .. * .. External Subroutines .. - EXTERNAL ZUNBDB6, XERBLA + EXTERNAL ZLASSQ, ZUNBDB6, XERBLA * .. * .. External Functions .. - DOUBLE PRECISION DZNRM2 - EXTERNAL DZNRM2 + DOUBLE PRECISION DLAMCH, DZNRM2 + EXTERNAL DLAMCH, DZNRM2 * .. * .. Intrinsic Function .. INTRINSIC MAX @@ -215,16 +216,26 @@ SUBROUTINE ZUNBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, RETURN END IF * -* Project X onto the orthogonal complement of Q + EPS = DLAMCH( 'Precision' ) * - CALL ZUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, LDQ2, - $ WORK, LWORK, CHILDINFO ) +* Project X onto the orthogonal complement of Q if X is nonzero * -* If the projection is nonzero, then return + SCL = REALZERO + SSQ = REALZERO + CALL ZLASSQ( M1, X1, INCX1, SCL, SSQ ) + CALL ZLASSQ( M2, X2, INCX2, SCL, SSQ ) + NORM = SCL * SQRT( SSQ ) * - IF( DZNRM2(M1,X1,INCX1) .NE. REALZERO - $ .OR. DZNRM2(M2,X2,INCX2) .NE. REALZERO ) THEN - RETURN + IF( NORM .GT. N * EPS ) THEN + CALL ZUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, + $ LDQ2, WORK, LWORK, CHILDINFO ) +* +* If the projection is nonzero, then return +* + IF( DZNRM2(M1,X1,INCX1) .NE. REALZERO + $ .OR. DZNRM2(M2,X2,INCX2) .NE. REALZERO ) THEN + RETURN + END IF END IF * * Project each standard basis vector e_1,...,e_M1 in turn, stopping From 5b0bc5e555a74c35782a7665bec212172bd42cb3 Mon Sep 17 00:00:00 2001 From: Christoph Conrads Date: Fri, 10 Nov 2023 16:29:41 +0100 Subject: [PATCH 5/6] xORBDB5/xUNBDB5: ensure xORBDB6 input has unit norm Call sites may run into problems when this subroutine computes nonzero vectors that are very small in norm. --- SRC/cunbdb5.f | 9 ++++++++- SRC/dorbdb5.f | 9 ++++++++- SRC/sorbdb5.f | 9 ++++++++- SRC/zunbdb5.f | 9 ++++++++- 4 files changed, 32 insertions(+), 4 deletions(-) diff --git a/SRC/cunbdb5.f b/SRC/cunbdb5.f index 184e445c8e..22513cf8b1 100644 --- a/SRC/cunbdb5.f +++ b/SRC/cunbdb5.f @@ -179,7 +179,7 @@ SUBROUTINE CUNBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, REAL EPS, NORM, SCL, SSQ * .. * .. External Subroutines .. - EXTERNAL CLASSQ, CUNBDB6, XERBLA + EXTERNAL CLASSQ, CUNBDB6, CSCAL, XERBLA * .. * .. External Functions .. REAL SLAMCH, SCNRM2 @@ -227,6 +227,13 @@ SUBROUTINE CUNBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, NORM = SCL * SQRT( SSQ ) * IF( NORM .GT. N * EPS ) THEN +* Scale vector to unit norm to avoid problems in the caller code. +* Computing the reciprocal is undesirable but +* * xLASCL cannot be used because of the vector increments and +* * the round-off error has a negligible impact on +* orthogonalization. + CALL CSCAL( M1, ONE / NORM, X1, INCX1 ) + CALL CSCAL( M2, ONE / NORM, X2, INCX2 ) CALL CUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, $ LDQ2, WORK, LWORK, CHILDINFO ) * diff --git a/SRC/dorbdb5.f b/SRC/dorbdb5.f index 5a52107b77..cbd58ae547 100644 --- a/SRC/dorbdb5.f +++ b/SRC/dorbdb5.f @@ -179,7 +179,7 @@ SUBROUTINE DORBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, DOUBLE PRECISION EPS, NORM, SCL, SSQ * .. * .. External Subroutines .. - EXTERNAL DLASSQ, DORBDB6, XERBLA + EXTERNAL DLASSQ, DORBDB6, DSCAL, XERBLA * .. * .. External Functions .. DOUBLE PRECISION DLAMCH, DNRM2 @@ -227,6 +227,13 @@ SUBROUTINE DORBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, NORM = SCL * SQRT( SSQ ) * IF( NORM .GT. N * EPS ) THEN +* Scale vector to unit norm to avoid problems in the caller code. +* Computing the reciprocal is undesirable but +* * xLASCL cannot be used because of the vector increments and +* * the round-off error has a negligible impact on +* orthogonalization. + CALL DSCAL( M1, ONE / NORM, X1, INCX1 ) + CALL DSCAL( M2, ONE / NORM, X2, INCX2 ) CALL DORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, $ LDQ2, WORK, LWORK, CHILDINFO ) * diff --git a/SRC/sorbdb5.f b/SRC/sorbdb5.f index 6aa166e7b1..8fb88876fe 100644 --- a/SRC/sorbdb5.f +++ b/SRC/sorbdb5.f @@ -179,7 +179,7 @@ SUBROUTINE SORBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, REAL EPS, NORM, SCL, SSQ * .. * .. External Subroutines .. - EXTERNAL SLASSQ, SORBDB6, XERBLA + EXTERNAL SLASSQ, SORBDB6, SSCAL, XERBLA * .. * .. External Functions .. REAL SLAMCH, SNRM2 @@ -227,6 +227,13 @@ SUBROUTINE SORBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, NORM = SCL * SQRT( SSQ ) * IF( NORM .GT. N * EPS ) THEN +* Scale vector to unit norm to avoid problems in the caller code. +* Computing the reciprocal is undesirable but +* * xLASCL cannot be used because of the vector increments and +* * the round-off error has a negligible impact on +* orthogonalization. + CALL SSCAL( M1, ONE / NORM, X1, INCX1 ) + CALL SSCAL( M2, ONE / NORM, X2, INCX2 ) CALL SORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, $ LDQ2, WORK, LWORK, CHILDINFO ) * diff --git a/SRC/zunbdb5.f b/SRC/zunbdb5.f index b867ec14b4..c451ae921a 100644 --- a/SRC/zunbdb5.f +++ b/SRC/zunbdb5.f @@ -179,7 +179,7 @@ SUBROUTINE ZUNBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, DOUBLE PRECISION EPS, NORM, SCL, SSQ * .. * .. External Subroutines .. - EXTERNAL ZLASSQ, ZUNBDB6, XERBLA + EXTERNAL ZLASSQ, ZUNBDB6, ZSCAL, XERBLA * .. * .. External Functions .. DOUBLE PRECISION DLAMCH, DZNRM2 @@ -227,6 +227,13 @@ SUBROUTINE ZUNBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, NORM = SCL * SQRT( SSQ ) * IF( NORM .GT. N * EPS ) THEN +* Scale vector to unit norm to avoid problems in the caller code. +* Computing the reciprocal is undesirable but +* * xLASCL cannot be used because of the vector increments and +* * the round-off error has a negligible impact on +* orthogonalization. + CALL ZSCAL( M1, ONE / NORM, X1, INCX1 ) + CALL ZSCAL( M2, ONE / NORM, X2, INCX2 ) CALL ZUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, $ LDQ2, WORK, LWORK, CHILDINFO ) * From 64897c447470caec89529c7ed2c144d75dc2e193 Mon Sep 17 00:00:00 2001 From: Christoph Conrads Date: Fri, 10 Nov 2023 20:06:27 +0100 Subject: [PATCH 6/6] xLARFGP: avoid overflows Avoid overflows when XNORM << ABS(ALPHA) << 1. fixes #934 --- SRC/clarfgp.f | 5 +++-- SRC/dlarfgp.f | 7 ++++--- SRC/slarfgp.f | 5 +++-- SRC/zlarfgp.f | 5 +++-- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/SRC/clarfgp.f b/SRC/clarfgp.f index cbc489d618..47b5e47b07 100644 --- a/SRC/clarfgp.f +++ b/SRC/clarfgp.f @@ -122,7 +122,7 @@ SUBROUTINE CLARFGP( N, ALPHA, X, INCX, TAU ) * .. * .. Local Scalars .. INTEGER J, KNT - REAL ALPHI, ALPHR, BETA, BIGNUM, SMLNUM, XNORM + REAL ALPHI, ALPHR, BETA, BIGNUM, EPS, SMLNUM, XNORM COMPLEX SAVEALPHA * .. * .. External Functions .. @@ -143,11 +143,12 @@ SUBROUTINE CLARFGP( N, ALPHA, X, INCX, TAU ) RETURN END IF * + EPS = SLAMCH( 'Precision' ) XNORM = SCNRM2( N-1, X, INCX ) ALPHR = REAL( ALPHA ) ALPHI = AIMAG( ALPHA ) * - IF( XNORM.EQ.ZERO ) THEN + IF( XNORM.LE.EPS*ABS(ALPHA) ) THEN * * H = [1-alpha/abs(alpha) 0; 0 I], sign chosen so ALPHA >= 0. * diff --git a/SRC/dlarfgp.f b/SRC/dlarfgp.f index bd7a719342..a8cf1b31e3 100644 --- a/SRC/dlarfgp.f +++ b/SRC/dlarfgp.f @@ -122,7 +122,7 @@ SUBROUTINE DLARFGP( N, ALPHA, X, INCX, TAU ) * .. * .. Local Scalars .. INTEGER J, KNT - DOUBLE PRECISION BETA, BIGNUM, SAVEALPHA, SMLNUM, XNORM + DOUBLE PRECISION BETA, BIGNUM, EPS, SAVEALPHA, SMLNUM, XNORM * .. * .. External Functions .. DOUBLE PRECISION DLAMCH, DLAPY2, DNRM2 @@ -141,11 +141,12 @@ SUBROUTINE DLARFGP( N, ALPHA, X, INCX, TAU ) RETURN END IF * + EPS = DLAMCH( 'Precision' ) XNORM = DNRM2( N-1, X, INCX ) * - IF( XNORM.EQ.ZERO ) THEN + IF( XNORM.LE.EPS*ABS(ALPHA) ) THEN * -* H = [+/-1, 0; I], sign chosen so ALPHA >= 0 +* H = [+/-1, 0; I], sign chosen so ALPHA >= 0. * IF( ALPHA.GE.ZERO ) THEN * When TAU.eq.ZERO, the vector is special-cased to be diff --git a/SRC/slarfgp.f b/SRC/slarfgp.f index 4069f122a9..c28274c2c4 100644 --- a/SRC/slarfgp.f +++ b/SRC/slarfgp.f @@ -122,7 +122,7 @@ SUBROUTINE SLARFGP( N, ALPHA, X, INCX, TAU ) * .. * .. Local Scalars .. INTEGER J, KNT - REAL BETA, BIGNUM, SAVEALPHA, SMLNUM, XNORM + REAL BETA, BIGNUM, EPS, SAVEALPHA, SMLNUM, XNORM * .. * .. External Functions .. REAL SLAMCH, SLAPY2, SNRM2 @@ -141,9 +141,10 @@ SUBROUTINE SLARFGP( N, ALPHA, X, INCX, TAU ) RETURN END IF * + EPS = SLAMCH( 'Precision' ) XNORM = SNRM2( N-1, X, INCX ) * - IF( XNORM.EQ.ZERO ) THEN + IF( XNORM.LE.EPS*ABS(ALPHA) ) THEN * * H = [+/-1, 0; I], sign chosen so ALPHA >= 0. * diff --git a/SRC/zlarfgp.f b/SRC/zlarfgp.f index 07b32def60..6c9efb04c6 100644 --- a/SRC/zlarfgp.f +++ b/SRC/zlarfgp.f @@ -122,7 +122,7 @@ SUBROUTINE ZLARFGP( N, ALPHA, X, INCX, TAU ) * .. * .. Local Scalars .. INTEGER J, KNT - DOUBLE PRECISION ALPHI, ALPHR, BETA, BIGNUM, SMLNUM, XNORM + DOUBLE PRECISION ALPHI, ALPHR, BETA, BIGNUM, EPS, SMLNUM, XNORM COMPLEX*16 SAVEALPHA * .. * .. External Functions .. @@ -143,11 +143,12 @@ SUBROUTINE ZLARFGP( N, ALPHA, X, INCX, TAU ) RETURN END IF * + EPS = DLAMCH( 'Precision' ) XNORM = DZNRM2( N-1, X, INCX ) ALPHR = DBLE( ALPHA ) ALPHI = DIMAG( ALPHA ) * - IF( XNORM.EQ.ZERO ) THEN + IF( XNORM.LE.EPS*ABS(ALPHA) ) THEN * * H = [1-alpha/abs(alpha) 0; 0 I], sign chosen so ALPHA >= 0. *