1- ! > \brief \b CROTG
1+ ! > \brief \b CROTG generates a Givens rotation with real cosine and complex sine.
22!
33! =========== DOCUMENTATION ===========
44!
2424! > = 1 if x = 0
2525! > c = |a| / sqrt(|a|**2 + |b|**2)
2626! > s = sgn(a) * conjg(b) / sqrt(|a|**2 + |b|**2)
27- ! > When a and b are real and r /= 0, the formulas simplify to
2827! > r = sgn(a)*sqrt(|a|**2 + |b|**2)
28+ ! > When a and b are real and r /= 0, the formulas simplify to
2929! > c = a / r
3030! > s = b / r
31- ! > the same as in CROTG when |a| > |b|. When |b| >= |a|, the
32- ! > sign of c and s will be different from those computed by CROTG
31+ ! > the same as in SROTG when |a| > |b|. When |b| >= |a|, the
32+ ! > sign of c and s will be different from those computed by SROTG
3333! > if the signs of a and b are not the same.
3434! >
3535! > \endverbatim
6565! Authors:
6666! ========
6767!
68- ! > \author Edward Anderson, Lockheed Martin
68+ ! > \author Weslley Pereira, University of Colorado Denver, USA
6969!
70- ! > \par Contributors:
71- ! ==================
72- ! >
73- ! > Weslley Pereira, University of Colorado Denver, USA
70+ ! > \date December 2021
7471!
75- ! > \ingroup single_blas_level1
72+ ! > \ingroup OTHERauxiliary
7673!
7774! > \par Further Details:
7875! =====================
7976! >
8077! > \verbatim
8178! >
79+ ! > Based on the algorithm from
80+ ! >
8281! > Anderson E. (2017)
8382! > Algorithm 978: Safe Scaling in the Level 1 BLAS
8483! > ACM Trans Math Softw 44:1--28
@@ -108,21 +107,14 @@ subroutine CROTG( a, b, c, s )
108107 1 - minexponent (0._wp ), &
109108 maxexponent (0._wp )- 1 &
110109 )
111- real (wp), parameter :: rtmin = sqrt ( real (radix (0._wp ),wp)** max ( &
112- minexponent (0._wp )- 1 , &
113- 1 - maxexponent (0._wp ) &
114- ) / epsilon (0._wp ) )
115- real (wp), parameter :: rtmax = sqrt ( real (radix (0._wp ),wp)** max ( &
116- 1 - minexponent (0._wp ), &
117- maxexponent (0._wp )- 1 &
118- ) * epsilon (0._wp ) )
110+ real (wp), parameter :: rtmin = sqrt ( safmin )
119111! ..
120112! .. Scalar Arguments ..
121113 real (wp) :: c
122114 complex (wp) :: a, b, s
123115! ..
124116! .. Local Scalars ..
125- real (wp) :: d, f1, f2, g1, g2, h2, u, v, w
117+ real (wp) :: d, f1, f2, g1, g2, h2, u, v, w, rtmax
126118 complex (wp) :: f, fs, g, gs, r, t
127119! ..
128120! .. Intrinsic Functions ..
@@ -145,6 +137,7 @@ subroutine CROTG( a, b, c, s )
145137 else if ( f == czero ) then
146138 c = zero
147139 g1 = max ( abs (real (g)), abs (aimag (g)) )
140+ rtmax = sqrt ( safmax/ 2 )
148141 if ( g1 > rtmin .and. g1 < rtmax ) then
149142!
150143! Use unscaled algorithm
@@ -165,6 +158,7 @@ subroutine CROTG( a, b, c, s )
165158 else
166159 f1 = max ( abs (real (f)), abs (aimag (f)) )
167160 g1 = max ( abs (real (g)), abs (aimag (g)) )
161+ rtmax = sqrt ( safmax/ 4 )
168162 if ( f1 > rtmin .and. f1 < rtmax .and. &
169163 g1 > rtmin .and. g1 < rtmax ) then
170164!
@@ -173,14 +167,36 @@ subroutine CROTG( a, b, c, s )
173167 f2 = ABSSQ( f )
174168 g2 = ABSSQ( g )
175169 h2 = f2 + g2
176- if ( f2 > rtmin .and. h2 < rtmax ) then
177- d = sqrt ( f2* h2 )
170+ ! safmin <= f2 <= h2 <= safmax
171+ if ( f2 >= h2 * safmin ) then
172+ ! safmin <= f2/h2 <= 1, and h2/f2 is finite
173+ c = sqrt ( f2 / h2 )
174+ r = f / c
175+ rtmax = rtmax * 2
176+ if ( f2 > rtmin .and. h2 < rtmax ) then
177+ ! safmin <= sqrt( f2*h2 ) <= safmax
178+ s = conjg ( g ) * ( f / sqrt ( f2* h2 ) )
179+ else
180+ s = conjg ( g ) * ( r / h2 )
181+ end if
178182 else
179- d = sqrt ( f2 )* sqrt ( h2 )
183+ ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow.
184+ ! Moreover,
185+ ! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax,
186+ ! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax).
187+ ! Also,
188+ ! g2 >> f2, which means that h2 = g2.
189+ d = sqrt ( f2 * h2 )
190+ c = f2 / d
191+ if ( c >= safmin ) then
192+ r = f / c
193+ else
194+ ! f2 / sqrt(f2 * h2) < safmin, then
195+ ! h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax
196+ r = f * ( h2 / d )
197+ end if
198+ s = conjg ( g ) * ( f / d )
180199 end if
181- c = f2 / d
182- s = conjg ( g )* ( f / d )
183- r = f* ( h2 / d )
184200 else
185201!
186202! Use scaled algorithm
@@ -207,14 +223,39 @@ subroutine CROTG( a, b, c, s )
207223 f2 = ABSSQ( fs )
208224 h2 = f2 + g2
209225 end if
210- if ( f2 > rtmin .and. h2 < rtmax ) then
211- d = sqrt ( f2* h2 )
226+ ! safmin <= f2 <= h2 <= safmax
227+ if ( f2 >= h2 * safmin ) then
228+ ! safmin <= f2/h2 <= 1, and h2/f2 is finite
229+ c = sqrt ( f2 / h2 )
230+ r = fs / c
231+ rtmax = rtmax * 2
232+ if ( f2 > rtmin .and. h2 < rtmax ) then
233+ ! safmin <= sqrt( f2*h2 ) <= safmax
234+ s = conjg ( gs ) * ( fs / sqrt ( f2* h2 ) )
235+ else
236+ s = conjg ( gs ) * ( r / h2 )
237+ end if
212238 else
213- d = sqrt ( f2 )* sqrt ( h2 )
239+ ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow.
240+ ! Moreover,
241+ ! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax,
242+ ! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax).
243+ ! Also,
244+ ! g2 >> f2, which means that h2 = g2.
245+ d = sqrt ( f2 * h2 )
246+ c = f2 / d
247+ if ( c >= safmin ) then
248+ r = fs / c
249+ else
250+ ! f2 / sqrt(f2 * h2) < safmin, then
251+ ! h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax
252+ r = fs * ( h2 / d )
253+ end if
254+ s = conjg ( gs ) * ( fs / d )
214255 end if
215- c = ( f2 / d ) * w
216- s = conjg ( gs ) * ( fs / d )
217- r = ( fs * ( h2 / d ) ) * u
256+ ! Rescale c and r
257+ c = c * w
258+ r = r * u
218259 end if
219260 end if
220261 a = r
0 commit comments