1- ! > \brief \b CROTG
1+ ! > \brief \b CROTG generates a Givens rotation with real cosine and complex sine.
22!
33! =========== DOCUMENTATION ===========
44!
2424! > = 1 if x = 0
2525! > c = |a| / sqrt(|a|**2 + |b|**2)
2626! > s = sgn(a) * conjg(b) / sqrt(|a|**2 + |b|**2)
27- ! > When a and b are real and r /= 0, the formulas simplify to
2827! > r = sgn(a)*sqrt(|a|**2 + |b|**2)
28+ ! > When a and b are real and r /= 0, the formulas simplify to
2929! > c = a / r
3030! > s = b / r
3131! > the same as in SROTG when |a| > |b|. When |b| >= |a|, the
6565! Authors:
6666! ========
6767!
68- ! > \author Edward Anderson, Lockheed Martin
68+ ! > \author Weslley Pereira, University of Colorado Denver, USA
6969!
70- ! > \par Contributors:
71- ! ==================
72- ! >
73- ! > Weslley Pereira, University of Colorado Denver, USA
70+ ! > \date December 2021
7471!
7572! > \ingroup single_blas_level1
7673!
7976! >
8077! > \verbatim
8178! >
79+ ! > Based on the algorithm from
80+ ! >
8281! > Anderson E. (2017)
8382! > Algorithm 978: Safe Scaling in the Level 1 BLAS
8483! > ACM Trans Math Softw 44:1--28
@@ -108,21 +107,14 @@ subroutine CROTG( a, b, c, s )
108107 1 - minexponent (0._wp ), &
109108 maxexponent (0._wp )- 1 &
110109 )
111- real (wp), parameter :: rtmin = sqrt ( real (radix (0._wp ),wp)** max ( &
112- minexponent (0._wp )- 1 , &
113- 1 - maxexponent (0._wp ) &
114- ) / epsilon (0._wp ) )
115- real (wp), parameter :: rtmax = sqrt ( real (radix (0._wp ),wp)** max ( &
116- 1 - minexponent (0._wp ), &
117- maxexponent (0._wp )- 1 &
118- ) * epsilon (0._wp ) )
110+ real (wp), parameter :: rtmin = sqrt ( safmin )
119111! ..
120112! .. Scalar Arguments ..
121113 real (wp) :: c
122114 complex (wp) :: a, b, s
123115! ..
124116! .. Local Scalars ..
125- real (wp) :: d, f1, f2, g1, g2, h2, p, u, uu, v, vv, w
117+ real (wp) :: d, f1, f2, g1, g2, h2, u, v, w, rtmax
126118 complex (wp) :: f, fs, g, gs, r, t
127119! ..
128120! .. Intrinsic Functions ..
@@ -144,30 +136,43 @@ subroutine CROTG( a, b, c, s )
144136 r = f
145137 else if ( f == czero ) then
146138 c = zero
147- g1 = max ( abs (real (g)), abs (aimag (g)) )
148- if ( g1 > rtmin .and. g1 < rtmax ) then
139+ if ( real (g) == zero ) then
140+ r = abs (aimag (g))
141+ s = conjg ( g ) / r
142+ elseif ( aimag (g) == zero ) then
143+ r = abs (real (g))
144+ s = conjg ( g ) / r
145+ else
146+ g1 = max ( abs (real (g)), abs (aimag (g)) )
147+ rtmax = sqrt ( safmax/ 2 )
148+ if ( g1 > rtmin .and. g1 < rtmax ) then
149149!
150150! Use unscaled algorithm
151151!
152- g2 = ABSSQ( g )
153- d = sqrt ( g2 )
154- s = conjg ( g ) / d
155- r = d
156- else
152+ ! The following two lines can be replaced by `d = abs( g )`.
153+ ! This algorithm do not use the intrinsic complex abs.
154+ g2 = ABSSQ( g )
155+ d = sqrt ( g2 )
156+ s = conjg ( g ) / d
157+ r = d
158+ else
157159!
158160! Use scaled algorithm
159161!
160- u = min ( safmax, max ( safmin, g1 ) )
161- uu = one / u
162- gs = g* uu
163- g2 = ABSSQ( gs )
164- d = sqrt ( g2 )
165- s = conjg ( gs ) / d
166- r = d* u
162+ u = min ( safmax, max ( safmin, g1 ) )
163+ gs = g / u
164+ ! The following two lines can be replaced by `d = abs( gs )`.
165+ ! This algorithm do not use the intrinsic complex abs.
166+ g2 = ABSSQ( gs )
167+ d = sqrt ( g2 )
168+ s = conjg ( gs ) / d
169+ r = d* u
170+ end if
167171 end if
168172 else
169173 f1 = max ( abs (real (f)), abs (aimag (f)) )
170174 g1 = max ( abs (real (g)), abs (aimag (g)) )
175+ rtmax = sqrt ( safmax/ 4 )
171176 if ( f1 > rtmin .and. f1 < rtmax .and. &
172177 g1 > rtmin .and. g1 < rtmax ) then
173178!
@@ -176,52 +181,95 @@ subroutine CROTG( a, b, c, s )
176181 f2 = ABSSQ( f )
177182 g2 = ABSSQ( g )
178183 h2 = f2 + g2
179- if ( f2 > rtmin .and. h2 < rtmax ) then
180- d = sqrt ( f2* h2 )
184+ ! safmin <= f2 <= h2 <= safmax
185+ if ( f2 >= h2 * safmin ) then
186+ ! safmin <= f2/h2 <= 1, and h2/f2 is finite
187+ c = sqrt ( f2 / h2 )
188+ r = f / c
189+ rtmax = rtmax * 2
190+ if ( f2 > rtmin .and. h2 < rtmax ) then
191+ ! safmin <= sqrt( f2*h2 ) <= safmax
192+ s = conjg ( g ) * ( f / sqrt ( f2* h2 ) )
193+ else
194+ s = conjg ( g ) * ( r / h2 )
195+ end if
181196 else
182- d = sqrt ( f2 )* sqrt ( h2 )
197+ ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow.
198+ ! Moreover,
199+ ! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax,
200+ ! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax).
201+ ! Also,
202+ ! g2 >> f2, which means that h2 = g2.
203+ d = sqrt ( f2 * h2 )
204+ c = f2 / d
205+ if ( c >= safmin ) then
206+ r = f / c
207+ else
208+ ! f2 / sqrt(f2 * h2) < safmin, then
209+ ! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax
210+ r = f * ( h2 / d )
211+ end if
212+ s = conjg ( g ) * ( f / d )
183213 end if
184- p = 1 / d
185- c = f2* p
186- s = conjg ( g )* ( f* p )
187- r = f* ( h2* p )
188214 else
189215!
190216! Use scaled algorithm
191217!
192218 u = min ( safmax, max ( safmin, f1, g1 ) )
193- uu = one / u
194- gs = g* uu
219+ gs = g / u
195220 g2 = ABSSQ( gs )
196- if ( f1* uu < rtmin ) then
221+ if ( f1 / u < rtmin ) then
197222!
198223! f is not well-scaled when scaled by g1.
199224! Use a different scaling for f.
200225!
201226 v = min ( safmax, max ( safmin, f1 ) )
202- vv = one / v
203- w = v * uu
204- fs = f* vv
227+ w = v / u
228+ fs = f / v
205229 f2 = ABSSQ( fs )
206230 h2 = f2* w** 2 + g2
207231 else
208232!
209233! Otherwise use the same scaling for f and g.
210234!
211235 w = one
212- fs = f* uu
236+ fs = f / u
213237 f2 = ABSSQ( fs )
214238 h2 = f2 + g2
215239 end if
216- if ( f2 > rtmin .and. h2 < rtmax ) then
217- d = sqrt ( f2* h2 )
240+ ! safmin <= f2 <= h2 <= safmax
241+ if ( f2 >= h2 * safmin ) then
242+ ! safmin <= f2/h2 <= 1, and h2/f2 is finite
243+ c = sqrt ( f2 / h2 )
244+ r = fs / c
245+ rtmax = rtmax * 2
246+ if ( f2 > rtmin .and. h2 < rtmax ) then
247+ ! safmin <= sqrt( f2*h2 ) <= safmax
248+ s = conjg ( gs ) * ( fs / sqrt ( f2* h2 ) )
249+ else
250+ s = conjg ( gs ) * ( r / h2 )
251+ end if
218252 else
219- d = sqrt ( f2 )* sqrt ( h2 )
253+ ! f2/h2 <= safmin may be subnormal, and h2/f2 may overflow.
254+ ! Moreover,
255+ ! safmin <= f2*f2 * safmax < f2 * h2 < h2*h2 * safmin <= safmax,
256+ ! sqrt(safmin) <= sqrt(f2 * h2) <= sqrt(safmax).
257+ ! Also,
258+ ! g2 >> f2, which means that h2 = g2.
259+ d = sqrt ( f2 * h2 )
260+ c = f2 / d
261+ if ( c >= safmin ) then
262+ r = fs / c
263+ else
264+ ! f2 / sqrt(f2 * h2) < safmin, then
265+ ! sqrt(safmin) <= f2 * sqrt(safmax) <= h2 / sqrt(f2 * h2) <= h2 * (safmin / f2) <= h2 <= safmax
266+ r = fs * ( h2 / d )
267+ end if
268+ s = conjg ( gs ) * ( fs / d )
220269 end if
221- p = 1 / d
222- c = ( f2* p )* w
223- s = conjg ( gs )* ( fs* p )
224- r = ( fs* ( h2* p ) )* u
270+ ! Rescale c and r
271+ c = c * w
272+ r = r * u
225273 end if
226274 end if
227275 a = r
0 commit comments