Skip to content

Commit 52d8a22

Browse files
Sandhya Viswanathan, Eric Caspole, Charlie Hunt
committed
8264054: Bad XMM performance on java.lang.MathBench.sqrtDouble
Co-authored-by: Eric Caspole <[email protected]>
Co-authored-by: Charlie Hunt <[email protected]>
Reviewed-by: neliasso, kvn, vlivanov
1 parent 16acfaf commit 52d8a22

File tree

3 files changed

+896
-68
lines changed

3 files changed

+896
-68
lines changed

src/hotspot/cpu/x86/x86.ad

Lines changed: 12 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -3232,73 +3232,26 @@ instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
32323232
ins_pipe(pipe_slow);
32333233
%}
32343234

3235-
instruct sqrtF_reg(regF dst, regF src) %{
3235+
// The sqrtss instruction needs its destination register to be pre-initialized for best performance.
3236+
// Therefore, only the instruct rule where the input is pre-loaded into the dst register is defined below.
3237+
instruct sqrtF_reg(regF dst) %{
32363238
predicate(UseSSE>=1);
3237-
match(Set dst (SqrtF src));
3238-
3239-
format %{ "sqrtss $dst, $src" %}
3240-
ins_cost(150);
3241-
ins_encode %{
3242-
__ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
3243-
%}
3244-
ins_pipe(pipe_slow);
3245-
%}
3246-
3247-
instruct sqrtF_mem(regF dst, memory src) %{
3248-
predicate(UseSSE>=1);
3249-
match(Set dst (SqrtF (LoadF src)));
3250-
3251-
format %{ "sqrtss $dst, $src" %}
3252-
ins_cost(150);
3253-
ins_encode %{
3254-
__ sqrtss($dst$$XMMRegister, $src$$Address);
3255-
%}
3256-
ins_pipe(pipe_slow);
3257-
%}
3258-
3259-
instruct sqrtF_imm(regF dst, immF con) %{
3260-
predicate(UseSSE>=1);
3261-
match(Set dst (SqrtF con));
3262-
3263-
format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
3264-
ins_cost(150);
3239+
match(Set dst (SqrtF dst));
3240+
format %{ "sqrtss $dst, $dst" %}
32653241
ins_encode %{
3266-
__ sqrtss($dst$$XMMRegister, $constantaddress($con));
3242+
__ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
32673243
%}
32683244
ins_pipe(pipe_slow);
32693245
%}
32703246

3271-
instruct sqrtD_reg(regD dst, regD src) %{
3247+
// The sqrtsd instruction needs its destination register to be pre-initialized for best performance.
3248+
// Therefore, only the instruct rule where the input is pre-loaded into the dst register is defined below.
3249+
instruct sqrtD_reg(regD dst) %{
32723250
predicate(UseSSE>=2);
3273-
match(Set dst (SqrtD src));
3274-
3275-
format %{ "sqrtsd $dst, $src" %}
3276-
ins_cost(150);
3277-
ins_encode %{
3278-
__ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
3279-
%}
3280-
ins_pipe(pipe_slow);
3281-
%}
3282-
3283-
instruct sqrtD_mem(regD dst, memory src) %{
3284-
predicate(UseSSE>=2);
3285-
match(Set dst (SqrtD (LoadD src)));
3286-
3287-
format %{ "sqrtsd $dst, $src" %}
3288-
ins_cost(150);
3289-
ins_encode %{
3290-
__ sqrtsd($dst$$XMMRegister, $src$$Address);
3291-
%}
3292-
ins_pipe(pipe_slow);
3293-
%}
3294-
3295-
instruct sqrtD_imm(regD dst, immD con) %{
3296-
predicate(UseSSE>=2);
3297-
match(Set dst (SqrtD con));
3298-
format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
3299-
ins_cost(150);
3251+
match(Set dst (SqrtD dst));
3252+
format %{ "sqrtsd $dst, $dst" %}
33003253
ins_encode %{
3301-
__ sqrtsd($dst$$XMMRegister, $constantaddress($con));
3254+
__ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
33023255
%}
33033256
ins_pipe(pipe_slow);
33043257
%}

0 commit comments

Comments
 (0)