Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit 00ac1d0

Browse files
committed
[CodeGen] Round [SU]INT_TO_FP result when promoting from f16.
If we don't round, values that aren't precisely representable in f16 could be used as-is in a promoted f32 operation, which would produce incorrect results. AArch64 already had the correct behavior; add a focused test for it. Fixes http://llvm.org/PR26871. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@268700 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent ecc2a1e commit 00ac1d0

File tree

5 files changed

+115
-6
lines changed

5 files changed

+115
-6
lines changed

lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2102,9 +2102,14 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT_CC(SDNode *N) {
21022102
// Construct a SDNode that transforms the SINT or UINT operand to the promoted
21032103
// float type.
21042104
SDValue DAGTypeLegalizer::PromoteFloatRes_XINT_TO_FP(SDNode *N) {
2105+
SDLoc DL(N);
21052106
EVT VT = N->getValueType(0);
21062107
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
2107-
return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, N->getOperand(0));
2108+
SDValue NV = DAG.getNode(N->getOpcode(), DL, NVT, N->getOperand(0));
2109+
// Round the value to the desired precision (that of the source type).
2110+
return DAG.getNode(
2111+
ISD::FP_EXTEND, DL, NVT,
2112+
DAG.getNode(ISD::FP_ROUND, DL, VT, NV, DAG.getIntPtrConstant(0, DL)));
21082113
}
21092114

21102115
SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) {

test/CodeGen/AArch64/f16-instructions.ll

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,34 @@ define half @test_sitofp_i64(i64 %a) #0 {
446446
ret half %r
447447
}
448448

449+
; CHECK-LABEL: test_uitofp_i32_fadd:
450+
; CHECK-NEXT: ucvtf s1, w0
451+
; CHECK-NEXT: fcvt h1, s1
452+
; CHECK-NEXT: fcvt s0, h0
453+
; CHECK-NEXT: fcvt s1, h1
454+
; CHECK-NEXT: fadd s0, s0, s1
455+
; CHECK-NEXT: fcvt h0, s0
456+
; CHECK-NEXT: ret
457+
define half @test_uitofp_i32_fadd(i32 %a, half %b) #0 {
458+
%c = uitofp i32 %a to half
459+
%r = fadd half %b, %c
460+
ret half %r
461+
}
462+
463+
; CHECK-LABEL: test_sitofp_i32_fadd:
464+
; CHECK-NEXT: scvtf s1, w0
465+
; CHECK-NEXT: fcvt h1, s1
466+
; CHECK-NEXT: fcvt s0, h0
467+
; CHECK-NEXT: fcvt s1, h1
468+
; CHECK-NEXT: fadd s0, s0, s1
469+
; CHECK-NEXT: fcvt h0, s0
470+
; CHECK-NEXT: ret
471+
define half @test_sitofp_i32_fadd(i32 %a, half %b) #0 {
472+
%c = sitofp i32 %a to half
473+
%r = fadd half %b, %c
474+
ret half %r
475+
}
476+
449477
; CHECK-LABEL: test_fptrunc_float:
450478
; CHECK-NEXT: fcvt h0, s0
451479
; CHECK-NEXT: ret

test/CodeGen/ARM/fp16-promote.ll

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -889,4 +889,44 @@ define half @test_struct_arg(%struct.dummy %p) {
889889
ret half %a
890890
}
891891

892+
; CHECK-LABEL: test_uitofp_i32_fadd:
893+
; CHECK-VFP-DAG: vcvt.f32.u32
894+
; CHECK-NOVFP-DAG: bl __aeabi_ui2f
895+
896+
; CHECK-FP16-DAG: vcvtb.f16.f32
897+
; CHECK-FP16-DAG: vcvtb.f32.f16
898+
; CHECK-LIBCALL-DAG: bl __aeabi_h2f
899+
; CHECK-LIBCALL-DAG: bl __aeabi_h2f
900+
901+
; CHECK-VFP-DAG: vadd.f32
902+
; CHECK-NOVFP-DAG: bl __aeabi_fadd
903+
904+
; CHECK-FP16-DAG: vcvtb.f16.f32
905+
; CHECK-LIBCALL-DAG: bl __aeabi_f2h
906+
define half @test_uitofp_i32_fadd(i32 %a, half %b) #0 {
907+
%c = uitofp i32 %a to half
908+
%r = fadd half %b, %c
909+
ret half %r
910+
}
911+
912+
; CHECK-LABEL: test_sitofp_i32_fadd:
913+
; CHECK-VFP-DAG: vcvt.f32.s32
914+
; CHECK-NOVFP-DAG: bl __aeabi_i2f
915+
916+
; CHECK-FP16-DAG: vcvtb.f16.f32
917+
; CHECK-FP16-DAG: vcvtb.f32.f16
918+
; CHECK-LIBCALL-DAG: bl __aeabi_h2f
919+
; CHECK-LIBCALL-DAG: bl __aeabi_h2f
920+
921+
; CHECK-VFP-DAG: vadd.f32
922+
; CHECK-NOVFP-DAG: bl __aeabi_fadd
923+
924+
; CHECK-FP16-DAG: vcvtb.f16.f32
925+
; CHECK-LIBCALL-DAG: bl __aeabi_f2h
926+
define half @test_sitofp_i32_fadd(i32 %a, half %b) #0 {
927+
%c = sitofp i32 %a to half
928+
%r = fadd half %b, %c
929+
ret half %r
930+
}
931+
892932
attributes #0 = { nounwind }

test/CodeGen/ARM/fp16-v3.ll

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,13 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
44
target triple = "armv7a--none-eabi"
55

66
; CHECK-LABEL: test_vec3:
7-
; CHECK: vcvtb.f32.f16
8-
; CHECK: vcvt.f32.s32
9-
; CHECK: vadd.f32
10-
; CHECK-NEXT: vcvtb.f16.f32 [[SREG:s[0-9]+]], {{.*}}
11-
; CHECK-NEXT: vmov [[RREG1:r[0-9]+]], [[SREG]]
7+
; CHECK-DAG: vcvtb.f32.f16 [[SREG1:s[0-9]+]],
8+
; CHECK-DAG: vcvt.f32.s32 [[SREG2:s[0-9]+]],
9+
; CHECK-DAG: vcvtb.f16.f32 [[SREG3:s[0-9]+]], [[SREG2]]
10+
; CHECK-DAG: vcvtb.f32.f16 [[SREG4:s[0-9]+]], [[SREG3]]
11+
; CHECK: vadd.f32 [[SREG5:s[0-9]+]], [[SREG4]], [[SREG1]]
12+
; CHECK-NEXT: vcvtb.f16.f32 [[SREG6:s[0-9]+]], [[SREG5]]
13+
; CHECK-NEXT: vmov [[RREG1:r[0-9]+]], [[SREG6]]
1214
; CHECK-NEXT: uxth [[RREG2:r[0-9]+]], [[RREG1]]
1315
; CHECK-NEXT: pkhbt [[RREG3:r[0-9]+]], [[RREG1]], [[RREG1]], lsl #16
1416
; CHECK-DAG: strh [[RREG1]], [r0, #4]

test/CodeGen/X86/half.ll

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,4 +279,38 @@ define half @test_f80trunc_nodagcombine() #0 {
279279
ret half %2
280280
}
281281

282+
; CHECK-LABEL: test_sitofp_fadd_i32:
283+
284+
; CHECK-LIBCALL-NEXT: pushq %rbx
285+
; CHECK-LIBCALL-NEXT: subq $16, %rsp
286+
; CHECK-LIBCALL-NEXT: movl %edi, %ebx
287+
; CHECK-LIBCALL-NEXT: movzwl (%rsi), %edi
288+
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
289+
; CHECK-LIBCALL-NEXT: movss %xmm0, 12(%rsp)
290+
; CHECK-LIBCALL-NEXT: cvtsi2ssl %ebx, %xmm0
291+
; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
292+
; CHECK-LIBCALL-NEXT: movzwl %ax, %edi
293+
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
294+
; CHECK-LIBCALL-NEXT: addss 12(%rsp), %xmm0
295+
; CHECK-LIBCALL-NEXT: addq $16, %rsp
296+
; CHECK-LIBCALL-NEXT: popq %rbx
297+
; CHECK-LIBCALL-NEXT: retq
298+
299+
; CHECK-F16C-NEXT: movswl (%rsi), %eax
300+
; CHECK-F16C-NEXT: vmovd %eax, %xmm0
301+
; CHECK-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
302+
; CHECK-F16C-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm1
303+
; CHECK-F16C-NEXT: vcvtps2ph $4, %xmm1, %xmm1
304+
; CHECK-F16C-NEXT: vcvtph2ps %xmm1, %xmm1
305+
; CHECK-F16C-NEXT: vaddss %xmm1, %xmm0, %xmm0
306+
; CHECK-F16C-NEXT: retq
307+
308+
define float @test_sitofp_fadd_i32(i32 %a, half* %b) #0 {
309+
%tmp0 = load half, half* %b
310+
%tmp1 = sitofp i32 %a to half
311+
%tmp2 = fadd half %tmp0, %tmp1
312+
%tmp3 = fpext half %tmp2 to float
313+
ret float %tmp3
314+
}
315+
282316
attributes #0 = { nounwind }

0 commit comments

Comments
 (0)