-
Notifications
You must be signed in to change notification settings - Fork 15.1k
Description
| Bugzilla Link | 44835 |
| Resolution | FIXED |
| Resolved on | Feb 10, 2020 02:27 |
| Version | 10.0 |
| OS | Linux |
| Blocks | #43900 |
| CC | @zmodem,@jdm,@RKSimon |
| Fixed by commit(s) | 23db972 |
Extended Description
define void @test(i32* %p, i32* %p2) {
%v = load i32, i32* %p, align 4
%v2 = load i32, i32* %p2, align 4
%cmp = icmp ult i32 %v2, %v
%sel = select i1 %cmp, i32* %p2, i32* %p
%p8 = bitcast i32* %p to i8*
%sel8 = bitcast i32* %sel to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %p8, i8* align 4 %sel8, i64 4, i1 false)
ret void
}
; Function Attrs: argmemonly nounwind willreturn
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #0
attributes #0 = { argmemonly nounwind willreturn }
This loops on LLVM 10. It is fixed in master as a side-effect of 8058196.
InstCombine log:
INSTCOMBINE ITERATION #1 on test
IC: ADDING: 8 instrs to worklist
IC: Visiting: %v = load i32, i32* %p, align 4
IC: Visiting: %v2 = load i32, i32* %p2, align 4
IC: Visiting: %cmp = icmp ult i32 %v2, %v
IC: Visiting: %sel = select i1 %cmp, i32* %p2, i32* %p
IC: Visiting: %p8 = bitcast i32* %p to i8*
IC: Visiting: %sel8 = bitcast i32* %sel to i8*
IC: Visiting: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %p8, i8* align 4 %sel8, i64 4, i1 false)
IC: ADD: %1 = bitcast i8* %sel8 to i32*
IC: ADD: %2 = bitcast i8* %p8 to i32*
IC: ADD: %3 = load i32, i32* %1
IC: ADD: store i32 %3, i32* %2
IC: Mod = call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %p8, i8* align 4 %sel8, i64 4, i1 false)
New = call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %p8, i8* align 4 %sel8, i64 0, i1 false)
IC: ADD: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %p8, i8* align 4 %sel8, i64 0, i1 false)
IC: Visiting: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %p8, i8* align 4 %sel8, i64 0, i1 false)
IC: ERASE call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %p8, i8* align 4 %sel8, i64 0, i1 false)
IC: ADD: %p8 = bitcast i32* %p to i8*
IC: ADD: %sel8 = bitcast i32* %sel to i8*
IC: Visiting: %sel8 = bitcast i32* %sel to i8*
IC: Visiting: %p8 = bitcast i32* %p to i8*
IC: Visiting: store i32 %3, i32* %2, align 4
IC: ADD: %3 = load i32, i32* %1
IC: ADD: store i32 %3, i32* %2
IC: ADD: store i32 %4, i32* %2, align 4
IC: Replacing %4 = load i32, i32* %1, align 4
with i32 undef
IC: ERASE %4 = load i32, i32* %1, align 4
IC: ERASE store i32 undef, i32* %2, align 4
IC: Visiting: store i32 %3, i32* %2, align 4
IC: ADD: %3 = load i32, i32* %1
IC: ADD: store i32 %3, i32* %2
IC: ADD: store i32 %4, i32* %2, align 4
IC: Replacing %4 = load i32, i32* %1, align 4
with i32 undef
IC: ERASE %4 = load i32, i32* %1, align 4
IC: ERASE store i32 undef, i32* %2, align 4
The memcpy() transform leaves behind a redundant bitcast-of-bitcast, which (prior to worklist order fixes) never gets a chance to be cleaned up, because the removeBitcastsFromLoadStoreOnMinMax() transform goes into an infinite loop when it encounters this structure:
define void @test(i32* %p, i32* %p2) {
%v = load i32, i32* %p, align 4
%v2 = load i32, i32* %p2, align 4
%cmp = icmp ult i32 %v2, %v
%sel = select i1 %cmp, i32* %p2, i32* %p
%p8 = bitcast i32* %p to i8*
%sel8 = bitcast i32* %sel to i8*
%1 = bitcast i8* %sel8 to i32*
%2 = bitcast i8* %p8 to i32*
%3 = load i32, i32* %1, align 4
store i32 %3, i32* %2, align 4
ret void
}