diff --git a/llvm/test/CodeGen/X86/bittest-big-integer.ll b/llvm/test/CodeGen/X86/bittest-big-integer.ll
index cc3dcf32ac0eb..06e7d4773c58d 100644
--- a/llvm/test/CodeGen/X86/bittest-big-integer.ll
+++ b/llvm/test/CodeGen/X86/bittest-big-integer.ll
@@ -1676,3 +1676,291 @@ define i1 @test_ne_i4096(ptr %word, i32 %position) nounwind {
   %cmp = icmp ne i4096 %test, 0
   ret i1 %cmp
 }
+
+; Special Cases
+
+; Multiple uses of the stored value
+define i1 @complement_cmpz_i128(ptr %word, i32 %position) nounwind {
+; X86-LABEL: complement_cmpz_i128:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $64, %esp
+; X86-NEXT: movzbl 12(%ebp), %ecx
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: shrb $3, %al
+; X86-NEXT: andb $12, %al
+; X86-NEXT: negb %al
+; X86-NEXT: movsbl %al, %esi
+; X86-NEXT: movl 36(%esp,%esi), %eax
+; X86-NEXT: movl 40(%esp,%esi), %edi
+; X86-NEXT: movl %edi, %edx
+; X86-NEXT: shldl %cl, %eax, %edx
+; X86-NEXT: movl 32(%esp,%esi), %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 44(%esp,%esi), %esi
+; X86-NEXT: shldl %cl, %edi, %esi
+; X86-NEXT: movl %ebx, %edi
+; X86-NEXT: shll %cl, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: shldl %cl, %ebx, %eax
+; X86-NEXT: movl 8(%ebp), %ecx
+; X86-NEXT: xorl 12(%ecx), %esi
+; X86-NEXT: xorl 8(%ecx), %edx
+; X86-NEXT: xorl 4(%ecx), %eax
+; X86-NEXT: xorl (%ecx), %edi
+; X86-NEXT: movl %edx, 8(%ecx)
+; X86-NEXT: movl %esi, 12(%ecx)
+; X86-NEXT: movl %edi, (%ecx)
+; X86-NEXT: movl %eax, 4(%ecx)
+; X86-NEXT: orl %esi, %eax
+; X86-NEXT: orl %edx, %edi
+; X86-NEXT: orl %eax, %edi
+; X86-NEXT: setne %al
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: complement_cmpz_i128:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: movl $1, %eax
+; SSE-NEXT: xorl %edx, %edx
+; SSE-NEXT: shldq %cl, %rax, %rdx
+; SSE-NEXT: shlq %cl, %rax
+; SSE-NEXT: xorl %esi, %esi
+; SSE-NEXT: testb $64, %cl
+; SSE-NEXT: cmovneq %rax, %rdx
+; SSE-NEXT: cmovneq %rsi, %rax
+; SSE-NEXT: xorq 8(%rdi), %rdx
+; SSE-NEXT: xorq (%rdi), %rax
+; SSE-NEXT: movq %rax, (%rdi)
+; SSE-NEXT: movq %rdx, 8(%rdi)
+; SSE-NEXT: orq %rdx, %rax
+; SSE-NEXT: setne %al
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: complement_cmpz_i128:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movl %esi, %ecx
+; AVX2-NEXT: movl $1, %eax
+; AVX2-NEXT: xorl %edx, %edx
+; AVX2-NEXT: shldq %cl, %rax, %rdx
+; AVX2-NEXT: xorl %esi, %esi
+; AVX2-NEXT: shlxq %rcx, %rax, %rax
+; AVX2-NEXT: testb $64, %cl
+; AVX2-NEXT: cmovneq %rax, %rdx
+; AVX2-NEXT: cmovneq %rsi, %rax
+; AVX2-NEXT: xorq 8(%rdi), %rdx
+; AVX2-NEXT: xorq (%rdi), %rax
+; AVX2-NEXT: movq %rax, (%rdi)
+; AVX2-NEXT: movq %rdx, 8(%rdi)
+; AVX2-NEXT: orq %rdx, %rax
+; AVX2-NEXT: setne %al
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: complement_cmpz_i128:
+; AVX512: # %bb.0:
+; AVX512-NEXT: movl %esi, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: movl $1, %edx
+; AVX512-NEXT: xorl %esi, %esi
+; AVX512-NEXT: shldq %cl, %rdx, %rsi
+; AVX512-NEXT: shlxq %rcx, %rdx, %rdx
+; AVX512-NEXT: testb $64, %cl
+; AVX512-NEXT: cmovneq %rdx, %rsi
+; AVX512-NEXT: cmovneq %rax, %rdx
+; AVX512-NEXT: xorq 8(%rdi), %rsi
+; AVX512-NEXT: xorq (%rdi), %rdx
+; AVX512-NEXT: movq %rdx, (%rdi)
+; AVX512-NEXT: movq %rsi, 8(%rdi)
+; AVX512-NEXT: orq %rsi, %rdx
+; AVX512-NEXT: setne %al
+; AVX512-NEXT: retq
+  %rem = and i32 %position, 127
+  %ofs = zext nneg i32 %rem to i128
+  %bit = shl nuw i128 1, %ofs
+  %ld = load i128, ptr %word
+  %res = xor i128 %ld, %bit
+  store i128 %res, ptr %word
+  %cmp = icmp ne i128 %res, 0
+  ret i1 %cmp
+}
+
+; Multiple loads in store chain
+define i32 @reset_multiload_i128(ptr %word, i32 %position, ptr %p) nounwind {
+; X86-LABEL: reset_multiload_i128:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $64, %esp
+; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: shrb $3, %al
+; X86-NEXT: andb $12, %al
+; X86-NEXT: negb %al
+; X86-NEXT: movsbl %al, %edi
+; X86-NEXT: movl 36(%esp,%edi), %edx
+; X86-NEXT: movl 40(%esp,%edi), %ebx
+; X86-NEXT: movl %ebx, %esi
+; X86-NEXT: shldl %cl, %edx, %esi
+; X86-NEXT: movl 32(%esp,%edi), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 44(%esp,%edi), %edi
+; X86-NEXT: shldl %cl, %ebx, %edi
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: shll %cl, %ebx
+; X86-NEXT: notl %ebx
+; X86-NEXT: movl 16(%ebp), %eax
+; X86-NEXT: movl (%eax), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 12(%ebp), %eax
+; X86-NEXT: andl $96, %eax
+; X86-NEXT: shrl $3, %eax
+; X86-NEXT: movl 8(%ebp), %ecx
+; X86-NEXT: movl (%ecx,%eax), %eax
+; X86-NEXT: andl %ebx, (%ecx)
+; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: shldl %cl, %ebx, %edx
+; X86-NEXT: notl %edx
+; X86-NEXT: movl 8(%ebp), %ebx
+; X86-NEXT: andl %edx, 4(%ebx)
+; X86-NEXT: notl %esi
+; X86-NEXT: andl %esi, 8(%ebx)
+; X86-NEXT: notl %edi
+; X86-NEXT: andl %edi, 12(%ebx)
+; X86-NEXT: btl %ecx, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: jae .LBB22_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: .LBB22_2:
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: reset_multiload_i128:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: movl $1, %esi
+; SSE-NEXT: xorl %r8d, %r8d
+; SSE-NEXT: shldq %cl, %rsi, %r8
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: shlq %cl, %rsi
+; SSE-NEXT: testb $64, %cl
+; SSE-NEXT: cmovneq %rsi, %r8
+; SSE-NEXT: cmovneq %rax, %rsi
+; SSE-NEXT: notq %r8
+; SSE-NEXT: notq %rsi
+; SSE-NEXT: movl %ecx, %r9d
+; SSE-NEXT: andl $96, %r9d
+; SSE-NEXT: shrl $3, %r9d
+; SSE-NEXT: movl (%rdi,%r9), %r9d
+; SSE-NEXT: btl %ecx, %r9d
+; SSE-NEXT: jb .LBB22_2
+; SSE-NEXT: # %bb.1:
+; SSE-NEXT: movl (%rdx), %eax
+; SSE-NEXT: .LBB22_2:
+; SSE-NEXT: andq %r8, 8(%rdi)
+; SSE-NEXT: andq %rsi, (%rdi)
+; SSE-NEXT: # kill: def $eax killed $eax killed $rax
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: reset_multiload_i128:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movl %esi, %ecx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: movl $1, %r8d
+; AVX2-NEXT: xorl %esi, %esi
+; AVX2-NEXT: shldq %cl, %r8, %rsi
+; AVX2-NEXT: shlxq %rcx, %r8, %r8
+; AVX2-NEXT: testb $64, %cl
+; AVX2-NEXT: cmovneq %r8, %rsi
+; AVX2-NEXT: cmovneq %rax, %r8
+; AVX2-NEXT: notq %rsi
+; AVX2-NEXT: notq %r8
+; AVX2-NEXT: movl %ecx, %r9d
+; AVX2-NEXT: andl $96, %r9d
+; AVX2-NEXT: shrl $3, %r9d
+; AVX2-NEXT: movl (%rdi,%r9), %r9d
+; AVX2-NEXT: btl %ecx, %r9d
+; AVX2-NEXT: jb .LBB22_2
+; AVX2-NEXT: # %bb.1:
+; AVX2-NEXT: movl (%rdx), %eax
+; AVX2-NEXT: .LBB22_2:
+; AVX2-NEXT: andq %rsi, 8(%rdi)
+; AVX2-NEXT: andq %r8, (%rdi)
+; AVX2-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: reset_multiload_i128:
+; AVX512: # %bb.0:
+; AVX512-NEXT: movl %esi, %ecx
+; AVX512-NEXT: movl $1, %r8d
+; AVX512-NEXT: xorl %esi, %esi
+; AVX512-NEXT: shldq %cl, %r8, %rsi
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: shlxq %rcx, %r8, %r8
+; AVX512-NEXT: testb $64, %cl
+; AVX512-NEXT: cmovneq %r8, %rsi
+; AVX512-NEXT: cmovneq %rax, %r8
+; AVX512-NEXT: notq %rsi
+; AVX512-NEXT: notq %r8
+; AVX512-NEXT: movl %ecx, %r9d
+; AVX512-NEXT: andl $96, %r9d
+; AVX512-NEXT: shrl $3, %r9d
+; AVX512-NEXT: movl (%rdi,%r9), %r9d
+; AVX512-NEXT: btl %ecx, %r9d
+; AVX512-NEXT: jb .LBB22_2
+; AVX512-NEXT: # %bb.1:
+; AVX512-NEXT: movl (%rdx), %eax
+; AVX512-NEXT: .LBB22_2:
+; AVX512-NEXT: andq %rsi, 8(%rdi)
+; AVX512-NEXT: andq %r8, (%rdi)
+; AVX512-NEXT: # kill: def $eax killed $eax killed $rax
+; AVX512-NEXT: retq
+  %rem = and i32 %position, 127
+  %ofs = zext nneg i32 %rem to i128
+  %bit = shl nuw i128 1, %ofs
+  %mask = xor i128 %bit, -1
+  %ld = load i128, ptr %word
+  %sel = load i32, ptr %p
+  %test = and i128 %ld, %bit
+  %res = and i128 %ld, %mask
+  %cmp = icmp eq i128 %test, 0
+  store i128 %res, ptr %word
+  %ret = select i1 %cmp, i32 %sel, i32 0
+  ret i32 %ret
+}