@@ -1082,3 +1082,237 @@ define i32 @reset_multiload_i128(ptr %word, i32 %position, ptr %p) nounwind {
   %ret = select i1 %cmp, i32 %sel, i32 0
   ret i32 %ret
 }
+
+; BTC/BT/BTS sequence on same i128
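+; The IR below toggles bit %pos0 (btc: xor with a shifted 1), tests bit %pos1
+; of the toggled value (bt: and + icmp eq 0), and sets bit %pos2 (bts: or),
+; all through a single i128 load and a single i128 store.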
+define i1 @sequence_i128(ptr %word, i32 %pos0, i32 %pos1, i32 %pos2) nounwind {
+; X86-LABEL: sequence_i128:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $144, %esp
+; X86-NEXT: movb 20(%ebp), %ch
+; X86-NEXT: movb 12(%ebp), %cl
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: shrb $3, %al
+; X86-NEXT: andb $12, %al
+; X86-NEXT: negb %al
+; X86-NEXT: movsbl %al, %eax
+; X86-NEXT: movl 56(%esp,%eax), %edx
+; X86-NEXT: movl 60(%esp,%eax), %esi
+; X86-NEXT: shldl %cl, %edx, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 48(%esp,%eax), %edi
+; X86-NEXT: movl 52(%esp,%eax), %ebx
+; X86-NEXT: shldl %cl, %ebx, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %edi, %ebx
+; X86-NEXT: shll %cl, %edi
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movb %ch, %al
+; X86-NEXT: shrb $3, %al
+; X86-NEXT: andb $12, %al
+; X86-NEXT: negb %al
+; X86-NEXT: movsbl %al, %eax
+; X86-NEXT: movl 84(%esp,%eax), %edx
+; X86-NEXT: movl 88(%esp,%eax), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movzbl 20(%ebp), %ecx
+; X86-NEXT: shldl %cl, %edx, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 80(%esp,%eax), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 92(%esp,%eax), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: shldl %cl, %esi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: shll %cl, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl %cl, %esi, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: xorl 8(%eax), %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: xorl 12(%eax), %esi
+; X86-NEXT: xorl (%eax), %edi
+; X86-NEXT: xorl 4(%eax), %ebx
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 16(%ebp), %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: andb $96, %al
+; X86-NEXT: shrb $3, %al
+; X86-NEXT: movzbl %al, %eax
+; X86-NEXT: movl 96(%esp,%eax), %eax
+; X86-NEXT: movl 16(%ebp), %ecx
+; X86-NEXT: btl %ecx, %eax
+; X86-NEXT: setae %al
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: movl 8(%ebp), %ecx
+; X86-NEXT: movl %edx, 8(%ecx)
+; X86-NEXT: movl %esi, 12(%ecx)
+; X86-NEXT: movl %edi, (%ecx)
+; X86-NEXT: movl %ebx, 4(%ecx)
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+;
+; SSE-LABEL: sequence_i128:
+; SSE: # %bb.0:
+; SSE-NEXT: movl %ecx, %eax
+; SSE-NEXT: movl %esi, %ecx
+; SSE-NEXT: movl $1, %r8d
+; SSE-NEXT: xorl %esi, %esi
+; SSE-NEXT: shldq %cl, %r8, %rsi
+; SSE-NEXT: movl $1, %r9d
+; SSE-NEXT: shlq %cl, %r9
+; SSE-NEXT: xorl %r11d, %r11d
+; SSE-NEXT: testb $64, %cl
+; SSE-NEXT: cmovneq %r9, %rsi
+; SSE-NEXT: cmovneq %r11, %r9
+; SSE-NEXT: xorl %r10d, %r10d
+; SSE-NEXT: movl %eax, %ecx
+; SSE-NEXT: shldq %cl, %r8, %r10
+; SSE-NEXT: shlq %cl, %r8
+; SSE-NEXT: testb $64, %al
+; SSE-NEXT: cmovneq %r8, %r10
+; SSE-NEXT: cmovneq %r11, %r8
+; SSE-NEXT: xorq 8(%rdi), %rsi
+; SSE-NEXT: xorq (%rdi), %r9
+; SSE-NEXT: movl %edx, %ecx
+; SSE-NEXT: andb $32, %cl
+; SSE-NEXT: movq %r9, %rax
+; SSE-NEXT: shrdq %cl, %rsi, %rax
+; SSE-NEXT: movq %rsi, %r11
+; SSE-NEXT: shrq %cl, %r11
+; SSE-NEXT: testb $64, %dl
+; SSE-NEXT: cmoveq %rax, %r11
+; SSE-NEXT: btl %edx, %r11d
+; SSE-NEXT: setae %al
+; SSE-NEXT: orq %r10, %rsi
+; SSE-NEXT: orq %r8, %r9
+; SSE-NEXT: movq %r9, (%rdi)
+; SSE-NEXT: movq %rsi, 8(%rdi)
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: sequence_i128:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movl %ecx, %eax
+; AVX2-NEXT: movl %esi, %ecx
+; AVX2-NEXT: xorl %r9d, %r9d
+; AVX2-NEXT: movl $1, %r10d
+; AVX2-NEXT: xorl %esi, %esi
+; AVX2-NEXT: shldq %cl, %r10, %rsi
+; AVX2-NEXT: shlxq %rcx, %r10, %r8
+; AVX2-NEXT: testb $64, %cl
+; AVX2-NEXT: cmovneq %r8, %rsi
+; AVX2-NEXT: cmovneq %r9, %r8
+; AVX2-NEXT: xorl %r11d, %r11d
+; AVX2-NEXT: movl %eax, %ecx
+; AVX2-NEXT: shldq %cl, %r10, %r11
+; AVX2-NEXT: shlxq %rax, %r10, %r10
+; AVX2-NEXT: testb $64, %al
+; AVX2-NEXT: cmovneq %r10, %r11
+; AVX2-NEXT: cmovneq %r9, %r10
+; AVX2-NEXT: xorq 8(%rdi), %rsi
+; AVX2-NEXT: xorq (%rdi), %r8
+; AVX2-NEXT: movl %edx, %ecx
+; AVX2-NEXT: andb $32, %cl
+; AVX2-NEXT: movq %r8, %rax
+; AVX2-NEXT: shrdq %cl, %rsi, %rax
+; AVX2-NEXT: shrxq %rcx, %rsi, %rcx
+; AVX2-NEXT: testb $64, %dl
+; AVX2-NEXT: cmoveq %rax, %rcx
+; AVX2-NEXT: btl %edx, %ecx
+; AVX2-NEXT: setae %al
+; AVX2-NEXT: orq %r11, %rsi
+; AVX2-NEXT: orq %r10, %r8
+; AVX2-NEXT: movq %r8, (%rdi)
+; AVX2-NEXT: movq %rsi, 8(%rdi)
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: sequence_i128:
+; AVX512: # %bb.0:
+; AVX512-NEXT: movl %ecx, %eax
+; AVX512-NEXT: movl %esi, %ecx
+; AVX512-NEXT: movl $1, %r9d
+; AVX512-NEXT: xorl %esi, %esi
+; AVX512-NEXT: shldq %cl, %r9, %rsi
+; AVX512-NEXT: xorl %r10d, %r10d
+; AVX512-NEXT: shlxq %rcx, %r9, %r8
+; AVX512-NEXT: testb $64, %cl
+; AVX512-NEXT: cmovneq %r8, %rsi
+; AVX512-NEXT: cmovneq %r10, %r8
+; AVX512-NEXT: xorl %r11d, %r11d
+; AVX512-NEXT: movl %eax, %ecx
+; AVX512-NEXT: shldq %cl, %r9, %r11
+; AVX512-NEXT: shlxq %rax, %r9, %r9
+; AVX512-NEXT: testb $64, %al
+; AVX512-NEXT: cmovneq %r9, %r11
+; AVX512-NEXT: cmovneq %r10, %r9
+; AVX512-NEXT: xorq 8(%rdi), %rsi
+; AVX512-NEXT: xorq (%rdi), %r8
+; AVX512-NEXT: movl %edx, %ecx
+; AVX512-NEXT: andb $32, %cl
+; AVX512-NEXT: movq %r8, %rax
+; AVX512-NEXT: shrdq %cl, %rsi, %rax
+; AVX512-NEXT: shrxq %rcx, %rsi, %rcx
+; AVX512-NEXT: testb $64, %dl
+; AVX512-NEXT: cmoveq %rax, %rcx
+; AVX512-NEXT: btl %edx, %ecx
+; AVX512-NEXT: setae %al
+; AVX512-NEXT: orq %r11, %rsi
+; AVX512-NEXT: orq %r9, %r8
+; AVX512-NEXT: movq %r8, (%rdi)
+; AVX512-NEXT: movq %rsi, 8(%rdi)
+; AVX512-NEXT: retq
+  %rem0 = and i32 %pos0, 127
+  %rem1 = and i32 %pos1, 127
+  %rem2 = and i32 %pos2, 127
+  %ofs0 = zext nneg i32 %rem0 to i128
+  %ofs1 = zext nneg i32 %rem1 to i128
+  %ofs2 = zext nneg i32 %rem2 to i128
+  %bit0 = shl nuw i128 1, %ofs0
+  %bit1 = shl nuw i128 1, %ofs1
+  %bit2 = shl nuw i128 1, %ofs2
+  %ld = load i128, ptr %word
+  %res0 = xor i128 %ld, %bit0
+  %test1 = and i128 %res0, %bit1
+  %cmp1 = icmp eq i128 %test1, 0
+  %res2 = or i128 %res0, %bit2
+  store i128 %res2, ptr %word
+  ret i1 %cmp1
+}