Skip to content

Commit 06300c5

Browse files
committed
x86_64: rewrite unsafe scalar int multiplication
1 parent 5db585f commit 06300c5

File tree

8 files changed: 615 additions & 150 deletions

8 files changed

+615
-150
lines changed

src/arch/x86_64/CodeGen.zig

Lines changed: 385 additions & 5 deletions
Large diffs are not rendered by default.

src/arch/x86_64/Emit.zig

Lines changed: 46 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -88,13 +88,32 @@ pub fn emitMir(emit: *Emit) Error!void {
8888
lowered_relocs[0].lowered_inst_index == lowered_index) : ({
8989
lowered_relocs = lowered_relocs[1..];
9090
}) switch (lowered_relocs[0].target) {
91-
.inst => |target| try relocs.append(emit.lower.allocator, .{
92-
.source = start_offset,
93-
.source_offset = end_offset - 4,
94-
.target = target,
95-
.target_offset = lowered_relocs[0].off,
96-
.length = @intCast(end_offset - start_offset),
97-
}),
91+
.inst => |target| {
92+
const inst_length: u4 = @intCast(end_offset - start_offset);
93+
const reloc_offset, const reloc_length = reloc_offset_length: {
94+
var reloc_offset = inst_length;
95+
var op_index: usize = lowered_inst.ops.len;
96+
while (true) {
97+
op_index -= 1;
98+
const op = lowered_inst.encoding.data.ops[op_index];
99+
if (op == .none) continue;
100+
const enc_length: u4 = @intCast(
101+
std.math.divCeil(u7, @intCast(op.immBitSize()), 8) catch unreachable,
102+
);
103+
reloc_offset -= enc_length;
104+
if (op_index == lowered_relocs[0].op_index)
105+
break :reloc_offset_length .{ reloc_offset, enc_length };
106+
}
107+
};
108+
try relocs.append(emit.lower.allocator, .{
109+
.inst_offset = start_offset,
110+
.inst_length = inst_length,
111+
.source_offset = reloc_offset,
112+
.source_length = reloc_length,
113+
.target = target,
114+
.target_offset = lowered_relocs[0].off,
115+
});
116+
},
98117
.table => try table_relocs.append(emit.lower.allocator, .{
99118
.source_offset = end_offset - 4,
100119
.target_offset = lowered_relocs[0].off,
@@ -409,7 +428,7 @@ pub fn emitMir(emit: *Emit) Error!void {
409428
} } };
410429
},
411430
.pseudo_dbg_local_am => loc: {
412-
const mem = emit.lower.mem(mir_inst.data.ax.payload);
431+
const mem = emit.lower.mem(undefined, mir_inst.data.ax.payload);
413432
break :loc .{ mir_inst.data.ax.air_inst, .{ .plus = .{
414433
base: {
415434
loc_buf[0] = switch (mem.base()) {
@@ -466,15 +485,18 @@ pub fn emitMir(emit: *Emit) Error!void {
466485
}
467486
}
468487
}
469-
{
470-
// TODO this function currently assumes all relocs via JMP/CALL instructions are 32bit in size.
471-
// This should be reversed like it is done in aarch64 MIR emit code: start with the smallest
472-
// possible resolution, i.e., 8bit, and iteratively converge on the minimum required resolution
473-
// until the entire decl is correctly emitted with all JMP/CALL instructions within range.
474-
for (relocs.items) |reloc| {
475-
const target = code_offset_mapping[reloc.target];
476-
const disp = @as(i64, @intCast(target)) - @as(i64, @intCast(reloc.source + reloc.length)) + reloc.target_offset;
477-
std.mem.writeInt(i32, emit.code.items[reloc.source_offset..][0..4], @intCast(disp), .little);
488+
for (relocs.items) |reloc| {
489+
const target = code_offset_mapping[reloc.target];
490+
const disp = @as(i64, @intCast(target)) - @as(i64, @intCast(reloc.inst_offset + reloc.inst_length)) + reloc.target_offset;
491+
const inst_bytes = emit.code.items[reloc.inst_offset..][0..reloc.inst_length];
492+
switch (reloc.source_length) {
493+
else => unreachable,
494+
inline 1, 4 => |source_length| std.mem.writeInt(
495+
@Type(.{ .int = .{ .signedness = .signed, .bits = @as(u16, 8) * source_length } }),
496+
inst_bytes[reloc.source_offset..][0..source_length],
497+
@intCast(disp),
498+
.little,
499+
),
478500
}
479501
}
480502
if (emit.lower.mir.table.len > 0) {
@@ -511,15 +533,17 @@ fn fail(emit: *Emit, comptime format: []const u8, args: anytype) Error {
511533

512534
const Reloc = struct {
513535
/// Offset of the instruction.
514-
source: u32,
536+
inst_offset: u32,
537+
/// Length of the instruction.
538+
inst_length: u4,
515539
/// Offset of the relocation within the instruction.
516-
source_offset: u32,
540+
source_offset: u4,
541+
/// Length of the relocation.
542+
source_length: u4,
517543
/// Target of the relocation.
518544
target: Mir.Inst.Index,
519-
/// Offset from the target instruction.
545+
/// Offset from the target.
520546
target_offset: i32,
521-
/// Length of the instruction.
522-
length: u5,
523547
};
524548

525549
const TableReloc = struct {

src/arch/x86_64/Encoding.zig

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -304,20 +304,20 @@ pub const Mnemonic = enum {
304304
jnc, jne, jng, jnge, jnl, jnle, jno, jnp, jns, jnz, jo, jp, jpe, jpo, jrcxz, js, jz,
305305
lahf, lar, lea, leave, lfence, lgdt, lidt, lldt, lmsw, loop, loope, loopne,
306306
lods, lodsb, lodsd, lodsq, lodsw,
307-
lsl, ltr, lzcnt,
307+
lsl, ltr,
308308
mfence, mov, movbe,
309309
movs, movsb, movsd, movsq, movsw,
310310
movsx, movsxd, movzx, mul,
311311
neg, nop, not,
312312
@"or", out, outs, outsb, outsd, outsw,
313-
pause, pop, popcnt, popf, popfd, popfq, push, pushfq,
313+
pause, pop, popf, popfd, popfq, push, pushfq,
314314
rcl, rcr,
315315
rdfsbase, rdgsbase, rdmsr, rdpid, rdpkru, rdpmc, rdrand, rdseed, rdssd, rdssq, rdtsc, rdtscp,
316-
ret, rol, ror, rorx, rsm,
317-
sahf, sal, sar, sarx, sbb,
316+
ret, rol, ror, rsm,
317+
sahf, sal, sar, sbb,
318318
scas, scasb, scasd, scasq, scasw,
319319
senduipi, serialize,
320-
shl, shld, shlx, shr, shrd, shrx,
320+
shl, shld, shr, shrd,
321321
stac, stc, std, sti, str, stui,
322322
sub, swapgs, syscall, sysenter, sysexit, sysret,
323323
seta, setae, setb, setbe, setc, sete, setg, setge, setl, setle, setna, setnae,
@@ -433,14 +433,15 @@ pub const Mnemonic = enum {
433433
roundpd, roundps, roundsd, roundss,
434434
// SSE4.2
435435
crc32, pcmpgtq,
436+
// ABM
437+
lzcnt, popcnt,
436438
// PCLMUL
437439
pclmulqdq,
438440
// AES
439441
aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist,
440442
// SHA
441443
sha1rnds4, sha1nexte, sha1msg1, sha1msg2, sha256msg1, sha256msg2, sha256rnds2,
442444
// AVX
443-
andn, bextr, blsi, blsmsk, blsr, bzhi, tzcnt,
444445
vaddpd, vaddps, vaddsd, vaddss, vaddsubpd, vaddsubps,
445446
vaesdec, vaesdeclast, vaesenc, vaesenclast, vaesimc, vaeskeygenassist,
446447
vandnpd, vandnps, vandpd, vandps,
@@ -506,6 +507,10 @@ pub const Mnemonic = enum {
506507
vtestpd, vtestps,
507508
vucomisd, vucomiss, vunpckhpd, vunpckhps, vunpcklpd, vunpcklps,
508509
vxorpd, vxorps,
510+
// BMI
511+
andn, bextr, blsi, blsmsk, blsr, tzcnt,
512+
// BMI2
513+
bzhi, mulx, pdep, pext, rorx, sarx, shlx, shrx,
509514
// F16C
510515
vcvtph2ps, vcvtps2ph,
511516
// FMA

0 commit comments

Comments
 (0)