Skip to content

Commit 3cabdab

Browse files
folkertdev authored and sayantn committed
add funnel shift cranelift implementation
1 parent 4e5a6d1 commit 3cabdab

File tree

1 file changed

+60
-0
lines changed
  • compiler/rustc_codegen_cranelift/src/intrinsics

1 file changed

+60
-0
lines changed

compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -655,6 +655,66 @@ fn codegen_regular_intrinsic_call<'tcx>(
655655
let res = fx.bcx.ins().rotr(x, y);
656656
ret.write_cvalue(fx, CValue::by_val(res, layout));
657657
}
658+
sym::funnel_shl => {
    // Funnel shift left, lowered as
    //     (x << s) | (if inv == 0 { 0 } else { y >> inv })
    // where `s = z % width` and `inv = (width - s) % width`.
    intrinsic_args!(fx, args => (x, y, z); intrinsic);
    let layout = x.layout();

    // All shift-amount arithmetic below is done on I32 values.
    // NOTE(review): this assumes `z` loads as an I32 scalar (a `u32` shift operand) —
    // confirm against the intrinsic's declaration.
    let width_bits = fx.bcx.ins().iconst(types::I32, layout.size.bits() as i64);

    let lhs_bits = x.load_scalar(fx);
    let rhs_bits = y.load_scalar(fx);
    let raw_shift_bits = z.load_scalar(fx);

    // Zero of the *operand* type; only used as the replacement value in the `select`.
    // NOTE(review): `iconst` may not accept a 128-bit `ty` — confirm how u128 operands
    // are expected to be handled here.
    let ty = fx.bcx.func.dfg.value_type(lhs_bits);
    let zero = fx.bcx.ins().iconst(ty, 0);

    // Reduce the shift amount into `0..width`.
    let shift_bits = fx.bcx.ins().urem(raw_shift_bits, width_bits);

    // lhs_bits << shift_bits
    let shl = fx.bcx.ins().ishl(lhs_bits, shift_bits);

    // width - shift_bits, wrapped back into `0..width` (it equals `width` when the
    // shift amount is zero).
    let inv_shift_bits = fx.bcx.ins().isub(width_bits, shift_bits);
    let inv_shift_bits_mod = fx.bcx.ins().urem(inv_shift_bits, width_bits);

    // rhs_bits.bounded_shr(inv_shift_bits): when the inverse shift wrapped to zero the
    // real shift distance is the full width, so the contribution of `rhs_bits` is zero.
    let shr = fx.bcx.ins().ushr(rhs_bits, inv_shift_bits_mod);
    // Compare against an immediate so the constant takes the type of the shift amount;
    // comparing against `zero` (of type `ty`) mixes operand types whenever `ty` is not
    // the shift amount's type.
    let is_zero = fx.bcx.ins().icmp_imm(IntCC::Equal, inv_shift_bits_mod, 0);
    let shr = fx.bcx.ins().select(is_zero, zero, shr);

    let res = fx.bcx.ins().bor(shr, shl);
    ret.write_cvalue(fx, CValue::by_val(res, layout));
}
688+
sym::funnel_shr => {
    // Funnel shift right, lowered as
    //     (y >> s) | (if inv == 0 { 0 } else { x << inv })
    // where `s = z % width` and `inv = (width - s) % width`.
    intrinsic_args!(fx, args => (x, y, z); intrinsic);
    let layout = x.layout();

    // All shift-amount arithmetic below is done on I32 values.
    // NOTE(review): this assumes `z` loads as an I32 scalar (a `u32` shift operand) —
    // confirm against the intrinsic's declaration.
    let width_bits = fx.bcx.ins().iconst(types::I32, layout.size.bits() as i64);

    let lhs_bits = x.load_scalar(fx);
    let rhs_bits = y.load_scalar(fx);
    let raw_shift_bits = z.load_scalar(fx);

    // Zero of the *operand* type; only used as the replacement value in the `select`.
    // NOTE(review): `iconst` may not accept a 128-bit `ty` — confirm how u128 operands
    // are expected to be handled here.
    let ty = fx.bcx.func.dfg.value_type(lhs_bits);
    let zero = fx.bcx.ins().iconst(ty, 0);

    // Reduce the shift amount into `0..width`.
    let shift_bits = fx.bcx.ins().urem(raw_shift_bits, width_bits);

    // rhs_bits >> shift_bits
    let shr = fx.bcx.ins().ushr(rhs_bits, shift_bits);

    // width - shift_bits, wrapped back into `0..width` (it equals `width` when the
    // shift amount is zero).
    let inv_shift_bits = fx.bcx.ins().isub(width_bits, shift_bits);
    let inv_shift_bits_mod = fx.bcx.ins().urem(inv_shift_bits, width_bits);

    // lhs_bits.bounded_shl(inv_shift_bits): when the inverse shift wrapped to zero the
    // real shift distance is the full width, so the contribution of `lhs_bits` is zero.
    let shl = fx.bcx.ins().ishl(lhs_bits, inv_shift_bits_mod);
    // Compare against an immediate so the constant takes the type of the shift amount;
    // comparing against `zero` (of type `ty`) mixes operand types whenever `ty` is not
    // the shift amount's type.
    let is_zero = fx.bcx.ins().icmp_imm(IntCC::Equal, inv_shift_bits_mod, 0);
    let shl = fx.bcx.ins().select(is_zero, zero, shl);

    let res = fx.bcx.ins().bor(shr, shl);
    ret.write_cvalue(fx, CValue::by_val(res, layout));
}
658718

659719
// The only difference between offset and arith_offset is regarding UB. Because Cranelift
660720
// doesn't have UB both are codegen'ed the same way

0 commit comments

Comments
 (0)