Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clang/include/clang/Basic/BuiltinsAMDGPU.def
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,7 @@ TARGET_BUILTIN(__builtin_amdgcn_s_wait_tensorcnt, "vIUs", "n", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_tanh_bf16, "yy", "nc", "bf16-trans-insts")
TARGET_BUILTIN(__builtin_amdgcn_rcp_bf16, "yy", "nc", "bf16-trans-insts")
TARGET_BUILTIN(__builtin_amdgcn_rsq_bf16, "yy", "nc", "bf16-trans-insts")
TARGET_BUILTIN(__builtin_amdgcn_log_bf16, "yy", "nc", "bf16-trans-insts")

TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts")
Expand Down
1 change: 1 addition & 0 deletions clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:
return EmitAMDGPUDispatchPtr(*this, E);
case AMDGPU::BI__builtin_amdgcn_logf:
case AMDGPU::BI__builtin_amdgcn_log_bf16:
return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_log);
case AMDGPU::BI__builtin_amdgcn_exp2f:
return emitBuiltinWithOneOverloadedType<1>(*this, E,
Expand Down
19 changes: 19 additions & 0 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,25 @@ void test_rsq_bf16(global __bf16* out, __bf16 a)
*out = __builtin_amdgcn_rsq_bf16(a);
}

// CHECK-LABEL: @test_log_bf16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5)
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: store bfloat [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 2
// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR_ASCAST]], align 2
// CHECK-NEXT: [[TMP1:%.*]] = call bfloat @llvm.amdgcn.log.bf16(bfloat [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: store bfloat [[TMP1]], ptr addrspace(1) [[TMP2]], align 2
// CHECK-NEXT: ret void
//
// Applies __builtin_amdgcn_log_bf16 to the __bf16 argument `a` and stores the
// result through `out`. The assertions above expect this to become a single
// call to @llvm.amdgcn.log.bf16. Keep the body as one statement: the expected
// IR is matched line by line, so extra temporaries would change it.
void test_log_bf16(global __bf16* out, __bf16 a)
{
*out = __builtin_amdgcn_log_bf16(a);
}

// CHECK-LABEL: @test_cvt_f16_fp8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/VOP1Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,7 @@ defm V_TANH_BF16 : VOP1Inst_t16 <"v_tanh_bf16", VOP_BF16_BF16, int_amdgcn_tanh>;
defm V_RCP_BF16 : VOP1Inst_t16 <"v_rcp_bf16", VOP_BF16_BF16, AMDGPUrcp>;
defm V_SQRT_BF16 : VOP1Inst_t16 <"v_sqrt_bf16", VOP_BF16_BF16, any_amdgcn_sqrt>;
defm V_RSQ_BF16 : VOP1Inst_t16 <"v_rsq_bf16", VOP_BF16_BF16, AMDGPUrsq>;
defm V_LOG_BF16 : VOP1Inst_t16 <"v_log_bf16", VOP_BF16_BF16, AMDGPUlogf16>;
}
} // End TRANS = 1, SchedRW = [WriteTrans32]
defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
Expand Down Expand Up @@ -1143,6 +1144,7 @@ defm V_CVT_F16_BF8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x078>;
defm V_RCP_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x079>;
defm V_SQRT_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07a>;
defm V_RSQ_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07b>;
defm V_LOG_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07c>;

//===----------------------------------------------------------------------===//
// GFX10.
Expand Down
28 changes: 28 additions & 0 deletions llvm/test/CodeGen/AMDGPU/bf16-math.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefix=GCN %s

; TODO: Add global-isel when it can support bf16

; Generic llvm.log2 on a bfloat passed as a regular (non-inreg) amdgpu_ps
; argument. The expected code is one v_log_bf16_e32 whose source is the vector
; register v2, followed by a 16-bit global store of the result to %out.
define amdgpu_ps void @llvm_log2_bf16_v(ptr addrspace(1) %out, bfloat %src) {
; GCN-LABEL: llvm_log2_bf16_v:
; GCN: ; %bb.0:
; GCN-NEXT: v_log_bf16_e32 v2, v2
; GCN-NEXT: global_store_b16 v[0:1], v2, off
; GCN-NEXT: s_endpgm
%log = call bfloat @llvm.log2.bf16(bfloat %src)
store bfloat %log, ptr addrspace(1) %out, align 2
ret void
}

; Same as the vector-register variant, but %src is marked inreg. The expected
; code feeds the scalar register s0 directly to v_log_bf16_e32 as its source
; operand and writes the result to v2 before the 16-bit global store.
define amdgpu_ps void @llvm_log2_bf16_s(ptr addrspace(1) %out, bfloat inreg %src) {
; GCN-LABEL: llvm_log2_bf16_s:
; GCN: ; %bb.0:
; GCN-NEXT: v_log_bf16_e32 v2, s0
; GCN-NEXT: global_store_b16 v[0:1], v2, off
; GCN-NEXT: s_endpgm
%log = call bfloat @llvm.log2.bf16(bfloat %src)
store bfloat %log, ptr addrspace(1) %out, align 2
ret void
}

declare bfloat @llvm.log2.bf16(bfloat)
33 changes: 33 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.log.bf16.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN %s
; xUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GCN %s

; FIXME: GlobalISel does not work with bf16

declare bfloat @llvm.amdgcn.log.bf16(bfloat) #0

; GCN-LABEL: {{^}}log_bf16:
; GCN: v_log_bf16_e32 {{v[0-9]+}}, {{s[0-9]+}}
; Calls the target intrinsic llvm.amdgcn.log.bf16 on a bfloat kernel argument
; and stores the result to %out. The assertion above expects the source operand
; of v_log_bf16_e32 to be a scalar register.
define amdgpu_kernel void @log_bf16(ptr addrspace(1) %out, bfloat %src) #1 {
%log = call bfloat @llvm.amdgcn.log.bf16(bfloat %src) #0
store bfloat %log, ptr addrspace(1) %out, align 2
ret void
}

; GCN-LABEL: {{^}}log_bf16_constant_4:
; GCN: v_log_bf16_e32 v0, 4.0
; Calls llvm.amdgcn.log.bf16 on the constant 4.0 and stores the result to %out.
; The assertion above expects 4.0 to appear directly as the instruction's
; source operand.
define amdgpu_kernel void @log_bf16_constant_4(ptr addrspace(1) %out) #1 {
%log = call bfloat @llvm.amdgcn.log.bf16(bfloat 4.0) #0
store bfloat %log, ptr addrspace(1) %out, align 2
ret void
}

; GCN-LABEL: {{^}}log_bf16_constant_100:
; GCN: v_log_bf16_e32 {{v[0-9]+}}, 0x42c8
; Calls llvm.amdgcn.log.bf16 on the constant 100.0 and stores the result to
; %out. The assertion above expects the operand to be emitted as the 16-bit
; literal 0x42c8 rather than as a floating-point immediate like 4.0.
define amdgpu_kernel void @log_bf16_constant_100(ptr addrspace(1) %out) #1 {
%log = call bfloat @llvm.amdgcn.log.bf16(bfloat 100.0) #0
store bfloat %log, ptr addrspace(1) %out, align 2
ret void
}

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
240 changes: 240 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.log2.bf16.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 %s -o - | FileCheck -check-prefix=GFX-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 %s -o - | FileCheck -check-prefix=GFX-SDAG-FAKE16 %s
; xUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 %s -o - | FileCheck -check-prefix=GFX-GISEL-TRUE16 %s
; xUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 %s -o - | FileCheck -check-prefix=GFX-GISEL-FAKE16 %s

; Baseline scalar case: llvm.log2 on a bfloat function argument. Both the
; true16 and fake16 runs expect a single v_log_bf16_e32; they differ only in
; register naming (v0.l versus v0).
define bfloat @v_log2_bf16(bfloat %in) {
; GFX-SDAG-TRUE16-LABEL: v_log2_bf16:
; GFX-SDAG-TRUE16: ; %bb.0:
; GFX-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX-SDAG-TRUE16-NEXT: v_log_bf16_e32 v0.l, v0.l
; GFX-SDAG-TRUE16-NEXT: s_set_pc_i64 s[30:31]
;
; GFX-SDAG-FAKE16-LABEL: v_log2_bf16:
; GFX-SDAG-FAKE16: ; %bb.0:
; GFX-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX-SDAG-FAKE16-NEXT: v_log_bf16_e32 v0, v0
; GFX-SDAG-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%result = call bfloat @llvm.log2.bf16(bfloat %in)
ret bfloat %result
}

; llvm.log2 of llvm.fabs on a scalar bfloat. The expected code has no separate
; fabs instruction: the absolute value appears as the |...| source modifier on
; the e64 form of v_log_bf16.
define bfloat @v_log2_fabs_bf16(bfloat %in) {
; GFX-SDAG-TRUE16-LABEL: v_log2_fabs_bf16:
; GFX-SDAG-TRUE16: ; %bb.0:
; GFX-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX-SDAG-TRUE16-NEXT: v_log_bf16_e64 v0.l, |v0.l|
; GFX-SDAG-TRUE16-NEXT: s_set_pc_i64 s[30:31]
;
; GFX-SDAG-FAKE16-LABEL: v_log2_fabs_bf16:
; GFX-SDAG-FAKE16: ; %bb.0:
; GFX-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX-SDAG-FAKE16-NEXT: v_log_bf16_e64 v0, |v0|
; GFX-SDAG-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%fabs = call bfloat @llvm.fabs.bf16(bfloat %in)
%result = call bfloat @llvm.log2.bf16(bfloat %fabs)
ret bfloat %result
}

; llvm.log2 of fneg(fabs(x)) on a scalar bfloat. The expected code combines
; both operations into the -|...| source modifier on the e64 form of
; v_log_bf16, with no extra instructions.
define bfloat @v_log2_fneg_fabs_bf16(bfloat %in) {
; GFX-SDAG-TRUE16-LABEL: v_log2_fneg_fabs_bf16:
; GFX-SDAG-TRUE16: ; %bb.0:
; GFX-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX-SDAG-TRUE16-NEXT: v_log_bf16_e64 v0.l, -|v0.l|
; GFX-SDAG-TRUE16-NEXT: s_set_pc_i64 s[30:31]
;
; GFX-SDAG-FAKE16-LABEL: v_log2_fneg_fabs_bf16:
; GFX-SDAG-FAKE16: ; %bb.0:
; GFX-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX-SDAG-FAKE16-NEXT: v_log_bf16_e64 v0, -|v0|
; GFX-SDAG-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%fabs = call bfloat @llvm.fabs.bf16(bfloat %in)
%fneg.fabs = fneg bfloat %fabs
%result = call bfloat @llvm.log2.bf16(bfloat %fneg.fabs)
ret bfloat %result
}

; llvm.log2 of fneg(x) on a scalar bfloat. The expected code expresses the
; negation as the "-" source modifier on the e64 form of v_log_bf16.
define bfloat @v_log2_fneg_bf16(bfloat %in) {
; GFX-SDAG-TRUE16-LABEL: v_log2_fneg_bf16:
; GFX-SDAG-TRUE16: ; %bb.0:
; GFX-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX-SDAG-TRUE16-NEXT: v_log_bf16_e64 v0.l, -v0.l
; GFX-SDAG-TRUE16-NEXT: s_set_pc_i64 s[30:31]
;
; GFX-SDAG-FAKE16-LABEL: v_log2_fneg_bf16:
; GFX-SDAG-FAKE16: ; %bb.0:
; GFX-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX-SDAG-FAKE16-NEXT: v_log_bf16_e64 v0, -v0
; GFX-SDAG-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%fneg = fneg bfloat %in
%result = call bfloat @llvm.log2.bf16(bfloat %fneg)
ret bfloat %result
}

; Scalar llvm.log2 with the `fast` flag on the call. The expected instruction
; sequence is the same as for the call without fast-math flags in v_log2_bf16.
define bfloat @v_log2_bf16_fast(bfloat %in) {
; GFX-SDAG-TRUE16-LABEL: v_log2_bf16_fast:
; GFX-SDAG-TRUE16: ; %bb.0:
; GFX-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX-SDAG-TRUE16-NEXT: v_log_bf16_e32 v0.l, v0.l
; GFX-SDAG-TRUE16-NEXT: s_set_pc_i64 s[30:31]
;
; GFX-SDAG-FAKE16-LABEL: v_log2_bf16_fast:
; GFX-SDAG-FAKE16: ; %bb.0:
; GFX-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX-SDAG-FAKE16-NEXT: v_log_bf16_e32 v0, v0
; GFX-SDAG-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%result = call fast bfloat @llvm.log2.bf16(bfloat %in)
ret bfloat %result
}

; llvm.log2 on <2 x bfloat>. The expected code uses one v_log_bf16 per element.
; The true16 run operates on the v0.h and v0.l halves in place. The fake16 run
; shifts the high element into its own register with v_lshrrev_b32 and
; recombines the two results with v_perm_b32.
define <2 x bfloat> @v_log2_v2bf16(<2 x bfloat> %in) {
; GFX-SDAG-TRUE16-LABEL: v_log2_v2bf16:
; GFX-SDAG-TRUE16: ; %bb.0:
; GFX-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX-SDAG-TRUE16-NEXT: v_log_bf16_e32 v0.h, v0.h
; GFX-SDAG-TRUE16-NEXT: v_log_bf16_e32 v0.l, v0.l
; GFX-SDAG-TRUE16-NEXT: s_set_pc_i64 s[30:31]
;
; GFX-SDAG-FAKE16-LABEL: v_log2_v2bf16:
; GFX-SDAG-FAKE16: ; %bb.0:
; GFX-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX-SDAG-FAKE16-NEXT: v_log_bf16_e32 v0, v0
; GFX-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
; GFX-SDAG-FAKE16-NEXT: v_log_bf16_e32 v1, v1
; GFX-SDAG-FAKE16-NEXT: v_nop
; GFX-SDAG-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX-SDAG-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%result = call <2 x bfloat> @llvm.log2.v2bf16(<2 x bfloat> %in)
ret <2 x bfloat> %result
}

; llvm.log2 of llvm.fabs on <2 x bfloat>. Unlike the scalar case, the expected
; code does not use the |...| source modifier. The sign bits are cleared
; explicitly (v_and_b32 with 0x7fff7fff for the packed value, v_bfe_u32 16, 15
; for the high element) before the two v_log_bf16_e32 instructions.
define <2 x bfloat> @v_log2_fabs_v2bf16(<2 x bfloat> %in) {
; GFX-SDAG-TRUE16-LABEL: v_log2_fabs_v2bf16:
; GFX-SDAG-TRUE16: ; %bb.0:
; GFX-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX-SDAG-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
; GFX-SDAG-TRUE16-NEXT: v_bfe_u32 v2, v0, 16, 15
; GFX-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX-SDAG-TRUE16-NEXT: v_log_bf16_e32 v0.l, v1.l
; GFX-SDAG-TRUE16-NEXT: v_log_bf16_e32 v0.h, v2.l
; GFX-SDAG-TRUE16-NEXT: s_set_pc_i64 s[30:31]
;
; GFX-SDAG-FAKE16-LABEL: v_log2_fabs_v2bf16:
; GFX-SDAG-FAKE16: ; %bb.0:
; GFX-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
; GFX-SDAG-FAKE16-NEXT: v_bfe_u32 v0, v0, 16, 15
; GFX-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX-SDAG-FAKE16-NEXT: v_log_bf16_e32 v1, v1
; GFX-SDAG-FAKE16-NEXT: v_log_bf16_e32 v0, v0
; GFX-SDAG-FAKE16-NEXT: v_nop
; GFX-SDAG-FAKE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GFX-SDAG-FAKE16-NEXT: v_perm_b32 v0, v0, v1, 0x5040100
; GFX-SDAG-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%fabs = call <2 x bfloat> @llvm.fabs.v2bf16(<2 x bfloat> %in)
%result = call <2 x bfloat> @llvm.log2.v2bf16(<2 x bfloat> %fabs)
ret <2 x bfloat> %result
}

; llvm.log2 of fneg(fabs(x)) on <2 x bfloat>. In the expected code the fabs is
; done with explicit bit masking (v_and_b32 with 0x7fff7fff and v_bfe_u32),
; while the fneg appears as the "-" source modifier on each v_log_bf16_e64.
define <2 x bfloat> @v_log2_fneg_fabs_v2bf16(<2 x bfloat> %in) {
; GFX-SDAG-TRUE16-LABEL: v_log2_fneg_fabs_v2bf16:
; GFX-SDAG-TRUE16: ; %bb.0:
; GFX-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX-SDAG-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
; GFX-SDAG-TRUE16-NEXT: v_bfe_u32 v2, v0, 16, 15
; GFX-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX-SDAG-TRUE16-NEXT: v_log_bf16_e64 v0.l, -v1.l
; GFX-SDAG-TRUE16-NEXT: v_log_bf16_e64 v0.h, -v2.l
; GFX-SDAG-TRUE16-NEXT: s_set_pc_i64 s[30:31]
;
; GFX-SDAG-FAKE16-LABEL: v_log2_fneg_fabs_v2bf16:
; GFX-SDAG-FAKE16: ; %bb.0:
; GFX-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0
; GFX-SDAG-FAKE16-NEXT: v_bfe_u32 v0, v0, 16, 15
; GFX-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX-SDAG-FAKE16-NEXT: v_log_bf16_e64 v1, -v1
; GFX-SDAG-FAKE16-NEXT: v_log_bf16_e64 v0, -v0
; GFX-SDAG-FAKE16-NEXT: v_nop
; GFX-SDAG-FAKE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1)
; GFX-SDAG-FAKE16-NEXT: v_perm_b32 v0, v0, v1, 0x5040100
; GFX-SDAG-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%fabs = call <2 x bfloat> @llvm.fabs.v2bf16(<2 x bfloat> %in)
%fneg.fabs = fneg <2 x bfloat> %fabs
%result = call <2 x bfloat> @llvm.log2.v2bf16(<2 x bfloat> %fneg.fabs)
ret <2 x bfloat> %result
}

; llvm.log2 of fneg(x) on <2 x bfloat>. The expected code uses one
; v_log_bf16_e64 per element, each carrying the negation as a "-" source
; modifier. The fake16 run additionally splits the input with v_lshrrev_b32
; and repacks the results with v_perm_b32.
define <2 x bfloat> @v_log2_fneg_v2bf16(<2 x bfloat> %in) {
; GFX-SDAG-TRUE16-LABEL: v_log2_fneg_v2bf16:
; GFX-SDAG-TRUE16: ; %bb.0:
; GFX-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX-SDAG-TRUE16-NEXT: v_log_bf16_e64 v0.h, -v0.h
; GFX-SDAG-TRUE16-NEXT: v_log_bf16_e64 v0.l, -v0.l
; GFX-SDAG-TRUE16-NEXT: s_set_pc_i64 s[30:31]
;
; GFX-SDAG-FAKE16-LABEL: v_log2_fneg_v2bf16:
; GFX-SDAG-FAKE16: ; %bb.0:
; GFX-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX-SDAG-FAKE16-NEXT: v_log_bf16_e64 v0, -v0
; GFX-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
; GFX-SDAG-FAKE16-NEXT: v_log_bf16_e64 v1, -v1
; GFX-SDAG-FAKE16-NEXT: v_nop
; GFX-SDAG-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX-SDAG-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%fneg = fneg <2 x bfloat> %in
%result = call <2 x bfloat> @llvm.log2.v2bf16(<2 x bfloat> %fneg)
ret <2 x bfloat> %result
}

; <2 x bfloat> llvm.log2 with the `fast` flag on the call. The expected
; instruction sequence is the same as for the call without fast-math flags in
; v_log2_v2bf16.
define <2 x bfloat> @v_log2_v2bf16_fast(<2 x bfloat> %in) {
; GFX-SDAG-TRUE16-LABEL: v_log2_v2bf16_fast:
; GFX-SDAG-TRUE16: ; %bb.0:
; GFX-SDAG-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
; GFX-SDAG-TRUE16-NEXT: v_log_bf16_e32 v0.h, v0.h
; GFX-SDAG-TRUE16-NEXT: v_log_bf16_e32 v0.l, v0.l
; GFX-SDAG-TRUE16-NEXT: s_set_pc_i64 s[30:31]
;
; GFX-SDAG-FAKE16-LABEL: v_log2_v2bf16_fast:
; GFX-SDAG-FAKE16: ; %bb.0:
; GFX-SDAG-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX-SDAG-FAKE16-NEXT: s_wait_kmcnt 0x0
; GFX-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX-SDAG-FAKE16-NEXT: v_log_bf16_e32 v0, v0
; GFX-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
; GFX-SDAG-FAKE16-NEXT: v_log_bf16_e32 v1, v1
; GFX-SDAG-FAKE16-NEXT: v_nop
; GFX-SDAG-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX-SDAG-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%result = call fast <2 x bfloat> @llvm.log2.v2bf16(<2 x bfloat> %in)
ret <2 x bfloat> %result
}

declare bfloat @llvm.log2.bf16(bfloat) #0
declare <2 x bfloat> @llvm.log2.v2bf16(<2 x bfloat>) #0
declare bfloat @llvm.fabs.bf16(bfloat) #0
declare <2 x bfloat> @llvm.fabs.v2bf16(<2 x bfloat>) #0

attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
Loading
Loading