@@ -39,7 +39,7 @@ declare a function as a kernel function. This metadata is attached to the
3939
4040.. code-block:: llvm
4141
42- !0 = metadata !{<function-ref>, metadata !"kernel", i32 1}
42+ !0 = !{<function-ref>, !"kernel", i32 1}
4343
4444 The first parameter is a reference to the kernel function. The following
4545example shows a kernel function calling a device function in LLVM IR. The
@@ -54,14 +54,14 @@ function ``@my_kernel`` is callable from host code, but ``@my_fmad`` is not.
5454 }
5555
5656 define void @my_kernel(float* %ptr) {
57- %val = load float* %ptr
57+ %val = load float, float* %ptr
5858 %ret = call float @my_fmad(float %val, float %val, float %val)
5959 store float %ret, float* %ptr
6060 ret void
6161 }
6262
6363 !nvvm.annotations = !{!1}
64- !1 = metadata !{void (float*)* @my_kernel, metadata !"kernel", i32 1}
64+ !1 = !{void (float*)* @my_kernel, !"kernel", i32 1}
6565
6666 When compiled, the PTX kernel functions are callable by host-side code.
6767
@@ -446,13 +446,13 @@ The Kernel
446446 %id = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x() readnone nounwind
447447
448448 ; Compute pointers into A, B, and C
449- %ptrA = getelementptr float addrspace(1)* %A, i32 %id
450- %ptrB = getelementptr float addrspace(1)* %B, i32 %id
451- %ptrC = getelementptr float addrspace(1)* %C, i32 %id
449+ %ptrA = getelementptr float, float addrspace(1)* %A, i32 %id
450+ %ptrB = getelementptr float, float addrspace(1)* %B, i32 %id
451+ %ptrC = getelementptr float, float addrspace(1)* %C, i32 %id
452452
453453 ; Read A, B
454- %valA = load float addrspace(1)* %ptrA, align 4
455- %valB = load float addrspace(1)* %ptrB, align 4
454+ %valA = load float, float addrspace(1)* %ptrA, align 4
455+ %valB = load float, float addrspace(1)* %ptrB, align 4
456456
457457 ; Compute C = A + B
458458 %valC = fadd float %valA, %valB
@@ -464,9 +464,9 @@ The Kernel
464464 }
465465
466466 !nvvm.annotations = !{!0}
467- !0 = metadata !{void (float addrspace(1)*,
468- float addrspace(1)*,
469- float addrspace(1)*)* @kernel, metadata !"kernel", i32 1}
467+ !0 = !{void (float addrspace(1)*,
468+ float addrspace(1)*,
469+ float addrspace(1)*)* @kernel, !"kernel", i32 1}
470470
471471
472472 We can use the LLVM ``llc`` tool to directly run the NVPTX code generator:
@@ -608,16 +608,16 @@ as a PTX `kernel` function. These metadata nodes take the form:
608608
609609.. code-block:: text
610610
611- metadata !{<function ref>, metadata !"kernel", i32 1}
611+ !{<function ref>, !"kernel", i32 1}
612612
613613 For the previous example, we have:
614614
615615.. code-block:: llvm
616616
617617 !nvvm.annotations = !{!0}
618- !0 = metadata !{void (float addrspace(1)*,
619- float addrspace(1)*,
620- float addrspace(1)*)* @kernel, metadata !"kernel", i32 1}
618+ !0 = !{void (float addrspace(1)*,
619+ float addrspace(1)*,
620+ float addrspace(1)*)* @kernel, !"kernel", i32 1}
621621
622622 Here, we have a single metadata declaration in ``nvvm.annotations``. This
623623metadata annotates our ``@kernel`` function with the ``kernel`` attribute.
@@ -830,13 +830,13 @@ Libdevice provides an ``__nv_powf`` function that we will use.
830830 %id = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x() readnone nounwind
831831
832832 ; Compute pointers into A, B, and C
833- %ptrA = getelementptr float addrspace(1)* %A, i32 %id
834- %ptrB = getelementptr float addrspace(1)* %B, i32 %id
835- %ptrC = getelementptr float addrspace(1)* %C, i32 %id
833+ %ptrA = getelementptr float, float addrspace(1)* %A, i32 %id
834+ %ptrB = getelementptr float, float addrspace(1)* %B, i32 %id
835+ %ptrC = getelementptr float, float addrspace(1)* %C, i32 %id
836836
837837 ; Read A, B
838- %valA = load float addrspace(1)* %ptrA, align 4
839- %valB = load float addrspace(1)* %ptrB, align 4
838+ %valA = load float, float addrspace(1)* %ptrA, align 4
839+ %valB = load float, float addrspace(1)* %ptrB, align 4
840840
841841 ; Compute C = pow(A, B)
842842 %valC = call float @__nv_powf(float %valA, float %valB)
@@ -848,9 +848,9 @@ Libdevice provides an ``__nv_powf`` function that we will use.
848848 }
849849
850850 !nvvm.annotations = !{!0}
851- !0 = metadata !{void (float addrspace(1)*,
852- float addrspace(1)*,
853- float addrspace(1)*)* @kernel, metadata !"kernel", i32 1}
851+ !0 = !{void (float addrspace(1)*,
852+ float addrspace(1)*,
853+ float addrspace(1)*)* @kernel, !"kernel", i32 1}
854854
855855
856856 To compile this kernel, we perform the following steps:
0 commit comments