11
22# FPGA lsu
33
4- The Intel FPGA ` lsu ` class is implemented in ` CL/sycl/intel /fpga_lsu.hpp ` which
5- is included in ` CL/sycl/intel /fpga_extensions.hpp ` .
4+ The Intel FPGA ` lsu ` class is implemented in ` CL/sycl/INTEL /fpga_lsu.hpp ` which
5+ is included in ` CL/sycl/INTEL /fpga_extensions.hpp ` .
66
7- The class ` cl::sycl::intel ::lsu ` allows users to explicitly request that the
7+ The class ` cl::sycl::INTEL ::lsu ` allows users to explicitly request that the
88implementation of a global memory access is configured in a certain way. The
99class has two member functions, ` load() ` and ` store() ` which allow loading from
1010and storing to a ` global_ptr ` , respectively, and is templated on the following
11114 optional paremeters:
1212
13- 1 . ** ` cl::sycl::intel ::burst_coalesce<B> ` , where ` B ` is a boolean** : request,
13+ 1 . ** ` cl::sycl::INTEL ::burst_coalesce<B> ` , where ` B ` is a boolean** : request,
1414to the extent possible, that a dynamic burst coalescer be implemented when
1515` load ` or ` store ` are called. The default value of this parameter is ` false ` .
16- 2 . ** ` cl::sycl::intel ::cache<N> ` , where ` N ` is an integer greater or equal to
16+ 2 . ** ` cl::sycl::INTEL ::cache<N> ` , where ` N ` is an integer greater or equal to
17170** : request, to the extent possible, that a read-only cache of the specified
1818size in bytes be implemented when when ` load ` is called. It is not allowed to
1919use that parameter for ` store ` . The default value of this parameter is ` 0 ` .
20- 3 . ** ` cl::sycl::intel ::statically_coalesce<N > ` , where ` B ` is a boolean** :
20+ 3 . ** ` cl::sycl::INTEL ::statically_coalesce<B > ` , where ` B ` is a boolean** :
2121request, to the extent possible, that ` load ` or ` store ` accesses, is allowed to
2222be statically coalesced with other memory accesses at compile time. The default
2323value of this parameter is ` true ` .
24- 4 . ** ` cl::sycl::intel ::prefetch<B> ` , where ` N ` is a boolean** : request, to the
24+ 4 . ** ` cl::sycl::INTEL ::prefetch<B> ` , where ` B ` is a boolean** : request, to the
2525extent possible, that a prefetcher be implemented when ` load ` is called. It is
2626not allowed to use that parameter for ` store ` . The default value of this
2727parameter is ` false ` .
2828
2929Currently, not every combination of parameters is allowed due to limitations in
3030the backend. The following rules apply:
31- 1 . For ` store ` , ` cl::sycl::intel ::cache ` must be ` 0 ` and
32- ` cl::sycl::intel ::prefetch ` must be ` false ` .
33- 2 . For ` load ` , if ` cl::sycl::intel ::cache ` is set to a value greater than ` 0 ` ,
34- then ` cl::sycl::intel ::burst_coalesce ` must be set to ` true ` .
35- 3 . For ` load ` , exactly one of ` cl::sycl::intel ::prefetch ` and
36- ` cl::sycl::intel ::burst_coalesce ` is allowed to be ` true ` .
37- 4 . For ` load ` , exactly one of ` cl::sycl::intel ::prefetch ` and
38- ` cl::sycl::intel ::cache ` is allowed to be ` true ` .
31+ 1 . For ` store ` , ` cl::sycl::INTEL ::cache ` must be ` 0 ` and
32+ ` cl::sycl::INTEL ::prefetch ` must be ` false ` .
33+ 2 . For ` load ` , if ` cl::sycl::INTEL ::cache ` is set to a value greater than ` 0 ` ,
34+ then ` cl::sycl::INTEL ::burst_coalesce ` must be set to ` true ` .
35+ 3 . For ` load ` , exactly one of ` cl::sycl::INTEL ::prefetch ` and
36+ ` cl::sycl::INTEL ::burst_coalesce ` is allowed to be ` true ` .
37+ 4 . For ` load ` , exactly one of ` cl::sycl::INTEL ::prefetch ` and
38+ ` cl::sycl::INTEL ::cache ` is allowed to be ` true ` .
3939
4040## Implementation
4141
@@ -47,7 +47,7 @@ template <class... mem_access_params> class lsu final {
4747public:
4848 lsu() = delete;
4949
50- template <typename T > static T & load(sycl::global_ptr<T > Ptr) {
50+ template <typename T > static T load(sycl::global_ptr<T > Ptr) {
5151 check_load();
5252#if defined(__ SYCL_DEVICE_ONLY__ ) && __ has_builtin(__ builtin_intel_fpga_mem)
5353 return * __ builtin_intel_fpga_mem((T * )Ptr,
@@ -77,7 +77,7 @@ public:
7777## Usage
7878
7979```c++
80- #include <CL/sycl/intel /fpga_extensions.hpp>
80+ #include <CL/sycl/INTEL /fpga_extensions.hpp>
8181...
8282cl::sycl::buffer<int, 1> output_buffer(output_data, 1);
8383cl::sycl::buffer<int, 1> input_buffer(input_data, 1);
@@ -91,19 +91,19 @@ Queue.submit([&](cl::sycl::handler &cgh) {
9191 auto output_ptr = output_accessor.get_pointer();
9292
9393 using PrefetchingLSU =
94- cl::sycl::intel ::lsu<cl::sycl::intel ::prefetch<true>,
95- cl::sycl::intel ::statically_coalesce<false>>;
94+ cl::sycl::INTEL ::lsu<cl::sycl::INTEL ::prefetch<true>,
95+ cl::sycl::INTEL ::statically_coalesce<false>>;
9696
9797 using BurstCoalescedLSU =
98- cl::sycl::intel ::lsu<cl::sycl::intel ::burst_coalesce<false>,
99- cl::sycl::intel ::statically_coalesce<false>>;
98+ cl::sycl::INTEL ::lsu<cl::sycl::INTEL ::burst_coalesce<false>,
99+ cl::sycl::INTEL ::statically_coalesce<false>>;
100100
101101 using CachingLSU =
102- cl::sycl::intel ::lsu<cl::sycl::intel ::burst_coalesce<true>,
103- cl::sycl::intel ::cache<1024>,
104- cl::sycl::intel ::statically_coalesce<true>>;
102+ cl::sycl::INTEL ::lsu<cl::sycl::INTEL ::burst_coalesce<true>,
103+ cl::sycl::INTEL ::cache<1024>,
104+ cl::sycl::INTEL ::statically_coalesce<true>>;
105105
106- using PipelinedLSU = cl::sycl::intel ::lsu<>;
106+ using PipelinedLSU = cl::sycl::INTEL ::lsu<>;
107107
108108 int X = PrefetchingLSU::load(input_ptr); // int X = input_ptr[0]
109109 int Y = CachingLSU::load(input_ptr + 1); // int Y = input_ptr[1]
0 commit comments