1- // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out %threads_lib -lOpenCL
2- // RUN: %CPU_RUN_PLACEHOLDER %t.out
3- // RUN: %GPU_RUN_PLACEHOLDER %t.out
4- // RUN: %ACC_RUN_PLACEHOLDER %t.out
1+ // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+ // RUN: %CPU_RUN_PLACEHOLDER %t.out 1
3+ // RUN: %GPU_RUN_PLACEHOLDER %t.out 1
4+ // RUN: %ACC_RUN_PLACEHOLDER %t.out 1
5+
6+ // RUN: %CPU_RUN_PLACEHOLDER %t.out 2
7+ // RUN: %GPU_RUN_PLACEHOLDER %t.out 2
8+ // RUN: %ACC_RUN_PLACEHOLDER %t.out 2
9+
10+ // RUN: %CPU_RUN_PLACEHOLDER %t.out 3
11+ // RUN: %GPU_RUN_PLACEHOLDER %t.out 3
12+ // RUN: %ACC_RUN_PLACEHOLDER %t.out 3
13+
14+ // RUNx: %CPU_RUN_PLACEHOLDER %t.out 4
15+ // RUNx: %GPU_RUN_PLACEHOLDER %t.out 4
16+ // RUNx: %ACC_RUN_PLACEHOLDER %t.out 4
517
618#include < CL/sycl.hpp>
19+ #include < chrono>
20+ #include < iostream>
21+ #include < vector>
722
823using namespace cl ::sycl;
924using namespace cl ::sycl::access;
1025
1126static constexpr size_t BUFFER_SIZE = 1024 ;
1227
28+ static auto EH = [](exception_list EL) {
29+ for (const std::exception_ptr &E : EL) {
30+ throw E;
31+ }
32+ };
33+
1334// Check that a single host-task with a buffer will work
1435void test1 () {
1536 buffer<int , 1 > Buffer{BUFFER_SIZE};
1637
17- queue Q;
38+ queue Q (EH) ;
1839
1940 Q.submit ([&](handler &CGH) {
2041 auto Acc = Buffer.get_access <mode::write>(CGH);
2142 CGH.codeplay_host_task ([=] {
2243 // A no-op
2344 });
2445 });
46+
47+ Q.wait_and_throw ();
2548}
2649
50+ // Check that a host task after the kernel (deps via buffer) will work
2751void test2 () {
2852 buffer<int , 1 > Buffer1{BUFFER_SIZE};
2953 buffer<int , 1 > Buffer2{BUFFER_SIZE};
3054
31- queue Q;
55+ queue Q (EH) ;
3256
3357 Q.submit ([&](handler &CGH) {
3458 auto Acc = Buffer1.template get_access <mode::write>(CGH);
@@ -55,10 +79,163 @@ void test2() {
5579 assert (Acc[Idx] == 123 );
5680 }
5781 }
82+
83+ Q.wait_and_throw ();
84+ }
85+
86+ // Host-task depending on another host-task via both buffers and
87+ // handler::depends_on() should not hang
88+ void test3 () {
89+ queue Q (EH);
90+
91+ static constexpr size_t BufferSize = 10 * 1024 ;
92+
93+ buffer<int , 1 > B0{range<1 >{BufferSize}};
94+ buffer<int , 1 > B1{range<1 >{BufferSize}};
95+ buffer<int , 1 > B2{range<1 >{BufferSize}};
96+ buffer<int , 1 > B3{range<1 >{BufferSize}};
97+ buffer<int , 1 > B4{range<1 >{BufferSize}};
98+ buffer<int , 1 > B5{range<1 >{BufferSize}};
99+ buffer<int , 1 > B6{range<1 >{BufferSize}};
100+ buffer<int , 1 > B7{range<1 >{BufferSize}};
101+ buffer<int , 1 > B8{range<1 >{BufferSize}};
102+ buffer<int , 1 > B9{range<1 >{BufferSize}};
103+
104+ std::vector<event> Deps;
105+
106+ using namespace std ::chrono_literals;
107+ static constexpr size_t Count = 10 ;
108+
109+ auto Start = std::chrono::steady_clock::now ();
110+ for (size_t Idx = 0 ; Idx < Count; ++Idx) {
111+ event E = Q.submit ([&](handler &CGH) {
112+ CGH.depends_on (Deps);
113+
114+ std::cout << " Submit: " << Idx << std::endl;
115+
116+ auto Acc0 = B0.get_access <mode::read_write, target::host_buffer>(CGH);
117+ auto Acc1 = B1.get_access <mode::read_write, target::host_buffer>(CGH);
118+ auto Acc2 = B2.get_access <mode::read_write, target::host_buffer>(CGH);
119+ auto Acc3 = B3.get_access <mode::read_write, target::host_buffer>(CGH);
120+ auto Acc4 = B4.get_access <mode::read_write, target::host_buffer>(CGH);
121+ auto Acc5 = B5.get_access <mode::read_write, target::host_buffer>(CGH);
122+ auto Acc6 = B6.get_access <mode::read_write, target::host_buffer>(CGH);
123+ auto Acc7 = B7.get_access <mode::read_write, target::host_buffer>(CGH);
124+ auto Acc8 = B8.get_access <mode::read_write, target::host_buffer>(CGH);
125+ auto Acc9 = B9.get_access <mode::read_write, target::host_buffer>(CGH);
126+
127+ CGH.codeplay_host_task ([=] {
128+ uint64_t X = 0 ;
129+
130+ X ^= reinterpret_cast <uint64_t >(&Acc0[Idx + 0 ]);
131+ X ^= reinterpret_cast <uint64_t >(&Acc1[Idx + 1 ]);
132+ X ^= reinterpret_cast <uint64_t >(&Acc2[Idx + 2 ]);
133+ X ^= reinterpret_cast <uint64_t >(&Acc3[Idx + 3 ]);
134+ X ^= reinterpret_cast <uint64_t >(&Acc4[Idx + 4 ]);
135+ X ^= reinterpret_cast <uint64_t >(&Acc5[Idx + 5 ]);
136+ X ^= reinterpret_cast <uint64_t >(&Acc6[Idx + 6 ]);
137+ X ^= reinterpret_cast <uint64_t >(&Acc7[Idx + 7 ]);
138+ X ^= reinterpret_cast <uint64_t >(&Acc8[Idx + 8 ]);
139+ X ^= reinterpret_cast <uint64_t >(&Acc9[Idx + 9 ]);
140+ });
141+ });
142+
143+ Deps = {E};
144+ }
145+
146+ Q.wait_and_throw ();
147+ auto End = std::chrono::steady_clock::now ();
148+
149+ constexpr auto Threshold = 2s;
150+
151+ assert (End - Start < Threshold && " Host tasks were waiting for too long" );
152+ }
153+
154+ // Host-task depending on another host-task via handler::depends_on() only
155+ // should not hang
156+ void test4 () {
157+ queue Q (EH);
158+
159+ static constexpr size_t BufferSize = 10 * 1024 ;
160+
161+ buffer<int , 1 > B0{range<1 >{BufferSize}};
162+ buffer<int , 1 > B1{range<1 >{BufferSize}};
163+ buffer<int , 1 > B2{range<1 >{BufferSize}};
164+ buffer<int , 1 > B3{range<1 >{BufferSize}};
165+ buffer<int , 1 > B4{range<1 >{BufferSize}};
166+ buffer<int , 1 > B5{range<1 >{BufferSize}};
167+
168+ // This host task should be submitted without hesitation
169+ event E1 = Q.submit ([&](handler &CGH) {
170+ std::cout << " Submit 1" << std::endl;
171+
172+ auto Acc0 = B0.get_access <mode::read_write, target::host_buffer>(CGH);
173+ auto Acc1 = B1.get_access <mode::read_write, target::host_buffer>(CGH);
174+ auto Acc2 = B2.get_access <mode::read_write, target::host_buffer>(CGH);
175+
176+ CGH.codeplay_host_task ([=] {
177+ Acc0[0 ] = 1 ;
178+ Acc1[0 ] = 2 ;
179+ Acc2[0 ] = 3 ;
180+ });
181+ });
182+
183+ // This host task is going to depend on blocked empty node of the first
184+ // host-task (via buffer #2). Still this one should be enqueued.
185+ event E2 = Q.submit ([&](handler &CGH) {
186+ std::cout << " Submit 2" << std::endl;
187+
188+ auto Acc2 = B2.get_access <mode::read_write, target::host_buffer>(CGH);
189+ auto Acc3 = B3.get_access <mode::read_write, target::host_buffer>(CGH);
190+
191+ CGH.codeplay_host_task ([=] {
192+ Acc2[1 ] = 1 ;
193+ Acc3[1 ] = 2 ;
194+ });
195+ });
196+
197+ // This host-task only depends on the second host-task via
198+ // handler::depends_on(). This one should not hang and should be enqueued
199+ // after host-task #2.
200+ event E3 = Q.submit ([&](handler &CGH) {
201+ CGH.depends_on (E2 );
202+
203+ std::cout << " Submit 3" << std::endl;
204+
205+ auto Acc4 = B4.get_access <mode::read_write, target::host_buffer>(CGH);
206+ auto Acc5 = B5.get_access <mode::read_write, target::host_buffer>(CGH);
207+
208+ CGH.codeplay_host_task ([=] {
209+ Acc4[2 ] = 1 ;
210+ Acc5[2 ] = 2 ;
211+ });
212+ });
213+
214+ Q.wait_and_throw ();
58215}
59216
60- int main () {
61- test1 ();
62- test2 ();
217+ int main (int Argc, const char *Argv[]) {
218+ if (Argc < 2 )
219+ return 1 ;
220+
221+ int TestIdx = std::stoi (Argv[1 ]);
222+
223+ switch (TestIdx) {
224+ case 1 :
225+ test1 ();
226+ break ;
227+ case 2 :
228+ test2 ();
229+ break ;
230+ case 3 :
231+ test3 ();
232+ break ;
233+ case 4 :
234+ test4 ();
235+ break ;
236+ default :
237+ return 1 ;
238+ }
239+
63240 return 0 ;
64241}
0 commit comments