Skip to content

Commit 29d179d

Browse files
committed
Update clGetDeviceInfo to follow OpenCL 1.2 spec
1 parent ab09456 commit 29d179d

File tree

2 files changed

+78
-11
lines changed

2 files changed

+78
-11
lines changed

src/acl_device.cpp

Lines changed: 77 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,8 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfoIntelFPGA(
209209
RESULT_INT(0);
210210
break;
211211
case CL_DEVICE_GLOBAL_MEM_SIZE: {
212+
#ifdef __arm__
213+
// TODO: legacy code here, need to verify correctness with ARM board
212214
auto gmem_id = acl_get_default_device_global_memory(device->def);
213215
if (gmem_id < 0) {
214216
RESULT_INT(0);
@@ -217,10 +219,20 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfoIntelFPGA(
217219
cl_ulong size =
218220
ACL_RANGE_SIZE(device->def.autodiscovery_def.global_mem_defs[gmem_id]
219221
.get_usable_range());
220-
#ifdef __arm__
221222
// on SoC board, two DDR systems are not equivalent
222223
// so only half can be accessed with a single alloc.
223224
size /= 2;
225+
#else
226+
cl_ulong size = 0;
227+
for (unsigned gmem_idx = 0;
228+
gmem_idx < device->def.autodiscovery_def.num_global_mem_systems;
229+
gmem_idx++) {
230+
if (device->def.autodiscovery_def.global_mem_defs[gmem_idx].type ==
231+
ACL_GLOBAL_MEM_DEVICE_PRIVATE) {
232+
size += ACL_RANGE_SIZE(
233+
device->def.autodiscovery_def.global_mem_defs[gmem_idx].range);
234+
}
235+
}
224236
#endif
225237
RESULT_ULONG(size);
226238
break;
@@ -251,13 +263,9 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfoIntelFPGA(
251263
RESULT_UINT(acl_platform.max_constant_args);
252264
break;
253265

254-
// "desktop" profile says global memory must be at least 128MB
255-
// "embedded" profile says global memory must be at least 1MB
256266
case CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE: {
257-
// Constant memory is global memory.
258-
// However conformance_test_api min_max_constant_buffer_size
259-
// expects to allocate two buffers of the size we say here.
260-
// So be a shade conservative and cut it down by 4.
267+
#ifdef __arm__
268+
// TODO: legacy code here, need to verify correctness with ARM board
261269
auto gmem_id = acl_get_default_device_global_memory(device->def);
262270
if (gmem_id < 0) {
263271
RESULT_INT(0);
@@ -267,13 +275,44 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfoIntelFPGA(
267275
ACL_RANGE_SIZE(device->def.autodiscovery_def.global_mem_defs[gmem_id]
268276
.get_usable_range()) /
269277
4;
270-
#ifdef __arm__
271-
// see above
278+
// Cut by 2 again, see comment for CL_DEVICE_GLOBAL_MEM_SIZE
272279
size /= 2;
280+
#else
281+
// Return the maximum size of a single allocation to the constant memory
282+
// (i.e., global memory)
283+
cl_ulong size = 0;
284+
for (unsigned gmem_idx = 0;
285+
gmem_idx < device->def.autodiscovery_def.num_global_mem_systems;
286+
gmem_idx++) {
287+
if (device->def.autodiscovery_def.global_mem_defs[gmem_idx].type ==
288+
ACL_GLOBAL_MEM_DEVICE_PRIVATE) {
289+
cl_ulong curr_size = 0;
290+
// TODO: investigate if ACL_MEM_ALIGN of 0x400 is still required to
291+
// perform device allocations to memory with 0 starting address
292+
acl_system_global_mem_allocation_type_t alloc_type =
293+
device->def.autodiscovery_def.global_mem_defs[gmem_idx]
294+
.allocation_type;
295+
if (!alloc_type || (alloc_type & ACL_GLOBAL_MEM_DEVICE_ALLOCATION)) {
296+
curr_size = ACL_RANGE_SIZE(
297+
device->def.autodiscovery_def.global_mem_defs[gmem_idx]
298+
.get_usable_range());
299+
} else {
300+
curr_size = ACL_RANGE_SIZE(
301+
device->def.autodiscovery_def.global_mem_defs[gmem_idx].range);
302+
}
303+
if (curr_size > size) {
304+
size = curr_size;
305+
}
306+
}
307+
}
308+
// Note: devices that are not of type CL_DEVICE_TYPE_CUSTOM and that conform
309+
// to the OpenCL 1.2 spec will return a size of at least 64 KB here
273310
#endif
274311
RESULT_ULONG(size);
275312
} break;
276313
case CL_DEVICE_MAX_MEM_ALLOC_SIZE: {
314+
#ifdef __arm__
315+
// TODO: legacy code here, need to verify correctness with ARM board
277316
auto gmem_id = acl_get_default_device_global_memory(device->def);
278317
if (gmem_id < 0) {
279318
RESULT_INT(0);
@@ -282,7 +321,6 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfoIntelFPGA(
282321
cl_ulong size =
283322
ACL_RANGE_SIZE(device->def.autodiscovery_def.global_mem_defs[gmem_id]
284323
.get_usable_range());
285-
#ifdef __arm__
286324
// on SoC board, two DDR systems are not equivalent
287325
// so only half can be accessed with a single alloc.
288326

@@ -294,6 +332,35 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfoIntelFPGA(
294332
} else {
295333
size = size / 8;
296334
}
335+
#else
336+
cl_ulong size = 0;
337+
for (unsigned gmem_idx = 0;
338+
gmem_idx < device->def.autodiscovery_def.num_global_mem_systems;
339+
gmem_idx++) {
340+
if (device->def.autodiscovery_def.global_mem_defs[gmem_idx].type ==
341+
ACL_GLOBAL_MEM_DEVICE_PRIVATE) {
342+
cl_ulong curr_size = 0;
343+
// TODO: investigate if ACL_MEM_ALIGN of 0x400 is still required to
344+
// perform device allocations to memory with 0 starting address
345+
acl_system_global_mem_allocation_type_t alloc_type =
346+
device->def.autodiscovery_def.global_mem_defs[gmem_idx]
347+
.allocation_type;
348+
if (!alloc_type || (alloc_type & ACL_GLOBAL_MEM_DEVICE_ALLOCATION)) {
349+
curr_size = ACL_RANGE_SIZE(
350+
device->def.autodiscovery_def.global_mem_defs[gmem_idx]
351+
.get_usable_range());
352+
} else {
353+
curr_size = ACL_RANGE_SIZE(
354+
device->def.autodiscovery_def.global_mem_defs[gmem_idx].range);
355+
}
356+
if (curr_size > size) {
357+
size = curr_size;
358+
}
359+
}
360+
}
361+
// Note: devices that are not of type CL_DEVICE_TYPE_CUSTOM and that
362+
// conform to the OpenCL 1.2 spec will return a size of at least
363+
// max(CL_DEVICE_GLOBAL_MEM_SIZE/4, 1*1024*1024) here
297364
#endif
298365
RESULT_ULONG(size);
299366
} break;

src/acl_mem.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5663,7 +5663,7 @@ void acl_mem_migrate_buffer(void *user_data, acl_device_op_t *op) {
56635663
memory_migration.src_mem_list[index].destination_mem_id;
56645664

56655665
#ifdef MEM_DEBUG_MSG
5666-
printf("object %d, mem %zx, count %d:", index, (size_t)src_mem,
5666+
printf("object %d, mem %zx, count %d:\n", index, (size_t)src_mem,
56675667
src_mem->reserved_allocations_count[dest_device][dest_mem_id]);
56685668
#endif
56695669

0 commit comments

Comments
 (0)