Skip to content

Conversation

@RossBrunton
Copy link
Contributor

Rather than having every "enqueue"-type function have an output pointer
specifically for an output event, just provide an olCreateEvent
entrypoint which pushes an event to the queue.

For example, replace:

olMemcpy(Queue, ..., EventOut);

with

olMemcpy(Queue, ...);
olCreateEvent(Queue, EventOut);

Rather than having every "enqueue"-type function have an output pointer
specifically for an output event, just provide an `olCreateEvent`
entrypoint which pushes an event to the queue.

For example, replace:
```cpp
olMemcpy(Queue, ..., EventOut);
```
with
```cpp
olMemcpy(Queue, ...);
olCreateEvent(Queue, EventOut);
```
@llvmbot
Copy link
Member

llvmbot commented Jul 23, 2025

@llvm/pr-subscribers-offload

Author: Ross Brunton (RossBrunton)

Changes

Rather than having every "enqueue"-type function have an output pointer
specifically for an output event, just provide an olCreateEvent
entrypoint which pushes an event to the queue.

For example, replace:

olMemcpy(Queue, ..., EventOut);

with

olMemcpy(Queue, ...);
olCreateEvent(Queue, EventOut);

Patch is 20.25 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/150217.diff

11 Files Affected:

  • (modified) offload/liboffload/API/Event.td (+11)
  • (modified) offload/liboffload/API/Kernel.td (-2)
  • (modified) offload/liboffload/API/Memory.td (+1-4)
  • (modified) offload/liboffload/src/OffloadImpl.cpp (+10-22)
  • (modified) offload/unittests/OffloadAPI/CMakeLists.txt (+1)
  • (modified) offload/unittests/OffloadAPI/common/Fixtures.hpp (+2-10)
  • (added) offload/unittests/OffloadAPI/event/olCreateEvent.cpp (+29)
  • (modified) offload/unittests/OffloadAPI/event/olDestroyEvent.cpp (+1-7)
  • (modified) offload/unittests/OffloadAPI/event/olSyncEvent.cpp (+3-15)
  • (modified) offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp (+18-19)
  • (modified) offload/unittests/OffloadAPI/memory/olMemcpy.cpp (+22-31)
diff --git a/offload/liboffload/API/Event.td b/offload/liboffload/API/Event.td
index ea38b82ee145c..3c8ff7eae3a54 100644
--- a/offload/liboffload/API/Event.td
+++ b/offload/liboffload/API/Event.td
@@ -10,6 +10,17 @@
 //
 //===----------------------------------------------------------------------===//
 
+def : Function {
+    let name = "olCreateEvent";
+    let desc = "Create an event that will resolve when all current work in the queue is complete.";
+    let details = [];
+    let params = [
+        Param<"ol_queue_handle_t", "Queue", "queue to create the event for", PARAM_IN>,
+        Param<"ol_event_handle_t*", "Event", "output pointer for the created event", PARAM_OUT>
+    ];
+    let returns = [];
+}
+
 def : Function {
     let name = "olDestroyEvent";
     let desc = "Destroy the event and free all underlying resources.";
diff --git a/offload/liboffload/API/Kernel.td b/offload/liboffload/API/Kernel.td
index 1e9537452820d..502fb36467dba 100644
--- a/offload/liboffload/API/Kernel.td
+++ b/offload/liboffload/API/Kernel.td
@@ -35,10 +35,8 @@ def : Function {
         Param<"const void*", "ArgumentsData", "pointer to the kernel argument struct", PARAM_IN_OPTIONAL>,
         Param<"size_t", "ArgumentsSize", "size of the kernel argument struct", PARAM_IN>,
         Param<"const ol_kernel_launch_size_args_t*", "LaunchSizeArgs", "pointer to the struct containing launch size parameters", PARAM_IN>,
-        Param<"ol_event_handle_t*", "EventOut", "optional recorded event for the enqueued operation", PARAM_OUT_OPTIONAL>
     ];
     let returns = [
-        Return<"OL_ERRC_INVALID_ARGUMENT", ["`Queue == NULL && EventOut != NULL`"]>,
         Return<"OL_ERRC_INVALID_ARGUMENT", ["`ArgumentsSize > 0 && ArgumentsData == NULL`"]>,
         Return<"OL_ERRC_INVALID_DEVICE", ["If Queue is non-null but does not belong to Device"]>,
         Return<"OL_ERRC_SYMBOL_KIND", ["The provided symbol is not a kernel"]>,
diff --git a/offload/liboffload/API/Memory.td b/offload/liboffload/API/Memory.td
index 029975c448295..5f7158588bc77 100644
--- a/offload/liboffload/API/Memory.td
+++ b/offload/liboffload/API/Memory.td
@@ -60,9 +60,6 @@ def : Function {
         Param<"const void*", "SrcPtr", "pointer to copy from", PARAM_IN>,
         Param<"ol_device_handle_t", "SrcDevice", "device that SrcPtr belongs to", PARAM_IN>,
         Param<"size_t", "Size", "size in bytes of data to copy", PARAM_IN>,
-        Param<"ol_event_handle_t*", "EventOut", "optional recorded event for the enqueued operation", PARAM_OUT_OPTIONAL>
-    ];
-    let returns = [
-        Return<"OL_ERRC_INVALID_ARGUMENT", ["`Queue == NULL && EventOut != NULL`"]>
     ];
+    let returns = [];
 }
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index d93e4f1db58a7..6c21c4dbefbc3 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -569,26 +569,21 @@ Error olGetEventInfoSize_impl(ol_event_handle_t Event, ol_event_info_t PropName,
   return olGetEventInfoImplDetail(Event, PropName, 0, nullptr, PropSizeRet);
 }
 
-ol_event_handle_t makeEvent(ol_queue_handle_t Queue) {
-  auto EventImpl = std::make_unique<ol_event_impl_t>(nullptr, Queue);
-  if (auto Res = Queue->Device->Device->createEvent(&EventImpl->EventInfo)) {
-    llvm::consumeError(std::move(Res));
-    return nullptr;
-  }
+Error olCreateEvent_impl(ol_queue_handle_t Queue, ol_event_handle_t *EventOut) {
+  *EventOut = new ol_event_impl_t(nullptr, Queue);
+  if (auto Res = Queue->Device->Device->createEvent(&(*EventOut)->EventInfo))
+    return Res;
 
-  if (auto Res = Queue->Device->Device->recordEvent(EventImpl->EventInfo,
-                                                    Queue->AsyncInfo)) {
-    llvm::consumeError(std::move(Res));
-    return nullptr;
-  }
+  if (auto Res = Queue->Device->Device->recordEvent((*EventOut)->EventInfo,
+                                                    Queue->AsyncInfo))
+    return Res;
 
-  return EventImpl.release();
+  return Plugin::success();
 }
 
 Error olMemcpy_impl(ol_queue_handle_t Queue, void *DstPtr,
                     ol_device_handle_t DstDevice, const void *SrcPtr,
-                    ol_device_handle_t SrcDevice, size_t Size,
-                    ol_event_handle_t *EventOut) {
+                    ol_device_handle_t SrcDevice, size_t Size) {
   auto Host = OffloadContext::get().HostDevice();
   if (DstDevice == Host && SrcDevice == Host) {
     if (!Queue) {
@@ -619,9 +614,6 @@ Error olMemcpy_impl(ol_queue_handle_t Queue, void *DstPtr,
       return Res;
   }
 
-  if (EventOut)
-    *EventOut = makeEvent(Queue);
-
   return Error::success();
 }
 
@@ -668,8 +660,7 @@ Error olDestroyProgram_impl(ol_program_handle_t Program) {
 Error olLaunchKernel_impl(ol_queue_handle_t Queue, ol_device_handle_t Device,
                           ol_symbol_handle_t Kernel, const void *ArgumentsData,
                           size_t ArgumentsSize,
-                          const ol_kernel_launch_size_args_t *LaunchSizeArgs,
-                          ol_event_handle_t *EventOut) {
+                          const ol_kernel_launch_size_args_t *LaunchSizeArgs) {
   auto *DeviceImpl = Device->Device;
   if (Queue && Device != Queue->Device) {
     return createOffloadError(
@@ -707,9 +698,6 @@ Error olLaunchKernel_impl(ol_queue_handle_t Queue, ol_device_handle_t Device,
   if (Err)
     return Err;
 
-  if (EventOut)
-    *EventOut = makeEvent(Queue);
-
   return Error::success();
 }
 
diff --git a/offload/unittests/OffloadAPI/CMakeLists.txt b/offload/unittests/OffloadAPI/CMakeLists.txt
index f09cfc6bb0876..87412fc570b32 100644
--- a/offload/unittests/OffloadAPI/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/CMakeLists.txt
@@ -9,6 +9,7 @@ add_offload_unittest("device"
     device/olGetDeviceInfoSize.cpp)
 
 add_offload_unittest("event"
+    event/olCreateEvent.cpp
     event/olDestroyEvent.cpp
     event/olSyncEvent.cpp
     event/olGetEventInfo.cpp
diff --git a/offload/unittests/OffloadAPI/common/Fixtures.hpp b/offload/unittests/OffloadAPI/common/Fixtures.hpp
index 717288eede843..24ce1a0cf7d7e 100644
--- a/offload/unittests/OffloadAPI/common/Fixtures.hpp
+++ b/offload/unittests/OffloadAPI/common/Fixtures.hpp
@@ -171,16 +171,8 @@ struct OffloadQueueTest : OffloadDeviceTest {
 struct OffloadEventTest : OffloadQueueTest {
   void SetUp() override {
     RETURN_ON_FATAL_FAILURE(OffloadQueueTest::SetUp());
-    // Get an event from a memcpy. We can still use it in olGetEventInfo etc
-    // after it has been waited on.
-    void *Alloc;
-    uint32_t Value = 42;
-    ASSERT_SUCCESS(
-        olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, sizeof(Value), &Alloc));
-    ASSERT_SUCCESS(
-        olMemcpy(Queue, Alloc, Device, &Value, Host, sizeof(Value), &Event));
-    ASSERT_SUCCESS(olSyncEvent(Event));
-    ASSERT_SUCCESS(olMemFree(Alloc));
+    ASSERT_SUCCESS(olCreateEvent(Queue, &Event));
+    ASSERT_SUCCESS(olSyncQueue(Queue));
   }
 
   void TearDown() override {
diff --git a/offload/unittests/OffloadAPI/event/olCreateEvent.cpp b/offload/unittests/OffloadAPI/event/olCreateEvent.cpp
new file mode 100644
index 0000000000000..957e9d52215ab
--- /dev/null
+++ b/offload/unittests/OffloadAPI/event/olCreateEvent.cpp
@@ -0,0 +1,29 @@
+//===------- Offload API tests - olCreateEvent ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../common/Fixtures.hpp"
+#include <OffloadAPI.h>
+#include <gtest/gtest.h>
+
+using olCreateEventTest = OffloadQueueTest;
+OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olCreateEventTest);
+
+TEST_P(olCreateEventTest, Success) {
+  ol_event_handle_t Event = nullptr;
+  ASSERT_SUCCESS(olCreateEvent(Queue, &Event));
+  ASSERT_NE(Event, nullptr);
+}
+
+TEST_P(olCreateEventTest, InvalidNullQueue) {
+  ol_event_handle_t Event;
+  ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, olCreateEvent(nullptr, &Event));
+}
+
+TEST_P(olCreateEventTest, InvalidNullDest) {
+  ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER, olCreateEvent(Queue, nullptr));
+}
diff --git a/offload/unittests/OffloadAPI/event/olDestroyEvent.cpp b/offload/unittests/OffloadAPI/event/olDestroyEvent.cpp
index 1c12cea27b160..8cee535c2258f 100644
--- a/offload/unittests/OffloadAPI/event/olDestroyEvent.cpp
+++ b/offload/unittests/OffloadAPI/event/olDestroyEvent.cpp
@@ -14,14 +14,8 @@ using olDestroyEventTest = OffloadQueueTest;
 OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olDestroyEventTest);
 
 TEST_P(olDestroyEventTest, Success) {
-  uint32_t Src = 42;
-  void *DstPtr;
-
   ol_event_handle_t Event = nullptr;
-  ASSERT_SUCCESS(
-      olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, sizeof(uint32_t), &DstPtr));
-  ASSERT_SUCCESS(
-      olMemcpy(Queue, DstPtr, Device, &Src, Host, sizeof(Src), &Event));
+  ASSERT_SUCCESS(olCreateEvent(Queue, &Event));
   ASSERT_NE(Event, nullptr);
   ASSERT_SUCCESS(olSyncQueue(Queue));
   ASSERT_SUCCESS(olDestroyEvent(Event));
diff --git a/offload/unittests/OffloadAPI/event/olSyncEvent.cpp b/offload/unittests/OffloadAPI/event/olSyncEvent.cpp
index e04a273a11869..22ecb96dd9ee5 100644
--- a/offload/unittests/OffloadAPI/event/olSyncEvent.cpp
+++ b/offload/unittests/OffloadAPI/event/olSyncEvent.cpp
@@ -14,14 +14,8 @@ using olSyncEventTest = OffloadQueueTest;
 OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olSyncEventTest);
 
 TEST_P(olSyncEventTest, Success) {
-  uint32_t Src = 42;
-  void *DstPtr;
-
   ol_event_handle_t Event = nullptr;
-  ASSERT_SUCCESS(
-      olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, sizeof(uint32_t), &DstPtr));
-  ASSERT_SUCCESS(
-      olMemcpy(Queue, DstPtr, Device, &Src, Host, sizeof(Src), &Event));
+  ASSERT_SUCCESS(olCreateEvent(Queue, &Event));
   ASSERT_NE(Event, nullptr);
   ASSERT_SUCCESS(olSyncEvent(Event));
   ASSERT_SUCCESS(olDestroyEvent(Event));
@@ -31,15 +25,9 @@ TEST_P(olSyncEventTest, InvalidNullEvent) {
   ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, olSyncEvent(nullptr));
 }
 
-TEST_P(olSyncEventTest, SuccessMultipleWait) {
-  uint32_t Src = 42;
-  void *DstPtr;
-
+TEST_P(olSyncEventTest, SuccessMultipleSync) {
   ol_event_handle_t Event = nullptr;
-  ASSERT_SUCCESS(
-      olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, sizeof(uint32_t), &DstPtr));
-  ASSERT_SUCCESS(
-      olMemcpy(Queue, DstPtr, Device, &Src, Host, sizeof(Src), &Event));
+  ASSERT_SUCCESS(olCreateEvent(Queue, &Event));
   ASSERT_NE(Event, nullptr);
 
   for (size_t I = 0; I < 10; I++)
diff --git a/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp b/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
index 165c0a0929384..758f6071b030f 100644
--- a/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
+++ b/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
@@ -91,8 +91,8 @@ TEST_P(olLaunchKernelFooTest, Success) {
     void *Mem;
   } Args{Mem};
 
-  ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args),
-                                &LaunchArgs, nullptr));
+  ASSERT_SUCCESS(
+      olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args), &LaunchArgs));
 
   ASSERT_SUCCESS(olSyncQueue(Queue));
 
@@ -106,7 +106,7 @@ TEST_P(olLaunchKernelFooTest, Success) {
 
 TEST_P(olLaunchKernelNoArgsTest, Success) {
   ASSERT_SUCCESS(
-      olLaunchKernel(Queue, Device, Kernel, nullptr, 0, &LaunchArgs, nullptr));
+      olLaunchKernel(Queue, Device, Kernel, nullptr, 0, &LaunchArgs));
 
   ASSERT_SUCCESS(olSyncQueue(Queue));
 }
@@ -121,7 +121,7 @@ TEST_P(olLaunchKernelFooTest, SuccessSynchronous) {
   } Args{Mem};
 
   ASSERT_SUCCESS(olLaunchKernel(nullptr, Device, Kernel, &Args, sizeof(Args),
-                                &LaunchArgs, nullptr));
+                                &LaunchArgs));
 
   uint32_t *Data = (uint32_t *)Mem;
   for (uint32_t i = 0; i < 64; i++) {
@@ -144,8 +144,8 @@ TEST_P(olLaunchKernelLocalMemTest, Success) {
     void *Mem;
   } Args{Mem};
 
-  ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args),
-                                &LaunchArgs, nullptr));
+  ASSERT_SUCCESS(
+      olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args), &LaunchArgs));
 
   ASSERT_SUCCESS(olSyncQueue(Queue));
 
@@ -167,8 +167,8 @@ TEST_P(olLaunchKernelLocalMemReductionTest, Success) {
     void *Mem;
   } Args{Mem};
 
-  ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args),
-                                &LaunchArgs, nullptr));
+  ASSERT_SUCCESS(
+      olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args), &LaunchArgs));
 
   ASSERT_SUCCESS(olSyncQueue(Queue));
 
@@ -190,8 +190,8 @@ TEST_P(olLaunchKernelLocalMemStaticTest, Success) {
     void *Mem;
   } Args{Mem};
 
-  ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args),
-                                &LaunchArgs, nullptr));
+  ASSERT_SUCCESS(
+      olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args), &LaunchArgs));
 
   ASSERT_SUCCESS(olSyncQueue(Queue));
 
@@ -210,11 +210,11 @@ TEST_P(olLaunchKernelGlobalTest, Success) {
     void *Mem;
   } Args{Mem};
 
-  ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernels[0], nullptr, 0,
-                                &LaunchArgs, nullptr));
+  ASSERT_SUCCESS(
+      olLaunchKernel(Queue, Device, Kernels[0], nullptr, 0, &LaunchArgs));
   ASSERT_SUCCESS(olSyncQueue(Queue));
   ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernels[1], &Args, sizeof(Args),
-                                &LaunchArgs, nullptr));
+                                &LaunchArgs));
   ASSERT_SUCCESS(olSyncQueue(Queue));
 
   uint32_t *Data = (uint32_t *)Mem;
@@ -229,9 +229,8 @@ TEST_P(olLaunchKernelGlobalTest, InvalidNotAKernel) {
   ol_symbol_handle_t Global = nullptr;
   ASSERT_SUCCESS(
       olGetSymbol(Program, "global", OL_SYMBOL_KIND_GLOBAL_VARIABLE, &Global));
-  ASSERT_ERROR(
-      OL_ERRC_SYMBOL_KIND,
-      olLaunchKernel(Queue, Device, Global, nullptr, 0, &LaunchArgs, nullptr));
+  ASSERT_ERROR(OL_ERRC_SYMBOL_KIND,
+               olLaunchKernel(Queue, Device, Global, nullptr, 0, &LaunchArgs));
 }
 
 TEST_P(olLaunchKernelGlobalCtorTest, Success) {
@@ -242,8 +241,8 @@ TEST_P(olLaunchKernelGlobalCtorTest, Success) {
     void *Mem;
   } Args{Mem};
 
-  ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args),
-                                &LaunchArgs, nullptr));
+  ASSERT_SUCCESS(
+      olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args), &LaunchArgs));
   ASSERT_SUCCESS(olSyncQueue(Queue));
 
   uint32_t *Data = (uint32_t *)Mem;
@@ -259,6 +258,6 @@ TEST_P(olLaunchKernelGlobalDtorTest, Success) {
   // find/implement a way, update this test. For now we just check that nothing
   // crashes
   ASSERT_SUCCESS(
-      olLaunchKernel(Queue, Device, Kernel, nullptr, 0, &LaunchArgs, nullptr));
+      olLaunchKernel(Queue, Device, Kernel, nullptr, 0, &LaunchArgs));
   ASSERT_SUCCESS(olSyncQueue(Queue));
 }
diff --git a/offload/unittests/OffloadAPI/memory/olMemcpy.cpp b/offload/unittests/OffloadAPI/memory/olMemcpy.cpp
index 4fefefdab913f..cc67d782ef403 100644
--- a/offload/unittests/OffloadAPI/memory/olMemcpy.cpp
+++ b/offload/unittests/OffloadAPI/memory/olMemcpy.cpp
@@ -44,8 +44,7 @@ TEST_P(olMemcpyTest, SuccessHtoD) {
   void *Alloc;
   ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, &Alloc));
   std::vector<uint8_t> Input(Size, 42);
-  ASSERT_SUCCESS(
-      olMemcpy(Queue, Alloc, Device, Input.data(), Host, Size, nullptr));
+  ASSERT_SUCCESS(olMemcpy(Queue, Alloc, Device, Input.data(), Host, Size));
   olSyncQueue(Queue);
   olMemFree(Alloc);
 }
@@ -57,10 +56,8 @@ TEST_P(olMemcpyTest, SuccessDtoH) {
   std::vector<uint8_t> Output(Size, 0);
 
   ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, &Alloc));
-  ASSERT_SUCCESS(
-      olMemcpy(Queue, Alloc, Device, Input.data(), Host, Size, nullptr));
-  ASSERT_SUCCESS(
-      olMemcpy(Queue, Output.data(), Host, Alloc, Device, Size, nullptr));
+  ASSERT_SUCCESS(olMemcpy(Queue, Alloc, Device, Input.data(), Host, Size));
+  ASSERT_SUCCESS(olMemcpy(Queue, Output.data(), Host, Alloc, Device, Size));
   ASSERT_SUCCESS(olSyncQueue(Queue));
   for (uint8_t Val : Output) {
     ASSERT_EQ(Val, 42);
@@ -77,12 +74,9 @@ TEST_P(olMemcpyTest, SuccessDtoD) {
 
   ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, &AllocA));
   ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, &AllocB));
-  ASSERT_SUCCESS(
-      olMemcpy(Queue, AllocA, Device, Input.data(), Host, Size, nullptr));
-  ASSERT_SUCCESS(
-      olMemcpy(Queue, AllocB, Device, AllocA, Device, Size, nullptr));
-  ASSERT_SUCCESS(
-      olMemcpy(Queue, Output.data(), Host, AllocB, Device, Size, nullptr));
+  ASSERT_SUCCESS(olMemcpy(Queue, AllocA, Device, Input.data(), Host, Size));
+  ASSERT_SUCCESS(olMemcpy(Queue, AllocB, Device, AllocA, Device, Size));
+  ASSERT_SUCCESS(olMemcpy(Queue, Output.data(), Host, AllocB, Device, Size));
   ASSERT_SUCCESS(olSyncQueue(Queue));
   for (uint8_t Val : Output) {
     ASSERT_EQ(Val, 42);
@@ -96,8 +90,8 @@ TEST_P(olMemcpyTest, SuccessHtoHSync) {
   std::vector<uint8_t> Input(Size, 42);
   std::vector<uint8_t> Output(Size, 0);
 
-  ASSERT_SUCCESS(olMemcpy(nullptr, Output.data(), Host, Input.data(), Host,
-                          Size, nullptr));
+  ASSERT_SUCCESS(
+      olMemcpy(nullptr, Output.data(), Host, Input.data(), Host, Size));
 
   for (uint8_t Val : Output) {
     ASSERT_EQ(Val, 42);
@@ -111,10 +105,8 @@ TEST_P(olMemcpyTest, SuccessDtoHSync) {
   std::vector<uint8_t> Output(Size, 0);
 
   ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_DEVICE, Size, &Alloc));
-  ASSERT_SUCCESS(
-      olMemcpy(nullptr, Alloc, Device, Input.data(), Host, Size, nullptr));
-  ASSERT_SUCCESS(
-      olMemcpy(nullptr, Output.data(), Host, Alloc, Device, Size, nullptr));
+  ASSERT_SUCCESS(olMemcpy(nullptr, Alloc, Device, Input.data(), Host, Size));
+  ASSERT_SUCCESS(olMemcpy(nullptr, Output.data(), Host, Alloc, Device, Size));
   for (uint8_t Val : Output) {
     ASSERT_EQ(Val, 42);
   }
@@ -128,8 +120,7 @@ TEST_P(olMemcpyTest, SuccessSizeZero) {
 
   // As with std::memcpy, size 0 is allowed. Keep all other arguments valid even
   // if they aren't used.
-  ASSERT_SUCCESS(
-      olMemcpy(nullptr, Output.data(), Host, Input.data(), Host, 0, nullptr));
+  ASSERT_SUCCESS(olMemcpy(nullptr, Output.data(), Host, Input.data(), Host, 0));
 }
 
 TEST_P(olMemcpyGlobalTest, SuccessRoundTrip) {
@@ -144,11 +135,11 @@ TEST_P(olMemcpyGlobalTest, SuccessRoundTrip) {
   ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
                             64 * sizeof(uint32_t), &DestMem));
 
-  ASSERT_SUCCESS(olMemcpy(Queue, Addr, Device, SourceMem, Host,
-                          64 * sizeof(uint32_t), nullptr));
+  ASSERT_SUCCESS(
+      olMemcpy(Queue, Addr, Device, SourceMem, Host, 64 * sizeof(uint32_t)));
   ASSERT_SUCCESS(olSyncQueue(Queue));
-  ASSERT_SUCCESS(olMemcpy(Queue, DestMem, Host, Addr, Device,
-                          64 * sizeof(uint32_t), nullptr));
+  ASSERT_SUCCESS(
+      olMemcpy(Queue, DestMem, Host, Addr, Device, 64 * sizeof(uint32_t)));
   ASSERT_SUCCESS(olSyncQueue(Queue));
 
   uint32_t *DestData = (uint32_t *)DestMem;
@@ -176,11 +167,11 @@ TEST_P(olMemcpyGlobalTest, SuccessWrite) {
     void *Mem;
   } Args{DestMem};
 
-  ASSERT_SUCCESS(olMemcpy(Queue, Addr, Device, SourceMem, Host,
-                          64 * sizeof(uint32_t), nullptr));
+  ASSERT_SUCCESS(
+      olMemcpy(Queue, Addr, Device, SourceMem, Host, 64 * sizeof(uint32_t)));
   ASSERT_SUCCESS(olSyncQueue(Queue));
   ASSERT_SUCCESS(olLaunchKernel(Queue, Device, ReadKernel, &Args, sizeof(Args),
-                                &LaunchArgs, nullptr));
+                                &LaunchArgs));
   ASSERT_SUCCESS(olSyncQueue(Queue));
 
   uint32_t *DestData = (uint32_t *)DestMem;
@@ -197,11 +188,11 @@ TEST_P(olMemcpyGlobalTest, SuccessRead) {
                             LaunchArgs.GroupSize.x * sizeof(uint32_t),
                             &DestMem));
 
-  ASSERT_SUCCESS(olLaunchKernel(Queue, Device, WriteKernel, nullptr, 0,
-                                &LaunchArgs, nullptr));
+  ASSERT_SUCCESS(
+      olLaunchKernel(Queue, Device, WriteKernel, nullptr, 0, &LaunchArgs));
   ASSERT_SUCCESS(olSyncQueue(Queue));
-  ASSERT_SUCCESS(olMemcpy(Queue, DestMem, Host, Ad...
[truncated]

@RossBrunton
Copy link
Contributor Author

Under the hood, this is how it works anyway; all of the enqueue methods just call makeEvent() at the end. I was looking at adding an output event for olWaitEvents() (which is required for the UR adapter), but having every function that touches a queue accept an extra parameter just in case you want to capture an event feels a bit cumbersome.

@RossBrunton RossBrunton merged commit 690c3ee into llvm:main Jul 24, 2025
9 checks passed
callumfare added a commit that referenced this pull request Jul 24, 2025
mahesh-attarde pushed a commit to mahesh-attarde/llvm-project that referenced this pull request Jul 28, 2025
…0217)

Rather than having every "enqueue"-type function have an output pointer
specifically for an output event, just provide an `olCreateEvent`
entrypoint which pushes an event to the queue.

For example, replace:
```cpp
olMemcpy(Queue, ..., EventOut);
```
with
```cpp
olMemcpy(Queue, ...);
olCreateEvent(Queue, EventOut);
```
mahesh-attarde pushed a commit to mahesh-attarde/llvm-project that referenced this pull request Jul 28, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants