diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 8343d37a8c868..1650badc53ee5 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -494,6 +494,12 @@ void Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep) { return; } + // Do not add redundant event dependencies for in-order queues. + const QueueImplPtr &WorkerQueue = getWorkerQueue(); + if (Dep.MDepCommand && Dep.MDepCommand->getWorkerQueue() == WorkerQueue && + WorkerQueue->has_property()) + return; + ContextImplPtr DepEventContext = DepEvent->getContextImpl(); // If contexts don't match we'll connect them using host task if (DepEventContext != Context && !Context->is_host()) { @@ -507,6 +513,8 @@ ContextImplPtr Command::getContext() const { return detail::getSyclObjImpl(MQueue->get_context()); } +QueueImplPtr Command::getWorkerQueue() const { return MQueue; } + void Command::addDep(DepDesc NewDep) { if (NewDep.MDepCommand) { processDepEvent(NewDep.MDepCommand->getEvent(), NewDep); @@ -1128,12 +1136,15 @@ void MemCpyCommand::emitInstrumentationData() { } ContextImplPtr MemCpyCommand::getContext() const { - const QueueImplPtr &Queue = MQueue->is_host() ? MSrcQueue : MQueue; + const QueueImplPtr &Queue = getWorkerQueue(); return detail::getSyclObjImpl(Queue->get_context()); } +QueueImplPtr MemCpyCommand::getWorkerQueue() const { + return MQueue->is_host() ? MSrcQueue : MQueue; +} + cl_int MemCpyCommand::enqueueImp() { - QueueImplPtr Queue = MQueue->is_host() ? MSrcQueue : MQueue; waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; @@ -1266,12 +1277,16 @@ void MemCpyCommandHost::emitInstrumentationData() { } ContextImplPtr MemCpyCommandHost::getContext() const { - const QueueImplPtr &Queue = MQueue->is_host() ? MSrcQueue : MQueue; + const QueueImplPtr &Queue = getWorkerQueue(); return detail::getSyclObjImpl(Queue->get_context()); } +QueueImplPtr MemCpyCommandHost::getWorkerQueue() const { + return MQueue->is_host() ? MSrcQueue : MQueue; +} + cl_int MemCpyCommandHost::enqueueImp() { - QueueImplPtr Queue = MQueue->is_host() ? MSrcQueue : MQueue; + const QueueImplPtr &Queue = getWorkerQueue(); waitForPreparedHostEvents(); std::vector EventImpls = MPreparedDepsEvents; std::vector RawEvents = getPiEvents(EventImpls); diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index ef5eb059d05d9..f5c71c52fd29f 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -181,6 +181,10 @@ class Command { virtual ContextImplPtr getContext() const; + /// Get the queue this command will be submitted to. Could differ from MQueue + /// for memory copy commands. + virtual QueueImplPtr getWorkerQueue() const; + protected: EventImplPtr MEvent; QueueImplPtr MQueue; @@ -443,6 +447,7 @@ class MemCpyCommand : public Command { const Requirement *getRequirement() const final override { return &MDstReq; } void emitInstrumentationData() final override; ContextImplPtr getContext() const final override; + QueueImplPtr getWorkerQueue() const final override; private: cl_int enqueueImp() final override; @@ -466,6 +471,7 @@ class MemCpyCommandHost : public Command { const Requirement *getRequirement() const final override { return &MDstReq; } void emitInstrumentationData() final override; ContextImplPtr getContext() const final override; + QueueImplPtr getWorkerQueue() const final override; private: cl_int enqueueImp() final override; diff --git a/sycl/unittests/scheduler/CMakeLists.txt b/sycl/unittests/scheduler/CMakeLists.txt index 236192d8901dc..c69281a541a21 100644 --- a/sycl/unittests/scheduler/CMakeLists.txt +++ b/sycl/unittests/scheduler/CMakeLists.txt @@ -10,5 +10,6 @@ add_sycl_unittest(SchedulerTests OBJECT LeavesCollection.cpp NoUnifiedHostMemory.cpp StreamInitDependencyOnHost.cpp + InOrderQueueDeps.cpp utils.cpp ) diff --git a/sycl/unittests/scheduler/InOrderQueueDeps.cpp b/sycl/unittests/scheduler/InOrderQueueDeps.cpp new file mode 100644 index 0000000000000..cec7c1772852c --- /dev/null +++ b/sycl/unittests/scheduler/InOrderQueueDeps.cpp @@ -0,0 +1,129 @@ +//==------------ InOrderQueueueueDeps.cpp --- Scheduler unit tests ---------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SchedulerTest.hpp" +#include "SchedulerTestUtils.hpp" + +#include + +#include +#include + +using namespace cl::sycl; + +static pi_result +redefinedMemBufferCreate(pi_context context, pi_mem_flags flags, size_t size, + void *host_ptr, pi_mem *ret_mem, + const pi_mem_properties *properties = nullptr) { + return PI_SUCCESS; +} + +static pi_result redefinedMemRelease(pi_mem mem) { return PI_SUCCESS; } + +static pi_result redefinedEnqueueMemBufferReadRect( + pi_queue command_queue, pi_mem buffer, pi_bool blocking_read, + pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, + pi_buff_rect_region region, size_t buffer_row_pitch, + size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, + void *ptr, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event) { + EXPECT_EQ(num_events_in_wait_list, 0u); + *event = reinterpret_cast(1); + return PI_SUCCESS; +} + +static pi_result redefinedEnqueueMemBufferWriteRect( + pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, + pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, + pi_buff_rect_region region, size_t buffer_row_pitch, + size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, + const void *ptr, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event) { + EXPECT_EQ(num_events_in_wait_list, 0u); + *event = reinterpret_cast(1); + return PI_SUCCESS; +} + +static pi_result redefinedEnqueueMemBufferMap( + pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, + pi_map_flags map_flags, size_t offset, size_t size, + pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, + pi_event *event, void **ret_map) { + EXPECT_EQ(num_events_in_wait_list, 0u); + *event = reinterpret_cast(1); + return PI_SUCCESS; +} + +static pi_result redefinedEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, + void *mapped_ptr, + pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, + pi_event *event) { + EXPECT_EQ(num_events_in_wait_list, 0u); + *event = reinterpret_cast(1); + return PI_SUCCESS; +} + +static pi_result redefinedEventsWait(pi_uint32 num_events, + const pi_event *event_list) { + return PI_SUCCESS; +} + +pi_result redefinedEventRelease(pi_event event) { return PI_SUCCESS; } + +TEST_F(SchedulerTest, InOrderQueueDeps) { + default_selector Selector; + platform Plt{default_selector()}; + if (Plt.is_host()) { + std::cout << "Not run due to host-only environment\n"; + return; + } + + unittest::PiMock Mock{Plt}; + Mock.redefine(redefinedMemBufferCreate); + Mock.redefine(redefinedMemRelease); + Mock.redefine( + redefinedEnqueueMemBufferReadRect); + Mock.redefine( + redefinedEnqueueMemBufferWriteRect); + Mock.redefine( + redefinedEnqueueMemBufferMap); + Mock.redefine(redefinedEnqueueMemUnmap); + Mock.redefine(redefinedEventsWait); + Mock.redefine(redefinedEventRelease); + + context Ctx{Plt}; + queue InOrderQueue{Ctx, Selector, property::queue::in_order()}; + cl::sycl::detail::QueueImplPtr InOrderQueueImpl = + detail::getSyclObjImpl(InOrderQueue); + + device HostDevice; + std::shared_ptr DefaultHostQueue{ + new detail::queue_impl(detail::getSyclObjImpl(HostDevice), {}, {})}; + + MockScheduler MS; + + int val; + buffer Buf(&val, range<1>(1)); + detail::Requirement Req = getMockRequirement(Buf); + + detail::MemObjRecord *Record = + MS.getOrInsertMemObjRecord(InOrderQueueImpl, &Req); + MS.getOrCreateAllocaForReq(Record, &Req, InOrderQueueImpl); + MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue); + + // Check that sequential memory movements submitted to the same in-order + // queue do not depend on each other. + detail::Command *Cmd = MS.insertMemoryMove(Record, &Req, DefaultHostQueue); + detail::EnqueueResultT Res; + MockScheduler::enqueueCommand(Cmd, Res, detail::NON_BLOCKING); + Cmd = MS.insertMemoryMove(Record, &Req, InOrderQueueImpl); + MockScheduler::enqueueCommand(Cmd, Res, detail::NON_BLOCKING); + Cmd = MS.insertMemoryMove(Record, &Req, DefaultHostQueue); + MockScheduler::enqueueCommand(Cmd, Res, detail::NON_BLOCKING); +}