Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 19 additions & 4 deletions sycl/source/detail/scheduler/commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,12 @@ void Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep) {
return;
}

// Do not add redundant event dependencies for in-order queues.
const QueueImplPtr &WorkerQueue = getWorkerQueue();
if (Dep.MDepCommand && Dep.MDepCommand->getWorkerQueue() == WorkerQueue &&
WorkerQueue->has_property<property::queue::in_order>())
return;

ContextImplPtr DepEventContext = DepEvent->getContextImpl();
// If contexts don't match we'll connect them using host task
if (DepEventContext != Context && !Context->is_host()) {
Expand All @@ -507,6 +513,8 @@ ContextImplPtr Command::getContext() const {
return detail::getSyclObjImpl(MQueue->get_context());
}

QueueImplPtr Command::getWorkerQueue() const { return MQueue; }

void Command::addDep(DepDesc NewDep) {
if (NewDep.MDepCommand) {
processDepEvent(NewDep.MDepCommand->getEvent(), NewDep);
Expand Down Expand Up @@ -1128,12 +1136,15 @@ void MemCpyCommand::emitInstrumentationData() {
}

ContextImplPtr MemCpyCommand::getContext() const {
const QueueImplPtr &Queue = MQueue->is_host() ? MSrcQueue : MQueue;
const QueueImplPtr &Queue = getWorkerQueue();
return detail::getSyclObjImpl(Queue->get_context());
}

QueueImplPtr MemCpyCommand::getWorkerQueue() const {
return MQueue->is_host() ? MSrcQueue : MQueue;
}

cl_int MemCpyCommand::enqueueImp() {
QueueImplPtr Queue = MQueue->is_host() ? MSrcQueue : MQueue;
waitForPreparedHostEvents();
std::vector<EventImplPtr> EventImpls = MPreparedDepsEvents;

Expand Down Expand Up @@ -1266,12 +1277,16 @@ void MemCpyCommandHost::emitInstrumentationData() {
}

ContextImplPtr MemCpyCommandHost::getContext() const {
const QueueImplPtr &Queue = MQueue->is_host() ? MSrcQueue : MQueue;
const QueueImplPtr &Queue = getWorkerQueue();
return detail::getSyclObjImpl(Queue->get_context());
}

QueueImplPtr MemCpyCommandHost::getWorkerQueue() const {
return MQueue->is_host() ? MSrcQueue : MQueue;
}

cl_int MemCpyCommandHost::enqueueImp() {
QueueImplPtr Queue = MQueue->is_host() ? MSrcQueue : MQueue;
const QueueImplPtr &Queue = getWorkerQueue();
waitForPreparedHostEvents();
std::vector<EventImplPtr> EventImpls = MPreparedDepsEvents;
std::vector<RT::PiEvent> RawEvents = getPiEvents(EventImpls);
Expand Down
6 changes: 6 additions & 0 deletions sycl/source/detail/scheduler/commands.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,10 @@ class Command {

virtual ContextImplPtr getContext() const;

/// Get the queue this command will be submitted to. Could differ from MQueue
/// for memory copy commands.
virtual QueueImplPtr getWorkerQueue() const;

protected:
EventImplPtr MEvent;
QueueImplPtr MQueue;
Expand Down Expand Up @@ -443,6 +447,7 @@ class MemCpyCommand : public Command {
const Requirement *getRequirement() const final override { return &MDstReq; }
void emitInstrumentationData() final override;
ContextImplPtr getContext() const final override;
QueueImplPtr getWorkerQueue() const final override;

private:
cl_int enqueueImp() final override;
Expand All @@ -466,6 +471,7 @@ class MemCpyCommandHost : public Command {
const Requirement *getRequirement() const final override { return &MDstReq; }
void emitInstrumentationData() final override;
ContextImplPtr getContext() const final override;
QueueImplPtr getWorkerQueue() const final override;

private:
cl_int enqueueImp() final override;
Expand Down
1 change: 1 addition & 0 deletions sycl/unittests/scheduler/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@ add_sycl_unittest(SchedulerTests OBJECT
LeavesCollection.cpp
NoUnifiedHostMemory.cpp
StreamInitDependencyOnHost.cpp
InOrderQueueDeps.cpp
utils.cpp
)
129 changes: 129 additions & 0 deletions sycl/unittests/scheduler/InOrderQueueDeps.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
//==------------ InOrderQueueueueDeps.cpp --- Scheduler unit tests ---------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SchedulerTest.hpp"
#include "SchedulerTestUtils.hpp"

#include <helpers/PiMock.hpp>

#include <iostream>
#include <memory>

using namespace cl::sycl;

static pi_result
redefinedMemBufferCreate(pi_context context, pi_mem_flags flags, size_t size,
void *host_ptr, pi_mem *ret_mem,
const pi_mem_properties *properties = nullptr) {
return PI_SUCCESS;
}

static pi_result redefinedMemRelease(pi_mem mem) { return PI_SUCCESS; }

static pi_result redefinedEnqueueMemBufferReadRect(
pi_queue command_queue, pi_mem buffer, pi_bool blocking_read,
pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset,
pi_buff_rect_region region, size_t buffer_row_pitch,
size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch,
void *ptr, pi_uint32 num_events_in_wait_list,
const pi_event *event_wait_list, pi_event *event) {
EXPECT_EQ(num_events_in_wait_list, 0u);
*event = reinterpret_cast<pi_event>(1);
return PI_SUCCESS;
}

static pi_result redefinedEnqueueMemBufferWriteRect(
pi_queue command_queue, pi_mem buffer, pi_bool blocking_write,
pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset,
pi_buff_rect_region region, size_t buffer_row_pitch,
size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch,
const void *ptr, pi_uint32 num_events_in_wait_list,
const pi_event *event_wait_list, pi_event *event) {
EXPECT_EQ(num_events_in_wait_list, 0u);
*event = reinterpret_cast<pi_event>(1);
return PI_SUCCESS;
}

static pi_result redefinedEnqueueMemBufferMap(
pi_queue command_queue, pi_mem buffer, pi_bool blocking_map,
pi_map_flags map_flags, size_t offset, size_t size,
pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list,
pi_event *event, void **ret_map) {
EXPECT_EQ(num_events_in_wait_list, 0u);
*event = reinterpret_cast<pi_event>(1);
return PI_SUCCESS;
}

static pi_result redefinedEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj,
void *mapped_ptr,
pi_uint32 num_events_in_wait_list,
const pi_event *event_wait_list,
pi_event *event) {
EXPECT_EQ(num_events_in_wait_list, 0u);
*event = reinterpret_cast<pi_event>(1);
return PI_SUCCESS;
}

static pi_result redefinedEventsWait(pi_uint32 num_events,
const pi_event *event_list) {
return PI_SUCCESS;
}

pi_result redefinedEventRelease(pi_event event) { return PI_SUCCESS; }

TEST_F(SchedulerTest, InOrderQueueDeps) {
default_selector Selector;
platform Plt{default_selector()};
if (Plt.is_host()) {
std::cout << "Not run due to host-only environment\n";
return;
}

unittest::PiMock Mock{Plt};
Mock.redefine<detail::PiApiKind::piMemBufferCreate>(redefinedMemBufferCreate);
Mock.redefine<detail::PiApiKind::piMemRelease>(redefinedMemRelease);
Mock.redefine<detail::PiApiKind::piEnqueueMemBufferReadRect>(
redefinedEnqueueMemBufferReadRect);
Mock.redefine<detail::PiApiKind::piEnqueueMemBufferWriteRect>(
redefinedEnqueueMemBufferWriteRect);
Mock.redefine<detail::PiApiKind::piEnqueueMemBufferMap>(
redefinedEnqueueMemBufferMap);
Mock.redefine<detail::PiApiKind::piEnqueueMemUnmap>(redefinedEnqueueMemUnmap);
Mock.redefine<detail::PiApiKind::piEventsWait>(redefinedEventsWait);
Mock.redefine<detail::PiApiKind::piEventRelease>(redefinedEventRelease);

context Ctx{Plt};
queue InOrderQueue{Ctx, Selector, property::queue::in_order()};
cl::sycl::detail::QueueImplPtr InOrderQueueImpl =
detail::getSyclObjImpl(InOrderQueue);

device HostDevice;
std::shared_ptr<detail::queue_impl> DefaultHostQueue{
new detail::queue_impl(detail::getSyclObjImpl(HostDevice), {}, {})};

MockScheduler MS;

int val;
buffer<int, 1> Buf(&val, range<1>(1));
detail::Requirement Req = getMockRequirement(Buf);

detail::MemObjRecord *Record =
MS.getOrInsertMemObjRecord(InOrderQueueImpl, &Req);
MS.getOrCreateAllocaForReq(Record, &Req, InOrderQueueImpl);
MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue);

// Check that sequential memory movements submitted to the same in-order
// queue do not depend on each other.
detail::Command *Cmd = MS.insertMemoryMove(Record, &Req, DefaultHostQueue);
detail::EnqueueResultT Res;
MockScheduler::enqueueCommand(Cmd, Res, detail::NON_BLOCKING);
Cmd = MS.insertMemoryMove(Record, &Req, InOrderQueueImpl);
MockScheduler::enqueueCommand(Cmd, Res, detail::NON_BLOCKING);
Cmd = MS.insertMemoryMove(Record, &Req, DefaultHostQueue);
MockScheduler::enqueueCommand(Cmd, Res, detail::NON_BLOCKING);
}