@@ -409,22 +409,19 @@ void graph_impl::markCGMemObjs(
409
409
}
410
410
}
411
411
412
- std::shared_ptr<node_impl> graph_impl::add (nodes_range Deps) {
413
- const std::shared_ptr<node_impl> &NodeImpl = std::make_shared<node_impl>();
414
-
415
- MNodeStorage.push_back (NodeImpl);
412
+ node_impl &graph_impl::add (nodes_range Deps) {
413
+ node_impl &NodeImpl = createNode ();
416
414
417
415
addDepsToNode (NodeImpl, Deps);
418
416
// Add an event associated with this explicit node for mixed usage
419
417
addEventForNode (sycl::detail::event_impl::create_completed_host_event (),
420
- * NodeImpl);
418
+ NodeImpl);
421
419
return NodeImpl;
422
420
}
423
421
424
- std::shared_ptr<node_impl>
425
- graph_impl::add (std::function<void (handler &)> CGF,
426
- const std::vector<sycl::detail::ArgDesc> &Args,
427
- std::vector<std::shared_ptr<node_impl>> &Deps) {
422
+ node_impl &graph_impl::add (std::function<void (handler &)> CGF,
423
+ const std::vector<sycl::detail::ArgDesc> &Args,
424
+ nodes_range Deps) {
428
425
(void )Args;
429
426
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
430
427
detail::handler_impl HandlerImpl{*this };
@@ -435,7 +432,8 @@ graph_impl::add(std::function<void(handler &)> CGF,
435
432
436
433
// Pass the node deps to the handler so they are available when processing the
437
434
// CGF, need for async_malloc nodes.
438
- Handler.impl ->MNodeDeps = Deps;
435
+ for (node_impl &N : Deps)
436
+ Handler.impl ->MNodeDeps .push_back (N.shared_from_this ());
439
437
440
438
#if XPTI_ENABLE_INSTRUMENTATION
441
439
// Save code location if one was set in TLS.
@@ -471,12 +469,12 @@ graph_impl::add(std::function<void(handler &)> CGF,
471
469
: ext::oneapi::experimental::detail::getNodeTypeFromCG (
472
470
Handler.getType ());
473
471
474
- auto NodeImpl =
472
+ node_impl & NodeImpl =
475
473
this ->add (NodeType, std::move (Handler.impl ->MGraphNodeCG ), Deps);
476
474
477
475
// Add an event associated with this explicit node for mixed usage
478
476
addEventForNode (sycl::detail::event_impl::create_completed_host_event (),
479
- * NodeImpl);
477
+ NodeImpl);
480
478
481
479
// Retrieve any dynamic parameters which have been registered in the CGF and
482
480
// register the actual nodes with them.
@@ -489,44 +487,40 @@ graph_impl::add(std::function<void(handler &)> CGF,
489
487
}
490
488
491
489
for (auto &[DynamicParam, ArgIndex] : DynamicParams) {
492
- DynamicParam->registerNode (NodeImpl, ArgIndex);
490
+ DynamicParam->registerNode (NodeImpl. shared_from_this () , ArgIndex);
493
491
}
494
492
495
493
return NodeImpl;
496
494
}
497
495
498
- std::shared_ptr<node_impl>
499
- graph_impl::add (node_type NodeType,
500
- std::shared_ptr<sycl::detail::CG> CommandGroup,
501
- nodes_range Deps) {
496
+ node_impl &graph_impl::add (node_type NodeType,
497
+ std::shared_ptr<sycl::detail::CG> CommandGroup,
498
+ nodes_range Deps) {
502
499
503
500
// A unique set of dependencies obtained by checking requirements and events
504
501
std::set<node_impl *> UniqueDeps = getCGEdges (CommandGroup);
505
502
506
503
// Track and mark the memory objects being used by the graph.
507
504
markCGMemObjs (CommandGroup);
508
505
509
- const std::shared_ptr<node_impl> &NodeImpl =
510
- std::make_shared<node_impl>(NodeType, std::move (CommandGroup));
511
- MNodeStorage.push_back (NodeImpl);
506
+ node_impl &NodeImpl = createNode (NodeType, std::move (CommandGroup));
512
507
513
508
// Add any deps determined from requirements and events into the dependency
514
509
// list
515
510
addDepsToNode (NodeImpl, Deps);
516
511
addDepsToNode (NodeImpl, UniqueDeps);
517
512
518
513
if (NodeType == node_type::async_free) {
519
- auto AsyncFreeCG =
520
- static_cast <CGAsyncFree *>(NodeImpl->MCommandGroup .get ());
514
+ auto AsyncFreeCG = static_cast <CGAsyncFree *>(NodeImpl.MCommandGroup .get ());
521
515
// If this is an async free node mark that it is now available for reuse,
522
516
// and pass the async free node for tracking.
523
- MGraphMemPool.markAllocationAsAvailable (AsyncFreeCG->getPtr (), * NodeImpl);
517
+ MGraphMemPool.markAllocationAsAvailable (AsyncFreeCG->getPtr (), NodeImpl);
524
518
}
525
519
526
520
return NodeImpl;
527
521
}
528
522
529
- std::shared_ptr< node_impl>
523
+ node_impl &
530
524
graph_impl::add (std::shared_ptr<dynamic_command_group_impl> &DynCGImpl,
531
525
nodes_range Deps) {
532
526
// Set of Dependent nodes based on CG event and accessor dependencies.
@@ -550,15 +544,14 @@ graph_impl::add(std::shared_ptr<dynamic_command_group_impl> &DynCGImpl,
550
544
const auto &ActiveKernel = DynCGImpl->getActiveCG ();
551
545
node_type NodeType =
552
546
ext::oneapi::experimental::detail::getNodeTypeFromCG (DynCGImpl->MCGType );
553
- std::shared_ptr<detail::node_impl> NodeImpl =
554
- add (NodeType, ActiveKernel, Deps);
547
+ detail::node_impl &NodeImpl = add (NodeType, ActiveKernel, Deps);
555
548
556
549
// Add an event associated with this explicit node for mixed usage
557
550
addEventForNode (sycl::detail::event_impl::create_completed_host_event (),
558
- * NodeImpl);
551
+ NodeImpl);
559
552
560
553
// Track the dynamic command-group used inside the node object
561
- DynCGImpl->MNodes .push_back (NodeImpl);
554
+ DynCGImpl->MNodes .push_back (NodeImpl. shared_from_this () );
562
555
563
556
return NodeImpl;
564
557
}
@@ -651,7 +644,7 @@ void graph_impl::makeEdge(std::shared_ptr<node_impl> Src,
651
644
bool DestWasGraphRoot = Dest->MPredecessors .size () == 0 ;
652
645
653
646
// We need to add the edges first before checking for cycles
654
- Src->registerSuccessor (Dest);
647
+ Src->registerSuccessor (* Dest);
655
648
656
649
bool DestLostRootStatus = DestWasGraphRoot && Dest->MPredecessors .size () == 1 ;
657
650
if (DestLostRootStatus) {
@@ -1264,7 +1257,7 @@ void exec_graph_impl::duplicateNodes() {
1264
1257
// Look through all the original node successors, find their copies and
1265
1258
// register those as successors with the current copied node
1266
1259
for (node_impl &NextNode : OriginalNode->successors ()) {
1267
- auto Successor = NodesMap.at (NextNode.shared_from_this ());
1260
+ node_impl & Successor = * NodesMap.at (NextNode.shared_from_this ());
1268
1261
NodeCopy->registerSuccessor (Successor);
1269
1262
}
1270
1263
}
@@ -1306,7 +1299,8 @@ void exec_graph_impl::duplicateNodes() {
1306
1299
auto NodeCopy = NewSubgraphNodes[i];
1307
1300
1308
1301
for (node_impl &NextNode : SubgraphNode->successors ()) {
1309
- auto Successor = SubgraphNodesMap.at (NextNode.shared_from_this ());
1302
+ node_impl &Successor =
1303
+ *SubgraphNodesMap.at (NextNode.shared_from_this ());
1310
1304
NodeCopy->registerSuccessor (Successor);
1311
1305
}
1312
1306
}
@@ -1340,7 +1334,7 @@ void exec_graph_impl::duplicateNodes() {
1340
1334
// Add all input nodes from the subgraph as successors for this node
1341
1335
// instead
1342
1336
for (auto &Input : Inputs) {
1343
- PredNode.registerSuccessor (Input);
1337
+ PredNode.registerSuccessor (* Input);
1344
1338
}
1345
1339
}
1346
1340
@@ -1359,7 +1353,7 @@ void exec_graph_impl::duplicateNodes() {
1359
1353
// Add all Output nodes from the subgraph as predecessors for this node
1360
1354
// instead
1361
1355
for (auto &Output : Outputs) {
1362
- Output->registerSuccessor (SuccNode. shared_from_this () );
1356
+ Output->registerSuccessor (SuccNode);
1363
1357
}
1364
1358
}
1365
1359
@@ -1840,38 +1834,25 @@ node modifiable_command_graph::addImpl(dynamic_command_group &DynCGF,
1840
1834
" dynamic command-group." );
1841
1835
}
1842
1836
1843
- std::vector<std::shared_ptr<detail::node_impl>> DepImpls;
1844
- for (auto &D : Deps) {
1845
- DepImpls.push_back (sycl::detail::getSyclObjImpl (D));
1846
- }
1847
-
1848
1837
graph_impl::WriteLock Lock (impl->MMutex );
1849
- std::shared_ptr< detail::node_impl> NodeImpl = impl->add (DynCGFImpl, DepImpls );
1850
- return sycl::detail::createSyclObjFromImpl<node>(std::move ( NodeImpl) );
1838
+ detail::node_impl & NodeImpl = impl->add (DynCGFImpl, Deps );
1839
+ return sycl::detail::createSyclObjFromImpl<node>(NodeImpl);
1851
1840
}
1852
1841
1853
1842
node modifiable_command_graph::addImpl (const std::vector<node> &Deps) {
1854
1843
impl->throwIfGraphRecordingQueue (" Explicit API \" Add()\" function" );
1855
- std::vector<std::shared_ptr<detail::node_impl>> DepImpls;
1856
- for (auto &D : Deps) {
1857
- DepImpls.push_back (sycl::detail::getSyclObjImpl (D));
1858
- }
1859
1844
1860
1845
graph_impl::WriteLock Lock (impl->MMutex );
1861
- std::shared_ptr< detail::node_impl> NodeImpl = impl->add (DepImpls );
1862
- return sycl::detail::createSyclObjFromImpl<node>(std::move ( NodeImpl) );
1846
+ detail::node_impl & NodeImpl = impl->add (Deps );
1847
+ return sycl::detail::createSyclObjFromImpl<node>(NodeImpl);
1863
1848
}
1864
1849
1865
1850
node modifiable_command_graph::addImpl (std::function<void (handler &)> CGF,
1866
1851
const std::vector<node> &Deps) {
1867
1852
impl->throwIfGraphRecordingQueue (" Explicit API \" Add()\" function" );
1868
- std::vector<std::shared_ptr<detail::node_impl>> DepImpls;
1869
- for (auto &D : Deps) {
1870
- DepImpls.push_back (sycl::detail::getSyclObjImpl (D));
1871
- }
1872
1853
1873
- std::shared_ptr< detail::node_impl> NodeImpl = impl->add (CGF, {}, DepImpls );
1874
- return sycl::detail::createSyclObjFromImpl<node>(std::move ( NodeImpl) );
1854
+ detail::node_impl & NodeImpl = impl->add (CGF, {}, Deps );
1855
+ return sycl::detail::createSyclObjFromImpl<node>(NodeImpl);
1875
1856
}
1876
1857
1877
1858
void modifiable_command_graph::addGraphLeafDependencies (node Node) {
0 commit comments