Skip to content

Commit 9876e19

Browse files
authored
[SYCL][XPTI] 'queue_id' metadata feature refactoring (#13070)
- Better requirements/test cases showed gaps in previous implementation that resulted in data inconsistencies - Metadata is associated with UID and since UIDs are the same multiple instantiations of the same object, only invariant data needs to be stored in the metadata object - Adding mutable data resulted in data inconsistencies and the feature refactoring addresses these issues --------- Signed-off-by: Vasanth Tovinkere <[email protected]>
1 parent 2f9c0bb commit 9876e19

File tree

13 files changed

+313
-49
lines changed

13 files changed

+313
-49
lines changed

sycl/source/detail/queue_impl.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,9 @@ event queue_impl::memset(const std::shared_ptr<detail::queue_impl> &Self,
113113
xpti::addMetadata(TEvent, "memory_size", Count);
114114
xpti::addMetadata(TEvent, "queue_id", MQueueID);
115115
});
116+
// Before we notifiy the subscribers, we broadcast the 'queue_id', which was a
117+
// metadata entry to TLS for use by callback handlers
118+
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID);
116119
// Notify XPTI about the memset submission
117120
PrepareNotify.notify();
118121
// Emit a begin/end scope for this call
@@ -159,6 +162,7 @@ event queue_impl::memcpy(const std::shared_ptr<detail::queue_impl> &Self,
159162
xpti::addMetadata(TEvent, "memory_size", Count);
160163
xpti::addMetadata(TEvent, "queue_id", MQueueID);
161164
});
165+
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID);
162166
// Notify XPTI about the memset submission
163167
PrepareNotify.notify();
164168
// Emit a begin/end scope for this call

sycl/source/detail/queue_impl.hpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ class queue_impl {
9292
/// \param PropList is a list of properties to use for queue construction.
9393
queue_impl(const DeviceImplPtr &Device, const async_handler &AsyncHandler,
9494
const property_list &PropList)
95-
: queue_impl(Device, getDefaultOrNew(Device), AsyncHandler, PropList) {};
95+
: queue_impl(Device, getDefaultOrNew(Device), AsyncHandler, PropList){};
9696

9797
/// Constructs a SYCL queue with an async_handler and property_list provided
9898
/// form a device and a context.
@@ -176,13 +176,16 @@ class queue_impl {
176176
// This section is the second part of the instrumentation that uses the
177177
// tracepoint information and notifies
178178
}
179+
179180
// We enable XPTI tracing events using the TLS mechanism; if the code
180181
// location data is available, then the tracing data will be rich.
181182
#if XPTI_ENABLE_INSTRUMENTATION
182183
constexpr uint16_t NotificationTraceType =
183184
static_cast<uint16_t>(xpti::trace_point_type_t::queue_create);
185+
// Using the instance override constructor for use with queues as queues
186+
// maintain instance IDs in the object
184187
XPTIScope PrepareNotify((void *)this, NotificationTraceType,
185-
SYCL_STREAM_NAME, "queue_create");
188+
SYCL_STREAM_NAME, MQueueID, "queue_create");
186189
// Cache the trace event, stream id and instance IDs for the destructor
187190
if (xptiCheckTraceEnabled(PrepareNotify.streamID(),
188191
NotificationTraceType)) {
@@ -207,6 +210,8 @@ class queue_impl {
207210
xpti::addMetadata(TEvent, "queue_handle",
208211
reinterpret_cast<size_t>(getHandleRef()));
209212
});
213+
// Also publish to TLS
214+
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID);
210215
PrepareNotify.notify();
211216
}
212217
#endif
@@ -244,7 +249,7 @@ class queue_impl {
244249
constexpr uint16_t NotificationTraceType =
245250
static_cast<uint16_t>(xpti::trace_point_type_t::queue_create);
246251
XPTIScope PrepareNotify((void *)this, NotificationTraceType,
247-
SYCL_STREAM_NAME, "queue_create");
252+
SYCL_STREAM_NAME, MQueueID, "queue_create");
248253
if (xptiCheckTraceEnabled(PrepareNotify.streamID(),
249254
NotificationTraceType)) {
250255
// Cache the trace event, stream id and instance IDs for the destructor
@@ -269,6 +274,8 @@ class queue_impl {
269274
if (!MHostQueue)
270275
xpti::addMetadata(TEvent, "queue_handle", getHandleRef());
271276
});
277+
// Also publish to TLS before notification
278+
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID);
272279
PrepareNotify.notify();
273280
}
274281
#endif

sycl/source/detail/scheduler/commands.cpp

Lines changed: 47 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1005,7 +1005,10 @@ void AllocaCommandBase::emitInstrumentationData() {
10051005
xpti::addMetadata(TE, "sycl_device_name",
10061006
getSyclObjImpl(MQueue->get_device())->getDeviceName());
10071007
xpti::addMetadata(TE, "memory_object", reinterpret_cast<size_t>(MAddress));
1008-
xpti::addMetadata(TE, "queue_id", MQueue->getQueueID());
1008+
// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
1009+
// as this data is mutable and the metadata is supposed to be invariant
1010+
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
1011+
MQueue->getQueueID());
10091012
}
10101013
#endif
10111014
}
@@ -1124,7 +1127,8 @@ void AllocaSubBufCommand::emitInstrumentationData() {
11241127
this->MRequirement.MAccessRange[0]);
11251128
xpti::addMetadata(TE, "access_range_end",
11261129
this->MRequirement.MAccessRange[1]);
1127-
xpti::addMetadata(TE, "queue_id", MQueue->getQueueID());
1130+
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
1131+
MQueue->getQueueID());
11281132
makeTraceEventEpilog();
11291133
}
11301134
#endif
@@ -1202,8 +1206,10 @@ void ReleaseCommand::emitInstrumentationData() {
12021206
getSyclObjImpl(MQueue->get_device())->getDeviceName());
12031207
xpti::addMetadata(TE, "allocation_type",
12041208
commandToName(MAllocaCmd->getType()));
1205-
xpti::addMetadata(TE, "queue_id", MQueue->getQueueID());
1206-
1209+
// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
1210+
// as this data is mutable and the metadata is supposed to be invariant
1211+
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
1212+
MQueue->getQueueID());
12071213
makeTraceEventEpilog();
12081214
}
12091215
#endif
@@ -1323,8 +1329,10 @@ void MapMemObject::emitInstrumentationData() {
13231329
xpti::addMetadata(TE, "sycl_device_name",
13241330
getSyclObjImpl(MQueue->get_device())->getDeviceName());
13251331
xpti::addMetadata(TE, "memory_object", reinterpret_cast<size_t>(MAddress));
1326-
xpti::addMetadata(TE, "queue_id", MQueue->getQueueID());
1327-
1332+
// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
1333+
// as this data is mutable and the metadata is supposed to be invariant
1334+
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
1335+
MQueue->getQueueID());
13281336
makeTraceEventEpilog();
13291337
}
13301338
#endif
@@ -1386,8 +1394,10 @@ void UnMapMemObject::emitInstrumentationData() {
13861394
xpti::addMetadata(TE, "sycl_device_name",
13871395
getSyclObjImpl(MQueue->get_device())->getDeviceName());
13881396
xpti::addMetadata(TE, "memory_object", reinterpret_cast<size_t>(MAddress));
1389-
xpti::addMetadata(TE, "queue_id", MQueue->getQueueID());
1390-
1397+
// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
1398+
// as this data is mutable and the metadata is supposed to be invariant
1399+
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
1400+
MQueue->getQueueID());
13911401
makeTraceEventEpilog();
13921402
}
13931403
#endif
@@ -1489,8 +1499,10 @@ void MemCpyCommand::emitInstrumentationData() {
14891499
xpti::addMetadata(
14901500
CmdTraceEvent, "copy_to",
14911501
reinterpret_cast<size_t>(getSyclObjImpl(MQueue->get_device()).get()));
1492-
xpti::addMetadata(CmdTraceEvent, "queue_id", MQueue->getQueueID());
1493-
1502+
// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
1503+
// as this data is mutable and the metadata is supposed to be invariant
1504+
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
1505+
MQueue->getQueueID());
14941506
makeTraceEventEpilog();
14951507
}
14961508
#endif
@@ -1665,8 +1677,10 @@ void MemCpyCommandHost::emitInstrumentationData() {
16651677
xpti::addMetadata(
16661678
CmdTraceEvent, "copy_to",
16671679
reinterpret_cast<size_t>(getSyclObjImpl(MQueue->get_device()).get()));
1668-
xpti::addMetadata(CmdTraceEvent, "queue_id", MQueue->getQueueID());
1669-
1680+
// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
1681+
// as this data is mutable and the metadata is supposed to be invariant
1682+
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
1683+
MQueue->getQueueID());
16701684
makeTraceEventEpilog();
16711685
}
16721686
#endif
@@ -1756,8 +1770,10 @@ void EmptyCommand::emitInstrumentationData() {
17561770
getSyclObjImpl(MQueue->get_device())->getDeviceName());
17571771
xpti::addMetadata(CmdTraceEvent, "memory_object",
17581772
reinterpret_cast<size_t>(MAddress));
1759-
xpti::addMetadata(CmdTraceEvent, "queue_id", MQueue->getQueueID());
1760-
1773+
// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
1774+
// as this data is mutable and the metadata is supposed to be invariant
1775+
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
1776+
MQueue->getQueueID());
17611777
makeTraceEventEpilog();
17621778
}
17631779
#endif
@@ -1828,8 +1844,10 @@ void UpdateHostRequirementCommand::emitInstrumentationData() {
18281844
getSyclObjImpl(MQueue->get_device())->getDeviceName());
18291845
xpti::addMetadata(CmdTraceEvent, "memory_object",
18301846
reinterpret_cast<size_t>(MAddress));
1831-
xpti::addMetadata(CmdTraceEvent, "queue_id", MQueue->getQueueID());
1832-
1847+
// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
1848+
// as this data is mutable and the metadata is supposed to be invariant
1849+
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
1850+
MQueue->getQueueID());
18331851
makeTraceEventEpilog();
18341852
}
18351853
#endif
@@ -2063,7 +2081,9 @@ void instrumentationFillCommonData(const std::string &KernelName,
20632081
xpti::addMetadata(CmdTraceEvent, "sym_column_no",
20642082
static_cast<int>(Column));
20652083
}
2066-
xpti::addMetadata(CmdTraceEvent, "queue_id", Queue->getQueueID());
2084+
// We no longer set the 'queue_id' in the metadata structure as it is a
2085+
// mutable value and multiple threads using the same queue created at the
2086+
// same location will overwrite the metadata values creating inconsistencies
20672087
}
20682088
}
20692089
#endif
@@ -2096,6 +2116,10 @@ std::pair<xpti_td *, uint64_t> emitKernelInstrumentationData(
20962116
FromSource, InstanceID, CmdTraceEvent);
20972117

20982118
if (CmdTraceEvent) {
2119+
// Stash the queue_id mutable metadata in TLS
2120+
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
2121+
Queue->getQueueID());
2122+
20992123
instrumentationAddExtraKernelMetadata(CmdTraceEvent, NDRDesc,
21002124
KernelBundleImplPtr, SyclKernelName,
21012125
SyclKernel, Queue, CGArgs);
@@ -2139,6 +2163,8 @@ void ExecCGCommand::emitInstrumentationData() {
21392163
CmdTraceEvent);
21402164

21412165
if (CmdTraceEvent) {
2166+
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
2167+
MQueue->getQueueID());
21422168
MTraceEvent = static_cast<void *>(CmdTraceEvent);
21432169
if (MCommandGroup->getType() == detail::CG::Kernel) {
21442170
auto KernelCG =
@@ -3351,10 +3377,12 @@ void KernelFusionCommand::emitInstrumentationData() {
33513377
deviceToString(MQueue->get_device()));
33523378
xpti::addMetadata(CmdTraceEvent, "sycl_device_name",
33533379
getSyclObjImpl(MQueue->get_device())->getDeviceName());
3354-
xpti::addMetadata(CmdTraceEvent, "queue_id", MQueue->getQueueID());
33553380
}
3356-
33573381
if (MFirstInstance) {
3382+
// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
3383+
// as this data is mutable and the metadata is supposed to be invariant
3384+
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
3385+
MQueue->getQueueID());
33583386
xptiNotifySubscribers(MStreamID, NotificationTraceType,
33593387
detail::GSYCLGraphEvent,
33603388
static_cast<xpti_td *>(MTraceEvent), MInstanceID,

sycl/source/detail/xpti_registry.hpp

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ extern uint8_t GMemAllocStreamID;
4242
extern xpti::trace_event_data_t *GMemAllocEvent;
4343
extern xpti::trace_event_data_t *GSYCLGraphEvent;
4444

45+
// We will pick a global constant so that the pointer in TLS never goes stale
46+
inline constexpr auto XPTI_QUEUE_INSTANCE_ID_KEY = "queue_id";
47+
4548
#define STR(x) #x
4649
#define SYCL_VERSION_STR \
4750
"sycl " STR(__LIBSYCL_MAJOR_VERSION) "." STR(__LIBSYCL_MINOR_VERSION)
@@ -165,6 +168,45 @@ class XPTIRegistry {
165168
class XPTIScope {
166169
public:
167170
using TracePoint = xpti::framework::tracepoint_t;
171+
/// @brief Scoped class for XPTI instrumentation using TLS data
172+
/// @param CodePtr The address of the class/function to help differentiate
173+
/// actions in case the code location information is not available
174+
/// @param TraceType The type of trace event being created
175+
/// @param StreamName The stream which will emit these notifications
176+
/// @param InstanceID The instance ID associated with an object, otherwise 0
177+
/// will auto-generate
178+
/// @param UserData String value that provides metadata about the
179+
/// instrumentation
180+
XPTIScope(void *CodePtr, uint16_t TraceType, const char *StreamName,
181+
uint64_t InstanceID, const char *UserData)
182+
: MUserData(UserData), MStreamID(0), MInstanceID(InstanceID),
183+
MScopedNotify(false), MTraceType(0) {
184+
detail::tls_code_loc_t Tls;
185+
auto TData = Tls.query();
186+
// If TLS is not set, we can still genertate universal IDs with user data
187+
// and CodePtr information
188+
const char *FuncName = TData.functionName();
189+
if (!TData.functionName() && !TData.fileName())
190+
FuncName = UserData;
191+
// Create a tracepoint object that has a lifetime of this class
192+
MTP = new TracePoint(TData.fileName(), FuncName, TData.lineNumber(),
193+
TData.columnNumber(), CodePtr);
194+
if (TraceType == (uint16_t)xpti::trace_point_type_t::graph_create ||
195+
TraceType == (uint16_t)xpti::trace_point_type_t::node_create ||
196+
TraceType == (uint16_t)xpti::trace_point_type_t::edge_create ||
197+
TraceType == (uint16_t)xpti::trace_point_type_t::queue_create)
198+
MTP->parent_event(GSYCLGraphEvent);
199+
// Now if tracing is enabled, create trace events and notify
200+
if (xptiTraceEnabled() && MTP) {
201+
MTP->stream(StreamName).trace_type((xpti::trace_point_type_t)TraceType);
202+
MTraceEvent = const_cast<xpti::trace_event_data_t *>(MTP->trace_event());
203+
MStreamID = MTP->stream_id();
204+
// This constructor uses a manual override for the instance ID as some
205+
// objects such as queues keep track of instance IDs
206+
MTP->override_instance_id(MInstanceID);
207+
}
208+
}
209+
168210
/// @brief Scoped class for XPTI instrumentation using TLS data
169211
/// @param CodePtr The address of the class/function to help differentiate
170212
/// actions in case the code location information is not available
@@ -188,7 +230,8 @@ class XPTIScope {
188230
TData.columnNumber(), CodePtr);
189231
if (TraceType == (uint16_t)xpti::trace_point_type_t::graph_create ||
190232
TraceType == (uint16_t)xpti::trace_point_type_t::node_create ||
191-
TraceType == (uint16_t)xpti::trace_point_type_t::edge_create)
233+
TraceType == (uint16_t)xpti::trace_point_type_t::edge_create ||
234+
TraceType == (uint16_t)xpti::trace_point_type_t::queue_create)
192235
MTP->parent_event(GSYCLGraphEvent);
193236
// Now if tracing is enabled, create trace events and notify
194237
if (xptiTraceEnabled() && MTP) {
@@ -243,6 +286,8 @@ class XPTIScope {
243286
MTraceType == (uint16_t)xpti::trace_point_type_t::graph_create ||
244287
MTraceType == (uint16_t)xpti::trace_point_type_t::node_create ||
245288
MTraceType == (uint16_t)xpti::trace_point_type_t::edge_create ||
289+
MTraceType == (uint16_t)xpti::trace_point_type_t::queue_create ||
290+
MTraceType == (uint16_t)xpti::trace_point_type_t::queue_destroy ||
246291
MTraceType == (uint16_t)xpti::trace_point_type_t::diagnostics)
247292
return;
248293

sycl/test-e2e/XPTI/Inputs/test_collector.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,10 @@ XPTI_CALLBACK_API void syclCallback(uint16_t TraceType,
6262
xpti::trace_event_data_t *,
6363
xpti::trace_event_data_t *Event, uint64_t,
6464
const void *UserData) {
65+
char *Key = 0;
66+
uint64_t Value;
67+
bool HaveKeyValue =
68+
(xptiGetStashedTuple(&Key, Value) == xpti::result_t::XPTI_RESULT_SUCCESS);
6569
std::lock_guard Lock{GMutex};
6670
auto Type = static_cast<xpti::trace_point_type_t>(TraceType);
6771
switch (Type) {
@@ -99,6 +103,9 @@ XPTI_CALLBACK_API void syclCallback(uint16_t TraceType,
99103
std::cout << "Unknown tracepoint\n";
100104
}
101105

106+
if (HaveKeyValue) {
107+
std::cout << " " << Key << " : " << Value << "\n";
108+
}
102109
xpti::metadata_t *Metadata = xptiQueryMetadata(Event);
103110
for (auto &Item : *Metadata) {
104111
std::cout << " " << xptiLookupString(Item.first) << " : "

0 commit comments

Comments
 (0)