Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions sycl/source/detail/queue_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,9 @@ event queue_impl::memset(const std::shared_ptr<detail::queue_impl> &Self,
xpti::addMetadata(TEvent, "memory_size", Count);
xpti::addMetadata(TEvent, "queue_id", MQueueID);
});
// Before we notify the subscribers, we broadcast the 'queue_id', which was a
// metadata entry, to TLS for use by callback handlers
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID);
// Notify XPTI about the memset submission
PrepareNotify.notify();
// Emit a begin/end scope for this call
Expand Down Expand Up @@ -159,6 +162,7 @@ event queue_impl::memcpy(const std::shared_ptr<detail::queue_impl> &Self,
xpti::addMetadata(TEvent, "memory_size", Count);
xpti::addMetadata(TEvent, "queue_id", MQueueID);
});
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID);
// Notify XPTI about the memcpy submission
PrepareNotify.notify();
// Emit a begin/end scope for this call
Expand Down
13 changes: 10 additions & 3 deletions sycl/source/detail/queue_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ class queue_impl {
/// \param PropList is a list of properties to use for queue construction.
queue_impl(const DeviceImplPtr &Device, const async_handler &AsyncHandler,
const property_list &PropList)
: queue_impl(Device, getDefaultOrNew(Device), AsyncHandler, PropList) {};
: queue_impl(Device, getDefaultOrNew(Device), AsyncHandler, PropList){};

/// Constructs a SYCL queue with an async_handler and property_list provided
/// from a device and a context.
Expand Down Expand Up @@ -176,13 +176,16 @@ class queue_impl {
// This section is the second part of the instrumentation that uses the
// tracepoint information and notifies
}

// We enable XPTI tracing events using the TLS mechanism; if the code
// location data is available, then the tracing data will be rich.
#if XPTI_ENABLE_INSTRUMENTATION
constexpr uint16_t NotificationTraceType =
static_cast<uint16_t>(xpti::trace_point_type_t::queue_create);
// Using the instance override constructor for use with queues as queues
// maintain instance IDs in the object
XPTIScope PrepareNotify((void *)this, NotificationTraceType,
SYCL_STREAM_NAME, "queue_create");
SYCL_STREAM_NAME, MQueueID, "queue_create");
// Cache the trace event, stream id and instance IDs for the destructor
if (xptiCheckTraceEnabled(PrepareNotify.streamID(),
NotificationTraceType)) {
Expand All @@ -207,6 +210,8 @@ class queue_impl {
xpti::addMetadata(TEvent, "queue_handle",
reinterpret_cast<size_t>(getHandleRef()));
});
// Also publish to TLS
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID);
PrepareNotify.notify();
}
#endif
Expand Down Expand Up @@ -244,7 +249,7 @@ class queue_impl {
constexpr uint16_t NotificationTraceType =
static_cast<uint16_t>(xpti::trace_point_type_t::queue_create);
XPTIScope PrepareNotify((void *)this, NotificationTraceType,
SYCL_STREAM_NAME, "queue_create");
SYCL_STREAM_NAME, MQueueID, "queue_create");
if (xptiCheckTraceEnabled(PrepareNotify.streamID(),
NotificationTraceType)) {
// Cache the trace event, stream id and instance IDs for the destructor
Expand All @@ -269,6 +274,8 @@ class queue_impl {
if (!MHostQueue)
xpti::addMetadata(TEvent, "queue_handle", getHandleRef());
});
// Also publish to TLS before notification
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID);
PrepareNotify.notify();
}
#endif
Expand Down
66 changes: 47 additions & 19 deletions sycl/source/detail/scheduler/commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1005,7 +1005,10 @@ void AllocaCommandBase::emitInstrumentationData() {
xpti::addMetadata(TE, "sycl_device_name",
getSyclObjImpl(MQueue->get_device())->getDeviceName());
xpti::addMetadata(TE, "memory_object", reinterpret_cast<size_t>(MAddress));
xpti::addMetadata(TE, "queue_id", MQueue->getQueueID());
// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
// as this data is mutable and the metadata is supposed to be invariant
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
MQueue->getQueueID());
}
#endif
}
Expand Down Expand Up @@ -1124,7 +1127,8 @@ void AllocaSubBufCommand::emitInstrumentationData() {
this->MRequirement.MAccessRange[0]);
xpti::addMetadata(TE, "access_range_end",
this->MRequirement.MAccessRange[1]);
xpti::addMetadata(TE, "queue_id", MQueue->getQueueID());
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
MQueue->getQueueID());
makeTraceEventEpilog();
}
#endif
Expand Down Expand Up @@ -1202,8 +1206,10 @@ void ReleaseCommand::emitInstrumentationData() {
getSyclObjImpl(MQueue->get_device())->getDeviceName());
xpti::addMetadata(TE, "allocation_type",
commandToName(MAllocaCmd->getType()));
xpti::addMetadata(TE, "queue_id", MQueue->getQueueID());

// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
// as this data is mutable and the metadata is supposed to be invariant
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
MQueue->getQueueID());
makeTraceEventEpilog();
}
#endif
Expand Down Expand Up @@ -1323,8 +1329,10 @@ void MapMemObject::emitInstrumentationData() {
xpti::addMetadata(TE, "sycl_device_name",
getSyclObjImpl(MQueue->get_device())->getDeviceName());
xpti::addMetadata(TE, "memory_object", reinterpret_cast<size_t>(MAddress));
xpti::addMetadata(TE, "queue_id", MQueue->getQueueID());

// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
// as this data is mutable and the metadata is supposed to be invariant
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
MQueue->getQueueID());
makeTraceEventEpilog();
}
#endif
Expand Down Expand Up @@ -1386,8 +1394,10 @@ void UnMapMemObject::emitInstrumentationData() {
xpti::addMetadata(TE, "sycl_device_name",
getSyclObjImpl(MQueue->get_device())->getDeviceName());
xpti::addMetadata(TE, "memory_object", reinterpret_cast<size_t>(MAddress));
xpti::addMetadata(TE, "queue_id", MQueue->getQueueID());

// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
// as this data is mutable and the metadata is supposed to be invariant
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
MQueue->getQueueID());
makeTraceEventEpilog();
}
#endif
Expand Down Expand Up @@ -1489,8 +1499,10 @@ void MemCpyCommand::emitInstrumentationData() {
xpti::addMetadata(
CmdTraceEvent, "copy_to",
reinterpret_cast<size_t>(getSyclObjImpl(MQueue->get_device()).get()));
xpti::addMetadata(CmdTraceEvent, "queue_id", MQueue->getQueueID());

// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
// as this data is mutable and the metadata is supposed to be invariant
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
MQueue->getQueueID());
makeTraceEventEpilog();
}
#endif
Expand Down Expand Up @@ -1665,8 +1677,10 @@ void MemCpyCommandHost::emitInstrumentationData() {
xpti::addMetadata(
CmdTraceEvent, "copy_to",
reinterpret_cast<size_t>(getSyclObjImpl(MQueue->get_device()).get()));
xpti::addMetadata(CmdTraceEvent, "queue_id", MQueue->getQueueID());

// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
// as this data is mutable and the metadata is supposed to be invariant
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
MQueue->getQueueID());
makeTraceEventEpilog();
}
#endif
Expand Down Expand Up @@ -1756,8 +1770,10 @@ void EmptyCommand::emitInstrumentationData() {
getSyclObjImpl(MQueue->get_device())->getDeviceName());
xpti::addMetadata(CmdTraceEvent, "memory_object",
reinterpret_cast<size_t>(MAddress));
xpti::addMetadata(CmdTraceEvent, "queue_id", MQueue->getQueueID());

// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
// as this data is mutable and the metadata is supposed to be invariant
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
MQueue->getQueueID());
makeTraceEventEpilog();
}
#endif
Expand Down Expand Up @@ -1828,8 +1844,10 @@ void UpdateHostRequirementCommand::emitInstrumentationData() {
getSyclObjImpl(MQueue->get_device())->getDeviceName());
xpti::addMetadata(CmdTraceEvent, "memory_object",
reinterpret_cast<size_t>(MAddress));
xpti::addMetadata(CmdTraceEvent, "queue_id", MQueue->getQueueID());

// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
// as this data is mutable and the metadata is supposed to be invariant
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
MQueue->getQueueID());
makeTraceEventEpilog();
}
#endif
Expand Down Expand Up @@ -2063,7 +2081,9 @@ void instrumentationFillCommonData(const std::string &KernelName,
xpti::addMetadata(CmdTraceEvent, "sym_column_no",
static_cast<int>(Column));
}
xpti::addMetadata(CmdTraceEvent, "queue_id", Queue->getQueueID());
// We no longer set the 'queue_id' in the metadata structure as it is a
// mutable value and multiple threads using the same queue created at the
// same location will overwrite the metadata values creating inconsistencies
}
}
#endif
Expand Down Expand Up @@ -2096,6 +2116,10 @@ std::pair<xpti_td *, uint64_t> emitKernelInstrumentationData(
FromSource, InstanceID, CmdTraceEvent);

if (CmdTraceEvent) {
// Stash the queue_id mutable metadata in TLS
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
Queue->getQueueID());

instrumentationAddExtraKernelMetadata(CmdTraceEvent, NDRDesc,
KernelBundleImplPtr, SyclKernelName,
SyclKernel, Queue, CGArgs);
Expand Down Expand Up @@ -2139,6 +2163,8 @@ void ExecCGCommand::emitInstrumentationData() {
CmdTraceEvent);

if (CmdTraceEvent) {
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
MQueue->getQueueID());
MTraceEvent = static_cast<void *>(CmdTraceEvent);
if (MCommandGroup->getType() == detail::CG::Kernel) {
auto KernelCG =
Expand Down Expand Up @@ -3351,10 +3377,12 @@ void KernelFusionCommand::emitInstrumentationData() {
deviceToString(MQueue->get_device()));
xpti::addMetadata(CmdTraceEvent, "sycl_device_name",
getSyclObjImpl(MQueue->get_device())->getDeviceName());
xpti::addMetadata(CmdTraceEvent, "queue_id", MQueue->getQueueID());
}

if (MFirstInstance) {
// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
// as this data is mutable and the metadata is supposed to be invariant
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
MQueue->getQueueID());
xptiNotifySubscribers(MStreamID, NotificationTraceType,
detail::GSYCLGraphEvent,
static_cast<xpti_td *>(MTraceEvent), MInstanceID,
Expand Down
47 changes: 46 additions & 1 deletion sycl/source/detail/xpti_registry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ extern uint8_t GMemAllocStreamID;
extern xpti::trace_event_data_t *GMemAllocEvent;
extern xpti::trace_event_data_t *GSYCLGraphEvent;

// Key string used when the queue instance ID is stashed for XPTI callbacks.
// It is a single global constant so that the key pointer held in TLS never
// goes stale.
inline constexpr const char *XPTI_QUEUE_INSTANCE_ID_KEY = "queue_id";

// STR(x) turns its argument into a string literal exactly as written; macro
// arguments are NOT expanded before '#' is applied.
#define STR(x) #x
// Expands macro arguments first and only then stringifies them. Without this
// extra level, STR(__LIBSYCL_MAJOR_VERSION) would yield the macro's name
// instead of its value.
#define SYCL_STR_EXPANDED(x) STR(x)
// Version string of the form "sycl <major>.<minor>", built from the
// __LIBSYCL_MAJOR_VERSION and __LIBSYCL_MINOR_VERSION macros.
#define SYCL_VERSION_STR \
"sycl " SYCL_STR_EXPANDED(__LIBSYCL_MAJOR_VERSION) "." SYCL_STR_EXPANDED(__LIBSYCL_MINOR_VERSION)
Expand Down Expand Up @@ -165,6 +168,45 @@ class XPTIRegistry {
class XPTIScope {
public:
using TracePoint = xpti::framework::tracepoint_t;
/// @brief Scoped class for XPTI instrumentation using TLS data
/// @param CodePtr The address of the class/function to help differentiate
/// actions in case the code location information is not available
/// @param TraceType The type of trace event being created
/// @param StreamName The stream which will emit these notifications
/// @param InstanceID The instance ID associated with an object, otherwise 0
/// will auto-generate
/// @param UserData String value that provides metadata about the
/// instrumentation
XPTIScope(void *CodePtr, uint16_t TraceType, const char *StreamName,
uint64_t InstanceID, const char *UserData)
: MUserData(UserData), MStreamID(0), MInstanceID(InstanceID),
MScopedNotify(false), MTraceType(0) {
detail::tls_code_loc_t Tls;
auto TData = Tls.query();
// If TLS is not set, we can still genertate universal IDs with user data
// and CodePtr information
const char *FuncName = TData.functionName();
if (!TData.functionName() && !TData.fileName())
FuncName = UserData;
// Create a tracepoint object that has a lifetime of this class
MTP = new TracePoint(TData.fileName(), FuncName, TData.lineNumber(),
TData.columnNumber(), CodePtr);
if (TraceType == (uint16_t)xpti::trace_point_type_t::graph_create ||
TraceType == (uint16_t)xpti::trace_point_type_t::node_create ||
TraceType == (uint16_t)xpti::trace_point_type_t::edge_create ||
TraceType == (uint16_t)xpti::trace_point_type_t::queue_create)
MTP->parent_event(GSYCLGraphEvent);
// Now if tracing is enabled, create trace events and notify
if (xptiTraceEnabled() && MTP) {
MTP->stream(StreamName).trace_type((xpti::trace_point_type_t)TraceType);
MTraceEvent = const_cast<xpti::trace_event_data_t *>(MTP->trace_event());
MStreamID = MTP->stream_id();
// This constructor uses a manual override for the instance ID as some
// objects such as queues keep track of instance IDs
MTP->override_instance_id(MInstanceID);
}
}

/// @brief Scoped class for XPTI instrumentation using TLS data
/// @param CodePtr The address of the class/function to help differentiate
/// actions in case the code location information is not available
Expand All @@ -188,7 +230,8 @@ class XPTIScope {
TData.columnNumber(), CodePtr);
if (TraceType == (uint16_t)xpti::trace_point_type_t::graph_create ||
TraceType == (uint16_t)xpti::trace_point_type_t::node_create ||
TraceType == (uint16_t)xpti::trace_point_type_t::edge_create)
TraceType == (uint16_t)xpti::trace_point_type_t::edge_create ||
TraceType == (uint16_t)xpti::trace_point_type_t::queue_create)
MTP->parent_event(GSYCLGraphEvent);
// Now if tracing is enabled, create trace events and notify
if (xptiTraceEnabled() && MTP) {
Expand Down Expand Up @@ -243,6 +286,8 @@ class XPTIScope {
MTraceType == (uint16_t)xpti::trace_point_type_t::graph_create ||
MTraceType == (uint16_t)xpti::trace_point_type_t::node_create ||
MTraceType == (uint16_t)xpti::trace_point_type_t::edge_create ||
MTraceType == (uint16_t)xpti::trace_point_type_t::queue_create ||
MTraceType == (uint16_t)xpti::trace_point_type_t::queue_destroy ||
MTraceType == (uint16_t)xpti::trace_point_type_t::diagnostics)
return;

Expand Down
7 changes: 7 additions & 0 deletions sycl/test-e2e/XPTI/Inputs/test_collector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ XPTI_CALLBACK_API void syclCallback(uint16_t TraceType,
xpti::trace_event_data_t *,
xpti::trace_event_data_t *Event, uint64_t,
const void *UserData) {
char *Key = 0;
uint64_t Value;
bool HaveKeyValue =
(xptiGetStashedTuple(&Key, Value) == xpti::result_t::XPTI_RESULT_SUCCESS);
std::lock_guard Lock{GMutex};
auto Type = static_cast<xpti::trace_point_type_t>(TraceType);
switch (Type) {
Expand Down Expand Up @@ -99,6 +103,9 @@ XPTI_CALLBACK_API void syclCallback(uint16_t TraceType,
std::cout << "Unknown tracepoint\n";
}

if (HaveKeyValue) {
std::cout << " " << Key << " : " << Value << "\n";
}
xpti::metadata_t *Metadata = xptiQueryMetadata(Event);
for (auto &Item : *Metadata) {
std::cout << " " << xptiLookupString(Item.first) << " : "
Expand Down
Loading