Skip to content

Commit 6fb57f7

Browse files
committed
[libc] Add basic utility support for timing functions on the GPU
This patch adds the utilities for the clocks on the GPU. This is done prior to exporting them via some other interface and is mainly just done so they are available if we wish to do internal testing. Reviewed By: lntue Differential Revision: https://reviews.llvm.org/D153388
1 parent 869baa9 commit 6fb57f7

File tree

3 files changed

+43
-0
lines changed

3 files changed

+43
-0
lines changed

libc/src/__support/GPU/amdgpu/utils.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#define LLVM_LIBC_SRC_SUPPORT_GPU_AMDGPU_IO_H
1111

1212
#include "src/__support/common.h"
13+
#include "src/__support/macros/config.h"
1314

1415
#include <stdint.h>
1516

@@ -144,6 +145,30 @@ LIBC_INLINE uint32_t get_lane_size() { return LANE_SIZE; }
144145
__builtin_amdgcn_wave_barrier();
145146
}
146147

148+
/// Returns the current value of the GPU's processor clock.
149+
/// NOTE: The RDNA3 and RDNA2 architectures use a 20-bit cycle counter.
150+
LIBC_INLINE uint64_t processor_clock() { // clock source is chosen at compile time from the builtins LIBC_HAS_BUILTIN reports
151+
if constexpr (LIBC_HAS_BUILTIN(__builtin_amdgcn_s_memtime)) // first choice: the AMDGPU s_memtime builtin
152+
return __builtin_amdgcn_s_memtime();
153+
else if constexpr (LIBC_HAS_BUILTIN(__builtin_readcyclecounter)) // second choice: the generic cycle-counter builtin
154+
return __builtin_readcyclecounter();
155+
else
156+
return 0; // neither builtin is reported: the function degrades to a constant 0
157+
}
158+
159+
/// Returns a fixed-frequency timestamp. The actual frequency is dependent on
160+
/// the card and can only be queried via the driver.
161+
LIBC_INLINE uint64_t fixed_frequrency_clock() { // NOTE(review): "frequrency" looks like a misspelling of "frequency"; the amdgpu, generic and nvptx headers all use this spelling, so a rename has to change them together
162+
if constexpr (LIBC_HAS_BUILTIN(__builtin_amdgcn_s_sendmsg_rtnl)) // first choice when the compiler reports this builtin
163+
return __builtin_amdgcn_s_sendmsg_rtnl(0x83); // NOTE(review): 0x83 is an unnamed magic message id; presumably the "get realtime" request, confirm against the AMD ISA documentation
164+
else if constexpr (LIBC_HAS_BUILTIN(__builtin_amdgcn_s_memrealtime)) // second choice
165+
return __builtin_amdgcn_s_memrealtime();
166+
else if constexpr (LIBC_HAS_BUILTIN(__builtin_amdgcn_s_memtime)) // third choice; NOTE(review): this is the builtin processor_clock() tries first, so confirm it really ticks at a fixed frequency
167+
return __builtin_amdgcn_s_memtime();
168+
else
169+
return 0; // none of the three builtins is reported: the function degrades to a constant 0
170+
}
171+
147172
} // namespace gpu
148173
} // namespace __llvm_libc
149174

libc/src/__support/GPU/generic/utils.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,10 @@ LIBC_INLINE void sync_threads() {}
6767

6868
LIBC_INLINE void sync_lane(uint64_t) {}
6969

70+
/// Stub for the generic target: no clock is read and the result is always 0.
LIBC_INLINE uint64_t processor_clock() { return 0; }
71+
72+
/// Stub for the generic target: no timer is read and the result is always 0.
/// NOTE(review): "frequrency" looks like a misspelling of "frequency"; the
/// amdgpu and nvptx headers use the same spelling, so rename them together.
LIBC_INLINE uint64_t fixed_frequrency_clock() { return 0; }
73+
7074
} // namespace gpu
7175
} // namespace __llvm_libc
7276

libc/src/__support/GPU/nvptx/utils.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,20 @@ LIBC_INLINE uint32_t get_lane_size() { return LANE_SIZE; }
134134
__nvvm_bar_warp_sync(mask);
135135
}
136136

137+
/// Returns the current value of the GPU's processor clock.
138+
LIBC_INLINE uint64_t processor_clock() {
139+
uint64_t timestamp; // left uninitialized on purpose: it is written as the output operand of the asm below
140+
LIBC_INLINE_ASM("mov.u64 %0, %%clock64;" : "=l"(timestamp)); // moves the %clock64 special register into operand 0, which is bound to timestamp
141+
return timestamp;
142+
}
143+
144+
/// Returns a global fixed-frequency timer at nanosecond frequency.
145+
LIBC_INLINE uint64_t fixed_frequrency_clock() { // NOTE(review): "frequrency" looks like a misspelling of "frequency"; the amdgpu and generic headers use the same spelling, so rename them together
146+
uint64_t nsecs; // left uninitialized on purpose: it is written as the output operand of the asm below
147+
LIBC_INLINE_ASM("mov.u64 %0, %%globaltimer;" : "=l"(nsecs)); // moves the %globaltimer special register into operand 0, which is bound to nsecs
148+
return nsecs;
149+
}
150+
137151
} // namespace gpu
138152
} // namespace __llvm_libc
139153

0 commit comments

Comments
 (0)