Tachyon (current)  Current Main Branch
ProfileHooks.h
Go to the documentation of this file.
1 /*
2  * ProfileHooks.h - NVTX/CUDA profiler annotation macros
3  *
4  * (C) Copyright 2013-2022 John E. Stone
5  * SPDX-License-Identifier: BSD-3-Clause
6  *
7  * $Id: ProfileHooks.h,v 1.3 2022/03/11 00:45:37 johns Exp $
8  *
9  */
10 
29 #ifndef PROFILEHOOKS_H
30 #define PROFILEHOOKS_H
31 
32 #if defined(WKFNVTX)
33 
34 #if 1
35 #define WKFUSEGETTID 1
37 #include <unistd.h>
38 #include <sys/types.h>
39 #include <sys/syscall.h>
40 
41 #ifndef gettid
42 #define gettid() syscall(SYS_gettid)
44 #endif
45 #else
46 #include <pthread.h>
48 #endif
49 
50 #include <cuda_runtime.h>
51 #include <cuda_profiler_api.h>
52 
53 // We really only want to use CUDA >= 10.0 w/ NVTX V3+,
54 // which bypasses the need to include/ship/link against
55 // any additional libs.
56 #if CUDART_VERSION >= 10000
57 #include <nvtx3/nvToolsExt.h> // CUDA >= 10 has NVTX V3+
58 #else
59 #error NVTXv3 requires CUDA 10.0 or greater
60 #include <nvToolsExt.h> // CUDA < 10 has NVTX V2
61 #endif
62 
63 
67 const uint32_t WKF_nvtx_colors[] = { /* ARGB color palette, indexed by the PROFILE_* color ids */
68  0xff629f57, /* 0: PROFILE_GREEN */
69  0xffee8c40, /* 1: PROFILE_ORANGE */
70  0xff507ba4, /* 2: PROFILE_BLUE */
71  0xffecc65c, /* 3: PROFILE_YELLOW */
72  0xffac7c9f, /* 4: PROFILE_PURPLE */
73  0xff7cb7b2, /* 5: PROFILE_TEAL */
74  0xffdb565c, /* 6: PROFILE_RED */
75  0xffb9b0ac, /* 7: PROFILE_GRAY */
76  0xffffffff, /* 8: PROFILE_WHITE */
77 };
78 
80 const int WKF_nvtx_colors_len = sizeof(WKF_nvtx_colors)/sizeof(uint32_t);
81 
82 #define PROFILE_GREEN 0
83 #define PROFILE_ORANGE 1
84 #define PROFILE_BLUE 2
85 #define PROFILE_YELLOW 3
86 #define PROFILE_PURPLE 4
87 #define PROFILE_TEAL 5
88 #define PROFILE_RED 6
89 #define PROFILE_GRAY 7
90 #define PROFILE_WHITE 8
91 
93 #define PROFILE_INITIALIZE() do { nvtxInitialize(NULL); } while(0) // terminate with semicolon
94 
96 #define PROFILE_START() \
97  do { \
98  cudaProfilerStart(); \
99  } while (0) // terminate with semicolon
100 
102 #define PROFILE_STOP() \
103  do { \
104  cudaDeviceSynchronize(); /* wait for outstanding device work before stopping; return code is discarded */ \
105  cudaProfilerStop(); /* return code is discarded */ \
106  } while (0) // terminate with semicolon
107 
108 
117 #if defined(WKFUSEGETTID)
118 
120 #define PROFILE_MAIN_THREAD() \
121  do { \
122  /* On Linux use gettid() to get current thread ID */ \
123  nvtxNameOsThread(gettid(), "Main thread"); \
124  } while (0) // terminate with semicolon
125 
128 #define PROFILE_NAME_THREAD(name) \
129  do { \
130  nvtxNameOsThread(gettid(), name); \
131  } while (0) // terminate with semicolon
132 
133 #else
134 
136 #define PROFILE_MAIN_THREAD() \
137  do { \
138  /* Without gettid() (e.g. MacOS X): query the calling thread's ID via pthread_threadid_np() and name that thread "Main thread" */ \
139  __uint64_t tid; \
140  pthread_threadid_np(pthread_self(), &tid); \
141  nvtxNameOsThread(tid, "Main thread"); \
142  } while (0) // terminate with semicolon
143 
146 #define PROFILE_NAME_THREAD(name) \
147  do { /* name the calling thread 'name' in the profiler, using the ID from pthread_threadid_np() */ \
148  __uint64_t tid; \
149  pthread_threadid_np(pthread_self(), &tid); \
150  nvtxNameOsThread(tid, (name)); /* use the queried tid: this header defines no gettid() in this branch */ \
151  } while (0) // terminate with semicolon
152 
153 #endif
154 
155 
160 #define PROFILE_MARK(name,cid) \
161  do { \
162  /* Emit an NVTX marker labeled with the ASCII string 'name', colored by palette entry 'cid' */ \
163  /* nvtxMarkA(name); */ \
164  int color_id = (cid); \
165  color_id = ((color_id % WKF_nvtx_colors_len) + WKF_nvtx_colors_len) % WKF_nvtx_colors_len; /* wrap into [0,len) so negative ids cannot index out of bounds */ \
166  nvtxEventAttributes_t eventAttrib = {0}; \
167  eventAttrib.version = NVTX_VERSION; \
168  eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; \
169  eventAttrib.colorType = NVTX_COLOR_ARGB; \
170  eventAttrib.color = WKF_nvtx_colors[color_id]; \
171  eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; \
172  eventAttrib.message.ascii = (name); \
173  nvtxMarkEx(&eventAttrib); \
174  } while(0) // terminate with semicolon
175 
176 
181 #define PROFILE_PUSH_RANGE(name,cid) \
182  do { /* push an NVTX range labeled with the ASCII string 'name', colored by palette entry 'cid' */ \
183  int color_id = (cid); \
184  color_id = ((color_id % WKF_nvtx_colors_len) + WKF_nvtx_colors_len) % WKF_nvtx_colors_len; /* wrap into [0,len) so negative ids cannot index out of bounds */ \
185  nvtxEventAttributes_t eventAttrib = {0}; \
186  eventAttrib.version = NVTX_VERSION; \
187  eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; \
188  eventAttrib.colorType = NVTX_COLOR_ARGB; \
189  eventAttrib.color = WKF_nvtx_colors[color_id]; \
190  eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; \
191  eventAttrib.message.ascii = (name); \
192  nvtxRangePushEx(&eventAttrib); \
193  } while(0) // must terminate with semi-colon
194 
195 
198 #define PROFILE_POP_RANGE(empty) \
199  do { \
200  nvtxRangePop(); /* pops the innermost pushed range; the 'empty' parameter is never referenced */ \
201  } while(0) // terminate with semicolon
202 
203 
204 // Scoped tracer: the constructor pushes a profiler range and the destructor pops it, so the range ends when the object is destroyed
205 class WKF_NVTX_Tracer { // NOTE(review): implicitly copyable; destroying a copy would pop without a matching push
206  public:
207  WKF_NVTX_Tracer(const char *name, int cid = 0) { PROFILE_PUSH_RANGE(name, cid); } // begin range 'name'; cid selects the palette color (default 0)
208  ~WKF_NVTX_Tracer() { PROFILE_POP_RANGE(); } // end the range on destruction
209 };
210 
212 #define PROFILE_RANGE(name,cid) \
213  /* Declares a WKF_NVTX_Tracer whose variable name has cid token-pasted onto it, so cid must be a single token (e.g. PROFILE_GREEN or 3) and two uses with the same cid in one scope would collide */ \
214  /* Place at the start of a function or scope: the tracer's constructor pushes the range */ \
215  /* and its destructor pops the range automatically when the scope is left */ \
216  WKF_NVTX_Tracer wkf_nvtx_tracer##cid(name,cid)
217  // must terminate with semi-colon
218 
219 #if defined(WKFNVTX_SYNCPRETTY)
220 #define PROFILE_STREAM_SYNC_PRETTY(stream) \
226  do { \
227  /* Add a CUDA stream sync call, but only for the benefit of */ \
228  /* profile trace clarity, so that it can be disabled on demand */ \
229  cudaStreamSynchronize(stream); \
230  } while(0) // terminate with semicolon
231 
232 #else
233 #define PROFILE_STREAM_SYNC_PRETTY(stream) do { } while(0) // term w/ semicolon
239 #endif
240 
241 
242 #else
243 
244 //
245 // If NVTX isn't enabled, then the profiling macros become no-ops.
246 // We add inline documentation here since Doxygen sees this branch by default.
247 //
248 
250 #define PROFILE_INITIALIZE() do { } while(0) // terminate with semicolon
251 
253 #define PROFILE_START() do { } while(0) // terminate with semicolon
254 
256 #define PROFILE_STOP() do { } while(0) // terminate with semicolon
257 
259 #define PROFILE_MAIN_THREAD() do { } while(0) // terminate with semicolon
260 
263 #define PROFILE_NAME_THREAD(name) do { } while(0) // terminate with semicolon
264 
269 #define PROFILE_MARK(name,cid) do { } while(0) // terminate with semicolon
270 
275 #define PROFILE_PUSH_RANGE(name,cid) do { } while(0) // terminate with semicolon
276 
279 #define PROFILE_POP_RANGE() do { } while(0) // terminate with semicolon
280 
282 #define PROFILE_RANGE(name,cid) do { } while(0) // terminate with semicolon
283 
289 #define PROFILE_STREAM_SYNC_PRETTY(stream) do { } while(0) // term w/ semicolon
290 #endif
291 
292 #endif
#define PROFILE_PUSH_RANGE(name, cid)
Pushes a time range annotation onto the profiler's trace stack, beginning at the time of submission...
Definition: ProfileHooks.h:275
#define PROFILE_POP_RANGE()
Pops the innermost time range off of the profiler's trace stack, at the time of execution.
Definition: ProfileHooks.h:279