Tachyon (current)  Current Main Branch
threads.h
Go to the documentation of this file.
1 /*
2  * threads.h - platform-dependent CPU feature query, threads, and atomic ops
3  *
4  * (C) Copyright 1994-2022 John E. Stone
5  * SPDX-License-Identifier: BSD-3-Clause
6  *
7  * $Id: threads.h,v 1.67 2022/02/21 16:45:19 johns Exp $
8  *
9  */
10 
17 /*
18  * XXX will need to rename threads.[ch] src to avoid collision with
19  * the new headers included in the C11 standard and later
20  */
21 
22 #ifndef RT_THREADS_INC
23 #define RT_THREADS_INC 1
24 
25 #ifdef __cplusplus
26 extern "C" {
27 #endif
28 
29 /* define which thread calls to use */
30 #if defined(USEPOSIXTHREADS) && defined(USEUITHREADS)
31 #error You may only define USEPOSIXTHREADS or USEUITHREADS, but not both
32 #endif
33 
34 /* POSIX Threads */
35 #if defined(_AIX) || defined(__APPLE__) || defined(_CRAY) || defined(__hpux) || defined(__irix) || defined(__linux) || defined(__osf__) || defined(__PARAGON__)
36 #if !defined(USEUITHREADS) && !defined(USEPOSIXTHREADS)
37 #define USEPOSIXTHREADS
38 #endif
39 #endif
40 
41 /* Unix International Threads */
42 #if defined(SunOS)
43 #if !defined(USEPOSIXTHREADS) && !defined(USEUITHREADS)
44 #define USEUITHREADS
45 #endif
46 #endif
47 
48 /*
49  * CPU capability flags
50  */
51 #define CPU_SMTDEPTH_UNKNOWN 0
52 #define CPU_UNKNOWN 0x00000001
54 /* Intel x86 CPU features we may need at runtime */
55 #define CPU_HT 0x00000010
56 #define CPU_HYPERVISOR 0x00000020
57 #define CPU_SSE2 0x00000100
58 #define CPU_SSE4_1 0x00000200
59 #define CPU_F16C 0x00000400
60 #define CPU_FMA 0x00000800
61 #define CPU_AVX 0x00001000
62 #define CPU_AVX2 0x00002000
63 #define CPU_AVX512F 0x00010000
64 #define CPU_AVX512CD 0x00020000
65 #define CPU_AVX512ER 0x00040000
66 #define CPU_AVX512PF 0x00080000
67 #define CPU_KNL (CPU_AVX512F | CPU_AVX512CD | \
68  CPU_AVX512ER | CPU_AVX512PF)
70 /* ARM CPU features we may need at runtime */
71 #define CPU_ARM64_CPUID 0x00000010
72 #define CPU_ARM64_CRC32 0x00000020
73 #define CPU_ARM64_FP 0x00000080
74 #define CPU_ARM64_HPFP 0x00000080
75 #define CPU_ARM64_AES 0x00000100
76 #define CPU_ARM64_ATOMICS 0x00000200
77 #define CPU_ARM64_ASIMD 0x00000400
78 #define CPU_ARM64_ASIMDDP 0x00000800
79 #define CPU_ARM64_ASIMDHP 0x00001000
80 #define CPU_ARM64_ASIMDRDM 0x00002000
81 #define CPU_ARM64_ASIMDFHM 0x00004000
82 #define CPU_ARM64_SVE 0x00008000
83 #define CPU_ARM64_SHA512 0x00010000
84 #define CPU_ARM64_SHA1 0x00020000
85 #define CPU_ARM64_SHA2 0x00040000
86 #define CPU_ARM64_SHA3 0x00080000
/* CPU capability info: optional-instruction-set flags (CPU_* bits above) */
/* and SMT (hyperthreading) depth.                                        */
/* NOTE(review): the closing "} rt_cpu_caps_t;" (threads.h:91 per the     */
/* Doxygen index) was dropped by this extraction.                         */
88 typedef struct rt_cpu_caps_struct {
89  unsigned int flags;   /* bitwise OR of CPU_* capability flags */
90  int smtdepth;         /* SMT depth; CPU_SMTDEPTH_UNKNOWN (0) if unknown */
92 
93 
94 #ifdef THR
95 #ifdef USEPOSIXTHREADS
96 #include <pthread.h>
97 
98 typedef pthread_t rt_thread_t;
99 typedef pthread_mutex_t rt_mutex_t;
100 typedef pthread_cond_t rt_cond_t;
/* Reader/writer lock built from a mutex and two condition variables. */
103 typedef struct rwlock_struct {
104  pthread_mutex_t lock;          /* guards all fields of this structure */
105  int rwlock;                    /* lock state word; presumably >0 = active readers, <0 = writer -- TODO confirm in threads.c */
106  pthread_cond_t rdrs_ok;        /* blocked readers wait here */
107  unsigned int waiting_writers;  /* count of writers queued for the lock */
108  pthread_cond_t wrtr_ok;        /* blocked writers wait here */
109 } rt_rwlock_t;
110 
111 #endif
112 
113 #ifdef USEUITHREADS
114 #include <thread.h>
115 
116 typedef thread_t rt_thread_t;
117 typedef mutex_t rt_mutex_t;
118 typedef cond_t rt_cond_t;
119 typedef rwlock_t rt_rwlock_t;
120 #endif
121 
122 
123 #ifdef _MSC_VER
124 #include <windows.h>
125 typedef HANDLE rt_thread_t;
126 typedef CRITICAL_SECTION rt_mutex_t;
127 
128 #if 0 && (NTDDI_VERSION >= NTDDI_WS08 || _WIN32_WINNT > 0x0600)
129 /* Use native condition variables only with Windows Server 2008 and newer... */
130 #define RTUSEWIN2008CONDVARS 1
131 typedef CONDITION_VARIABLE rt_cond_t;
132 #else
133 /* Every version of Windows prior to Vista/WS2008 must emulate condition */
134 /* variables using manually resettable events or other schemes */
135 
136 /* For higher performance, use interlocked memory operations */
137 /* rather than locking/unlocking mutexes when manipulating */
138 /* internal state. */
139 #if 1
140 #define RTUSEINTERLOCKEDATOMICOPS 1
141 #endif
142 #define RT_COND_SIGNAL 0
143 #define RT_COND_BROADCAST 1
/* Emulated condition variable for pre-Vista/WS2008 Windows: a waiter   */
/* count plus two events indexed by RT_COND_SIGNAL / RT_COND_BROADCAST. */
144 typedef struct {
145  LONG waiters;                  /* number of waiting threads; LONG so it can be manipulated with Interlocked* ops (see RTUSEINTERLOCKEDATOMICOPS) */
147  CRITICAL_SECTION waiters_lock; /* serializes access to waiters when not using interlocked ops */
148  HANDLE events[2];              /* [RT_COND_SIGNAL]=auto-reset wake-one, [RT_COND_BROADCAST]=wake-all -- TODO confirm event types in threads.c */
149 } rt_cond_t;
150 #endif
151 
/* Reader/writer lock for Win32, mirroring the pthreads version above */
/* but built on rt_mutex_t / rt_cond_t wrappers.                      */
152 typedef struct rwlock_struct {
153  rt_mutex_t lock;               /* guards all fields of this structure */
154  int rwlock;                    /* lock state word; presumably >0 = active readers, <0 = writer -- TODO confirm in threads.c */
155  rt_cond_t rdrs_ok;             /* blocked readers wait here */
156  unsigned int waiting_writers;  /* count of writers queued for the lock */
157  rt_cond_t wrtr_ok;             /* blocked writers wait here */
158 } rt_rwlock_t;
159 
160 #endif
161 #endif /* _MSC_VER */
162 
163 
164 #ifndef THR
165 typedef int rt_thread_t;
166 typedef int rt_mutex_t;
167 typedef int rt_cond_t;
168 typedef int rt_rwlock_t;
169 #endif
170 
171 #if defined(USENETBSDATOMICS)
172 #include <sys/atomic.h>
173 #elif defined(USESOLARISATOMICS)
174 #include <atomic.h>
175 #endif
176 
/* Atomic integer, padded on both sides to avoid false sharing / cache  */
/* aliasing; the value type matches the platform atomic API in use.     */
/* NOTE(review): the Doxygen index shows an rt_mutex_t lock member at   */
/* threads.h:180 (mutex fallback path) and the closing                  */
/* "} rt_atomic_int_t;" at :191 -- both dropped by this extraction.     */
178 typedef struct atomic_int_struct {
179  int padding1[8];       /* pad to avoid false sharing */
181 #if defined(USENETBSDATOMICS)
182  unsigned int val;      /* NetBSD <sys/atomic.h> operates on unsigned int */
183 #elif defined(USESOLARISATOMICS)
184  unsigned int val;      /* Solaris <atomic.h> operates on unsigned int */
185 #elif defined(USEWIN32ATOMICS)
186  LONG val;              /* Win32 Interlocked* ops require LONG */
187 #else
188  int val;               /* plain int for the mutex-protected fallback */
189 #endif
190  int padding2[8];       /* pad to avoid false sharing */
192 
193 
/* Counting barrier sync object, padded to prevent false sharing.       */
/* NOTE(review): the Doxygen index shows an rt_mutex_t lock member at   */
/* threads.h:197 and an rt_cond_t wait_cv at :203 -- both dropped by    */
/* this extraction.                                                     */
195 typedef struct barrier_struct {
196  int padding1[8];   /* pad to avoid false sharing */
198  int n_clients;     /* number of threads to wait for at the barrier */
199  int n_waiting;     /* number of threads currently waiting */
200  int phase;         /* flag separating waiters from fast workers */
201  int sum;           /* sum of the arguments passed to barrier wait */
202  int result;        /* answer to be returned by barrier wait */
204  int padding2[8];   /* pad to avoid false sharing */
205 } rt_barrier_t;
206 
207 
/* Run-barrier sync object for a sleepable thread pool: workers sleep   */
/* at the barrier until the master posts a new function/parms to run.   */
/* NOTE(review): the Doxygen index shows an rt_mutex_t lock at          */
/* threads.h:211, an rt_cond_t wait_cv at :219, and the closing         */
/* "} rt_run_barrier_t;" at :221 -- all dropped by this extraction.     */
209 typedef struct rt_run_barrier_struct {
210  int padding1[8];          /* pad to avoid false sharing */
212  int n_clients;            /* number of threads to wait for at the barrier */
213  int n_waiting;            /* number of threads currently waiting */
214  int phase;                /* flag separating waiters from fast workers */
215  void * (*fctn)(void *);   /* function for workers to run next */
216  void * parms;             /* parms for the fctn pointer */
217  void * (*rslt)(void *);   /* presumably the previous fctn, returned to callers -- TODO confirm in threads.c */
218  void * rsltparms;         /* parms to return to barrier-wait callers */
220  int padding2[8];          /* pad to avoid false sharing */
222 
223 
224 /*
225  * Routines for querying processor counts, and managing CPU affinity
226  */
229 
/* Number of processors available, subject to user override */
231 int rt_thread_numprocessors(void);
232 
235 
237 /* A return value of zero means we don't know */
238 int rt_cpu_smt_depth(void);
239 
/* Query CPU affinity of the calling process (if allowed by host system); */
/* count returned via cpuaffinitycount; caller presumably frees the list  */
/* -- TODO confirm ownership in threads.c                                 */
241 int * rt_cpu_affinitylist(int *cpuaffinitycount);
242 
/* Set the CPU affinity of the current thread (if allowed by host system) */
244 int rt_thread_set_self_cpuaffinity(int cpu);
245 
/* Set the concurrency level and scheduling scope for threads */
247 int rt_thread_setconcurrency(int);
248 
249 
250 /*
251  * Thread management
252  */
/* Create a new child thread running fctn(parm) */
254 int rt_thread_create(rt_thread_t *, void * fctn(void *), void *);
255 
/* Join (wait for completion of, and merge with) a thread */
257 int rt_thread_join(rt_thread_t, void **);
258 
259 
260 /*
261  * Mutex management
262  */
265 
268 
271 
274 
277 
280 
281 
282 /*
283  * Condition variable management
284  */
/* Initialize a condition variable */
286 int rt_cond_init(rt_cond_t *);
287 
290 
293 
296 
299 
300 
301 /*
302  * Atomic operations on integers
303  */
/* Initialize an atomic int variable to val */
305 int rt_atomic_int_init(rt_atomic_int_t * atomp, int val);
306 
309 
/* Set an atomic int variable to val */
311 int rt_atomic_int_set(rt_atomic_int_t * atomp, int val);
312 
315 
/* Add inc to an atomic int, returning the ORIGINAL value */
317 int rt_atomic_int_fetch_and_add(rt_atomic_int_t * atomp, int inc);
318 
/* Add inc to an atomic int, returning the NEW value */
320 int rt_atomic_int_add_and_fetch(rt_atomic_int_t * atomp, int inc);
321 
322 
323 /*
324  * Reader/writer lock management
325  */
328 
331 
334 
337 
338 
339 /*
340  * counting barrier
341  */
/* Allocate and initialize a counting barrier for n_clients threads */
343 rt_barrier_t * rt_thread_barrier_init(int n_clients);
344 
354 
357 
/* Synchronize on the counting barrier; increment is folded into the */
/* barrier's running sum (see barrier_struct.sum/result above)       */
359 int rt_thread_barrier(rt_barrier_t *barrier, int increment);
360 
361 
362 /*
363  * This is a symmetric barrier routine designed to be used
364  * in implementing a sleepable thread pool.
365  */
/* Initialize a thread-pool run barrier for n_clients threads */
367 int rt_thread_run_barrier_init(rt_run_barrier_t *barrier, int n_clients);
368 
371 
374  void * fctn(void*),
375  void * parms,
376  void **rsltparms))(void *);
377 
380 
381 
/* Task tile for stack, iterator, and scheduler routines; a half-open */
/* range of task IDs.                                                 */
387 typedef struct rt_tasktile_struct {
388  int start;   /* starting task ID (inclusive) */
389  int end;     /* ending task ID (exclusive) */
390 } rt_tasktile_t;
391 
392 
393 /*
394  * tile stack
395  */
396 #define RT_TILESTACK_EMPTY -1
397 
/* Stack of work tiles, used for error handling.                        */
/* NOTE(review): the Doxygen index shows members dropped by this        */
/* extraction: rt_mutex_t mtx (threads.h:402), int growthrate (:403),   */
/* rt_tasktile_t *s (:406), and the closing "} rt_tilestack_t;".        */
401 typedef struct {
404  int size;    /* current allocated stack size */
405  int top;     /* index of top stack element; RT_TILESTACK_EMPTY (-1) when empty */
408 
/* Initialize a task tile stack (to empty) with the given initial size */
410 int rt_tilestack_init(rt_tilestack_t *s, int size);
411 
414 
417 
420 
423 
426 
429 
430 
436 #define RT_SCHED_DONE -1
437 #define RT_SCHED_CONTINUE 0
/* Shared iterator used for dynamic load balancing across threads.      */
/* NOTE(review): the Doxygen index shows members dropped by this        */
/* extraction: rt_mutex_t mtx (threads.h:441) and int fatalerror        */
/* (:445), plus the closing brace/typedef.                              */
440 typedef struct rt_shared_iterator_struct {
442  int start;     /* starting value (inclusive) */
443  int end;       /* ending value (exclusive) */
444  int current;   /* current value */
447 
450 
453 
456 
464  rt_tasktile_t *tile);
465 
468 
471 
472 
473 /*
474  * Thread pool.
475  */
477 #define RT_THREADPOOL_DEVLIST_CPUSONLY NULL
478 
480 #define RT_THREADPOOL_DEVID_CPU -1
481 
/* Thread-specific handle data for pool workers (body of                 */
/* rt_threadpool_workerdata_struct; its opening typedef line, the        */
/* iter/errorstack/threadcount members, and the closing brace were       */
/* dropped by this extraction -- see Doxygen index, threads.h:483-495).  */
484  int padding1[8];   /* pad to avoid false sharing */
487  int threadid;      /* this worker thread's id */
489  int devid;         /* worker CPU/GPU device ID */
490  float devspeed;    /* speed scaling for this device */
491  void *parms;       /* fctn parms for this worker */
492  void *thrpool;     /* void ptr back to the owning thread pool struct */
493  int padding2[8];   /* pad to avoid false sharing */
495 
496 
/* Persistent thread pool.                                              */
/* NOTE(review): most members were dropped by this extraction; the      */
/* Doxygen index lists workercount (threads.h:499), iter (:501),        */
/* errorstack (:502), threads (:503), workerdata (:504), runbar (:505). */
498 typedef struct rt_threadpool_struct {
500  int *devlist;   /* per-worker CPU/GPU device IDs */
507 
508 
/* Create a thread pool with a specified number of worker threads; */
/* devlist maps workers to devices, or                              */
/* RT_THREADPOOL_DEVLIST_CPUSONLY (NULL) for CPUs only              */
510 rt_threadpool_t * rt_threadpool_create(int workercount, int *devlist);
511 
514  void *fctn(void *), void *parms, int blocking);
515 
/* Wait for all worker threads to complete their work */
517 int rt_threadpool_wait(rt_threadpool_t *thrpool);
518 
521 
524 
/* Worker: get its ID and number of peers */
526 int rt_threadpool_worker_getid(void *voiddata, int *threadid, int *threadcount);
527 
/* Worker: get its CPU/GPU device ID */
529 int rt_threadpool_worker_getdevid(void *voiddata, int *devid);
530 
/* Worker: set relative speed of this device */
537 int rt_threadpool_worker_setdevspeed(void *voiddata, float speed);
538 
/* Worker: get relative speed of this device */
543 int rt_threadpool_worker_getdevspeed(void *voiddata, float *speed);
544 
/* Worker: scale max tile size by this worker's device speed */
549 int rt_threadpool_worker_devscaletile(void *voiddata, int *tilesize);
550 
/* Worker: get its client data pointer */
552 int rt_threadpool_worker_getdata(void *voiddata, void **clientdata);
553 
556 
/* Worker: get the next work unit from the shared iterator; */
/* returns -1 when no iterations are left                   */
561 int rt_threadpool_next_tile(void *thrpool, int reqsize, rt_tasktile_t *tile);
562 
/* Worker: report a tile that failed after being taken from the scheduler */
567 int rt_threadpool_tile_failed(void *thrpool, rt_tasktile_t *tile);
568 
/* Worker: flag an unrecoverable error for the whole pool */
570 int rt_threadpool_setfatalerror(void *thrparms);
571 
/* Master: query whether a fatal error has been flagged */
573 int rt_threadpool_getfatalerror(void *thrparms);
574 
575 
/* Per-thread parameters for rt_threadlaunch() workers grinding through */
/* a dynamically load-balanced work queue.                              */
/* NOTE(review): the Doxygen index shows members dropped by this        */
/* extraction: rt_shared_iterator_t *iter (threads.h:585) and int       */
/* threadcount (:587), plus the closing "} rt_threadlaunch_t;".         */
583 typedef struct rt_threadlaunch_struct {
584  int padding1[8];     /* pad to avoid false sharing */
586  int threadid;        /* ID of this worker thread */
588  void * clientdata;   /* worker parameters */
589  int padding2[8];     /* pad to avoid false sharing */
591 
/* Launch up to numprocs threads running fctn, using a shared iterator */
/* over tile as the load balancer                                      */
593 int rt_threadlaunch(int numprocs, void *clientdata, void * fctn(void *),
594  rt_tasktile_t *tile);
595 
/* Worker: get its ID and number of peers */
597 int rt_threadlaunch_getid(void *thrparms, int *threadid, int *threadcount);
598 
/* Worker: get its client data pointer */
600 int rt_threadlaunch_getdata(void *thrparms, void **clientdata);
601 
/* Worker: get the next work unit from the shared iterator; */
/* returns -1 when no iterations are left                   */
606 int rt_threadlaunch_next_tile(void *voidparms, int reqsize,
607  rt_tasktile_t *tile);
608 
/* Worker: flag an unrecoverable error */
610 int rt_threadlaunch_setfatalerror(void *thrparms);
611 
612 
613 #ifdef __cplusplus
614 }
615 #endif
616 
617 #endif
int padding1[8]
avoid false sharing, cache aliasing
Definition: threads.h:584
rt_shared_iterator_t * iter
dynamic work scheduler
Definition: threads.h:485
rt_mutex_t mtx
Mutex lock for the structure.
Definition: threads.h:402
int * devlist
per-worker CPU/GPU device IDs
Definition: threads.h:500
iterator used for dynamic load balancing
Definition: threads.h:440
int n_clients
Number of threads to wait for at barrier.
Definition: threads.h:198
int rt_rwlock_writelock(rt_rwlock_t *)
set writer lock
Definition: threads.c:1270
int rt_mutex_unlock(rt_mutex_t *)
unlock a mutex
Definition: threads.c:807
int * rt_cpu_affinitylist(int *cpuaffinitycount)
query CPU affinity of the calling process (if allowed by host system)
Definition: threads.c:483
int rt_tilestack_empty(rt_tilestack_t *)
query if the task tile stack is empty or not
Definition: threads.c:1675
void * clientdata
worker parameters
Definition: threads.h:588
int result
Answer to be returned by barrier_wait.
Definition: threads.h:202
rt_mutex_t lock
Mutex lock for the structure.
Definition: threads.h:211
int rt_threadpool_worker_devscaletile(void *voiddata, int *tilesize)
worker thread calls this to scale max tile size by worker speed as determined by the SM/core count an...
Definition: threads.c:2008
Routines to generate a pool of threads which then grind through a dynamically load balanced work queu...
Definition: threads.h:583
struct rt_tasktile_struct rt_tasktile_t
Task tile struct for stack, iterator, and scheduler routines; 'start' is inclusive, 'end' is exclusive.
int rt_mutex_lock(rt_mutex_t *)
lock a mutex
Definition: threads.c:742
int rt_tilestack_push(rt_tilestack_t *, const rt_tasktile_t *)
push a task tile onto the stack
Definition: threads.c:1608
int rt_cpu_capability_flags(rt_cpu_caps_t *cpucaps)
CPU optional instruction set capability flags.
Definition: threads.c:281
int padding2[8]
Pad to avoid false sharing, cache aliasing.
Definition: threads.h:190
int rt_cond_t
Definition: threads.h:167
int rt_threadpool_sched_dynamic(rt_threadpool_t *thrpool, rt_tasktile_t *tile)
Set dynamic scheduler state to half-open interval defined by tile.
Definition: threads.c:2034
int rt_mutex_init(rt_mutex_t *)
initialize a mutex
Definition: threads.c:721
int padding1[8]
Pad to avoid false sharing, cache aliasing.
Definition: threads.h:210
Task tile struct for stack, iterator, and scheduler routines; 'start' is inclusive, 'end' is exclusive.
Definition: threads.h:387
struct rt_threadpool_workerdata_struct rt_threadpool_workerdata_t
thread-specific handle data for workers
int rt_threadpool_destroy(rt_threadpool_t *thrpool)
join all worker threads and free resources
Definition: threads.c:1917
float devspeed
speed scaling for this device
Definition: threads.h:490
int rt_shared_iterator_init(rt_shared_iterator_t *it)
initialize a shared iterator
Definition: threads.c:1700
void * rsltparms
parms to return to barrier wait callers
Definition: threads.h:218
int rt_tilestack_init(rt_tilestack_t *s, int size)
initialize task tile stack (to empty)
Definition: threads.c:1552
int rt_threadlaunch_setfatalerror(void *thrparms)
worker thread calls this to indicate that an unrecoverable error occurred
Definition: threads.c:2193
int rt_cond_broadcast(rt_cond_t *)
signal a condition variable, waking all threads
Definition: threads.c:1003
struct rt_threadlaunch_struct rt_threadlaunch_t
Routines to generate a pool of threads which then grind through a dynamically load balanced work queu...
int rt_shared_iterator_getfatalerror(rt_shared_iterator_t *it)
master thread calls this to query for fatal errors
Definition: threads.c:1785
void rt_tilestack_destroy(rt_tilestack_t *)
destroy task tile stack
Definition: threads.c:1575
int size
current allocated stack size
Definition: threads.h:404
rt_threadpool_workerdata_t * workerdata
per-worker data
Definition: threads.h:504
int rt_thread_numprocessors(void)
number of processors available, subject to user override
Definition: threads.c:202
int rt_thread_setconcurrency(int)
set the concurrency level and scheduling scope for threads
Definition: threads.c:618
rt_mutex_t mtx
mutex lock
Definition: threads.h:441
rt_cond_t wait_cv
Clients wait on condition variable to proceed.
Definition: threads.h:203
int rt_cpu_smt_depth(void)
CPU logical processors (SMT depth / aka hyperthreading)
Definition: threads.c:466
int padding2[8]
Pad to avoid false sharing, cache aliasing.
Definition: threads.h:204
int rt_threadpool_getfatalerror(void *thrparms)
master thread calls this to query for fatal errors
Definition: threads.c:2077
int rt_rwlock_t
Definition: threads.h:168
int padding2[8]
avoid false sharing
Definition: threads.h:493
rt_tasktile_t * s
stack of task tiles
Definition: threads.h:406
int rt_thread_numphysprocessors(void)
number of physical processors available
Definition: threads.c:114
int top
index of top stack element
Definition: threads.h:405
int rt_shared_iterator_setfatalerror(rt_shared_iterator_t *it)
worker thread calls this to indicate a fatal error
Definition: threads.c:1772
int rt_atomic_int_add_and_fetch(rt_atomic_int_t *atomp, int inc)
fetch an atomic int and add inc to it, returning new value
Definition: threads.c:1180
int rt_threadpool_worker_getdata(void *voiddata, void **clientdata)
worker thread can call this to get its client data pointer
Definition: threads.c:2024
int end
ending value (exclusive)
Definition: threads.h:443
int n_clients
Number of threads to wait for at barrier.
Definition: threads.h:212
int rt_thread_barrier_init_proc_shared(rt_barrier_t *, int n_clients)
When rendering in the CAVE we use a special synchronization mode so that shared memory mutexes and co...
Definition: threads.c:1379
int rt_threadpool_tile_failed(void *thrpool, rt_tasktile_t *tile)
worker thread calls this when it fails computing a tile after it has already taken it from the schedu...
Definition: threads.c:2062
int rt_threadpool_wait(rt_threadpool_t *thrpool)
wait for all worker threads to complete their work
Definition: threads.c:1900
int rt_thread_join(rt_thread_t, void **)
join (wait for completion of, and merge with) a thread
Definition: threads.c:688
int rt_atomic_int_set(rt_atomic_int_t *atomp, int val)
set an atomic int variable
Definition: threads.c:1087
int rt_threadpool_get_workercount(rt_threadpool_t *thrpool)
query number of worker threads in the pool
Definition: threads.c:1951
rt_run_barrier_t runbar
master/worker run barrier
Definition: threads.h:505
int rt_thread_run_barrier_poll(rt_run_barrier_t *barrier)
non-blocking poll to see if peers are already at the barrier
Definition: threads.c:1536
struct barrier_struct rt_barrier_t
barrier sync object with padding to prevent false sharing
int rt_rwlock_readlock(rt_rwlock_t *)
set reader lock
Definition: threads.c:1241
int rt_thread_create(rt_thread_t *, void *fctn(void *), void *)
create a new child thread
Definition: threads.c:645
void *(*)(void *) rt_thread_run_barrier(rt_run_barrier_t *barrier, void *fctn(void *), void *parms, void **rsltparms)
sleeping barrier synchronization for thread pool
Definition: threads.h:373
unsigned int flags
Definition: threads.h:89
rt_tilestack_t * errorstack
stack of tiles that failed
Definition: threads.h:486
thread-specific handle data for workers
Definition: threads.h:483
int rt_atomic_int_get(rt_atomic_int_t *atomp)
get an atomic int variable
Definition: threads.c:1123
int rt_shared_iterator_set(rt_shared_iterator_t *it, rt_tasktile_t *tile)
Set shared iterator state to half-open interval defined by tile.
Definition: threads.c:1719
int rt_tilestack_pop(rt_tilestack_t *, rt_tasktile_t *)
pop a task tile off of the stack
Definition: threads.c:1637
int rt_threadlaunch_getdata(void *thrparms, void **clientdata)
worker thread can call this to get its client data pointer
Definition: threads.c:2175
int rt_threadlaunch_getid(void *thrparms, int *threadid, int *threadcount)
worker thread can call this to get its ID and number of peers
Definition: threads.c:2162
int end
ending task ID (exclusive)
Definition: threads.h:389
int rt_shared_iterator_destroy(rt_shared_iterator_t *it)
destroy a shared iterator
Definition: threads.c:1710
int rt_thread_t
Definition: threads.h:165
int val
Integer value to be atomically manipulated.
Definition: threads.h:188
struct rt_shared_iterator_struct rt_shared_iterator_t
iterator used for dynamic load balancing
int rt_mutex_spin_lock(rt_mutex_t *)
lock a mutex by spinning only
Definition: threads.c:784
int sum
Sum of arguments passed to barrier_wait.
Definition: threads.h:201
int fatalerror
cancel processing immediately for all threads
Definition: threads.h:445
int threadcount
total number of worker threads
Definition: threads.h:488
int rt_mutex_trylock(rt_mutex_t *)
try to lock a mutex
Definition: threads.c:763
rt_mutex_t lock
Mutex lock for the structure.
Definition: threads.h:197
int start
starting task ID (inclusive)
Definition: threads.h:388
void rt_thread_barrier_destroy(rt_barrier_t *barrier)
destroy counting barrier primitive
Definition: threads.c:1416
int rt_mutex_destroy(rt_mutex_t *)
destroy a mutex
Definition: threads.c:828
int rt_mutex_t
Definition: threads.h:166
void * parms
fctn parms for this worker
Definition: threads.h:491
stack of work tiles, for error handling
Definition: threads.h:401
int rt_rwlock_init(rt_rwlock_t *)
initialize a reader/writer lock
Definition: threads.c:1212
void rt_thread_run_barrier_destroy(rt_run_barrier_t *barrier)
destroy thread pool barrier
Definition: threads.c:1478
int n_waiting
Number of currently waiting threads.
Definition: threads.h:213
int rt_threadlaunch(int numprocs, void *clientdata, void *fctn(void *), rt_tasktile_t *tile)
launch up to numprocs threads using shared iterator as a load balancer
Definition: threads.c:2085
atomic int structure with padding to prevent false sharing
Definition: threads.h:178
int padding1[8]
Pad to avoid false sharing, cache aliasing.
Definition: threads.h:179
rt_shared_iterator_t iter
dynamic work scheduler
Definition: threads.h:501
int rt_threadpool_worker_setdevspeed(void *voiddata, float speed)
Worker thread calls this to set relative speed of this device as determined by the SM/core count and ...
Definition: threads.c:1985
int rt_threadpool_worker_getdevid(void *voiddata, int *devid)
worker thread can call this to get its CPU/GPU device ID
Definition: threads.c:1970
int padding1[8]
Pad to avoid false sharing, cache aliasing.
Definition: threads.h:196
int rt_threadpool_next_tile(void *thrpool, int reqsize, rt_tasktile_t *tile)
worker thread calls this to get its next work unit iterate the shared iterator, returns -1 if no iter...
Definition: threads.c:2042
void * thrpool
void ptr to thread pool struct
Definition: threads.h:492
int growthrate
stack growth chunk size
Definition: threads.h:403
rt_tilestack_t errorstack
stack of tiles that failed
Definition: threads.h:502
int rt_tilestack_compact(rt_tilestack_t *)
shrink memory buffers associated with task tile stack if possible
Definition: threads.c:1584
int rt_tilestack_popall(rt_tilestack_t *)
pop all of the task tiles off of the stack
Definition: threads.c:1660
int n_waiting
Number of currently waiting threads.
Definition: threads.h:199
int rt_threadpool_setfatalerror(void *thrparms)
worker thread calls this to indicate that an unrecoverable error occurred
Definition: threads.c:2069
int padding2[8]
avoid false sharing, cache aliasing
Definition: threads.h:589
struct rt_run_barrier_struct rt_run_barrier_t
run-barrier sync object with padding to prevent false sharing
int rt_cond_wait(rt_cond_t *, rt_mutex_t *)
wait on a condition variable
Definition: threads.c:912
barrier sync object with padding to prevent false sharing
Definition: threads.h:195
int rt_threadpool_worker_getdevspeed(void *voiddata, float *speed)
Worker thread calls this to get relative speed of this device as determined by the SM/core count and ...
Definition: threads.c:1996
rt_threadpool_t * rt_threadpool_create(int workercount, int *devlist)
create a thread pool with a specified number of worker threads
Definition: threads.c:1822
struct atomic_int_struct rt_atomic_int_t
atomic int structure with padding to prevent false sharing
rt_mutex_t lock
Mutex lock for the structure.
Definition: threads.h:180
int rt_atomic_int_init(rt_atomic_int_t *atomp, int val)
initialize an atomic int variable
Definition: threads.c:1043
int start
starting value (inclusive)
Definition: threads.h:442
int workercount
number of worker threads
Definition: threads.h:499
int rt_threadpool_launch(rt_threadpool_t *thrpool, void *fctn(void *), void *parms, int blocking)
launch threads onto a new function, with associated parms
Definition: threads.c:1882
int padding2[8]
Pad to avoid false sharing, cache aliasing.
Definition: threads.h:220
int rt_cond_destroy(rt_cond_t *)
destroy a condition variable
Definition: threads.c:888
rt_shared_iterator_t * iter
dynamic scheduler iterator
Definition: threads.h:585
int rt_threadpool_worker_getid(void *voiddata, int *threadid, int *threadcount)
worker thread can call this to get its ID and number of peers
Definition: threads.c:1957
rt_cond_t wait_cv
Clients wait on condition variable to proceed.
Definition: threads.h:219
int current
current value
Definition: threads.h:444
int phase
Flag to separate waiters from fast workers.
Definition: threads.h:200
int rt_atomic_int_fetch_and_add(rt_atomic_int_t *atomp, int inc)
fetch an atomic int and add inc to it, returning original value
Definition: threads.c:1152
struct rt_cpu_caps_struct rt_cpu_caps_t
void * parms
parms for fctn pointer
Definition: threads.h:216
int rt_rwlock_unlock(rt_rwlock_t *)
unlock reader/writer lock
Definition: threads.c:1305
int rt_cond_init(rt_cond_t *)
initialize a condition variable
Definition: threads.c:852
int rt_thread_barrier(rt_barrier_t *barrier, int increment)
synchronize on counting barrier primitive
Definition: threads.c:1425
int rt_threadlaunch_next_tile(void *voidparms, int reqsize, rt_tasktile_t *tile)
worker thread calls this to get its next work unit iterate the shared iterator, returns -1 if no iter...
Definition: threads.c:2185
rt_thread_t * threads
worker threads
Definition: threads.h:503
int threadid
ID of worker thread.
Definition: threads.h:586
int rt_thread_run_barrier_init(rt_run_barrier_t *barrier, int n_clients)
initialize thread pool barrier
Definition: threads.c:1462
int threadid
worker thread's id
Definition: threads.h:487
int padding1[8]
avoid false sharing
Definition: threads.h:484
run-barrier sync object with padding to prevent false sharing
Definition: threads.h:209
int devid
worker CPU/GPU device ID
Definition: threads.h:489
struct rt_threadpool_struct rt_threadpool_t
persistent thread pool
int rt_atomic_int_destroy(rt_atomic_int_t *atomp)
destroy an atomic int variable
Definition: threads.c:1066
int threadcount
number of workers
Definition: threads.h:587
int phase
Flag to separate waiters from fast workers.
Definition: threads.h:214
int rt_shared_iterator_next_tile(rt_shared_iterator_t *it, int reqsize, rt_tasktile_t *tile)
iterate the shared iterator with a requested tile size, returns the tile received, and a return code of -1 if no iterations left or a fatal error has occurred during processing, canceling all worker threads.
Definition: threads.c:1736
int rt_thread_set_self_cpuaffinity(int cpu)
set the CPU affinity of the current thread (if allowed by host system)
Definition: threads.c:572
rt_barrier_t * rt_thread_barrier_init(int n_clients)
initialize counting barrier primitive
Definition: threads.c:1355
persistent thread pool
Definition: threads.h:498
int rt_cond_signal(rt_cond_t *)
signal a condition variable, waking at least one thread
Definition: threads.c:971