22 #ifndef RT_THREADS_INC 23 #define RT_THREADS_INC 1 30 #if defined(USEPOSIXTHREADS) && defined(USEUITHREADS) 31 #error You may only define USEPOSIXTHREADS or USEUITHREADS, but not both 35 #if defined(_AIX) || defined(__APPLE__) || defined(_CRAY) || defined(__hpux) || defined(__irix) || defined(__linux) || defined(__osf__) || defined(__PARAGON__) 36 #if !defined(USEUITHREADS) && !defined(USEPOSIXTHREADS) 37 #define USEPOSIXTHREADS 43 #if !defined(USEPOSIXTHREADS) && !defined(USEUITHREADS) 51 #define CPU_SMTDEPTH_UNKNOWN 0 52 #define CPU_UNKNOWN 0x00000001 55 #define CPU_HT 0x00000010 56 #define CPU_HYPERVISOR 0x00000020 57 #define CPU_SSE2 0x00000100 58 #define CPU_SSE4_1 0x00000200 59 #define CPU_F16C 0x00000400 60 #define CPU_FMA 0x00000800 61 #define CPU_AVX 0x00001000 62 #define CPU_AVX2 0x00002000 63 #define CPU_AVX512F 0x00010000 64 #define CPU_AVX512CD 0x00020000 65 #define CPU_AVX512ER 0x00040000 66 #define CPU_AVX512PF 0x00080000 67 #define CPU_KNL (CPU_AVX512F | CPU_AVX512CD | \ 68 CPU_AVX512ER | CPU_AVX512PF) 71 #define CPU_ARM64_CPUID 0x00000010 72 #define CPU_ARM64_CRC32 0x00000020 73 #define CPU_ARM64_FP 0x00000080 74 #define CPU_ARM64_HPFP 0x00000080 75 #define CPU_ARM64_AES 0x00000100 76 #define CPU_ARM64_ATOMICS 0x00000200 77 #define CPU_ARM64_ASIMD 0x00000400 78 #define CPU_ARM64_ASIMDDP 0x00000800 79 #define CPU_ARM64_ASIMDHP 0x00001000 80 #define CPU_ARM64_ASIMDRDM 0x00002000 81 #define CPU_ARM64_ASIMDFHM 0x00004000 82 #define CPU_ARM64_SVE 0x00008000 83 #define CPU_ARM64_SHA512 0x00010000 84 #define CPU_ARM64_SHA1 0x00020000 85 #define CPU_ARM64_SHA2 0x00040000 86 #define CPU_ARM64_SHA3 0x00080000 88 typedef struct rt_cpu_caps_struct { 95 #ifdef USEPOSIXTHREADS 103 typedef struct rwlock_struct {
104 pthread_mutex_t lock;
106 pthread_cond_t rdrs_ok;
107 unsigned int waiting_writers;
108 pthread_cond_t wrtr_ok;
128 #if 0 && (NTDDI_VERSION >= NTDDI_WS08 || _WIN32_WINNT > 0x0600) 130 #define RTUSEWIN2008CONDVARS 1 140 #define RTUSEINTERLOCKEDATOMICOPS 1 142 #define RT_COND_SIGNAL 0 143 #define RT_COND_BROADCAST 1 147 CRITICAL_SECTION waiters_lock;
152 typedef struct rwlock_struct {
156 unsigned int waiting_writers;
171 #if defined(USENETBSDATOMICS) 172 #include <sys/atomic.h> 173 #elif defined(USESOLARISATOMICS) 181 #if defined(USENETBSDATOMICS) 183 #elif defined(USESOLARISATOMICS) 185 #elif defined(USEWIN32ATOMICS) 215 void * (*fctn)(
void *);
217 void * (*rslt)(
void *);
376 void **rsltparms))(
void *);
396 #define RT_TILESTACK_EMPTY -1 436 #define RT_SCHED_DONE -1 437 #define RT_SCHED_CONTINUE 0 440 typedef struct rt_shared_iterator_struct { 477 #define RT_THREADPOOL_DEVLIST_CPUSONLY NULL 480 #define RT_THREADPOOL_DEVID_CPU -1 514 void *fctn(
void *),
void *parms,
int blocking);
593 int rt_threadlaunch(
int numprocs,
void *clientdata,
void * fctn(
void *),
int padding1[8]
avoid false sharing, cache aliasing
rt_shared_iterator_t * iter
dynamic work scheduler
rt_mutex_t mtx
Mutex lock for the structure.
int * devlist
per-worker CPU/GPU device IDs
iterator used for dynamic load balancing
int n_clients
Number of threads to wait for at barrier.
int rt_rwlock_writelock(rt_rwlock_t *)
set writer lock
int rt_mutex_unlock(rt_mutex_t *)
unlock a mutex
int * rt_cpu_affinitylist(int *cpuaffinitycount)
query CPU affinity of the calling process (if allowed by host system)
int rt_tilestack_empty(rt_tilestack_t *)
query if the task tile stack is empty or not
void * clientdata
worker parameters
int result
Answer to be returned by barrier_wait.
rt_mutex_t lock
Mutex lock for the structure.
int rt_threadpool_worker_devscaletile(void *voiddata, int *tilesize)
worker thread calls this to scale max tile size by worker speed as determined by the SM/core count an...
Routines to generate a pool of threads which then grind through a dynamically load balanced work queu...
struct rt_tasktile_struct rt_tasktile_t
Task tile struct for stack, iterator, and scheduler routines; 'start' is inclusive, 'end' is exclusive.
int rt_mutex_lock(rt_mutex_t *)
lock a mutex
int rt_tilestack_push(rt_tilestack_t *, const rt_tasktile_t *)
push a task tile onto the stack
int rt_cpu_capability_flags(rt_cpu_caps_t *cpucaps)
CPU optional instruction set capability flags.
int padding2[8]
Pad to avoid false sharing, cache aliasing.
int rt_threadpool_sched_dynamic(rt_threadpool_t *thrpool, rt_tasktile_t *tile)
Set dynamic scheduler state to half-open interval defined by tile.
int rt_mutex_init(rt_mutex_t *)
initialize a mutex
int padding1[8]
Pad to avoid false sharing, cache aliasing.
Task tile struct for stack, iterator, and scheduler routines; 'start' is inclusive, 'end' is exclusive.
struct rt_threadpool_workerdata_struct rt_threadpool_workerdata_t
thread-specific handle data for workers
int rt_threadpool_destroy(rt_threadpool_t *thrpool)
join all worker threads and free resources
float devspeed
speed scaling for this device
int rt_shared_iterator_init(rt_shared_iterator_t *it)
initialize a shared iterator
void * rsltparms
parms to return to barrier wait callers
int rt_tilestack_init(rt_tilestack_t *s, int size)
initialize task tile stack (to empty)
int rt_threadlaunch_setfatalerror(void *thrparms)
worker thread calls this to indicate that an unrecoverable error occurred
int rt_cond_broadcast(rt_cond_t *)
signal a condition variable, waking all threads
struct rt_threadlaunch_struct rt_threadlaunch_t
Routines to generate a pool of threads which then grind through a dynamically load balanced work queu...
int rt_shared_iterator_getfatalerror(rt_shared_iterator_t *it)
master thread calls this to query for fatal errors
void rt_tilestack_destroy(rt_tilestack_t *)
destroy task tile stack
int size
current allocated stack size
rt_threadpool_workerdata_t * workerdata
per-worker data
int rt_thread_numprocessors(void)
number of processors available, subject to user override
int rt_thread_setconcurrency(int)
set the concurrency level and scheduling scope for threads
rt_cond_t wait_cv
Clients wait on condition variable to proceed.
int rt_cpu_smt_depth(void)
CPU logical processors (SMT depth / aka hyperthreading)
int padding2[8]
Pad to avoid false sharing, cache aliasing.
int rt_threadpool_getfatalerror(void *thrparms)
master thread calls this to query for fatal errors
int padding2[8]
avoid false sharing
rt_tasktile_t * s
stack of task tiles
int rt_thread_numphysprocessors(void)
number of physical processors available
int top
index of top stack element
int rt_shared_iterator_setfatalerror(rt_shared_iterator_t *it)
worker thread calls this to indicate a fatal error
int rt_atomic_int_add_and_fetch(rt_atomic_int_t *atomp, int inc)
fetch an atomic int and add inc to it, returning new value
int rt_threadpool_worker_getdata(void *voiddata, void **clientdata)
worker thread can call this to get its client data pointer
int end
ending value (exclusive)
int n_clients
Number of threads to wait for at barrier.
int rt_thread_barrier_init_proc_shared(rt_barrier_t *, int n_clients)
When rendering in the CAVE we use a special synchronization mode so that shared memory mutexes and co...
int rt_threadpool_tile_failed(void *thrpool, rt_tasktile_t *tile)
worker thread calls this when it fails computing a tile after it has already taken it from the schedu...
int rt_threadpool_wait(rt_threadpool_t *thrpool)
wait for all worker threads to complete their work
int rt_thread_join(rt_thread_t, void **)
join (wait for completion of, and merge with) a thread
int rt_atomic_int_set(rt_atomic_int_t *atomp, int val)
set an atomic int variable
int rt_threadpool_get_workercount(rt_threadpool_t *thrpool)
query number of worker threads in the pool
rt_run_barrier_t runbar
master/worker run barrier
int rt_thread_run_barrier_poll(rt_run_barrier_t *barrier)
non-blocking poll to see if peers are already at the barrier
struct barrier_struct rt_barrier_t
barrier sync object with padding to prevent false sharing
int rt_rwlock_readlock(rt_rwlock_t *)
set reader lock
int rt_thread_create(rt_thread_t *, void *fctn(void *), void *)
create a new child thread
void *(*)(void *) rt_thread_run_barrier(rt_run_barrier_t *barrier, void *fctn(void *), void *parms, void **rsltparms)
sleeping barrier synchronization for thread pool
rt_tilestack_t * errorstack
stack of tiles that failed
thread-specific handle data for workers
int rt_atomic_int_get(rt_atomic_int_t *atomp)
get an atomic int variable
int rt_shared_iterator_set(rt_shared_iterator_t *it, rt_tasktile_t *tile)
Set shared iterator state to half-open interval defined by tile.
int rt_tilestack_pop(rt_tilestack_t *, rt_tasktile_t *)
pop a task tile off of the stack
int rt_threadlaunch_getdata(void *thrparms, void **clientdata)
worker thread can call this to get its client data pointer
int rt_threadlaunch_getid(void *thrparms, int *threadid, int *threadcount)
worker thread can call this to get its ID and number of peers
int end
ending task ID (exclusive)
int rt_shared_iterator_destroy(rt_shared_iterator_t *it)
destroy a shared iterator
int val
Integer value to be atomically manipulated.
struct rt_shared_iterator_struct rt_shared_iterator_t
iterator used for dynamic load balancing
int rt_mutex_spin_lock(rt_mutex_t *)
lock a mutex by spinning only
int sum
Sum of arguments passed to barrier_wait.
int fatalerror
cancel processing immediately for all threads
int threadcount
total number of worker threads
int rt_mutex_trylock(rt_mutex_t *)
try to lock a mutex
rt_mutex_t lock
Mutex lock for the structure.
int start
starting task ID (inclusive)
void rt_thread_barrier_destroy(rt_barrier_t *barrier)
destroy counting barrier primitive
int rt_mutex_destroy(rt_mutex_t *)
destroy a mutex
void * parms
fctn parms for this worker
stack of work tiles, for error handling
int rt_rwlock_init(rt_rwlock_t *)
initialize a reader/writer lock
void rt_thread_run_barrier_destroy(rt_run_barrier_t *barrier)
destroy thread pool barrier
int n_waiting
Number of currently waiting threads.
int rt_threadlaunch(int numprocs, void *clientdata, void *fctn(void *), rt_tasktile_t *tile)
launch up to numprocs threads using shared iterator as a load balancer
atomic int structure with padding to prevent false sharing
int padding1[8]
Pad to avoid false sharing, cache aliasing.
rt_shared_iterator_t iter
dynamic work scheduler
int rt_threadpool_worker_setdevspeed(void *voiddata, float speed)
Worker thread calls this to set relative speed of this device as determined by the SM/core count and ...
int rt_threadpool_worker_getdevid(void *voiddata, int *devid)
worker thread can call this to get its CPU/GPU device ID
int padding1[8]
Pad to avoid false sharing, cache aliasing.
int rt_threadpool_next_tile(void *thrpool, int reqsize, rt_tasktile_t *tile)
worker thread calls this to get its next work unit; iterates the shared iterator, returns -1 if no iter...
void * thrpool
void ptr to thread pool struct
int growthrate
stack growth chunk size
rt_tilestack_t errorstack
stack of tiles that failed
int rt_tilestack_compact(rt_tilestack_t *)
shrink memory buffers associated with task tile stack if possible
int rt_tilestack_popall(rt_tilestack_t *)
pop all of the task tiles off of the stack
int n_waiting
Number of currently waiting threads.
int rt_threadpool_setfatalerror(void *thrparms)
worker thread calls this to indicate that an unrecoverable error occurred
int padding2[8]
avoid false sharing, cache aliasing
struct rt_run_barrier_struct rt_run_barrier_t
run-barrier sync object with padding to prevent false sharing
int rt_cond_wait(rt_cond_t *, rt_mutex_t *)
wait on a condition variable
barrier sync object with padding to prevent false sharing
int rt_threadpool_worker_getdevspeed(void *voiddata, float *speed)
Worker thread calls this to get relative speed of this device as determined by the SM/core count and ...
rt_threadpool_t * rt_threadpool_create(int workercount, int *devlist)
create a thread pool with a specified number of worker threads
struct atomic_int_struct rt_atomic_int_t
atomic int structure with padding to prevent false sharing
rt_mutex_t lock
Mutex lock for the structure.
int rt_atomic_int_init(rt_atomic_int_t *atomp, int val)
initialize an atomic int variable
int start
starting value (inclusive)
int workercount
number of worker threads
int rt_threadpool_launch(rt_threadpool_t *thrpool, void *fctn(void *), void *parms, int blocking)
launch threads onto a new function, with associated parms
int padding2[8]
Pad to avoid false sharing, cache aliasing.
int rt_cond_destroy(rt_cond_t *)
destroy a condition variable
rt_shared_iterator_t * iter
dynamic scheduler iterator
int rt_threadpool_worker_getid(void *voiddata, int *threadid, int *threadcount)
worker thread can call this to get its ID and number of peers
rt_cond_t wait_cv
Clients wait on condition variable to proceed.
int phase
Flag to separate waiters from fast workers.
int rt_atomic_int_fetch_and_add(rt_atomic_int_t *atomp, int inc)
fetch an atomic int and add inc to it, returning original value
struct rt_cpu_caps_struct rt_cpu_caps_t
void * parms
parms for fctn pointer
int rt_rwlock_unlock(rt_rwlock_t *)
unlock reader/writer lock
int rt_cond_init(rt_cond_t *)
initialize a condition variable
int rt_thread_barrier(rt_barrier_t *barrier, int increment)
synchronize on counting barrier primitive
int rt_threadlaunch_next_tile(void *voidparms, int reqsize, rt_tasktile_t *tile)
worker thread calls this to get its next work unit; iterates the shared iterator, returns -1 if no iter...
rt_thread_t * threads
worker threads
int threadid
ID of worker thread.
int rt_thread_run_barrier_init(rt_run_barrier_t *barrier, int n_clients)
initialize thread pool barrier
int threadid
worker thread's id
int padding1[8]
avoid false sharing
run-barrier sync object with padding to prevent false sharing
int devid
worker CPU/GPU device ID
struct rt_threadpool_struct rt_threadpool_t
persistent thread pool
int rt_atomic_int_destroy(rt_atomic_int_t *atomp)
destroy an atomic int variable
int threadcount
number of workers
int phase
Flag to separate waiters from fast workers.
int rt_shared_iterator_next_tile(rt_shared_iterator_t *it, int reqsize, rt_tasktile_t *tile)
iterate the shared iterator with a requested tile size; returns the tile received, and a return code of -1 if no iterations are left or a fatal error has occurred during processing, canceling all worker threads.
int rt_thread_set_self_cpuaffinity(int cpu)
set the CPU affinity of the current thread (if allowed by host system)
rt_barrier_t * rt_thread_barrier_init(int n_clients)
initialize counting barrier primitive
int rt_cond_signal(rt_cond_t *)
signal a condition variable, waking at least one thread