00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00034 
00035 
00036 
00037 
00038 
00039 
00040 
00041 
00042 
00043 
00044 
00045 
00046 
00047 
00048 
00049 
00050 
00051 
00052 
00053 
00054 
00055 
00056 
00057 
00058 
00059 
00060 
00061 
00062 
00063 #ifndef WKF_THREADS_INC
00064 #define WKF_THREADS_INC 1
00065 
00066 #ifdef __cplusplus
00067 extern "C" {
00068 #endif
00069 
00070 
/* Build sanity check: USEPOSIXTHREADS and USEUITHREADS select different
 * thread back ends further down in this header, so defining both at once is
 * rejected at compile time. */
00071 #if defined(USEPOSIXTHREADS) && defined(USEUITHREADS)
00072 #error You may only define USEPOSIXTHREADS or USEUITHREADS, but not both
00073 #endif
00074 
00075 
/* On any of the platforms detected by the macros below, default to the POSIX
 * threads back end, but only when the build has not already chosen a back
 * end explicitly. */
00076 #if defined(_AIX) || defined(__APPLE__) || defined(_CRAY) || defined(__hpux) || defined(__irix) || defined(__linux) || defined(__osf__) ||  defined(__PARAGON__)
00077 #if !defined(USEUITHREADS) && !defined(USEPOSIXTHREADS)
00078 #define USEPOSIXTHREADS
00079 #endif
00080 #endif
00081 
00082 
/* When the build defines SunOS, default to the <thread.h>-based back end
 * (USEUITHREADS) unless a back end was already chosen explicitly. */
00083 #if defined(SunOS)
00084 #if !defined(USEPOSIXTHREADS) && !defined(USEUITHREADS)
00085 #define USEUITHREADS
00086 #endif
00087 #endif
00088 
00089 
00090 
00091 
00092 
/* Sentinel values for CPU capability reporting.
 * NOTE(review): presumably CPU_SMTDEPTH_UNKNOWN is stored in
 * wkf_cpu_caps_t.smtdepth and CPU_UNKNOWN in wkf_cpu_caps_t.flags when the
 * hardware cannot be identified -- confirm against the implementation. */
00093 #define CPU_SMTDEPTH_UNKNOWN           0
00094 #define CPU_UNKNOWN           0x00000001
00095 
00096 
/* Feature bits named after x86 instruction-set extensions and CPU properties.
 * Each macro is a distinct single bit so values can be OR-ed into one mask
 * (presumably wkf_cpu_caps_t.flags -- confirm).  Note that these values
 * overlap numerically with the CPU_ARM64_* bits defined in this header, so
 * the two families cannot be mixed in the same mask.
 * CPU_KNL is a convenience mask combining the four AVX-512 subset bits
 * F, CD, ER and PF. */
00097 #define CPU_HT                0x00000010
00098 #define CPU_HYPERVISOR        0x00000020
00099 #define CPU_SSE2              0x00000100
00100 #define CPU_SSE4_1            0x00000200
00101 #define CPU_F16C              0x00000400
00102 #define CPU_FMA               0x00000800
00103 #define CPU_AVX               0x00001000
00104 #define CPU_AVX2              0x00002000
00105 #define CPU_AVX512F           0x00010000
00106 #define CPU_AVX512CD          0x00020000
00107 #define CPU_AVX512ER          0x00040000
00108 #define CPU_AVX512PF          0x00080000
00109 #define CPU_KNL         (CPU_AVX512F | CPU_AVX512CD | \
00110                          CPU_AVX512ER | CPU_AVX512PF)
00111 
00112 
/*
 * ARM64 CPU feature bits.
 *
 * Each macro is a distinct single bit so that several features can be OR-ed
 * into one capability mask and tested individually.  The values overlap
 * numerically with the x86 CPU_* feature bits, so the two families must not
 * be combined in the same mask.
 *
 * CPU_ARM64_FP previously shared the value 0x00000080 with CPU_ARM64_HPFP,
 * which made the two features indistinguishable in a mask.  It now uses the
 * otherwise unused bit 0x00000040; every other value is unchanged.
 */
#define CPU_ARM64_CPUID       0x00000010
#define CPU_ARM64_CRC32       0x00000020
#define CPU_ARM64_FP          0x00000040
#define CPU_ARM64_HPFP        0x00000080
#define CPU_ARM64_AES         0x00000100
#define CPU_ARM64_ATOMICS     0x00000200
#define CPU_ARM64_ASIMD       0x00000400
#define CPU_ARM64_ASIMDDP     0x00000800
#define CPU_ARM64_ASIMDHP     0x00001000
#define CPU_ARM64_ASIMDRDM    0x00002000
#define CPU_ARM64_ASIMDFHM    0x00004000
#define CPU_ARM64_SVE         0x00008000
#define CPU_ARM64_SHA512      0x00010000
#define CPU_ARM64_SHA1        0x00020000
#define CPU_ARM64_SHA2        0x00040000
#define CPU_ARM64_SHA3        0x00080000
00129 
/* CPU capability record; a pointer to this type is the sole parameter of
 * wkf_cpu_capability_flags() declared later in this header. */
00130 typedef struct wkf_cpu_caps_struct {
00131   unsigned int flags; /* presumably a bitwise OR of CPU_* feature bits -- confirm */
00132   int smtdepth; /* presumably the SMT depth, or CPU_SMTDEPTH_UNKNOWN -- confirm */
00133 } wkf_cpu_caps_t;
00134 
00135 
00136 #ifdef WKFTHREADS
/* POSIX threads back end: thread, mutex and condition variable types map
 * directly onto their pthread equivalents. */
00137 #ifdef USEPOSIXTHREADS
00138 #include <pthread.h>
00139 
00140 typedef pthread_t        wkf_thread_t;
00141 typedef pthread_mutex_t   wkf_mutex_t;
00142 typedef pthread_cond_t     wkf_cond_t;
00143 
/* Reader/writer lock assembled from one mutex and two condition variables
 * instead of a native pthread rwlock type.
 * NOTE(review): the field roles below are inferred from the names only;
 * confirm against the wkf_rwlock_*() implementation. */
00144 typedef struct rwlock_struct {
00145   pthread_mutex_t lock;          /* presumably guards the fields below */
00146   int rwlock;                    /* presumably the reader/writer state */
00147   pthread_cond_t  rdrs_ok;       /* presumably signalled when readers may proceed */
00148   unsigned int waiting_writers;  /* presumably the count of blocked writers */
00149   pthread_cond_t  wrtr_ok;       /* presumably signalled when a writer may proceed */
00150 } wkf_rwlock_t;
00151 
00152 #endif
00153 
/* <thread.h> back end (USEUITHREADS): all four primitive types, including
 * the reader/writer lock, map directly onto the types from <thread.h>. */
00154 #ifdef USEUITHREADS
00155 #include <thread.h>
00156 
00157 typedef thread_t  wkf_thread_t;
00158 typedef mutex_t   wkf_mutex_t;
00159 typedef cond_t    wkf_cond_t;
00160 typedef rwlock_t  wkf_rwlock_t;
00161 #endif
00162 
00163 
/* Windows (MSVC) back end: threads are HANDLEs and mutexes are
 * CRITICAL_SECTIONs. */
00164 #ifdef _MSC_VER
00165 #include <windows.h>
00166 typedef HANDLE wkf_thread_t;
00167 typedef CRITICAL_SECTION wkf_mutex_t;
00168 
/* The leading "0 &&" makes this condition always false, so the native
 * CONDITION_VARIABLE path is compiled out regardless of the Windows version
 * and the emulated condition variable in the #else branch is always used. */
00169 #if 0 && (NTDDI_VERSION >= NTDDI_WS08 || _WIN32_WINNT > 0x0600)
00170 
00171 #define WKFUSEWIN2008CONDVARS 1
00172 typedef CONDITION_VARIABLE wkf_cond_t;
00173 #else
00174 
00175 
00176 
00177 
00178 
00179 
/* Unconditionally enabled switch; presumably selects Interlocked*() operations
 * for maintaining the waiter count -- confirm against the implementation. */
00180 #if 1
00181 #define WKFUSEINTERLOCKEDATOMICOPS 1
00182 #endif
/* NOTE(review): these look like indices into wkf_cond_t.events[] -- confirm. */
00183 #define WKF_COND_SIGNAL    0
00184 #define WKF_COND_BROADCAST 1
/* Emulated condition variable built from a waiter count, a critical section
 * and two event handles. */
00185 typedef struct {
00186   LONG waiters;                  /* presumably the number of waiting threads */
00188   CRITICAL_SECTION waiters_lock; /* presumably serializes access to waiters */
00189   HANDLE events[2];              /* presumably one signal and one broadcast event */
00190 } wkf_cond_t;
00191 #endif
00192 
/* Reader/writer lock with the same field layout as the POSIX variant, but
 * built from the wkf_mutex_t / wkf_cond_t types defined above.
 * NOTE(review): field roles are inferred from the names -- confirm. */
00193 typedef struct rwlock_struct {
00194   wkf_mutex_t lock;               /* presumably guards the fields below */
00195   int rwlock;                    /* presumably the reader/writer state */
00196   wkf_cond_t  rdrs_ok;            /* presumably signalled when readers may proceed */
00197   unsigned int waiting_writers;  /* presumably the count of blocked writers */
00198   wkf_cond_t  wrtr_ok;            /* presumably signalled when a writer may proceed */
00199 } wkf_rwlock_t;
00200 
00201 #endif
00202 #endif 
00203 
00204 
/* Builds without WKFTHREADS: every primitive type collapses to a plain int
 * placeholder so the structs and prototypes in this header still compile. */
00205 #ifndef WKFTHREADS
00206 typedef int wkf_thread_t;
00207 typedef int wkf_mutex_t;
00208 typedef int wkf_cond_t;
00209 typedef int wkf_rwlock_t;
00210 #endif
00211 
00212 #if defined(USENETBSDATOMICS) 
00213 #include <sys/atomic.h>
00214 #elif defined(USESOLARISATOMICS)
00215 #include <atomic.h>
00216 #endif
00217 
/* Integer counter used by the wkf_atomic_int_*() functions.  The type of
 * `val` depends on which atomics back end the build selects.
 * NOTE(review): USEWIN32ATOMICS is not defined anywhere in this header (the
 * Windows section defines WKFUSEINTERLOCKEDATOMICOPS instead) -- confirm that
 * the build system sets it when the LONG variant is required. */
00218 typedef struct atomic_int_struct {
00219   int padding1[8];        /* presumably padding against false sharing -- confirm */
00220   wkf_mutex_t lock;        /* presumably protects val when no native atomics are used */
00221 #if defined(USENETBSDATOMICS)
00222   unsigned int val;       /* counter value (NetBSD atomics build) */
00223 #elif defined(USESOLARISATOMICS)
00224   unsigned int val;       /* counter value (Solaris atomics build) */
00225 #elif defined(USEWIN32ATOMICS)
00226   LONG val;               /* counter value (Win32 atomics build) */
00227 #else
00228   int val;                /* counter value (default build) */
00229 #endif
00230   int padding2[8];        /* presumably padding against false sharing -- confirm */
00231 } wkf_atomic_int_t;
00232 
00233 
/* State for the counting barrier used by wkf_thread_barrier*().
 * NOTE(review): field roles are inferred from names only; confirm against
 * the implementation. */
00234 typedef struct barrier_struct {
00235   int padding1[8];        /* presumably padding against false sharing */
00236   wkf_mutex_t lock;        /* presumably guards the fields below */
00237   int n_clients;          /* presumably the number of threads the barrier waits for */
00238   int n_waiting;          /* presumably the number of threads currently waiting */
00239   int phase;              /* presumably distinguishes successive barrier rounds */
00240   int sum;                /* presumably accumulates the per-thread increments */
00241   int result;             /* presumably the value handed back to waiters */
00242   wkf_cond_t wait_cv;      /* presumably what waiting threads block on */
00243   int padding2[8];        /* presumably padding against false sharing */
00244 } wkf_barrier_t;
00245 
/* State for the "run barrier" used by wkf_thread_run_barrier*(): a barrier
 * that also carries a function pointer and an argument pointer.
 * NOTE(review): field roles are inferred from names only; confirm against
 * the implementation. */
00246 typedef struct wkf_run_barrier_struct {
00247   int padding1[8];        /* presumably padding against false sharing */
00248   wkf_mutex_t lock;        /* presumably guards the fields below */
00249   int n_clients;          /* presumably the number of threads the barrier waits for */
00250   int n_waiting;          /* presumably the number of threads currently waiting */
00251   int phase;              /* presumably distinguishes successive barrier rounds */
00252   void * (*fctn)(void *); /* presumably the function submitted to the barrier */
00253   void * parms;           /* presumably the argument for fctn */
00254   void * (*rslt)(void *); /* presumably the function returned to waiters */
00255   void * rsltparms;       /* presumably the argument returned to waiters */
00256   wkf_cond_t wait_cv;      /* presumably what waiting threads block on */
00257   int padding2[8];        /* presumably padding against false sharing */
00258 } wkf_run_barrier_t;
00259 
00260 
00261 
00262 
00263 
/* Hardware and CPU query routines.  Only the prototypes are visible here; the
 * per-function notes are inferred from names and signatures and should be
 * confirmed against the implementation. */

/* Presumably returns the number of physical processors. */
00265 int wkf_thread_numphysprocessors(void);
00266 
/* Presumably returns the number of processors available to the program. */
00268 int wkf_thread_numprocessors(void);
00269 
/* Takes a caller-provided wkf_cpu_caps_t; presumably fills it in and returns
 * a status code. */
00271 int wkf_cpu_capability_flags(wkf_cpu_caps_t *cpucaps);
00272 
/* Returns an int pointer and takes an int out-parameter; presumably a list
 * of CPU ids plus its length.  Ownership of the returned memory is not
 * visible here -- confirm. */
00274 int * wkf_cpu_affinitylist(int *cpuaffinitycount);
00275 
/* Presumably binds the calling thread to the given CPU index. */
00277 int wkf_thread_set_self_cpuaffinity(int cpu);
00278 
/* Presumably sets the thread concurrency level. */
00280 int wkf_thread_setconcurrency(int);
00281 
00282 
00283 
00284 
00285 
/* Thread lifecycle.  Only the prototypes are visible here; the notes are
 * inferred from signatures -- confirm against the implementation. */

/* Takes a thread handle out-parameter, a start routine of type
 * void *(void *), and an opaque argument (presumably passed to the routine). */
00287 int wkf_thread_create(wkf_thread_t *, void * fctn(void *), void *);
00288 
/* Takes a thread handle by value and a void ** (presumably receives the
 * start routine's return value). */
00290 int wkf_thread_join(wkf_thread_t, void **);
00291 
00292 
00293 
00294 
00295 
/* Mutex operations on wkf_mutex_t.  Only the prototypes are visible here;
 * return-value conventions are not shown and the notes are inferred from the
 * names -- confirm against the implementation. */

/* Presumably initializes the mutex. */
00297 int wkf_mutex_init(wkf_mutex_t *);
00298 
/* Presumably acquires the mutex, blocking if needed. */
00300 int wkf_mutex_lock(wkf_mutex_t *);
00301 
/* Presumably attempts to acquire the mutex without blocking. */
00303 int wkf_mutex_trylock(wkf_mutex_t *);
00304 
/* Presumably acquires the mutex by spinning rather than sleeping. */
00306 int wkf_mutex_spin_lock(wkf_mutex_t *);
00307 
/* Presumably releases the mutex. */
00309 int wkf_mutex_unlock(wkf_mutex_t *);
00310 
/* Presumably destroys the mutex. */
00312 int wkf_mutex_destroy(wkf_mutex_t *);
00313 
00314 
00315 
00316 
00317 
/* Condition variable operations on wkf_cond_t.  Only the prototypes are
 * visible here; the notes are inferred from the names -- confirm against the
 * implementation. */

/* Presumably initializes the condition variable. */
00319 int wkf_cond_init(wkf_cond_t *);
00320 
/* Presumably destroys the condition variable. */
00322 int wkf_cond_destroy(wkf_cond_t *);
00323 
/* Takes the condition variable and a mutex; presumably waits on the
 * condition using that mutex. */
00325 int wkf_cond_wait(wkf_cond_t *, wkf_mutex_t *);
00326 
/* Presumably wakes one waiter. */
00328 int wkf_cond_signal(wkf_cond_t *);
00329 
/* Presumably wakes all waiters. */
00331 int wkf_cond_broadcast(wkf_cond_t *);
00332 
00333 
00334 
00335 
00336 
/* Operations on wkf_atomic_int_t.  Only the prototypes are visible here; the
 * notes are inferred from the names -- confirm against the implementation. */

/* Presumably initializes the counter to val. */
00338 int wkf_atomic_int_init(wkf_atomic_int_t * atomp, int val);
00339 
/* Presumably releases resources held by the counter. */
00341 int wkf_atomic_int_destroy(wkf_atomic_int_t * atomp);
00342 
/* Presumably stores val into the counter. */
00344 int wkf_atomic_int_set(wkf_atomic_int_t * atomp, int val);
00345 
/* Presumably returns the current counter value. */
00347 int wkf_atomic_int_get(wkf_atomic_int_t * atomp);
00348 
/* Presumably adds inc and returns the value held before the addition. */
00350 int wkf_atomic_int_fetch_and_add(wkf_atomic_int_t * atomp, int inc);
00351 
/* Presumably adds inc and returns the value held after the addition. */
00353 int wkf_atomic_int_add_and_fetch(wkf_atomic_int_t * atomp, int inc);
00354 
00355 
00356 
00357 
00358 
/* Reader/writer lock operations on wkf_rwlock_t.  A single unlock entry point
 * is declared for both lock modes.  Only the prototypes are visible here; the
 * notes are inferred from the names -- confirm against the implementation. */

/* Presumably initializes the lock. */
00360 int wkf_rwlock_init(wkf_rwlock_t *);
00361 
/* Presumably acquires the lock for reading. */
00363 int wkf_rwlock_readlock(wkf_rwlock_t *);
00364 
/* Presumably acquires the lock for writing. */
00366 int wkf_rwlock_writelock(wkf_rwlock_t *);
00367 
/* Presumably releases a lock acquired in either mode. */
00369 int wkf_rwlock_unlock(wkf_rwlock_t *);
00370 
00371 
00372 
00373 
00374 
/* Counting barrier API on wkf_barrier_t.  Only the prototypes are visible
 * here; the notes are inferred from names and signatures -- confirm against
 * the implementation. */

/* Returns a barrier pointer; presumably a newly allocated barrier for
 * n_clients threads. */
00376 wkf_barrier_t * wkf_thread_barrier_init(int n_clients);
00377 
/* Takes a caller-provided barrier instead of returning one; presumably
 * initializes it in place for use across processes. */
00386 int wkf_thread_barrier_init_proc_shared(wkf_barrier_t *, int n_clients);
00387 
/* Presumably destroys the barrier. */
00389 void wkf_thread_barrier_destroy(wkf_barrier_t *barrier);
00390 
/* Presumably blocks until all clients arrive; the int parameter and int
 * return, together with the sum/result fields of wkf_barrier_t, suggest a
 * combined value is returned -- confirm. */
00392 int wkf_thread_barrier(wkf_barrier_t *barrier, int increment);
00393 
00394 
00395 
00396 
00397 
00398 
/* Run-barrier API on wkf_run_barrier_t.  Only the prototypes are visible
 * here; the notes are inferred from names and signatures -- confirm against
 * the implementation. */

/* Takes a caller-provided barrier; presumably initializes it for n_clients
 * threads. */
00400 int wkf_thread_run_barrier_init(wkf_run_barrier_t *barrier, int n_clients);
00401 
/* Presumably destroys the run barrier. */
00403 void wkf_thread_run_barrier_destroy(wkf_run_barrier_t *barrier);
00404 
/* Returns a pointer to a function of type void *(void *).  Takes a function
 * and its argument, plus a void ** out-parameter (presumably receiving the
 * argument that goes with the returned function). */
00406 void * (*wkf_thread_run_barrier(wkf_run_barrier_t *barrier,
00407                                 void * fctn(void*),
00408                                 void * parms,
00409                                 void **rsltparms))(void *);
00410 
/* Presumably a non-blocking query of the barrier state. */
00412 int wkf_thread_run_barrier_poll(wkf_run_barrier_t *barrier);
00413 
00414 
/* A range of work items described by two integer indices; used by the tile
 * stack, shared iterator, thread pool and thread launch APIs in this header. */
00420 typedef struct wkf_tasktile_struct {
00421   int start;         /* first index of the range */
00422   int end;           /* end index; inclusive or exclusive is not visible here -- confirm */
00423 } wkf_tasktile_t;
00424 
00425 
00426 
00427 
00428 
/* Status value for an empty tile stack (presumably returned by
 * wkf_tilestack_pop() -- confirm against the implementation).  The negative
 * literal is parenthesized so the macro always expands as a single operand. */
#define WKF_TILESTACK_EMPTY (-1)
00430 
/* Stack of wkf_tasktile_t entries carrying its own mutex.
 * NOTE(review): field roles are inferred from names only; confirm against
 * the wkf_tilestack_*() implementation. */
00434 typedef struct {
00435   wkf_mutex_t mtx;    /* presumably guards the stack for concurrent use */
00436   int growthrate;    /* presumably how many entries are added when growing */
00437   int size;          /* presumably the current capacity of s */
00438   int top;           /* presumably the index of the top entry */
00439   wkf_tasktile_t *s;  /* tile storage */
00440 } wkf_tilestack_t;
00441 
/* Tile stack operations on wkf_tilestack_t.  Only the prototypes are visible
 * here; the notes are inferred from names and signatures -- confirm against
 * the implementation. */

/* Presumably initializes the stack with an initial capacity of size. */
00443 int wkf_tilestack_init(wkf_tilestack_t *s, int size);
00444 
/* Presumably releases the stack's resources. */
00446 void wkf_tilestack_destroy(wkf_tilestack_t *);
00447 
/* Presumably shrinks the stack's storage to fit its contents. */
00449 int wkf_tilestack_compact(wkf_tilestack_t *);
00450 
/* Takes a const tile pointer, so the pushed tile is not modified. */
00452 int wkf_tilestack_push(wkf_tilestack_t *, const wkf_tasktile_t *);
00453 
/* Takes a writable tile pointer; presumably receives the popped tile. */
00455 int wkf_tilestack_pop(wkf_tilestack_t *, wkf_tasktile_t *);
00456 
/* Presumably removes every entry. */
00458 int wkf_tilestack_popall(wkf_tilestack_t *);
00459 
/* Presumably reports whether the stack is empty. */
00461 int wkf_tilestack_empty(wkf_tilestack_t *);
00462 
00463 
/* Scheduler status codes (presumably returned by the *_next_tile() functions
 * -- confirm against the implementation).  The negative literal is
 * parenthesized so the macro always expands as a single operand. */
#define WKF_SCHED_DONE     (-1)
#define WKF_SCHED_CONTINUE  0
/* Iterator over an integer range carrying its own mutex, used by the
 * wkf_shared_iterator_*() functions.
 * NOTE(review): field roles are inferred from names only; confirm against
 * the implementation. */
00473 typedef struct wkf_shared_iterator_struct {
00474   wkf_mutex_t mtx;      /* presumably guards the fields below */
00475   int start;           /* presumably the first index of the range */
00476   int end;             /* presumably the end index of the range */
00477   int current;         /* presumably the next index to hand out */
00478   int fatalerror;      /* presumably set once a fatal error is flagged */
00479 } wkf_shared_iterator_t;
00480 
/* Shared iterator operations on wkf_shared_iterator_t.  Only the prototypes
 * are visible here; the notes are inferred from names and signatures --
 * confirm against the implementation. */

/* Presumably initializes the iterator. */
00482 int wkf_shared_iterator_init(wkf_shared_iterator_t *it);
00483 
/* Presumably destroys the iterator. */
00485 int wkf_shared_iterator_destroy(wkf_shared_iterator_t *it);
00486 
/* Presumably sets the iterator's range from tile. */
00488 int wkf_shared_iterator_set(wkf_shared_iterator_t *it, wkf_tasktile_t *tile);
00489 
/* Takes a requested size and a writable tile; presumably writes the next
 * range of up to reqsize items into tile and returns a WKF_SCHED_* status. */
00496 int wkf_shared_iterator_next_tile(wkf_shared_iterator_t *it, int reqsize,
00497                                  wkf_tasktile_t *tile);
00498 
/* Presumably flags a fatal error on the iterator. */
00500 int wkf_shared_iterator_setfatalerror(wkf_shared_iterator_t *it);
00501 
/* Presumably reports whether a fatal error has been flagged. */
00503 int wkf_shared_iterator_getfatalerror(wkf_shared_iterator_t *it);
00504 
00505 
00506 
00507 
00508 
/* Null device list (presumably passed as the devlist argument of
 * wkf_threadpool_create() to request CPU-only workers -- confirm).  Expands
 * to NULL, so the including file must have NULL in scope (e.g. <stddef.h>). */
#define WKF_THREADPOOL_DEVLIST_CPUSONLY NULL

/* Device id for a worker with no device attached (presumably reported by
 * wkf_threadpool_worker_getdevid() -- confirm).  The negative literal is
 * parenthesized so the macro always expands as a single operand. */
#define WKF_THREADPOOL_DEVID_CPU (-1)
00514 
/* Per-worker record of the thread pool; wkf_threadpool_t holds a pointer to
 * these in its workerdata field.
 * NOTE(review): field roles are inferred from names only; confirm against
 * the implementation. */
00516 typedef struct wkf_threadpool_workerdata_struct {
00517   int padding1[8];                        /* presumably padding against false sharing */
00518   wkf_shared_iterator_t *iter;             /* presumably the pool's shared iterator */
00519   wkf_tilestack_t *errorstack;             /* presumably the pool's stack of failed tiles */
00520   int threadid;                           /* presumably this worker's index */
00521   int threadcount;                        /* presumably the total number of workers */
00522   int devid;                              /* presumably a device id, or WKF_THREADPOOL_DEVID_CPU */
00523   float devspeed;                         /* presumably a relative device speed factor */
00524   void *parms;                            /* presumably the client argument for the worker function */
00525   void *thrpool;                          /* presumably a back-pointer to the owning pool */
00526   int padding2[8];                        /* presumably padding against false sharing */
00527 } wkf_threadpool_workerdata_t;
00528 
/* Thread pool state.  The iterator and error stack are embedded by value,
 * while the thread handles and per-worker records are held through pointers.
 * NOTE(review): field roles are inferred from names only; confirm against
 * the implementation. */
00529 typedef struct wkf_threadpool_struct {
00530   int workercount;                        /* presumably the number of worker threads */
00531   int *devlist;                           /* presumably per-worker device ids; may be NULL */
00532   wkf_shared_iterator_t iter;              /* presumably hands out work tiles */
00533   wkf_tilestack_t errorstack;              /* presumably collects failed tiles */
00534   wkf_thread_t *threads;                   /* presumably an array of worker handles */
00535   wkf_threadpool_workerdata_t *workerdata; /* presumably an array of per-worker records */
00536   wkf_run_barrier_t runbar;                /* presumably dispatches work to the workers */
00537 } wkf_threadpool_t;
00538 
/* Thread pool API.  Only the prototypes are visible here; the notes are
 * inferred from names and signatures -- confirm against the implementation.
 * Several worker-side functions take the pool or worker record as void *;
 * which concrete type each one expects is not visible in this header. */

/* Returns a pool pointer; presumably creates workercount workers, with
 * devlist optionally assigning device ids. */
00540 wkf_threadpool_t * wkf_threadpool_create(int workercount, int *devlist);
00541 
/* Takes a worker function of type void *(void *) and its argument; the
 * blocking flag presumably selects whether the call waits for completion. */
00543 int wkf_threadpool_launch(wkf_threadpool_t *thrpool,
00544                          void *fctn(void *), void *parms, int blocking);
00545 
/* Presumably waits for the workers to finish the launched function. */
00547 int wkf_threadpool_wait(wkf_threadpool_t *thrpool);
00548 
/* Presumably shuts the pool down and frees it. */
00550 int wkf_threadpool_destroy(wkf_threadpool_t *thrpool);
00551 
/* Presumably returns the number of workers in the pool. */
00553 int wkf_threadpool_get_workercount(wkf_threadpool_t *thrpool);
00554 
/* Presumably writes the worker's id and the worker count through the two
 * out-parameters. */
00556 int wkf_threadpool_worker_getid(void *voiddata, int *threadid, int *threadcount);
00557 
/* Presumably writes the worker's device id through devid. */
00559 int wkf_threadpool_worker_getdevid(void *voiddata, int *devid);
00560 
/* Presumably records a speed factor for the worker's device. */
00567 int wkf_threadpool_worker_setdevspeed(void *voiddata, float speed);
00568 
/* Presumably writes the worker's device speed factor through speed. */
00573 int wkf_threadpool_worker_getdevspeed(void *voiddata, float *speed);
00574 
/* Takes tilesize by pointer; presumably rescales it in place according to
 * the worker's device speed. */
00579 int wkf_threadpool_worker_devscaletile(void *voiddata, int *tilesize);
00580 
/* Presumably writes the client argument pointer through clientdata. */
00582 int wkf_threadpool_worker_getdata(void *voiddata, void **clientdata);
00583 
/* Presumably sets up dynamic scheduling over the range given by tile. */
00585 int wkf_threadpool_sched_dynamic(wkf_threadpool_t *thrpool, wkf_tasktile_t *tile);
00586 
/* Presumably writes the next work range of up to reqsize items into tile and
 * returns a WKF_SCHED_* status. */
00591 int wkf_threadpool_next_tile(void *thrpool, int reqsize, wkf_tasktile_t *tile);
00592 
/* Presumably records tile as failed so that it can be retried. */
00597 int wkf_threadpool_tile_failed(void *thrpool, wkf_tasktile_t *tile);
00598 
/* Presumably flags a fatal error for the pool. */
00600 int wkf_threadpool_setfatalerror(void *thrparms);
00601 
/* Presumably reports whether a fatal error has been flagged. */
00603 int wkf_threadpool_getfatalerror(void *thrparms);
00604 
00605 
/* Per-thread record for the wkf_threadlaunch*() API.
 * NOTE(review): field roles are inferred from names only; confirm against
 * the implementation. */
00613 typedef struct wkf_threadlaunch_struct {
00614   int padding1[8];              /* presumably padding against false sharing */
00615   wkf_shared_iterator_t *iter;   /* presumably the iterator shared by the launched threads */
00616   int threadid;                 /* presumably this thread's index */
00617   int threadcount;              /* presumably the total number of launched threads */
00618   void * clientdata;            /* presumably the caller's opaque argument */
00619   int padding2[8];              /* presumably padding against false sharing */
00620 } wkf_threadlaunch_t;
00621 
/* Thread launch API.  Only the prototypes are visible here; the notes are
 * inferred from names and signatures -- confirm against the implementation.
 * The worker-side functions take their record as void *; presumably this is
 * the pointer handed to the launched function. */

/* Takes a thread count, an opaque client pointer, a worker function of type
 * void *(void *) and a tile; presumably runs fctn on numprocs threads over
 * the range given by tile. */
00623 int wkf_threadlaunch(int numprocs, void *clientdata, void * fctn(void *),
00624                     wkf_tasktile_t *tile);
00625 
/* Presumably writes the thread's id and the thread count through the two
 * out-parameters. */
00627 int wkf_threadlaunch_getid(void *thrparms, int *threadid, int *threadcount);
00628 
/* Presumably writes the client pointer through clientdata. */
00630 int wkf_threadlaunch_getdata(void *thrparms, void **clientdata);
00631 
/* Presumably writes the next work range of up to reqsize items into tile and
 * returns a WKF_SCHED_* status. */
00636 int wkf_threadlaunch_next_tile(void *voidparms, int reqsize,
00637                               wkf_tasktile_t *tile);
00638 
/* Presumably flags a fatal error for the launch. */
00640 int wkf_threadlaunch_setfatalerror(void *thrparms);
00641 
00642 
00643 #ifdef __cplusplus
00644 }
00645 #endif
00646 
00647 #endif