1 #ifndef COMPUTE_NONBONDED_MIC_KERNEL_H
2 #define COMPUTE_NONBONDED_MIC_KERNEL_H
134 #define MIC_HANDCODE_FORCE ( 1 ) // 1 // 0 - disable, !0 - enable hand-coded force generation loop
135 #define MIC_HANDCODE_FORCE_PFDIST ( 0 ) // (currently broken!) 0 - disable, >=8 - enable software prefetch (value = prefetch distance)
136 #define MIC_HANDCODE_FORCE_USEGATHER ( 1 ) // 1 //
137 #define MIC_HANDCODE_FORCE_USEGATHER_NBTBL ( 0 )
138 #define MIC_HANDCODE_FORCE_LOAD_VDW_UPFRONT ( 0 ) // 0 - disable, !0 - enable loading of vdw type information prior to the cutoff check within the force loops
139 #define MIC_HANDCODE_FORCE_COMBINE_FORCES ( 1 ) // 1 // 0 - disable, !0 - enable a single set of force updates (short-fast and full), rather than two separate updates in the force loop body (allowing indexes, etc. to be reused for both)
142 #define MIC_HANDCODE_FORCE_SINGLE ( 1 ) // 1 // 0 - disable, !0 - enable use of single precision to calculate force magnitude in force loops (forces still accumulated in double precision)
143 #define MIC_HANDCODE_FORCE_CALCR2TABLE ( 1 ) // 1 // 0 - disable, !0 - enable r2_table value inline calculation (vs r2_table value loads)
144 #define MIC_HANDCODE_FORCE_SOA_VS_AOS ( 1 ) // 1 // 0 - SoA, !0 - AoS
146 #define MIC_SORT_ATOMS ( 1 ) // 1 // 0 - disable, !0 - enable atom sorting within each patch
147 #define MIC_SORT_LISTS ( 0 ) // (currently not working, leave at 0)
150 #define MIC_HANDCODE_PLGEN ( 1 ) // 1 // 0 - disable, !0 - enable hand-coded pairlist generation
151 #define MIC_TILE_PLGEN ( 0 ) // 0 - disable, !0 - enable pairlist tiling (value = number of tiles to create for each atom list)
152 #define MIC_CONDITION_NORMAL ( 0 ) // 0 - disable, !0 - enable normal pairlist "conditioning" (enabling this flag will cause the normal pairlist to be separated into two sets, such that pairs that are likely to be within cutoff are grouped together and pairs that are less likely to be within cutoff... so vector lanes tend to all active or all not-active)
153 #define MIC_PAD_PLGEN ( 1 ) // 1 // 0 - disable, !0 - enable pairlist padding
156 #define MULTIPLE_THREADS ( 1 ) // 1 // 0 - disable, !0 - use multiple threads when processing compute objects (a.k.a. patch pairs)
158 #define MIC_SORT_COMPUTES ( 1 )
160 #define MIC_SYNC_INPUT ( 1 )
161 #define MIC_SYNC_OUTPUT ( 1 )
163 #define MIC_SUBMIT_ATOMS_ON_ARRIVAL ( 0 )
166 #define REFINE_PAIRLISTS ( 0 ) // 0 - disable, !0 - enable pairlist refinement
167 #define REFINE_PAIRLIST_HANDCODE ( 0 ) // 0 - disable, !0 - use hand-coded version of the pairlist refinement code
168 #define REFINE_PAIRLISTS_XYZ ( 0 ) // 0 - disable, !0 - save dX (delta X), dY, and dZ values along with r2 values as part of the pairlist refinement output process
170 #define MIC_PREFETCH_DISTANCE ( 0 )
171 #define MIC_PREFETCH_HINT ( 3 )
174 #define MIC_FULL_CHECK ( 1 ) // 0 - disable, !0 - enable various checks use to help verify correct operation, but that are not required to perform the calculations themselves
175 #define MIC_EXCL_CHECKSUM_FULL ( 1 ) // NOTE: Mutually exclusive with MIC_EXCL_CHECKSUM
176 #define MIC_DATA_STRUCT_VERIFY ( 0 )
177 #define MIC_DATA_STRUCT_VERIFY_KERNELS ( 0 )
178 #define MIC_DEVICE_FPRINTF ( 0 ) // Print some debug info from the MICs directly to a device file on the MICs (for debug purposes)
179 #define MIC_DEVICE_FPRINTF_REOPEN_FREQ ( 0 ) // If >0, will cause the code to periodically reopen (and thus truncate) the device file, helping to limit the amount of memory it takes up
180 #define MIC_DUMP_COMPUTE_MAPPING ( 0 ) // 0: do nothing, !0: dump directToDevice info from ComputeMap
181 #define MIC_TRACK_DEVICE_MEM_USAGE ( 0 ) // 0: do nothing, !0: print memusage on the device every 100 timesteps
182 #define MIC_HEARTBEAT ( 0 )
185 #define MIC_ALIGN ( 64 ) //4 * 1024 ) // NOTE : Must be > 0 and must be a multiple of the cacheline size (in bytes)
187 #define MIC_MAX_DEVICES_PER_NODE ( 16 )
189 #define MIC_PRINT_CONFIG ( 0 ) // 0 - disable, !0 - enable the printing of configuration information (macros defined in this file) at the start of a simulation (TODO | NOTE: This is meant as a reference for repeating performance experiments... not required for production use... disable at some point)
191 #define MIC_VERBOSE_HVD_LDB ( 0 ) // 0 - disable, !0 - print extra info about HvD LDB
193 #define MIC_DEBUG ( 0 ) // 0 - disable, !0 - enable extra debugging output to be printed to files (1 file per PE) via MICP statements
195 #define NUM_PAIRLIST_TYPES ( 4 ) // Number of pairlist types (i.e. multiplier used for mallocs, etc.)
196 #define PL_NORM_INDEX ( 0 ) // Index for normal pairlist
197 #define PL_EXCL_INDEX ( 1 ) // Index for excluded pairlist
198 #define PL_MOD_INDEX ( 2 ) // Index for modified pairlist
199 #define PL_NORMHI_INDEX ( 3 ) // Index for the "normal high" pairlist (temporary pairlist used for entries that are far apart, used to separate the sets of entries, close and far, used in normal pairlist conditioning... see MIC_CONDITION_NORMAL)
202 #define MIC_TRACING ( 1 ) // 0 - disable, !0 - enable tracing information to be collected on the host
203 #define MIC_DEVICE_TRACING ( 1 ) // 0 - disable, !0 - enable the collection of extra tracing information on the device itself (only high-level events, like the total time for all computes, for all patches, etc.) (requires MIC_TRACING to be set)
204 #define MIC_DEVICE_TRACING_DETAILED ( 0 ) // 0 - disable, !0 - enable the collection of more tracing information on the device, including start and stop times for each task (requires MIC_DEVICE_TRACING to be sent)
205 #define MIC_TRACING_EVENT_BASE ( 10000 )
206 #define MIC_EVENT_OFFLOAD_REMOTE ( MIC_TRACING_EVENT_BASE + 0 )
207 #define MIC_EVENT_OFFLOAD_LOCAL ( MIC_TRACING_EVENT_BASE + 1 )
208 #define MIC_EVENT_OFFLOAD_PRAGMA ( MIC_TRACING_EVENT_BASE + 2 )
209 #define MIC_EVENT_OFFLOAD_POLLSET ( MIC_TRACING_EVENT_BASE + 3 )
210 #define MIC_EVENT_FUNC_DOWORK ( MIC_TRACING_EVENT_BASE + 4 )
211 #define MIC_EVENT_FUNC_FINISHWORK ( MIC_TRACING_EVENT_BASE + 5 )
212 #define MIC_EVENT_ATOMS_SUBMIT ( MIC_TRACING_EVENT_BASE + 6 )
213 #define MIC_EVENT_ATOMS_TRANSFER ( MIC_TRACING_EVENT_BASE + 7 )
214 #define MIC_EVENT_ATOMS_WAIT ( MIC_TRACING_EVENT_BASE + 8 )
215 #define MIC_EVENT_SYNC_INPUT_PRAGMA ( MIC_TRACING_EVENT_BASE + 9 )
216 #define MIC_EVENT_SYNC_OUTPUT_REMOTE_PRAGMA ( MIC_TRACING_EVENT_BASE + 10 )
217 #define MIC_EVENT_SYNC_OUTPUT_LOCAL_PRAGMA ( MIC_TRACING_EVENT_BASE + 11 )
218 #define MIC_EVENT_DEVICE_COMPUTES ( MIC_TRACING_EVENT_BASE + 12 )
219 #define MIC_EVENT_DEVICE_VIRIALS ( MIC_TRACING_EVENT_BASE + 13 )
220 #define MIC_EVENT_DEVICE_PATCHES ( MIC_TRACING_EVENT_BASE + 14 )
221 #define MIC_EVENT_DEVICE_PATCH ( MIC_TRACING_EVENT_BASE + 15 )
222 #define MIC_EVENT_DEVICE_COMPUTE ( MIC_TRACING_EVENT_BASE + 16 )
223 #define MIC_TRACING_NUM_EVENTS ( 24 )
226 #define MIC_KERNEL_DATA_TRANSFER_STATS ( 0 )
228 __declspec(target(mic))
void* _mm_malloc_withPrint(
size_t s,
int a,
char * l);
229 __declspec(target(mic))
void _mm_free_withPrint(
void * prt);
237 #define _MM_MALLOC_WRAPPER(s, a, l) _mm_malloc((s), (a))
238 #define _MM_FREE_WRAPPER(p) _mm_free(p)
239 #define _MM_MALLOC_VERIFY
248 #if defined(__MIC__) || defined(__MIC2__)
249 #include <immintrin.h>
251 #undef MIC_HANDCODE_FORCE
252 #define MIC_HANDCODE_FORCE (0)
253 #undef MIC_HANDCODE_PLGEN
254 #define MIC_HANDCODE_PLGEN (0)
260 #if MIC_FULL_CHECK != 0
261 #define __FULL_CHECK(X) X
262 #define __ASSERT(c) assert(c)
263 #define CHECK_ASSUME (1)
264 #define CHECK_ASSUME_ALIGNED (1)
265 #if (MIC_EXCL_CHECKSUM_FULL == 0)
266 #warning "MIC_EXCL_CHECKSUM_FULL enabled (because MIC_FULL_CHECK was enabled)"
268 #undef MIC_EXCL_CHECKSUM_FULL
269 #define MIC_EXCL_CHECKSUM_FULL (1)
271 #define __FULL_CHECK(X)
273 #define CHECK_ASSUME (0)
274 #define CHECK_ASSUME_ALIGNED (0)
280 #define __MUST_BE_MIC
282 #define __MUST_BE_MIC __ASSERT(0)
286 #if (MIC_PAD_PLGEN != 0) && (MIC_HANDCODE_FORCE != 0)
287 #if REFINE_PAIRLISTS != 0
288 #warning "REFINE_PAIRLISTS disabled (because MIC_PAD_PLGEN && MIC_HANDCODE_FORCE were enabled; combination not supported yet)"
290 #undef REFINE_PAIRLISTS
291 #define REFINE_PAIRLISTS (0)
295 #if MIC_HANDCODE_FORCE_SINGLE != 0
296 #if REFINE_PAIRLISTS != 0
297 #warning "REFINE_PAIRLISTS disabled (because MIC_HANDCODE_FORCE_SINGLE was enabled)"
299 #undef REFINE_PAIRLISTS
300 #define REFINE_PAIRLISTS (0)
305 #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
306 #if MIC_HANDCODE_FORCE_LOAD_VDW_UPFRONT != 0
307 #warning "MIC_HANDCODE_FORCE_LOAD_VDW_UPFRONT disabled (because MIC_HANDCODE_FORCE_SOA_VS_AOS was enabled; combination not supported)"
309 #undef MIC_HANDCODE_FORCE_LOAD_VDW_UPFRONT
310 #define MIC_HANDCODE_FORCE_LOAD_VDW_UPFRONT (0)
314 #if MIC_PREFETCH_DISTANCE < 0
315 #warning "INVALID MIC_PREFETCH_DISTANCE value (disabling)"
316 #undef MIC_PREFETCH_DISTANCE
317 #define MIC_PREFETCH_DISTANCE (0)
321 #if MIC_DEVICE_TRACING_DETAILED != 0
322 #if MIC_DEVICE_TRACING == 0
323 #warning "MIC_DEVICE_TRACING enabled (because MIC_DEVICE_TRACING_DETAILED was enabled)"
325 #undef MIC_DEVICE_TRACING
326 #define MIC_DEVICE_TRACING (1)
328 #if MIC_DEVICE_TRACING != 0
330 #warning "MIC_TRACING enabled (because MIC_DEVICE_TRACING was enabled)"
333 #define MIC_TRACING (1)
342 #define MIC_TRACING_REGISTER_EVENT(str, id) traceRegisterUserEvent(str, id)
344 #define MIC_TRACING_REGISTER_EVENTS \
345 MIC_TRACING_REGISTER_EVENT("MIC device offload (remote)", MIC_EVENT_OFFLOAD_REMOTE); \
346 MIC_TRACING_REGISTER_EVENT("MIC device offload (local)", MIC_EVENT_OFFLOAD_LOCAL); \
347 MIC_TRACING_REGISTER_EVENT("MIC offload pragma", MIC_EVENT_OFFLOAD_PRAGMA); \
348 MIC_TRACING_REGISTER_EVENT("MIC polling set", MIC_EVENT_OFFLOAD_POLLSET); \
349 MIC_TRACING_REGISTER_EVENT("MIC ::doWork", MIC_EVENT_FUNC_DOWORK); \
350 MIC_TRACING_REGISTER_EVENT("MIC ::finishWork", MIC_EVENT_FUNC_FINISHWORK); \
351 MIC_TRACING_REGISTER_EVENT("MIC atom submit", MIC_EVENT_ATOMS_SUBMIT); \
352 MIC_TRACING_REGISTER_EVENT("MIC atom transfer", MIC_EVENT_ATOMS_TRANSFER); \
353 MIC_TRACING_REGISTER_EVENT("MIC atom wait", MIC_EVENT_ATOMS_WAIT); \
354 MIC_TRACING_REGISTER_EVENT("MIC sync input pragma", MIC_EVENT_SYNC_INPUT_PRAGMA); \
355 MIC_TRACING_REGISTER_EVENT("MIC sync output remote pragma", MIC_EVENT_SYNC_OUTPUT_REMOTE_PRAGMA); \
356 MIC_TRACING_REGISTER_EVENT("MIC sync output local pragma", MIC_EVENT_SYNC_OUTPUT_LOCAL_PRAGMA); \
357 MIC_TRACING_REGISTER_EVENT("[MIC] computes", MIC_EVENT_DEVICE_COMPUTES); \
358 MIC_TRACING_REGISTER_EVENT("[MIC] virials", MIC_EVENT_DEVICE_VIRIALS); \
359 MIC_TRACING_REGISTER_EVENT("[MIC] patches", MIC_EVENT_DEVICE_PATCHES); \
360 MIC_TRACING_REGISTER_EVENT("[MIC] patch", MIC_EVENT_DEVICE_PATCH); \
361 MIC_TRACING_REGISTER_EVENT("[MIC] compute (d0)", MIC_EVENT_DEVICE_COMPUTE + 0); \
362 MIC_TRACING_REGISTER_EVENT("[MIC] compute (d1)", MIC_EVENT_DEVICE_COMPUTE + 1); \
363 MIC_TRACING_REGISTER_EVENT("[MIC] compute (d2)", MIC_EVENT_DEVICE_COMPUTE + 2); \
364 MIC_TRACING_REGISTER_EVENT("[MIC] compute (d3)", MIC_EVENT_DEVICE_COMPUTE + 3); \
365 MIC_TRACING_REGISTER_EVENT("[MIC] compute (d4)", MIC_EVENT_DEVICE_COMPUTE + 4); \
366 MIC_TRACING_REGISTER_EVENT("[MIC] compute (d5)", MIC_EVENT_DEVICE_COMPUTE + 5); \
367 MIC_TRACING_REGISTER_EVENT("[MIC] compute (d6)", MIC_EVENT_DEVICE_COMPUTE + 6); \
368 MIC_TRACING_REGISTER_EVENT("[MIC] compute (d7+)", MIC_EVENT_DEVICE_COMPUTE + 7); \
370 MIC_TRACING_REGISTER_EVENT("Pair", 11000); \
371 MIC_TRACING_REGISTER_EVENT("Self", 11001); \
372 MIC_TRACING_REGISTER_EVENT("PME", 11002); \
373 MIC_TRACING_REGISTER_EVENT("Tuple", 11003); \
374 MIC_TRACING_REGISTER_EVENT("Box Closing", 11010); \
375 MIC_TRACING_REGISTER_EVENT("Positions Ready", 11020); \
376 MIC_TRACING_REGISTER_EVENT("Post Run Computes", 11021); \
377 MIC_TRACING_REGISTER_EVENT("Pre Run Computes", 11022); \
378 MIC_TRACING_REGISTER_EVENT("Reduction Submit", 11023); \
379 MIC_TRACING_REGISTER_EVENT("MIC Atom Work", 11030); \
380 MIC_TRACING_REGISTER_EVENT("Patch::positionsReady", 11031); \
381 MIC_TRACING_REGISTER_EVENT("Atom send", 11032); \
382 MIC_TRACING_REGISTER_EVENT("DEBUG 0", 11040); \
383 MIC_TRACING_REGISTER_EVENT("DEBUG 1", 11041); \
384 MIC_TRACING_REGISTER_EVENT("DEBUG 2", 11042); \
385 MIC_TRACING_REGISTER_EVENT("DEBUG 3", 11043); \
386 MIC_TRACING_REGISTER_EVENT("DEBUG 4", 11044); \
387 MIC_TRACING_REGISTER_EVENT("recvImmediateProxyData", 11046); \
388 MIC_TRACING_REGISTER_EVENT("msg copy send", 11047); \
389 MIC_TRACING_REGISTER_EVENT("ProxyMgr::sendProxyData", 11048); \
390 MIC_TRACING_REGISTER_EVENT("MIC box close", 11050); \
391 MIC_TRACING_REGISTER_EVENT("MIC reduction submit", 11051); \
392 MIC_TRACING_REGISTER_EVENT("WorkDistrib compute", 11070); \
393 MIC_TRACING_REGISTER_EVENT("doWork::work", 11080); \
394 MIC_TRACING_REGISTER_EVENT("doWork::progress", 11081);
396 #define MIC_TRACING_RECORD(id, start, stop) traceUserBracketEvent(id, start, stop)
398 extern void traceUserBracketEvent(
int,
double,
double);
399 extern double CmiWallTimer();
403 #define MIC_TRACING_REGISTER_EVENT(str, id)
404 #define MIC_TRACING_REGISTER_EVENTS
405 #define MIC_TRACING_RECORD(id, start, stop)
414 extern void debugInit(FILE* fout);
415 extern void debugClose();
417 extern __thread FILE* mic_output;
418 extern double CmiWallTimer();
419 extern int CmiMyPe();
420 #define MICP(fmt, ...) fprintf(((mic_output == NULL) ? (stdout) : (mic_output)), "[%013.6lf, %05d] " fmt, CmiWallTimer(), CmiMyPe(), ##__VA_ARGS__)
421 #define MICPF fflush((mic_output != NULL) ? (mic_output) : (stdout))
423 #define MICP(fmt, ...)
429 #if MIC_DEVICE_FPRINTF != 0
430 #define DEVICE_FPRINTF(fmt, ...) if (device__fout != NULL) { fprintf(device__fout, fmt, ##__VA_ARGS__); fflush(device__fout); }
431 #define DEVICE_FPRINTF_CLAUSE nocopy(device__fout)
433 #define DEVICE_FPRINTF(fmt, ...)
434 #define DEVICE_FPRINTF_CLAUSE
441 #pragma offload_attribute (push, target(mic))
446 struct float3 {
float x,
y,
z ; };
447 struct float4 {
float x,
y,
z,w; };
448 struct double3 {
double x,
y,
z ; };
449 struct double4 {
double x,
y,
z,w; };
450 struct int2 {
int x,
y ; };
451 struct int3 {
int x,
y,
z ; };
452 struct int4 {
int x,
y,
z,w; };
454 typedef float mic_position_t;
455 typedef float3 mic_position3_t;
456 typedef float4 mic_position4_t;
464 mic_position4_t offset;
466 unsigned int patch1_size;
467 unsigned int patch1_force_size;
468 unsigned int patch1_atom_start;
469 unsigned int patch1_force_start;
470 unsigned int patch1_force_list_index;
471 unsigned int patch1_force_list_size;
473 unsigned int patch2_size;
474 unsigned int patch2_force_size;
475 unsigned int patch2_atom_start;
476 unsigned int patch2_force_start;
477 unsigned int patch2_force_list_index;
478 unsigned int patch2_force_list_size;
482 unsigned int block_flags_start;
483 unsigned int virial_start;
488 #if MIC_SUBMIT_ATOMS_ON_ARRIVAL != 0
489 uint64_t patch1_atomDataPtr;
490 uint64_t patch2_atomDataPtr;
494 mic_position_t patch1_center_x;
495 mic_position_t patch1_center_y;
496 mic_position_t patch1_center_z;
497 mic_position_t patch2_center_x;
498 mic_position_t patch2_center_y;
499 mic_position_t patch2_center_z;
512 unsigned int force_list_start;
513 unsigned int force_list_size;
514 unsigned int patch_size;
515 unsigned int patch_stride;
516 unsigned int force_output_start;
517 unsigned int atom_start;
518 unsigned int virial_list_start;
519 unsigned int virial_output_start;
545 struct mic_constants {
558 int singleKernelFlag;
563 struct mic_kernel_data {
568 int numLocalComputes;
575 mic_position3_t
lata;
576 mic_position3_t
latb;
577 mic_position3_t
latc;
581 size_t forceBuffersReqSize;
590 double fullElectVirial_xx;
591 double fullElectVirial_xy;
592 double fullElectVirial_xz;
593 double fullElectVirial_yy;
594 double fullElectVirial_yz;
595 double fullElectVirial_zz;
598 double fullElectEnergy;
616 mic_constants *constants;
623 #if REFINE_PAIRLISTS != 0
626 double** r2Array_ptr;
629 void *table_four_base_ptr;
630 void *lj_table_base_ptr;
633 unsigned int *exclusion_bits;
635 mic_position4_t offset;
640 #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
649 mic_position_t *p_x[2];
650 mic_position_t *p_y[2];
651 mic_position_t *p_z[2];
652 mic_position_t *p_q[2];
653 int *pExt_vdwType[2];
655 int *pExt_exclIndex[2];
656 int *pExt_exclMaxDiff[2];
681 double fullElectVirial_xx;
682 double fullElectVirial_xy;
683 double fullElectVirial_xz;
684 double fullElectVirial_yy;
685 double fullElectVirial_yz;
686 double fullElectVirial_zz;
689 double fullElectEnergy;
694 mic_position_t patch1_center_x;
695 mic_position_t patch1_center_y;
696 mic_position_t patch1_center_z;
697 mic_position_t patch2_center_x;
698 mic_position_t patch2_center_y;
699 mic_position_t patch2_center_z;
713 #pragma offload_attribute (pop)
722 void mic_init_device(
const int pe,
const int node,
const int deviceNum);
725 void mic_free_device(
const int deviceNum);
730 int mic_check(
int deviceNum);
733 void mic_bind_table_four(
const int deviceNum,
734 const double * table_four,
735 const int table_four_n,
736 const int table_four_n_16
740 void mic_bind_lj_table(
const int deviceNum,
742 const int lj_table_dim,
747 void mic_bind_exclusions(
const int deviceNum,
748 unsigned int *exclusion_bits,
749 const long int exclusion_bits_size
753 void mic_bind_constants(
const int deviceNum,
755 const double dielectric_1,
756 const double scaling,
757 const double scale14,
758 const double r2_delta,
759 const int r2_delta_exp,
765 void mic_bind_patch_pairs_only(
const int deviceNum,
768 const int patch_pairs_bufSize
771 void mic_bind_force_lists_only(
const int deviceNum,
772 force_list *force_lists,
773 const int force_lists_size,
774 const int force_lists_bufSize
777 void mic_bind_atoms_only(
const int deviceNum,
783 const int atoms_bufSize
786 void mic_bind_force_buffers_only(
const int deviceNum,
const size_t force_buffers_size);
789 #if MIC_SUBMIT_ATOMS_ON_ARRIVAL != 0
790 void mic_submit_patch_data(
const int deviceNum,
791 void *
const hostPtr,
793 const int transferBytes,
794 const int allocBytes_host,
795 int &allocBytes_device,
799 #endif // MIC_SUBMIT_ATOMS_ON_ARRIVAL
805 void mic_nonbonded_forces(
const int deviceNum,
807 const int numLocalAtoms,
808 const int numLocalComputes,
809 const int numLocalPatches,
810 const mic_position3_t
lata,
811 const mic_position3_t
latb,
812 const mic_position3_t
latc,
815 const int usePairlists,
816 const int savePairlists,
817 const int atomsChanged
820 void mic_transfer_output(
const int deviceNum,
822 const int numLocalAtoms,
828 int mic_check_remote_kernel_complete(
const int deviceNum);
829 int mic_check_local_kernel_complete(
const int deviceNum);
832 void _mic_print_stats(
const int deviceNum);
833 void mic_dumpHostDeviceComputeMap();
837 #if MIC_TRACK_DEVICE_MEM_USAGE != 0
839 __declspec(target(mic))
840 typedef struct _mem_info {
850 __declspec(target(mic))
851 void printMemInfo(
int device__pe,
int device__timestep, MemInfo * mi);
853 __declspec(target(mic))
854 void readMemInfo_processLine(MemInfo * memInfo,
char * n,
char * v,
char * u);
856 __declspec(target(mic))
857 bool readMemInfo(MemInfo * memInfo);
859 #endif // MIC_TRACK_DEVICE_MEM_USAGE != 0
863 #endif // COMPUTE_NONBONDED_MIC_KERNEL_H
register BigReal virial_xy
register BigReal virial_xz
register BigReal virial_yz
static __global__ void(const patch_pair *patch_pairs, const atom *atoms, const atom_param *atom_params, const int *vdw_types, unsigned int *plist, float4 *tmpforces, float4 *slow_tmpforces, float4 *forces, float4 *slow_forces, float *tmpvirials, float *slow_tmpvirials, float *virials, float *slow_virials, unsigned int *global_counters, int *force_ready_queue, const unsigned int *overflow_exclusions, const int npatches, const int block_begin, const int total_block_count, int *block_order, exclmask *exclmasks, const int lj_table_size, const float3 lata, const float3 latb, const float3 latc, const float cutoff2, const float plcutoff2, const int doSlow)
static __thread int patch_pairs_size
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 lata
static __thread int lj_table_size
register BigReal electEnergy
static __thread atom * atoms
static __thread float4 * forces
static __thread float4 * slow_forces
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 latb
register BigReal virial_yy
static __thread patch_pair * patch_pairs
texture< float2, 1, cudaReadModeElementType > lj_table
register BigReal virial_zz
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cutoff2
register BigReal virial_xx
static __thread atom_param * atom_params
k< npairi;++k){TABENERGY(const int numtypes=simParams->tableNumTypes;const float table_spacing=simParams->tableSpacing;const int npertype=(int)(namdnearbyint(simParams->tableMaxDist/simParams->tableSpacing)+1);) int table_i=(r2iilist[2 *k] >> 14)+r2_delta_expc;const int j=pairlisti[k];#define p_j BigReal diffa=r2list[k]-r2_table[table_i];#define table_four_i TABENERGY(register const int tabtype=-1-(lj_pars->A< 0?lj_pars->A:0);) BigReal kqq=kq_i *p_j-> charge
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 latc
static __thread int atoms_size