26 #include "WorkDistrib.decl.h" 30 #include "main.decl.h" 49 #include "TopoManager.h" 54 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 56 #define __thread __declspec(thread) 62 #define MIN_DEBUG_LEVEL 2 64 #ifdef MEM_OPT_VERSION 92 randtopo = CmiGetArgFlag(argv,
"+randtopo");
93 if ( CkMyPe() >= CkNumPes() )
return;
94 #if CCD_COND_FN_EXISTS 95 CcdCallOnCondition(CcdTOPOLOGY_AVAIL, (CcdCondFn)
build_ordering, (
void*)0);
97 CcdCallOnCondition(CcdTOPOLOGY_AVAIL, (CcdVoidFn)
build_ordering, (
void*)0);
108 CkpvAccess(BOCclass_group).workDistrib = thisgroup;
109 patchMapArrived =
false;
110 computeMapArrived =
false;
113 #define MACHINE_PROGRESS 115 #define MACHINE_PROGRESS { traceUserEvent(eventMachineProgress); CmiMachineProgressImpl(); } 116 if ( CkMyNodeSize() > 1 )
NAMD_bug(
"CkMyNodeSize() > 1 for non-smp build");
128 if ( d )
while ( ! (d & c) ) {
131 return (a & c) - (b & c);
137 if ( d )
while ( ! (d & c) ) {
148 if ( c < 0 )
return true;
149 if ( c > 0 )
return false;
152 if ( c < 0 )
return true;
153 if ( c > 0 )
return false;
165 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 173 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 187 const int numPhys = CmiNumPhysicalNodes();
188 const int numNode = CmiNumNodes();
189 const int numPe = CmiNumPes();
199 for (
int ph=0; ph<numPhys; ++ph ) {
201 CmiGetPesOnPhysicalNode(ph, &pes, &npes);
202 for (
int i=0; i<npes; ++i, ++k ) {
205 numNodeInPhys[ph] = 0;
206 for (
int i=0, j=0; i<npes; i += CmiNodeSize(CmiNodeOf(pes[i])), ++j ) {
207 rankInPhysOfNode[CmiNodeOf(pes[i])] = j;
208 numNodeInPhys[ph] += 1;
213 if ( ! CkMyNode() ) {
214 iout <<
iWARN <<
"RANDOMIZING PHYSICAL NODE ORDERING\n" <<
endi;
217 for (
int j=0; j<numPhys; ++j ) {
218 randPhysOrder[j] = j;
221 for (
int j=0, k=0; j<numPhys; ++j ) {
222 const int ph = randPhysOrder[j];
224 CmiGetPesOnPhysicalNode(ph, &pes, &npes);
225 for (
int i=0; i<npes; ++i, ++k ) {
231 for (
int i=0; i<numPe; ++i ) {
237 for (
int i=0; i<numPe; ++i ) {
242 if ( 0 && CmiMyNode() == 0 )
for (
int i=0; i<numPe; ++i ) {
243 CkPrintf(
"order %5d %5d %5d %5d %5d\n", i,
274 int x_begin,
int x_end,
int y_begin,
int y_end,
276 int *result,
int ydim
278 int x_len = x_end - x_begin;
279 int y_len = y_end - y_begin;
280 if ( x_len == 1 && y_len == 1 ) {
282 if ( 0 ) CkPrintf(
"pme %5d %5d on pe %5d at %f %f\n", x_begin, y_begin, *pe_begin,
283 coord[*pe_begin].x, coord[*pe_begin].y);
284 result[x_begin*ydim + y_begin] = *pe_begin;
287 int *pe_end = pe_begin + x_len * y_len;
288 if ( x_len >= y_len ) {
290 int x_split = x_begin + x_len / 2;
291 int* pe_split = pe_begin + (x_split - x_begin) * y_len;
297 int y_split = y_begin + y_len / 2;
298 int* pe_split = pe_begin + (y_split - y_begin) * x_len;
306 int numpes = CkNumPes();
310 for (
int i=0; i<numpes; ++i ) {
316 for (
int i=0, npatches=patchMap->
numPatches(); i<npatches; ++i ) {
317 int pe = patchMap->
node(i);
319 sumPos[pe] += patchMap->
center(i);
321 const int npmepes = xdim*ydim;
323 for (
int i=0; i<npmepes; ++i ) {
324 int pe = sortpes[i] = pmepes[i];
329 int node = CkNodeOf(pe);
330 int nsize = CkNodeSize(node);
331 int pe2 = CkNodeFirst(node);
332 for (
int j=0; j<nsize; ++j, ++pe2 ) {
339 int node = CmiPhysicalNodeID(pe);
341 CmiGetPesOnPhysicalNode(node, &nlist, &nsize);
342 for (
int j=0; j<nsize; ++j ) {
349 avgPos[pe] = sum / cnt;
359 saveComputeMapReturnEP = ep;
360 saveComputeMapReturnChareID = chareID;
363 CProxy_WorkDistrib(thisgroup).recvComputeMapChanges(mapMsg);
388 for (i=0; i<nc; i++) {
389 int data = computeMap->
newNode(i);
393 for (i=0; i<nc; i++) {
401 }
else if ( ! CkMyRank() ) {
405 if ( i != nc )
NAMD_bug(
"WorkDistrib::recvComputeMapChanges check 1 failed\n");
406 for (i=0; i<nc; i++) {
412 if ( i != nc )
NAMD_bug(
"WorkDistrib::recvComputeMapChanges check 2 failed\n");
413 for (i=0; i<nc; i++) {
419 if ( i != nc )
NAMD_bug(
"WorkDistrib::recvComputeMapChanges check 3 failed\n");
424 CkCallback cb(CkIndex_WorkDistrib::doneSaveComputeMap(NULL), 0, thisgroup);
425 contribute(0, NULL, CkReduction::random, cb);
431 CkSendMsgBranch(saveComputeMapReturnEP, CkAllocMsg(0,0,0), 0, saveComputeMapReturnChareID);
434 #ifdef MEM_OPT_VERSION 439 void WorkDistrib::fillAtomListForOnePatch(
int pid,
FullAtomList &alist){
443 0.5*(patchMap->
min_b(pid)+patchMap->
max_b(pid)),
444 0.5*(patchMap->
min_c(pid)+patchMap->
max_c(pid)));
446 int n = alist.
size();
462 for(
int j=0; j < n; j++)
469 if ( a[j].migrationGroupSize ) {
470 if ( a[j].migrationGroupSize != a[j].hydrogenGroupSize ) {
475 for (
int k=a[j].hydrogenGroupSize; k<mgs;
483 pos = lattice.
nearest(pos,center,&mother_transform);
487 a[j].
position = lattice.
nearest(a[j].position, center, &(a[j].transform));
516 }
else if ((a[j].status &
DrudeAtom)!=0) {
531 for(
int j=0; j < n; j+=size) {
534 NAMD_bug(
"Mother atom with hydrogenGroupSize of 0!");
537 for (
int k = 0; k < size; ++k ) {
538 allfixed = ( allfixed && (a[j+k].
atomFixed) );
540 for (
int k = 0; k < size; ++k ) {
546 if (a[j].rigidBondLength > 0) {
547 if (size != wathgsize) {
550 "Water molecule starting with atom %d contains %d atoms " 551 "but the specified water model requires %d atoms.\n",
552 a[j].
id+1, size, wathgsize
557 for (
int k = 0; k < size; k++) {
558 anyfixed += ( fixedAtomsOn && a[j+k].
atomFixed );
560 if (useSettle && !anyfixed) {
561 for (
int k = 0; k < size; k++) {
570 int numAtomsInPatch = n;
571 int numFixedAtomsInPatch = 0;
572 int numAtomsInFixedGroupsInPatch = 0;
573 for(
int j=0; j < n; j++) {
574 numFixedAtomsInPatch += ( a[j].
atomFixed ? 1 : 0 );
575 numAtomsInFixedGroupsInPatch += ( a[j].
groupFixed ? 1 : 0 );
577 iout <<
"PATCH_DETAILS:" 578 <<
" on proc " << CkMyPe()
579 <<
" patch " << patchId
580 <<
" atoms " << numAtomsInPatch
581 <<
" fixed_atoms " << numFixedAtomsInPatch
582 <<
" fixed_groups " << numAtomsInFixedGroupsInPatch
597 int lesReduceTemp = lesOn &&
simParams->lesReduceTemp;
602 int totalAtoms = inAtoms.
size();
603 for(i=0;i<totalAtoms;i++)
605 Real atomMs=inAtoms[i].mass;
617 kbToverM = sqrt(kbT * 1.0 / atomMs);
619 for (randnum=0.0, j=0; j<12; j++)
621 randnum += vel_random.uniform();
626 inAtoms[i].velocity.x = randnum*kbToverM;
628 for (randnum=0.0, j=0; j<12; j++)
630 randnum += vel_random.uniform();
635 inAtoms[i].velocity.y = randnum*kbToverM;
637 for (randnum=0.0, j=0; j<12; j++)
639 randnum += vel_random.uniform();
644 inAtoms[i].velocity.z = randnum*kbToverM;
656 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
657 Node *node = nd.ckLocalBranch();
659 CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
660 PatchMgr *patchMgr = pm.ckLocalBranch();
673 read_binary_file((std::string(basename)+
".coor").c_str(), positions, numAtoms);
674 read_binary_file((std::string(basename)+
".vel").c_str(), velocities, numAtoms);
676 PDB coorpdb((std::string(basename)+
".coor").c_str());
678 NAMD_die(
"Incorrect atom count in coordinate pdb file");
681 velocities_from_PDB((std::string(basename)+
".vel").c_str(), velocities, numAtoms);
692 if (current == NULL) {
698 velocities_from_PDB(current->
data, velocities, numAtoms);
701 velocities_from_binfile(current->
data, velocities, numAtoms);
706 random_velocities(params->
initialTemp, molecule, velocities, numAtoms);
712 remove_com_motion(velocities, molecule, numAtoms);
721 for ( i=0; i < numAtoms; i++ ) {
723 if ( ! h.
isMP )
continue;
731 for ( i=0; i < sortAtoms.
size(); i++ ) {
734 int *breaks =
new int[numPatches];
736 sortAtoms.
size(),numAtoms,
742 for (
int pid = 0; pid < numPatches; ++pid ) {
743 int iend = breaks[pid];
744 for ( ; i<iend; ++i ) {
753 for (
int k=0; k<mgs; ++k ) {
773 CkPrintf(
"patch %d (%d %d %d) has %d atoms\n",
783 for(i=0; i < numAtoms; i++)
812 delete [] velocities;
814 for(i=0; i < numPatches; i++)
820 int n = atoms[i].
size();
842 if ( a[j].migrationGroupSize ) {
843 if ( a[j].migrationGroupSize != a[j].hydrogenGroupSize ) {
847 for (
int k=a[j].hydrogenGroupSize; k<mgs;
854 pos = lattice.
nearest(pos,center,&mother_transform);
859 a[j].position, center, &(a[j].transform));
873 if ( alchOn || lesOn || pairInteractionOn || pressureProfileTypes) {
884 int size, allfixed, k;
885 for(j=0; j < n; j+=size) {
888 NAMD_bug(
"Mother atom with hydrogenGroupSize of 0!");
891 for ( k = 0; k < size; ++k ) {
892 allfixed = ( allfixed && (a[j+k].
atomFixed) );
894 for ( k = 0; k < size; ++k ) {
900 if (a[j].rigidBondLength > 0) {
901 for (k = 0; k < size; k++) {
914 for(
int j=0; j < n; j+=size) {
917 NAMD_bug(
"Mother atom with hydrogenGroupSize of 0!");
920 for (
int k = 0; k < size; ++k ) {
921 allfixed = ( allfixed && (a[j+k].
atomFixed) );
923 for (
int k = 0; k < size; ++k ) {
929 if (a[j].rigidBondLength > 0) {
930 if (size != wathgsize) {
933 "Water molecule starting with atom %d contains %d atoms " 934 "but the specified water model requires %d atoms.\n",
935 a[j].
id+1, size, wathgsize
940 for (
int k = 0; k < size; k++) {
941 anyfixed += ( fixedAtomsOn && a[j+k].
atomFixed );
943 if (useSettle && !anyfixed) {
944 for (
int k = 0; k < size; k++) {
954 int numAtomsInPatch = n;
955 int numFixedAtomsInPatch = 0;
956 int numAtomsInFixedGroupsInPatch = 0;
957 for(j=0; j < n; j++) {
958 numFixedAtomsInPatch += ( a[j].
atomFixed ? 1 : 0 );
959 numAtomsInFixedGroupsInPatch += ( a[j].
groupFixed ? 1 : 0 );
961 iout <<
"PATCH_DETAILS:" 962 <<
" patch " << patchId
963 <<
" atoms " << numAtomsInPatch
964 <<
" fixed_atoms " << numFixedAtomsInPatch
965 <<
" fixed_groups " << numAtomsInFixedGroupsInPatch
981 CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
982 PatchMgr *patchMgr = pm.ckLocalBranch();
988 #ifdef MEM_OPT_VERSION 998 for(i=0; i < numPatches; i++) {
999 int numAtoms = atoms[i].
size();
1000 if ( numAtoms > maxAtoms ) { maxAtoms = numAtoms; maxPatch = i; }
1002 iout <<
iINFO <<
"LARGEST PATCH (" << maxPatch <<
1003 ") HAS " << maxAtoms <<
" ATOMS\n" <<
endi;
1005 #ifdef SHOW_HISTOGRAM_HGROUP_SIZES 1007 int hgroupsize[9] = { 0 };
1010 int maxhgroupsize = 0;
1011 for (i = 0; i < numPatches; i++) {
1013 int numAtoms = a.
size();
1015 for (
int j = 0; j < numAtoms; j += hgs) {
1016 hgs = a[j].hydrogenGroupSize;
1017 int histndx = ( hgs > 8 ? 8 : hgs );
1018 hgroupsize[ histndx ]++;
1020 if (a[j].
isWater) numwaters++;
1021 if (maxhgroupsize < hgs) maxhgroupsize = hgs;
1024 int hgslast = ( maxhgroupsize > 8 ? 8 : maxhgroupsize );
1025 printf(
"Number of hydrogen groups: %7d\n", numhgroups);
1026 printf(
"Number of settle water molecules: %7d\n", numwaters);
1027 printf(
"Number of remaining hydrogen groups: %7d\n", numhgroups - numwaters);
1028 printf(
"Largest hydrogen group size: %7d\n", maxhgroupsize);
1029 printf(
"Histogram of hydrogen group sizes:\n");
1031 for (i = 0; i <= hgslast; i++) {
1032 printf(
" size %d count %d\n", i, hgroupsize[i]);
1033 hgstotal += hgroupsize[i];
1035 printf(
"Checksum over hydrogen group sizes: %7d\n", hgstotal);
1038 for(i=0; i < numPatches; i++)
1040 if ( ! ( i % 100 ) )
1042 DebugM(3,
"Created " << i <<
" patches so far.\n");
1053 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1054 Node *node = nd.ckLocalBranch();
1055 CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
1056 PatchMgr *patchMgr = pm.ckLocalBranch();
1063 if (patchMap->
node(i) != node->
myid() )
1065 DebugM(3,
"patchMgr->movePatch(" 1066 << i <<
"," << patchMap->
node(i) <<
")\n");
1076 CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
1077 PatchMgr *patchMgr = pm.ckLocalBranch();
1083 for(
int i=0; i < numPatches; i++) {
1101 if ( CkNumPes() == 1 ) {
1102 patchMapArrived =
true;
1107 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1108 Node *node = nd.ckLocalBranch();
1111 #ifdef NODEAWARE_PROXY_SPANNINGTREE
1112 || CkNumPes() > CkNumNodes()
1113 ) && ( CkNumNodes() > 1
1118 #ifdef NODEAWARE_PROXY_SPANNINGTREE 1119 if ( CkNumPes() > CkNumNodes() && CkNumNodes() > 1
1130 CProxy_WorkDistrib workProxy(thisgroup);
1131 workProxy[0].savePatchMap(mapMsg);
1143 if ( CkMyRank() ) patchMapArrived =
true;
1145 if ( patchMapArrived && CkMyPe() ) {
1149 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1150 Node *node = nd.ckLocalBranch();
1153 #ifdef NODEAWARE_PROXY_SPANNINGTREE
1154 || CkNumPes() > CkNumNodes()
1155 ) && ( CkNumNodes() > 1
1160 #ifdef NODEAWARE_PROXY_SPANNINGTREE 1161 if ( CkNumPes() > CkNumNodes() && CkNumNodes() > 1
1167 if ( patchMapArrived ) {
1168 if ( CkMyRank() + 1 < CkNodeSize(CkMyNode()) ) {
1169 ((CProxy_WorkDistrib(thisgroup))[CkMyPe()+1]).
savePatchMap(msg);
1176 patchMapArrived =
true;
1178 int self = CkMyNode();
1179 int range_begin = 0;
1180 int range_end = CkNumNodes();
1181 while (
self != range_begin ) {
1183 int split = range_begin + ( range_end - range_begin ) / 2;
1185 else { range_begin =
split; }
1187 int send_near =
self + 1;
1188 int send_far = send_near + ( range_end - send_near ) / 2;
1192 if ( send_far < range_end ) pids[npid++] = CkNodeFirst(send_far);
1193 if ( send_near < send_far ) pids[npid++] = CkNodeFirst(send_near);
1194 pids[npid++] = CkMyPe();
1195 CProxy_WorkDistrib(thisgroup).savePatchMap(msg,npid,pids);
1201 if ( CkMyRank() )
return;
1203 if ( CkNumNodes() == 1 ) {
1204 computeMapArrived =
true;
1214 }
else if ( ! CkMyRank() ) {
1220 computeMapArrived =
true;
1229 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1230 Node *node = nd.ckLocalBranch();
1236 #ifndef MEM_OPT_VERSION 1244 double maxNumPatches = 1.e9;
1248 DebugM(3,
"Mapping patches\n");
1249 if ( lattice.
a_p() && lattice.
b_p() && lattice.
c_p() ) {
1250 xmin = 0.; xmax = 0.;
1262 printf(
"+++ center=%.4f %.4f %.4f\n",
1264 printf(
"+++ xmin=%.4f xmax=%.4f\n", xmin.
x, xmax.
x);
1265 printf(
"+++ ymin=%.4f ymax=%.4f\n", xmin.
y, xmax.
y);
1266 printf(
"+++ zmin=%.4f zmax=%.4f\n", xmin.
z, xmax.
z);
1278 iout <<
iINFO <<
"ORIGINAL ATOMS MINMAX IS " << xmin <<
" " << xmax <<
"\n" <<
endi;
1279 double frac = ( (double)totalAtoms - 10000. ) / (double)totalAtoms;
1280 if ( frac < 0.9 ) { frac = 0.9; }
1283 iout <<
iINFO <<
"ADJUSTED ATOMS MINMAX IS " << xmin <<
" " << xmax <<
"\n" <<
endi;
1288 origin_shift = lattice.
a_r() * lattice.
origin();
1289 xmin.
x -= origin_shift;
1290 xmax.
x -= origin_shift;
1291 origin_shift = lattice.
b_r() * lattice.
origin();
1292 xmin.
y -= origin_shift;
1293 xmax.
y -= origin_shift;
1294 origin_shift = lattice.
c_r() * lattice.
origin();
1295 xmin.
z -= origin_shift;
1296 xmax.
z -= origin_shift;
1305 if (params->
LCPOOn && patchSize < 32.4) {
1306 if ( twoAwayX > 0 || twoAwayY > 0 || twoAwayZ > 0 ) {
1307 iout <<
iWARN <<
"Ignoring twoAway[XYZ] due to LCPO SASA implementation.\n" <<
endi;
1309 twoAwayX = twoAwayY = twoAwayZ = 0;
1313 if ( twoAwayX > 0 ) maxNumPatches = 1.e9;
1314 if ( twoAwayY > 0 ) maxNumPatches = 1.e9;
1315 if ( twoAwayZ > 0 ) maxNumPatches = 1.e9;
1318 iout <<
iINFO <<
"LIMITING NUMBER OF PATCHES TO " <<
1319 maxNumPatches <<
"\n" <<
endi;
1322 int numpes = CkNumPes();
1326 delete [] patchMap->nPatchesOnNode;
1327 patchMap->nPatchesOnNode =
new int[numpes];
1328 memset(patchMap->nPatchesOnNode, 0, numpes*
sizeof(
int));
1331 #if defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC) 1334 int numPatches = patchMap->
sizeGrid(
1336 twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1337 if ( numPatches < numpes && twoAwayX < 0 ) {
1341 twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1343 if ( numPatches < numpes && twoAwayY < 0 ) {
1347 twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1349 if ( numPatches < numpes && twoAwayZ < 0 ) {
1353 twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1355 if ( numPatches < numpes ) {
1356 #if defined(NAMD_MIC) 1357 NAMD_die(
"MIC-enabled NAMD requires at least one patch per thread.");
1360 NAMD_die(
"GPU-resident NAMD requires at least one patch per thread.");
1364 if ( numPatches % numpes && numPatches <= 1.4 * numpes ) {
1365 int exactFit = numPatches - numPatches % numpes;
1366 int newNumPatches = patchMap->
sizeGrid(
1368 twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1369 if ( newNumPatches == exactFit ) {
1370 iout <<
iINFO <<
"REDUCING NUMBER OF PATCHES TO IMPROVE LOAD BALANCE\n" <<
endi;
1371 maxNumPatches = exactFit;
1375 patchMap->
makePatches(xmin,xmax,lattice,patchSize,maxNumPatches,
1377 twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1381 int availPes = numpes;
1387 #ifdef MEM_OPT_VERSION 1400 int numPatches = patchMap->
sizeGrid(
1402 twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1403 if ( ( numPatches > (0.3*availPes) || numPatches > maxNumPatches
1404 ) && twoAwayZ < 0 ) {
1408 twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1410 if ( ( numPatches > (0.6*availPes) || numPatches > maxNumPatches
1411 ) && twoAwayY < 0 ) {
1415 twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1417 if ( ( numPatches > availPes || numPatches > maxNumPatches
1418 ) && twoAwayX < 0 ) {
1422 twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1424 if ( numPatches > availPes && numPatches <= (1.4*availPes) && availPes <= maxNumPatches ) {
1425 int newNumPatches = patchMap->
sizeGrid(
1427 twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1428 if ( newNumPatches <= availPes && numPatches <= (1.4*newNumPatches) ) {
1429 iout <<
iINFO <<
"REDUCING NUMBER OF PATCHES TO IMPROVE LOAD BALANCE\n" <<
endi;
1430 maxNumPatches = availPes;
1434 patchMap->
makePatches(xmin,xmax,lattice,patchSize,maxNumPatches,
1436 twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1453 #if (CMK_BLUEGENEP | CMK_BLUEGENEL) && USE_TOPOMAP 1455 int numPes = tmgr.getDimNX() * tmgr.getDimNY() * tmgr.getDimNZ();
1456 if (numPes > patchMap->
numPatches() && (assignPatchesTopoGridRecBisection() > 0)) {
1457 CkPrintf (
"Blue Gene/L topology partitioner finished successfully \n");
1461 assignPatchesSpaceFillingCurve();
1463 int *nAtoms =
new int[nNodes];
1466 for(i=0; i < nNodes; i++)
1476 #ifdef MEM_OPT_VERSION 1477 numAtoms += patchMap->numAtoms(i);
1478 nAtoms[patchMap->
node(i)] += patchMap->numAtoms(i);
1480 if (patchMap->
patch(i)) {
1487 if ( numAtoms !=
Node::Object()->molecule->numAtoms ) {
1488 for(i=0; i < nNodes; i++)
1489 iout <<
iINFO << nAtoms[i] <<
" atoms assigned to node " << i <<
"\n" <<
endi;
1491 NAMD_die(
"Incorrect atom count in WorkDistrib::assignNodeToPatch\n");
1533 void WorkDistrib::assignPatchesToLowestLoadNode()
1536 int assignedNode = 0;
1538 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1539 Node *node = nd.ckLocalBranch();
1546 int *load =
new int[ncpus];
1547 int *assignedNodes =
new int[patchMap->
numPatches()];
1548 for (
int i=0; i<ncpus; i++) {
1551 CkPrintf(
"assignPatchesToLowestLoadNode\n");
1552 int defaultNode = 0;
1553 if (
simParams->noPatchesOnZero && ncpus > 1 ){
1555 if(
simParams->noPatchesOnOne && ncpus > 2)
1559 for(pid=0; pid < patchMap->
numPatches(); pid++) {
1560 assignedNode = defaultNode;
1561 for (
int i=assignedNode + 1; i < ncpus; i++) {
1562 if (load[i] < load[assignedNode]) assignedNode = i;
1564 assignedNodes[pid] = assignedNode;
1565 #ifdef MEM_OPT_VERSION 1566 load[assignedNode] += patchMap->numAtoms(pid) + 1;
1573 sortNodesAndAssign(assignedNodes);
1574 delete[] assignedNodes;
1578 void WorkDistrib::assignPatchesBitReversal()
1582 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1583 Node *node = nd.ckLocalBranch();
1591 if ( ncpus <= npatches )
1592 NAMD_bug(
"WorkDistrib::assignPatchesBitReversal called improperly");
1596 for (
int i = 1; i < ncpus; ++i ) {
1601 sortNodesAndAssign(seq.begin());
1602 if ( ncpus > 2*npatches ) sortNodesAndAssign(seq.begin()+npatches, 1);
1620 return ((a1 == a2) && (b1 == b2) && (c1 == c2));
1629 return ( (a1 < a2) || ((a1 == a2) && (b1 < b2)) ||
1630 ((a1 == a2) && (b1 == b2) && (c1 < c2)) );
1634 void WorkDistrib::sortNodesAndAssign(
int *assignedNode,
int baseNodes) {
1640 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1641 Node *node = nd.ckLocalBranch();
1649 for ( i=0; i < nnodes; ++i ) {
1650 allnodes[i].node = i;
1652 for ( pid=0; pid<npatches; ++pid ) {
1654 allnodes[assignedNode[pid]].npatches++;
1655 allnodes[assignedNode[pid]].a_total += patchMap->
index_a(pid);
1656 allnodes[assignedNode[pid]].b_total += patchMap->
index_b(pid);
1657 allnodes[assignedNode[pid]].c_total += patchMap->
index_c(pid);
1660 usednodes.resize(0);
1661 for ( i=0; i < nnodes; ++i ) {
1662 if ( allnodes[i].npatches ) usednodes.add(allnodes[i]);
1666 for ( i=0; i < nnodes; ++i ) {
1668 if ( allnodes[pe].npatches ) allnodes[usednodes[i2++].node].node = pe;
1671 for ( pid=0; pid<npatches; ++pid ) {
1673 if ( ! baseNodes ) {
1674 patchMap->
assignNode(pid, allnodes[assignedNode[pid]].node);
1681 void WorkDistrib::assignPatchesRoundRobin()
1685 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1686 Node *node = nd.ckLocalBranch();
1692 int *assignedNode =
new int[patchMap->
numPatches()];
1694 for(pid=0; pid < patchMap->
numPatches(); pid++) {
1695 assignedNode[pid] = pid % ncpus;
1698 sortNodesAndAssign(assignedNode);
1699 delete [] assignedNode;
1703 void WorkDistrib::assignPatchesRecursiveBisection()
1706 int *assignedNode =
new int[patchMap->
numPatches()];
1713 int usedNodes = numNodes;
1714 int unusedNodes = 0;
1715 CkPrintf(
"assignPatchesRecursiveBisection\n");
1716 if (
simParams->noPatchesOnZero && numNodes > 1 ){
1718 if(
simParams->noPatchesOnOne && numNodes > 2)
1721 unusedNodes = numNodes - usedNodes;
1723 if ( recBisec.partition(assignedNode) ) {
1724 if ( unusedNodes !=0 ) {
1725 for (
int i=0; i<patchMap->
numPatches(); ++i ) {
1726 assignedNode[i] += unusedNodes;
1729 sortNodesAndAssign(assignedNode);
1730 delete [] assignedNode;
1735 delete [] assignedNode;
1738 <<
"WorkDistrib: Recursive bisection fails, " 1739 <<
"invoking space-filling curve algorithm\n";
1740 assignPatchesSpaceFillingCurve();
1751 return CmiGetFirstPeOnPhysicalNode(CmiPhysicalNodeID(pe));
1755 int na=
tmgr.getDimNA();
1756 int nb=
tmgr.getDimNB();
1757 int nc=
tmgr.getDimNC();
1758 int nd=
tmgr.getDimND();
1759 int ne=
tmgr.getDimNE();
1761 int na=
tmgr.getDimNX();
1762 int nb=
tmgr.getDimNY();
1763 int nc=
tmgr.getDimNZ();
1772 for (
int i=0; i<na; ++i ) { a_flags[i] = 0; }
1773 for (
int i=0; i<nb; ++i ) { b_flags[i] = 0; }
1774 for (
int i=0; i<nc; ++i ) { c_flags[i] = 0; }
1775 for (
int i=0; i<nd; ++i ) { d_flags[i] = 0; }
1776 for (
int i=0; i<ne; ++i ) { e_flags[i] = 0; }
1777 int npes = CkNumPes();
1778 for (
int pe=0; pe<npes; ++pe ) {
1781 tmgr.rankToCoordinates(
fixpe(pe),a,b,c,d,e,t);
1783 tmgr.rankToCoordinates(
fixpe(pe),a,b,c,t);
1786 if ( a < 0 || a >= na )
NAMD_bug(
"inconsistent torus topology!");
1787 if ( b < 0 || b >= nb )
NAMD_bug(
"inconsistent torus topology!");
1788 if ( c < 0 || c >= nc )
NAMD_bug(
"inconsistent torus topology!");
1789 if ( d < 0 || d >= nd )
NAMD_bug(
"inconsistent torus topology!");
1790 if ( e < 0 || e >= ne )
NAMD_bug(
"inconsistent torus topology!");
1797 iout <<
iINFO <<
"TORUS A SIZE " << na <<
" USING";
1798 for (
int i=0; i<na; ++i ) {
if ( a_flags[i] )
iout <<
" " << i; }
1800 iout <<
iINFO <<
"TORUS B SIZE " << nb <<
" USING";
1801 for (
int i=0; i<nb; ++i ) {
if ( b_flags[i] )
iout <<
" " << i; }
1803 iout <<
iINFO <<
"TORUS C SIZE " << nc <<
" USING";
1804 for (
int i=0; i<nc; ++i ) {
if ( c_flags[i] )
iout <<
" " << i; }
1807 iout <<
iINFO <<
"TORUS D SIZE " << nd <<
" USING";
1808 for (
int i=0; i<nd; ++i ) {
if ( d_flags[i] )
iout <<
" " << i; }
1810 iout <<
iINFO <<
"TORUS E SIZE " << ne <<
" USING";
1811 for (
int i=0; i<ne; ++i ) {
if ( e_flags[i] )
iout <<
" " << i; }
1818 if (
tmgr.absA(na) == 0 )
1820 if (
tmgr.absX(na) == 0 )
1822 for (
int i=0, gaplen=0, gapstart=0; i<2*na; ++i ) {
1823 if ( a_flags[i%na] ) gapstart = i+1;
1824 else if ( i - gapstart >= gaplen ) {
1825 a_rot = 2*na-i-1; gaplen = i - gapstart;
1829 if (
tmgr.absB(nb) == 0 )
1831 if (
tmgr.absY(nb) == 0 )
1833 for (
int i=0, gaplen=0, gapstart=0; i<2*nb; ++i ) {
1834 if ( b_flags[i%nb] ) gapstart = i+1;
1835 else if ( i - gapstart >= gaplen ) {
1836 b_rot = 2*nb-i-1; gaplen = i - gapstart;
1840 if (
tmgr.absC(nc) == 0 )
1842 if (
tmgr.absZ(nc) == 0 )
1844 for (
int i=0, gaplen=0, gapstart=0; i<2*nc; ++i ) {
1845 if ( c_flags[i%nc] ) gapstart = i+1;
1846 else if ( i - gapstart >= gaplen ) {
1847 c_rot = 2*nc-i-1; gaplen = i - gapstart;
1851 if (
tmgr.absD(nd) == 0 )
1852 for (
int i=0, gaplen=0, gapstart=0; i<2*nd; ++i ) {
1853 if ( d_flags[i%nd] ) gapstart = i+1;
1854 else if ( i - gapstart >= gaplen ) {
1855 d_rot = 2*nd-i-1; gaplen = i - gapstart;
1858 if (
tmgr.absE(ne) == 0 )
1859 for (
int i=0, gaplen=0, gapstart=0; i<2*ne; ++i ) {
1860 if ( e_flags[i%ne] ) gapstart = i+1;
1861 else if ( i - gapstart >= gaplen ) {
1862 e_rot = 2*ne-i-1; gaplen = i - gapstart;
1867 int a_min=na, a_max=-1;
1868 int b_min=nb, b_max=-1;
1869 int c_min=nc, c_max=-1;
1870 int d_min=nd, d_max=-1;
1871 int e_min=ne, e_max=-1;
1872 for (
int pe=0; pe<npes; ++pe ) {
1875 tmgr.rankToCoordinates(
fixpe(pe),a,b,c,d,e,t);
1877 tmgr.rankToCoordinates(
fixpe(pe),a,b,c,t);
1885 if ( a < a_min ) a_min = a;
1886 if ( b < b_min ) b_min = b;
1887 if ( c < c_min ) c_min = c;
1888 if ( d < d_min ) d_min = d;
1889 if ( e < e_min ) e_min = e;
1890 if ( a > a_max ) a_max = a;
1891 if ( b > b_max ) b_max = b;
1892 if ( c > c_max ) c_max = c;
1893 if ( d > d_max ) d_max = d;
1894 if ( e > e_max ) e_max = e;
1896 int a_len = a_max - a_min + 1;
1897 int b_len = b_max - b_min + 1;
1898 int c_len = c_max - c_min + 1;
1899 int d_len = d_max - d_min + 1;
1900 int e_len = e_max - e_min + 1;
1902 lensort[0] = (a_len << 3) + 0;
1903 lensort[1] = (b_len << 3) + 1;
1904 lensort[2] = (c_len << 3) + 2;
1905 lensort[3] = (d_len << 3) + 3;
1906 lensort[4] = (e_len << 3) + 4;
1908 std::sort(lensort, lensort+5);
1910 for (
int i=0; i<5; ++i ) {
if ( (lensort[i] & 7) == 0 )
a_dim = 4-i; }
1911 for (
int i=0; i<5; ++i ) {
if ( (lensort[i] & 7) == 1 )
b_dim = 4-i; }
1912 for (
int i=0; i<5; ++i ) {
if ( (lensort[i] & 7) == 2 )
c_dim = 4-i; }
1913 for (
int i=0; i<5; ++i ) {
if ( (lensort[i] & 7) == 3 )
d_dim = 4-i; }
1914 for (
int i=0; i<5; ++i ) {
if ( (lensort[i] & 7) == 4 )
e_dim = 4-i; }
1916 if ( a_len >= b_len && a_len >= c_len ) {
1918 if ( b_len >= c_len ) {
1923 }
else if ( b_len >= a_len && b_len >= c_len ) {
1925 if ( a_len >= c_len ) {
1932 if ( a_len >= b_len ) {
1939 iout <<
iINFO <<
"TORUS MINIMAL MESH SIZE IS " << a_len <<
" BY " << b_len <<
" BY " << c_len
1941 <<
" BY " << d_len <<
" BY " << e_len
1949 tmgr.rankToCoordinates(
fixpe(pe),a,b,c,d,e,t);
1951 tmgr.rankToCoordinates(
fixpe(pe),a,b,c,t);
1970 int crds1[3], crds2[3];
1973 for (
int i=0; i<3; ++i ) {
1975 if ( crds1[d] != crds2[d] )
return ( crds1[d] < crds2[d] );
1978 return ( index[pe1] < index[pe2] );
1982 if ( node_begin == node_end )
return node_begin;
1983 int tmins[3], tmaxs[3], tlens[3], sortdims[3];
1984 coords(*node_begin, tmins);
1985 coords(*node_begin, tmaxs);
1986 for (
int *peitr = node_begin; peitr != node_end; ++peitr ) {
1989 for (
int i=0; i<3; ++i ) {
1990 if ( tvals[i] < tmins[i] ) tmins[i] = tvals[i];
1991 if ( tvals[i] > tmaxs[i] ) tmaxs[i] = tvals[i];
1994 for (
int i=0; i<3; ++i ) {
1995 tlens[i] = tmaxs[i] - tmins[i];
1997 sortdims[0] = splitdim;
1998 for (
int i=0, j=0; i<3; ++i ) {
1999 if ( i != splitdim ) sortdims[++j] = i;
2001 if ( tlens[sortdims[1]] < tlens[sortdims[2]] ) {
2002 int tmp = sortdims[1];
2003 sortdims[1] = sortdims[2];
2007 int *nodes = node_begin;
2008 int nnodes = node_end - node_begin;
2011 int c_split =
coord(nodes[0],splitdim);
2012 for (
int i=0; i<nnodes; ++i ) {
2013 if (
coord(nodes[i],splitdim) != c_split ) {
2014 int mid = (nnodes+1)/2;
2015 if ( abs(i-mid) < abs(i_split-mid) ) {
2017 c_split =
coord(i,splitdim);
2023 for (
int i=0; i<nnodes; ++i ) {
2024 if ( ! CmiPeOnSamePhysicalNode(nodes[i_split],nodes[i]) ) {
2025 int mid = (nnodes+1)/2;
2026 if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
2030 return ( node_begin + i_split );
2040 if ( a1 < a2 )
return true;
2041 if ( a1 > a2 )
return false;
2042 int dir = ( (a1 & 1) ? -1 : 1 );
2045 if ( b1 * dir < b2 * dir )
return true;
2046 if ( b1 * dir > b2 * dir )
return false;
2047 dir *= ( (b1 & 1) ? -1 : 1 );
2050 if ( c1 * dir < c2 * dir )
return true;
2061 if ( a1 < a2 )
return true;
2062 if ( a1 > a2 )
return false;
2063 int dir = ( (a1 & 1) ? -1 : 1 );
2066 if ( b1 * dir < b2 * dir )
return true;
2067 if ( b1 * dir > b2 * dir )
return false;
2068 dir *= ( (b1 & 1) ? -1 : 1 );
2071 if ( c1 * dir < c2 * dir )
return true;
2082 if ( a1 < a2 )
return true;
2083 if ( a1 > a2 )
return false;
2084 int dir = ( (a1 & 1) ? -1 : 1 );
2087 if ( b1 * dir < b2 * dir )
return true;
2088 if ( b1 * dir > b2 * dir )
return false;
2089 dir *= ( (b1 & 1) ? -1 : 1 );
2092 if ( c1 * dir < c2 * dir )
return true;
2098 int *patch_begin,
int *patch_end,
2099 int *node_begin,
int *node_end,
2101 double *sortedLoads,
2108 int *patches = patch_begin;
2109 int npatches = patch_end - patch_begin;
2110 int *nodes = node_begin;
2111 int nnodes = node_end - node_begin;
2114 const int emptyPatchLoad =
simParams->emptyPatchLoad;
2115 double totalRawLoad = 0;
2116 for (
int i=0; i<npatches; ++i ) {
2118 #ifdef MEM_OPT_VERSION 2119 double load = patchMap->numAtoms(pid) + emptyPatchLoad;
2123 patchLoads[pid] = load;
2124 sortedLoads[i] = load;
2125 totalRawLoad += load;
2127 std::sort(sortedLoads,sortedLoads+npatches);
2131 double maxPatchLoad = 1;
2132 for (
int i=0; i<npatches; ++i ) {
2133 double load = sortedLoads[i];
2134 double total = sumLoad + (npatches-i) * load;
2135 if ( nnodes * load > total )
break;
2137 maxPatchLoad = load;
2139 double totalLoad = 0;
2140 for (
int i=0; i<npatches; ++i ) {
2142 if ( patchLoads[pid] > maxPatchLoad ) patchLoads[pid] = maxPatchLoad;
2143 totalLoad += patchLoads[pid];
2145 if ( nnodes * maxPatchLoad > totalLoad )
2146 NAMD_bug(
"algorithm failure in WorkDistrib recursive_bisect_with_curve()");
2148 int a_len, b_len, c_len;
2149 int a_min, b_min, c_min;
2151 a_min = patchMap->
index_a(patches[0]);
2152 b_min = patchMap->
index_b(patches[0]);
2153 c_min = patchMap->
index_c(patches[0]);
2157 for (
int i=1; i<npatches; ++i ) {
2158 int a = patchMap->
index_a(patches[i]);
2159 int b = patchMap->
index_b(patches[i]);
2160 int c = patchMap->
index_c(patches[i]);
2161 if ( a < a_min ) a_min = a;
2162 if ( b < b_min ) b_min = b;
2163 if ( c < c_min ) c_min = c;
2164 if ( a > a_max ) a_max = a;
2165 if ( b > b_max ) b_max = b;
2166 if ( c > c_max ) c_max = c;
2168 a_len = a_max - a_min;
2169 b_len = b_max - b_min;
2170 c_len = c_max - c_min;
2173 int *node_split = node_begin;
2175 if (
simParams->disableTopology ) ;
else 2176 if ( a_len >= b_len && a_len >= c_len ) {
2178 }
else if ( b_len >= a_len && b_len >= c_len ) {
2180 }
else if ( c_len >= a_len && c_len >= b_len ) {
2184 if ( node_split == node_begin ) {
2189 for (
int i=0; i<nnodes; ++i ) {
2190 if ( ! CmiPeOnSamePhysicalNode(nodes[i_split],nodes[i]) ) {
2191 int mid = (nnodes+1)/2;
2192 if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
2196 node_split = node_begin + i_split;
2199 bool final_patch_sort =
false;
2201 if ( node_split == node_begin ) {
2203 nnodes == CmiNumPesOnPhysicalNode(CmiPhysicalNodeID(*node_begin)) ) {
2205 tmgr.
coords(*node_begin, crds);
2206 CkPrintf(
"WorkDistrib: physnode %5d pe %5d node %5d at %5d %5d %5d from %5d %5d %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
2207 CmiPhysicalNodeID(*node_begin), *node_begin,
2208 CkNodeOf(*node_begin), crds[0], crds[1], crds[2],
2209 a_min, b_min, c_min, npatches,
2210 a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
2214 final_patch_sort =
true;
2218 for (
int i=0; i<nnodes; ++i ) {
2219 if ( CmiNodeOf(nodes[i_split]) != CmiNodeOf(nodes[i]) ) {
2220 int mid = (nnodes+1)/2;
2221 if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
2225 node_split = node_begin + i_split;
2228 if ( node_split == node_begin ) {
2230 nnodes == CmiNodeSize(CmiNodeOf(*node_begin)) ) {
2232 tmgr.
coords(*node_begin, crds);
2233 CkPrintf(
"WorkDistrib: node %5d pe %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
2234 CmiNodeOf(*node_begin), *node_begin, npatches,
2235 a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
2239 node_split = node_begin + nnodes/2;
2242 if ( nnodes == 1 ) {
2244 int *node = node_begin;
2246 for (
int i=0; i < npatches; ++i ) {
2247 int pid = patches[i];
2248 assignedNode[pid] = *node;
2249 sumLoad += patchLoads[pid];
2250 if ( 0 ) CkPrintf(
"assign %5d node %5d patch %5d %5d %5d load %7f total %7f\n",
2255 patchLoads[pid], sumLoad);
2261 if ( final_patch_sort ) {
2264 }
else if ( a_len >= b_len && a_len >= c_len ) {
2265 if ( 0 ) CkPrintf(
"sort a\n");
2267 }
else if ( b_len >= a_len && b_len >= c_len ) {
2268 if ( 0 ) CkPrintf(
"sort b\n");
2270 }
else if ( c_len >= a_len && c_len >= b_len ) {
2271 if ( 0 ) CkPrintf(
"sort c\n");
2277 int *node = node_begin;
2279 for ( patch_split = patch_begin;
2280 patch_split != patch_end && node != node_split;
2282 sumLoad += patchLoads[*patch_split];
2283 double targetLoad = totalLoad *
2284 ((double)(node-node_begin+1) / (double)nnodes);
2285 if ( 0 ) CkPrintf(
"test %5ld node %5d patch %5d %5d %5d load %7f target %7f\n",
2286 patch_split - patch_begin, *node,
2287 patchMap->
index_a(*patch_split),
2288 patchMap->
index_b(*patch_split),
2289 patchMap->
index_c(*patch_split),
2290 sumLoad, targetLoad);
2291 double extra = ( patch_split+1 != patch_end ? 0.5 * patchLoads[*(patch_split+1)] : 0 );
2292 if ( node+1 < node_end && sumLoad + extra >= targetLoad ) { ++node; }
2294 double targetLoad = totalLoad *
2295 ((double)(node_split-node_begin) / (double)nnodes);
2296 if ( 0 ) CkPrintf(
"split node %5ld/%5d patch %5ld/%5d load %7f target %7f\n",
2297 node_split-node_begin, nnodes,
2298 patch_split-patch_begin, npatches,
2299 sumLoad, targetLoad);
2304 patch_begin, patch_split, node_begin, node_split,
2305 patchLoads, sortedLoads, assignedNode, tmgr);
2307 patch_split, patch_end, node_split, node_end,
2308 patchLoads, sortedLoads, assignedNode, tmgr);
2312 void WorkDistrib::assignPatchesSpaceFillingCurve()
2316 const int numPatches = patchMap->
numPatches();
2317 int *assignedNode =
new int[numPatches];
2323 NAMD_die(
"simulateInitialMapping not supported by assignPatchesSpaceFillingCurve()");
2328 for (
int i=0; i<numPatches; ++i ) {
2329 patchOrdering[i] = i;
2333 nodeOrdering.resize(0);
2334 for (
int i=0; i<numNodes; ++i ) {
2336 if (
simParams->noPatchesOnZero && numNodes > 1 ) {
2337 if ( pe == 0 )
continue;
2338 if(
simParams->noPatchesOnOne && numNodes > 2) {
2339 if ( pe == 1 )
continue;
2342 #ifdef MEM_OPT_VERSION 2347 nodeOrdering.add(pe);
2348 if ( 0 ) CkPrintf(
"using pe %5d\n", pe);
2351 int *node_begin = nodeOrdering.begin();
2352 int *node_end = nodeOrdering.end();
2353 if ( nodeOrdering.size() > numPatches ) {
2354 node_end = node_begin + numPatches;
2356 std::sort(node_begin, node_end, pe_sortop_compact());
2358 int *basenode_begin = node_begin;
2359 int *basenode_end = node_end;
2360 if ( nodeOrdering.size() > 2*numPatches ) {
2361 basenode_begin = node_end;
2362 basenode_end = basenode_begin + numPatches;
2363 std::sort(basenode_begin, basenode_end, pe_sortop_compact());
2367 iout <<
iWARN <<
"IGNORING TORUS TOPOLOGY DURING PATCH PLACEMENT\n" <<
endi;
2371 patchOrdering.begin(), patchOrdering.end(),
2372 node_begin, node_end,
2373 patchLoads.begin(), sortedLoads.begin(), assignedNode, tmgr);
2375 std::sort(node_begin, node_end, pe_sortop_compact());
2377 int samenodecount = 0;
2379 for (
int pid=0; pid<numPatches; ++pid ) {
2380 int node = assignedNode[pid];
2382 int nodeidx = std::lower_bound(node_begin, node_end, node,
2383 pe_sortop_compact()) - node_begin;
2384 int basenode = basenode_begin[nodeidx];
2386 if ( CmiPeOnSamePhysicalNode(node,basenode) ) ++samenodecount;
2389 iout <<
iINFO <<
"Placed " << (samenodecount*100./numPatches) <<
"% of base nodes on same physical node as patch\n" <<
endi;
2391 delete [] assignedNode;
2399 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
2400 Node *node = nd.ckLocalBranch();
2402 DebugM(3,
"Mapping computes\n");
2411 mapComputeHomePatches(computeDPMTAType);
2413 NAMD_die(
"This binary does not include DPMTA (FMA).");
2418 mapComputeHomePatches(computeDPMEType);
2425 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 2439 DebugM(2,
"adding ComputeGlobal\n");
2457 #ifdef CHARM_HAS_MSA 2468 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 2471 mapComputeNode(computeBondedCUDAType);
2476 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 2486 mapComputeNonbonded();
2552 CkPrintf(
"ComputeMap has been loaded from %s.\n",
simParams->computeMapFilename);
2557 void WorkDistrib::mapComputeHomeTuples(
ComputeType type)
2561 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
2562 Node *node = nd.ckLocalBranch();
2570 char *isBaseNode =
new char[numNodes];
2571 memset(isBaseNode,0,numNodes*
sizeof(
char));
2574 for(
int j=0; j<numPatches; j++) {
2575 isBaseNode[patchMap->
basenode(j)] = 1;
2578 for(
int i=0; i<numNodes; i++) {
2579 if ( isBaseNode[i] ) {
2584 delete [] isBaseNode;
2588 void WorkDistrib::mapComputeHomePatches(
ComputeType type)
2592 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
2593 Node *node = nd.ckLocalBranch();
2601 for(
int i=0; i<numNodes; i++) {
2609 void WorkDistrib::mapComputePatch(
ComputeType type)
2620 computeMap->
newPid(cid,i);
2627 void WorkDistrib::mapComputeNode(
ComputeType type)
2635 int ncpus = CkNumPes();
2641 for(
int i=0; i<ncpus; i++) {
2648 void WorkDistrib::mapComputeNonbonded(
void)
2656 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
2657 Node *node = nd.ckLocalBranch();
2659 int ncpus = CkNumPes();
2660 int nodesize = CkMyNodeSize();
2663 nodesize =
simParams->simulatedNodeSize;
2674 double partScaling = 1.0;
2675 if ( ncpus < patchMap->numPatches() ) {
2676 partScaling = ((double)ncpus) / ((double)patchMap->
numPatches());
2682 int numPartitions = 1;
2685 #ifdef MEM_OPT_VERSION 2686 int64 numFixed = patchMap->numFixedAtoms(i);
2687 int64 numAtoms = patchMap->numAtoms(i);
2695 numPartitions = (int) ( partScaling * ( 0.5 +
2696 (numAtoms*numAtoms-numFixed*numFixed) / (
double)(2*divide*divide) ) );
2698 if (numPartitions < 1) numPartitions = 1;
2702 DebugM(4,
"Mapping " << numPartitions <<
" ComputeNonbondedSelf objects for patch " << i <<
"\n");
2717 computeMap->
newPid(cid,i);
2722 for(
int p1=0; p1 <patchMap->
numPatches(); p1++)
2726 for(j=0;j<numNeighbors;j++)
2728 int p2 = oneAway[j];
2729 int dsp = oneAwayDownstream[j];
2731 int numPartitions = 1;
2734 #ifdef MEM_OPT_VERSION 2735 int64 numAtoms1 = patchMap->numAtoms(p1);
2736 int64 numAtoms2 = patchMap->numAtoms(p2);
2737 int64 numFixed1 = patchMap->numFixedAtoms(p1);
2738 int64 numFixed2 = patchMap->numFixedAtoms(p2);
2747 const int t2 = oneAwayTrans[j];
2754 const int ia1 = patchMap->
index_a(p1);
2756 const int ib1 = patchMap->
index_b(p1);
2758 const int ic1 = patchMap->
index_c(p1);
2761 if ( abs(ia2-ia1) > nax ||
2762 abs(ib2-ib1) > nay ||
2763 abs(ic2-ic1) > naz )
2764 NAMD_bug(
"Bad patch distance in WorkDistrib::mapComputeNonbonded");
2767 if ( ia1 == ia2 ) --distance;
2768 else if ( ia1 == ia2 + nax - 1 ) --distance;
2769 else if ( ia1 + nax - 1 == ia2 ) --distance;
2770 if ( ib1 == ib2 ) --distance;
2771 else if ( ib1 == ib2 + nay - 1 ) --distance;
2772 else if ( ib1 + nay - 1 == ib2 ) --distance;
2773 if ( ic1 == ic2 ) --distance;
2774 else if ( ic1 == ic2 + naz - 1 ) --distance;
2775 else if ( ic1 + naz - 1 == ic2 ) --distance;
2777 if ( distance == 0 ) {
2779 }
else if (distance == 1) {
2785 numPartitions = (int) ( partScaling * ( 0.5 +
2786 (numAtoms1*numAtoms2-numFixed1*numFixed2)/(
double)(divide*divide) ) );
2788 if ( numPartitions < 1 ) numPartitions = 1;
2798 computeMap->
newPid(cid,p1);
2799 computeMap->
newPid(cid,p2,oneAwayTrans[j]);
2800 patchMap->
newCid(p1,cid);
2801 patchMap->
newCid(p2,cid);
2808 void WorkDistrib::mapComputeLCPO(
void) {
2813 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
2814 Node *node = nd.ckLocalBranch();
2816 int ncpus = CkNumPes();
2817 int nodesize = CkMyNodeSize();
2818 const int maxPatches = 8;
2820 int numPatchesInOctet;
2821 PatchID patchesInOctet[maxPatches];
2822 int oneAwayTrans[maxPatches];
2825 int numPartitions = 1;
2841 for (
int p = 0; p < numPatchesInOctet; p++) {
2842 computeMap->
newPid(cid, patchesInOctet[p], oneAwayTrans[p]);
2844 for (
int p = 0; p < numPatchesInOctet; p++) {
2845 patchMap->
newCid(patchesInOctet[p],cid);
2858 NAMD_bug(
"compute->sequence() < 0 in WorkDistrib::messageEnqueueWork");
2864 int type = compute->
type();
2865 int cid = compute->
cid;
2867 CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
2871 wdProxy[CkMyPe()].enqueueExcls(msg);
2875 wdProxy[CkMyPe()].enqueueBonds(msg);
2879 wdProxy[CkMyPe()].enqueueAngles(msg);
2883 wdProxy[CkMyPe()].enqueueDihedrals(msg);
2887 wdProxy[CkMyPe()].enqueueImpropers(msg);
2891 wdProxy[CkMyPe()].enqueueThole(msg);
2895 wdProxy[CkMyPe()].enqueueAniso(msg);
2899 wdProxy[CkMyPe()].enqueueCrossterms(msg);
2904 wdProxy[CkMyPe()].enqueueGromacsPair(msg);
2908 wdProxy[CkMyPe()].enqueueLCPO(msg);
2911 switch ( seq % 2 ) {
2914 switch ( gbisPhase ) {
2916 wdProxy[CkMyPe()].enqueueSelfA1(msg);
2919 wdProxy[CkMyPe()].enqueueSelfA2(msg);
2922 wdProxy[CkMyPe()].enqueueSelfA3(msg);
2928 switch ( gbisPhase ) {
2930 wdProxy[CkMyPe()].enqueueSelfB1(msg);
2933 wdProxy[CkMyPe()].enqueueSelfB2(msg);
2936 wdProxy[CkMyPe()].enqueueSelfB3(msg);
2941 NAMD_bug(
"WorkDistrib::messageEnqueueSelf case statement error!");
2945 switch ( seq % 2 ) {
2948 switch ( gbisPhase ) {
2950 wdProxy[CkMyPe()].enqueueWorkA1(msg);
2953 wdProxy[CkMyPe()].enqueueWorkA2(msg);
2956 wdProxy[CkMyPe()].enqueueWorkA3(msg);
2962 switch ( gbisPhase ) {
2964 wdProxy[CkMyPe()].enqueueWorkB1(msg);
2967 wdProxy[CkMyPe()].enqueueWorkB2(msg);
2970 wdProxy[CkMyPe()].enqueueWorkB3(msg);
2975 wdProxy[CkMyPe()].enqueueWorkC(msg);
2978 NAMD_bug(
"WorkDistrib::messageEnqueueWork case statement error!");
2981 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 2985 switch ( gbisPhase ) {
2987 wdProxy[CkMyPe()].enqueueCUDA(msg);
2990 wdProxy[CkMyPe()].enqueueCUDAP2(msg);
2993 wdProxy[CkMyPe()].enqueueCUDAP3(msg);
3002 wdProxy[CkMyPe()].enqueueMIC(msg);
3007 wdProxy[CkMyPe()].enqueuePme(msg);
3009 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 3011 wdProxy[CkMyPe()].enqueuePme(msg);
3015 wdProxy[CkMyPe()].enqueueWork(msg);
3026 NAMD_bug(
"compute->sequence() < 0 in WorkDistrib::messageEnqueueWork");
3032 CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
3034 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 3036 switch ( gbisPhase ) {
3038 wdProxy[CkMyPe()].finishCUDA(msg);
3041 wdProxy[CkMyPe()].finishCUDAP2(msg);
3044 wdProxy[CkMyPe()].finishCUDAP3(msg);
3059 NAMD_bug(
"compute->sequence() < 0 in WorkDistrib::messageFinishMIC");
3065 CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
3068 wdProxy[CkMyPe()].finishMIC(msg);
3077 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3083 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3089 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3095 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3101 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3107 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3113 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3119 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3125 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3133 NAMD_bug(
"\nWorkDistrib LocalWorkMsg recycling failed! Check enqueueGromacsPair from WorkDistrib.C\n");
3140 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3146 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3151 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3156 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3161 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3167 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3172 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3177 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3183 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3188 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3193 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3199 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3204 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3209 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3217 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3266 void WorkDistrib::velocities_from_PDB(
const char *filename,
3267 Vector *v,
int totalAtoms)
3273 v_pdb =
new PDB(filename);
3274 if ( v_pdb == NULL )
3276 NAMD_die(
"memory allocation failed in Node::velocities_from_PDB");
3285 sprintf(err_msg,
"FOUND %d COORDINATES IN VELOCITY PDB!!",
3295 for (i=0; i<totalAtoms; i++)
3320 void WorkDistrib::velocities_from_binfile(
const char *fname,
Vector *vels,
int n)
3341 Vector *v,
int totalAtoms)
3351 int lesReduceTemp = lesOn &&
simParams->lesReduceTemp;
3358 for (i=0; i<totalAtoms; i++)
3360 if (structure->
atommass(i) <= 0.) {
3363 kbToverM = sqrt(kbT *
3364 ( lesOn && structure->
get_fep_type(i) ? tempFactor : 1.0 ) /
3377 for (randnum=0.0, j=0; j<12; j++)
3379 randnum += vel_random.uniform();
3384 v[i].
x = randnum*kbToverM;
3386 for (randnum=0.0, j=0; j<12; j++)
3388 randnum += vel_random.uniform();
3393 v[i].
y = randnum*kbToverM;
3395 for (randnum=0.0, j=0; j<12; j++)
3397 randnum += vel_random.uniform();
3402 v[i].
z = randnum*kbToverM;
3405 if (
simParams->drudeOn )
for (i=0; i<totalAtoms; i++) {
3424 void WorkDistrib::remove_com_motion(
Vector *vel,
Molecule *structure,
int n)
3434 mv += mass * vel[i];
3440 iout <<
iINFO <<
"REMOVING COM VELOCITY " 3443 for (i=0; i<n; i++) { vel[i] -= mv; }
3452 int WorkDistrib::assignPatchesTopoGridRecBisection() {
3455 int *assignedNode =
new int[patchMap->
numPatches()];
3462 int usedNodes = numNodes;
3463 CkPrintf(
"assignPatchesTopoGridRecBisection\n");
3464 if (
simParams->noPatchesOnZero && numNodes > 1 ) {
3466 if (
simParams->noPatchesOnOne && numNodes > 2 )
3471 int xsize = 0, ysize = 0, zsize = 0;
3475 xsize = tmgr.getDimNX();
3476 ysize = tmgr.getDimNY();
3477 zsize = tmgr.getDimNZ();
3480 int rc = recBisec.partitionProcGrid(xsize, ysize, zsize, assignedNode);
3482 delete [] assignedNode;
3489 #if defined(NAMD_MIC) 3490 extern void mic_hostDeviceLDB();
3491 extern void mic_contributeHostDeviceLDB(
int idLen,
int *
id);
3492 extern void mic_setDeviceLDBParams(
int dt,
int hs,
int sp1,
int pp1,
int pp2);
3496 #if defined(NAMD_MIC) 3497 CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
3498 wdProxy.initHostDeviceLDB();
3503 #if defined(NAMD_MIC) 3504 mic_hostDeviceLDB();
3509 #if defined(NAMD_MIC) 3510 CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
3511 wdProxy[0].contributeHostDeviceLDB(peSetLen, peSet);
3516 #if defined(NAMD_MIC) 3517 mic_contributeHostDeviceLDB(peSetLen, peSet);
3522 #if defined(NAMD_MIC) 3523 CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
3524 wdProxy.setDeviceLDBParams(dt, hs, sp1, pp1, pp2);
3529 #if defined(NAMD_MIC) 3530 mic_setDeviceLDBParams(dt, hs, sp1, pp1, pp2);
3535 #include "WorkDistrib.def.h"
Real atomcharge(int anum) const
bool operator()(int p1, int p2) const
void setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2)
void enqueueMIC(LocalWorkMsg *msg)
std::ostream & iINFO(std::ostream &s)
static void sortPmePes(int *pmepes, int xdim, int ydim)
ScaledPosition center(int pid) const
Bool simulateInitialMapping
static void messageFinishMIC(Compute *)
bool operator()(int a, int b) const
int isSendSpanningTreeUnset()
patch_sortop_curve_b(PatchMap *m)
void enqueueAngles(LocalWorkMsg *msg)
static void messageFinishCUDA(Compute *)
int getNumFixedAtoms() const
PatchID assignToPatch(Position p, const Lattice &l)
void setNewNumPartitions(ComputeID cid, char numPartitions)
static bool less_than_bit_reversed(int a, int b)
static void recursive_bisect_with_curve(int *patch_begin, int *patch_end, int *node_begin, int *node_end, double *patchLoads, double *sortedLoads, int *assignedNode, TopoManagerWrapper &tmgr)
void saveComputeMap(const char *fname)
static ProxyMgr * Object()
NAMD_HOST_DEVICE int c_p() const
static int * peCompactOrdering
BigReal max_a(int pid) const
void finishCUDAP3(LocalWorkMsg *msg)
void enqueueCrossterms(LocalWorkMsg *msg)
bool operator()(int p1, int p2) const
static void partition(int *order, const FullAtom *atoms, int begin, int end)
int isRecvSpanningTreeUnset()
void enqueuePme(LocalWorkMsg *msg)
static PatchMap * Object()
void enqueueWorkA3(LocalWorkMsg *msg)
void enqueueWork(LocalWorkMsg *msg)
void enqueueGromacsPair(LocalWorkMsg *msg)
void enqueueSelfA1(LocalWorkMsg *msg)
void finishCUDAP2(LocalWorkMsg *msg)
static void send_contributeHostDeviceLDB(int peSetLen, int *peSet)
SimParameters * simParameters
void loadComputeMap(const char *fname)
Bool CUDASOAintegrateMode
void createHomePatch(PatchID pid, FullAtomList &a)
void sendAtoms(PatchID pid, FullAtomList &a)
void enqueueExcls(LocalWorkMsg *msg)
void enqueueBonds(LocalWorkMsg *msg)
std::ostream & endi(std::ostream &s)
void enqueueAniso(LocalWorkMsg *msg)
void enqueueSelfB1(LocalWorkMsg *msg)
void enqueueWorkB1(LocalWorkMsg *msg)
static void messageEnqueueWork(Compute *)
static void peOrderingReady()
std::ostream & iWARN(std::ostream &s)
int operator==(const nodesort &o) const
MIStream * get(char &data)
int index_a(int pid) const
int sizeGrid(ScaledPosition xmin, ScaledPosition xmax, const Lattice &lattice, BigReal patchSize, double maxNumPatches, int staticAtomAssignment, int asplit, int bsplit, int csplit)
ComputeID storeCompute(int node, int maxPids, ComputeType type, int partition=-1, int numPartitions=0)
Patch * patch(PatchID pid)
void enqueueSelfA3(LocalWorkMsg *msg)
int add(const Elem &elem)
bool operator()(int pe1, int pe2) const
Molecule stores the structural information for the system.
NAMD_HOST_DEVICE int b_p() const
void movePatch(PatchID, NodeID)
LocalWorkMsg *const localWorkMsg
void recvComputeMapChanges(ComputeMapChangeMsg *)
int gridsize_c(void) const
char newNumPartitions(ComputeID cid)
void reorder(Elem *a, int n)
HydrogenGroup hydrogenGroup
void enqueueCUDA(LocalWorkMsg *msg)
void sendComputeMap(void)
void enqueueWorkB2(LocalWorkMsg *msg)
void enqueueCUDAP2(LocalWorkMsg *msg)
void assignBaseNode(PatchID, NodeID)
static void recursive_bisect_coord(int x_begin, int x_end, int y_begin, int y_end, int *pe_begin, ScaledPosition *coord, int *result, int ydim)
void newCid(int pid, int cid)
constexpr int getWaterModelGroupSize(const WaterModel &watmodel)
void enqueueSelfB3(LocalWorkMsg *msg)
int coord(int pe, int dim)
int gridsize_a(void) const
TopoManagerWrapper & tmgr
int numPatches(void) const
static NAMD_HOST_DEVICE int offset_b(int i)
void enqueueWorkC(LocalWorkMsg *msg)
pe_sortop_bit_reversed(int *r)
void reinitAtoms(const char *basename=0)
int operator<(const nodesort &o) const
void enqueueThole(LocalWorkMsg *msg)
void enqueueWorkA2(LocalWorkMsg *msg)
NAMD_HOST_DEVICE Position apply_transform(Position data, const Transform &t) const
void createHomePatches(void)
void NAMD_bug(const char *err_msg)
static NAMD_HOST_DEVICE int offset_c(int i)
void enqueueImpropers(LocalWorkMsg *msg)
BigReal min_c(int pid) const
static int eventMachineProgress
Real langevin_param(int atomnum) const
Index atomvdwtype(int anum) const
int numaway_c(void) const
void enqueueLCPO(LocalWorkMsg *msg)
int oneOrTwoAwayNeighbors(int pid, PatchID *neighbor_ids, PatchID *downstream_ids=0, int *transform_ids=0)
int index_b(int pid) const
Bool staticAtomAssignment
pe_sortop_coord_y(ScaledPosition *s)
Bool replicaUniformPatchGrids
bool operator()(int a, int b) const
void finishCUDA(LocalWorkMsg *msg)
void get_extremes(ScaledPosition &xmin, ScaledPosition &xmax) const
int numaway_a(void) const
NAMD_HOST_DEVICE int a_p() const
NAMD_HOST_DEVICE Vector a_r() const
NAMD_HOST_DEVICE Vector b_r() const
void setNewNode(ComputeID cid, NodeID node)
virtual void finishPatch(int)
NAMD_HOST_DEVICE Position nearest(Position data, ScaledPosition ref) const
void NAMD_die(const char *err_msg)
void enqueueCUDAP3(LocalWorkMsg *msg)
static int * peDiffuseOrderingIndex
BigReal min_a(int pid) const
NAMD_HOST_DEVICE Vector c_r() const
Real atommass(int anum) const
static int compare_bit_reversed(int a, int b)
void enqueueWorkA1(LocalWorkMsg *msg)
Bool pressureProfileEwaldOn
std::vector< std::string > split(const std::string &text, std::string delimiter)
static int * peDiffuseOrdering
void makePatches(ScaledPosition xmin, ScaledPosition xmax, const Lattice &lattice, BigReal patchSize, double maxNumPatches, int staticAtomAssignment, int replicaUniformPatchGrids, int lcpo, int asplit, int bsplit, int csplit)
int basenode(int pid) const
int index_c(int pid) const
unsigned char get_fep_type(int anum) const
static int peOrderingInit
void find_extremes(const Lattice &, BigReal frac=1.0)
void saveComputeMapChanges(int, CkGroupID)
int32 status
Atom status bit fields defined in structures.h.
void finishCUDAPatch(FinishWorkMsg *msg)
void savePatchMap(PatchMapMsg *msg)
void topo_getargs(char **argv)
static int * peCompactOrderingIndex
static void buildNodeAwarePeOrdering(void)
patch_sortop_curve_a(PatchMap *m)
int pressureProfileAtomTypes
int atomsInMigrationGroup
void newPid(ComputeID cid, int pid, int trans=13)
static void send_setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2)
static NAMD_HOST_DEVICE int offset_a(int i)
BigReal max_b(int pid) const
void enqueueSelfA2(LocalWorkMsg *msg)
static ComputeMap * Object()
static void build_ordering(void *)
int numaway_b(void) const
void distributeHomePatches(void)
void assignNode(PatchID, NodeID)
patch_sortop_curve_c(PatchMap *m)
BigReal max_c(int pid) const
void enqueueSelfB2(LocalWorkMsg *msg)
int gridsize_b(void) const
int numPatchesOnNode(int node)
MOStream * put(char data)
static void send_initHostDeviceLDB()
FullAtomList * createAtomLists(const char *basename=0)
#define SET_PRIORITY(MSG, SEQ, PRIO)
pe_sortop_coord_x(ScaledPosition *s)
void enqueueDihedrals(LocalWorkMsg *msg)
Bool is_atom_fixed(int atomnum) const
void finishMIC(LocalWorkMsg *msg)
StringList * find(const char *name) const
void contributeHostDeviceLDB(int peSetLen, int *peSet)
void pack(char *buf, int size)
int isOutputProcessor(int pe)
void doneSaveComputeMap(CkReductionMsg *)
void unpack(MIStream *msg)
Real rigid_bond_length(int atomnum) const
__thread DeviceCUDA * deviceCUDA
void get_all_positions(Vector *)
BigReal min_b(int pid) const
pe_sortop_topo(TopoManagerWrapper &t, int *d)
void coords(int pe, int *crds)
void enqueueWorkB3(LocalWorkMsg *msg)
NAMD_HOST_DEVICE Vector origin() const
bool operator()(int a, int b) const
Bool noPatchesOnOutputPEs
int * sortAndSplit(int *node_begin, int *node_end, int splitdim)
void sortAtomsForPatches(int *order, int *breaks, const FullAtom *atoms, int nmgrps, int natoms, int ni, int nj, int nk)
bool operator()(int p1, int p2) const
void assignNodeToPatch(void)
int getPatchesInOctet(int pid, PatchID *pids, int *transform_ids=0)
NodeID newNode(ComputeID cid)