27 #include "WorkDistrib.decl.h" 31 #include "main.decl.h" 51 #include "TopoManager.h" 56 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 58 #define __thread __declspec(thread) 64 #define MIN_DEBUG_LEVEL 2 66 #ifdef MEM_OPT_VERSION 94 randtopo = CmiGetArgFlag(argv,
"+randtopo");
95 if ( CkMyPe() >= CkNumPes() )
return;
96 #if CCD_COND_FN_EXISTS 97 CcdCallOnCondition(CcdTOPOLOGY_AVAIL, (CcdCondFn)
build_ordering, (
void*)0);
99 CcdCallOnCondition(CcdTOPOLOGY_AVAIL, (CcdVoidFn)
build_ordering, (
void*)0);
110 CkpvAccess(BOCclass_group).workDistrib = thisgroup;
111 patchMapArrived =
false;
112 computeMapArrived =
false;
115 #define MACHINE_PROGRESS 117 #define MACHINE_PROGRESS { traceUserEvent(eventMachineProgress); CmiMachineProgressImpl(); } 118 if ( CkMyNodeSize() > 1 )
NAMD_bug(
"CkMyNodeSize() > 1 for non-smp build");
130 if ( d )
while ( ! (d & c) ) {
133 return (a & c) - (b & c);
139 if ( d )
while ( ! (d & c) ) {
150 if ( c < 0 )
return true;
151 if ( c > 0 )
return false;
154 if ( c < 0 )
return true;
155 if ( c > 0 )
return false;
167 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 175 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 189 const int numPhys = CmiNumPhysicalNodes();
190 const int numNode = CmiNumNodes();
191 const int numPe = CmiNumPes();
201 for (
int ph=0; ph<numPhys; ++ph ) {
203 CmiGetPesOnPhysicalNode(ph, &pes, &npes);
204 for (
int i=0; i<npes; ++i, ++k ) {
207 numNodeInPhys[ph] = 0;
208 for (
int i=0, j=0; i<npes; i += CmiNodeSize(CmiNodeOf(pes[i])), ++j ) {
209 rankInPhysOfNode[CmiNodeOf(pes[i])] = j;
210 numNodeInPhys[ph] += 1;
215 if ( ! CkMyNode() ) {
216 iout <<
iWARN <<
"RANDOMIZING PHYSICAL NODE ORDERING\n" <<
endi;
219 for (
int j=0; j<numPhys; ++j ) {
220 randPhysOrder[j] = j;
223 for (
int j=0, k=0; j<numPhys; ++j ) {
224 const int ph = randPhysOrder[j];
226 CmiGetPesOnPhysicalNode(ph, &pes, &npes);
227 for (
int i=0; i<npes; ++i, ++k ) {
233 for (
int i=0; i<numPe; ++i ) {
239 for (
int i=0; i<numPe; ++i ) {
244 if ( 0 && CmiMyNode() == 0 )
for (
int i=0; i<numPe; ++i ) {
245 CkPrintf(
"order %5d %5d %5d %5d %5d\n", i,
276 int x_begin,
int x_end,
int y_begin,
int y_end,
278 int *result,
int ydim
280 int x_len = x_end - x_begin;
281 int y_len = y_end - y_begin;
282 if ( x_len == 1 && y_len == 1 ) {
284 if ( 0 ) CkPrintf(
"pme %5d %5d on pe %5d at %f %f\n", x_begin, y_begin, *pe_begin,
285 coord[*pe_begin].x, coord[*pe_begin].y);
286 result[x_begin*ydim + y_begin] = *pe_begin;
289 int *pe_end = pe_begin + x_len * y_len;
290 if ( x_len >= y_len ) {
292 int x_split = x_begin + x_len / 2;
293 int* pe_split = pe_begin + (x_split - x_begin) * y_len;
299 int y_split = y_begin + y_len / 2;
300 int* pe_split = pe_begin + (y_split - y_begin) * x_len;
308 int numpes = CkNumPes();
312 for (
int i=0; i<numpes; ++i ) {
318 for (
int i=0, npatches=patchMap->
numPatches(); i<npatches; ++i ) {
319 int pe = patchMap->
node(i);
321 sumPos[pe] += patchMap->
center(i);
323 const int npmepes = xdim*ydim;
325 for (
int i=0; i<npmepes; ++i ) {
326 int pe = sortpes[i] = pmepes[i];
331 int node = CkNodeOf(pe);
332 int nsize = CkNodeSize(node);
333 int pe2 = CkNodeFirst(node);
334 for (
int j=0; j<nsize; ++j, ++pe2 ) {
341 int node = CmiPhysicalNodeID(pe);
343 CmiGetPesOnPhysicalNode(node, &nlist, &nsize);
344 for (
int j=0; j<nsize; ++j ) {
351 avgPos[pe] = sum / cnt;
361 saveComputeMapReturnEP = ep;
362 saveComputeMapReturnChareID = chareID;
365 CProxy_WorkDistrib(thisgroup).recvComputeMapChanges(mapMsg);
390 for (i=0; i<nc; i++) {
391 int data = computeMap->
newNode(i);
395 for (i=0; i<nc; i++) {
403 }
else if ( ! CkMyRank() ) {
407 if ( i != nc )
NAMD_bug(
"WorkDistrib::recvComputeMapChanges check 1 failed\n");
408 for (i=0; i<nc; i++) {
414 if ( i != nc )
NAMD_bug(
"WorkDistrib::recvComputeMapChanges check 2 failed\n");
415 for (i=0; i<nc; i++) {
421 if ( i != nc )
NAMD_bug(
"WorkDistrib::recvComputeMapChanges check 3 failed\n");
426 CkCallback cb(CkIndex_WorkDistrib::doneSaveComputeMap(NULL), 0, thisgroup);
427 contribute(0, NULL, CkReduction::random, cb);
433 CkSendMsgBranch(saveComputeMapReturnEP, CkAllocMsg(0,0,0), 0, saveComputeMapReturnChareID);
436 #ifdef MEM_OPT_VERSION 441 void WorkDistrib::fillAtomListForOnePatch(
int pid,
FullAtomList &alist){
445 0.5*(patchMap->
min_b(pid)+patchMap->
max_b(pid)),
446 0.5*(patchMap->
min_c(pid)+patchMap->
max_c(pid)));
448 int n = alist.
size();
464 for(
int j=0; j < n; j++)
471 if ( a[j].migrationGroupSize ) {
472 if ( a[j].migrationGroupSize != a[j].hydrogenGroupSize ) {
477 for (
int k=a[j].hydrogenGroupSize; k<mgs;
485 pos = lattice.
nearest(pos,center,&mother_transform);
489 a[j].
position = lattice.
nearest(a[j].position, center, &(a[j].transform));
518 }
else if ((a[j].status &
DrudeAtom)!=0) {
533 for(
int j=0; j < n; j+=size) {
536 NAMD_bug(
"Mother atom with hydrogenGroupSize of 0!");
539 for (
int k = 0; k < size; ++k ) {
540 allfixed = ( allfixed && (a[j+k].
atomFixed) );
542 for (
int k = 0; k < size; ++k ) {
548 if (a[j].rigidBondLength > 0) {
549 if (size != wathgsize) {
552 "Water molecule starting with atom %d contains %d atoms " 553 "but the specified water model requires %d atoms.\n",
554 a[j].
id+1, size, wathgsize
559 for (
int k = 0; k < size; k++) {
560 anyfixed += ( fixedAtomsOn && a[j+k].
atomFixed );
562 if (useSettle && !anyfixed) {
563 for (
int k = 0; k < size; k++) {
572 int numAtomsInPatch = n;
573 int numFixedAtomsInPatch = 0;
574 int numAtomsInFixedGroupsInPatch = 0;
575 for(
int j=0; j < n; j++) {
576 numFixedAtomsInPatch += ( a[j].
atomFixed ? 1 : 0 );
577 numAtomsInFixedGroupsInPatch += ( a[j].
groupFixed ? 1 : 0 );
579 iout <<
"PATCH_DETAILS:" 580 <<
" on proc " << CkMyPe()
581 <<
" patch " << patchId
582 <<
" atoms " << numAtomsInPatch
583 <<
" fixed_atoms " << numFixedAtomsInPatch
584 <<
" fixed_groups " << numAtomsInFixedGroupsInPatch
599 int lesReduceTemp = lesOn &&
simParams->lesReduceTemp;
604 int totalAtoms = inAtoms.
size();
605 for(i=0;i<totalAtoms;i++)
607 Real atomMs=inAtoms[i].mass;
619 kbToverM = sqrt(kbT * 1.0 / atomMs);
621 for (randnum=0.0, j=0; j<12; j++)
623 randnum += vel_random.uniform();
628 inAtoms[i].velocity.x = randnum*kbToverM;
630 for (randnum=0.0, j=0; j<12; j++)
632 randnum += vel_random.uniform();
637 inAtoms[i].velocity.y = randnum*kbToverM;
639 for (randnum=0.0, j=0; j<12; j++)
641 randnum += vel_random.uniform();
646 inAtoms[i].velocity.z = randnum*kbToverM;
658 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
659 Node *node = nd.ckLocalBranch();
661 CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
662 PatchMgr *patchMgr = pm.ckLocalBranch();
675 read_binary_file((std::string(basename)+
".coor").c_str(), positions, numAtoms);
676 read_binary_file((std::string(basename)+
".vel").c_str(), velocities, numAtoms);
678 PDB coorpdb((std::string(basename)+
".coor").c_str());
680 NAMD_die(
"Incorrect atom count in coordinate pdb file");
683 velocities_from_PDB((std::string(basename)+
".vel").c_str(), velocities, numAtoms);
694 if (current == NULL) {
700 velocities_from_PDB(current->
data, velocities, numAtoms);
703 velocities_from_binfile(current->
data, velocities, numAtoms);
708 random_velocities(params->
initialTemp, molecule, velocities, numAtoms);
714 remove_com_motion(velocities, molecule, numAtoms);
723 for ( i=0; i < numAtoms; i++ ) {
725 if ( ! h.
isMP )
continue;
733 for ( i=0; i < sortAtoms.
size(); i++ ) {
736 int *breaks =
new int[numPatches];
738 sortAtoms.
size(),numAtoms,
744 for (
int pid = 0; pid < numPatches; ++pid ) {
745 int iend = breaks[pid];
746 for ( ; i<iend; ++i ) {
755 for (
int k=0; k<mgs; ++k ) {
775 CkPrintf(
"patch %d (%d %d %d) has %d atoms\n",
785 for(i=0; i < numAtoms; i++)
814 delete [] velocities;
816 for(i=0; i < numPatches; i++)
822 int n = atoms[i].
size();
844 if ( a[j].migrationGroupSize ) {
845 if ( a[j].migrationGroupSize != a[j].hydrogenGroupSize ) {
849 for (
int k=a[j].hydrogenGroupSize; k<mgs;
856 pos = lattice.
nearest(pos,center,&mother_transform);
861 a[j].position, center, &(a[j].transform));
874 const int index = a[j].
vdwType;
876 float sigma, epsilon, sigma14, epsilon14;
877 molecule->params->
get_vdw_params(&sigma, &epsilon, &sigma14, &epsilon14, index);
878 a[j].
dispcoef = 2*sigma*sigma*sigma*sqrt(scaling * epsilon);
885 if ( alchOn || lesOn || pairInteractionOn || pressureProfileTypes) {
896 int size, allfixed, k;
897 for(j=0; j < n; j+=size) {
900 NAMD_bug(
"Mother atom with hydrogenGroupSize of 0!");
903 for ( k = 0; k < size; ++k ) {
904 allfixed = ( allfixed && (a[j+k].
atomFixed) );
906 for ( k = 0; k < size; ++k ) {
912 if (a[j].rigidBondLength > 0) {
913 for (k = 0; k < size; k++) {
926 for(
int j=0; j < n; j+=size) {
929 NAMD_bug(
"Mother atom with hydrogenGroupSize of 0!");
932 for (
int k = 0; k < size; ++k ) {
933 allfixed = ( allfixed && (a[j+k].
atomFixed) );
935 for (
int k = 0; k < size; ++k ) {
941 if (a[j].rigidBondLength > 0) {
942 if (size != wathgsize) {
945 "Water molecule starting with atom %d contains %d atoms " 946 "but the specified water model requires %d atoms.\n",
947 a[j].
id+1, size, wathgsize
952 for (
int k = 0; k < size; k++) {
953 anyfixed += ( fixedAtomsOn && a[j+k].
atomFixed );
955 if (useSettle && !anyfixed) {
956 for (
int k = 0; k < size; k++) {
966 int numAtomsInPatch = n;
967 int numFixedAtomsInPatch = 0;
968 int numAtomsInFixedGroupsInPatch = 0;
969 for(j=0; j < n; j++) {
970 numFixedAtomsInPatch += ( a[j].
atomFixed ? 1 : 0 );
971 numAtomsInFixedGroupsInPatch += ( a[j].
groupFixed ? 1 : 0 );
973 iout <<
"PATCH_DETAILS:" 974 <<
" patch " << patchId
975 <<
" atoms " << numAtomsInPatch
976 <<
" fixed_atoms " << numFixedAtomsInPatch
977 <<
" fixed_groups " << numAtomsInFixedGroupsInPatch
993 CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
994 PatchMgr *patchMgr = pm.ckLocalBranch();
1000 #ifdef MEM_OPT_VERSION 1010 for(i=0; i < numPatches; i++) {
1011 int numAtoms = atoms[i].
size();
1012 if ( numAtoms > maxAtoms ) { maxAtoms = numAtoms; maxPatch = i; }
1014 iout <<
iINFO <<
"LARGEST PATCH (" << maxPatch <<
1015 ") HAS " << maxAtoms <<
" ATOMS\n" <<
endi;
1017 #ifdef SHOW_HISTOGRAM_HGROUP_SIZES 1019 int hgroupsize[9] = { 0 };
1022 int maxhgroupsize = 0;
1023 for (i = 0; i < numPatches; i++) {
1025 int numAtoms = a.
size();
1027 for (
int j = 0; j < numAtoms; j += hgs) {
1028 hgs = a[j].hydrogenGroupSize;
1029 int histndx = ( hgs > 8 ? 8 : hgs );
1030 hgroupsize[ histndx ]++;
1032 if (a[j].
isWater) numwaters++;
1033 if (maxhgroupsize < hgs) maxhgroupsize = hgs;
1036 int hgslast = ( maxhgroupsize > 8 ? 8 : maxhgroupsize );
1037 printf(
"Number of hydrogen groups: %7d\n", numhgroups);
1038 printf(
"Number of settle water molecules: %7d\n", numwaters);
1039 printf(
"Number of remaining hydrogen groups: %7d\n", numhgroups - numwaters);
1040 printf(
"Largest hydrogen group size: %7d\n", maxhgroupsize);
1041 printf(
"Histogram of hydrogen group sizes:\n");
1043 for (i = 0; i <= hgslast; i++) {
1044 printf(
" size %d count %d\n", i, hgroupsize[i]);
1045 hgstotal += hgroupsize[i];
1047 printf(
"Checksum over hydrogen group sizes: %7d\n", hgstotal);
1050 for(i=0; i < numPatches; i++)
1052 if ( ! ( i % 100 ) )
1054 DebugM(3,
"Created " << i <<
" patches so far.\n");
1065 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1066 Node *node = nd.ckLocalBranch();
1067 CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
1068 PatchMgr *patchMgr = pm.ckLocalBranch();
1075 if (patchMap->
node(i) != node->
myid() )
1077 DebugM(3,
"patchMgr->movePatch(" 1078 << i <<
"," << patchMap->
node(i) <<
")\n");
1088 CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
1089 PatchMgr *patchMgr = pm.ckLocalBranch();
1095 for(
int i=0; i < numPatches; i++) {
1113 if ( CkNumPes() == 1 ) {
1114 patchMapArrived =
true;
1119 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1120 Node *node = nd.ckLocalBranch();
1123 #ifdef NODEAWARE_PROXY_SPANNINGTREE
1124 || CkNumPes() > CkNumNodes()
1125 ) && ( CkNumNodes() > 1
1130 #ifdef NODEAWARE_PROXY_SPANNINGTREE 1131 if ( CkNumPes() > CkNumNodes() && CkNumNodes() > 1
1142 CProxy_WorkDistrib workProxy(thisgroup);
1143 workProxy[0].savePatchMap(mapMsg);
1155 if ( CkMyRank() ) patchMapArrived =
true;
1157 if ( patchMapArrived && CkMyPe() ) {
1161 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1162 Node *node = nd.ckLocalBranch();
1165 #ifdef NODEAWARE_PROXY_SPANNINGTREE
1166 || CkNumPes() > CkNumNodes()
1167 ) && ( CkNumNodes() > 1
1172 #ifdef NODEAWARE_PROXY_SPANNINGTREE 1173 if ( CkNumPes() > CkNumNodes() && CkNumNodes() > 1
1179 if ( patchMapArrived ) {
1180 if ( CkMyRank() + 1 < CkNodeSize(CkMyNode()) ) {
1181 ((CProxy_WorkDistrib(thisgroup))[CkMyPe()+1]).
savePatchMap(msg);
1188 patchMapArrived =
true;
1190 int self = CkMyNode();
1191 int range_begin = 0;
1192 int range_end = CkNumNodes();
1193 while (
self != range_begin ) {
1195 int split = range_begin + ( range_end - range_begin ) / 2;
1197 else { range_begin =
split; }
1199 int send_near =
self + 1;
1200 int send_far = send_near + ( range_end - send_near ) / 2;
1204 if ( send_far < range_end ) pids[npid++] = CkNodeFirst(send_far);
1205 if ( send_near < send_far ) pids[npid++] = CkNodeFirst(send_near);
1206 pids[npid++] = CkMyPe();
1207 CProxy_WorkDistrib(thisgroup).savePatchMap(msg,npid,pids);
1213 if ( CkMyRank() )
return;
1215 if ( CkNumNodes() == 1 ) {
1216 computeMapArrived =
true;
1226 }
else if ( ! CkMyRank() ) {
1232 computeMapArrived =
true;
1241 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1242 Node *node = nd.ckLocalBranch();
1248 #ifndef MEM_OPT_VERSION 1256 double maxNumPatches = 1.e9;
1260 DebugM(3,
"Mapping patches\n");
1261 if ( lattice.
a_p() && lattice.
b_p() && lattice.
c_p() ) {
1262 xmin = 0.; xmax = 0.;
1274 printf(
"+++ center=%.4f %.4f %.4f\n",
1276 printf(
"+++ xmin=%.4f xmax=%.4f\n", xmin.
x, xmax.
x);
1277 printf(
"+++ ymin=%.4f ymax=%.4f\n", xmin.
y, xmax.
y);
1278 printf(
"+++ zmin=%.4f zmax=%.4f\n", xmin.
z, xmax.
z);
1290 iout <<
iINFO <<
"ORIGINAL ATOMS MINMAX IS " << xmin <<
" " << xmax <<
"\n" <<
endi;
1291 double frac = ( (double)totalAtoms - 10000. ) / (double)totalAtoms;
1292 if ( frac < 0.9 ) { frac = 0.9; }
1295 iout <<
iINFO <<
"ADJUSTED ATOMS MINMAX IS " << xmin <<
" " << xmax <<
"\n" <<
endi;
1300 origin_shift = lattice.
a_r() * lattice.
origin();
1301 xmin.
x -= origin_shift;
1302 xmax.
x -= origin_shift;
1303 origin_shift = lattice.
b_r() * lattice.
origin();
1304 xmin.
y -= origin_shift;
1305 xmax.
y -= origin_shift;
1306 origin_shift = lattice.
c_r() * lattice.
origin();
1307 xmin.
z -= origin_shift;
1308 xmax.
z -= origin_shift;
1317 if (params->
LCPOOn && patchSize < 32.4) {
1318 if ( twoAwayX > 0 || twoAwayY > 0 || twoAwayZ > 0 ) {
1319 iout <<
iWARN <<
"Ignoring twoAway[XYZ] due to LCPO SASA implementation.\n" <<
endi;
1321 twoAwayX = twoAwayY = twoAwayZ = 0;
1325 if ( twoAwayX > 0 ) maxNumPatches = 1.e9;
1326 if ( twoAwayY > 0 ) maxNumPatches = 1.e9;
1327 if ( twoAwayZ > 0 ) maxNumPatches = 1.e9;
1330 iout <<
iINFO <<
"LIMITING NUMBER OF PATCHES TO " <<
1331 maxNumPatches <<
"\n" <<
endi;
1334 int numpes = CkNumPes();
1338 delete [] patchMap->nPatchesOnNode;
1339 patchMap->nPatchesOnNode =
new int[numpes];
1340 memset(patchMap->nPatchesOnNode, 0, numpes*
sizeof(
int));
1343 #if defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC) 1346 int numPatches = patchMap->
sizeGrid(
1348 twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1349 if ( numPatches < numpes && twoAwayX < 0 ) {
1353 twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1355 if ( numPatches < numpes && twoAwayY < 0 ) {
1359 twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1361 if ( numPatches < numpes && twoAwayZ < 0 ) {
1365 twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1367 if ( numPatches < numpes ) {
1368 #if defined(NAMD_MIC) 1369 NAMD_die(
"MIC-enabled NAMD requires at least one patch per thread.");
1372 NAMD_die(
"GPU-resident NAMD requires at least one patch per thread.");
1376 if ( numPatches % numpes && numPatches <= 1.4 * numpes ) {
1377 int exactFit = numPatches - numPatches % numpes;
1378 int newNumPatches = patchMap->
sizeGrid(
1380 twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1381 if ( newNumPatches == exactFit ) {
1382 iout <<
iINFO <<
"REDUCING NUMBER OF PATCHES TO IMPROVE LOAD BALANCE\n" <<
endi;
1383 maxNumPatches = exactFit;
1387 patchMap->
makePatches(xmin,xmax,lattice,patchSize,maxNumPatches,
1389 twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1393 int availPes = numpes;
1399 #ifdef MEM_OPT_VERSION 1412 int numPatches = patchMap->
sizeGrid(
1414 twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1415 if ( ( numPatches > (0.3*availPes) || numPatches > maxNumPatches
1416 ) && twoAwayZ < 0 ) {
1420 twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1422 if ( ( numPatches > (0.6*availPes) || numPatches > maxNumPatches
1423 ) && twoAwayY < 0 ) {
1427 twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1429 if ( ( numPatches > availPes || numPatches > maxNumPatches
1430 ) && twoAwayX < 0 ) {
1434 twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1436 if ( numPatches > availPes && numPatches <= (1.4*availPes) && availPes <= maxNumPatches ) {
1437 int newNumPatches = patchMap->
sizeGrid(
1439 twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1440 if ( newNumPatches <= availPes && numPatches <= (1.4*newNumPatches) ) {
1441 iout <<
iINFO <<
"REDUCING NUMBER OF PATCHES TO IMPROVE LOAD BALANCE\n" <<
endi;
1442 maxNumPatches = availPes;
1446 patchMap->
makePatches(xmin,xmax,lattice,patchSize,maxNumPatches,
1448 twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1465 #if (CMK_BLUEGENEP | CMK_BLUEGENEL) && USE_TOPOMAP 1467 int numPes = tmgr.getDimNX() * tmgr.getDimNY() * tmgr.getDimNZ();
1468 if (numPes > patchMap->
numPatches() && (assignPatchesTopoGridRecBisection() > 0)) {
1469 CkPrintf (
"Blue Gene/L topology partitioner finished successfully \n");
1473 assignPatchesSpaceFillingCurve();
1475 int *nAtoms =
new int[nNodes];
1478 for(i=0; i < nNodes; i++)
1488 #ifdef MEM_OPT_VERSION 1489 numAtoms += patchMap->numAtoms(i);
1490 nAtoms[patchMap->
node(i)] += patchMap->numAtoms(i);
1492 if (patchMap->
patch(i)) {
1499 if ( numAtoms !=
Node::Object()->molecule->numAtoms ) {
1500 for(i=0; i < nNodes; i++)
1501 iout <<
iINFO << nAtoms[i] <<
" atoms assigned to node " << i <<
"\n" <<
endi;
1503 NAMD_die(
"Incorrect atom count in WorkDistrib::assignNodeToPatch\n");
1545 void WorkDistrib::assignPatchesToLowestLoadNode()
1548 int assignedNode = 0;
1550 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1551 Node *node = nd.ckLocalBranch();
1558 int *load =
new int[ncpus];
1559 int *assignedNodes =
new int[patchMap->
numPatches()];
1560 for (
int i=0; i<ncpus; i++) {
1563 CkPrintf(
"assignPatchesToLowestLoadNode\n");
1564 int defaultNode = 0;
1565 if (
simParams->noPatchesOnZero && ncpus > 1 ){
1567 if(
simParams->noPatchesOnOne && ncpus > 2)
1571 for(pid=0; pid < patchMap->
numPatches(); pid++) {
1572 assignedNode = defaultNode;
1573 for (
int i=assignedNode + 1; i < ncpus; i++) {
1574 if (load[i] < load[assignedNode]) assignedNode = i;
1576 assignedNodes[pid] = assignedNode;
1577 #ifdef MEM_OPT_VERSION 1578 load[assignedNode] += patchMap->numAtoms(pid) + 1;
1585 sortNodesAndAssign(assignedNodes);
1586 delete[] assignedNodes;
1590 void WorkDistrib::assignPatchesBitReversal()
1594 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1595 Node *node = nd.ckLocalBranch();
1603 if ( ncpus <= npatches )
1604 NAMD_bug(
"WorkDistrib::assignPatchesBitReversal called improperly");
1608 for (
int i = 1; i < ncpus; ++i ) {
1613 sortNodesAndAssign(seq.begin());
1614 if ( ncpus > 2*npatches ) sortNodesAndAssign(seq.begin()+npatches, 1);
1632 return ((a1 == a2) && (b1 == b2) && (c1 == c2));
1641 return ( (a1 < a2) || ((a1 == a2) && (b1 < b2)) ||
1642 ((a1 == a2) && (b1 == b2) && (c1 < c2)) );
1646 void WorkDistrib::sortNodesAndAssign(
int *assignedNode,
int baseNodes) {
1652 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1653 Node *node = nd.ckLocalBranch();
1661 for ( i=0; i < nnodes; ++i ) {
1662 allnodes[i].node = i;
1664 for ( pid=0; pid<npatches; ++pid ) {
1666 allnodes[assignedNode[pid]].npatches++;
1667 allnodes[assignedNode[pid]].a_total += patchMap->
index_a(pid);
1668 allnodes[assignedNode[pid]].b_total += patchMap->
index_b(pid);
1669 allnodes[assignedNode[pid]].c_total += patchMap->
index_c(pid);
1672 usednodes.resize(0);
1673 for ( i=0; i < nnodes; ++i ) {
1674 if ( allnodes[i].npatches ) usednodes.add(allnodes[i]);
1678 for ( i=0; i < nnodes; ++i ) {
1680 if ( allnodes[pe].npatches ) allnodes[usednodes[i2++].node].node = pe;
1683 for ( pid=0; pid<npatches; ++pid ) {
1685 if ( ! baseNodes ) {
1686 patchMap->
assignNode(pid, allnodes[assignedNode[pid]].node);
1693 void WorkDistrib::assignPatchesRoundRobin()
1697 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1698 Node *node = nd.ckLocalBranch();
1704 int *assignedNode =
new int[patchMap->
numPatches()];
1706 for(pid=0; pid < patchMap->
numPatches(); pid++) {
1707 assignedNode[pid] = pid % ncpus;
1710 sortNodesAndAssign(assignedNode);
1711 delete [] assignedNode;
1715 void WorkDistrib::assignPatchesRecursiveBisection()
1718 int *assignedNode =
new int[patchMap->
numPatches()];
1725 int usedNodes = numNodes;
1726 int unusedNodes = 0;
1727 CkPrintf(
"assignPatchesRecursiveBisection\n");
1728 if (
simParams->noPatchesOnZero && numNodes > 1 ){
1730 if(
simParams->noPatchesOnOne && numNodes > 2)
1733 unusedNodes = numNodes - usedNodes;
1735 if ( recBisec.partition(assignedNode) ) {
1736 if ( unusedNodes !=0 ) {
1737 for (
int i=0; i<patchMap->
numPatches(); ++i ) {
1738 assignedNode[i] += unusedNodes;
1741 sortNodesAndAssign(assignedNode);
1742 delete [] assignedNode;
1747 delete [] assignedNode;
1750 <<
"WorkDistrib: Recursive bisection fails, " 1751 <<
"invoking space-filling curve algorithm\n";
1752 assignPatchesSpaceFillingCurve();
1763 return CmiGetFirstPeOnPhysicalNode(CmiPhysicalNodeID(pe));
1767 int na=
tmgr.getDimNA();
1768 int nb=
tmgr.getDimNB();
1769 int nc=
tmgr.getDimNC();
1770 int nd=
tmgr.getDimND();
1771 int ne=
tmgr.getDimNE();
1773 int na=
tmgr.getDimNX();
1774 int nb=
tmgr.getDimNY();
1775 int nc=
tmgr.getDimNZ();
1784 for (
int i=0; i<na; ++i ) { a_flags[i] = 0; }
1785 for (
int i=0; i<nb; ++i ) { b_flags[i] = 0; }
1786 for (
int i=0; i<nc; ++i ) { c_flags[i] = 0; }
1787 for (
int i=0; i<nd; ++i ) { d_flags[i] = 0; }
1788 for (
int i=0; i<ne; ++i ) { e_flags[i] = 0; }
1789 int npes = CkNumPes();
1790 for (
int pe=0; pe<npes; ++pe ) {
1793 tmgr.rankToCoordinates(
fixpe(pe),a,b,c,d,e,t);
1795 tmgr.rankToCoordinates(
fixpe(pe),a,b,c,t);
1798 if ( a < 0 || a >= na )
NAMD_bug(
"inconsistent torus topology!");
1799 if ( b < 0 || b >= nb )
NAMD_bug(
"inconsistent torus topology!");
1800 if ( c < 0 || c >= nc )
NAMD_bug(
"inconsistent torus topology!");
1801 if ( d < 0 || d >= nd )
NAMD_bug(
"inconsistent torus topology!");
1802 if ( e < 0 || e >= ne )
NAMD_bug(
"inconsistent torus topology!");
1809 iout <<
iINFO <<
"TORUS A SIZE " << na <<
" USING";
1810 for (
int i=0; i<na; ++i ) {
if ( a_flags[i] )
iout <<
" " << i; }
1812 iout <<
iINFO <<
"TORUS B SIZE " << nb <<
" USING";
1813 for (
int i=0; i<nb; ++i ) {
if ( b_flags[i] )
iout <<
" " << i; }
1815 iout <<
iINFO <<
"TORUS C SIZE " << nc <<
" USING";
1816 for (
int i=0; i<nc; ++i ) {
if ( c_flags[i] )
iout <<
" " << i; }
1819 iout <<
iINFO <<
"TORUS D SIZE " << nd <<
" USING";
1820 for (
int i=0; i<nd; ++i ) {
if ( d_flags[i] )
iout <<
" " << i; }
1822 iout <<
iINFO <<
"TORUS E SIZE " << ne <<
" USING";
1823 for (
int i=0; i<ne; ++i ) {
if ( e_flags[i] )
iout <<
" " << i; }
1830 if (
tmgr.absA(na) == 0 )
1832 if (
tmgr.absX(na) == 0 )
1834 for (
int i=0, gaplen=0, gapstart=0; i<2*na; ++i ) {
1835 if ( a_flags[i%na] ) gapstart = i+1;
1836 else if ( i - gapstart >= gaplen ) {
1837 a_rot = 2*na-i-1; gaplen = i - gapstart;
1841 if (
tmgr.absB(nb) == 0 )
1843 if (
tmgr.absY(nb) == 0 )
1845 for (
int i=0, gaplen=0, gapstart=0; i<2*nb; ++i ) {
1846 if ( b_flags[i%nb] ) gapstart = i+1;
1847 else if ( i - gapstart >= gaplen ) {
1848 b_rot = 2*nb-i-1; gaplen = i - gapstart;
1852 if (
tmgr.absC(nc) == 0 )
1854 if (
tmgr.absZ(nc) == 0 )
1856 for (
int i=0, gaplen=0, gapstart=0; i<2*nc; ++i ) {
1857 if ( c_flags[i%nc] ) gapstart = i+1;
1858 else if ( i - gapstart >= gaplen ) {
1859 c_rot = 2*nc-i-1; gaplen = i - gapstart;
1863 if (
tmgr.absD(nd) == 0 )
1864 for (
int i=0, gaplen=0, gapstart=0; i<2*nd; ++i ) {
1865 if ( d_flags[i%nd] ) gapstart = i+1;
1866 else if ( i - gapstart >= gaplen ) {
1867 d_rot = 2*nd-i-1; gaplen = i - gapstart;
1870 if (
tmgr.absE(ne) == 0 )
1871 for (
int i=0, gaplen=0, gapstart=0; i<2*ne; ++i ) {
1872 if ( e_flags[i%ne] ) gapstart = i+1;
1873 else if ( i - gapstart >= gaplen ) {
1874 e_rot = 2*ne-i-1; gaplen = i - gapstart;
1879 int a_min=na, a_max=-1;
1880 int b_min=nb, b_max=-1;
1881 int c_min=nc, c_max=-1;
1882 int d_min=nd, d_max=-1;
1883 int e_min=ne, e_max=-1;
1884 for (
int pe=0; pe<npes; ++pe ) {
1887 tmgr.rankToCoordinates(
fixpe(pe),a,b,c,d,e,t);
1889 tmgr.rankToCoordinates(
fixpe(pe),a,b,c,t);
1897 if ( a < a_min ) a_min = a;
1898 if ( b < b_min ) b_min = b;
1899 if ( c < c_min ) c_min = c;
1900 if ( d < d_min ) d_min = d;
1901 if ( e < e_min ) e_min = e;
1902 if ( a > a_max ) a_max = a;
1903 if ( b > b_max ) b_max = b;
1904 if ( c > c_max ) c_max = c;
1905 if ( d > d_max ) d_max = d;
1906 if ( e > e_max ) e_max = e;
1908 int a_len = a_max - a_min + 1;
1909 int b_len = b_max - b_min + 1;
1910 int c_len = c_max - c_min + 1;
1911 int d_len = d_max - d_min + 1;
1912 int e_len = e_max - e_min + 1;
1914 lensort[0] = (a_len << 3) + 0;
1915 lensort[1] = (b_len << 3) + 1;
1916 lensort[2] = (c_len << 3) + 2;
1917 lensort[3] = (d_len << 3) + 3;
1918 lensort[4] = (e_len << 3) + 4;
1920 std::sort(lensort, lensort+5);
1922 for (
int i=0; i<5; ++i ) {
if ( (lensort[i] & 7) == 0 )
a_dim = 4-i; }
1923 for (
int i=0; i<5; ++i ) {
if ( (lensort[i] & 7) == 1 )
b_dim = 4-i; }
1924 for (
int i=0; i<5; ++i ) {
if ( (lensort[i] & 7) == 2 )
c_dim = 4-i; }
1925 for (
int i=0; i<5; ++i ) {
if ( (lensort[i] & 7) == 3 )
d_dim = 4-i; }
1926 for (
int i=0; i<5; ++i ) {
if ( (lensort[i] & 7) == 4 )
e_dim = 4-i; }
1928 if ( a_len >= b_len && a_len >= c_len ) {
1930 if ( b_len >= c_len ) {
1935 }
else if ( b_len >= a_len && b_len >= c_len ) {
1937 if ( a_len >= c_len ) {
1944 if ( a_len >= b_len ) {
1951 iout <<
iINFO <<
"TORUS MINIMAL MESH SIZE IS " << a_len <<
" BY " << b_len <<
" BY " << c_len
1953 <<
" BY " << d_len <<
" BY " << e_len
1961 tmgr.rankToCoordinates(
fixpe(pe),a,b,c,d,e,t);
1963 tmgr.rankToCoordinates(
fixpe(pe),a,b,c,t);
1982 int crds1[3], crds2[3];
1985 for (
int i=0; i<3; ++i ) {
1987 if ( crds1[d] != crds2[d] )
return ( crds1[d] < crds2[d] );
1990 return ( index[pe1] < index[pe2] );
1994 if ( node_begin == node_end )
return node_begin;
1995 int tmins[3], tmaxs[3], tlens[3], sortdims[3];
1996 coords(*node_begin, tmins);
1997 coords(*node_begin, tmaxs);
1998 for (
int *peitr = node_begin; peitr != node_end; ++peitr ) {
2001 for (
int i=0; i<3; ++i ) {
2002 if ( tvals[i] < tmins[i] ) tmins[i] = tvals[i];
2003 if ( tvals[i] > tmaxs[i] ) tmaxs[i] = tvals[i];
2006 for (
int i=0; i<3; ++i ) {
2007 tlens[i] = tmaxs[i] - tmins[i];
2009 sortdims[0] = splitdim;
2010 for (
int i=0, j=0; i<3; ++i ) {
2011 if ( i != splitdim ) sortdims[++j] = i;
2013 if ( tlens[sortdims[1]] < tlens[sortdims[2]] ) {
2014 int tmp = sortdims[1];
2015 sortdims[1] = sortdims[2];
2019 int *nodes = node_begin;
2020 int nnodes = node_end - node_begin;
2023 int c_split =
coord(nodes[0],splitdim);
2024 for (
int i=0; i<nnodes; ++i ) {
2025 if (
coord(nodes[i],splitdim) != c_split ) {
2026 int mid = (nnodes+1)/2;
2027 if ( abs(i-mid) < abs(i_split-mid) ) {
2029 c_split =
coord(i,splitdim);
2035 for (
int i=0; i<nnodes; ++i ) {
2036 if ( ! CmiPeOnSamePhysicalNode(nodes[i_split],nodes[i]) ) {
2037 int mid = (nnodes+1)/2;
2038 if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
2042 return ( node_begin + i_split );
2052 if ( a1 < a2 )
return true;
2053 if ( a1 > a2 )
return false;
2054 int dir = ( (a1 & 1) ? -1 : 1 );
2057 if ( b1 * dir < b2 * dir )
return true;
2058 if ( b1 * dir > b2 * dir )
return false;
2059 dir *= ( (b1 & 1) ? -1 : 1 );
2062 if ( c1 * dir < c2 * dir )
return true;
2073 if ( a1 < a2 )
return true;
2074 if ( a1 > a2 )
return false;
2075 int dir = ( (a1 & 1) ? -1 : 1 );
2078 if ( b1 * dir < b2 * dir )
return true;
2079 if ( b1 * dir > b2 * dir )
return false;
2080 dir *= ( (b1 & 1) ? -1 : 1 );
2083 if ( c1 * dir < c2 * dir )
return true;
2094 if ( a1 < a2 )
return true;
2095 if ( a1 > a2 )
return false;
2096 int dir = ( (a1 & 1) ? -1 : 1 );
2099 if ( b1 * dir < b2 * dir )
return true;
2100 if ( b1 * dir > b2 * dir )
return false;
2101 dir *= ( (b1 & 1) ? -1 : 1 );
2104 if ( c1 * dir < c2 * dir )
return true;
2110 int *patch_begin,
int *patch_end,
2111 int *node_begin,
int *node_end,
2113 double *sortedLoads,
2120 int *patches = patch_begin;
2121 int npatches = patch_end - patch_begin;
2122 int *nodes = node_begin;
2123 int nnodes = node_end - node_begin;
2126 const int emptyPatchLoad =
simParams->emptyPatchLoad;
2127 double totalRawLoad = 0;
2128 for (
int i=0; i<npatches; ++i ) {
2130 #ifdef MEM_OPT_VERSION 2131 double load = patchMap->numAtoms(pid) + emptyPatchLoad;
2135 patchLoads[pid] = load;
2136 sortedLoads[i] = load;
2137 totalRawLoad += load;
2139 std::sort(sortedLoads,sortedLoads+npatches);
2143 double maxPatchLoad = 1;
2144 for (
int i=0; i<npatches; ++i ) {
2145 double load = sortedLoads[i];
2146 double total = sumLoad + (npatches-i) * load;
2147 if ( nnodes * load > total )
break;
2149 maxPatchLoad = load;
2151 double totalLoad = 0;
2152 for (
int i=0; i<npatches; ++i ) {
2154 if ( patchLoads[pid] > maxPatchLoad ) patchLoads[pid] = maxPatchLoad;
2155 totalLoad += patchLoads[pid];
2157 if ( nnodes * maxPatchLoad > totalLoad )
2158 NAMD_bug(
"algorithm failure in WorkDistrib recursive_bisect_with_curve()");
2160 int a_len, b_len, c_len;
2161 int a_min, b_min, c_min;
2163 a_min = patchMap->
index_a(patches[0]);
2164 b_min = patchMap->
index_b(patches[0]);
2165 c_min = patchMap->
index_c(patches[0]);
2169 for (
int i=1; i<npatches; ++i ) {
2170 int a = patchMap->
index_a(patches[i]);
2171 int b = patchMap->
index_b(patches[i]);
2172 int c = patchMap->
index_c(patches[i]);
2173 if ( a < a_min ) a_min = a;
2174 if ( b < b_min ) b_min = b;
2175 if ( c < c_min ) c_min = c;
2176 if ( a > a_max ) a_max = a;
2177 if ( b > b_max ) b_max = b;
2178 if ( c > c_max ) c_max = c;
2180 a_len = a_max - a_min;
2181 b_len = b_max - b_min;
2182 c_len = c_max - c_min;
2185 int *node_split = node_begin;
2187 if (
simParams->disableTopology ) ;
else 2188 if ( a_len >= b_len && a_len >= c_len ) {
2190 }
else if ( b_len >= a_len && b_len >= c_len ) {
2192 }
else if ( c_len >= a_len && c_len >= b_len ) {
2196 if ( node_split == node_begin ) {
2201 for (
int i=0; i<nnodes; ++i ) {
2202 if ( ! CmiPeOnSamePhysicalNode(nodes[i_split],nodes[i]) ) {
2203 int mid = (nnodes+1)/2;
2204 if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
2208 node_split = node_begin + i_split;
2211 bool final_patch_sort =
false;
2213 if ( node_split == node_begin ) {
2215 nnodes == CmiNumPesOnPhysicalNode(CmiPhysicalNodeID(*node_begin)) ) {
2217 tmgr.
coords(*node_begin, crds);
2218 CkPrintf(
"WorkDistrib: physnode %5d pe %5d node %5d at %5d %5d %5d from %5d %5d %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
2219 CmiPhysicalNodeID(*node_begin), *node_begin,
2220 CkNodeOf(*node_begin), crds[0], crds[1], crds[2],
2221 a_min, b_min, c_min, npatches,
2222 a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
2226 final_patch_sort =
true;
2230 for (
int i=0; i<nnodes; ++i ) {
2231 if ( CmiNodeOf(nodes[i_split]) != CmiNodeOf(nodes[i]) ) {
2232 int mid = (nnodes+1)/2;
2233 if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
2237 node_split = node_begin + i_split;
2240 if ( node_split == node_begin ) {
2242 nnodes == CmiNodeSize(CmiNodeOf(*node_begin)) ) {
2244 tmgr.
coords(*node_begin, crds);
2245 CkPrintf(
"WorkDistrib: node %5d pe %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
2246 CmiNodeOf(*node_begin), *node_begin, npatches,
2247 a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
2251 node_split = node_begin + nnodes/2;
2254 if ( nnodes == 1 ) {
2256 int *node = node_begin;
2258 for (
int i=0; i < npatches; ++i ) {
2259 int pid = patches[i];
2260 assignedNode[pid] = *node;
2261 sumLoad += patchLoads[pid];
2262 if ( 0 ) CkPrintf(
"assign %5d node %5d patch %5d %5d %5d load %7f total %7f\n",
2267 patchLoads[pid], sumLoad);
2273 if ( final_patch_sort ) {
2276 }
else if ( a_len >= b_len && a_len >= c_len ) {
2277 if ( 0 ) CkPrintf(
"sort a\n");
2279 }
else if ( b_len >= a_len && b_len >= c_len ) {
2280 if ( 0 ) CkPrintf(
"sort b\n");
2282 }
else if ( c_len >= a_len && c_len >= b_len ) {
2283 if ( 0 ) CkPrintf(
"sort c\n");
2289 int *node = node_begin;
2291 for ( patch_split = patch_begin;
2292 patch_split != patch_end && node != node_split;
2294 sumLoad += patchLoads[*patch_split];
2295 double targetLoad = totalLoad *
2296 ((double)(node-node_begin+1) / (double)nnodes);
2297 if ( 0 ) CkPrintf(
"test %5ld node %5d patch %5d %5d %5d load %7f target %7f\n",
2298 patch_split - patch_begin, *node,
2299 patchMap->
index_a(*patch_split),
2300 patchMap->
index_b(*patch_split),
2301 patchMap->
index_c(*patch_split),
2302 sumLoad, targetLoad);
2303 double extra = ( patch_split+1 != patch_end ? 0.5 * patchLoads[*(patch_split+1)] : 0 );
2304 if ( node+1 < node_end && sumLoad + extra >= targetLoad ) { ++node; }
2306 double targetLoad = totalLoad *
2307 ((double)(node_split-node_begin) / (double)nnodes);
2308 if ( 0 ) CkPrintf(
"split node %5ld/%5d patch %5ld/%5d load %7f target %7f\n",
2309 node_split-node_begin, nnodes,
2310 patch_split-patch_begin, npatches,
2311 sumLoad, targetLoad);
2316 patch_begin, patch_split, node_begin, node_split,
2317 patchLoads, sortedLoads, assignedNode, tmgr);
2319 patch_split, patch_end, node_split, node_end,
2320 patchLoads, sortedLoads, assignedNode, tmgr);
2324 void WorkDistrib::assignPatchesSpaceFillingCurve()
2328 const int numPatches = patchMap->
numPatches();
2329 int *assignedNode =
new int[numPatches];
2335 NAMD_die(
"simulateInitialMapping not supported by assignPatchesSpaceFillingCurve()");
2340 for (
int i=0; i<numPatches; ++i ) {
2341 patchOrdering[i] = i;
2345 nodeOrdering.resize(0);
2346 for (
int i=0; i<numNodes; ++i ) {
2348 if (
simParams->noPatchesOnZero && numNodes > 1 ) {
2349 if ( pe == 0 )
continue;
2350 if(
simParams->noPatchesOnOne && numNodes > 2) {
2351 if ( pe == 1 )
continue;
2354 #ifdef MEM_OPT_VERSION 2359 nodeOrdering.add(pe);
2360 if ( 0 ) CkPrintf(
"using pe %5d\n", pe);
2363 int *node_begin = nodeOrdering.begin();
2364 int *node_end = nodeOrdering.end();
2365 if ( nodeOrdering.size() > numPatches ) {
2366 node_end = node_begin + numPatches;
2368 std::sort(node_begin, node_end, pe_sortop_compact());
2370 int *basenode_begin = node_begin;
2371 int *basenode_end = node_end;
2372 if ( nodeOrdering.size() > 2*numPatches ) {
2373 basenode_begin = node_end;
2374 basenode_end = basenode_begin + numPatches;
2375 std::sort(basenode_begin, basenode_end, pe_sortop_compact());
2379 iout <<
iWARN <<
"IGNORING TORUS TOPOLOGY DURING PATCH PLACEMENT\n" <<
endi;
2383 patchOrdering.begin(), patchOrdering.end(),
2384 node_begin, node_end,
2385 patchLoads.begin(), sortedLoads.begin(), assignedNode, tmgr);
2387 std::sort(node_begin, node_end, pe_sortop_compact());
2389 int samenodecount = 0;
2391 for (
int pid=0; pid<numPatches; ++pid ) {
2392 int node = assignedNode[pid];
2394 int nodeidx = std::lower_bound(node_begin, node_end, node,
2395 pe_sortop_compact()) - node_begin;
2396 int basenode = basenode_begin[nodeidx];
2398 if ( CmiPeOnSamePhysicalNode(node,basenode) ) ++samenodecount;
2401 iout <<
iINFO <<
"Placed " << (samenodecount*100./numPatches) <<
"% of base nodes on same physical node as patch\n" <<
endi;
2403 delete [] assignedNode;
2411 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
2412 Node *node = nd.ckLocalBranch();
2414 DebugM(3,
"Mapping computes\n");
2423 mapComputeHomePatches(computeDPMTAType);
2425 NAMD_die(
"This binary does not include DPMTA (FMA).");
2430 mapComputeHomePatches(computeDPMEType);
2437 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 2451 DebugM(2,
"adding ComputeGlobal\n");
2469 #ifdef CHARM_HAS_MSA 2480 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 2483 mapComputeNode(computeBondedCUDAType);
2488 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 2498 mapComputeNonbonded();
2566 CkPrintf(
"ComputeMap has been loaded from %s.\n",
simParams->computeMapFilename);
2571 void WorkDistrib::mapComputeHomeTuples(
ComputeType type)
2575 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
2576 Node *node = nd.ckLocalBranch();
2584 char *isBaseNode =
new char[numNodes];
2585 memset(isBaseNode,0,numNodes*
sizeof(
char));
2588 for(
int j=0; j<numPatches; j++) {
2589 isBaseNode[patchMap->
basenode(j)] = 1;
2592 for(
int i=0; i<numNodes; i++) {
2593 if ( isBaseNode[i] ) {
2598 delete [] isBaseNode;
2602 void WorkDistrib::mapComputeHomePatches(
ComputeType type)
2606 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
2607 Node *node = nd.ckLocalBranch();
2615 for(
int i=0; i<numNodes; i++) {
2623 void WorkDistrib::mapComputePatch(
ComputeType type)
2634 computeMap->
newPid(cid,i);
2641 void WorkDistrib::mapComputeNode(
ComputeType type)
2649 int ncpus = CkNumPes();
2655 for(
int i=0; i<ncpus; i++) {
2662 void WorkDistrib::mapComputeNonbonded(
void)
2670 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
2671 Node *node = nd.ckLocalBranch();
2673 int ncpus = CkNumPes();
2674 int nodesize = CkMyNodeSize();
2677 nodesize =
simParams->simulatedNodeSize;
2688 double partScaling = 1.0;
2689 if ( ncpus < patchMap->numPatches() ) {
2690 partScaling = ((double)ncpus) / ((double)patchMap->
numPatches());
2696 int numPartitions = 1;
2699 #ifdef MEM_OPT_VERSION 2700 int64 numFixed = patchMap->numFixedAtoms(i);
2701 int64 numAtoms = patchMap->numAtoms(i);
2709 numPartitions = (int) ( partScaling * ( 0.5 +
2710 (numAtoms*numAtoms-numFixed*numFixed) / (
double)(2*divide*divide) ) );
2712 if (numPartitions < 1) numPartitions = 1;
2716 DebugM(4,
"Mapping " << numPartitions <<
" ComputeNonbondedSelf objects for patch " << i <<
"\n");
2731 computeMap->
newPid(cid,i);
2736 for(
int p1=0; p1 <patchMap->
numPatches(); p1++)
2740 for(j=0;j<numNeighbors;j++)
2742 int p2 = oneAway[j];
2743 int dsp = oneAwayDownstream[j];
2745 int numPartitions = 1;
2748 #ifdef MEM_OPT_VERSION 2749 int64 numAtoms1 = patchMap->numAtoms(p1);
2750 int64 numAtoms2 = patchMap->numAtoms(p2);
2751 int64 numFixed1 = patchMap->numFixedAtoms(p1);
2752 int64 numFixed2 = patchMap->numFixedAtoms(p2);
2761 const int t2 = oneAwayTrans[j];
2768 const int ia1 = patchMap->
index_a(p1);
2770 const int ib1 = patchMap->
index_b(p1);
2772 const int ic1 = patchMap->
index_c(p1);
2775 if ( abs(ia2-ia1) > nax ||
2776 abs(ib2-ib1) > nay ||
2777 abs(ic2-ic1) > naz )
2778 NAMD_bug(
"Bad patch distance in WorkDistrib::mapComputeNonbonded");
2781 if ( ia1 == ia2 ) --distance;
2782 else if ( ia1 == ia2 + nax - 1 ) --distance;
2783 else if ( ia1 + nax - 1 == ia2 ) --distance;
2784 if ( ib1 == ib2 ) --distance;
2785 else if ( ib1 == ib2 + nay - 1 ) --distance;
2786 else if ( ib1 + nay - 1 == ib2 ) --distance;
2787 if ( ic1 == ic2 ) --distance;
2788 else if ( ic1 == ic2 + naz - 1 ) --distance;
2789 else if ( ic1 + naz - 1 == ic2 ) --distance;
2791 if ( distance == 0 ) {
2793 }
else if (distance == 1) {
2799 numPartitions = (int) ( partScaling * ( 0.5 +
2800 (numAtoms1*numAtoms2-numFixed1*numFixed2)/(
double)(divide*divide) ) );
2802 if ( numPartitions < 1 ) numPartitions = 1;
2812 computeMap->
newPid(cid,p1);
2813 computeMap->
newPid(cid,p2,oneAwayTrans[j]);
2814 patchMap->
newCid(p1,cid);
2815 patchMap->
newCid(p2,cid);
2822 void WorkDistrib::mapComputeLCPO(
void) {
2827 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
2828 Node *node = nd.ckLocalBranch();
2830 int ncpus = CkNumPes();
2831 int nodesize = CkMyNodeSize();
2832 const int maxPatches = 8;
2834 int numPatchesInOctet;
2835 PatchID patchesInOctet[maxPatches];
2836 int oneAwayTrans[maxPatches];
2839 int numPartitions = 1;
2855 for (
int p = 0; p < numPatchesInOctet; p++) {
2856 computeMap->
newPid(cid, patchesInOctet[p], oneAwayTrans[p]);
2858 for (
int p = 0; p < numPatchesInOctet; p++) {
2859 patchMap->
newCid(patchesInOctet[p],cid);
2872 NAMD_bug(
"compute->sequence() < 0 in WorkDistrib::messageEnqueueWork");
2878 int type = compute->
type();
2879 int cid = compute->
cid;
2881 CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
2885 wdProxy[CkMyPe()].enqueueExcls(msg);
2889 wdProxy[CkMyPe()].enqueueBonds(msg);
2893 wdProxy[CkMyPe()].enqueueAngles(msg);
2897 wdProxy[CkMyPe()].enqueueDihedrals(msg);
2901 wdProxy[CkMyPe()].enqueueImpropers(msg);
2905 wdProxy[CkMyPe()].enqueueThole(msg);
2909 wdProxy[CkMyPe()].enqueueAniso(msg);
2913 wdProxy[CkMyPe()].enqueueCrossterms(msg);
2917 wdProxy[CkMyPe()].enqueueOneFourNbThole(msg);
2922 wdProxy[CkMyPe()].enqueueGromacsPair(msg);
2926 wdProxy[CkMyPe()].enqueueLCPO(msg);
2929 switch ( seq % 2 ) {
2932 switch ( gbisPhase ) {
2934 wdProxy[CkMyPe()].enqueueSelfA1(msg);
2937 wdProxy[CkMyPe()].enqueueSelfA2(msg);
2940 wdProxy[CkMyPe()].enqueueSelfA3(msg);
2946 switch ( gbisPhase ) {
2948 wdProxy[CkMyPe()].enqueueSelfB1(msg);
2951 wdProxy[CkMyPe()].enqueueSelfB2(msg);
2954 wdProxy[CkMyPe()].enqueueSelfB3(msg);
2959 NAMD_bug(
"WorkDistrib::messageEnqueueSelf case statement error!");
2963 switch ( seq % 2 ) {
2966 switch ( gbisPhase ) {
2968 wdProxy[CkMyPe()].enqueueWorkA1(msg);
2971 wdProxy[CkMyPe()].enqueueWorkA2(msg);
2974 wdProxy[CkMyPe()].enqueueWorkA3(msg);
2980 switch ( gbisPhase ) {
2982 wdProxy[CkMyPe()].enqueueWorkB1(msg);
2985 wdProxy[CkMyPe()].enqueueWorkB2(msg);
2988 wdProxy[CkMyPe()].enqueueWorkB3(msg);
2993 wdProxy[CkMyPe()].enqueueWorkC(msg);
2996 NAMD_bug(
"WorkDistrib::messageEnqueueWork case statement error!");
2999 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 3003 switch ( gbisPhase ) {
3005 wdProxy[CkMyPe()].enqueueCUDA(msg);
3008 wdProxy[CkMyPe()].enqueueCUDAP2(msg);
3011 wdProxy[CkMyPe()].enqueueCUDAP3(msg);
3020 wdProxy[CkMyPe()].enqueueMIC(msg);
3025 wdProxy[CkMyPe()].enqueuePme(msg);
3027 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 3029 wdProxy[CkMyPe()].enqueuePme(msg);
3033 wdProxy[CkMyPe()].enqueueWork(msg);
3044 NAMD_bug(
"compute->sequence() < 0 in WorkDistrib::messageEnqueueWork");
3050 CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
3052 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 3054 switch ( gbisPhase ) {
3056 wdProxy[CkMyPe()].finishCUDA(msg);
3059 wdProxy[CkMyPe()].finishCUDAP2(msg);
3062 wdProxy[CkMyPe()].finishCUDAP3(msg);
3077 NAMD_bug(
"compute->sequence() < 0 in WorkDistrib::messageFinishMIC");
3083 CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
3086 wdProxy[CkMyPe()].finishMIC(msg);
3095 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3101 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3107 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3113 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3119 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3125 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3131 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3137 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3143 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3149 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3157 NAMD_bug(
"\nWorkDistrib LocalWorkMsg recycling failed! Check enqueueGromacsPair from WorkDistrib.C\n");
3164 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3170 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3175 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3180 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3185 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3191 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3196 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3201 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3207 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3212 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3217 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3223 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3228 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3233 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3241 NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
3290 void WorkDistrib::velocities_from_PDB(
const char *filename,
3291 Vector *v,
int totalAtoms)
3297 v_pdb =
new PDB(filename);
3298 if ( v_pdb == NULL )
3300 NAMD_die(
"memory allocation failed in Node::velocities_from_PDB");
3309 sprintf(err_msg,
"FOUND %d COORDINATES IN VELOCITY PDB!!",
3319 for (i=0; i<totalAtoms; i++)
3344 void WorkDistrib::velocities_from_binfile(
const char *fname,
Vector *vels,
int n)
3365 Vector *v,
int totalAtoms)
3375 int lesReduceTemp = lesOn &&
simParams->lesReduceTemp;
3382 for (i=0; i<totalAtoms; i++)
3384 if (structure->
atommass(i) <= 0.) {
3387 kbToverM = sqrt(kbT *
3388 ( lesOn && structure->
get_fep_type(i) ? tempFactor : 1.0 ) /
3401 for (randnum=0.0, j=0; j<12; j++)
3403 randnum += vel_random.uniform();
3408 v[i].
x = randnum*kbToverM;
3410 for (randnum=0.0, j=0; j<12; j++)
3412 randnum += vel_random.uniform();
3417 v[i].
y = randnum*kbToverM;
3419 for (randnum=0.0, j=0; j<12; j++)
3421 randnum += vel_random.uniform();
3426 v[i].
z = randnum*kbToverM;
3429 if (
simParams->drudeOn )
for (i=0; i<totalAtoms; i++) {
3448 void WorkDistrib::remove_com_motion(
Vector *vel,
Molecule *structure,
int n)
3458 mv += mass * vel[i];
3464 iout <<
iINFO <<
"REMOVING COM VELOCITY " 3467 for (i=0; i<n; i++) { vel[i] -= mv; }
3476 int WorkDistrib::assignPatchesTopoGridRecBisection() {
3479 int *assignedNode =
new int[patchMap->
numPatches()];
3486 int usedNodes = numNodes;
3487 CkPrintf(
"assignPatchesTopoGridRecBisection\n");
3488 if (
simParams->noPatchesOnZero && numNodes > 1 ) {
3490 if (
simParams->noPatchesOnOne && numNodes > 2 )
3495 int xsize = 0, ysize = 0, zsize = 0;
3499 xsize = tmgr.getDimNX();
3500 ysize = tmgr.getDimNY();
3501 zsize = tmgr.getDimNZ();
3504 int rc = recBisec.partitionProcGrid(xsize, ysize, zsize, assignedNode);
3506 delete [] assignedNode;
3513 #if defined(NAMD_MIC) 3514 extern void mic_hostDeviceLDB();
3515 extern void mic_contributeHostDeviceLDB(
int idLen,
int *
id);
3516 extern void mic_setDeviceLDBParams(
int dt,
int hs,
int sp1,
int pp1,
int pp2);
3520 #if defined(NAMD_MIC) 3521 CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
3522 wdProxy.initHostDeviceLDB();
3527 #if defined(NAMD_MIC) 3528 mic_hostDeviceLDB();
3533 #if defined(NAMD_MIC) 3534 CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
3535 wdProxy[0].contributeHostDeviceLDB(peSetLen, peSet);
3540 #if defined(NAMD_MIC) 3541 mic_contributeHostDeviceLDB(peSetLen, peSet);
3546 #if defined(NAMD_MIC) 3547 CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
3548 wdProxy.setDeviceLDBParams(dt, hs, sp1, pp1, pp2);
3553 #if defined(NAMD_MIC) 3554 mic_setDeviceLDBParams(dt, hs, sp1, pp1, pp2);
3559 #include "WorkDistrib.def.h"
Real atomcharge(int anum) const
bool operator()(int p1, int p2) const
void setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2)
void enqueueMIC(LocalWorkMsg *msg)
void enqueueOneFourNbThole(LocalWorkMsg *msg)
std::ostream & iINFO(std::ostream &s)
static void sortPmePes(int *pmepes, int xdim, int ydim)
int get_mother_atom(int) const
ScaledPosition center(int pid) const
Bool simulateInitialMapping
static void messageFinishMIC(Compute *)
bool operator()(int a, int b) const
int isSendSpanningTreeUnset()
patch_sortop_curve_b(PatchMap *m)
void enqueueAngles(LocalWorkMsg *msg)
static void messageFinishCUDA(Compute *)
int getNumFixedAtoms() const
PatchID assignToPatch(Position p, const Lattice &l)
void setNewNumPartitions(ComputeID cid, char numPartitions)
static bool less_than_bit_reversed(int a, int b)
static void recursive_bisect_with_curve(int *patch_begin, int *patch_end, int *node_begin, int *node_end, double *patchLoads, double *sortedLoads, int *assignedNode, TopoManagerWrapper &tmgr)
void saveComputeMap(const char *fname)
static ProxyMgr * Object()
NAMD_HOST_DEVICE int c_p() const
static int * peCompactOrdering
BigReal max_a(int pid) const
void finishCUDAP3(LocalWorkMsg *msg)
void enqueueCrossterms(LocalWorkMsg *msg)
bool operator()(int p1, int p2) const
static void partition(int *order, const FullAtom *atoms, int begin, int end)
int isRecvSpanningTreeUnset()
void enqueuePme(LocalWorkMsg *msg)
static PatchMap * Object()
void enqueueWorkA3(LocalWorkMsg *msg)
void enqueueWork(LocalWorkMsg *msg)
void enqueueGromacsPair(LocalWorkMsg *msg)
void enqueueSelfA1(LocalWorkMsg *msg)
void finishCUDAP2(LocalWorkMsg *msg)
static void send_contributeHostDeviceLDB(int peSetLen, int *peSet)
SimParameters * simParameters
void loadComputeMap(const char *fname)
Bool CUDASOAintegrateMode
void createHomePatch(PatchID pid, FullAtomList &a)
void sendAtoms(PatchID pid, FullAtomList &a)
void enqueueExcls(LocalWorkMsg *msg)
void enqueueBonds(LocalWorkMsg *msg)
std::ostream & endi(std::ostream &s)
void enqueueAniso(LocalWorkMsg *msg)
void enqueueSelfB1(LocalWorkMsg *msg)
void enqueueWorkB1(LocalWorkMsg *msg)
static void messageEnqueueWork(Compute *)
static void peOrderingReady()
std::ostream & iWARN(std::ostream &s)
int operator==(const nodesort &o) const
MIStream * get(char &data)
int index_a(int pid) const
int sizeGrid(ScaledPosition xmin, ScaledPosition xmax, const Lattice &lattice, BigReal patchSize, double maxNumPatches, int staticAtomAssignment, int asplit, int bsplit, int csplit)
ComputeID storeCompute(int node, int maxPids, ComputeType type, int partition=-1, int numPartitions=0)
Patch * patch(PatchID pid)
void enqueueSelfA3(LocalWorkMsg *msg)
int add(const Elem &elem)
bool operator()(int pe1, int pe2) const
Molecule stores the structural information for the system.
NAMD_HOST_DEVICE int b_p() const
void movePatch(PatchID, NodeID)
LocalWorkMsg *const localWorkMsg
void recvComputeMapChanges(ComputeMapChangeMsg *)
int gridsize_c(void) const
char newNumPartitions(ComputeID cid)
void reorder(Elem *a, int n)
HydrogenGroup hydrogenGroup
void enqueueCUDA(LocalWorkMsg *msg)
void sendComputeMap(void)
void enqueueWorkB2(LocalWorkMsg *msg)
void enqueueCUDAP2(LocalWorkMsg *msg)
void assignBaseNode(PatchID, NodeID)
static void recursive_bisect_coord(int x_begin, int x_end, int y_begin, int y_end, int *pe_begin, ScaledPosition *coord, int *result, int ydim)
void newCid(int pid, int cid)
constexpr int getWaterModelGroupSize(const WaterModel &watmodel)
void enqueueSelfB3(LocalWorkMsg *msg)
int coord(int pe, int dim)
int gridsize_a(void) const
TopoManagerWrapper & tmgr
int numPatches(void) const
static NAMD_HOST_DEVICE int offset_b(int i)
void enqueueWorkC(LocalWorkMsg *msg)
pe_sortop_bit_reversed(int *r)
void reinitAtoms(const char *basename=0)
int operator<(const nodesort &o) const
void enqueueThole(LocalWorkMsg *msg)
void enqueueWorkA2(LocalWorkMsg *msg)
NAMD_HOST_DEVICE Position apply_transform(Position data, const Transform &t) const
void createHomePatches(void)
void NAMD_bug(const char *err_msg)
static NAMD_HOST_DEVICE int offset_c(int i)
void enqueueImpropers(LocalWorkMsg *msg)
BigReal min_c(int pid) const
static int eventMachineProgress
Real langevin_param(int atomnum) const
Index atomvdwtype(int anum) const
int numaway_c(void) const
void enqueueLCPO(LocalWorkMsg *msg)
int oneOrTwoAwayNeighbors(int pid, PatchID *neighbor_ids, PatchID *downstream_ids=0, int *transform_ids=0)
int index_b(int pid) const
Bool staticAtomAssignment
pe_sortop_coord_y(ScaledPosition *s)
Bool replicaUniformPatchGrids
bool operator()(int a, int b) const
void finishCUDA(LocalWorkMsg *msg)
void get_extremes(ScaledPosition &xmin, ScaledPosition &xmax) const
int numaway_a(void) const
NAMD_HOST_DEVICE int a_p() const
NAMD_HOST_DEVICE Vector a_r() const
NAMD_HOST_DEVICE Vector b_r() const
void setNewNode(ComputeID cid, NodeID node)
virtual void finishPatch(int)
NAMD_HOST_DEVICE Position nearest(Position data, ScaledPosition ref) const
void NAMD_die(const char *err_msg)
void enqueueCUDAP3(LocalWorkMsg *msg)
static int * peDiffuseOrderingIndex
BigReal min_a(int pid) const
NAMD_HOST_DEVICE Vector c_r() const
Real atommass(int anum) const
static int compare_bit_reversed(int a, int b)
void enqueueWorkA1(LocalWorkMsg *msg)
Bool pressureProfileEwaldOn
std::vector< std::string > split(const std::string &text, std::string delimiter)
static int * peDiffuseOrdering
void makePatches(ScaledPosition xmin, ScaledPosition xmax, const Lattice &lattice, BigReal patchSize, double maxNumPatches, int staticAtomAssignment, int replicaUniformPatchGrids, int lcpo, int asplit, int bsplit, int csplit)
int basenode(int pid) const
int index_c(int pid) const
unsigned char get_fep_type(int anum) const
static int peOrderingInit
void find_extremes(const Lattice &, BigReal frac=1.0)
void saveComputeMapChanges(int, CkGroupID)
int32 status
Atom status bit fields defined in structures.h.
void finishCUDAPatch(FinishWorkMsg *msg)
void savePatchMap(PatchMapMsg *msg)
void topo_getargs(char **argv)
static int * peCompactOrderingIndex
static void buildNodeAwarePeOrdering(void)
patch_sortop_curve_a(PatchMap *m)
int pressureProfileAtomTypes
int atomsInMigrationGroup
void newPid(ComputeID cid, int pid, int trans=13)
static void send_setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2)
static NAMD_HOST_DEVICE int offset_a(int i)
BigReal max_b(int pid) const
void enqueueSelfA2(LocalWorkMsg *msg)
static ComputeMap * Object()
static void build_ordering(void *)
int numaway_b(void) const
void distributeHomePatches(void)
void assignNode(PatchID, NodeID)
patch_sortop_curve_c(PatchMap *m)
BigReal max_c(int pid) const
void enqueueSelfB2(LocalWorkMsg *msg)
int gridsize_b(void) const
int numPatchesOnNode(int node)
MOStream * put(char data)
static void send_initHostDeviceLDB()
FullAtomList * createAtomLists(const char *basename=0)
#define SET_PRIORITY(MSG, SEQ, PRIO)
pe_sortop_coord_x(ScaledPosition *s)
void enqueueDihedrals(LocalWorkMsg *msg)
Bool is_atom_fixed(int atomnum) const
void finishMIC(LocalWorkMsg *msg)
StringList * find(const char *name) const
void contributeHostDeviceLDB(int peSetLen, int *peSet)
void pack(char *buf, int size)
int isOutputProcessor(int pe)
void get_vdw_params(Real *sigma, Real *epsilon, Real *sigma14, Real *epsilon14, Index index)
void doneSaveComputeMap(CkReductionMsg *)
void unpack(MIStream *msg)
Real rigid_bond_length(int atomnum) const
__thread DeviceCUDA * deviceCUDA
void get_all_positions(Vector *)
BigReal min_b(int pid) const
pe_sortop_topo(TopoManagerWrapper &t, int *d)
void coords(int pe, int *crds)
void enqueueWorkB3(LocalWorkMsg *msg)
NAMD_HOST_DEVICE Vector origin() const
bool operator()(int a, int b) const
Bool noPatchesOnOutputPEs
int * sortAndSplit(int *node_begin, int *node_end, int splitdim)
void sortAtomsForPatches(int *order, int *breaks, const FullAtom *atoms, int nmgrps, int natoms, int ni, int nj, int nk)
bool operator()(int p1, int p2) const
void assignNodeToPatch(void)
int getPatchesInOctet(int pid, PatchID *pids, int *transform_ids=0)
NodeID newNode(ComputeID cid)