32 #include "ReductionMgr.decl.h" 38 #define MIN_DEBUG_LEVEL 4 63 NAMD_bug(
"ReductionSet size specified for REDUCTIONS_BASIC or REDUCTIONS_AMD.");
67 if ( size == -1 )
NAMD_bug(
"ReductionSet size not specified.");
96 if ( (*current)->sequenceNumber == seqNum )
return *current;
97 current = &((*current)->next);
111 if ( (*current)->sequenceNumber == seqNum )
break;
112 current = &((*current)->next);
115 if ( ! *current ) {
NAMD_die(
"ReductionSet::removeData on missing seqNum"); }
118 *current = (*current)->
next;
123 const int max_intranode_children,
124 const int max_internode_children,
135 const int num_pes = CkNumPes();
136 const int num_node_pes = CmiNumPesOnPhysicalNode(CmiPhysicalNodeID(pe));
137 int *node_pes =
new int[num_node_pes];
139 const int first_pe = CmiGetFirstPeOnPhysicalNode(CmiPhysicalNodeID(pe));
141 int *node_ids =
new int[num_pes];
142 int first_pe_index = -1;
146 if (pe == 0 && first_pe != pe) {
147 NAMD_die(
"PE 0 is not the first physical node. This shouldn't happen");
152 for (i = 0; i < num_pes; i++) {
154 if (CmiGetFirstPeOnPhysicalNode(CmiPhysicalNodeID(i)) == i) {
155 node_ids[num_nodes] = i;
157 first_pe_index = num_nodes;
162 const int i1 = (i + first_pe) % num_pes;
163 if (CmiPeOnSamePhysicalNode(first_pe,i1)) {
164 if ( node_pe_count == num_node_pes )
165 NAMD_bug(
"ReductionMgr::buildSpanTree found inconsistent physical node data from Charm++ runtime");
166 node_pes[node_pe_count] = i1;
168 pe_index = node_pe_count;
172 if ( pe_index < 0 || first_pe_index < 0 )
173 NAMD_bug(
"ReductionMgr::buildSpanTree found inconsistent physical node data from Charm++ runtime");
178 int first_loc_child_index = pe_index * max_intranode_children + 1;
179 int last_loc_child_index
180 = first_loc_child_index + max_intranode_children - 1;
181 if (first_loc_child_index > num_node_pes) {
182 first_loc_child_index = num_node_pes;
183 last_loc_child_index = num_node_pes;
185 if (last_loc_child_index >= num_node_pes)
186 last_loc_child_index = num_node_pes-1;
192 int first_rem_child_index = num_nodes;
193 int last_rem_child_index = num_nodes;
195 int *rem_child_index =
new int[max_internode_children];
197 if (first_pe != pe) {
200 my_parent_index = (pe_index-1)/max_intranode_children;
201 *parent = node_pes[my_parent_index];
207 int range_end = num_nodes;
210 my_parent_index = -1;
214 while ( first_pe_index != range_begin ) {
215 my_parent_index = range_begin;
217 for (
int i = 0; i < max_internode_children; ++i ) {
218 int split = range_begin + ( range_end - range_begin ) / ( max_internode_children - i );
219 if ( first_pe_index <
split ) { range_end =
split;
break; }
220 else { range_begin =
split; }
223 *parent = node_ids[my_parent_index];
227 int prev_child_index = range_begin;
229 for (
int i = 0; i < max_internode_children; ++i ) {
230 if ( range_begin >= range_end )
break;
231 if ( range_begin > prev_child_index ) {
232 rem_child_index[rem_children++] = prev_child_index = range_begin;
234 range_begin += ( range_end - range_begin ) / ( max_internode_children - i );
242 if (first_loc_child_index != num_node_pes) {
243 loc_children = last_loc_child_index - first_loc_child_index + 1;
244 *num_children += loc_children;
251 *num_children += rem_children;
256 if (*num_children == 0)
259 *children =
new int[*num_children];
263 if (loc_children > 0) {
264 for(k=first_loc_child_index; k <= last_loc_child_index; k++) {
266 (*children)[child++]=node_pes[k];
269 if (rem_children > 0) {
270 for(k=0; k < rem_children; k++) {
272 (*children)[child++]=node_ids[rem_child_index[k]];
276 delete [] rem_child_index;
283 if (CkpvAccess(ReductionMgr_instance) == 0) {
284 CkpvAccess(ReductionMgr_instance) =
this;
286 DebugM(1,
"ReductionMgr::ReductionMgr() - another instance exists!\n");
290 &myParent,&numChildren,&children);
301 #if 0 // Old spanning tree 308 if (firstChild > CkNumPes()) firstChild = CkNumPes();
310 if (lastChild > CkNumPes()) lastChild = CkNumPes();
315 reductionSets[i] = 0;
318 DebugM(1,
"ReductionMgr() instantiated.\n");
326 delete reductionSets[i];
332 ReductionSet* ReductionMgr::getSet(
int setID,
int size) {
333 if ( reductionSets[setID] == 0 ) {
334 reductionSets[setID] =
new ReductionSet(setID,size,numChildren);
340 CProxy_ReductionMgr reductionProxy(thisgroup);
341 reductionProxy[myParent].remoteRegister(msg);
344 if ( size != -1 )
NAMD_bug(
"ReductionMgr::getSet size set");
345 }
else if ( size < 0 || reductionSets[setID]->dataSize != size ) {
346 NAMD_bug(
"ReductionMgr::getSet size mismatch");
348 return reductionSets[setID];
352 void ReductionMgr::delSet(
int setID) {
354 if (
set && ! set->submitsRegistered & ! set->requireRegistered ) {
359 CProxy_ReductionMgr reductionProxy(thisgroup);
360 reductionProxy[myParent].remoteUnregister(msg);
363 reductionSets[setID] = 0;
379 NAMD_die(
"ReductionMgr::willSubmit called while reductions outstanding!");
382 set->submitsRegistered++;
387 charm_handle->
master =
this;
400 if ( set->getData(set->nextSequenceNumber)->submitsRecorded ) {
401 NAMD_die(
"SubmitReduction deleted while reductions outstanding!");
404 set->submitsRegistered--;
418 mergeAndDeliver(
set,seqNum);
422 handle->
data =
set->getData(seqNum)->data;
431 if ( set->getData(set->nextSequenceNumber)->submitsRecorded ) {
432 NAMD_die(
"ReductionMgr::remoteRegister called while reductions outstanding on parent!");
435 set->submitsRegistered++;
436 set->addToRemoteSequenceNumber[childIndex(msg->
sourceNode)]
437 =
set->nextSequenceNumber;
449 if ( set->getData(set->nextSequenceNumber)->submitsRecorded ) {
450 NAMD_die(
"SubmitReduction deleted while reductions outstanding on parent!");
453 set->submitsRegistered--;
464 +
set->addToRemoteSequenceNumber[childIndex(msg->
sourceNode)];
468 if ( size != set->dataSize ) {
469 NAMD_bug(
"ReductionMgr::remoteSubmit data sizes do not match.");
476 #pragma disjoint (*curData, *newData) 480 for (
int i = 0; i < size; ++i ) {
481 if ( newData[i] > curData[i] ) {
482 curData[i] = newData[i];
486 for (
int i = 0; i < size; ++i ) {
487 curData[i] += newData[i];
496 mergeAndDeliver(
set,seqNum);
501 void ReductionMgr::mergeAndDeliver(
ReductionSet *
set,
int seqNum) {
505 set->nextSequenceNumber++;
509 NAMD_bug(
"ReductionMgr::mergeAndDeliver not ready to deliver.");
513 if ( set->requireRegistered ) {
514 if ( set->threadIsWaiting && set->waitingForSequenceNumber == seqNum) {
516 CthAwaken(set->waitingThread);
519 NAMD_die(
"ReductionSet::deliver will never deliver data");
528 for (
int i = 0; i < msg->
dataSize; ++i ) {
531 CProxy_ReductionMgr reductionProxy(thisgroup);
532 reductionProxy[myParent].remoteSubmit(msg);
533 delete set->removeData(seqNum);
547 set->requireRegistered++;
548 if ( set->getData(set->nextSequenceNumber)->submitsRecorded ) {
549 NAMD_die(
"ReductionMgr::willRequire called while reductions outstanding!");
553 charm_handle->reductionSetID = setID;
554 charm_handle->sequenceNumber =
set->nextSequenceNumber;
555 charm_handle->master =
this;
565 int setID = handle->reductionSetID;
567 if ( set->getData(set->nextSequenceNumber)->submitsRecorded ) {
568 NAMD_die(
"RequireReduction deleted while reductions outstanding!");
571 set->requireRegistered--;
578 int setID = handle->reductionSetID;
580 int seqNum = handle->sequenceNumber;
583 set->threadIsWaiting = 1;
584 set->waitingForSequenceNumber = seqNum;
585 set->waitingThread = CthSelf();
589 set->threadIsWaiting = 0;
592 delete handle->currentData;
593 handle->currentData =
set->removeData(seqNum);
594 handle->
data = handle->currentData->
data;
595 handle->sequenceNumber = ++seqNum;
638 ReductionValue::operator int(){
639 return (
int)reducedValue;
642 ReductionValue::operator double(){
649 this->
set[i].reducedValue = 0.0;
667 this->
set[i].reducedValue = 0.0;
674 CmiLock(this->
set[i].valueLock);
675 this->
set[i].reducedValue = 0.0;
676 CmiUnlock(this->
set[i].valueLock);
694 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 695 CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
696 PatchData *patchData = cpdata.ckLocalBranch();
698 nodeReduction->
zero();
704 NAMD_bug(
"SubmitReductionShared is only supported with CUDA/HIP");
709 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 710 CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
711 PatchData *patchData = cpdata.ckLocalBranch();
718 NAMD_bug(
"SubmitReductionShared is only supported with CUDA/HIP");
722 #include "ReductionMgr.def.h"
void buildSpanTree(const int pe, const int max_intranode_children, const int max_internode_children, int *parent, int *num_children, int **children)
ReductionValue & operator[](int index)
int * addToRemoteSequenceNumber
friend class RequireReductionCharm
SimParameters * simParameters
ReductionSetData * getData(int seqNum)
ReductionSet(int setID, int size, int numChildren)
void remoteRegister(ReductionRegisterMsg *msg)
SubmitReduction * willSubmit(int setID, int size=-1)
NodeReduction * reductionBackend
double operator+=(double other)
#define REDUCTION_MAX_CHILDREN
friend class SubmitReductionShared
static Units next(Units u)
void NAMD_bug(const char *err_msg)
ReductionValue & item(int index)
ReductionSetData * removeData(int seqNum)
SubmitReductionShared(void)
void NAMD_die(const char *err_msg)
ReductionValue set[REDUCTION_MAX_RESERVED]
std::vector< std::string > split(const std::string &text, std::string delimiter)
void remoteSubmit(ReductionSubmitMsg *msg)
void increment_no_lock(double rvalue)
void setVal(const NodeReduction *other)
RequireReduction * willRequire(int setID, int size=-1)
friend class SubmitReductionCharm
friend class RequireReductionShared
RequireReductionShared(void)
void remoteUnregister(ReductionRegisterMsg *msg)
ReductionSetData * dataQueue