46 #ifdef TIMER_COLLECTION 50 struct TimerMicrosecond {
53 clock_gettime(CLOCK_REALTIME, &ts);
55 inline double stop() {
56 struct timespec tsend;
57 clock_gettime(CLOCK_REALTIME, &tsend);
58 return( (tsend.tv_sec - ts.tv_sec) * 1e6
59 + (tsend.tv_nsec - ts.tv_nsec) * 1e-3 );
63 #define TIMER_SLOTS 101 64 #define TIMER_SLOT_WIDTH 1 68 TimerMicrosecond tmicro;
76 #if defined(DEBUG_TIMER_COLLECTION) 79 #if defined(TIMER_HISTOGRAM) 82 int hist[TIMER_SLOTS];
85 TimerEntry() { reset(); }
88 memset(
this, 0,
sizeof(TimerEntry));
90 inline void init(
double t = TIMER_SLOT_WIDTH) {
92 #if defined(TIMER_HISTOGRAM) 94 inv_slotwidth = (slotwidth > 0 ? 1./slotwidth : 0);
99 #if defined(DEBUG_TIMER_COLLECTION) 100 tcharm = CkWallTimer();
106 tcur = tmicro.stop();
107 #if defined(DEBUG_TIMER_COLLECTION) 108 tcharm = CkWallTimer() - tcharm;
117 inline void update() {
120 double delta = tcur - tavg;
121 tavg = tavg + delta / count;
122 double delta2 = tcur - tavg;
123 tvar += delta * delta2;
124 if (tcur > tmax) tmax = tcur;
125 if (tcur < tmin) tmin = tcur;
126 #if defined(TIMER_HISTOGRAM) 127 int index = int(floor(tcur * inv_slotwidth));
128 if (index >= TIMER_SLOTS) index = TIMER_SLOTS - 1;
135 inline void finalize() {
136 if (count > 0) tvar /= count;
138 if (tmin > tmax) tmin = tmax;
// NOTE(review): garbled extraction — the leading integers below are
// original-file line numbers fused into the text, and the TimerSet
// struct header with its timer enum (whose last enumerator is
// NUMTIMERS, used to size these arrays) is missing entirely.  Do not
// edit these lines until reconciled against the original source.
156 TimerEntry t[NUMTIMERS];
157 static const char *tlabel[NUMTIMERS];
// NOTE(review): the TIMER_INIT / TIMER_START / TIMER_STOP / TIMER_DONE /
// TIMER_REPORT macro definitions follow, collapsed onto two lines with
// closing braces of their do/while wrappers lost.  DEBUG_TIMER_COLLECTION
// selects a verbose TIMER_STOP that prints per-call timings for
// SPECIAL_PATCH_ID; TIMER_HISTOGRAM selects a report variant that also
// dumps per-bin counts.  When TIMER_COLLECTION is not defined, all the
// macros become no-ops (`do { } while(0)`).
160 #define TIMER_INIT(T,TYPE) \ 162 (T).t[TimerSet::TYPE].init(); \ 165 #define TIMER_INIT_WIDTH(T,TYPE,WIDTH) \ 167 (T).t[TimerSet::TYPE].init(WIDTH); \ 170 #define TIMER_START(T,TYPE) \ 172 (T).t[TimerSet::TYPE].start(); \ 175 #if defined(DEBUG_TIMER_COLLECTION) 184 #define TIMER_STOP(T,TYPE) \ 186 (T).t[TimerSet::TYPE].stop(); \ 187 (T).t[TimerSet::TYPE].update(); \ 188 double tcur = (T).t[TimerSet::TYPE].tcur; \ 189 int count = (T).t[TimerSet::TYPE].count; \ 190 if (tcur >= 100 && patch->patchID == SPECIAL_PATCH_ID) { \ 191 printf("*** %s timing: %g count: %d line: %d charm: %g\n", \ 192 (T).tlabel[TimerSet::TYPE], tcur, count, __LINE__, \ 193 (T).t[TimerSet::TYPE].tcharm); \ 199 #define TIMER_STOP(T,TYPE) \ 201 (T).t[TimerSet::TYPE].stop(); \ 202 (T).t[TimerSet::TYPE].update(); \ 205 #endif // DEBUG_TIMER_COLLECTION 207 #define TIMER_DONE(T) \ 209 for (int i=0; i < TimerSet::NUMTIMERS; i++) { \ 210 (T).t[i].finalize(); \ 214 #if defined(TIMER_HISTOGRAM) 216 #define TIMER_REPORT(T) \ 218 printf("%13s %11s %11s %8s %8s %11s %8s\n", \ 219 "name", "avg", "std", "min", "max", "sum", "calls"); \ 220 printf("---------------------------------------------------------------" \ 221 "-------------\n"); \ 222 for (int i=0; i < TimerSet::NUMTIMERS; i++) { \ 223 printf("%13s %11g %11g %8g %8g %11g %8d\n", \ 224 (T).tlabel[i], (T).t[i].tavg, (T).t[i].tstd, \ 225 (T).t[i].tmin, (T).t[i].tmax, (T).t[i].tsum, (T).t[i].count); \ 227 printf("---------------------------------------------------------------" \ 228 "-------------\n"); \ 229 for (int i=0; i < TimerSet::NUMTIMERS; i++) { \ 230 printf("%13s %8s %8s %8s\n", \ 231 (T).tlabel[i], "slot", "time", "count"); \ 232 for (int j=0; j < TIMER_SLOTS; j++) { \ 233 printf("%13s %8d %8g %8d\n", \ 234 " ", j, (j+1)*(T).t[i].slotwidth, (T).t[i].hist[j]); \ 236 printf("---------------------------------------------------------------" \ 237 "-------------\n"); \ 241 #else // no HISTOGRAM 243 #define TIMER_REPORT(T) \ 245 printf("%13s %11s 
 %11s %8s %8s %11s %8s\n", \ 246 "name", "avg", "std", "min", "max", "sum", "calls"); \ 247 printf("---------------------------------------------------------------" \ 248 "-------------\n"); \ 249 for (int i=0; i < TimerSet::NUMTIMERS; i++) { \ 250 printf("%13s %11g %11g %8g %8g %11g %8d\n", \ 251 (T).tlabel[i], (T).t[i].tavg, (T).t[i].tstd, \ 252 (T).t[i].tmin, (T).t[i].tmax, (T).t[i].tsum, (T).t[i].count); \ 254 printf("---------------------------------------------------------------" \ 255 "-------------\n"); \ 258 #endif // TIMER_HISTOGRAM 262 #define TIMER_INIT(T,TYPE) do { } while(0) 263 #define TIMER_INIT_WIDTH(T,TYPE,WIDTH) do{ } while(0) 264 #define TIMER_START(T,TYPE) do { } while(0) 265 #define TIMER_STOP(T,TYPE) do { } while(0) 266 #define TIMER_DONE(T) do { } while(0) 267 #define TIMER_REPORT(T) do { } while(0) 269 #endif // TIMER_COLLECTION 333 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 344 BigReal aAwayDist, bAwayDist, cAwayDist;
// NOTE(review): garbled extraction of a class interior (presumably the
// home-patch class of this MD code — TODO confirm).  Leading integers
// are original-file line numbers fused into the text; many declarations
// are truncated mid-signature.  Kept byte-identical pending reconciliation
// with the original source.
// --- GBIS message counters and per-phase box-closed flags ---
355 Bool isNewProxyAdded;
356 int numGBISP1Arrived, numGBISP2Arrived, numGBISP3Arrived;
357 bool phase1BoxClosedCalled;
358 bool phase2BoxClosedCalled;
359 bool phase3BoxClosedCalled;
// --- fragments of force-integration method signatures; __restrict
// promises no aliasing, and __attribute__((__noinline__)) is applied
// on non-Windows builds ---
407 const Force * __restrict force_arr,
411 #if !defined(WIN32) && !defined(WIN64) 412 __attribute__((__noinline__))
417 const Force * __restrict force_arr1,
418 const Force * __restrict force_arr2,
419 const Force * __restrict force_arr3,
425 #if !defined(WIN32) && !defined(WIN64) 426 __attribute__((__noinline__))
434 #if !defined(WIN32) && !defined(WIN64) 435 __attribute__((__noinline__))
// --- node-aware proxy spanning-tree construction ---
532 #ifdef NODEAWARE_PROXY_SPANNINGTREE 534 void buildNodeAwareSpanningTree(
void);
535 void setupChildrenFromProxySpanningTree();
551 int findSubroots(
int dim,
int* subroots,
int psize,
int* pidscopy);
// --- CUDA/HIP structure-of-arrays (SOA) staging buffer and the
// AOS<->SOA copy routines ---
556 #ifdef TIMER_COLLECTION 586 int sizeCudaAtomList;
594 void sort_solvent_atoms();
598 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 599 unsigned char* soa_buffer =
nullptr;
600 size_t soa_buffer_size = 0;
610 void copy_atoms_to_SOA();
618 void calculate_derived_SOA();
624 void copy_forces_to_SOA();
629 void copy_updates_to_AOS();
630 void copy_forces_to_AOS();
635 void zero_global_forces_SOA();
// --- optional separate-waters support, pairlist-check state,
// migration bookkeeping ---
642 #if NAMD_SeparateWaters != 0 645 void separateAtoms();
656 #if NAMD_SeparateWaters != 0 657 int checkpoint_numWaterAtoms;
663 Lattice doPairlistCheck_lattice;
664 BigReal doPairlistCheck_newTolerance;
680 int migrationSuspended;
681 int patchMigrationCounter;
686 #ifdef NODEAWARE_PROXY_SPANNINGTREE 702 int settle_initialized;
// --- lone-pair / TIP4P / SWM4 force redistribution and repositioning
// helpers (signatures truncated by the extraction) ---
712 void redistrib_lonepair_forces(
const int,
Tensor *);
715 void redistrib_alchpair_forces(
const int);
719 void redistrib_tip4p_forces(
const int,
Tensor*);
724 void redistrib_swm4_forces(
const int,
Tensor*);
733 void reposition_colinear_lonepair(
741 void reposition_relative_lonepair(
748 void reposition_all_lonepairs(
void);
752 void reposition_all_alchpairs(
void);
757 void redistrib_colinear_lp_force(
765 void redistrib_relative_lp_force(
768 Tensor *virial,
int midpt);
777 void redistrib_lp_water_force(
783 void write_tip4_props(
void);
// --- persistent-communication handles (Charm++ CMK_PERSISTENT_COMM) ---
787 #if CMK_PERSISTENT_COMM 788 PersistentHandle *localphs;
// NOTE(review): the span below is not compilable C++ — it is an
// unpunctuated member list (declarations with no terminating
// semicolons), apparently an auto-generated index appended by the
// extraction tool rather than original source text.  It should be
// reconciled against the real class definition and removed or
// re-punctuated there; kept byte-identical here.  The stray line
// "pad length of arrays up to this next multiple" looks like a comment
// fragment detached from the MAXFACTOR / pad parameter it described.
void depositMigration(MigrateAtomsMsg *)
void recvCheckpointLoad(CheckpointAtomsMsg *msg)
void minimize_rattle2(const BigReal, Tensor *virial, bool forces=false)
void positionsReady_SOA(int doMigration=0)
void registerProxy(RegisterProxyMsg *)
void addForceToMomentum(FullAtom *__restrict atom_arr, const Force *__restrict force_arr, const BigReal dt, int num_atoms) __attribute__((__noinline__))
int rattle1(const BigReal, Tensor *virial, SubmitReduction *)
void rattle2(const BigReal, Tensor *virial)
void recvNodeAwareSpanningTree(ProxyNodeAwareSpanningTreeMsg *msg)
void gbisComputeAfterP2()
void receiveResults(ProxyResultVarsizeMsg *msg)
ExchangeAtomsMsg * exchange_msg
virtual void boxClosed(int)
void loweAndersenVelocities()
void recvCheckpointReq(int task, const char *key, int replica, int pe)
void exchangeCheckpoint(int scriptTask, int &bpc)
void PatchDataSOA_initialize(PatchDataSOA *p)
void positionsReady(int doMigration=0)
std::vector< RattleList > rattleList
size_t PatchDataSOA_set_size(PatchDataSOA *p, int natoms, int pad=MAXFACTOR)
int berendsenPressure_count
void doGroupSizeCheck_SOA()
void positionsReady_GPU(int doMigration=0, int startup=0)
int rattle1_SOA(const BigReal, Tensor *virial, SubmitReduction *)
void updateAtomCount(const int n, const int reallocate)
FullAtomList & getAtomList()
void unregisterProxy(UnregisterProxyMsg *)
ResizeArray< FullAtom > atoms
std::map< std::string, checkpoint_t * > checkpoints
void submitLoadStats(int timestep)
void mollyMollify(Tensor *virial)
int rattle1old(const BigReal, Tensor *virial, SubmitReduction *)
void recvSpanningTree(int *t, int n)
void replaceForces(ExtForce *f)
std::vector< int > settleList
void gbisComputeAfterP1()
void sendNodeAwareSpanningTree()
MigrateAtomsMsg * msgbuf[PatchMap::MaxOneAway]
int hardWallDrude(const BigReal, Tensor *virial, SubmitReduction *)
void setGBISIntrinsicRadii()
void saveForce(const int ftag=Results::normal)
std::vector< Vector > posNew
void buildRattleList_SOA()
std::vector< RattleParam > rattleParam
friend class SequencerCUDA
void PatchDataSOA_set_buffer(PatchDataSOA *p, void *mybuffer)
pad length of arrays up to this next multiple
void recvExchangeReq(int req)
void buildSpanningTree(void)
std::vector< Vector > velNew
void receiveResult(ProxyGBISP1ResultMsg *msg)
void addForceToMomentum3(FullAtom *__restrict atom_arr, const Force *__restrict force_arr1, const Force *__restrict force_arr2, const Force *__restrict force_arr3, const BigReal dt1, const BigReal dt2, const BigReal dt3, int num_atoms) __attribute__((__noinline__))
void addVelocityToPosition(FullAtom *__restrict atom_arr, const BigReal dt, int num_atoms) __attribute__((__noinline__))
std::vector< int > noconstList
ForceList f[Results::maxNumForces]
void loweAndersenFinish()
void pad(char *s, int len)
void addRattleForce(const BigReal invdt, Tensor &wc)
void useSequencer(Sequencer *sequencerPtr)
void recvCheckpointStore(CheckpointAtomsMsg *msg)
void recvExchangeMsg(ExchangeAtomsMsg *msg)
void exchangeAtoms(int scriptTask)