47 #ifdef TIMER_COLLECTION
// Stopwatch built on clock_gettime() that reports elapsed time in
// microseconds.
// NOTE(review): both readings use CLOCK_REALTIME, which POSIX allows to be
// set or stepped. An adjustment between the two readings would distort the
// interval returned by stop() and can even make it negative. CLOCK_MONOTONIC
// may be a better fit for interval timing -- confirm platform support first.
51 struct TimerMicrosecond {
// Records the starting timestamp in ts (the enclosing function header is
// not part of this excerpt).
54 clock_gettime(CLOCK_REALTIME, &ts);
// Returns the time elapsed since ts was recorded, in microseconds:
// the seconds difference scaled by 1e6 plus the nanoseconds difference
// scaled by 1e-3.
56 inline double stop() {
57 struct timespec tsend;
58 clock_gettime(CLOCK_REALTIME, &tsend);
59 return( (tsend.tv_sec - ts.tv_sec) * 1e6
60 + (tsend.tv_nsec - ts.tv_nsec) * 1e-3 );
// Number of histogram bins: sizes TimerEntry::hist and is the upper bound
// used when the bin index is clamped in TimerEntry::update().
64 #define TIMER_SLOTS 101
// Default argument of TimerEntry::init().
// NOTE(review): presumably the histogram bin width in the same units as
// tcur (microseconds) -- the assignment to slotwidth is not visible here.
65 #define TIMER_SLOT_WIDTH 1
69 TimerMicrosecond tmicro;
77 #if defined(DEBUG_TIMER_COLLECTION)
80 #if defined(TIMER_HISTOGRAM)
83 int hist[TIMER_SLOTS];
// A new entry starts from the cleared state produced by reset().
86 TimerEntry() { reset(); }
// Zero-fills the entire object, every member included.
89 memset(
this, 0,
sizeof(TimerEntry));
// Per-timer setup; t defaults to TIMER_SLOT_WIDTH.
91 inline void init(
double t = TIMER_SLOT_WIDTH) {
93 #if defined(TIMER_HISTOGRAM)
// Cache the reciprocal of the slot width so update() can multiply instead
// of divide; a non-positive width yields 0 rather than a division by zero.
95 inv_slotwidth = (slotwidth > 0 ? 1./slotwidth : 0);
// NOTE(review): the enclosing function headers are not part of this
// excerpt; these lines presumably belong to TimerEntry::start() and
// TimerEntry::stop(), which the TIMER_START / TIMER_STOP macros call.
// Debug builds additionally take a CkWallTimer() reading at the start...
100 #if defined(DEBUG_TIMER_COLLECTION)
101 tcharm = CkWallTimer();
// tcur receives the interval reported by tmicro.stop().
107 tcur = tmicro.stop();
108 #if defined(DEBUG_TIMER_COLLECTION)
// ...and replace it with the CkWallTimer() difference at the end.
109 tcharm = CkWallTimer() - tcharm;
// Folds the most recent sample (tcur) into the running statistics.
// NOTE(review): the division below relies on count being non-zero here;
// the statement that advances count is not visible in this excerpt.
118 inline void update() {
// Incremental mean / sum-of-squared-deviations step: delta is measured
// against the previous mean, delta2 against the updated mean, and their
// product is accumulated in tvar (finalize() later divides by count).
121 double delta = tcur - tavg;
122 tavg = tavg + delta / count;
123 double delta2 = tcur - tavg;
124 tvar += delta * delta2;
// Track the extreme samples seen so far.
125 if (tcur > tmax) tmax = tcur;
126 if (tcur < tmin) tmin = tcur;
127 #if defined(TIMER_HISTOGRAM)
// Map the sample to a histogram slot; samples beyond the last slot are
// counted in the last slot.
128 int index = int(floor(tcur * inv_slotwidth));
129 if (index >= TIMER_SLOTS) index = TIMER_SLOTS - 1;
// tcur is a difference of two CLOCK_REALTIME readings, so it is negative
// if the wall clock was stepped backwards during the measurement. Clamp
// the lower bound as well so the slot index can never fall below 0.
if (index < 0) index = 0;
// Converts the accumulated statistics into their final form.
136 inline void finalize() {
// tvar holds the running sum built up by update(); dividing by count turns
// it into the population variance (divisor is count, not count - 1).
137 if (count > 0) tvar /= count;
// Keep the reported minimum from exceeding the reported maximum.
// NOTE(review): presumably only triggers when no sample was recorded --
// confirm against the initial values assigned to tmin / tmax.
139 if (tmin > tmax) tmin = tmax;
157 TimerEntry t[NUMTIMERS];
158 static const char *tlabel[NUMTIMERS];
// Timer control macros. T is an object exposing a t[] array of timer
// entries; TYPE names an index constant inside TimerSet.
//   TIMER_INIT(T,TYPE)             calls init() with its default argument
//   TIMER_INIT_WIDTH(T,TYPE,WIDTH) calls init(WIDTH)
//   TIMER_START(T,TYPE)            calls start()
161 #define TIMER_INIT(T,TYPE) \
163 (T).t[TimerSet::TYPE].init(); \
166 #define TIMER_INIT_WIDTH(T,TYPE,WIDTH) \
168 (T).t[TimerSet::TYPE].init(WIDTH); \
171 #define TIMER_START(T,TYPE) \
173 (T).t[TimerSet::TYPE].start(); \
176 #if defined(DEBUG_TIMER_COLLECTION)
// TIMER_STOP(T,TYPE): calls stop() and then update() on the selected timer.
// The first definition (DEBUG_TIMER_COLLECTION) additionally prints the
// timer label, tcur, count, __LINE__ and tcharm whenever tcur >= 100 and the
// expansion site's `patch` has patchID == SPECIAL_PATCH_ID. It therefore
// needs a `patch` pointer and SPECIAL_PATCH_ID visible where it is expanded,
// and it declares locals named tcur and count inside its body.
// The second definition is the non-debug variant without the printout.
185 #define TIMER_STOP(T,TYPE) \
187 (T).t[TimerSet::TYPE].stop(); \
188 (T).t[TimerSet::TYPE].update(); \
189 double tcur = (T).t[TimerSet::TYPE].tcur; \
190 int count = (T).t[TimerSet::TYPE].count; \
191 if (tcur >= 100 && patch->patchID == SPECIAL_PATCH_ID) { \
192 printf("*** %s timing: %g count: %d line: %d charm: %g\n", \
193 (T).tlabel[TimerSet::TYPE], tcur, count, __LINE__, \
194 (T).t[TimerSet::TYPE].tcharm); \
200 #define TIMER_STOP(T,TYPE) \
202 (T).t[TimerSet::TYPE].stop(); \
203 (T).t[TimerSet::TYPE].update(); \
206 #endif // DEBUG_TIMER_COLLECTION
// TIMER_DONE(T): calls finalize() on every one of the TimerSet::NUMTIMERS
// entries in T.t.
208 #define TIMER_DONE(T) \
210 for (int i=0; i < TimerSet::NUMTIMERS; i++) { \
211 (T).t[i].finalize(); \
215 #if defined(TIMER_HISTOGRAM)
// TIMER_REPORT(T), histogram build: prints a summary table to stdout with
// one row per timer (label, tavg, tstd, tmin, tmax, tsum, count), followed
// by a per-timer histogram listing. Each histogram row shows the slot
// number, (j+1)*slotwidth (the upper edge of slot j) and the slot's count.
217 #define TIMER_REPORT(T) \
219 printf("%13s %11s %11s %8s %8s %11s %8s\n", \
220 "name", "avg", "std", "min", "max", "sum", "calls"); \
221 printf("---------------------------------------------------------------" \
222 "-------------\n"); \
223 for (int i=0; i < TimerSet::NUMTIMERS; i++) { \
224 printf("%13s %11g %11g %8g %8g %11g %8d\n", \
225 (T).tlabel[i], (T).t[i].tavg, (T).t[i].tstd, \
226 (T).t[i].tmin, (T).t[i].tmax, (T).t[i].tsum, (T).t[i].count); \
228 printf("---------------------------------------------------------------" \
229 "-------------\n"); \
230 for (int i=0; i < TimerSet::NUMTIMERS; i++) { \
231 printf("%13s %8s %8s %8s\n", \
232 (T).tlabel[i], "slot", "time", "count"); \
233 for (int j=0; j < TIMER_SLOTS; j++) { \
234 printf("%13s %8d %8g %8d\n", \
235 " ", j, (j+1)*(T).t[i].slotwidth, (T).t[i].hist[j]); \
237 printf("---------------------------------------------------------------" \
238 "-------------\n"); \
242 #else // no HISTOGRAM
// TIMER_REPORT(T), build without TIMER_HISTOGRAM: prints only the summary
// table to stdout -- a header, a separator, and one row per timer with
// label, tavg, tstd, tmin, tmax, tsum and count.
244 #define TIMER_REPORT(T) \
246 printf("%13s %11s %11s %8s %8s %11s %8s\n", \
247 "name", "avg", "std", "min", "max", "sum", "calls"); \
248 printf("---------------------------------------------------------------" \
249 "-------------\n"); \
250 for (int i=0; i < TimerSet::NUMTIMERS; i++) { \
251 printf("%13s %11g %11g %8g %8g %11g %8d\n", \
252 (T).tlabel[i], (T).t[i].tavg, (T).t[i].tstd, \
253 (T).t[i].tmin, (T).t[i].tmax, (T).t[i].tsum, (T).t[i].count); \
255 printf("---------------------------------------------------------------" \
256 "-------------\n"); \
259 #endif // TIMER_HISTOGRAM
// Fallback definitions: every timer macro expands to an empty
// do { } while(0) statement, so call sites still compile (and still take a
// trailing semicolon) while doing no work and never evaluating their
// arguments.
263 #define TIMER_INIT(T,TYPE) do { } while(0)
264 #define TIMER_INIT_WIDTH(T,TYPE,WIDTH) do{ } while(0)
265 #define TIMER_START(T,TYPE) do { } while(0)
266 #define TIMER_STOP(T,TYPE) do { } while(0)
267 #define TIMER_DONE(T) do { } while(0)
268 #define TIMER_REPORT(T) do { } while(0)
270 #endif // TIMER_COLLECTION
284 BigReal aAwayDist, bAwayDist, cAwayDist;
295 Bool isNewProxyAdded;
296 int numGBISP1Arrived, numGBISP2Arrived, numGBISP3Arrived;
297 bool phase1BoxClosedCalled;
298 bool phase2BoxClosedCalled;
299 bool phase3BoxClosedCalled;
339 const Force * __restrict force_arr,
343 #if !defined(WIN32) && !defined(WIN64)
344 __attribute__((__noinline__))
349 const Force * __restrict force_arr1,
350 const Force * __restrict force_arr2,
351 const Force * __restrict force_arr3,
357 #if !defined(WIN32) && !defined(WIN64)
358 __attribute__((__noinline__))
366 #if !defined(WIN32) && !defined(WIN64)
367 __attribute__((__noinline__))
457 #ifdef NODEAWARE_PROXY_SPANNINGTREE
459 void buildNodeAwareSpanningTree(
void);
460 void setupChildrenFromProxySpanningTree();
476 int findSubroots(
int dim,
int* subroots,
int psize,
int* pidscopy);
481 #ifdef TIMER_COLLECTION
505 #if NAMD_SeparateWaters != 0
508 void separateAtoms();
519 #if NAMD_SeparateWaters != 0
520 int checkpoint_numWaterAtoms;
526 Lattice doPairlistCheck_lattice;
527 BigReal doPairlistCheck_newTolerance;
543 int migrationSuspended;
544 int patchMigrationCounter;
549 #ifdef NODEAWARE_PROXY_SPANNINGTREE
565 int settle_initialized;
573 void redistrib_lonepair_forces(
const int,
Tensor *);
576 void redistrib_alchpair_forces(
const int);
580 void redistrib_tip4p_forces(
const int,
Tensor*);
585 void redistrib_swm4_forces(
const int,
Tensor*);
594 void reposition_colinear_lonepair(
602 void reposition_relative_lonepair(
609 void reposition_all_lonepairs(
void);
613 void reposition_all_alchpairs(
void);
618 void redistrib_colinear_lp_force(
626 void redistrib_relative_lp_force(
629 Tensor *virial,
int midpt);
638 void redistrib_lp_water_force(
644 void write_tip4_props(
void);
648 #if CMK_PERSISTENT_COMM
649 PersistentHandle *localphs;
void depositMigration(MigrateAtomsMsg *)
void recvCheckpointLoad(CheckpointAtomsMsg *msg)
void addVelocityToPosition(FullAtom *__restrict atom_arr, const BigReal dt, int num_atoms)
void minimize_rattle2(const BigReal, Tensor *virial, bool forces=false)
void registerProxy(RegisterProxyMsg *)
int rattle1(const BigReal, Tensor *virial, SubmitReduction *)
void addForceToMomentum(FullAtom *__restrict atom_arr, const Force *__restrict force_arr, const BigReal dt, int num_atoms)
void rattle2(const BigReal, Tensor *virial)
static __thread float4 * forces
void recvNodeAwareSpanningTree(ProxyNodeAwareSpanningTreeMsg *msg)
void gbisComputeAfterP2()
void receiveResults(ProxyResultVarsizeMsg *msg)
ExchangeAtomsMsg * exchange_msg
virtual void boxClosed(int)
void loweAndersenVelocities()
void recvCheckpointReq(int task, const char *key, int replica, int pe)
void exchangeCheckpoint(int scriptTask, int &bpc)
void positionsReady(int doMigration=0)
std::vector< RattleList > rattleList
int berendsenPressure_count
FullAtomList & getAtomList()
void unregisterProxy(UnregisterProxyMsg *)
ResizeArray< FullAtom > atoms
std::map< std::string, checkpoint_t * > checkpoints
void submitLoadStats(int timestep)
void mollyMollify(Tensor *virial)
int rattle1old(const BigReal, Tensor *virial, SubmitReduction *)
void recvSpanningTree(int *t, int n)
void replaceForces(ExtForce *f)
std::vector< int > settleList
void gbisComputeAfterP1()
void sendNodeAwareSpanningTree()
MigrateAtomsMsg * msgbuf[PatchMap::MaxOneAway]
int hardWallDrude(const BigReal, Tensor *virial, SubmitReduction *)
void setGBISIntrinsicRadii()
void saveForce(const int ftag=Results::normal)
std::vector< Vector > posNew
std::vector< RattleParam > rattleParam
void recvExchangeReq(int req)
void buildSpanningTree(void)
std::vector< Vector > velNew
void receiveResult(ProxyGBISP1ResultMsg *msg)
void addForceToMomentum3(FullAtom *__restrict atom_arr, const Force *__restrict force_arr1, const Force *__restrict force_arr2, const Force *__restrict force_arr3, const BigReal dt1, const BigReal dt2, const BigReal dt3, int num_atoms)
std::vector< int > noconstList
ForceList f[Results::maxNumForces]
void loweAndersenFinish()
void addRattleForce(const BigReal invdt, Tensor &wc)
static __thread int num_atoms
void useSequencer(Sequencer *sequencerPtr)
void recvCheckpointStore(CheckpointAtomsMsg *msg)
void recvExchangeMsg(ExchangeAtomsMsg *msg)
void exchangeAtoms(int scriptTask)