18 #include "ComputeMgr.decl.h" 32 #define MSM_REDUCE_GRID 36 #define MSM_GRID_CUTOFF_DECOMP 40 #define MSM_SKIP_TOO_DISTANT_BLOCKS 46 #define MSM_SKIP_BEYOND_SPHERE 50 #define MSM_NODE_MAPPING 53 #define MSM_NODE_MAPPING_STATS 54 #undef MSM_NODE_MAPPING_STATS 61 #define MSM_FOLD_FACTOR 76 #define MSM_FIXED_SIZE_GRID_MSG 77 #undef MSM_FIXED_SIZE_GRID_MSG 85 #define DEBUG_MEMORY_ALIGNMENT 86 #undef DEBUG_MEMORY_ALIGNMENT 111 class GridMsg :
public CkMcastBaseMsg,
public CMessage_GridMsg {
162 NAMD_die(
"Exceeded maximum number of MSM levels\n");
185 NAMD_die(
"Exceeded maximum number of MSM levels\n");
188 nlevels*
sizeof(CProxy_MsmC1HermiteBlock));
195 nlevels*
sizeof(CProxy_MsmC1HermiteBlock));
205 void put(
const CProxy_MsmGridCutoff *p) {
210 void get(CProxy_MsmGridCutoff *p) {
217 public CMessage_MsmC1HermiteGridCutoffProxyMsg
223 void put(
const CProxy_MsmC1HermiteGridCutoff *p) {
225 sizeof(CProxy_MsmC1HermiteGridCutoff));
229 void get(CProxy_MsmC1HermiteGridCutoff *p) {
231 sizeof(CProxy_MsmC1HermiteGridCutoff));
246 public CkMcastBaseMsg,
public CMessage_MsmGridCutoffSetupMsg
253 const CProxyElement_MsmBlock *q
260 CProxyElement_MsmBlock *q
268 public CkMcastBaseMsg,
public CMessage_MsmC1HermiteGridCutoffSetupMsg
275 const CProxyElement_MsmC1HermiteBlock *q
278 sizeof(CProxyElement_MsmC1HermiteBlock));
283 CProxyElement_MsmC1HermiteBlock *q
286 sizeof(CProxyElement_MsmC1HermiteBlock));
297 for (
int i = 0; i <
MAX; i++)
timing[i] = 0;
299 void done(
double tm[],
int n) {
300 for (
int i = 0; i <
MAX; i++)
timing[i] = tm[i];
304 CkPrintf(
"MSM timings:\n");
310 CkPrintf(
" communication %8.6f sec\n",
timing[
COMM]);
332 CkPrintf(
"MSM profiling:\n");
333 CkPrintf(
" total executions of inner loop: %d\n", sum);
334 for (
int i = 0; i <
MAX; i++) {
335 CkPrintf(
" executing %d times: %d (%5.2f%%)\n",
407 void subtractTiming() {
411 if (++cntTiming >= numTiming) {
412 CkCallback cb(CkReductionTarget(
MsmTimer, done), msmTimer);
414 CkReduction::sum_double, cb);
421 void initProfiling() {
426 void addProfiling() {
430 void subtractProfiling() {
433 void doneProfiling() {
434 if (++cntProfiling >= numProfiling) {
435 CkCallback cb(CkReductionTarget(
MsmProfiler, done), msmProfiler);
437 CkReduction::sum_int, cb);
452 static inline int sign(
int n) {
453 return (n < 0 ? -1 : (n > 0 ? 1 : 0));
458 int& ia,
int& ib,
int isperiodic);
483 #ifdef MSM_NODE_MAPPING 489 for (
int l = 0; l < level; l++) {
496 const float scalingFactor = 3;
503 gn =
map.
gc[0].extent();
505 const int volumeFullCutoff = (
map.
bsx[0] + gn.
i - 1) *
508 int volumeBlock = n.
i * n.
j * n.
k;
510 int volumeCutoff = nc.
i * nc.
j * nc.
k;
511 return( scalingFactor * (
float(volumeBlock) / volumeFullBlock) *
512 (
float(volumeCutoff) / volumeFullCutoff) );
517 int volumeBlock = n.
i * n.
j * n.
k;
518 return(
float(volumeBlock) / volumeFullBlock );
542 for (
int n = 0; n <
VMAX; n++) {
virial[n] = 0; }
549 for (
int k = ka; k <= kb; k++) {
550 for (
int j = ja; j <= jb; j++) {
551 for (
int i = ia; i <= ib; i++) {
573 CProxy_MsmTimer msmTimer;
577 CkCallback *cbTiming;
581 CProxy_MsmProfiler msmProfiler;
585 CkCallback *cbProfiling;
671 g = 1 + (s-1)*(-1./2 + (s-1)*(3./8));
672 dg = (2*r_a)*(-1./2 + (s-1)*(3./4));
675 g = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16)));
676 dg = (2*r_a)*(-1./2 + (s-1)*(3./4 + (s-1)*(-15./16)));
679 g = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
680 + (s-1)*(35./128))));
681 dg = (2*r_a)*(-1./2 + (s-1)*(3./4 + (s-1)*(-15./16
685 g = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
686 + (s-1)*(35./128 + (s-1)*(-63./256)))));
687 dg = (2*r_a)*(-1./2 + (s-1)*(3./4 + (s-1)*(-15./16
688 + (s-1)*(35./32 + (s-1)*(-315./256)))));
691 g = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
692 + (s-1)*(35./128 + (s-1)*(-63./256
693 + (s-1)*(231./1024))))));
694 dg = (2*r_a)*(-1./2 + (s-1)*(3./4 + (s-1)*(-15./16
695 + (s-1)*(35./32 + (s-1)*(-315./256
696 + (s-1)*(693./512))))));
699 g = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
700 + (s-1)*(35./128 + (s-1)*(-63./256
701 + (s-1)*(231./1024 + (s-1)*(-429./2048)))))));
702 dg = (2*r_a)*(-1./2 + (s-1)*(3./4 + (s-1)*(-15./16
703 + (s-1)*(35./32 + (s-1)*(-315./256
704 + (s-1)*(693./512 + (s-1)*(-3003./2048)))))));
707 g = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
708 + (s-1)*(35./128 + (s-1)*(-63./256
709 + (s-1)*(231./1024 + (s-1)*(-429./2048
710 + (s-1)*(6435./32768))))))));
711 dg = (2*r_a)*(-1./2 + (s-1)*(3./4 + (s-1)*(-15./16
712 + (s-1)*(35./32 + (s-1)*(-315./256
713 + (s-1)*(693./512 + (s-1)*(-3003./2048
714 + (s-1)*(6435./4096))))))));
717 g = 1 + (s-1)*(-3 + (s-1)*(6));
718 dg = (2*r_a)*(-3 + (s-1)*(12));
721 g = 1 + (s-1)*(-3 + (s-1)*(6 + (s-1)*(-10)));
722 dg = (2*r_a)*(-3 + (s-1)*(12 + (s-1)*(-30)));
725 g = 1 + (s-1)*(-3 + (s-1)*(6 + (s-1)*(-10 + (s-1)*(15))));
726 dg = (2*r_a)*(-3 + (s-1)*(12 + (s-1)*(-30 + (s-1)*(60))));
729 g = 1 + (s-1)*(-3 + (s-1)*(6 + (s-1)*(-10
730 + (s-1)*(15 + (s-1)*(-21)))));
731 dg = (2*r_a)*(-3 + (s-1)*(12 + (s-1)*(-30
732 + (s-1)*(60 + (s-1)*(-105)))));
735 g = 1 + (s-1)*(-3 + (s-1)*(6 + (s-1)*(-10
736 + (s-1)*(15 + (s-1)*(-21 + (s-1)*(28))))));
737 dg = (2*r_a)*(-3 + (s-1)*(12 + (s-1)*(-30
738 + (s-1)*(60 + (s-1)*(-105 + (s-1)*(168))))));
741 g = 1 + (s-1)*(-3 + (s-1)*(6 + (s-1)*(-10
742 + (s-1)*(15 + (s-1)*(-21 + (s-1)*(28
744 dg = (2*r_a)*(-3 + (s-1)*(12 + (s-1)*(-30
745 + (s-1)*(60 + (s-1)*(-105 + (s-1)*(168
746 + (s-1)*(-252)))))));
749 g = 1 + (s-1)*(-3 + (s-1)*(6 + (s-1)*(-10
750 + (s-1)*(15 + (s-1)*(-21 + (s-1)*(28
751 + (s-1)*(-36 + (s-1)*(45))))))));
752 dg = (2*r_a)*(-3 + (s-1)*(12 + (s-1)*(-30
753 + (s-1)*(60 + (s-1)*(-105 + (s-1)*(168
754 + (s-1)*(-252 + (s-1)*(360))))))));
764 phi[0] = 0.5f * (1 - t) * (2 - t) * (2 - t);
766 phi[1] = (1 - t) * (1 + t - 1.5f * t * t);
768 phi[2] = (1 + t) * (1 - t - 1.5f * t * t);
770 phi[3] = 0.5f * (1 + t) * (2 + t) * (2 + t);
773 phi[0] = (1.f/24) * (1-t) * (2-t) * (3-t) * (3-t) * (4-t);
775 phi[1] = (1-t) * (2-t) * (3-t) * ((1.f/6)
776 + t * (0.375f - (5.f/24)*t));
778 phi[2] = (1-t*t) * (2-t) * (0.5f + t * (0.25f - (5.f/12)*t));
780 phi[3] = (1-t*t) * (2+t) * (0.5f - t * (0.25f + (5.f/12)*t));
782 phi[4] = (1+t) * (2+t) * (3+t) * ((1.f/6)
783 - t * (0.375f + (5.f/24)*t));
785 phi[5] = (1.f/24) * (1+t) * (2+t) * (3+t) * (3+t) * (4+t);
788 phi[0] = (1.f/24) * (3-t) * (3-t) * (3-t) * (t-2) * (5*t-8);
790 phi[1] = (-1.f/24) * (2-t) * (t-1) * (-48+t*(153+t*(-114+t*25)));
792 phi[2] = (1.f/12) * (1-t) * (12+t*(12+t*(-3+t*(-38+t*25))));
794 phi[3] = (1.f/12) * (1+t) * (12+t*(-12+t*(-3+t*(38+t*25))));
796 phi[4] = (-1.f/24) * (2+t) * (t+1) * (48+t*(153+t*(114+t*25)));
798 phi[5] = (1.f/24) * (3+t) * (3+t) * (3+t) * (t+2) * (5*t+8);
801 phi[0] = (-1.f/720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-4)*(t-5)*(t-6);
803 phi[1] = (1.f/720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-5)*(-6+t*(-20+7*t));
805 phi[2] = (-1.f/240)*(t*t-1)*(t-2)*(t-3)*(t-4)*(-10+t*(-12+7*t));
807 phi[3] = (1.f/144)*(t*t-1)*(t*t-4)*(t-3)*(-12+t*(-4+7*t));
809 phi[4] = (-1.f/144)*(t*t-1)*(t*t-4)*(t+3)*(-12+t*(4+7*t));
811 phi[5] = (1.f/240)*(t*t-1)*(t+2)*(t+3)*(t+4)*(-10+t*(12+7*t));
813 phi[6] = (-1.f/720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(-6+t*(20+7*t));
815 phi[7] = (1.f/720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+4)*(t+5)*(t+6);
818 phi[0] = (3632.f/5) + t*((-7456.f/5) + t*((58786.f/45) + t*(-633
819 + t*((26383.f/144) + t*((-22807.f/720) + t*((727.f/240)
820 + t*(-89.f/720)))))));
822 phi[1] = -440 + t*((25949.f/20) + t*((-117131.f/72) + t*((2247.f/2)
823 + t*((-66437.f/144) + t*((81109.f/720) + t*((-727.f/48)
824 + t*(623.f/720)))))));
826 phi[2] = (138.f/5) + t*((-8617.f/60) + t*((12873.f/40) + t*((-791.f/2)
827 + t*((4557.f/16) + t*((-9583.f/80) + t*((2181.f/80)
828 + t*(-623.f/240)))))));
830 phi[3] = 1 + t*t*((-49.f/36) + t*t*((-959.f/144) + t*((2569.f/144)
831 + t*((-727.f/48) + t*(623.f/144)))));
833 phi[4] = 1 + t*t*((-49.f/36) + t*t*((-959.f/144) + t*((-2569.f/144)
834 + t*((-727.f/48) + t*(-623.f/144)))));
836 phi[5] = (138.f/5) + t*((8617.f/60) + t*((12873.f/40) + t*((791.f/2)
837 + t*((4557.f/16) + t*((9583.f/80) + t*((2181.f/80)
838 + t*(623.f/240)))))));
840 phi[6] = -440 + t*((-25949.f/20) + t*((-117131.f/72) + t*((-2247.f/2)
841 + t*((-66437.f/144) + t*((-81109.f/720) + t*((-727.f/48)
842 + t*(-623.f/720)))))));
844 phi[7] = (3632.f/5) + t*((7456.f/5) + t*((58786.f/45) + t*(633
845 + t*((26383.f/144) + t*((22807.f/720) + t*((727.f/240)
846 + t*(89.f/720)))))));
849 phi[0] = (-1.f/40320)*(t-8)*(t-7)*(t-6)*(t-5)*(t-5)*(t-4)*(t-3)*
852 phi[1] = (1.f/40320)*(t-7)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*
855 phi[2] = (-1.f/10080)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*
858 phi[3] = (1.f/1440)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*
861 phi[4] = (-1.f/2880)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*
864 phi[5] = (1.f/2880)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*
867 phi[6] = (-1.f/1440)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*
870 phi[7] = (1.f/10080)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)*
873 phi[8] = (-1.f/40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)*(t+7)*
876 phi[9] = (1.f/40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+5)*(t+6)*
883 Tphi[0] = 439375./7+T*(-64188125./504+T*(231125375./2016
884 +T*(-17306975./288+T*(7761805./384+T*(-2895587./640
885 +T*(129391./192+T*(-259715./4032+T*(28909./8064
886 +T*(-3569./40320)))))))));
888 Tphi[1] = -56375+T*(8314091./56+T*(-49901303./288+T*(3763529./32
889 +T*(-19648027./384+T*(9469163./640+T*(-545977./192
890 +T*(156927./448+T*(-28909./1152
891 +T*(3569./4480)))))))));
893 Tphi[2] = 68776./7+T*(-1038011./28+T*(31157515./504+T*(-956669./16
894 +T*(3548009./96+T*(-2422263./160+T*(197255./48
895 +T*(-19959./28+T*(144545./2016
896 +T*(-3569./1120)))))))));
898 Tphi[3] = -154+T*(12757./12+T*(-230123./72+T*(264481./48
899 +T*(-576499./96+T*(686147./160+T*(-96277./48
900 +T*(14221./24+T*(-28909./288+T*(3569./480)))))))));
902 Tphi[4] = 1+T*T*(-205./144+T*T*(91./192+T*(-6181./320
903 +T*(6337./96+T*(-2745./32+T*(28909./576
904 +T*(-3569./320)))))));
906 Tphi[5] = 1+T*T*(-205./144+T*T*(91./192+T*(6181./320
907 +T*(6337./96+T*(2745./32+T*(28909./576
908 +T*(3569./320)))))));
910 Tphi[6] = -154+T*(-12757./12+T*(-230123./72+T*(-264481./48
911 +T*(-576499./96+T*(-686147./160+T*(-96277./48
912 +T*(-14221./24+T*(-28909./288+T*(-3569./480)))))))));
914 Tphi[7] = 68776./7+T*(1038011./28+T*(31157515./504+T*(956669./16
915 +T*(3548009./96+T*(2422263./160+T*(197255./48
916 +T*(19959./28+T*(144545./2016+T*(3569./1120)))))))));
918 Tphi[8] = -56375+T*(-8314091./56+T*(-49901303./288+T*(-3763529./32
919 +T*(-19648027./384+T*(-9469163./640+T*(-545977./192
920 +T*(-156927./448+T*(-28909./1152
921 +T*(-3569./4480)))))))));
923 Tphi[9] = 439375./7+T*(64188125./504+T*(231125375./2016
924 +T*(17306975./288+T*(7761805./384+T*(2895587./640
925 +T*(129391./192+T*(259715./4032+T*(28909./8064
926 +T*(3569./40320)))))))));
927 for (
int i=0; i < 10; i++) {
928 phi[i] =
Float(Tphi[i]);
933 NAMD_die(
"Unknown MSM approximation.");
940 phi[0] = 0.5f * (1 - t) * (2 - t) * (2 - t);
941 dphi[0] = (1.5f * t - 2) * (2 - t) * h_1;
943 phi[1] = (1 - t) * (1 + t - 1.5f * t * t);
944 dphi[1] = (-5 + 4.5f * t) * t * h_1;
946 phi[2] = (1 + t) * (1 - t - 1.5f * t * t);
947 dphi[2] = (-5 - 4.5f * t) * t * h_1;
949 phi[3] = 0.5f * (1 + t) * (2 + t) * (2 + t);
950 dphi[3] = (1.5f * t + 2) * (2 + t) * h_1;
953 phi[0] = (1.f/24) * (1-t) * (2-t) * (3-t) * (3-t) * (4-t);
954 dphi[0] = ((-1.f/24) * ((3-t) * (3-t) * (14 + t * (-14 + 3*t))
955 + 2 * (1-t) * (2-t) * (3-t) * (4-t))) * h_1;
957 phi[1] = (1-t) * (2-t) * (3-t) * ((1.f/6)
958 + t * (0.375f - (5.f/24)*t));
959 dphi[1] = (-((1.f/6) + t * (0.375f - (5.f/24)*t)) *
960 (11 + t * (-12 + 3*t)) + (1-t) * (2-t) * (3-t) *
961 (0.375f - (5.f/12)*t)) * h_1;
963 phi[2] = (1-t*t) * (2-t) * (0.5f + t * (0.25f - (5.f/12)*t));
964 dphi[2] = (-(0.5f + t * (0.25f - (5.f/12)*t)) * (1 + t * (4 - 3*t))
965 + (1-t*t) * (2-t) * (0.25f - (5.f/6)*t)) * h_1;
967 phi[3] = (1-t*t) * (2+t) * (0.5f - t * (0.25f + (5.f/12)*t));
968 dphi[3] = ((0.5f + t * (-0.25f - (5.f/12)*t)) * (1 + t * (-4 - 3*t))
969 - (1-t*t) * (2+t) * (0.25f + (5.f/6)*t)) * h_1;
971 phi[4] = (1+t) * (2+t) * (3+t) * ((1.f/6)
972 - t * (0.375f + (5.f/24)*t));
973 dphi[4] = (((1.f/6) + t * (-0.375f - (5.f/24)*t)) *
974 (11 + t * (12 + 3*t)) - (1+t) * (2+t) * (3+t) *
975 (0.375f + (5.f/12)*t)) * h_1;
977 phi[5] = (1.f/24) * (1+t) * (2+t) * (3+t) * (3+t) * (4+t);
978 dphi[5] = ((1.f/24) * ((3+t) * (3+t) * (14 + t * (14 + 3*t))
979 + 2 * (1+t) * (2+t) * (3+t) * (4+t))) * h_1;
982 phi[0] = (1.f/24) * (3-t) * (3-t) * (3-t) * (t-2) * (5*t-8);
983 dphi[0] = ((1.f/24) * (3-t) * (3-t) * ((3-t)*(5*t-8)
984 - 3*(t-2)*(5*t-8) + 5*(t-2)*(3-t))) * h_1;
986 phi[1] = (-1.f/24) * (2-t) * (t-1) * (-48+t*(153+t*(-114+t*25)));
987 dphi[1] = ((-1.f/24) * ((2-t)*(-48+t*(153+t*(-114+t*25)))
988 - (t-1)* (-48+t*(153+t*(-114+t*25)))
989 + (2-t)*(t-1)*(153+t*(-228+t*75)))) * h_1;
991 phi[2] = (1.f/12) * (1-t) * (12+t*(12+t*(-3+t*(-38+t*25))));
992 dphi[2] = ((1.f/12) * (-(12+t*(12+t*(-3+t*(-38+t*25))))
993 + (1-t)*(12+t*(-6+t*(-114+t*100))))) * h_1;
995 phi[3] = (1.f/12) * (1+t) * (12+t*(-12+t*(-3+t*(38+t*25))));
996 dphi[3] = ((1.f/12) * ((12+t*(-12+t*(-3+t*(38+t*25))))
997 + (1+t)*(-12+t*(-6+t*(114+t*100))))) * h_1;
999 phi[4] = (-1.f/24) * (2+t) * (t+1) * (48+t*(153+t*(114+t*25)));
1000 dphi[4] = ((-1.f/24) * ((2+t)*(48+t*(153+t*(114+t*25)))
1001 + (t+1)* (48+t*(153+t*(114+t*25)))
1002 + (2+t)*(t+1)*(153+t*(228+t*75)))) * h_1;
1004 phi[5] = (1.f/24) * (3+t) * (3+t) * (3+t) * (t+2) * (5*t+8);
1005 dphi[5] = ((1.f/24) * (3+t) * (3+t) * ((3+t)*(5*t+8)
1006 + 3*(t+2)*(5*t+8) + 5*(t+2)*(3+t))) * h_1;
1009 phi[0] = (-1.f/720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-4)*(t-5)*(t-6);
1010 dphi[0] = (-1.f/720)*(t-4)*(-1944+t*(3644+t*(-2512+t*(807
1011 +t*(-122+t*7))))) * h_1;
1013 phi[1] = (1.f/720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-5)*(-6+t*(-20+7*t));
1014 dphi[1] = (1.f/720)*(756+t*(-9940+t*(17724+t*(-12740+t*(4445
1015 +t*(-750+t*49)))))) * h_1;
1017 phi[2] = (-1.f/240)*(t*t-1)*(t-2)*(t-3)*(t-4)*(-10+t*(-12+7*t));
1018 dphi[2] = (-1.f/240)*(-28+t*(1260+t*(-756+t*(-1260+t*(1365
1019 +t*(-450+t*49)))))) * h_1;
1021 phi[3] = (1.f/144)*(t*t-1)*(t*t-4)*(t-3)*(-12+t*(-4+7*t));
1022 dphi[3] = (1.f/144)*t*(-560+t*(84+t*(644+t*(-175
1023 +t*(-150+t*49))))) * h_1;
1025 phi[4] = (-1.f/144)*(t*t-1)*(t*t-4)*(t+3)*(-12+t*(4+7*t));
1026 dphi[4] = (-1.f/144)*t*(560+t*(84+t*(-644+t*(-175
1027 +t*(150+t*49))))) * h_1;
1029 phi[5] = (1.f/240)*(t*t-1)*(t+2)*(t+3)*(t+4)*(-10+t*(12+7*t));
1030 dphi[5] = (1.f/240)*(-28+t*(-1260+t*(-756+t*(1260+t*(1365
1031 +t*(450+t*49)))))) * h_1;
1033 phi[6] = (-1.f/720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(-6+t*(20+7*t));
1034 dphi[6] = (-1.f/720)*(756+t*(9940+t*(17724+t*(12740+t*(4445
1035 +t*(750+t*49)))))) * h_1;
1037 phi[7] = (1.f/720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+4)*(t+5)*(t+6);
1038 dphi[7] = (1.f/720)*(t+4)*(1944+t*(3644+t*(2512+t*(807
1039 +t*(122+t*7))))) * h_1;
1042 phi[0] = (3632.f/5) + t*((-7456.f/5) + t*((58786.f/45) + t*(-633
1043 + t*((26383.f/144) + t*((-22807.f/720) + t*((727.f/240)
1044 + t*(-89.f/720)))))));
1045 dphi[0] = ((-7456.f/5) + t*((117572.f/45) + t*(-1899
1046 + t*((26383.f/36) + t*((-22807.f/144) + t*((727.f/40)
1047 + t*(-623.f/720))))))) * h_1;
1049 phi[1] = -440 + t*((25949.f/20) + t*((-117131.f/72) + t*((2247.f/2)
1050 + t*((-66437.f/144) + t*((81109.f/720) + t*((-727.f/48)
1051 + t*(623.f/720)))))));
1052 dphi[1] = ((25949.f/20) + t*((-117131.f/36) + t*((6741.f/2)
1053 + t*((-66437.f/36) + t*((81109.f/144) + t*((-727.f/8)
1054 + t*(4361.f/720))))))) * h_1;
1056 phi[2] = (138.f/5) + t*((-8617.f/60) + t*((12873.f/40) + t*((-791.f/2)
1057 + t*((4557.f/16) + t*((-9583.f/80) + t*((2181.f/80)
1058 + t*(-623.f/240)))))));
1059 dphi[2] = ((-8617.f/60) + t*((12873.f/20) + t*((-2373.f/2)
1060 + t*((4557.f/4) + t*((-9583.f/16) + t*((6543.f/40)
1061 + t*(-4361.f/240))))))) * h_1;
1063 phi[3] = 1 + t*t*((-49.f/36) + t*t*((-959.f/144) + t*((2569.f/144)
1064 + t*((-727.f/48) + t*(623.f/144)))));
1065 dphi[3] = (t*((-49.f/18) + t*t*((-959.f/36) + t*((12845.f/144)
1066 + t*((-727.f/8) + t*(4361.f/144)))))) * h_1;
1068 phi[4] = 1 + t*t*((-49.f/36) + t*t*((-959.f/144) + t*((-2569.f/144)
1069 + t*((-727.f/48) + t*(-623.f/144)))));
1070 dphi[4] = (t*((-49.f/18) + t*t*((-959.f/36) + t*((-12845.f/144)
1071 + t*((-727.f/8) + t*(-4361.f/144)))))) * h_1;
1073 phi[5] = (138.f/5) + t*((8617.f/60) + t*((12873.f/40) + t*((791.f/2)
1074 + t*((4557.f/16) + t*((9583.f/80) + t*((2181.f/80)
1075 + t*(623.f/240)))))));
1076 dphi[5] = ((8617.f/60) + t*((12873.f/20) + t*((2373.f/2)
1077 + t*((4557.f/4) + t*((9583.f/16) + t*((6543.f/40)
1078 + t*(4361.f/240))))))) * h_1;
1080 phi[6] = -440 + t*((-25949.f/20) + t*((-117131.f/72) + t*((-2247.f/2)
1081 + t*((-66437.f/144) + t*((-81109.f/720) + t*((-727.f/48)
1082 + t*(-623.f/720)))))));
1083 dphi[6] = ((-25949.f/20) + t*((-117131.f/36) + t*((-6741.f/2)
1084 + t*((-66437.f/36) + t*((-81109.f/144) + t*((-727.f/8)
1085 + t*(-4361.f/720))))))) * h_1;
1087 phi[7] = (3632.f/5) + t*((7456.f/5) + t*((58786.f/45) + t*(633
1088 + t*((26383.f/144) + t*((22807.f/720) + t*((727.f/240)
1089 + t*(89.f/720)))))));
1090 dphi[7] = ((7456.f/5) + t*((117572.f/45) + t*(1899
1091 + t*((26383.f/36) + t*((22807.f/144) + t*((727.f/40)
1092 + t*(623.f/720))))))) * h_1;
1095 phi[0] = (-1.f/40320)*(t-8)*(t-7)*(t-6)*(t-5)*(t-5)*(t-4)*(t-3)*
1097 dphi[0] = (-1.f/40320)*(t-5)*(-117648+t*(256552+t*(-221416
1098 +t*(99340+t*(-25261+t*(3667+t*(-283+t*9)))))))*h_1;
1100 phi[1] = (1.f/40320)*(t-7)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*
1102 dphi[1] = (1.f/40320)*(71856+t*(-795368+t*(1569240+t*(-1357692
1103 +t*(634725+t*(-172116+t*(27090+t*(-2296+t*81))))))))*h_1;
1105 phi[2] = (-1.f/10080)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*
1107 dphi[2] = (1.f/10080)*(3384+t*(-69080+t*(55026
1108 +t*(62580+t*(-99225+t*(51660+t*(-13104+t*(1640
1109 +t*(-81)))))))))*h_1;
1111 phi[3] = (1.f/1440)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*
1113 dphi[3] = (1.f/1440)*(72+t*(-6344+t*(2070
1114 +t*(7644+t*(-4725+t*(-828+t*(1260+t*(-328+t*27))))))))*h_1;
1116 phi[4] = (-1.f/2880)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*
1118 dphi[4] = (-1.f/2880)*t*(10792+t*(-972+t*(-12516
1119 +t*(2205+t*(3924+t*(-882+t*(-328+t*81)))))))*h_1;
1121 phi[5] = (1.f/2880)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*
1123 dphi[5] = (1.f/2880)*t*(-10792+t*(-972+t*(12516
1124 +t*(2205+t*(-3924+t*(-882+t*(328+t*81)))))))*h_1;
1126 phi[6] = (-1.f/1440)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*
1128 dphi[6] = (1.f/1440)*(-72+t*(-6344+t*(-2070
1129 +t*(7644+t*(4725+t*(-828+t*(-1260+t*(-328+t*(-27)))))))))*h_1;
1131 phi[7] = (1.f/10080)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)*
1133 dphi[7] = (1.f/10080)*(-3384+t*(-69080+t*(-55026
1134 +t*(62580+t*(99225+t*(51660+t*(13104+t*(1640+t*81))))))))*h_1;
1136 phi[8] = (-1.f/40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)*(t+7)*
1138 dphi[8] = (-1.f/40320)*(71856+t*(795368+t*(1569240
1139 +t*(1357692+t*(634725+t*(172116+t*(27090+t*(2296
1142 phi[9] = (1.f/40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+5)*(t+6)*
1144 dphi[9] = (1.f/40320)*(t+5)*(117648+t*(256552+t*(221416
1145 +t*(99340+t*(25261+t*(3667+t*(283+t*9)))))))*h_1;
1149 double Tphi[10], Tdphi[10];
1151 Tphi[0] = 439375./7+T*(-64188125./504+T*(231125375./2016
1152 +T*(-17306975./288+T*(7761805./384+T*(-2895587./640
1153 +T*(129391./192+T*(-259715./4032+T*(28909./8064
1154 +T*(-3569./40320)))))))));
1155 Tdphi[0] = (-64188125./504+T*(231125375./1008
1156 +T*(-17306975./96+T*(7761805./96+T*(-2895587./128
1157 +T*(129391./32+T*(-259715./576+T*(28909./1008
1158 +T*(-3569./4480))))))))) * h_1;
1160 Tphi[1] = -56375+T*(8314091./56+T*(-49901303./288+T*(3763529./32
1161 +T*(-19648027./384+T*(9469163./640+T*(-545977./192
1162 +T*(156927./448+T*(-28909./1152
1163 +T*(3569./4480)))))))));
1164 Tdphi[1] = (8314091./56+T*(-49901303./144+T*(11290587./32
1165 +T*(-19648027./96+T*(9469163./128+T*(-545977./32
1166 +T*(156927./64+T*(-28909./144
1167 +T*(32121./4480))))))))) * h_1;
1169 Tphi[2] = 68776./7+T*(-1038011./28+T*(31157515./504+T*(-956669./16
1170 +T*(3548009./96+T*(-2422263./160+T*(197255./48
1171 +T*(-19959./28+T*(144545./2016
1172 +T*(-3569./1120)))))))));
1173 Tdphi[2] = (-1038011./28+T*(31157515./252+T*(-2870007./16
1174 +T*(3548009./24+T*(-2422263./32+T*(197255./8
1175 +T*(-19959./4+T*(144545./252
1176 +T*(-32121./1120))))))))) * h_1;
1178 Tphi[3] = -154+T*(12757./12+T*(-230123./72+T*(264481./48
1179 +T*(-576499./96+T*(686147./160+T*(-96277./48
1180 +T*(14221./24+T*(-28909./288+T*(3569./480)))))))));
1181 Tdphi[3] = (12757./12+T*(-230123./36+T*(264481./16
1182 +T*(-576499./24+T*(686147./32+T*(-96277./8
1183 +T*(99547./24+T*(-28909./36
1184 +T*(10707./160))))))))) * h_1;
1186 Tphi[4] = 1+T*T*(-205./144+T*T*(91./192+T*(-6181./320
1187 +T*(6337./96+T*(-2745./32+T*(28909./576
1188 +T*(-3569./320)))))));
1189 Tdphi[4] = T*(-205./72+T*T*(91./48+T*(-6181./64
1190 +T*(6337./16+T*(-19215./32+T*(28909./72
1191 +T*(-32121./320))))))) * h_1;
1193 Tphi[5] = 1+T*T*(-205./144+T*T*(91./192+T*(6181./320
1194 +T*(6337./96+T*(2745./32+T*(28909./576
1195 +T*(3569./320)))))));
1196 Tdphi[5] = T*(-205./72+T*T*(91./48+T*(6181./64
1197 +T*(6337./16+T*(19215./32+T*(28909./72
1198 +T*(32121./320))))))) * h_1;
1200 Tphi[6] = -154+T*(-12757./12+T*(-230123./72+T*(-264481./48
1201 +T*(-576499./96+T*(-686147./160+T*(-96277./48
1202 +T*(-14221./24+T*(-28909./288+T*(-3569./480)))))))));
1203 Tdphi[6] = (-12757./12+T*(-230123./36+T*(-264481./16
1204 +T*(-576499./24+T*(-686147./32+T*(-96277./8
1205 +T*(-99547./24+T*(-28909./36
1206 +T*(-10707./160))))))))) * h_1;
1208 Tphi[7] = 68776./7+T*(1038011./28+T*(31157515./504+T*(956669./16
1209 +T*(3548009./96+T*(2422263./160+T*(197255./48
1210 +T*(19959./28+T*(144545./2016+T*(3569./1120)))))))));
1211 Tdphi[7] = (1038011./28+T*(31157515./252+T*(2870007./16
1212 +T*(3548009./24+T*(2422263./32+T*(197255./8
1213 +T*(19959./4+T*(144545./252
1214 +T*(32121./1120))))))))) * h_1;
1216 Tphi[8] = -56375+T*(-8314091./56+T*(-49901303./288+T*(-3763529./32
1217 +T*(-19648027./384+T*(-9469163./640+T*(-545977./192
1218 +T*(-156927./448+T*(-28909./1152
1219 +T*(-3569./4480)))))))));
1220 Tdphi[8] = (-8314091./56+T*(-49901303./144+T*(-11290587./32
1221 +T*(-19648027./96+T*(-9469163./128+T*(-545977./32
1222 +T*(-156927./64+T*(-28909./144
1223 +T*(-32121./4480))))))))) * h_1;
1225 Tphi[9] = 439375./7+T*(64188125./504+T*(231125375./2016
1226 +T*(17306975./288+T*(7761805./384+T*(2895587./640
1227 +T*(129391./192+T*(259715./4032+T*(28909./8064
1228 +T*(3569./40320)))))))));
1229 Tdphi[9] = (64188125./504+T*(231125375./1008
1230 +T*(17306975./96+T*(7761805./96+T*(2895587./128
1231 +T*(129391./32+T*(259715./576+T*(28909./1008
1232 +T*(3569./4480))))))))) * h_1;
1233 for (
int i=0; i < 10; i++) {
1234 phi[i] =
Float(Tphi[i]);
1235 dphi[i] =
Float(Tdphi[i]);
1240 NAMD_die(
"Unknown MSM approximation.");
1245 phi[0] = (1 - t) * (1 - t) * (1 + 2*t);
1246 psi[0] = h * t * (1 - t) * (1 - t);
1248 phi[1] = (1 + t) * (1 + t) * (1 - 2*t);
1249 psi[1] = h * t * (1 + t) * (1 + t);
1255 phi[0] = (1 - t) * (1 - t) * (1 + 2*t);
1256 dphi[0] = -6 * t * (1 - t) * h_1;
1257 psi[0] = h * t * (1 - t) * (1 - t);
1258 dpsi[0] = (1 - t) * (1 - 3*t);
1260 phi[1] = (1 + t) * (1 + t) * (1 - 2*t);
1261 dphi[1] = -6 * t * (1 + t) * h_1;
1262 psi[1] = h * t * (1 + t) * (1 + t);
1263 dpsi[1] = (1 + t) * (1 + 3*t);
1273 pg[k++] = 1 + (s-1)*(-1./2 + (s-1)*(3./8));
1275 pg[k++] = -1./2 + (s-1)*(3./4);
1280 pg[k++] = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16)));
1282 pg[k++] = -1./2 + (s-1)*(3./4 + (s-1)*(-15./16));
1284 pg[k++] = 3./4 + (s-1)*(-15./8);
1289 pg[k++] = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
1290 + (s-1)*(35./128))));
1292 pg[k++] = -1./2 + (s-1)*(3./4 + (s-1)*(-15./16 + (s-1)*(35./32)));
1294 pg[k++] = 3./4 + (s-1)*(-15./8 + (s-1)*(105./32));
1296 pg[k++] = -15./8 + (s-1)*(105./16);
1301 pg[k++] = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
1302 + (s-1)*(35./128 + (s-1)*(-63./256)))));
1304 pg[k++] = -1./2 + (s-1)*(3./4 + (s-1)*(-15./16 + (s-1)*(35./32
1305 + (s-1)*(-315./256))));
1307 pg[k++] = 3./4 + (s-1)*(-15./8 + (s-1)*(105./32 + (s-1)*(-315./64)));
1309 pg[k++] = -15./8 + (s-1)*(105./16 + (s-1)*(-945./64));
1311 pg[k++] = 105./16 + (s-1)*(-945./32);
1316 pg[k++] = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
1317 + (s-1)*(35./128 + (s-1)*(-63./256 + (s-1)*(231./1024))))));
1319 pg[k++] = -1./2 + (s-1)*(3./4 + (s-1)*(-15./16 + (s-1)*(35./32
1320 + (s-1)*(-315./256 + (s-1)*(693./512)))));
1322 pg[k++] = 3./4 + (s-1)*(-15./8 + (s-1)*(105./32 + (s-1)*(-315./64
1323 + (s-1)*(3465./512))));
1325 pg[k++] = -15./8 + (s-1)*(105./16 + (s-1)*(-945./64
1326 + (s-1)*(3465./128)));
1328 pg[k++] = 105./16 + (s-1)*(-945./32 + (s-1)*(10395./128));
1330 pg[k++] = -945./32 + (s-1)*(10395./64);
1332 pg[k++] = 10395./64;
1335 pg[k++] = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
1336 + (s-1)*(35./128 + (s-1)*(-63./256
1337 + (s-1)*(231./1024 + (s-1)*(-429./2048)))))));
1339 pg[k++] = -1./2 + (s-1)*(3./4 + (s-1)*(-15./16 + (s-1)*(35./32
1340 + (s-1)*(-315./256 + (s-1)*(693./512
1341 + (s-1)*(-3003./2048))))));
1343 pg[k++] = 3./4 + (s-1)*(-15./8 + (s-1)*(105./32 + (s-1)*(-315./64
1344 + (s-1)*(3465./512 + (s-1)*(-9009./1024)))));
1346 pg[k++] = -15./8 + (s-1)*(105./16 + (s-1)*(-945./64 + (s-1)*(3465./128
1347 + (s-1)*(-45045./1024))));
1349 pg[k++] = 105./16 + (s-1)*(-945./32 + (s-1)*(10395./128
1350 + (s-1)*(-45045./256)));
1352 pg[k++] = -945./32 + (s-1)*(10395./64 + (s-1)*(-135135./256));
1354 pg[k++] = 10395./64 + (s-1)*(-135135./128);
1356 pg[k++] = -135135./128;
1359 pg[k++] = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
1360 + (s-1)*(35./128 + (s-1)*(-63./256
1361 + (s-1)*(231./1024 + (s-1)*(-429./2048
1362 + (s-1)*(6435./32768))))))));
1364 pg[k++] = -1./2 + (s-1)*(3./4 + (s-1)*(-15./16 + (s-1)*(35./32
1365 + (s-1)*(-315./256 + (s-1)*(693./512
1366 + (s-1)*(-3003./2048 + (s-1)*(6435./4096)))))));
1368 pg[k++] = 3./4 + (s-1)*(-15./8 + (s-1)*(105./32 + (s-1)*(-315./64
1369 + (s-1)*(3465./512 + (s-1)*(-9009./1024
1370 + (s-1)*(45045./4096))))));
1372 pg[k++] = -15./8 + (s-1)*(105./16 + (s-1)*(-945./64 + (s-1)*(3465./128
1373 + (s-1)*(-45045./1024 + (s-1)*(135135./2048)))));
1375 pg[k++] = 105./16 + (s-1)*(-945./32 + (s-1)*(10395./128
1376 + (s-1)*(-45045./256 + (s-1)*(675675./2048))));
1378 pg[k++] = -945./32 + (s-1)*(10395./64 + (s-1)*(-135135./256
1379 + (s-1)*(675675./512)));
1381 pg[k++] = 10395./64 + (s-1)*(-135135./128 + (s-1)*(2027025./512));
1383 pg[k++] = -135135./128 + (s-1)*(2027025./256);
1385 pg[k++] = 2027025./256;
1388 NAMD_die(
"Unknown MSM splitting.");
1406 while (k < n) pg[k++] = 0;
1413 const BigReal a_2 = a_1 * a_1;
1435 tmp = _c * p[1] * dx;
1439 tmp = _c * p[1] * dy;
1443 tmp = _c * p[1] * dz;
1450 tmp = _c * p[2] * dx * dy;
1456 tmp = _c * p[2] * dx * dz;
1462 tmp = _c * p[2] * dy * dz;
1468 tmp = _c * (p[2] * dx*dx + p[1] * dd);
1470 tmp = _c * (p[2] * dy*dy + p[1] * dd);
1472 tmp = _c * (p[2] * dz*dz + p[1] * dd);
1476 if (_split ==
TAYLOR2)
return;
1479 tmp = _c * p[3] * dx * dy * dz;
1489 tmp = _c * (p[3] * dx*dx * dy + p[2] * dd * dy);
1493 tmp = _c * (p[3] * dx*dx * dz + p[2] * dd * dz);
1497 tmp = _c * (p[3] * dy*dy * dx + p[2] * dd * dx);
1501 tmp = _c * (p[3] * dy*dy * dz + p[2] * dd * dz);
1505 tmp = _c * (p[3] * dz*dz * dx + p[2] * dd * dx);
1509 tmp = _c * (p[3] * dz*dz * dy + p[2] * dd * dy);
1514 if (_split ==
TAYLOR3)
return;
1517 tmp = _c * (p[4] * dx*dx * dy * dz + p[3] * dd * dy * dz);
1523 tmp = _c * (p[4] * dy*dy * dx * dz + p[3] * dd * dx * dz);
1529 tmp = _c * (p[4] * dz*dz * dx * dy + p[3] * dd * dx * dy);
1535 tmp = _c * (p[4] * dx*dx * dy*dy + p[3] * dx*dx * dd
1536 + p[3] * dd * dy*dy + p[2] * dd * dd);
1538 tmp = _c * (p[4] * dx*dx * dz*dz + p[3] * dx*dx * dd
1539 + p[3] * dd * dz*dz + p[2] * dd * dd);
1541 tmp = _c * (p[4] * dy*dy * dz*dz + p[3] * dy*dy * dd
1542 + p[3] * dd * dz*dz + p[2] * dd * dd);
1546 if (_split ==
TAYLOR4)
return;
1549 tmp = _c * (p[5] * dx*dx * dy*dy * dz + p[4] * dx*dx * dd * dz
1550 + p[4] * dd * dy*dy * dz + p[3] * dd * dd * dz);
1554 tmp = _c * (p[5] * dx*dx * dz*dz * dy + p[4] * dx*dx * dd * dy
1555 + p[4] * dd * dz*dz * dy + p[3] * dd * dd * dy);
1559 tmp = _c * (p[5] * dy*dy * dz*dz * dx + p[4] * dy*dy * dd * dx
1560 + p[4] * dd * dz*dz * dx + p[3] * dd * dd * dx);
1565 if (_split ==
TAYLOR5)
return;
1568 tmp = _c * (p[6] * dx*dx * dy*dy * dz*dz + p[5] * dx*dx * dy*dy * dd
1569 + p[5] * dx*dx * dd * dz*dz + p[5] * dd * dy*dy * dz*dz
1570 + p[4] * dx*dx * dd * dd + p[4] * dd * dy*dy * dd
1571 + p[4] * dd * dd * dz*dz + p[3] * dd * dd * dd);
1586 3, 5, 5, 7, 7, 9, 9, 1,
1591 5, 7, 7, 9, 9, 11, 11, 3,
1602 {-5, -3, -1, 0, 1, 3, 5},
1605 {-5, -3, -1, 0, 1, 3, 5},
1608 {-7, -5, -3, -1, 0, 1, 3, 5, 7},
1611 {-7, -5, -3, -1, 0, 1, 3, 5, 7},
1614 {-9, -7, -5, -3, -1, 0, 1, 3, 5, 7, 9},
1617 {-9, -7, -5, -3, -1, 0, 1, 3, 5, 7, 9},
1628 {-1.f/16, 9.f/16, 1, 9.f/16, -1.f/16},
1631 {3.f/256, -25.f/256, 75.f/128, 1, 75.f/128, -25.f/256, 3.f/256},
1634 {3.f/256, -25.f/256, 75.f/128, 1, 75.f/128, -25.f/256, 3.f/256},
1637 { -5.f/2048, 49.f/2048, -245.f/2048, 1225.f/2048, 1, 1225.f/2048,
1638 -245.f/2048, 49.f/2048, -5.f/2048 },
1641 { -5.f/2048, 49.f/2048, -245.f/2048, 1225.f/2048, 1, 1225.f/2048,
1642 -245.f/2048, 49.f/2048, -5.f/2048 },
1645 { 35.f/65536, -405.f/65536, 567.f/16384, -2205.f/16384,
1646 19845.f/32768, 1, 19845.f/32768, -2205.f/16384, 567.f/16384,
1647 -405.f/65536, 35.f/65536 },
1650 { 35.f/65536, -405.f/65536, 567.f/16384, -2205.f/16384,
1651 19845.f/32768, 1, 19845.f/32768, -2205.f/16384, 567.f/16384,
1652 -405.f/65536, 35.f/65536 },
1665 mgrLocal = CProxy_ComputeMsmMgr::ckLocalBranch(
1666 CkpvAccess(BOCclass_group).computeMsmMgr);
1667 #ifdef MSM_NODE_MAPPING 1679 int *pn = (
int *)idx.data();
1680 #ifdef MSM_NODE_MAPPING 1697 ComputeMsmMgr *mgrLocal = CProxy_ComputeMsmMgr::ckLocalBranch(
1698 CkpvAccess(BOCclass_group).computeMsmMgr);
1699 #ifdef MSM_NODE_MAPPING 1710 int n = *((
int *)idx.data());
1711 #ifdef MSM_NODE_MAPPING 1760 void init(
int natoms);
1789 template <
class Vtype,
class Mtype>
1810 mgrLocal = CProxy_ComputeMsmMgr::ckLocalBranch(
1811 CkpvAccess(BOCclass_group).computeMsmMgr);
1817 #ifdef MSM_PROFILING 1822 #ifdef MSM_MIGRATION 1823 void pup(PUP::er& p) {
1827 #ifdef MSM_PROFILING 1836 #endif // MSM_MIGRATION 1906 double startTime, stopTime;
1907 startTime = CkWallTimer();
1917 stopTime = CkWallTimer();
1927 startTime = stopTime;
1933 int gia =
pgc->
ia();
1934 int gib =
pgc->
ib();
1935 int gja =
pgc->
ja();
1936 int gjb =
pgc->
jb();
1937 int gka =
pgc->
ka();
1938 int gkb =
pgc->
kb();
1939 int gni =
pgc->
ni();
1940 int gnj =
pgc->
nj();
1967 #ifndef MSM_COMM_ONLY 1969 for (
int k = ka; k <= kb; k++) {
1971 int mka = ( qka >= gka + k ? qka : gka + k );
1972 int mkb = ( qkb <= gkb + k ? qkb : gkb + k );
1974 for (
int j = ja; j <= jb; j++) {
1976 int mja = ( qja >= gja + j ? qja : gja + j );
1977 int mjb = ( qjb <= gjb + j ? qjb : gjb + j );
1979 for (
int i = ia; i <= ib; i++) {
1981 int mia = ( qia >= gia + i ? qia : gia + i );
1982 int mib = ( qib <= gib + i ? qib : gib + i );
1989 for (
int qk = mka; qk <= mkb; qk++) {
1990 int qkoff = (qk - qka) * qnj;
1991 int gkoff = ((qk-k) - gka) * gnj;
1993 for (
int qj = mja; qj <= mjb; qj++) {
1994 int qjkoff = (qkoff + qj - qja) * qni;
1995 int gjkoff = (gkoff + (qj-j) - gja) * gni;
1998 #if defined(__INTEL_COMPILER) 1999 #pragma vector always 2001 for (
int qi = mia; qi <= mib; qi++) {
2002 int qijkoff = qjkoff + qi - qia;
2003 int gijkoff = gjkoff + (qi-i) - gia;
2005 ehsum += gcbuffer[gijkoff] * qhbuffer[qijkoff];
2013 int nn = mib - mia + 1;
2014 for (
int qk = mka; qk <= mkb; qk++) {
2015 int qkoff = (qk - qka) * qnj;
2016 int gkoff = ((qk-k) - gka) * gnj;
2018 for (
int qj = mja; qj <= mjb; qj++) {
2019 int qjkoff = (qkoff + qj - qja) * qni;
2020 int gjkoff = (gkoff + (qj-j) - gja) * gni;
2022 const Float *qbuf = qhbuffer + (qjkoff - qia + mia);
2023 const Float *gbuf = gcbuffer + (gjkoff - i - gia + mia);
2024 #ifdef MSM_PROFILING 2028 #if defined(__INTEL_COMPILER) 2029 #pragma vector always 2031 for (
int ii = 0; ii < nn; ii++) {
2032 ehsum += gbuf[ii] * qbuf[ii];
2038 int nn = mib - mia + 1;
2040 int qnji = qnj * qni;
2041 int qkoff = -qka*qnji - qja*qni - qia + mia;
2042 int gnji = gnj * gni;
2043 int gkoff = (-k-gka)*gnji + (-j-gja)*gni - i - gia + mia;
2045 for (
int qk = mka; qk <= mkb; qk++) {
2046 int qjkoff = qkoff + qk*qnji;
2047 int gjkoff = gkoff + qk*gnji;
2049 for (
int qj = mja; qj <= mjb; qj++) {
2050 const Vtype *qbuf = qhbuffer + (qjkoff + qj*qni);
2051 const Mtype *gbuf = gcbuffer + (gjkoff + qj*gni);
2054 #ifdef MSM_PROFILING 2058 #if defined(__INTEL_COMPILER) 2059 #pragma vector always 2061 for (
int ii = 0; ii < 8; ii++) {
2062 ehsum += gbuf[ii] * qbuf[ii];
2069 int qnji = qnj * qni;
2070 int qkoff = -qka*qnji - qja*qni - qia + mia;
2071 int gnji = gnj * gni;
2072 int gkoff = (-k-gka)*gnji + (-j-gja)*gni - i - gia + mia;
2074 for (
int qk = mka; qk <= mkb; qk++) {
2075 int qjkoff = qkoff + qk*qnji;
2076 int gjkoff = gkoff + qk*gnji;
2078 for (
int qj = mja; qj <= mjb; qj++) {
2079 const Vtype *qbuf = qhbuffer + (qjkoff + qj*qni);
2080 const Mtype *gbuf = gcbuffer + (gjkoff + qj*gni);
2083 #ifdef MSM_PROFILING 2087 #if defined(__INTEL_COMPILER) 2088 #pragma vector always 2090 for (
int ii = 0; ii < nn; ii++) {
2091 ehsum += gbuf[ii] * qbuf[ii];
2101 ehbuffer[index] = ehsum;
2106 #endif // !MSM_COMM_ONLY 2108 #ifdef MSM_PROFILING 2116 #ifdef MSM_FOLD_FACTOR 2125 #ifdef DEBUG_MSM_GRID 2126 printf(
"level=%d ehfold: [%d..%d] x [%d..%d] x [%d..%d] " 2128 " eh: [%d..%d] x [%d..%d] x [%d..%d] " 2130 " eh lower: %d %d %d\n",
2150 for (
int k = ka; k <= kb; k++) {
2152 if (kk <
eka)
do { kk +=
enk; }
while (kk <
eka);
2153 else if (kk >
ekb)
do { kk -=
enk; }
while (kk >
ekb);
2154 int koff = (kk -
eka) *
enj;
2155 for (
int j = ja; j <= jb; j++) {
2157 if (jj <
eja)
do { jj +=
enj; }
while (jj <
eja);
2158 else if (jj >
ejb)
do { jj -=
enj; }
while (jj >
ejb);
2159 int jkoff = (koff + (jj -
eja)) *
eni;
2160 for (
int i = ia; i <= ib; i++, index++) {
2162 if (ii <
eia)
do { ii +=
eni; }
while (ii <
eia);
2163 else if (ii >
eib)
do { ii -=
eni; }
while (ii >
eib);
2164 int ijkoff = jkoff + (ii -
eia);
2165 ehbuf[ijkoff] += ehfoldbuf[index];
2174 #else // !MSM_FOLD_FACTOR 2177 #endif // MSM_FOLD_FACTOR 2180 stopTime = CkWallTimer();
2194 public CBase_MsmGridCutoff,
2200 #ifdef MSM_REDUCE_GRID 2202 #endif // MSM_REDUCE_GRID 2207 #if ! defined(MSM_MIGRATION) 2209 #else // MSM_MIGRATION 2210 : CBase_MsmGridCutoff(m) {
2211 #ifdef DEBUG_MSM_MIGRATE 2212 printf(
"MsmGridCutoff element %d migrated to processor %d\n",
2213 thisIndex, CkMyPe());
2223 virtual void pup(PUP::er& p) {
2224 #ifdef DEBUG_MSM_MIGRATE 2225 printf(
"MsmGridCutoff element %d pupped on processor %d\n",
2226 thisIndex, CkMyPe());
2228 CBase_MsmGridCutoff::pup(p);
2231 #endif // MSM_MIGRATION 2245 #ifdef MSM_REDUCE_GRID 2252 #endif // MSM_REDUCE_GRID 2253 #ifdef DEBUG_MSM_GRID 2254 printf(
"MsmGridCutoff[%d]: setup()" 2255 " send to level=%d block=(%d,%d,%d)\n",
2264 #ifdef DEBUG_MSM_GRID 2265 CkPrintf(
"MSM GRID CUTOFF %d setup section on PE %d\n",
2266 thisIndex, CkMyPe());
2268 CkGetSectionInfo(
cookie, msg);
2274 #ifdef DEBUG_MSM_GRID 2275 printf(
"MsmGridCutoff %d: compute()\n", thisIndex);
2281 double startTime, stopTime;
2282 startTime = CkWallTimer();
2284 #ifdef MSM_REDUCE_GRID 2287 CProxy_CkMulticastMgr mcastProxy =
2288 CkpvAccess(BOCclass_group).multicastMgr;
2289 CkMulticastMgr *mcastPtr =
2290 CProxy_CkMulticastMgr(mcastProxy).ckLocalBranch();
2291 CkCallback cb(CkIndex_MsmBlock::sumReducedPotential(NULL),
2296 mcastPtr->contribute(
2298 CkReduction::sum_float,
cookie, cb);
2309 bindex.
n.
i, bindex.
n.
j, bindex.
n.
k).addPotential(gm);
2311 #endif // MSM_REDUCE_GRID 2314 stopTime = CkWallTimer();
2328 public CBase_MsmC1HermiteGridCutoff,
2334 #ifdef MSM_REDUCE_GRID 2336 #endif // MSM_REDUCE_GRID 2341 #if ! defined(MSM_MIGRATION) 2343 #else // MSM_MIGRATION 2344 : CBase_MsmC1HermiteGridCutoff(m) {
2345 #ifdef DEBUG_MSM_MIGRATE 2346 printf(
"MsmC1HermiteGridCutoff element %d migrated to processor %d\n",
2347 thisIndex, CkMyPe());
2357 virtual void pup(PUP::er& p) {
2358 #ifdef DEBUG_MSM_MIGRATE 2359 printf(
"MsmC1HermiteGridCutoff element %d pupped on processor %d\n",
2360 thisIndex, CkMyPe());
2362 CBase_MsmC1HermiteGridCutoff::pup(p);
2365 #endif // MSM_MIGRATION 2379 #ifdef DEBUG_MSM_GRID 2380 printf(
"MsmC1HermiteGridCutoff[%d]: setup()" 2381 " send to level=%d block=(%d,%d,%d)\n",
2387 #ifdef MSM_REDUCE_GRID 2394 #endif // MSM_REDUCE_GRID 2398 #ifdef DEBUG_MSM_GRID 2399 CkPrintf(
"MSM C1 HERMITE GRID CUTOFF %d setup section on PE %d\n",
2400 thisIndex, CkMyPe());
2402 CkGetSectionInfo(
cookie, msg);
2408 #ifdef DEBUG_MSM_GRID 2409 printf(
"MsmC1HermiteGridCutoff %d: compute()\n", thisIndex);
2419 double startTime, stopTime;
2420 startTime = CkWallTimer();
2422 #ifdef MSM_REDUCE_GRID 2425 CProxy_CkMulticastMgr mcastProxy =
2426 CkpvAccess(BOCclass_group).multicastMgr;
2427 CkMulticastMgr *mcastPtr =
2428 CProxy_CkMulticastMgr(mcastProxy).ckLocalBranch();
2429 CkCallback cb(CkIndex_MsmC1HermiteBlock::sumReducedPotential(NULL),
2434 mcastPtr->contribute(
2436 CkReduction::sum_float,
cookie, cb);
2447 bindex.
n.
i, bindex.
n.
j, bindex.
n.
k).addPotential(gm);
2449 #endif // MSM_REDUCE_GRID 2452 stopTime = CkWallTimer();
2465 double startTime, stopTime;
2466 startTime = CkWallTimer();
2476 stopTime = CkWallTimer();
2486 startTime = stopTime;
2492 int gia =
pgc->
ia();
2493 int gib =
pgc->
ib();
2494 int gja =
pgc->
ja();
2495 int gjb =
pgc->
jb();
2496 int gka =
pgc->
ka();
2497 int gkb =
pgc->
kb();
2498 int gni =
pgc->
ni();
2499 int gnj =
pgc->
nj();
2523 #ifdef DEBUG_MEMORY_ALIGNMENT 2524 printf(
"gcbuffer mem: addr=%p div32=%lu mod32=%lu\n",
2526 (
unsigned long)(gcbuffer)/32,
2527 (
unsigned long)(gcbuffer)%32);
2528 printf(
"qhbuffer mem: addr=%p div32=%lu mod32=%lu\n",
2530 (
unsigned long)(qhbuffer)/32,
2531 (
unsigned long)(qhbuffer)%32);
2532 printf(
"ehbuffer mem: addr=%p div32=%lu mod32=%lu\n",
2534 (
unsigned long)(ehbuffer)/32,
2535 (
unsigned long)(ehbuffer)%32);
2538 #ifndef MSM_COMM_ONLY 2540 for (
int k = ka; k <= kb; k++) {
2542 int mka = ( qka >= gka + k ? qka : gka + k );
2543 int mkb = ( qkb <= gkb + k ? qkb : gkb + k );
2545 for (
int j = ja; j <= jb; j++) {
2547 int mja = ( qja >= gja + j ? qja : gja + j );
2548 int mjb = ( qjb <= gjb + j ? qjb : gjb + j );
2550 for (
int i = ia; i <= ib; i++) {
2552 int mia = ( qia >= gia + i ? qia : gia + i );
2553 int mib = ( qib <= gib + i ? qib : gib + i );
2559 int nn = mib - mia + 1;
2562 int qnji = qnj * qni;
2563 int qkoff = -qka*qnji - qja*qni - qia + mia;
2564 int gnji = gnj * gni;
2565 int gkoff = (-k-gka)*gnji + (-j-gja)*gni - i - gia + mia;
2567 for (
int qk = mka; qk <= mkb; qk++) {
2568 int qjkoff = qkoff + qk*qnji;
2569 int gjkoff = gkoff + qk*gnji;
2571 for (
int qj = mja; qj <= mjb; qj++) {
2572 const C1Vector *qbuf = qhbuffer + (qjkoff + qj*qni);
2573 const C1Matrix *gbuf = gcbuffer + (gjkoff + qj*gni);
2574 #ifdef MSM_PROFILING 2578 #if defined(__INTEL_COMPILER) 2579 #pragma vector always 2581 for (
int ii = 0; ii < nn; ii++) {
2584 ehsum += gbuf[ii] * qbuf[ii];
2588 if ( *((
int *)(gbuf)) != 0) {
2591 #if defined(__INTEL_COMPILER) 2592 #pragma vector always 2596 ehsum.
velem[jm] += gbuf->melem[km] * qbuf->velem[im];
2609 ehbuffer[index] = ehsum;
2614 #endif // !MSM_COMM_ONLY 2616 #ifdef MSM_PROFILING 2624 #ifdef MSM_FOLD_FACTOR 2633 #ifdef DEBUG_MSM_GRID 2634 printf(
"level=%d ehfold: [%d..%d] x [%d..%d] x [%d..%d] " 2636 " eh: [%d..%d] x [%d..%d] x [%d..%d] " 2638 " eh lower: %d %d %d\n",
2658 for (
int k = ka; k <= kb; k++) {
2660 if (kk <
eka)
do { kk +=
enk; }
while (kk <
eka);
2661 else if (kk >
ekb)
do { kk -=
enk; }
while (kk >
ekb);
2662 int koff = (kk -
eka) *
enj;
2663 for (
int j = ja; j <= jb; j++) {
2665 if (jj <
eja)
do { jj +=
enj; }
while (jj <
eja);
2666 else if (jj >
ejb)
do { jj -=
enj; }
while (jj >
ejb);
2667 int jkoff = (koff + (jj -
eja)) *
eni;
2668 for (
int i = ia; i <= ib; i++, index++) {
2670 if (ii <
eia)
do { ii +=
eni; }
while (ii <
eia);
2671 else if (ii >
eib)
do { ii -=
eni; }
while (ii >
eib);
2672 int ijkoff = jkoff + (ii -
eia);
2673 ehbuf[ijkoff] += ehfoldbuf[index];
2682 #else // !MSM_FOLD_FACTOR 2685 #endif // MSM_FOLD_FACTOR 2688 stopTime = CkWallTimer();
2732 template <
class Vtype,
class Mtype>
2741 #ifndef MSM_GRID_CUTOFF_DECOMP 2762 #ifndef MSM_GRID_CUTOFF_DECOMP 2785 #ifndef MSM_GRID_CUTOFF_DECOMP 2786 void gridCutoffKernel();
2792 template <
class Vtype,
class Mtype>
2794 blockIndex = bindex;
2795 mgrProxy = CProxy_ComputeMsmMgr(CkpvAccess(BOCclass_group).computeMsmMgr);
2796 mgrLocal = CProxy_ComputeMsmMgr::ckLocalBranch(
2797 CkpvAccess(BOCclass_group).computeMsmMgr);
2799 bd = &(map->
blockLevel[blockIndex.level](blockIndex.n));
2800 qh.
init( bd->nrange );
2801 eh.
init( bd->nrange );
2802 #ifndef MSM_GRID_CUTOFF_DECOMP 2803 ehCutoff.init( bd->nrangeCutoff );
2805 qhRestricted.init( bd->nrangeRestricted );
2806 ehProlongated.init( bd->nrangeProlongated );
2807 #ifdef DEBUG_MSM_GRID 2808 printf(
"MsmBlockKernel level=%d, n=%d %d %d: constructor\n",
2809 blockIndex.level, blockIndex.n.i, blockIndex.n.j, blockIndex.n.k);
2812 mgrLocal->addTiming();
2818 template <
class Vtype,
class Mtype>
2822 #ifndef MSM_GRID_CUTOFF_DECOMP 2825 qhRestricted.reset(0);
2826 ehProlongated.reset(0);
2828 cntRecvsPotential = 0;
2832 template <
class Vtype,
class Mtype>
2835 #ifdef DEBUG_MSM_GRID 2836 printf(
"MsmBlockKernel level=%d, id=%d %d %d: restriction\n",
2837 blockIndex.level, blockIndex.n.i, blockIndex.n.j, blockIndex.n.k);
2841 double startTime, stopTime;
2842 startTime = CkWallTimer();
2845 #ifndef MSM_COMM_ONLY 2847 const int approx = mgrLocal->
approx;
2861 int ia2 = qhRestricted.ia();
2862 int ib2 = qhRestricted.ib();
2863 int ja2 = qhRestricted.ja();
2864 int jb2 = qhRestricted.jb();
2865 int ka2 = qhRestricted.ka();
2866 int kb2 = qhRestricted.kb();
2869 qhRestricted.reset(0);
2872 for (
int k2 = ka2; k2 <= kb2; k2++) {
2874 for (
int j2 = ja2; j2 <= jb2; j2++) {
2876 for (
int i2 = ia2; i2 <= ib2; i2++) {
2880 Vtype& q2hsum = qhRestricted(i2,j2,k2);
2882 for (
int k = 0; k < nstencil; k++) {
2883 int kn = k1 + offset[k];
2884 if (kn < ka1)
continue;
2885 else if (kn > kb1)
break;
2887 for (
int j = 0; j < nstencil; j++) {
2888 int jn = j1 + offset[j];
2889 if (jn < ja1)
continue;
2890 else if (jn > jb1)
break;
2892 for (
int i = 0; i < nstencil; i++) {
2893 int in = i1 + offset[i];
2894 if (in < ia1)
continue;
2895 else if (in > ib1)
break;
2897 q2hsum += res(i,j,k) * qh(in,jn,kn);
2906 qhRestricted.reset(0);
2907 #endif // !MSM_COMM_ONLY 2910 stopTime = CkWallTimer();
2916 #ifndef MSM_GRID_CUTOFF_DECOMP 2917 template <
class Vtype,
class Mtype>
2920 #ifdef DEBUG_MSM_GRID 2921 printf(
"MsmBlockKernel level=%d, id=%d %d %d: grid cutoff\n",
2922 blockIndex.level, blockIndex.n.i, blockIndex.n.j, blockIndex.n.k);
2925 double startTime, stopTime;
2926 startTime = CkWallTimer();
2928 #ifndef MSM_COMM_ONLY 2946 int ia = ehCutoff.ia();
2947 int ib = ehCutoff.ib();
2948 int ja = ehCutoff.ja();
2949 int jb = ehCutoff.jb();
2950 int ka = ehCutoff.ka();
2951 int kb = ehCutoff.kb();
2955 for (
int k = ka; k <= kb; k++) {
2956 for (
int j = ja; j <= jb; j++) {
2957 for (
int i = ia; i <= ib; i++) {
2959 int mia = ( qia >= gia + i ? qia : gia + i );
2960 int mib = ( qib <= gib + i ? qib : gib + i );
2961 int mja = ( qja >= gja + j ? qja : gja + j );
2962 int mjb = ( qjb <= gjb + j ? qjb : gjb + j );
2963 int mka = ( qka >= gka + k ? qka : gka + k );
2964 int mkb = ( qkb <= gkb + k ? qkb : gkb + k );
2966 Vtype& ehsum = ehCutoff(i,j,k);
2968 for (
int qk = mka; qk <= mkb; qk++) {
2969 for (
int qj = mja; qj <= mjb; qj++) {
2970 for (
int qi = mia; qi <= mib; qi++) {
2971 ehsum += gc(qi-i, qj-j, qk-k) * qh(qi,qj,qk);
2981 #endif // !MSM_COMM_ONLY 2983 stopTime = CkWallTimer();
2987 #endif // MSM_GRID_CUTOFF_DECOMP 2990 template <
class Vtype,
class Mtype>
2993 #ifdef DEBUG_MSM_GRID 2994 printf(
"MsmBlockKernel level=%d, id=%d %d %d: prolongation\n",
2995 blockIndex.level, blockIndex.n.i, blockIndex.n.j, blockIndex.n.k);
2999 double startTime, stopTime;
3000 startTime = CkWallTimer();
3002 #ifndef MSM_COMM_ONLY 3004 const int approx = mgrLocal->
approx;
3010 int ia1 = ehProlongated.
ia();
3011 int ib1 = ehProlongated.ib();
3012 int ja1 = ehProlongated.ja();
3013 int jb1 = ehProlongated.jb();
3014 int ka1 = ehProlongated.ka();
3015 int kb1 = ehProlongated.kb();
3026 for (
int k2 = ka2; k2 <= kb2; k2++) {
3028 for (
int j2 = ja2; j2 <= jb2; j2++) {
3030 for (
int i2 = ia2; i2 <= ib2; i2++) {
3034 for (
int k = 0; k < nstencil; k++) {
3035 int kn = k1 + offset[k];
3036 if (kn < ka1)
continue;
3037 else if (kn > kb1)
break;
3039 for (
int j = 0; j < nstencil; j++) {
3040 int jn = j1 + offset[j];
3041 if (jn < ja1)
continue;
3042 else if (jn > jb1)
break;
3044 for (
int i = 0; i < nstencil; i++) {
3045 int in = i1 + offset[i];
3046 if (in < ia1)
continue;
3047 else if (in > ib1)
break;
3049 ehProlongated(in,jn,kn) += pro(i,j,k) * eh(i2,j2,k2);
3058 ehProlongated.reset(0);
3059 #endif // !MSM_COMM_ONLY 3061 stopTime = CkWallTimer();
3072 public CBase_MsmBlock,
3081 msm::BlockIndex(level,
3082 msm::Ivec(thisIndex.x, thisIndex.y, thisIndex.z))
3085 #ifndef MSM_GRID_CUTOFF_DECOMP 3097 double startTime, stopTime;
3098 startTime = CkWallTimer();
3102 memcpy(ehfull.
data().
buffer(), msg->getData(), msg->getSize());
3111 stopTime = CkWallTimer();
3125 #ifndef MSM_GRID_CUTOFF_DECOMP 3126 void sendAcrossPotential();
3142 #ifdef DEBUG_MSM_GRID 3143 CkPrintf(
"LEVEL %d MSM BLOCK (%d,%d,%d): " 3144 "creating broadcast section on PE %d\n",
3147 std::vector<CkArrayIndex1D> elems;
3155 CProxy_CkMulticastMgr mcastProxy = CkpvAccess(BOCclass_group).multicastMgr;
3156 CkMulticastMgr *mcastPtr = CProxy_CkMulticastMgr(mcastProxy).ckLocalBranch();
3159 #ifdef DEBUG_MSM_GRID 3161 sprintf(s,
"LEVEL %d MSM BLOCK (%d,%d,%d): " 3162 "creating reduction section on PE %d\n",
3165 std::vector<CkArrayIndex1D> elems2;
3167 #ifdef DEBUG_MSM_GRID 3168 strcat(s,
"receiving from MsmGridCutoff ID:");
3171 #ifdef DEBUG_MSM_GRID 3178 #ifdef DEBUG_MSM_GRID 3187 CProxyElement_MsmBlock thisElementProxy = thisProxy(thisIndex);
3188 msg->
put(&thisElementProxy);
3205 double startTime, stopTime;
3206 startTime = CkWallTimer();
3213 stopTime = CkWallTimer();
3229 double startTime, stopTime;
3230 startTime = CkWallTimer();
3234 for (
int n = 0; n <
bd->
sendUp.len(); n++) {
3255 stopTime = CkWallTimer();
3263 #ifdef DEBUG_MSM_GRID 3264 printf(
"MsmBlock level=%d, id=%d %d %d: grid cutoff\n",
3267 #ifndef MSM_GRID_CUTOFF_DECOMP 3269 sendAcrossPotential();
3270 #else // MSM_GRID_CUTOFF_DECOMP 3274 double startTime, stopTime;
3275 startTime = CkWallTimer();
3283 for (
int n = 0; n < len; n++) {
3285 startTime = CkWallTimer();
3292 stopTime = CkWallTimer();
3305 stopTime = CkWallTimer();
3311 #endif // MSM_GRID_CUTOFF_DECOMP 3316 #ifndef MSM_GRID_CUTOFF_DECOMP 3317 void MsmBlock::sendAcrossPotential()
3320 double startTime, stopTime;
3321 startTime = CkWallTimer();
3346 stopTime = CkWallTimer();
3356 double startTime, stopTime;
3357 startTime = CkWallTimer();
3365 stopTime = CkWallTimer();
3382 double startTime, stopTime;
3383 startTime = CkWallTimer();
3388 for (
int n = 0; n <
bd->
sendDown.len(); n++) {
3408 stopTime = CkWallTimer();
3419 double startTime, stopTime;
3420 startTime = CkWallTimer();
3442 int pe = pm->
node(pid);
3446 stopTime = CkWallTimer();
3459 public CBase_MsmC1HermiteBlock,
3468 msm::BlockIndex(level,
3469 msm::Ivec(thisIndex.x, thisIndex.y, thisIndex.z))
3472 int isfirstlevel = (level == 0);
3478 #ifndef MSM_GRID_CUTOFF_DECOMP 3492 double startTime, stopTime;
3493 startTime = CkWallTimer();
3497 memcpy(ehfull.
data().
buffer(), msg->getData(), msg->getSize());
3506 stopTime = CkWallTimer();
3520 #ifndef MSM_GRID_CUTOFF_DECOMP 3521 void sendAcrossPotential();
3537 #ifdef DEBUG_MSM_GRID 3538 CkPrintf(
"LEVEL %d MSM C1 HERMITE BLOCK (%d,%d,%d): " 3539 "creating broadcast section on PE %d\n",
3542 std::vector<CkArrayIndex1D> elems;
3550 CProxy_CkMulticastMgr mcastProxy = CkpvAccess(BOCclass_group).multicastMgr;
3551 CkMulticastMgr *mcastPtr = CProxy_CkMulticastMgr(mcastProxy).ckLocalBranch();
3554 #ifdef DEBUG_MSM_GRID 3556 sprintf(s,
"LEVEL %d MSM C1 HERMITE BLOCK (%d,%d,%d): " 3557 "creating reduction section on PE %d\n",
3560 std::vector<CkArrayIndex1D> elems2;
3562 #ifdef DEBUG_MSM_GRID 3563 strcat(s,
"receiving from MsmC1HermiteGridCutoff ID:");
3566 #ifdef DEBUG_MSM_GRID 3573 #ifdef DEBUG_MSM_GRID 3582 CProxyElement_MsmC1HermiteBlock thisElementProxy = thisProxy(thisIndex);
3583 msg->
put(&thisElementProxy);
3600 double startTime, stopTime;
3601 startTime = CkWallTimer();
3608 stopTime = CkWallTimer();
3624 double startTime, stopTime;
3625 startTime = CkWallTimer();
3629 for (
int n = 0; n <
bd->
sendUp.len(); n++) {
3650 stopTime = CkWallTimer();
3658 #ifdef DEBUG_MSM_GRID 3659 printf(
"MsmC1HermiteBlock level=%d, id=%d %d %d: grid cutoff\n",
3662 #ifndef MSM_GRID_CUTOFF_DECOMP 3664 sendAcrossPotential();
3665 #else // MSM_GRID_CUTOFF_DECOMP 3669 double startTime, stopTime;
3670 startTime = CkWallTimer();
3678 for (
int n = 0; n < len; n++) {
3680 startTime = CkWallTimer();
3687 stopTime = CkWallTimer();
3700 stopTime = CkWallTimer();
3706 #endif // MSM_GRID_CUTOFF_DECOMP 3711 #ifndef MSM_GRID_CUTOFF_DECOMP 3712 void MsmC1HermiteBlock::sendAcrossPotential()
3715 double startTime, stopTime;
3716 startTime = CkWallTimer();
3741 stopTime = CkWallTimer();
3751 double startTime, stopTime;
3752 startTime = CkWallTimer();
3760 stopTime = CkWallTimer();
3777 double startTime, stopTime;
3778 startTime = CkWallTimer();
3783 for (
int n = 0; n <
bd->
sendDown.len(); n++) {
3803 stopTime = CkWallTimer();
3814 double startTime, stopTime;
3815 startTime = CkWallTimer();
3837 int pe = pm->
node(pid);
3841 stopTime = CkWallTimer();
3855 msmProxy(thisgroup), msmCompute(0)
3857 #ifdef DEBUG_MSM_VERBOSE 3858 printf(
"ComputeMsmMgr: (constructor) PE %d\n", CkMyPe());
3860 CkpvAccess(BOCclass_group).computeMsmMgr = thisgroup;
3863 if (CkMyPe() == 0) {
3864 msmTimer = CProxy_MsmTimer::ckNew();
3868 #ifdef MSM_PROFILING 3869 if (CkMyPe() == 0) {
3870 msmProfiler = CProxy_MsmProfiler::ckNew();
3878 #ifdef DEBUG_MSM_VERBOSE 3879 printf(
"ComputeMsmMgr: (destructor) PE %d\n", CkMyPe());
3894 int& ia,
int& ib,
int isperiodic)
3899 const BigReal hmax = 1.5 * hmin;
3902 while (hh >= hmax) {
3908 NAMD_die(
"Basis vector is too short or MSM grid spacing is too large");
3923 nn = (int) floor(len/hh + 0.5);
3934 if (n % bsize != 0) {
3937 if (n % 3 == 0) newbsize = 3;
3938 while (newbsize < bsize && newbsize < n) newbsize *= 2;
3939 if (bsize < newbsize) newbsize /= 2;
3940 if (n % newbsize != 0) {
3941 NAMD_die(
"MSM grid size for periodic dimensions must be " 3942 "a power of 2 times at most one power of 3");
3971 #ifdef DEBUG_MSM_VERBOSE 3972 printf(
"ComputeMsmMgr: initialize() PE %d\n", CkMyPe());
3980 printf(
"PE%d: initializing MSM\n", CkMyPe());
3994 if (sdmin.
x > sdmax.
x) {
double t=sdmin.
x; sdmin.
x=sdmax.
x; sdmax.
x=t; }
3995 if (sdmin.
y > sdmax.
y) {
double t=sdmin.
y; sdmin.
y=sdmax.
y; sdmax.
y=t; }
3996 if (sdmin.
z > sdmax.
z) {
double t=sdmin.
z; sdmin.
z=sdmax.
z; sdmax.
z=t; }
3998 if ( !
lattice.
a_p() && (sdmin.
x != 0 || sdmax.
x != 0)) {
4001 if (CkMyPe() == 0) {
4002 iout <<
iINFO <<
"MSM extending minimum X to " 4008 if (CkMyPe() == 0) {
4009 iout <<
iINFO <<
"MSM extending maximum X to " 4014 if ( !
lattice.
b_p() && (sdmin.
y != 0 || sdmax.
y != 0)) {
4017 if (CkMyPe() == 0) {
4018 iout <<
iINFO <<
"MSM extending minimum Y to " 4024 if (CkMyPe() == 0) {
4025 iout <<
iINFO <<
"MSM extending maximum Y to " 4030 if ( !
lattice.
c_p() && (sdmin.
z != 0 || sdmax.
z != 0)) {
4033 if (CkMyPe() == 0) {
4034 iout <<
iINFO <<
"MSM extending minimum Z to " 4040 if (CkMyPe() == 0) {
4041 iout <<
iINFO <<
"MSM extending maximum Z to " 4047 #ifdef DEBUG_MSM_VERBOSE 4048 printf(
"smin = %g %g %g smax = %g %g %g\n",
4054 NAMD_die(
"MSM: unknown approximation requested (MSMApprox)");
4059 NAMD_die(
"MSM: unknown splitting requested (MSMSplit)");
4062 if (CkMyPe() == 0) {
4063 const char *approx_str, *split_str;
4065 case CUBIC: approx_str =
"C1 cubic";
break;
4066 case QUINTIC: approx_str =
"C1 quintic";
break;
4067 case QUINTIC2: approx_str =
"C2 quintic";
break;
4068 case SEPTIC: approx_str =
"C1 septic";
break;
4069 case SEPTIC3: approx_str =
"C3 septic";
break;
4070 case NONIC: approx_str =
"C1 nonic";
break;
4071 case NONIC4: approx_str =
"C4 nonic";
break;
4072 case C1HERMITE: approx_str =
"C1 Hermite";
break;
4073 default: approx_str =
"unknown";
break;
4076 case TAYLOR2: split_str =
"C2 Taylor";
break;
4077 case TAYLOR3: split_str =
"C3 Taylor";
break;
4078 case TAYLOR4: split_str =
"C4 Taylor";
break;
4079 case TAYLOR5: split_str =
"C5 Taylor";
break;
4080 case TAYLOR6: split_str =
"C6 Taylor";
break;
4081 case TAYLOR7: split_str =
"C7 Taylor";
break;
4082 case TAYLOR8: split_str =
"C8 Taylor";
break;
4083 default: split_str =
"unknown";
break;
4086 << approx_str <<
" interpolation\n";
4088 << split_str <<
" splitting function\n";
4102 NAMD_die(
"MSM: grid spacing must be greater than 0 (MSMGridSpacing)");
4105 NAMD_die(
"MSM: grid spacing must be less than cutoff (MSMGridSpacing)");
4110 NAMD_die(
"MSM: padding must be non-negative (MSMPadding)");
4119 NAMD_die(
"MSM: requested splitting for long-range dispersion " 4120 "(not implemented)");
4127 if (bsx <= 0 || bsy <= 0 || bsz <= 0) {
4128 NAMD_die(
"MSM: invalid block size requested (MSMBlockSize[XYZ])");
4130 #ifdef MSM_FIXED_SIZE_GRID_MSG 4132 NAMD_die(
"MSM: requested block size (MSMBlockSize[XYZ]) too big");
4135 if (CkMyPe() == 0) {
4136 iout <<
iINFO <<
"MSM block size decomposition along X is " 4137 << bsx <<
" grid points\n";
4138 iout <<
iINFO <<
"MSM block size decomposition along Y is " 4139 << bsy <<
" grid points\n";
4140 iout <<
iINFO <<
"MSM block size decomposition along Z is " 4141 << bsz <<
" grid points\n";
4152 int ispany = (ispx || ispy || ispz);
4168 xlen = sgmax.
x - sgmin.
x;
4170 #ifdef DEBUG_MSM_VERBOSE 4171 printf(
"xlen = %g sgmin.x = %g sgmax.x = %g\n", xlen, sgmin.
x, sgmax.
x);
4188 ylen = sgmax.
y - sgmin.
y;
4190 #ifdef DEBUG_MSM_VERBOSE 4191 printf(
"ylen = %g sgmin.y = %g sgmax.y = %g\n", ylen, sgmin.
y, sgmax.
y);
4208 zlen = sgmax.
z - sgmin.
z;
4210 #ifdef DEBUG_MSM_VERBOSE 4211 printf(
"zlen = %g sgmin.z = %g sgmax.z = %g\n", zlen, sgmin.
z, sgmax.
z);
4215 int ia, ib, ja, jb, ka, kb;
4222 if (CkMyPe() == 0) {
4223 if (ispx || ispy || ispz) {
4224 iout <<
iINFO <<
"MSM grid spacing along X is "<<
hxlen <<
" A\n";
4225 iout <<
iINFO <<
"MSM grid spacing along Y is "<<
hylen <<
" A\n";
4226 iout <<
iINFO <<
"MSM grid spacing along Z is "<<
hzlen <<
" A\n";
4231 if ( ! ispx || ! ispy || ! ispz ) {
4236 int ni = ib - ia + 1;
4237 int nj = jb - ja + 1;
4238 int nk = kb - ka + 1;
4250 int smallestnbox = 1;
4254 #ifdef DEBUG_MSM_VERBOSE 4255 printf(
"maxlevels = %d\n", maxlevels);
4261 for (maxlevels = 1; n > 0; n >>= 1) maxlevels++;
4264 int ngci = (int) ceil(3*
a /
hxlen) - 1;
4265 int ngcj = (int) ceil(3*
a /
hylen) - 1;
4266 int ngck = (int) ceil(3*
a /
hzlen) - 1;
4268 int nhalf = (int) sqrt((
double)ni * nj * nk);
4269 lastnelems = (nhalf > omega3 ? nhalf : omega3);
4270 smallestnbox = ngci * ngcj * ngck;
4274 #ifdef DEBUG_MSM_VERBOSE 4275 printf(
"maxlevels = %d\n", maxlevels);
4290 map.
gridrange[level].setbounds(ia, ib, ja, jb, ka, kb);
4294 if (++level ==
nlevels) done |= 0x07;
4297 int nelems = ni * nj * nk;
4298 if (nelems <= lastnelems) done |= 0x07;
4299 if (nelems <= smallestnbox) done |= 0x07;
4302 alldone = (done == 0x07);
4307 if (ni & 1) done |= 0x07;
4308 else if (ni == 2) done |= 0x01;
4311 ia = -((-ia+1)/2) -
s_edge;
4314 if (ni <=
omega) done |= 0x01;
4320 if (nj & 1) done |= 0x07;
4321 else if (nj == 2) done |= 0x02;
4324 ja = -((-ja+1)/2) -
s_edge;
4327 if (nj <=
omega) done |= 0x02;
4333 if (nk & 1) done |= 0x07;
4334 else if (nk == 2) done |= 0x04;
4337 ka = -((-ka+1)/2) -
s_edge;
4340 if (nk <=
omega) done |= 0x04;
4342 }
while ( ! alldone );
4353 if (CkMyPe() == 0) {
4355 for (n = 0; n <
nlevels; n++) {
4357 snprintf(s,
sizeof(s),
" level %d: " 4358 "[%d..%d] x [%d..%d] x [%d..%d]\n", n,
4413 s = (
hv * pv) / (pv * pv);
4417 s = (
hw * pw) / (pw * pw);
4421 ni = (int) ceil(2*
a / pu.length() ) - 1;
4422 nj = (int) ceil(2*
a / pv.length() ) - 1;
4423 nk = (int) ceil(2*
a / pw.length() ) - 1;
4426 Float scaling_factor = 0.5f;
4430 a_p = a_p * a_p * a_p;
4432 scaling_factor = 1.f/64;
4440 for (level = 0; level < toplevel; level++) {
4441 map.
gc[level].setbounds(-ni, ni, -nj, nj, -nk, nk);
4442 map.
gvc[level].setbounds(-ni, ni, -nj, nj, -nk, nk);
4444 for (k = -nk; k <= nk; k++) {
4445 for (j = -nj; j <= nj; j++) {
4446 for (i = -ni; i <= ni; i++) {
4448 BigReal s, t, gs=0, gt=0, g=0, dgs=0, dgt=0, dg=0;
4461 gs = s_1 * s_1 * s_1;
4463 dgs = -6 * gs * s_1;
4473 g = (gs - scaling_factor * gt) * a_p;
4476 c = a_p * a_1 / vlen;
4478 dg = 0.5 * (dgs - 0.5*scaling_factor * dgt) *
c;
4487 map.
gc[level](i,j,k) = scaling *
map.
gc[0](i,j,k);
4494 scaling *= scaling_factor;
4501 for (i = 0; i <
VMAX; i++) {
virial[i] = 0; }
4510 map.
gc[toplevel].setbounds(-ni, ni, -nj, nj, -nk, nk);
4514 for (k = -nk; k <= nk; k++) {
4515 for (j = -nj; j <= nj; j++) {
4516 for (i = -ni; i <= ni; i++) {
4523 gs = s_1 * s_1 * s_1;
4533 map.
gc[toplevel](i,j,k) = scaling *
Float(gs * a_p);
4543 for (k = 0; k < nstencil; k++) {
4544 for (j = 0; j < nstencil; j++) {
4545 for (i = 0; i < nstencil; i++) {
4546 map.
grespro(i,j,k) = phi[i] * phi[j] * phi[k];
4558 for (level = 0; level < toplevel; level++) {
4567 for (k = -nk; k <= nk; k++) {
4568 for (j = -nj; j <= nj; j++) {
4569 for (i = -ni; i <= ni; i++) {
4601 for (k = -nk; k <= nk; k++) {
4602 for (j = -nj; j <= nj; j++) {
4603 for (i = -ni; i <= ni; i++) {
4626 const double PHI0[ND] = { 0.5, 1, 0.5 };
4627 const double DPHI0[ND] = { 1.5, 0, -1.5 };
4628 const double PHI1[ND] = { -0.125, 0, 0.125 };
4629 const double DPHI1[ND] = { -0.25, 1, -0.25 };
4632 double xphi0_base_array[ND];
4633 double dxphi0_base_array[ND];
4634 double yphi0_base_array[ND];
4635 double dyphi0_base_array[ND];
4636 double zphi0_base_array[ND];
4637 double dzphi0_base_array[ND];
4638 double xphi1_base_array[ND];
4639 double dxphi1_base_array[ND];
4640 double yphi1_base_array[ND];
4641 double dyphi1_base_array[ND];
4642 double zphi1_base_array[ND];
4643 double dzphi1_base_array[ND];
4645 double *xphi0, *dxphi0, *xphi1, *dxphi1;
4646 double *yphi0, *dyphi0, *yphi1, *dyphi1;
4647 double *zphi0, *dzphi0, *zphi1, *dzphi1;
4649 for (n = 0; n < ND; n++) {
4650 xphi0_base_array[n] = PHI0[n];
4651 dxphi0_base_array[n] =
hxlen_1 * DPHI0[n];
4652 xphi1_base_array[n] =
hxlen * PHI1[n];
4653 dxphi1_base_array[n] = DPHI1[n];
4654 yphi0_base_array[n] = PHI0[n];
4655 dyphi0_base_array[n] =
hylen_1 * DPHI0[n];
4656 yphi1_base_array[n] =
hylen * PHI1[n];
4657 dyphi1_base_array[n] = DPHI1[n];
4658 zphi0_base_array[n] = PHI0[n];
4659 dzphi0_base_array[n] =
hzlen_1 * DPHI0[n];
4660 zphi1_base_array[n] =
hzlen * PHI1[n];
4661 dzphi1_base_array[n] = DPHI1[n];
4663 xphi0 = xphi0_base_array + NR;
4664 dxphi0 = dxphi0_base_array + NR;
4665 xphi1 = xphi1_base_array + NR;
4666 dxphi1 = dxphi1_base_array + NR;
4667 yphi0 = yphi0_base_array + NR;
4668 dyphi0 = dyphi0_base_array + NR;
4669 yphi1 = yphi1_base_array + NR;
4670 dyphi1 = dyphi1_base_array + NR;
4671 zphi0 = zphi0_base_array + NR;
4672 dzphi0 = dzphi0_base_array + NR;
4673 zphi1 = zphi1_base_array + NR;
4674 dzphi1 = dzphi1_base_array + NR;
4676 for (level = 0; level <
nlevels-1; level++) {
4686 for (n = -NR; n <= NR; n++) {
4706 for (k = -NR; k <= NR; k++) {
4707 for (j = -NR; j <= NR; j++) {
4708 for (i = -NR; i <= NR; i++) {
4788 for (k = -NR; k <= NR; k++) {
4789 for (j = -NR; j <= NR; j++) {
4790 for (i = -NR; i <= NR; i++) {
4878 if (CkMyPe() == 0) {
4879 iout <<
iINFO <<
"MSM finished calculating stencils\n" <<
endi;
4886 #ifdef DEBUG_MSM_VERBOSE 4887 printf(
"numPatches = %d\n", numpatches);
4895 #ifdef MSM_FOLD_FACTOR 4898 for (level = 0; level <
nlevels; level++) {
4917 #ifdef MSM_DEBUG_VERBOSE 4918 if (CkMyPe() == 0) {
4919 printf(
"level = %d\n map.bs* = %d %d %d gn* = %d %d %d\n",
4925 int bni = (gni /
map.
bsx[level]) + (gni %
map.
bsx[level] != 0);
4926 int bnj = (gnj /
map.
bsy[level]) + (gnj %
map.
bsy[level] != 0);
4927 int bnk = (gnk /
map.
bsz[level]) + (gnk %
map.
bsz[level] != 0);
4928 #ifdef MSM_FOLD_FACTOR 4929 if ( (bni == 1 || bnj == 1 || bnk == 1)) {
4932 if (CkMyPe() == 0) {
4933 printf(
"Setting MSM FoldFactor level %d: %d %d %d\n",
4934 level, bsx / gni, bsy / gnj, bsz / gnk);
4939 b.
set(0, bni, 0, bnj, 0, bnk);
4940 for (k = 0; k < bnk; k++) {
4941 for (j = 0; j < bnj; j++) {
4942 for (i = 0; i < bni; i++) {
4944 int ia = gia + i*
map.
bsx[level];
4945 int ib = ia +
map.
bsx[level] - 1;
4946 int ja = gja + j*
map.
bsy[level];
4947 int jb = ja +
map.
bsy[level] - 1;
4948 int ka = gka + k*
map.
bsz[level];
4949 int kb = ka +
map.
bsz[level] - 1;
4950 if (ib >= gia + gni) ib = gia + gni - 1;
4951 if (jb >= gja + gnj) jb = gja + gnj - 1;
4952 if (kb >= gka + gnk) kb = gka + gnk - 1;
4953 b(i,j,k).nrange.
setbounds(ia, ib, ja, jb, ka, kb);
4972 #ifdef DEBUG_MSM_VERBOSE 4973 printf(
"Done allocating map for grid levels\n");
4974 printf(
"Grid level decomposition:\n");
4975 for (level = 0; level <
nlevels; level++) {
4983 for (k = bka; k <= bkb; k++) {
4984 for (j = bja; j <= bjb; j++) {
4985 for (i = bia; i <= bib; i++) {
4986 int ia = b(i,j,k).nrange.
ia();
4987 int ib = b(i,j,k).nrange.
ib();
4988 int ja = b(i,j,k).nrange.
ja();
4989 int jb = b(i,j,k).nrange.
jb();
4990 int ka = b(i,j,k).nrange.
ka();
4991 int kb = b(i,j,k).nrange.
kb();
4992 printf(
"level=%d id=%d %d %d [%d..%d] x [%d..%d] x [%d..%d]" 4994 level, i, j, k, ia, ib, ja, jb, ka, kb,
4995 b(i,j,k).nrange.
nn());
5001 if (CkMyPe() == 0) {
5002 iout <<
iINFO <<
"MSM finished creating map for grid levels\n" <<
endi;
5009 void ComputeMsmMgr::initialize2()
5014 int i, j, k, n, level;
5022 BigReal xmargin = 0.5 * (patchdim -
a) / sysdima;
5023 BigReal ymargin = 0.5 * (patchdim -
a) / sysdimb;
5024 BigReal zmargin = 0.5 * (patchdim -
a) / sysdimc;
5037 for (pid = 0; pid < numpatches; pid++) {
5076 if (ia < ia_min) ia = ia_min;
5077 if (ib > ib_max) ib = ib_max;
5078 if (ja < ja_min) ja = ja_min;
5079 if (jb > jb_max) jb = jb_max;
5080 if (ka < ka_min) ka = ka_min;
5081 if (kb > kb_max) kb = kb_max;
5090 if (mi == 0) ia = ia_min;
5091 if (mi == npi-1) ib = ib_max;
5092 if (mj == 0) ja = ja_min;
5093 if (mj == npj-1) jb = jb_max;
5094 if (mk == 0) ka = ka_min;
5095 if (mk == npk-1) kb = kb_max;
5098 printf(
"patch %d: grid covering: [%d..%d] x [%d..%d] x [%d..%d]\n",
5099 pid, ia, ib, ja, jb, ka, kb);
5107 int maxarrlen = (bupper.
n.
i - blower.
n.
i + 1) *
5108 (bupper.
n.
j - blower.
n.
j + 1) * (bupper.
n.
k - blower.
n.
k + 1);
5109 p.
send.setmax(maxarrlen);
5112 printf(
"blower: level=%d n=%d %d %d bupper: level=%d n=%d %d %d\n",
5114 bupper.
level, bupper.
n.
i, bupper.
n.
j, bupper.
n.
k);
5117 for (
int kk = blower.
n.
k; kk <= bupper.
n.
k; kk++) {
5118 for (
int jj = blower.
n.
j; jj <= bupper.
n.
j; jj++) {
5119 for (
int ii = blower.
n.
i; ii <= bupper.
n.
i; ii++) {
5121 printf(
"ii=%d jj=%d kk=%d\n", ii, jj, kk);
5147 #ifdef DEBUG_MSM_VERBOSE 5148 if (CkMyPe() == 0) {
5149 printf(
"Done allocating map for patches\n");
5150 printf(
"Patch level decomposition:\n");
5151 for (pid = 0; pid < numpatches; pid++) {
5159 printf(
"patch id=%d [%d..%d] x [%d..%d] x [%d..%d]\n",
5160 pid, ia, ib, ja, jb, ka, kb);
5164 if (CkMyPe() == 0) {
5165 iout <<
iINFO <<
"MSM finished creating map for patches\n" <<
endi;
5171 for (level = 0; level <
nlevels; level++) {
5176 #ifdef MSM_SKIP_BEYOND_SPHERE 5177 int gia, gib, gja, gjb, gka, gkb;
5187 gia =
map.
gc[level].ia();
5188 gib =
map.
gc[level].ib();
5189 gja =
map.
gc[level].ja();
5190 gjb =
map.
gc[level].jb();
5191 gka =
map.
gc[level].ka();
5192 gkb =
map.
gc[level].kb();
5195 #ifdef MSM_SKIP_TOO_DISTANT_BLOCKS 5196 int bsx =
map.
bsx[level];
5197 int bsy =
map.
bsy[level];
5198 int bsz =
map.
bsz[level];
5200 #ifdef MSM_FOLD_FACTOR 5207 for (k = 0; k < bnk; k++) {
5208 for (j = 0; j < bnj; j++) {
5209 for (i = 0; i < bni; i++) {
5212 int ia = b(i,j,k).nrange.
ia();
5213 int ib = b(i,j,k).nrange.
ib();
5214 int ja = b(i,j,k).nrange.
ja();
5215 int jb = b(i,j,k).nrange.
jb();
5216 int ka = b(i,j,k).nrange.
ka();
5217 int kb = b(i,j,k).nrange.
kb();
5227 ia +=
map.
gc[level].ia();
5228 ib +=
map.
gc[level].ib();
5229 ja +=
map.
gc[level].ja();
5230 jb +=
map.
gc[level].jb();
5231 ka +=
map.
gc[level].ka();
5232 kb +=
map.
gc[level].kb();
5238 #ifdef MSM_FOLD_FACTOR 5245 int maxarrlen = (bupper.
n.
i - blower.
n.
i + 1) *
5246 (bupper.
n.
j - blower.
n.
j + 1) * (bupper.
n.
k - blower.
n.
k + 1);
5247 b(i,j,k).sendAcross.setmax(maxarrlen);
5248 b(i,j,k).indexGridCutoff.setmax(maxarrlen);
5249 b(i,j,k).recvGridCutoff.setmax(maxarrlen);
5255 printf(
"ME %4d [%d..%d] x [%d..%d] x [%d..%d]\n",
5262 for (kk = blower.
n.
k; kk <= bupper.
n.
k; kk++) {
5263 for (jj = blower.
n.
j; jj <= bupper.
n.
j; jj++) {
5264 for (ii = blower.
n.
i; ii <= bupper.
n.
i; ii++) {
5265 #ifdef MSM_SKIP_TOO_DISTANT_BLOCKS 5267 int si =
sign(ii-i);
5268 int sj =
sign(jj-j);
5269 int sk =
sign(kk-k);
5270 int di = (ii-i)*bsx + si*(1-bsx);
5271 int dj = (jj-j)*bsy + sj*(1-bsy);
5272 int dk = (kk-k)*bsz + sk*(1-bsz);
5280 #ifdef MSM_FOLD_FACTOR 5282 b(i,j,k).nrangeCutoff);
5286 b(i,j,k).nrangeCutoff);
5289 #ifdef MSM_SKIP_BEYOND_SPHERE 5291 printf(
"send to volume %4d [%d..%d] x [%d..%d] x [%d..%d]\n",
5305 int inc_in = (bn.
ni() > 1 ? bn.
ni()-1 : 1);
5306 int inc_jn = (bn.
nj() > 1 ? bn.
nj()-1 : 1);
5307 int inc_kn = (bn.
nk() > 1 ? bn.
nk()-1 : 1);
5310 for (
int kn = bn.
ka(); kn <= bn.
kb(); kn += inc_kn) {
5311 for (
int jn = bn.
ja(); jn <= bn.
jb(); jn += inc_jn) {
5312 for (
int in = bn.
ia(); in <= bn.
ib(); in += inc_in) {
5314 int mia = ( qia >= gia + in ? qia : gia + in );
5315 int mib = ( qib <= gib + in ? qib : gib + in );
5316 int mja = ( qja >= gja + jn ? qja : gja + jn );
5317 int mjb = ( qjb <= gjb + jn ? qjb : gjb + jn );
5318 int mka = ( qka >= gka + kn ? qka : gka + kn );
5319 int mkb = ( qkb <= gkb + kn ? qkb : gkb + kn );
5320 int inc_im = (mib-mia > 0 ? mib-mia : 1);
5321 int inc_jm = (mjb-mja > 0 ? mjb-mja : 1);
5322 int inc_km = (mkb-mka > 0 ? mkb-mka : 1);
5325 for (
int km = mka; km <= mkb; km += inc_km) {
5326 for (
int jm = mja; jm <= mjb; jm += inc_jm) {
5327 for (
int im = mia; im <= mib; im += inc_im) {
5334 g =
map.
gc[level](im-in,jm-jn,km-kn);
5349 b(i,j,k).sendAcross.append(bs);
5363 int ia2, ib2, ja2, jb2, ka2, kb2;
5364 ia = b(i,j,k).nrange.
ia();
5365 ib = b(i,j,k).nrange.
ib();
5366 ja = b(i,j,k).nrange.
ja();
5367 jb = b(i,j,k).nrange.
jb();
5368 ka = b(i,j,k).nrange.
ka();
5369 kb = b(i,j,k).nrange.
kb();
5371 if ( ia==ib && ((ia & 1)==0) ) {
5375 ia2 = (ia / 2) - ((polydeg+1) / 2) + 1;
5376 ib2 = ((ib+1) / 2) + ((polydeg+1) / 2) - 1;
5378 if ( ja==jb && ((ja & 1)==0) ) {
5382 ja2 = (ja / 2) - ((polydeg+1) / 2) + 1;
5383 jb2 = ((jb+1) / 2) + ((polydeg+1) / 2) - 1;
5385 if ( ka==kb && ((ka & 1)==0) ) {
5389 ka2 = (ka / 2) - ((polydeg+1) / 2) + 1;
5390 kb2 = ((kb+1) / 2) + ((polydeg+1) / 2) - 1;
5396 b(i,j,k).nrangeRestricted.
setbounds(na2.
i, nb2.
i, na2.
j, nb2.
j,
5401 int maxarrlen = (bupper.
n.
i - blower.
n.
i + 1) *
5402 (bupper.
n.
j - blower.
n.
j + 1) * (bupper.
n.
k - blower.
n.
k + 1);
5403 b(i,j,k).sendUp.setmax(maxarrlen);
5406 for (kk = blower.
n.
k; kk <= bupper.
n.
k; kk++) {
5407 for (jj = blower.
n.
j; jj <= bupper.
n.
j; jj++) {
5408 for (ii = blower.
n.
i; ii <= bupper.
n.
i; ii++) {
5414 b(i,j,k).nrangeRestricted);
5416 b(i,j,k).sendUp.append(bs);
5427 int ia2 = b(i,j,k).nrange.
ia();
5428 int ib2 = b(i,j,k).nrange.
ib();
5429 int ja2 = b(i,j,k).nrange.
ja();
5430 int jb2 = b(i,j,k).nrange.
jb();
5431 int ka2 = b(i,j,k).nrange.
ka();
5432 int kb2 = b(i,j,k).nrange.
kb();
5434 ia = 2*ia2 - polydeg;
5435 ib = 2*ib2 + polydeg;
5436 ja = 2*ja2 - polydeg;
5437 jb = 2*jb2 + polydeg;
5438 ka = 2*ka2 - polydeg;
5439 kb = 2*kb2 + polydeg;
5444 b(i,j,k).nrangeProlongated.
setbounds(na.
i, nb.
i, na.
j, nb.
j,
5449 int maxarrlen = (bupper.
n.
i - blower.
n.
i + 1) *
5450 (bupper.
n.
j - blower.
n.
j + 1) * (bupper.
n.
k - blower.
n.
k + 1);
5451 b(i,j,k).sendDown.setmax(maxarrlen);
5454 for (kk = blower.
n.
k; kk <= bupper.
n.
k; kk++) {
5455 for (jj = blower.
n.
j; jj <= bupper.
n.
j; jj++) {
5456 for (ii = blower.
n.
i; ii <= bupper.
n.
i; ii++) {
5462 b(i,j,k).nrangeProlongated);
5464 b(i,j,k).sendDown.append(bs);
5473 #ifdef MSM_REDUCE_GRID 5476 b(i,j,k).numRecvsPotential -= ( b(i,j,k).indexGridCutoff.len() - 1 );
5501 #ifdef DEBUG_MSM_VERBOSE 5502 printf(
"Allocating patchPtr array length %d\n", pm->
numPatches());
5504 if (CkMyPe() == 0) {
5506 <<
" interpolation / anterpolation objects" 5507 <<
" (one per patch)\n" <<
endi;
5511 #ifdef MSM_NODE_MAPPING 5531 int numNodes = CkNumNodes();
5532 int numPes = CkNumPes();
5534 int numPesPerNode = numPes / numNodes;
5536 for (level = 0; level <
nlevels; level++) {
5551 #ifdef MSM_NODE_MAPPING_STATS 5557 for (n = 0; n < numNodes; n++) {
5562 for (level = 0; level <
nlevels; level++) {
5567 for (k = 0; k < bnk; k++) {
5568 for (j = 0; j < bnj; j++) {
5569 for (i = 0; i < bni; i++) {
5571 nodeQueue.remove(wn);
5574 nodeWork[wn.
index] += bw;
5576 blockWork[bindex] = bw;
5577 nodeQueue.insert(wn);
5585 for (n = 0; n < numBlocks; n++) {
5587 nodeQueue.remove(wn);
5590 nodeWork[wn.
index] += bw;
5593 nodeQueue.insert(wn);
5600 for (level = 0; level <
nlevels; level++) {
5605 for (k = 0; k < bnk; k++) {
5606 for (j = 0; j < bnj; j++) {
5607 for (i = 0; i < bni; i++) {
5610 int numSendAcross = b(i,j,k).sendAcross.len();
5611 ASSERT( numSendAcross == b(i,j,k).indexGridCutoff.len() );
5612 for (n = 0; n < numSendAcross; n++) {
5618 if (nodeWork[nsrc] <= nodeWork[ndest]) {
5620 nodeWork[nsrc] += gcutWork[gindex];
5624 nodeWork[ndest] += gcutWork[gindex];
5634 for (n = 0; n < numNodes; n++) {
5635 peQueue[n].init(numPesPerNode);
5636 for (
int poff = 0; poff < numPesPerNode; poff++) {
5637 peQueue[n].insert(
WorkIndex(0, n*numPesPerNode + poff));
5641 for (n = 0; n < numBlocks; n++) {
5644 peQueue[node].remove(wn);
5646 wn.
work += blockWork[n];
5647 peQueue[node].insert(wn);
5648 #ifdef MSM_NODE_MAPPING_STATS 5649 peWork[wn.
index] += blockWork[n];
5656 peQueue[node].remove(wn);
5658 wn.
work += gcutWork[n];
5659 peQueue[node].insert(wn);
5660 #ifdef MSM_NODE_MAPPING_STATS 5661 peWork[wn.
index] += gcutWork[n];
5665 #ifdef MSM_NODE_MAPPING_STATS 5666 if (CkMyPe() == 0) {
5667 printf(
"Mapping of MSM work (showing scaled estimated work units):\n");
5668 for (n = 0; n < numNodes; n++) {
5669 printf(
" node %d work %8.3f\n", n, nodeWork[n]);
5670 for (
int poff = 0; poff < numPesPerNode; poff++) {
5671 int p = n*numPesPerNode + poff;
5672 printf(
" pe %d work %8.3f\n", p, peWork[p]);
5681 for (level = 0; level <
nlevels; level++) {
5689 nodecnt.resize(numNodes);
5694 int r = numBlocks % numNodes;
5695 int q = numBlocks / numNodes;
5697 for (n = 0; n < numNodes - r; n++) {
5698 int moffset = n * q;
5699 for (
int m = 0; m < q; m++) {
5704 for ( ; n < numNodes; n++) {
5705 int moffset = (numNodes - r)*q + (n - (numNodes - r))*qp;
5706 for (
int m = 0; m < qp; m++) {
5712 if (CkMyPe() == 0) {
5713 CkPrintf(
"%d objects to %d nodes\n", q, numNodes-r);
5715 CkPrintf(
"%d objects to %d nodes\n", qp, r);
5717 CkPrintf(
"%d =? %d\n", (numNodes-r)*q + r*qp, numBlocks);
5724 for (level = 0; level <
nlevels; level++) {
5729 for (k = 0; k < bnk; k++) {
5730 for (j = 0; j < bnj; j++) {
5731 for (i = 0; i < bni; i++) {
5734 int numSendAcross = b(i,j,k).sendAcross.len();
5735 ASSERT( numSendAcross == b(i,j,k).indexGridCutoff.len() );
5736 for (n = 0; n < numSendAcross; n++) {
5741 if (nodecnt[nsrc] <= nodecnt[ndest]) {
5758 int ppn = numPes / numNodes;
5760 for (n = 0; n < numNodes; n++) nodecnt[n] = 0;
5761 for (n = 0; n < numBlocks; n++) {
5765 if (nodecnt[node] >= ppn) nodecnt[node] = 0;
5771 if (nodecnt[node] >= ppn) nodecnt[node] = 0;
5776 if (CkMyPe() == 0) {
5777 for (n = 0; n < numBlocks; n++) {
5778 CkPrintf(
"block %d: node=%d pe=%d\n",
5783 CkPrintf(
"grid cutoff %d: node=%d pe=%d\n",
5793 #endif // MSM_NODE_MAPPING 5799 int i, j, k, n, level;
5801 if (CkMyPe() == 0) {
5811 for (level = 0; level <
nlevels; level++) {
5815 #ifdef MSM_NODE_MAPPING 5816 CkPrintf(
"Using MsmBlockMap for level %d\n", level);
5817 CProxy_MsmBlockMap blockMap = CProxy_MsmBlockMap::ckNew(level);
5818 CkArrayOptions opts(ni, nj, nk);
5819 opts.setMap(blockMap);
5822 CProxy_MsmC1HermiteBlock::ckNew(level, opts);
5825 msmBlock[level] = CProxy_MsmBlock::ckNew(level, opts);
5830 CProxy_MsmC1HermiteBlock::ckNew(level, ni, nj, nk);
5833 msmBlock[level] = CProxy_MsmBlock::ckNew(level, ni, nj, nk);
5836 #ifdef DEBUG_MSM_VERBOSE 5837 printf(
"Create MsmBlock[%d] 3D chare array ( %d x %d x %d )\n",
5841 int nijk = ni * nj * nk;
5842 sprintf(msg,
"MSM grid level %d decomposed into %d block%s" 5843 " ( %d x %d x %d )\n",
5844 level, nijk, (nijk==1 ?
"" :
"s"), ni, nj, nk);
5850 msmProxy.recvMsmC1HermiteBlockProxy(msg);
5858 #ifdef MSM_GRID_CUTOFF_DECOMP 5861 #ifdef MSM_NODE_MAPPING 5862 CkPrintf(
"Using MsmGridCutoffMap\n");
5863 CProxy_MsmGridCutoffMap gcutMap = CProxy_MsmGridCutoffMap::ckNew();
5865 optsgcut.setMap(gcutMap);
5885 msmProxy.recvMsmC1HermiteGridCutoffProxy(gcmsg);
5890 msmProxy.recvMsmGridCutoffProxy(gcmsg);
5896 for (level = 0; level <
nlevels; level++) {
5901 for (k = 0; k < bnk; k++) {
5902 for (j = 0; j < bnj; j++) {
5903 for (i = 0; i < bni; i++) {
5906 int numSendAcross = b(i,j,k).sendAcross.len();
5907 ASSERT( numSendAcross == b(i,j,k).indexGridCutoff.len() );
5909 for (n = 0; n < numSendAcross; n++) {
5911 int index = b(i,j,k).indexGridCutoff[n];
5927 iout <<
iINFO <<
"MSM grid cutoff calculation decomposed into " 5933 #ifdef DEBUG_MSM_VERBOSE 5934 printf(
"end of initialization\n");
5969 #ifdef DEBUG_MSM_VERBOSE 5970 printf(
"ComputeMsmMgr: update() PE %d\n", CkMyPe());
5975 if (CkMyPe() == 0) {
5976 for (
int level = 0; level <
nlevels; level++) {
5992 #ifdef DEBUG_MSM_VERBOSE 5993 printf(
"ComputeMsmMgr: compute() PE=%d\n", CkMyPe());
5997 for (n = 0; n < patchIDList.
len(); n++) {
5998 int patchID = patchIDList[n];
6001 snprintf(msg,
sizeof(msg),
6002 "Expected MSM data for patch %d does not exist on PE %d",
6007 patchPtr[patchID]->anterpolationC1Hermite();
6010 patchPtr[patchID]->anterpolation();
6031 snprintf(msg,
sizeof(msg),
"Expecting patch %d to exist on PE %d",
6059 CProxy_ComputeMsmMgr::ckLocalBranch(
6060 CkpvAccess(BOCclass_group).computeMsmMgr)->setCompute(
this);
6064 #ifdef DEBUG_MSM_VERBOSE 6065 printf(
"ComputeMsm: (constructor) PE=%d\n", CkMyPe());
6072 #ifdef DEBUG_MSM_VERBOSE 6073 printf(
"ComputeMsm: (destructor) PE=%d\n", CkMyPe());
6080 #ifdef DEBUG_MSM_VERBOSE 6081 printf(
"ComputeMsm: doWork() PE=%d\n", CkMyPe());
6086 myMgr->initTiming();
6088 #ifdef MSM_PROFILING 6089 myMgr->initProfiling();
6096 cntLocalPatches = 0;
6097 ASSERT(cntLocalPatches < numLocalPatches);
6099 #ifdef DEBUG_MSM_VERBOSE 6104 if ( !
patchList[0].p->flags.doFullElectrostatics ) {
6105 for (ap = ap.
begin(); ap != ap.
end(); ap++) {
6106 CompAtom *x = (*ap).positionBox->open();
6107 Results *r = (*ap).forceBox->open();
6108 (*ap).positionBox->close(&x);
6109 (*ap).forceBox->close(&r);
6122 for (ap = ap.
begin(); ap != ap.
end(); ap++) {
6123 CompAtom *x = (*ap).positionBox->open();
6124 CompAtomExt *xExt = (*ap).p->getCompAtomExtInfo();
6126 (*ap).positionBox->close(&x);
6127 x = (*ap).avgPositionBox->open();
6129 int numAtoms = (*ap).p->getNumAtoms();
6130 int patchID = (*ap).patchID;
6131 patchIDList.
append(patchID);
6132 if (patchPtr[patchID] == NULL) {
6135 #ifdef DEBUG_MSM_VERBOSE 6136 printf(
"Creating new PatchData: patchID=%d PE=%d\n",
6141 patch.
init(numAtoms);
6144 for (n = 0; n < numAtoms; n++) {
6146 coord[n].charge = qscaling * x[n].
charge;
6147 coord[n].id = xExt[n].
id;
6150 (*ap).avgPositionBox->close(&x);
6153 (*ap).positionBox->close(&x);
6163 if (++cntLocalPatches != numLocalPatches)
return;
6168 for (ap = ap.
begin(); ap != ap.
end(); ap++) {
6169 int patchID = (*ap).patchID;
6178 #ifdef DEBUG_MSM_VERBOSE 6179 printf(
"ComputeMsm: saveResults() PE=%d\n", CkMyPe());
6186 for (ap = ap.
begin(); ap != ap.
end(); ap++) {
6187 Results *r = (*ap).forceBox->open();
6189 int numAtoms = (*ap).p->getNumAtoms();
6190 int patchID = (*ap).patchID;
6191 if (patchPtr[patchID] == NULL) {
6193 snprintf(msg,
sizeof(msg),
"Expecting patch %d to exist on PE %d",
6199 for (n = 0; n < numAtoms; n++) {
6200 f[n] += patch.
force[n];
6202 (*ap).forceBox->close(&r);
6258 for (
int i = 0; i < natoms; i++)
force[i] = 0;
6270 #ifdef DEBUG_MSM_GRID 6271 printf(
"patchID %d: anterpolation\n",
patchID);
6275 double startTime, stopTime;
6276 startTime = CkWallTimer();
6278 #ifndef MSM_COMM_ONLY 6286 const int ia =
qh.
ia();
6287 const int ib =
qh.
ib();
6288 const int ja =
qh.
ja();
6289 const int jb =
qh.
jb();
6290 const int ka =
qh.
ka();
6291 const int kb =
qh.
kb();
6292 const int ni =
qh.
ni();
6293 const int nj =
qh.
nj();
6297 for (
int n = 0; n <
coord.
len(); n++) {
6307 BigReal xlo = floor(sx_hx) - rs_edge;
6308 BigReal ylo = floor(sy_hy) - rs_edge;
6309 BigReal zlo = floor(sz_hz) - rs_edge;
6324 int iswithin = ( ia <= ilo && (ilo+(s_size-1)) <= ib &&
6325 ja <= jlo && (jlo+(s_size-1)) <= jb &&
6326 ka <= klo && (klo+(s_size-1)) <= kb );
6330 printf(
"PE %d: atom %d: pos= %g %g %g patchID=%d\n",
6331 CkMyPe(),
coord[n].
id,
6334 printf(
"PE %d: atom subgrid [%d..%d] x [%d..%d] x [%d..%d]\n",
6336 ilo, ilo+s_size-1, jlo, jlo+s_size-1, klo, klo+s_size-1);
6337 printf(
"PE %d: patch grid [%d..%d] x [%d..%d] x [%d..%d]\n",
6339 ia, ib, ja, jb, ka, kb);
6342 snprintf(msg,
sizeof(msg),
"Atom %d is outside of the MSM grid.",
6348 for (
int k = 0; k < s_size; k++) {
6349 int koff = ((k+klo) - ka) * nj;
6350 Float ck = zphi[k] * q;
6351 for (
int j = 0; j < s_size; j++) {
6352 int jkoff = (koff + (j+jlo) - ja) * ni;
6353 Float cjk = yphi[j] * ck;
6354 for (
int i = 0; i < s_size; i++) {
6355 int ijkoff = jkoff + (i+ilo) - ia;
6356 qhbuffer[ijkoff] += xphi[i] * cjk;
6362 #endif // !MSM_COMM_ONLY 6364 stopTime = CkWallTimer();
6373 double startTime, stopTime;
6380 for (
int n = 0; n <
pd->
send.len(); n++) {
6382 startTime = CkWallTimer();
6398 stopTime = CkWallTimer();
6402 bindex.
n.
i, bindex.
n.
j, bindex.
n.
k).addCharge(gm);
6408 double startTime, stopTime;
6409 startTime = CkWallTimer();
6413 stopTime = CkWallTimer();
6422 #ifdef DEBUG_MSM_GRID 6423 printf(
"patchID %d: interpolation\n",
patchID);
6427 double startTime, stopTime;
6428 startTime = CkWallTimer();
6430 #ifndef MSM_COMM_ONLY 6447 const int ia =
eh.
ia();
6448 const int ib =
eh.
ib();
6449 const int ja =
eh.
ja();
6450 const int jb =
eh.
jb();
6451 const int ka =
eh.
ka();
6452 const int kb =
eh.
kb();
6453 const int ni =
eh.
ni();
6454 const int nj =
eh.
nj();
6458 for (
int n = 0; n <
coord.
len(); n++) {
6468 BigReal xlo = floor(sx_hx) - rs_edge;
6469 BigReal ylo = floor(sy_hy) - rs_edge;
6470 BigReal zlo = floor(sz_hz) - rs_edge;
6488 int iswithin = ( ia <= ilo && (ilo+(s_size-1)) <= ib &&
6489 ja <= jlo && (jlo+(s_size-1)) <= jb &&
6490 ka <= klo && (klo+(s_size-1)) <= kb );
6494 snprintf(msg,
sizeof(msg),
"Atom %d is outside of the MSM grid.",
6503 Float fx=0, fy=0, fz=0, e=0;
6504 for (
int k = 0; k < s_size; k++) {
6505 int koff = ((k+klo) - ka) * nj;
6506 for (
int j = 0; j < s_size; j++) {
6507 int jkoff = (koff + (j+jlo) - ja) * ni;
6508 Float cx = yphi[j] * zphi[k];
6509 Float cy = dyphi[j] * zphi[k];
6510 Float cz = yphi[j] * dzphi[k];
6511 for (
int i = 0; i < s_size; i++) {
6512 int ijkoff = jkoff + (i+ilo) - ia;
6513 Float ec = ehbuffer[ijkoff];
6514 fx += ec * dxphi[i] * cx;
6515 fy += ec * xphi[i] * cy;
6516 fz += ec * xphi[i] * cz;
6517 e += ec * xphi[i] * cx;
6527 force[n].x -= q * fx;
6528 force[n].y -= q * fy;
6529 force[n].z -= q * fz;
6531 energy_self += q * q;
6538 #endif // !MSM_COMM_ONLY 6540 stopTime = CkWallTimer();
6548 #ifdef DEBUG_MSM_GRID 6549 printf(
"patchID %d: anterpolationC1Hermite\n",
patchID);
6553 double startTime, stopTime;
6554 startTime = CkWallTimer();
6556 #ifndef MSM_COMM_ONLY 6557 Float xphi[2], xpsi[2];
6558 Float yphi[2], ypsi[2];
6559 Float zphi[2], zpsi[2];
6576 for (
int n = 0; n <
coord.
len(); n++) {
6603 int iswithin = ( ia <= ilo && ilo < ib &&
6604 ja <= jlo && jlo < jb &&
6605 ka <= klo && klo < kb );
6609 snprintf(msg,
sizeof(msg),
"Atom %d is outside of the MSM grid.",
6615 for (
int k = 0; k < 2; k++) {
6616 int koff = ((k+klo) - ka) * nj;
6617 Float c_zphi = zphi[k] * q;
6618 Float c_zpsi = zpsi[k] * q;
6619 for (
int j = 0; j < 2; j++) {
6620 int jkoff = (koff + (j+jlo) - ja) * ni;
6621 Float c_yphi_zphi = yphi[j] * c_zphi;
6622 Float c_ypsi_zphi = ypsi[j] * c_zphi;
6623 Float c_yphi_zpsi = yphi[j] * c_zpsi;
6624 Float c_ypsi_zpsi = ypsi[j] * c_zpsi;
6625 for (
int i = 0; i < 2; i++) {
6626 int ijkoff = jkoff + (i+ilo) - ia;
6627 qhbuffer[ijkoff].
velem[
D000] += xphi[i] * c_yphi_zphi;
6628 qhbuffer[ijkoff].
velem[
D100] += xpsi[i] * c_yphi_zphi;
6629 qhbuffer[ijkoff].
velem[
D010] += xphi[i] * c_ypsi_zphi;
6630 qhbuffer[ijkoff].
velem[
D001] += xphi[i] * c_yphi_zpsi;
6631 qhbuffer[ijkoff].
velem[
D110] += xpsi[i] * c_ypsi_zphi;
6632 qhbuffer[ijkoff].
velem[
D101] += xpsi[i] * c_yphi_zpsi;
6633 qhbuffer[ijkoff].
velem[
D011] += xphi[i] * c_ypsi_zpsi;
6634 qhbuffer[ijkoff].
velem[
D111] += xpsi[i] * c_ypsi_zpsi;
6641 #endif // !MSM_COMM_ONLY 6643 stopTime = CkWallTimer();
6652 double startTime, stopTime;
6656 for (
int n = 0; n <
pd->
send.len(); n++) {
6658 startTime = CkWallTimer();
6674 stopTime = CkWallTimer();
6678 bindex.
n.
i, bindex.
n.
j, bindex.
n.
k).addCharge(gm);
6684 double startTime, stopTime;
6685 startTime = CkWallTimer();
6689 stopTime = CkWallTimer();
6698 #ifdef DEBUG_MSM_GRID 6699 printf(
"patchID %d: interpolation\n",
patchID);
6703 double startTime, stopTime;
6704 startTime = CkWallTimer();
6706 #ifndef MSM_COMM_ONLY 6709 Float xphi[2], dxphi[2], xpsi[2], dxpsi[2];
6710 Float yphi[2], dyphi[2], ypsi[2], dypsi[2];
6711 Float zphi[2], dzphi[2], zpsi[2], dzpsi[2];
6732 for (
int n = 0; n <
coord.
len(); n++) {
6765 int iswithin = ( ia <= ilo && ilo < ib &&
6766 ja <= jlo && jlo < jb &&
6767 ka <= klo && klo < kb );
6771 snprintf(msg,
sizeof(msg),
"Atom %d is outside of the MSM grid.",
6778 Float fx=0, fy=0, fz=0, e=0;
6779 for (
int k = 0; k < 2; k++) {
6780 int koff = ((k+klo) - ka) * nj;
6781 for (
int j = 0; j < 2; j++) {
6782 int jkoff = (koff + (j+jlo) - ja) * ni;
6783 Float c_yphi_zphi = yphi[j] * zphi[k];
6784 Float c_ypsi_zphi = ypsi[j] * zphi[k];
6785 Float c_yphi_zpsi = yphi[j] * zpsi[k];
6786 Float c_ypsi_zpsi = ypsi[j] * zpsi[k];
6787 Float c_yphi_dzphi = yphi[j] * dzphi[k];
6788 Float c_ypsi_dzphi = ypsi[j] * dzphi[k];
6789 Float c_yphi_dzpsi = yphi[j] * dzpsi[k];
6790 Float c_ypsi_dzpsi = ypsi[j] * dzpsi[k];
6791 Float c_dyphi_zphi = dyphi[j] * zphi[k];
6792 Float c_dypsi_zphi = dypsi[j] * zphi[k];
6793 Float c_dyphi_zpsi = dyphi[j] * zpsi[k];
6794 Float c_dypsi_zpsi = dypsi[j] * zpsi[k];
6795 for (
int i = 0; i < 2; i++) {
6796 int ijkoff = jkoff + (i+ilo) - ia;
6797 fx += dxphi[i] * (c_yphi_zphi * ehbuffer[ijkoff].
velem[
D000]
6798 + c_ypsi_zphi * ehbuffer[ijkoff].
velem[
D010]
6799 + c_yphi_zpsi * ehbuffer[ijkoff].
velem[
D001]
6800 + c_ypsi_zpsi * ehbuffer[ijkoff].
velem[
D011])
6801 + dxpsi[i] * (c_yphi_zphi * ehbuffer[ijkoff].velem[
D100]
6802 + c_ypsi_zphi * ehbuffer[ijkoff].velem[
D110]
6803 + c_yphi_zpsi * ehbuffer[ijkoff].velem[
D101]
6804 + c_ypsi_zpsi * ehbuffer[ijkoff].velem[
D111]);
6805 fy += xphi[i] * (c_dyphi_zphi * ehbuffer[ijkoff].
velem[
D000]
6806 + c_dypsi_zphi * ehbuffer[ijkoff].
velem[
D010]
6807 + c_dyphi_zpsi * ehbuffer[ijkoff].
velem[
D001]
6808 + c_dypsi_zpsi * ehbuffer[ijkoff].
velem[
D011])
6809 + xpsi[i] * (c_dyphi_zphi * ehbuffer[ijkoff].velem[
D100]
6810 + c_dypsi_zphi * ehbuffer[ijkoff].velem[
D110]
6811 + c_dyphi_zpsi * ehbuffer[ijkoff].velem[
D101]
6812 + c_dypsi_zpsi * ehbuffer[ijkoff].velem[
D111]);
6813 fz += xphi[i] * (c_yphi_dzphi * ehbuffer[ijkoff].
velem[
D000]
6814 + c_ypsi_dzphi * ehbuffer[ijkoff].
velem[
D010]
6815 + c_yphi_dzpsi * ehbuffer[ijkoff].
velem[
D001]
6816 + c_ypsi_dzpsi * ehbuffer[ijkoff].
velem[
D011])
6817 + xpsi[i] * (c_yphi_dzphi * ehbuffer[ijkoff].velem[
D100]
6818 + c_ypsi_dzphi * ehbuffer[ijkoff].velem[
D110]
6819 + c_yphi_dzpsi * ehbuffer[ijkoff].velem[
D101]
6820 + c_ypsi_dzpsi * ehbuffer[ijkoff].velem[
D111]);
6821 e += xphi[i] * (c_yphi_zphi * ehbuffer[ijkoff].
velem[
D000]
6822 + c_ypsi_zphi * ehbuffer[ijkoff].
velem[
D010]
6823 + c_yphi_zpsi * ehbuffer[ijkoff].
velem[
D001]
6824 + c_ypsi_zpsi * ehbuffer[ijkoff].
velem[
D011])
6825 + xpsi[i] * (c_yphi_zphi * ehbuffer[ijkoff].velem[
D100]
6826 + c_ypsi_zphi * ehbuffer[ijkoff].velem[
D110]
6827 + c_yphi_zpsi * ehbuffer[ijkoff].velem[
D101]
6828 + c_ypsi_zpsi * ehbuffer[ijkoff].velem[
D111]);
6838 force[n].x -= q * fx;
6839 force[n].y -= q * fy;
6840 force[n].z -= q * fz;
6842 energy_self += q * q;
6849 #endif // !MSM_COMM_ONLY 6851 stopTime = CkWallTimer();
6861 #include "ComputeMsmMgr.def.h"
char msmGridCutoffProxyData[sizeof(CProxy_MsmGridCutoff)]
msm::Grid< Float > ehfull
Array< Grid< C1Matrix > > gpro_c1hermite
Array< PatchDiagram > patchList
msm::Grid< C1Vector > ehfull
Grid< C1Vector > qh_c1hermite
msm::Array< int > blockAssign
float calcGcutWork(const msm::BlockSend &bs)
float calcBlockWork(const msm::BlockDiagram &b)
int registerArray(CkArrayIndex &numElements, CkArrayID aid)
std::ostream & iINFO(std::ostream &s)
Array< int > recvGridCutoff
void setCompute(ComputeMsm *c)
int procNum(int, const CkArrayIndex &idx)
void sumReducedPotential(CkReductionMsg *msg)
#define MSM_MAX_BLOCK_SIZE
msm::PatchPtrArray patchPtr
Grid< C1Vector > subgrid_c1hermite
NAMD_HOST_DEVICE Vector c() const
MsmC1HermiteBlock(CkMigrateMessage *m)
BlockIndex blockOfGridIndexFold(const Ivec &n, int level) const
void put(const msm::Array< CProxy_MsmC1HermiteBlock > &a)
void addCharge(GridMsg *)
NAMD_HOST_DEVICE int c_p() const
Ivec clipIndexToLevel(const Ivec &n, int level) const
void setup(MsmGridCutoffInitMsg *bmsg)
void get(msm::Array< CProxy_MsmBlock > &a)
msm::Grid< Vtype > ehfold
BigReal max_a(int pid) const
IndexRange clipBlockToIndexRangeFold(const BlockIndex &nb, const IndexRange &nrange) const
#define MSM_MAX_BLOCK_VOLUME
void interpolationC1Hermite()
static PatchMap * Object()
SimParameters * simParameters
void compute_specialized(GridMsg *gmsg)
ComputeHomePatchList patchList
PatchData(ComputeMsmMgr *pmgr, int pid)
void addPotential(GridMsg *)
static const int IndexOffset[NUM_APPROX][MAX_NSTENCIL_SKIP_ZERO]
void compute(GridMsg *gmsg)
std::ostream & endi(std::ostream &s)
msm::BlockSend ehblockSend
CProxySection_MsmC1HermiteGridCutoff msmGridCutoffBroadcast
char msmGridCutoffProxyData[sizeof(CProxy_MsmC1HermiteGridCutoff)]
ForceArray & forceArray()
void done(int lc[], int n)
SubmitReduction * willSubmit(int setID, int size=-1)
msm::BlockIndex qhblockIndex
void wrapBlockSend(BlockSend &bs) const
ResizeArrayIter< T > begin(void) const
int index_a(int pid) const
static ReductionMgr * Object(void)
msm::Grid< Vtype > subgrid
void get(msm::Array< CProxy_MsmC1HermiteBlock > &a)
void setup_periodic_blocksize(int &bsize, int n)
CProxy_ComputeMsmMgr mgrProxy
Array< BlockSend > sendUp
void extract(Grid< T > &g)
#define C1INDEX(drj, dri)
void put(const CProxyElement_MsmBlock *q)
void compute(GridMsg *gmsg)
void addPotential(const Grid< Float > &epart)
char msmBlockProxyData[maxlevels *sizeof(CProxy_MsmBlock)]
void setbounds(int pia, int pib, int pja, int pjb, int pka, int pkb)
NAMD_HOST_DEVICE int b_p() const
void wrapBlockIndex(BlockIndex &bn) const
void compute(msm::Array< int > &patchIDList)
void stencil_1d(Float phi[], Float t)
BigReal gridScalingFactor
void setupSections(MsmC1HermiteGridCutoffSetupMsg *msg)
void set(int pia, int pni, int pja, int pnj, int pka, int pnk)
msm::BlockIndex blockIndex
void setbounds(int pia, int pib, int pja, int pjb, int pka, int pkb)
int gridsize_c(void) const
CProxyElement_MsmC1HermiteBlock msmBlockElementProxy
Array< Grid< BlockDiagram > > blockLevel
void put(const msm::Array< CProxy_MsmBlock > &a)
MsmC1HermiteBlock(int level)
void recvMsmBlockProxy(MsmBlockProxyMsg *)
CProxyElement_MsmBlock msmBlockElementProxy
msm::BlockIndex qhBlockIndex
void init(const IndexRange &n)
int gridsize_a(void) const
void addPotential(GridMsg *)
void recvMsmC1HermiteBlockProxy(MsmC1HermiteBlockProxyMsg *)
static const Float PhiStencil[NUM_APPROX_FORMS][MAX_NSTENCIL_SKIP_ZERO]
Array< BlockSend > sendAcross
int numPatches(void) const
WorkIndex(float w, int i)
void prolongationKernel()
CProxy_ComputeMsmMgr msmProxy
void setupSections(MsmGridCutoffSetupMsg *msg)
Array< Grid< C1Matrix > > gres_c1hermite
MsmBlockKernel(CkMigrateMessage *m)
MsmBlockMap(CkMigrateMessage *m)
NAMD_HOST_DEVICE BigReal length(void) const
static const int Nstencil[NUM_APPROX]
char msmBlockProxyData[maxlevels *sizeof(CProxy_MsmC1HermiteBlock)]
BigReal min_c(int pid) const
const msm::Grid< Mtype > * pgc
void get(CProxyElement_MsmBlock *q)
void get(CProxy_MsmC1HermiteGridCutoff *p)
int registerArray(CkArrayIndex &numElements, CkArrayID aid)
void setup_hgrid_1d(BigReal len, BigReal &hh, int &nn, int &ia, int &ib, int isperiodic)
int index_b(int pid) const
NAMD_HOST_DEVICE ScaledPosition scale(Position p) const
void addCharge(GridMsg *)
NAMD_HOST_DEVICE BigReal length2(void) const
void setup(MsmGridCutoffInitMsg *bmsg)
NAMD_HOST_DEVICE int a_p() const
NAMD_HOST_DEVICE Vector a_r() const
msm::Grid< Float > subgrid
NAMD_HOST_DEVICE Vector b_r() const
CProxySection_MsmC1HermiteGridCutoff msmGridCutoffReduction
msm::BlockSend ehBlockSend
Array< IndexRange > gridrange
void put(const msm::Grid< T > &g, int id, int seq)
AtomCoordArray & coordArray()
void recvMsmGridCutoffProxy(MsmGridCutoffProxyMsg *)
Array< PatchSend > sendPatch
void NAMD_die(const char *err_msg)
int operator<=(const WorkIndex &wn)
CProxySection_MsmGridCutoff msmGridCutoffBroadcast
MsmGridCutoff(CkMigrateMessage *m)
BigReal min_a(int pid) const
BlockIndex blockOfGridIndex(const Ivec &n, int level) const
NAMD_HOST_DEVICE Vector c_r() const
CProxy_MsmGridCutoff msmGridCutoff
NAMD_HOST_DEVICE Vector b() const
void get(CProxy_MsmGridCutoff *p)
const Array< T > & data() const
void setupWeights(const msm::Grid< Mtype > *ptrgc, const msm::Grid< Mtype > *ptrgvc)
CProxy_MsmC1HermiteGridCutoff msmC1HermiteGridCutoff
int index_c(int pid) const
int blockFlatIndex(int level, int i, int j, int k)
static void ndsplitting(BigReal pg[], BigReal s, int n, int _split)
void updateLower(const Ivec &n)
Float melem[C1_MATRIX_SIZE]
void get(CProxyElement_MsmC1HermiteBlock *q)
void done(double tm[], int n)
void addPotential(GridMsg *)
const msm::Grid< Mtype > * proStencil
IndexRange clipBlockToIndexRange(const BlockIndex &nb, const IndexRange &nrange) const
BigReal max_b(int pid) const
msm::Grid< C1Vector > subgrid_c1hermite
void addPotentialC1Hermite(const Grid< C1Vector > &epart)
void d_stencil_1d_c1hermite(Float dphi[], Float phi[], Float dpsi[], Float psi[], Float t, Float h, Float h_1)
Array< FoldFactor > foldfactor
void subtractVirialContrib()
void d_stencil_1d(Float dphi[], Float phi[], Float t, Float h_1)
msm::Array< CProxy_MsmBlock > msmBlock
static const int PolyDegree[NUM_APPROX]
void stencil_1d_c1hermite(Float phi[], Float psi[], Float t, Float h)
void setup(MsmGridCutoffInitMsg *bmsg)
static void splitting(BigReal &g, BigReal &dg, BigReal r_a, int _split)
msm::Array< CProxy_MsmC1HermiteBlock > msmC1HermiteBlock
void recvMsmC1HermiteGridCutoffProxy(MsmC1HermiteGridCutoffProxyMsg *)
Array< int > indexGridCutoff
msm::PatchPtrArray & patchPtrArray()
static void gc_c1hermite_elem_accum(C1Matrix &matrix, BigReal _c, Vector rv, BigReal _a, int _split)
Grid< C1Vector > eh_c1hermite
msm::Array< int > gcutAssign
BigReal max_c(int pid) const
Array< Grid< Float > > gc
msm::Grid< Vtype > ehProlongated
MsmBlockKernel(const msm::BlockIndex &)
int gridsize_b(void) const
void wrapBlockSendFold(BlockSend &bs) const
MsmBlock(CkMigrateMessage *m)
Array< Grid< Float > > gvc
void put(const CProxyElement_MsmC1HermiteBlock *q)
MsmC1HermiteGridCutoff(CkMigrateMessage *m)
CProxySection_MsmGridCutoff msmGridCutoffReduction
void put(const CProxy_MsmC1HermiteGridCutoff *p)
#define SET_PRIORITY(MSG, SEQ, PRIO)
void sendChargeC1Hermite()
void get(msm::Grid< T > &g, int &id, int &seq)
char msmBlockElementProxyData[sizeof(CProxyElement_MsmBlock)]
MsmGridCutoffInitMsg(const msm::BlockIndex &i, const msm::BlockSend &b)
void compute(GridMsg *gmsg)
NAMD_HOST_DEVICE Vector a() const
Array< Grid< C1Matrix > > gc_c1hermite
char msmBlockElementProxyData[sizeof(CProxyElement_MsmC1HermiteBlock)]
msm::Grid< Vtype > qhRestricted
BigReal min_b(int pid) const
NAMD_HOST_DEVICE Vector unit(void) const
ResizeArrayIter< T > end(void) const
const msm::Grid< Mtype > * pgvc
void initialize(MsmInitMsg *)
Float velem[C1_VECTOR_SIZE]
void sumReducedPotential(CkReductionMsg *msg)
Array< BlockSend > sendDown
void setupStencils(const msm::Grid< Mtype > *res, const msm::Grid< Mtype > *pro)
void anterpolationC1Hermite()
int procNum(int, const CkArrayIndex &idx)
const msm::Grid< Mtype > * resStencil
void put(const CProxy_MsmGridCutoff *p)