3 #if __CUDACC_VER_MAJOR__ >= 11
6 #include <namd_cub/cub.cuh>
11 #include "CudaComputeNonbondedKernel.h"
12 #include "CudaTileListKernel.h"
13 #include "DeviceCUDA.h"
14 #include "CudaComputeNonbondedInteractions.h"
16 #if defined(NAMD_CUDA)
19 #define __thread __declspec(thread)
21 extern __thread DeviceCUDA *deviceCUDA;
23 #define OVERALLOC 1.2f
25 void NAMD_die(const char *);
26 void NAMD_bug(const char *);
28 #define MAX_CONST_EXCLUSIONS 2048 // cache size is 8k
29 __constant__ unsigned int constExclusions[MAX_CONST_EXCLUSIONS];
32 __constant__ AlchData alchflags;
33 #define NONBONDKERNEL_NUM_WARP 4
36 __device__ __forceinline__
// make_zero specialization for float3: produces a vector whose x, y and z
// components are all 0.0f.
40 __device__ __forceinline__
41 float3 make_zero<float3>() {
42 return make_float3(0.0f, 0.0f, 0.0f);
// make_zero specialization for float4: produces a vector whose x, y, z and w
// components are all 0.0f.
46 __device__ __forceinline__
47 float4 make_zero<float4>() {
48 return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
// Pair interaction for atoms i and j whose force magnitudes come from
// cudaNBForceMagCalc_VdwEnergySwitch_PMEC1.
//   r2            squared pair separation; rsqrtf(r2) is passed on as rinv
//   qi, qj        atomic charges; their product is passed on as `charge`
//   dx, dy, dz    separation components used to turn a magnitude into a vector
//   vdwtypei/j    summed to form the index into vdwCoefTable
//   iforceSlow    incremented by the slow force vector
//   jforceSlow    decremented by the same vector (equal and opposite)
//   energyVdw, energyElec, energySlow
//                 handed by reference to the magnitude routine
//   nbConstants   forwarded unchanged to the magnitude routine
51 template<bool doEnergy, bool doSlow, typename jForceType>
52 __device__ __forceinline__
53 void calcForceEnergyMath(const float r2, const float qi, const float qj,
54 const float dx, const float dy, const float dz,
55 const int vdwtypei, const int vdwtypej, const float2* __restrict__ vdwCoefTable,
56 cudaTextureObject_t vdwCoefTableTex,
57 cudaTextureObject_t forceTableTex, cudaTextureObject_t energyTableTex,
58 float3& iforce, float3& iforceSlow, jForceType& jforce, jForceType& jforceSlow,
59 float& energyVdw, float& energyElec, float& energySlow,
60 const CudaNBConstants nbConstants) {
// Flat index of the (i, j) coefficient pair. nonbondedForceKernel in this file
// multiplies the i type by vdwCoefTableWidth before passing it in.
62 int vdwIndex = vdwtypej + vdwtypei;
// With __CUDA_ARCH__ >= 350 the coefficient pair is loaded with __ldg.
63 #if __CUDA_ARCH__ >= 350
64 float2 ljab = __ldg(&vdwCoefTable[vdwIndex]);
// Texture-object read of the same table entry.
66 float2 ljab = tex1Dfetch<float2>(vdwCoefTableTex, vdwIndex);
69 float rinv = rsqrtf(r2);
71 float charge = qi * qj;
// f and fSlow are written by the call below (passed after nbConstants).
// NOTE(review): presumably they are force magnitudes divided by r, since they
// are multiplied by dx, dy, dz directly; confirm against the helper.
73 cudaNBForceMagCalc_VdwEnergySwitch_PMEC1<doEnergy, doSlow>(
74 r2, rinv, charge, ljab, nbConstants,
75 f, fSlow, energyVdw, energyElec, energySlow);
// Slow force vector: added to atom i, subtracted from atom j.
87 float fxSlow = dx * fSlow;
88 float fySlow = dy * fSlow;
89 float fzSlow = dz * fSlow;
90 iforceSlow.x += fxSlow;
91 iforceSlow.y += fySlow;
92 iforceSlow.z += fzSlow;
93 jforceSlow.x -= fxSlow;
94 jforceSlow.y -= fySlow;
95 jforceSlow.z -= fzSlow;
99 // TODO: NbThole FEP/TI. Is this really useful?
// Thole-screened (NbThole) pair correction for atoms i and j.
//   vdwtypei/j    summed to form the index into the Tij table
//   r2            squared pair separation; rsqrtf(r2) is used as 1/r
//   qi, qj        charges, already scaled as described in the comment below
//   dx, dy, dz    separation components used to turn f into fx, fy, fz
//   alpha_i/j     per-atom values whose product sets the screening length
//   energyElec    receives the screening energy term
//   iforce/jforce force accumulators of the two atoms
// The screening parameter is aa = tij * (alpha_i*alpha_j)^(-1/6). The exponent
// -1/6 is obtained as cbrtf(rsqrtf(x)); rsqrtf alone would give -1/2.
100 template <bool doEnergy, typename jForceType>
101 __device__ __forceinline__
102 void calcForceEnergyNbThole(
103 const int vdwtypei, const int vdwtypej,
104 const float r2, const float qi, const float qj,
105 const float dx, const float dy, const float dz,
106 cudaTextureObject_t drudeNbTholeTijTableTex,
107 const float* __restrict drudeNbTholeTijTable,
108 const float alpha_i, const float alpha_j,
109 float& energyElec, float3& iforce, jForceType& jforce)
// With __CUDA_ARCH__ >= 350 the Tij entry is loaded with __ldg.
111 #if __CUDA_ARCH__ >= 350
112 const float tij = __ldg(&drudeNbTholeTijTable[vdwtypej + vdwtypei]);
// Texture-object read of the same table entry.
114 const float tij = tex1Dfetch<float>(drudeNbTholeTijTableTex, vdwtypej + vdwtypei);
117 const float aprod = alpha_i * alpha_j;
118 const float rinv = rsqrtf(r2);
119 // cbrtf(rsqrtf(aprod)) = powf(aprod, -1.f/6)
120 const float aa = tij * cbrtf(rsqrtf(aprod));
121 // qi and qj are already scaled by sqrt(COULOMB * ComputeNonbondedUtil::scaling *
122 // ComputeNonbondedUtil::dielectric_1), respectively.
123 const float qqaa = qi * qj;
// auaa = aa * r, since rinv is 1/r.
124 const float auaa = aa / rinv;
125 const float expauaa = expf(-auaa);
126 float polyauaa = 1.0f + 0.5f * auaa;
// Energy term: -(qi*qj/r) * (1 + auaa/2) * exp(-auaa).
128 energyElec += qqaa * rinv * (- polyauaa * expauaa);
// Reuse polyauaa for the force polynomial: 1 + auaa + auaa^2/2.
130 polyauaa = 1.0f + auaa*polyauaa;
131 const float rinv3 = rinv * rinv * rinv;
132 const float f = qqaa * rinv3 * (polyauaa*expauaa);
133 const float fx = dx * f;
134 const float fy = dy * f;
135 const float fz = dz * f;
// Table-driven pair interaction for atoms i and j.
// The force and energy tables are sampled at rinv = rsqrtf(r2) and combined
// with the pair's coefficients (ljab) and the value held in fSlow.
//   r2            squared pair separation
//   dx, dy, dz    separation components used to turn a magnitude into a vector
//   vdwtypei/j    summed to form the index into vdwCoefTable
//   iforceSlow    incremented by the slow force vector
//   jforceSlow    decremented by the same vector (equal and opposite)
//   energyVdw, energyElec, energySlow   energy accumulators
146 template<bool doEnergy, bool doSlow, typename jForceType>
147 __device__ __forceinline__
148 void calcForceEnergy(const float r2, const float qi, const float qj,
149 const float dx, const float dy, const float dz,
150 const int vdwtypei, const int vdwtypej, const float2* __restrict__ vdwCoefTable,
151 cudaTextureObject_t vdwCoefTableTex,
152 cudaTextureObject_t forceTableTex, cudaTextureObject_t energyTableTex,
153 float3& iforce, float3& iforceSlow, jForceType& jforce, jForceType& jforceSlow,
154 float& energyVdw, float& energyElec, float& energySlow) {
// Flat index of the (i, j) coefficient pair. nonbondedForceKernel in this file
// multiplies the i type by vdwCoefTableWidth before passing it in.
156 int vdwIndex = vdwtypej + vdwtypei;
// With __CUDA_ARCH__ >= 350 the coefficient pair is loaded with __ldg.
157 #if __CUDA_ARCH__ >= 350
158 float2 ljab = __ldg(&vdwCoefTable[vdwIndex]);
// Texture-object read of the same table entry.
160 float2 ljab = tex1Dfetch<float2>(vdwCoefTableTex, vdwIndex);
163 float rinv = rsqrtf(r2);
// Both tables are addressed by rinv; the energy table is read only when doEnergy.
167 fi = tex1D<float4>(forceTableTex, rinv);
168 if (doEnergy) ei = tex1D<float4>(energyTableTex, rinv);
// Table channels: .z pairs with ljab.x, .y with ljab.y, .x with fSlow and .w
// scales the slow term. NOTE(review): fSlow presumably holds qi*qj here, as in
// calcForceEnergyFEP; confirm.
171 f = ljab.x * fi.z + ljab.y * fi.y + fSlow * fi.x;
174 energyVdw += ljab.x * ei.z + ljab.y * ei.y;
175 energyElec += fSlow * ei.x;
178 energySlow += fSlow * ei.w;
// From here on fSlow carries the slow force factor instead.
181 if (doSlow) fSlow *= fi.w;
// Slow force vector: added to atom i, subtracted from atom j.
194 float fxSlow = dx * fSlow;
195 float fySlow = dy * fSlow;
196 float fzSlow = dz * fSlow;
197 iforceSlow.x += fxSlow;
198 iforceSlow.y += fySlow;
199 iforceSlow.z += fzSlow;
200 jforceSlow.x -= fxSlow;
201 jforceSlow.y -= fySlow;
202 jforceSlow.z -= fzSlow;
207 /* JM: Special __device__ function to compute VDW forces for alchemy.
208 * Partially swiped from ComputeNonbondedFEP.C
// Table-driven pair interaction with alchemical (FEP) lambda scaling.
// The pair is classified from the partition codes p1 and p2 (tested against
// 0, 1 and 2 below). Lambda values and switching parameters are read from the
// __constant__ alchflags structure.
//   vdwForceSwitch  selects the force-switching form of the alchemical
//                   Lennard-Jones term; the other form is potential switching
//   energyVdw, energyElec, energySlow
//                   accumulated with the first lambda set (vdwLambda*, elecLambda*)
//   energyVdw_s, energyElec_s, energySlow_s
//                   accumulated with the second set (vdwLambda2*, elecLambda2*)
//   iforceSlow / jforceSlow
//                   receive +/- the slow force vector scaled by myElecLambda
210 template<bool doEnergy, bool doSlow, bool shift, bool vdwForceSwitch, typename jForceType>
211 __device__ __forceinline__
212 void calcForceEnergyFEP(const float r2, const float qi, const float qj,
213 const float dx, const float dy, const float dz,
214 const int vdwtypei, const int vdwtypej,
216 /*const AlchData &alchflags, */
217 const float2* __restrict__ vdwCoefTable,
218 cudaTextureObject_t vdwCoefTableTex,
219 cudaTextureObject_t forceTableTex, cudaTextureObject_t energyTableTex,
220 float3& iforce, float3& iforceSlow, jForceType& jforce, jForceType& jforceSlow,
221 float& energyVdw, float &energyVdw_s, float& energyElec, float& energySlow,
222 float& energyElec_s, float& energySlow_s) {
// Flat index of the (i, j) coefficient pair.
225 int vdwIndex = vdwtypej + vdwtypei;
226 #if __CUDA_ARCH__ >= 350
227 float2 ljab = __ldg(&vdwCoefTable[vdwIndex]);
// NOTE(review): calcForceEnergy and calcForceEnergyMath read this table with
// tex1Dfetch and an integer index; confirm that tex1D is intended here.
229 float2 ljab = tex1D<float2>(vdwCoefTableTex, vdwIndex); //ljab.x is A and ljab.y is B
232 float myVdwLambda = 0.0f;
233 float myVdwLambda2 = 0.0f;
234 float myElecLambda = 0.0f;
235 float myElecLambda2 = 0.0f;
236 float rinv = rsqrtf(r2);
238 float alch_vdw_energy = 0.0f;
239 float alch_vdw_energy_2 = 0.0f;
240 float alch_vdw_force = 0.0f;
241 float fSlow = qi * qj;
// Both tables are addressed by rinv; the energy table is read only when doEnergy.
243 float4 fi = tex1D<float4>(forceTableTex, rinv);
244 if (doEnergy) ei = tex1D<float4>(energyTableTex, rinv);
246 //John said that there is a better way to avoid divergences here
247 //alch: true if => 1-0, 1-1, 2-0, 2-2
248 //dec: true if => 1-1, 2-2 && decouple
249 //up: true if => 1-0 && 1,1
250 //down: true if => 2-0, && 2,2
// ref: both atoms are in partition 0. A mixed 1-2 pair is neither ref nor alch.
251 int ref = (p1 == 0 && p2 == 0);
252 int alch = (!ref && !(p1 == 1 && p2 ==2) && !(p1 == 2 && p2 == 1));
253 int dec = (alch && (p1 == p2) && alchflags.alchDecouple);
254 int up = (alch && (p1 == 1 || p2 == 1) && !dec);
255 int down = (alch && (p1 == 2 || p2 == 2) && !dec);
260 /*--------------- VDW SPECIAL ALCH FORCES (Swiped from ComputeNonbondedFEP.C) ---------------*/
// At most one of up, down and (ref || dec) is set for a given pair, so each sum
// selects a single lambda without branching; a mixed 1-2 pair gets 0.
262 myVdwLambda = alchflags.vdwLambdaUp*(up) + alchflags.vdwLambdaDown*(down) + 1.f*(ref || dec);
263 myVdwLambda2 = alchflags.vdwLambda2Up*(up) + alchflags.vdwLambda2Down*(down) + 1.f*(ref || dec);
264 myElecLambda = alchflags.elecLambdaUp*(up) + alchflags.elecLambdaDown*(down) + 1.f*(ref || dec);
265 myElecLambda2 = alchflags.elecLambda2Up*(up) + alchflags.elecLambda2Down*(down) + 1.f*(ref || dec);
// Force-switching form of the alchemical Lennard-Jones term.
268 if (vdwForceSwitch) {
270 float switchdist6_1, switchdist6_2;
271 const float cutoff6 = alchflags.cutoff2 * alchflags.cutoff2 * alchflags.cutoff2;
273 //Templated parameter. No control divergence here
// Soft-core form: the shift is added to r2 before inverting, and to the
// switching distance before cubing.
275 const float myVdwShift = alchflags.vdwShiftUp*up + alchflags.vdwShiftDown*(!up);
276 const float myVdwShift2 = alchflags.vdwShift2Up*up + alchflags.vdwShift2Down*(!up);
277 r2_1 = __fdividef(1.f,(r2 + myVdwShift));
278 r2_2 = __fdividef(1.f,(r2 + myVdwShift2));
279 switchdist6_1 = alchflags.switchdist2 + myVdwShift;
280 switchdist6_1 = switchdist6_1 * switchdist6_1 * switchdist6_1;
281 switchdist6_2 = alchflags.switchdist2 + myVdwShift2;
282 switchdist6_2 = switchdist6_2 * switchdist6_2 * switchdist6_2;
286 switchdist6_1 = alchflags.switchdist2 * alchflags.switchdist2 * alchflags.switchdist2;
287 switchdist6_2 = switchdist6_1;
289 const float r6_1 = r2_1*r2_1*r2_1;
290 const float r6_2 = r2_2*r2_2*r2_2;
// At or inside the switching distance: plain 12-6 form plus a constant offset dU.
291 if (r2 <= alchflags.switchdist2) {
292 const float U1 = ljab.x*r6_1*r6_1 - ljab.y*r6_1; // NB: unscaled, shorthand only!
293 const float U2 = ljab.x*r6_2*r6_2 - ljab.y*r6_2;
294 // A == ljab.x, B == ljab.y
295 const float dU_1 = -ljab.x / (cutoff6 * switchdist6_1) - (-ljab.y * rsqrtf(cutoff6 * switchdist6_1));
296 const float dU_2 = -ljab.x / (cutoff6 * switchdist6_2) - (-ljab.y * rsqrtf(cutoff6 * switchdist6_2));
297 alch_vdw_energy = myVdwLambda * (U1 + dU_1);
298 alch_vdw_energy_2 = myVdwLambda2 * (U2 + dU_2);
300 //Multiplied by -1.0 to match CPU values
301 alch_vdw_force =-1.f*myVdwLambda*((12.f*U1 + 6.f*ljab.y*r6_1)*r2_1);
// Beyond the switching distance: force-switched form built from k_vdwa/k_vdwb.
303 const float r3_1 = sqrtf(r6_1);
304 const float r3_2 = sqrtf(r6_2);
305 const float inv_cutoff6 = 1.0f / cutoff6;
306 const float inv_cutoff3 = rsqrtf(cutoff6);
307 // A == ljab.x, B == ljab.y
308 const float k_vdwa_1 = ljab.x / (1.0f - switchdist6_1 * inv_cutoff6);
309 const float k_vdwb_1 = ljab.y / (1.0f - sqrtf(switchdist6_1 * inv_cutoff6));
310 const float k_vdwa_2 = ljab.x / (1.0f - switchdist6_2 * inv_cutoff6);
311 const float k_vdwb_2 = ljab.y / (1.0f - sqrtf(switchdist6_2 * inv_cutoff6));
312 const float tmpa_1 = r6_1 - inv_cutoff6;
313 const float tmpb_1 = r3_1 - inv_cutoff3;
314 const float tmpa_2 = r6_2 - inv_cutoff6;
315 const float tmpb_2 = r3_2 - inv_cutoff3;
316 alch_vdw_energy = myVdwLambda * (k_vdwa_1 * tmpa_1 * tmpa_1 - k_vdwb_1 * tmpb_1 * tmpb_1);
317 alch_vdw_energy_2 = myVdwLambda2 * (k_vdwa_2 * tmpa_2 * tmpa_2 - k_vdwb_2 * tmpb_2 * tmpb_2);
318 //Multiplied by -1.0 to match CPU values
319 alch_vdw_force = -1.0f * myVdwLambda * (6.0f * r2_1 * (2.0f * k_vdwa_1 * tmpa_1 * r6_1 - k_vdwb_1 * tmpb_1 * r3_1));
320 } // r2 <= alchflags.switchdist2
322 // potential switching
323 const float diff = alchflags.cutoff2 - r2;
// Branch-free selection: the comparisons evaluate to 0 or 1, giving
// switchmul = 1 and switchmul2 = 0 at or inside the switching distance.
325 const float switchmul = (alchflags.switchfactor*(diff)*(diff)*(alchflags.cutoff2 - 3.f*alchflags.switchdist2 + 2.f*r2))*(r2 > alchflags.switchdist2) + (1.f)*(r2 <= alchflags.switchdist2);
326 const float switchmul2 = (12.f*alchflags.switchfactor*(diff)*(r2 - alchflags.switchdist2))*(r2 > alchflags.switchdist2) + (0.f) * (r2 <= alchflags.switchdist2);
328 //Templated parameter. No control divergence here
330 //This templated parameter lets me get away with not making 2 divisions. But for myVdwShift != 0, how do I do this?
331 const float myVdwShift = alchflags.vdwShiftUp*up + alchflags.vdwShiftDown*(!up);
332 const float myVdwShift2 = alchflags.vdwShift2Up*up + alchflags.vdwShift2Down*(!up);
333 //r2_1 = 1.0/(r2 + myVdwShift);
334 //r2_2 = 1.0/(r2 + myVdwShift2);
335 r2_1 = __fdividef(1.f,(r2 + myVdwShift));
336 r2_2 = __fdividef(1.f,(r2 + myVdwShift2));
342 const float r6_1 = r2_1*r2_1*r2_1;
343 const float r6_2 = r2_2*r2_2*r2_2;
344 const float U1 = ljab.x*r6_1*r6_1 - ljab.y*r6_1; // NB: unscaled, shorthand only!
345 const float U2 = ljab.x*r6_2*r6_2 - ljab.y*r6_2;
346 alch_vdw_energy = myVdwLambda*switchmul*U1;
347 alch_vdw_energy_2 = myVdwLambda2*switchmul*U2;
349 //Multiplied by -1.0 to match CPU values
350 alch_vdw_force =-1.f*myVdwLambda*((switchmul*(12.f*U1 + 6.f*ljab.y*r6_1)*r2_1+ switchmul2*U1));
354 /*-----------------------------------------------------------*/
357 //All energies should be scaled by the corresponding lambda
// Tabulated van der Waals energy is used for ref/dec pairs; the alchemical
// term computed above is used for alch pairs that are not decoupled.
358 energyVdw += (ljab.x * ei.z + ljab.y * ei.y)*(ref || dec) + alch_vdw_energy*(alch && !dec);
359 energyElec += (fSlow * ei.x)*myElecLambda;
360 energyVdw_s += (ljab.x * ei.z + ljab.y * ei.y)*(ref || dec) + alch_vdw_energy_2*(alch && !dec);
361 energyElec_s += (fSlow * ei.x)*myElecLambda2;
363 energySlow += (fSlow * ei.w)*myElecLambda;
364 energySlow_s += (fSlow * ei.w)*myElecLambda2;
// From here on fSlow carries the slow force factor instead of the charge product.
368 if (doSlow) fSlow *= fi.w;
370 //We should include the regular VDW forces if not dealing with alch pairs
371 f = (f + ((ljab.x * fi.z + ljab.y * fi.y)*(!alch || dec)))*myElecLambda
372 + alch_vdw_force*(alch && !dec);
386 /*There's stuff that needs to be added here, when FAST AND NOSHORT macros are on*/
// Slow force vector scaled by myElecLambda: added to atom i, subtracted from atom j.
387 fSlow = myElecLambda*fSlow;
388 float fxSlow = dx * fSlow;
389 float fySlow = dy * fSlow;
390 float fzSlow = dz * fSlow;
391 iforceSlow.x += fxSlow;
392 iforceSlow.y += fySlow;
393 iforceSlow.z += fzSlow;
394 jforceSlow.x -= fxSlow;
395 jforceSlow.y -= fySlow;
396 jforceSlow.z -= fzSlow;
400 /* JM: Special __device__ function to compute VDW forces for TI.
// Table-driven pair interaction with alchemical (TI) lambda scaling.
// The pair is classified from the partition codes p1 and p2 (tested against
// 0, 1 and 2 below). Lambda values and switching parameters are read from the
// __constant__ alchflags structure.
//   vdwForceSwitch  selects the force-switching form of the alchemical
//                   Lennard-Jones term; the other form is potential switching
//   energyVdw, energyElec, energySlow
//                   lambda-scaled energy accumulators
//   *_ti_1          accumulate the unscaled terms for "up" pairs
//   *_ti_2          accumulate the unscaled terms for "down" pairs
//   iforceSlow / jforceSlow
//                   receive +/- the slow force vector scaled by myElecLambda
403 template<bool doEnergy, bool doSlow, bool shift, bool vdwForceSwitch, typename jForceType>
404 __device__ __forceinline__
405 void calcForceEnergyTI(const float r2, const float qi, const float qj,
406 const float dx, const float dy, const float dz,
407 const int vdwtypei, const int vdwtypej,
409 const float2* __restrict__ vdwCoefTable,
410 cudaTextureObject_t vdwCoefTableTex,
411 cudaTextureObject_t forceTableTex, cudaTextureObject_t energyTableTex,
412 float3& iforce, float3& iforceSlow, jForceType& jforce, jForceType& jforceSlow,
413 float& energyVdw, float& energyVdw_ti_1, float& energyVdw_ti_2,
414 float& energyElec, float& energyElec_ti_1, float& energyElec_ti_2,
415 float& energySlow, float& energySlow_ti_1, float& energySlow_ti_2) {
// Flat index of the (i, j) coefficient pair.
417 int vdwIndex = vdwtypej + vdwtypei;
418 #if __CUDA_ARCH__ >= 350
419 float2 ljab = __ldg(&vdwCoefTable[vdwIndex]);
// NOTE(review): calcForceEnergy and calcForceEnergyMath read this table with
// tex1Dfetch and an integer index; confirm that tex1D is intended here.
421 float2 ljab = tex1D<float2>(vdwCoefTableTex, vdwIndex); //ljab.x is A and ljab.y is B
424 /* JM: For TI, we have to deal ALCH1 OR ALCH2 during ComputeNonbondedBase2
425 * ALCH1 for appearing terms;
426 * ALCH2 for disappearing terms;
427 * Instead of the _s energy terms, we need to calculate:
429 * vdwEnergy_ti_1 and _2 for VDW energies. For those we need to add the special terms calculated on
430 * ComputeNonbondedTI.C
432 * elecEnergy_ti_1 and _2 for electrostatic energy. No correction needed here though.
436 float myVdwLambda = 0.0f;
437 float myElecLambda = 0.0f;
438 float rinv = rsqrtf(r2);
440 float alch_vdw_energy = 0.0f;
441 float alch_vdw_force = 0.0f;
442 float alch_vdw_dUdl = 0.0f;
443 float fSlow = qi * qj;
445 float4 fi = tex1D<float4>(forceTableTex, rinv);
446 if (doEnergy) ei = tex1D<float4>(energyTableTex, rinv);
448 //John said that there is a better way to avoid divergences here
449 //alch: true if => 1-0, 1-1, 2-0, 2-2
450 //dec: true if => 1-1, 2-2 && decouple
451 //up: true if => 1-0 && 1,1
452 //down: true if => 2-0, && 2,2
453 int ref = (p1 == 0 && p2 == 0);
454 int alch = (!ref && !(p1 == 1 && p2 ==2) && !(p1 == 2 && p2 == 1));
455 int dec = (alch && (p1 == p2) && alchflags.alchDecouple);
456 int up = (alch && (p1 == 1 || p2 == 1) && !dec);
457 int down = (alch && (p1 == 2 || p2 == 2) && !dec);
462 /*--------------- VDW SPECIAL ALCH STUFF (Swiped from ComputeNonbondedTI.C) ---------------*/
// At most one of up, down and (ref || dec) is set for a given pair, so each sum
// selects a single lambda without branching; a mixed 1-2 pair gets 0.
463 myVdwLambda = alchflags.vdwLambdaUp*(up) + alchflags.vdwLambdaDown*(down) + 1.f*(ref || dec);
464 myElecLambda = alchflags.elecLambdaUp*(up) + alchflags.elecLambdaDown*(down) + 1.f*(ref || dec);
// Force-switching form of the alchemical Lennard-Jones term.
466 if (vdwForceSwitch) {
467 const float cutoff6 = alchflags.cutoff2 * alchflags.cutoff2 * alchflags.cutoff2;
// Soft-core form: the shift is added to r2 before inverting, and to the
// switching distance before cubing.
470 const float myVdwShift = alchflags.vdwShiftUp*up + alchflags.vdwShiftDown*(!up);
471 r2_1 = __fdividef(1.f,(r2 + myVdwShift));
472 switchdist6 = alchflags.switchdist2 + myVdwShift;
473 switchdist6 = switchdist6 * switchdist6 * switchdist6;
476 switchdist6 = alchflags.switchdist2 * alchflags.switchdist2 * alchflags.switchdist2;
478 const float r6_1 = r2_1*r2_1*r2_1;
// At or inside the switching distance: plain 12-6 form plus a constant offset dU.
479 if (r2 <= alchflags.switchdist2) {
480 const float U = ljab.x*r6_1*r6_1 - ljab.y*r6_1;
481 const float dU = -ljab.x / (cutoff6 * switchdist6) - (-ljab.y * rsqrtf(cutoff6 * switchdist6));
482 alch_vdw_force = -1.f*(myVdwLambda*((12.f*U + 6.f*ljab.y*r6_1)*r2_1));
483 alch_vdw_energy = myVdwLambda * (U + dU);
484 alch_vdw_dUdl = U + myVdwLambda * alchflags.alchVdwShiftCoeff * (6.f*U + 3.f*ljab.y*r6_1)*r2_1 + dU;
// Beyond the switching distance: force-switched form built from k_vdwa/k_vdwb.
486 const float r3_1 = sqrtf(r6_1);
487 const float inv_cutoff6 = 1.0f / cutoff6;
488 const float inv_cutoff3 = sqrtf(inv_cutoff6);
489 const float k_vdwa_1 = ljab.x / (1.0f - switchdist6 * inv_cutoff6);
490 const float k_vdwb_1 = ljab.y / (1.0f - sqrtf(switchdist6 * inv_cutoff6));
491 const float tmpa_1 = r6_1 - inv_cutoff6;
492 const float tmpb_1 = r3_1 - inv_cutoff3;
493 const float U = k_vdwa_1 * tmpa_1 * tmpa_1 - k_vdwb_1 * tmpb_1 * tmpb_1;
494 alch_vdw_force = -1.0f * myVdwLambda * (6.0f * r2_1 * (2.0f * k_vdwa_1 * tmpa_1 * r6_1 - k_vdwb_1 * tmpb_1 * r3_1));
495 alch_vdw_energy = myVdwLambda * U;
496 alch_vdw_dUdl = U + myVdwLambda * alchflags.alchVdwShiftCoeff * (3.0f * r2_1 * (2.0f * k_vdwa_1 * tmpa_1 * r6_1 - k_vdwb_1 * tmpb_1 * r3_1));
497 } // r2 <= alchflags.switchdist2
// Potential-switching form: switchmul is 1 and switchmul2 is 0 at or inside
// the switching distance.
499 const float diff = alchflags.cutoff2 - r2;
500 const float switchmul = (r2 > alchflags.switchdist2 ? alchflags.switchfactor*(diff)*(diff) \
501 *(alchflags.cutoff2 - 3.f*alchflags.switchdist2 + 2.f*r2) : 1.f);
503 const float switchmul2 = (r2 > alchflags.switchdist2 ? \
504 12.f*alchflags.switchfactor*(diff) \
505 *(r2 - alchflags.switchdist2) : 0.f);
506 //Templated parameter. No control divergence here
508 const float myVdwShift = alchflags.vdwShiftUp*up + alchflags.vdwShiftDown*(!up);
509 r2_1 = __fdividef(1.f,(r2 + myVdwShift));
510 }else r2_1 = rinv*rinv;
512 const float r6_1 = r2_1*r2_1*r2_1;
513 const float U = ljab.x*r6_1*r6_1 - ljab.y*r6_1; // NB: unscaled! for shorthand only!
514 alch_vdw_energy = myVdwLambda*switchmul*U;
515 //Multiplied by -1.0 to match CPU values
516 alch_vdw_force = -1.f*(myVdwLambda*(switchmul*(12.f*U + 6.f*ljab.y*r6_1)*r2_1 \
518 alch_vdw_dUdl = (switchmul*(U + myVdwLambda*alchflags.alchVdwShiftCoeff \
519 *(6.f*U + 3.f*ljab.y*r6_1)*r2_1));
522 /*-------------------------------------------------------------------------*/
525 //All energies should be scaled by the corresponding lambda
// Tabulated van der Waals energy is used for ref/dec pairs; the alchemical
// term computed above is used for alch pairs that are not decoupled.
526 energyVdw += (ljab.x * ei.z + ljab.y * ei.y)*(ref || dec) + alch_vdw_energy*(alch && !dec);
527 energyElec += (fSlow * ei.x)*myElecLambda;
// TI accumulators: the *_ti_1 terms are gated by up, the *_ti_2 terms by down.
529 energyVdw_ti_1 += alch_vdw_dUdl*up;
530 energyVdw_ti_2 += alch_vdw_dUdl*down;
531 energyElec_ti_1 += (fSlow * ei.x)*up;
532 energyElec_ti_2 += (fSlow * ei.x)*down;
535 energySlow += (fSlow * ei.w)*myElecLambda;
537 energySlow_ti_1 += (fSlow * ei.w)*up;
538 energySlow_ti_2 += (fSlow * ei.w)*down;
// From here on fSlow carries the slow force factor instead of the charge product.
543 if (doSlow) fSlow *= fi.w;
544 //We should include the regular VDW forces if not dealing with alch pairs
545 f = (f + ((ljab.x * fi.z + ljab.y * fi.y)*(ref || dec)))*myElecLambda
546 + alch_vdw_force*(alch && !dec);
560 /*There's stuff that needs to be added here, when FAST AND NOSHORT macros are on*/
// Slow force vector scaled by myElecLambda: added to atom i, subtracted from atom j.
561 fSlow = myElecLambda*fSlow; /* FAST(NOSHORT(+alch_vdw_force))*/ //Those should also be zeroed
562 float fxSlow = dx * fSlow;
563 float fySlow = dy * fSlow;
564 float fzSlow = dz * fSlow;
565 iforceSlow.x += fxSlow;
566 iforceSlow.y += fySlow;
567 iforceSlow.z += fzSlow;
568 jforceSlow.x -= fxSlow;
569 jforceSlow.y -= fySlow;
570 jforceSlow.z -= fzSlow;
// Accumulates one atom's force into float4 arrays with atomicAdd.
//   pos             index of the atom in devForces / devForcesSlow
//   force           added to devForces[pos] (x, y, z)
//   forceSlow       added to devForcesSlow[pos] (x, y, z)
// The .w component of the destination is not written here.
// NOTE(review): the slow-force adds are presumably gated by doSlow; confirm.
575 template<bool doSlow, typename jForceType>
576 __device__ __forceinline__
577 void storeForces(const int pos, const jForceType force, const jForceType forceSlow,
578 float4* __restrict__ devForces, float4* __restrict__ devForcesSlow) {
579 atomicAdd(&devForces[pos].x, force.x);
580 atomicAdd(&devForces[pos].y, force.y);
581 atomicAdd(&devForces[pos].z, force.z);
583 atomicAdd(&devForcesSlow[pos].x, forceSlow.x);
584 atomicAdd(&devForcesSlow[pos].y, forceSlow.y);
585 atomicAdd(&devForcesSlow[pos].z, forceSlow.z);
// Accumulates one atom's force into separate per-component float arrays with
// atomicAdd.
//   pos             index of the atom in every destination array
//   force           x/y/z added to devForces_x/_y/_z[pos]
//   forceSlow       x/y/z added to devForcesSlow_x/_y/_z[pos]
// NOTE(review): the slow-force adds are presumably gated by doSlow; confirm.
589 template<bool doSlow, typename jForceType>
590 __device__ __forceinline__
591 void storeForces(const int pos, const jForceType force, const jForceType forceSlow,
592 float* __restrict__ devForces_x,
593 float* __restrict__ devForces_y,
594 float* __restrict__ devForces_z,
595 float* __restrict__ devForcesSlow_x,
596 float* __restrict__ devForcesSlow_y,
597 float* __restrict__ devForcesSlow_z)
599 atomicAdd(&devForces_x[pos], force.x);
600 atomicAdd(&devForces_y[pos], force.y);
601 atomicAdd(&devForces_z[pos], force.z);
603 atomicAdd(&devForcesSlow_x[pos], forceSlow.x);
604 atomicAdd(&devForcesSlow_y[pos], forceSlow.y);
605 atomicAdd(&devForcesSlow_z[pos], forceSlow.z);
// Accumulates one atom's force into float3 arrays with atomicAdd.
//   pos             index of the atom in forces / forcesSlow
//   force           added to forces[pos] (x, y, z)
//   forceSlow       added to forcesSlow[pos] (x, y, z)
// NOTE(review): the slow-force adds are presumably gated by doSlow; confirm.
609 template<bool doSlow, typename jForceType>
610 __device__ __forceinline__
611 void storeForces(const int pos, const jForceType force, const jForceType forceSlow,
612 float3* __restrict__ forces, float3* __restrict__ forcesSlow) {
613 atomicAdd(&forces[pos].x, force.x);
614 atomicAdd(&forces[pos].y, force.y);
615 atomicAdd(&forces[pos].z, force.z);
617 atomicAdd(&forcesSlow[pos].x, forceSlow.x);
618 atomicAdd(&forcesSlow[pos].y, forceSlow.y);
619 atomicAdd(&forcesSlow[pos].z, forceSlow.z);
// Rotates per-lane j-atom data by one lane: every argument is replaced by the
// value read from source lane (threadIdx.x+1) & (WARPSIZE-1).
// NOTE(review): assumes WARP_SHUFFLE(mask, var, srcLane, width) behaves like
// __shfl_sync; the macro is defined outside this file.
// NOTE(review): the jatomIndex/jexclIndex/jexclMaxdiff shuffles are presumably
// gated by doPairlist; confirm.
623 template<bool doPairlist>
624 __device__ __forceinline__
625 void shuffleNext(float& xyzq_j_w, int& vdwtypej, int& jatomIndex, int& jexclMaxdiff, int& jexclIndex) {
626 xyzq_j_w = WARP_SHUFFLE(WARP_FULL_MASK, xyzq_j_w, (threadIdx.x+1) & (WARPSIZE-1), WARPSIZE);
627 vdwtypej = WARP_SHUFFLE(WARP_FULL_MASK, vdwtypej, (threadIdx.x+1) & (WARPSIZE-1), WARPSIZE);
629 jatomIndex = WARP_SHUFFLE(WARP_FULL_MASK, jatomIndex, (threadIdx.x+1) & (WARPSIZE-1), WARPSIZE);
630 jexclIndex = WARP_SHUFFLE(WARP_FULL_MASK, jexclIndex, (threadIdx.x+1) & (WARPSIZE-1), WARPSIZE);
631 jexclMaxdiff = WARP_SHUFFLE(WARP_FULL_MASK, jexclMaxdiff, (threadIdx.x+1) & (WARPSIZE-1), WARPSIZE);
// Rotates per-lane j-atom data by one lane (variant without the exclusion
// fields): every argument is replaced by the value read from source lane
// (threadIdx.x+1) & (WARPSIZE-1).
// NOTE(review): assumes WARP_SHUFFLE(mask, var, srcLane, width) behaves like
// __shfl_sync; the macro is defined outside this file.
// NOTE(review): the jatomIndex shuffle is presumably gated by doPairlist; confirm.
635 template<bool doPairlist>
636 __device__ __forceinline__
637 void shuffleNext(float& xyzq_j_w, int& vdwtypej, int& jatomIndex) {
638 xyzq_j_w = WARP_SHUFFLE(WARP_FULL_MASK, xyzq_j_w, (threadIdx.x+1) & (WARPSIZE-1), WARPSIZE);
639 vdwtypej = WARP_SHUFFLE(WARP_FULL_MASK, vdwtypej, (threadIdx.x+1) & (WARPSIZE-1), WARPSIZE);
641 jatomIndex = WARP_SHUFFLE(WARP_FULL_MASK, jatomIndex, (threadIdx.x+1) & (WARPSIZE-1), WARPSIZE);
// Rotates the per-lane j-force accumulators by one lane: the x, y and z
// components of jforce and jforceSlow are each replaced by the value read from
// source lane (threadIdx.x+1) & (WARPSIZE-1). A .w component, if the type has
// one, is not shuffled here.
// NOTE(review): assumes WARP_SHUFFLE(mask, var, srcLane, width) behaves like
// __shfl_sync; the macro is defined outside this file.
// NOTE(review): the jforceSlow shuffles are presumably gated by doSlow; confirm.
645 template<bool doSlow, typename jForceType>
646 __device__ __forceinline__
647 void shuffleNext(jForceType& jforce, jForceType& jforceSlow) {
648 jforce.x = WARP_SHUFFLE(WARP_FULL_MASK, jforce.x, (threadIdx.x+1)&(WARPSIZE-1), WARPSIZE);
649 jforce.y = WARP_SHUFFLE(WARP_FULL_MASK, jforce.y, (threadIdx.x+1)&(WARPSIZE-1), WARPSIZE);
650 jforce.z = WARP_SHUFFLE(WARP_FULL_MASK, jforce.z, (threadIdx.x+1)&(WARPSIZE-1), WARPSIZE);
652 jforceSlow.x = WARP_SHUFFLE(WARP_FULL_MASK, jforceSlow.x, (threadIdx.x+1)&(WARPSIZE-1), WARPSIZE);
653 jforceSlow.y = WARP_SHUFFLE(WARP_FULL_MASK, jforceSlow.y, (threadIdx.x+1)&(WARPSIZE-1), WARPSIZE);
654 jforceSlow.z = WARP_SHUFFLE(WARP_FULL_MASK, jforceSlow.z, (threadIdx.x+1)&(WARPSIZE-1), WARPSIZE);
658 //#define USE_NEW_EXCL_METHOD
661 // Returns the lower estimate for the distance between a bounding box and a set of atoms
// Per axis, the gap is |a - b| minus the box extent (a.wx, a.wy, a.wz), clamped
// at zero; r2 is the sum of the squared gaps.
// NOTE(review): presumably a.x/y/z is the box center and a.wx/wy/wz its
// half-widths; confirm against the BoundingBox definition.
663 __device__ __forceinline__ float distsq(const BoundingBox a, const float4 b) {
664 float dx = max(0.0f, fabsf(a.x - b.x) - a.wx);
665 float dy = max(0.0f, fabsf(a.y - b.y) - a.wy);
666 float dz = max(0.0f, fabsf(a.z - b.z) - a.wz);
667 float r2 = dx*dx + dy*dy + dz*dz;
671 #define LARGE_FLOAT (float)(1.0e10)
674 // Nonbonded force kernel
676 template <bool doEnergy, bool doVirial, bool doSlow, bool doPairlist, bool doAlch, bool doFEP, bool doTI, bool doStreaming, bool doTable, bool doAlchVdwForceSwitching, bool doNbThole>
678 __launch_bounds__(WARPSIZE*NONBONDKERNEL_NUM_WARP,
679 doPairlist ? (10) : (doEnergy ? (10) : (12) )
681 nonbondedForceKernel(
682 const int start, const int numTileLists,
683 const TileList* __restrict__ tileLists, TileExcl* __restrict__ tileExcls,
684 const int* __restrict__ tileJatomStart,
685 const int vdwCoefTableWidth, const float2* __restrict__ vdwCoefTable, const int* __restrict__ vdwTypes,
686 const float3 lata, const float3 latb, const float3 latc,
687 const float4* __restrict__ xyzq,
688 const float cutoff2, const CudaNBConstants nbConstants,
689 cudaTextureObject_t vdwCoefTableTex,
690 cudaTextureObject_t forceTableTex, cudaTextureObject_t energyTableTex,
693 const int atomStorageSize, const float plcutoff2, const PatchPairRecord* __restrict__ patchPairs,
694 const int* __restrict__ atomIndex,
695 const int2* __restrict__ exclIndexMaxDiff, const unsigned int* __restrict__ overflowExclusions,
696 unsigned int* __restrict__ tileListDepth, int* __restrict__ tileListOrder,
697 int* __restrict__ jtiles, TileListStat* __restrict__ tileListStat,
698 const BoundingBox* __restrict__ boundingBoxes,
699 #ifdef USE_NEW_EXCL_METHOD
700 const int* __restrict__ minmaxExclAtom,
703 float4* __restrict__ devForces, float4* __restrict__ devForcesSlow,
704 float * __restrict__ devForce_x,
705 float * __restrict__ devForce_y,
706 float * __restrict__ devForce_z,
707 float * __restrict__ devForce_w,
708 float * __restrict__ devForceSlow_x,
709 float * __restrict__ devForceSlow_y,
710 float * __restrict__ devForceSlow_z,
711 float * __restrict__ devForceSlow_w,
712 // ---- USE_STREAMING_FORCES ----
713 const int numPatches,
714 unsigned int* __restrict__ patchNumCount,
715 const CudaPatchRecord* __restrict__ cudaPatches,
716 float4* __restrict__ mapForces, float4* __restrict__ mapForcesSlow,
717 int* __restrict__ mapPatchReadyQueue,
718 int* __restrict__ outputOrder,
719 // ------------------------------
720 TileListVirialEnergy* __restrict__ virialEnergy,
722 char* __restrict__ p,
723 // ---- doNbThole ----
724 cudaTextureObject_t drudeNbTholeTijTableTex,
725 const float* __restrict drudeNbTholeTijTable,
726 const int* __restrict isDrude,
727 const float* __restrict drudeAtomAlpha,
728 const float drudeNbtholeCut2
730 using jForceType = typename std::conditional<doSlow, float3, float4>::type;
731 // Single warp takes care of one list of tiles
732 // for (int itileList = (threadIdx.x + blockDim.x*blockIdx.x)/WARPSIZE;itileList < numTileLists;itileList += blockDim.x*gridDim.x/WARPSIZE)
733 const int itileListTemp = start + (threadIdx.x + blockDim.x*blockIdx.x) / WARPSIZE;
734 const int itileList = WARP_SHUFFLE(WARP_FULL_MASK, itileListTemp, 0, WARPSIZE);
735 if (itileList < numTileLists)
740 float energyVdw, energyElec, energySlow;
742 float energyVdw_s, energyElec_s, energySlow_s;
744 float energyVdw_ti_1, energyVdw_ti_2, energyElec_ti_1, energyElec_ti_2, energySlow_ti_1, energySlow_ti_2;
746 unsigned int itileListLen;
749 char part1, part2, p2;
750 // ***** Drude/NbThole start
751 int drude_i, drude_j_tmp, drude_j;
752 float alpha_i, alpha_j_tmp, alpha_j;
753 // ***** Drude/NbThole end
754 bool doShift = (alchflags.alchVdwShiftCoeff != 0.0f);
755 __shared__ float4 s_xyzq[NONBONDKERNEL_NUM_WARP][WARPSIZE];
756 __shared__ jForceType s_jforce[NONBONDKERNEL_NUM_WARP][WARPSIZE];
757 __shared__ jForceType s_jforceSlow[NONBONDKERNEL_NUM_WARP][WARPSIZE];
758 __shared__ int s_vdwtypej[NONBONDKERNEL_NUM_WARP][WARPSIZE];
759 __shared__ int s_jatomIndex[NONBONDKERNEL_NUM_WARP][WARPSIZE];
761 __shared__ int s_iatomStart[NONBONDKERNEL_NUM_WARP];
762 __shared__ int s_jatomStart[NONBONDKERNEL_NUM_WARP];
766 // Warp index (0...warpsize-1)
767 const int wid = threadIdx.x & (WARPSIZE-1);
768 const int iwarp = WARP_SHUFFLE(WARP_FULL_MASK, threadIdx.x / WARPSIZE, 0, WARPSIZE);
770 TileList tmp = tileLists[itileList];
771 int iatomStart = tmp.iatomStart;
772 int jtileStart = tmp.jtileStart;
773 int jtileEnd = tmp.jtileEnd;
774 patchInd = tmp.patchInd;
775 patchNumList = tmp.patchNumList;
777 float shx = tmp.offsetXYZ.x*lata.x + tmp.offsetXYZ.y*latb.x + tmp.offsetXYZ.z*latc.x;
778 float shy = tmp.offsetXYZ.x*lata.y + tmp.offsetXYZ.y*latb.y + tmp.offsetXYZ.z*latc.y;
779 float shz = tmp.offsetXYZ.x*lata.z + tmp.offsetXYZ.y*latb.z + tmp.offsetXYZ.z*latc.z;
781 // DH - set zeroShift flag if magnitude of shift vector is zero
782 bool zeroShift = ! (shx*shx + shy*shy + shz*shz > 0);
784 int iatomSize, iatomFreeSize, jatomSize, jatomFreeSize;
786 PatchPairRecord PPStmp = patchPairs[itileList];
787 iatomSize = PPStmp.iatomSize;
788 iatomFreeSize = PPStmp.iatomFreeSize;
789 jatomSize = PPStmp.jatomSize;
790 jatomFreeSize = PPStmp.jatomFreeSize;
793 // Write to global memory here to avoid register spilling
796 virialEnergy[itileList].shx = shx;
797 virialEnergy[itileList].shy = shy;
798 virialEnergy[itileList].shz = shz;
802 // Load i-atom data (and shift coordinates)
803 float4 xyzq_i = xyzq[iatomStart + wid];
804 if (doAlch) part1 = p[iatomStart + wid];
807 drude_i = isDrude[iatomStart + wid];
808 alpha_i = drudeAtomAlpha[iatomStart + wid];
813 int vdwtypei = vdwTypes[iatomStart + wid]*vdwCoefTableWidth;
815 // Load i-atom data (and shift coordinates)
816 BoundingBox boundingBoxI;
818 boundingBoxI = boundingBoxes[iatomStart/WARPSIZE];
819 boundingBoxI.x += shx;
820 boundingBoxI.y += shy;
821 boundingBoxI.z += shz;
824 // Get i-atom global index
825 #ifdef USE_NEW_EXCL_METHOD
826 int iatomIndex, minExclAtom, maxExclAtom;
831 #ifdef USE_NEW_EXCL_METHOD
832 iatomIndex = atomIndex[iatomStart + wid];
833 int2 tmp = minmaxExclAtom[iatomStart + wid];
837 iatomIndex = atomIndex[iatomStart + wid];
841 // i-forces in registers
847 // float3 iforceSlow;
854 // float energyVdw, energyElec, energySlow;
858 energyVdw_ti_1 = 0.0f;
859 energyVdw_ti_2 = 0.0f;
861 energyElec_ti_1 = 0.0f;
862 energyElec_ti_2 = 0.0f;
867 energySlow_ti_1 = 0.0f;
868 energySlow_ti_2 = 0.0f;
872 // Number of exclusions
873 // NOTE: Lowest bit is used as indicator bit for tile pairs:
874 // bit 0 tile has no atoms within pairlist cutoff
875 // bit 1 tile has atoms within pairlist cutoff
877 if (doPairlist) nexcluded = 0;
879 // Number of i loops and free atoms
882 int nloopi = min(iatomSize - iatomStart, WARPSIZE);
883 nfreei = max(iatomFreeSize - iatomStart, 0);
885 xyzq_i.x = -LARGE_FLOAT;
886 xyzq_i.y = -LARGE_FLOAT;
887 xyzq_i.z = -LARGE_FLOAT;
893 // int minJatomStart;
895 // minJatomStart = tileJatomStart[jtileStart];
899 // Exclusion index and maxdiff
900 int iexclIndex, iexclMaxdiff;
902 int2 tmp = exclIndexMaxDiff[iatomStart + wid];
904 iexclMaxdiff = tmp.y;
906 s_iatomStart[iwarp] = iatomStart;
908 // If the tile is within a patch, then the first jtile is a self tile
909 if (patchInd.x == patchInd.y & zeroShift) {
910 int jtile = jtileStart;
911 // Load j-atom starting index and exclusion mask
912 int jatomStart = tileJatomStart[jtile];
914 float4 xyzq_j = xyzq[jatomStart + wid];
915 WARP_SYNC(WARP_FULL_MASK);
916 if (doAlch) p2 = p[jatomStart + wid];
920 drude_j_tmp = isDrude[jatomStart + wid];
921 alpha_j_tmp = drudeAtomAlpha[jatomStart + wid];
924 // Check for early bail
925 // No point of early bail for self
927 unsigned int excl = (doPairlist) ? 0 : tileExcls[jtile].excl[wid];
928 int vdwtypej = vdwTypes[jatomStart + wid];
929 s_vdwtypej[iwarp][wid] = vdwtypej;
931 // Get i-atom global index
933 s_jatomIndex[iwarp][wid] = atomIndex[jatomStart + wid];
936 // Number of j loops and free atoms
939 int nloopj = min(jatomSize - jatomStart, WARPSIZE);
940 nfreej = max(jatomFreeSize - jatomStart, 0);
941 //if (nfreei == 0 && nfreej == 0) continue;
943 xyzq_j.x = LARGE_FLOAT;
944 xyzq_j.y = LARGE_FLOAT;
945 xyzq_j.z = LARGE_FLOAT;
948 s_xyzq[iwarp][wid] = xyzq_j;
950 // DH - self requires that zeroShift is also set
951 // DC - In this case self is always true
952 const int modval = 2*WARPSIZE-1;
954 s_jforce[iwarp][wid] = make_zero<jForceType>();
956 s_jforceSlow[iwarp][wid] = make_zero<jForceType>();
957 WARP_SYNC(WARP_FULL_MASK);
961 // NOTE: Pairlist update, we must also include the diagonal since this is used
963 // Clear the lowest (indicator) bit
966 // For self tiles, do the diagonal term (t=0).
967 // NOTE: No energies are computed here, since this self-diagonal term is only for GBIS phase 2
968 int j = (0 + wid) & modval;
969 xyzq_j = s_xyzq[iwarp][j];
970 float dx = xyzq_j.x - xyzq_i.x;
971 float dy = xyzq_j.y - xyzq_i.y;
972 float dz = xyzq_j.z - xyzq_i.z;
973 float r2 = dx*dx + dy*dy + dz*dz;
975 if (j < WARPSIZE && r2 < plcutoff2) {
976 // We have atom pair within the pairlist cutoff => Set indicator bit
979 WARP_SYNC(WARP_FULL_MASK);
981 // TODO this can be done in fewer iterations if we take advantage of Newton's 3rd
983 for (int t = 1;t < WARPSIZE;t++) {
984 int j = (t + wid) & modval;
986 // NOTE: __shfl() operation can give non-sense here because j may be >= WARPSIZE.
987 // However, if (j < WARPSIZE ..) below makes sure that these non-sense
988 // results are not used
989 if (doAlch) part2 = WARP_SHUFFLE(WARP_FULL_MASK, p2, j, WARPSIZE);
991 drude_j = WARP_SHUFFLE(WARP_FULL_MASK, drude_j_tmp, j, WARPSIZE);
992 alpha_j = WARP_SHUFFLE(WARP_FULL_MASK, alpha_j_tmp, j, WARPSIZE);
997 xyzq_j = s_xyzq[iwarp][j];
998 float dx = xyzq_j.x - xyzq_i.x;
999 float dy = xyzq_j.y - xyzq_i.y;
1000 float dz = xyzq_j.z - xyzq_i.z;
1001 float r2 = dx*dx + dy*dy + dz*dz;
1002 if (r2 < plcutoff2) {
1003 // We have atom pair within the pairlist cutoff => Set indicator bit
1005 if (j < nfreej || wid < nfreei) {
1006 bool excluded = false;
1007 int indexdiff = s_jatomIndex[iwarp][j] - iatomIndex;
1008 if ( abs(indexdiff) <= iexclMaxdiff) {
1009 indexdiff += iexclIndex;
1010 int indexword = ((unsigned int) indexdiff) >> 5;
1012 if ( indexword < MAX_CONST_EXCLUSIONS ) {
1013 indexword = constExclusions[indexword];
1015 indexword = overflowExclusions[indexword];
1018 excluded = ((indexword & (1<<(indexdiff&31))) != 0);
1020 if (excluded) nexcluded += 2;
1021 if (!excluded) excl |= 0x80000000;
1023 if(!excluded && r2 < cutoff2){
1026 calcForceEnergyFEP<doEnergy, doSlow, true, doAlchVdwForceSwitching, jForceType>(
1027 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1028 vdwtypei, s_vdwtypej[iwarp][j], part1, part2,
1029 vdwCoefTable, vdwCoefTableTex, forceTableTex, energyTableTex,
1031 s_jforce[iwarp][j], s_jforceSlow[iwarp][j],
1032 energyVdw, energyVdw_s,
1033 energyElec, energySlow, energyElec_s, energySlow_s);
1035 calcForceEnergyTI<doEnergy, doSlow, true, doAlchVdwForceSwitching, jForceType>(
1036 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1037 vdwtypei, s_vdwtypej[iwarp][j], part1, part2,
1038 vdwCoefTable, vdwCoefTableTex, forceTableTex, energyTableTex,
1040 s_jforce[iwarp][j], s_jforceSlow[iwarp][j],
1041 energyVdw, energyVdw_ti_1,
1042 energyVdw_ti_2, energyElec, energyElec_ti_1, energyElec_ti_2,
1043 energySlow, energySlow_ti_1, energySlow_ti_2);
1047 calcForceEnergyFEP<doEnergy, doSlow, false, doAlchVdwForceSwitching, jForceType>(
1048 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1049 vdwtypei, s_vdwtypej[iwarp][j], part1, part2,
1050 vdwCoefTable, vdwCoefTableTex, forceTableTex, energyTableTex,
1052 s_jforce[iwarp][j], s_jforceSlow[iwarp][j],
1053 energyVdw, energyVdw_s,
1054 energyElec, energySlow, energyElec_s, energySlow_s);
1056 calcForceEnergyTI<doEnergy, doSlow, false, doAlchVdwForceSwitching, jForceType>(
1057 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1058 vdwtypei, s_vdwtypej[iwarp][j], part1, part2,
1059 vdwCoefTable, vdwCoefTableTex, forceTableTex, energyTableTex,
1061 s_jforce[iwarp][j], s_jforceSlow[iwarp][j],
1062 energyVdw, energyVdw_ti_1,
1063 energyVdw_ti_2, energyElec, energyElec_ti_1, energyElec_ti_2,
1064 energySlow, energySlow_ti_1, energySlow_ti_2);
1067 }//if !excluded && r2 < cutoff2
1069 if (!excluded && r2 < cutoff2) {
1071 calcForceEnergy<doEnergy, doSlow>(
1072 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1073 vdwtypei, s_vdwtypej[iwarp][j], vdwCoefTable,
1074 vdwCoefTableTex, forceTableTex, energyTableTex,
1077 s_jforceSlow[iwarp][j],
1078 energyVdw, energyElec, energySlow);
1080 calcForceEnergyMath<doEnergy, doSlow, jForceType>(
1081 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1082 vdwtypei, s_vdwtypej[iwarp][j], vdwCoefTable,
1083 vdwCoefTableTex, forceTableTex, energyTableTex,
1086 s_jforceSlow[iwarp][j],
1087 energyVdw, energyElec, energySlow,
1091 if (r2 < drudeNbtholeCut2) {
1092 // If drude_i >= 0, then the particle i is DRUD and drude_i is the NB index of its mother atom.
1093 // If drude_j >= 0, then the particle j is DRUD and drude_j is the NB index of its mother atom.
1094 calcForceEnergyNbThole<doEnergy>(
1095 drude_i >= 0 ? vdwTypes[drude_i]*vdwCoefTableWidth : vdwtypei,
1096 drude_j >= 0 ? vdwTypes[drude_j] : s_vdwtypej[iwarp][j],
1097 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1098 drudeNbTholeTijTableTex, drudeNbTholeTijTable,
1100 energyElec, iforce, s_jforce[iwarp][j]);
1101 } // r2 < drudeNbtholeCut2
1108 WARP_SYNC(WARP_FULL_MASK);
1111 // Just compute forces
1114 for (int t = 1;t < WARPSIZE;t++) {
1116 int j = (t + wid) & modval;
1117 part2 = WARP_SHUFFLE(WARP_FULL_MASK, p2, j, WARPSIZE);
1120 int j = (t + wid) & modval;
1121 drude_j = WARP_SHUFFLE(WARP_FULL_MASK, drude_j_tmp, j, WARPSIZE);
1122 alpha_j = WARP_SHUFFLE(WARP_FULL_MASK, alpha_j_tmp, j, WARPSIZE);
1125 int j = ((t + wid) & (WARPSIZE-1));
1126 xyzq_j = s_xyzq[iwarp][j];
1127 float dx = xyzq_j.x - xyzq_i.x;
1128 float dy = xyzq_j.y - xyzq_i.y;
1129 float dz = xyzq_j.z - xyzq_i.z;
1131 float r2 = dx*dx + dy*dy + dz*dz;
1133 if(r2 < cutoff2){ // (r2 < cutoff2)
1136 calcForceEnergyFEP<doEnergy, doSlow, true, doAlchVdwForceSwitching, jForceType>(
1137 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1138 vdwtypei, s_vdwtypej[iwarp][j], part1, part2,
1139 vdwCoefTable, vdwCoefTableTex, forceTableTex, energyTableTex,
1142 s_jforceSlow[iwarp][j],
1143 energyVdw, energyVdw_s,
1144 energyElec, energySlow, energyElec_s, energySlow_s);
1146 calcForceEnergyTI<doEnergy, doSlow, true, doAlchVdwForceSwitching, jForceType>(
1147 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1148 vdwtypei, s_vdwtypej[iwarp][j], part1, part2,
1149 vdwCoefTable, vdwCoefTableTex, forceTableTex, energyTableTex,
1152 s_jforceSlow[iwarp][j],
1153 energyVdw, energyVdw_ti_1,
1154 energyVdw_ti_2, energyElec, energyElec_ti_1, energyElec_ti_2,
1155 energySlow, energySlow_ti_1, energySlow_ti_2);
1159 calcForceEnergyFEP<doEnergy, doSlow, false, doAlchVdwForceSwitching, jForceType>(
1160 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1161 vdwtypei, s_vdwtypej[iwarp][j], part1, part2,
1162 vdwCoefTable, vdwCoefTableTex, forceTableTex, energyTableTex,
1165 s_jforceSlow[iwarp][j],
1166 energyVdw, energyVdw_s,
1167 energyElec, energySlow, energyElec_s, energySlow_s);
1169 calcForceEnergyTI<doEnergy, doSlow, false, doAlchVdwForceSwitching, jForceType>(
1170 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1171 vdwtypei, s_vdwtypej[iwarp][j], part1, part2,
1172 vdwCoefTable, vdwCoefTableTex, forceTableTex, energyTableTex,
1175 s_jforceSlow[iwarp][j],
1176 energyVdw, energyVdw_ti_1,
1177 energyVdw_ti_2, energyElec, energyElec_ti_1, energyElec_ti_2,
1178 energySlow, energySlow_ti_1, energySlow_ti_2);
1185 calcForceEnergy<doEnergy, doSlow>(
1186 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1187 vdwtypei, s_vdwtypej[iwarp][j], vdwCoefTable,
1188 vdwCoefTableTex, forceTableTex, energyTableTex,
1191 s_jforceSlow[iwarp][j],
1192 energyVdw, energyElec, energySlow);
1194 calcForceEnergyMath<doEnergy, doSlow, jForceType>(
1195 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1196 vdwtypei, s_vdwtypej[iwarp][j], vdwCoefTable,
1197 vdwCoefTableTex, forceTableTex, energyTableTex,
1200 s_jforceSlow[iwarp][j],
1201 energyVdw, energyElec, energySlow,
1205 if (r2 < drudeNbtholeCut2) {
1206 // If drude_i >= 0, then the particle i is DRUD and drude_i is the NB index of its mother atom.
1207 // If drude_j >= 0, then the particle j is DRUD and drude_j is the NB index of its mother atom.
1208 calcForceEnergyNbThole<doEnergy>(
1209 drude_i >= 0 ? vdwTypes[drude_i]*vdwCoefTableWidth : vdwtypei,
1210 drude_j >= 0 ? vdwTypes[drude_j] : s_vdwtypej[iwarp][j],
1211 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1212 drudeNbTholeTijTableTex, drudeNbTholeTijTable,
1214 energyElec, iforce, s_jforce[iwarp][j]);
1215 } // r2 < drudeNbtholeCut2
1221 WARP_SYNC(WARP_FULL_MASK);
1224 WARP_SYNC(WARP_FULL_MASK);
1227 storeForces<doSlow, jForceType>(jatomStart + wid, s_jforce[iwarp][wid], s_jforceSlow[iwarp][wid],
1228 devForce_x, devForce_y, devForce_z,
1229 devForceSlow_x, devForceSlow_y, devForceSlow_z);
1232 const unsigned int warp_exclude = WARP_BALLOT(WARP_FULL_MASK, nexcluded & 1);
1233 const unsigned int warp_any_exclude = WARP_BALLOT(WARP_FULL_MASK, excl);
1235 int anyexcl = warp_any_exclude ? 1 : 0;
1237 // Mark this jtile as non-empty:
1238 // VdW: 1 if tile has atom pairs within pairlist cutoff and some of these atoms interact
1239 // GBIS: 65536 if tile has atom pairs within pairlist cutoff but not necessarily interacting (i.e. these atoms are fixed or excluded)
1240 if (wid == 0 && anyexcl) jtiles[jtile] = anyexcl;
1242 tileExcls[jtile].excl[wid] = excl;
1244 // lower 16 bits: number of tiles with atom pairs within pairlist cutoff that interact
1245 // upper 16 bits: number of tiles with atom pairs within pairlist cutoff (but not necessarily interacting)
1246 itileListLen += anyexcl;
1247 // NOTE, this minJatomStart is only stored once for the first tile list entry
1248 // minJatomStart = min(minJatomStart, jatomStart);
1254 WARP_SYNC(WARP_FULL_MASK);
1256 for (int jtile=jtileStart; jtile <= jtileEnd; jtile++) {
1258 unsigned int excl = 0;
1262 // Load j-atom starting index and exclusion mask
1263 jatomStart = tileJatomStart[jtile];
1265 xyzq_j = xyzq[jatomStart + wid];
1266 if (doAlch) p2 = p[jatomStart + wid];
1270 drude_j_tmp = isDrude[jatomStart + wid];
1271 alpha_j_tmp = drudeAtomAlpha[jatomStart + wid];
1274 // Check for early bail
1275 // DC - I found this was slower
1277 // float r2bb = distsq(boundingBoxI, xyzq_j);
1278 // if (WARP_ALL(WARP_FULL_MASK, r2bb > plcutoff2)) continue;
1281 excl = (doPairlist) ? 0 : tileExcls[jtile].excl[wid];
1282 vdwtypej = vdwTypes[jatomStart + wid];
1283 s_vdwtypej[iwarp][wid] = vdwtypej;
1285 // Get j-atom global index
1287 s_jatomIndex[iwarp][wid] = atomIndex[jatomStart + wid];
1290 // Number of j loops and free atoms
1293 int nloopj = min(jatomSize - jatomStart, WARPSIZE);
1294 nfreej = max(jatomFreeSize - jatomStart, 0);
1295 //if (nfreei == 0 && nfreej == 0) continue;
1296 if (wid >= nloopj) {
1297 xyzq_j.x = LARGE_FLOAT;
1298 xyzq_j.y = LARGE_FLOAT;
1299 xyzq_j.z = LARGE_FLOAT;
1303 s_jatomStart[iwarp] = jatomStart;
1305 WARP_SYNC(WARP_FULL_MASK);
1306 s_xyzq[iwarp][wid] = xyzq_j;
1308 // DH - self requires that zeroShift is also set
1309 // DC - In this case self is always false
1310 const int modval = WARPSIZE-1;
1312 s_jforce[iwarp][wid] = make_zero<jForceType>();
1314 s_jforceSlow[iwarp][wid] = make_zero<jForceType>();
1315 WARP_SYNC(WARP_FULL_MASK);
1319 // NOTE: Pairlist update, we must also include the diagonal since this is used
1321 // Clear the lowest (indicator) bit
1325 for (int t = 0;t < WARPSIZE;t++) {
1326 const int j = (t + wid) & modval;
1328 // NOTE: __shfl() operation can give non-sense here because j may be >= WARPSIZE.
1329 // However, if (j < WARPSIZE ..) below makes sure that these non-sense
1330 // results are not used
1331 if (doAlch) part2 = WARP_SHUFFLE(WARP_FULL_MASK, p2, j, WARPSIZE);
1333 drude_j = WARP_SHUFFLE(WARP_FULL_MASK, drude_j_tmp, j, WARPSIZE);
1334 alpha_j = WARP_SHUFFLE(WARP_FULL_MASK, alpha_j_tmp, j, WARPSIZE);
1338 xyzq_j = s_xyzq[iwarp][j];
1339 float dx = xyzq_j.x - xyzq_i.x;
1340 float dy = xyzq_j.y - xyzq_i.y;
1341 float dz = xyzq_j.z - xyzq_i.z;
1342 float r2 = dx*dx + dy*dy + dz*dz;
1343 if (r2 < plcutoff2) {
1344 // We have atom pair within the pairlist cutoff => Set indicator bit
1346 if (j < nfreej || wid < nfreei) {
1347 bool excluded = false;
1348 int indexdiff = s_jatomIndex[iwarp][j] - iatomIndex;
1349 if ( abs(indexdiff) <= iexclMaxdiff) {
1350 indexdiff += iexclIndex;
1351 int indexword = ((unsigned int) indexdiff) >> 5;
1353 if ( indexword < MAX_CONST_EXCLUSIONS ) {
1354 indexword = constExclusions[indexword];
1356 indexword = overflowExclusions[indexword];
1359 excluded = ((indexword & (1<<(indexdiff&31))) != 0);
1361 if (excluded) nexcluded += 2;
1362 if (!excluded) excl |= 0x80000000;
1364 if(!excluded && r2 < cutoff2){
1367 calcForceEnergyFEP<doEnergy, doSlow, true, doAlchVdwForceSwitching, jForceType>(
1368 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1369 vdwtypei, s_vdwtypej[iwarp][j], part1, part2,
1370 vdwCoefTable, vdwCoefTableTex, forceTableTex, energyTableTex,
1372 s_jforce[iwarp][j], s_jforceSlow[iwarp][j],
1373 energyVdw, energyVdw_s,
1374 energyElec, energySlow, energyElec_s, energySlow_s);
1376 calcForceEnergyTI<doEnergy, doSlow, true, doAlchVdwForceSwitching, jForceType>(
1377 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1378 vdwtypei, s_vdwtypej[iwarp][j], part1, part2,
1379 vdwCoefTable, vdwCoefTableTex, forceTableTex, energyTableTex,
1381 s_jforce[iwarp][j], s_jforceSlow[iwarp][j],
1382 energyVdw, energyVdw_ti_1,
1383 energyVdw_ti_2, energyElec, energyElec_ti_1, energyElec_ti_2,
1384 energySlow, energySlow_ti_1, energySlow_ti_2);
1388 calcForceEnergyFEP<doEnergy, doSlow, false, doAlchVdwForceSwitching, jForceType>(
1389 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1390 vdwtypei, s_vdwtypej[iwarp][j], part1, part2,
1391 vdwCoefTable, vdwCoefTableTex, forceTableTex, energyTableTex,
1393 s_jforce[iwarp][j], s_jforceSlow[iwarp][j],
1394 energyVdw, energyVdw_s,
1395 energyElec, energySlow, energyElec_s, energySlow_s);
1397 calcForceEnergyTI<doEnergy, doSlow, false, doAlchVdwForceSwitching, jForceType>(
1398 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1399 vdwtypei, s_vdwtypej[iwarp][j], part1, part2,
1400 vdwCoefTable, vdwCoefTableTex, forceTableTex, energyTableTex,
1402 s_jforce[iwarp][j], s_jforceSlow[iwarp][j],
1403 energyVdw, energyVdw_ti_1,
1404 energyVdw_ti_2, energyElec, energyElec_ti_1, energyElec_ti_2,
1405 energySlow, energySlow_ti_1, energySlow_ti_2);
1408 }//if !excluded && r2 < cutoff2
1410 if (!excluded && r2 < cutoff2) {
1412 calcForceEnergy<doEnergy, doSlow>(
1413 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1414 vdwtypei, s_vdwtypej[iwarp][j], vdwCoefTable,
1415 vdwCoefTableTex, forceTableTex, energyTableTex,
1418 s_jforceSlow[iwarp][j],
1419 energyVdw, energyElec, energySlow);
1421 calcForceEnergyMath<doEnergy, doSlow, jForceType>(
1422 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1423 vdwtypei, s_vdwtypej[iwarp][j], vdwCoefTable,
1424 vdwCoefTableTex, forceTableTex, energyTableTex,
1427 s_jforceSlow[iwarp][j],
1428 energyVdw, energyElec, energySlow,
1432 if (r2 < drudeNbtholeCut2) {
1433 // If drude_i >= 0, then the particle i is DRUD and drude_i is the NB index of its mother atom.
1434 // If drude_j >= 0, then the particle j is DRUD and drude_j is the NB index of its mother atom.
1435 calcForceEnergyNbThole<doEnergy>(
1436 drude_i >= 0 ? vdwTypes[drude_i]*vdwCoefTableWidth : vdwtypei,
1437 drude_j >= 0 ? vdwTypes[drude_j] : s_vdwtypej[iwarp][j],
1438 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1439 drudeNbTholeTijTableTex, drudeNbTholeTijTable,
1442 energyElec, iforce, s_jforce[iwarp][j]);
1443 } // r2 < drudeNbtholeCut2
1449 WARP_SYNC(WARP_FULL_MASK);
1452 // Just compute forces
1454 for (int t = 0; t < WARPSIZE; t++) {
1455 const int j = ((t + wid) & (WARPSIZE-1));
1457 part2 = WARP_SHUFFLE(WARP_FULL_MASK, p2, j, WARPSIZE);
1460 drude_j = WARP_SHUFFLE(WARP_FULL_MASK, drude_j_tmp, j, WARPSIZE);
1461 alpha_j = WARP_SHUFFLE(WARP_FULL_MASK, alpha_j_tmp, j, WARPSIZE);
1464 xyzq_j = s_xyzq[iwarp][j];
1465 float dx = xyzq_j.x - xyzq_i.x;
1466 float dy = xyzq_j.y - xyzq_i.y;
1467 float dz = xyzq_j.z - xyzq_i.z;
1469 float r2 = dx*dx + dy*dy + dz*dz;
1471 if(r2 < cutoff2){ // (r2 < cutoff2)
1474 calcForceEnergyFEP<doEnergy, doSlow, true, doAlchVdwForceSwitching, jForceType>(
1475 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1476 vdwtypei, s_vdwtypej[iwarp][j], part1, part2,
1477 vdwCoefTable, vdwCoefTableTex, forceTableTex, energyTableTex,
1480 s_jforceSlow[iwarp][j],
1481 energyVdw, energyVdw_s,
1482 energyElec, energySlow, energyElec_s, energySlow_s);
1484 calcForceEnergyTI<doEnergy, doSlow, true, doAlchVdwForceSwitching, jForceType>(
1485 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1486 vdwtypei, s_vdwtypej[iwarp][j], part1, part2,
1487 vdwCoefTable, vdwCoefTableTex, forceTableTex, energyTableTex,
1490 s_jforceSlow[iwarp][j],
1491 energyVdw, energyVdw_ti_1,
1492 energyVdw_ti_2, energyElec, energyElec_ti_1, energyElec_ti_2,
1493 energySlow, energySlow_ti_1, energySlow_ti_2);
1497 calcForceEnergyFEP<doEnergy, doSlow, false, doAlchVdwForceSwitching, jForceType>(
1498 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1499 vdwtypei, s_vdwtypej[iwarp][j], part1, part2,
1500 vdwCoefTable, vdwCoefTableTex, forceTableTex, energyTableTex,
1503 s_jforceSlow[iwarp][j],
1504 energyVdw, energyVdw_s,
1505 energyElec, energySlow, energyElec_s, energySlow_s);
1507 calcForceEnergyTI<doEnergy, doSlow, false, doAlchVdwForceSwitching, jForceType>(
1508 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1509 vdwtypei, s_vdwtypej[iwarp][j], part1, part2,
1510 vdwCoefTable, vdwCoefTableTex, forceTableTex, energyTableTex,
1513 s_jforceSlow[iwarp][j],
1514 energyVdw, energyVdw_ti_1,
1515 energyVdw_ti_2, energyElec, energyElec_ti_1, energyElec_ti_2,
1516 energySlow, energySlow_ti_1, energySlow_ti_2);
1523 calcForceEnergy<doEnergy, doSlow>(
1524 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1525 vdwtypei, s_vdwtypej[iwarp][j], vdwCoefTable,
1526 vdwCoefTableTex, forceTableTex, energyTableTex,
1529 s_jforceSlow[iwarp][j],
1530 energyVdw, energyElec, energySlow);
1532 calcForceEnergyMath<doEnergy, doSlow, jForceType>(
1533 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1534 vdwtypei, s_vdwtypej[iwarp][j], vdwCoefTable,
1535 vdwCoefTableTex, forceTableTex, energyTableTex,
1538 s_jforceSlow[iwarp][j],
1539 energyVdw, energyElec, energySlow,
1543 if (r2 < drudeNbtholeCut2) {
1544 // If drude_i >= 0, then the particle i is DRUD and drude_i is the NB index of its mother atom.
1545 // If drude_j >= 0, then the particle j is DRUD and drude_j is the NB index of its mother atom.
1546 calcForceEnergyNbThole<doEnergy>(
1547 drude_i >= 0 ? vdwTypes[drude_i]*vdwCoefTableWidth : vdwtypei,
1548 drude_j >= 0 ? vdwTypes[drude_j] : s_vdwtypej[iwarp][j],
1549 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1550 drudeNbTholeTijTableTex, drudeNbTholeTijTable,
1552 energyElec, iforce, s_jforce[iwarp][j]);
1553 } // r2 < drudeNbtholeCut2
1559 WARP_SYNC(WARP_FULL_MASK);
1564 storeForces<doSlow, jForceType>(s_jatomStart[iwarp] + wid, s_jforce[iwarp][wid], s_jforceSlow[iwarp][wid],
1565 devForce_x, devForce_y, devForce_z,
1566 devForceSlow_x, devForceSlow_y, devForceSlow_z);
1569 const unsigned int warp_exclude = WARP_BALLOT(WARP_FULL_MASK, nexcluded & 1);
1570 const unsigned int warp_any_exclude = WARP_BALLOT(WARP_FULL_MASK, excl);
1572 int anyexcl = warp_any_exclude ? 1 : 0;
1574 // Mark this jtile as non-empty:
1575 // VdW: 1 if tile has atom pairs within pairlist cutoff and some of these atoms interact
1576 // GBIS: 65536 if tile has atom pairs within pairlist cutoff but not necessarily interacting (i.e. these atoms are fixed or excluded)
1577 if (wid == 0 && anyexcl) jtiles[jtile] = anyexcl;
1579 tileExcls[jtile].excl[wid] = excl;
1581 // lower 16 bits: number of tiles with atom pairs within pairlist cutoff that interact
1582 // upper 16 bits: number of tiles with atom pairs within pairlist cutoff (but not necessarily interacting)
1583 itileListLen += anyexcl;
1584 // NOTE, this minJatomStart is only stored once for the first tile list entry
1585 // minJatomStart = min(minJatomStart, jatomStart);
1587 WARP_SYNC(WARP_FULL_MASK);
1592 storeForces<doSlow, float3>(s_iatomStart[iwarp] + wid, iforce, iforceSlow,
1593 devForce_x, devForce_y, devForce_z,
1594 devForceSlow_x, devForceSlow_y, devForceSlow_z);
1596 // Done with computation
1598 // Save pairlist stuff
1601 // Warp index (0...warpsize-1)
1602 const int wid = threadIdx.x % WARPSIZE;
1605 // minJatomStart is in range [0 ... atomStorageSize-1]
1606 //int atom0 = (minJatomStart)/WARPSIZE;
1608 // int storageOffset = atomStorageSize/WARPSIZE;
1609 // int itileListLen = 0;
1610 // for (int jtile=jtileStart;jtile <= jtileEnd;jtile++) itileListLen += jtiles[jtile];
1611 // Store 0 if itileListLen == 0
1612 // tileListDepth[itileList] = (itileListLen > 0)*(itileListLen*storageOffset + atom0);
1613 tileListDepth[itileList] = itileListLen;
1614 tileListOrder[itileList] = itileList;
1615 // Number of active tile lists that have tiles with atom pairs within the pairlist cutoff that interact
1616 if ((itileListLen & 65535) > 0) atomicAdd(&tileListStat->numTileLists, 1);
1617 // Number of active tile lists that have tiles with atom pairs within the pairlist cutoff (but not necessarily interacting)
1618 if (itileListLen > 0) atomicAdd(&tileListStat->numTileListsGBIS, 1);
1619 // NOTE: always numTileListsGBIS >= numTileLists
1622 typedef cub::WarpReduce<int> WarpReduceInt;
1623 __shared__ typename WarpReduceInt::TempStorage tempStorage[NONBONDKERNEL_NUM_WARP];
1624 const int warpId = threadIdx.x / WARPSIZE;
1625 // Remove indicator bit
1627 volatile int nexcludedWarp = WarpReduceInt(tempStorage[warpId]).Sum(nexcluded);
1628 if (wid == 0) atomicAdd(&tileListStat->numExcluded, nexcludedWarp);
1633 // Warp index (0...warpsize-1)
1634 const int wid = threadIdx.x % WARPSIZE;
1636 typedef cub::WarpReduce<float> WarpReduce;
1637 __shared__ typename WarpReduce::TempStorage tempStorage[NONBONDKERNEL_NUM_WARP];
1638 const int warpId = threadIdx.x / WARPSIZE;
1639 volatile float iforcexSum = WarpReduce(tempStorage[warpId]).Sum(iforce.x);
1640 WARP_SYNC(WARP_FULL_MASK);
1641 volatile float iforceySum = WarpReduce(tempStorage[warpId]).Sum(iforce.y);
1642 WARP_SYNC(WARP_FULL_MASK);
1643 volatile float iforcezSum = WarpReduce(tempStorage[warpId]).Sum(iforce.z);
1644 WARP_SYNC(WARP_FULL_MASK);
1646 virialEnergy[itileList].forcex = iforcexSum;
1647 virialEnergy[itileList].forcey = iforceySum;
1648 virialEnergy[itileList].forcez = iforcezSum;
1652 iforcexSum = WarpReduce(tempStorage[warpId]).Sum(iforceSlow.x);
1653 WARP_SYNC(WARP_FULL_MASK);
1654 iforceySum = WarpReduce(tempStorage[warpId]).Sum(iforceSlow.y);
1655 WARP_SYNC(WARP_FULL_MASK);
1656 iforcezSum = WarpReduce(tempStorage[warpId]).Sum(iforceSlow.z);
1657 WARP_SYNC(WARP_FULL_MASK);
1659 virialEnergy[itileList].forceSlowx = iforcexSum;
1660 virialEnergy[itileList].forceSlowy = iforceySum;
1661 virialEnergy[itileList].forceSlowz = iforcezSum;
1668 // NOTE: We must hand write these warp-wide reductions to avoid excess register spillage
1669 // (Why does CUB suck here?)
1671 for (int i=16;i >= 1;i/=2) {
1672 energyVdw += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energyVdw, i, 32);
1673 energyElec += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energyElec, i, 32);
1674 if(doFEP) energyVdw_s += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energyVdw_s, i, 32);
1675 if(doFEP) energyElec_s += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energyElec_s, i, 32);
1677 energyVdw_ti_1 += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energyVdw_ti_1, i, 32);
1678 energyVdw_ti_2 += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energyVdw_ti_2, i, 32);
1679 energyElec_ti_1 += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energyElec_ti_1, i, 32);
1680 energyElec_ti_2 += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energyElec_ti_2, i, 32);
1683 energySlow += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energySlow, i, 32);
1684 if(doFEP) energySlow_s += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energySlow_s, i, 32);
1686 energySlow_ti_1 += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energySlow_ti_1, i, 32);
1687 energySlow_ti_2 += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energySlow_ti_2, i, 32);
1692 if (threadIdx.x % WARPSIZE == 0) {
1693 virialEnergy[itileList].energyVdw = energyVdw;
1694 virialEnergy[itileList].energyElec = energyElec;
1695 if (doFEP) virialEnergy[itileList].energyVdw_s = energyVdw_s;
1696 if (doFEP) virialEnergy[itileList].energyElec_s = energyElec_s;
1698 virialEnergy[itileList].energyVdw_ti_1 = energyVdw_ti_1;
1699 virialEnergy[itileList].energyVdw_ti_2 = energyVdw_ti_2;
1700 virialEnergy[itileList].energyElec_ti_1 = energyElec_ti_1;
1701 virialEnergy[itileList].energyElec_ti_2 = energyElec_ti_2;
1704 virialEnergy[itileList].energySlow = energySlow;
1705 if(doFEP) virialEnergy[itileList].energySlow_s = energySlow_s;
1707 virialEnergy[itileList].energySlow_ti_1 = energySlow_ti_1;
1708 virialEnergy[itileList].energySlow_ti_2 = energySlow_ti_2;
1713 // XXX TODO: Disable streaming and see what happens
1716 // Make sure devForces and devForcesSlow have been written into device memory
1717 WARP_SYNC(WARP_FULL_MASK);
1720 int patchDone[2] = {false, false};
1721 const int wid = threadIdx.x % WARPSIZE;
1723 int patchCountOld0 = atomicInc(&patchNumCount[patchInd.x], (unsigned int)(patchNumList.x-1));
1724 patchDone[0] = (patchCountOld0 + 1 == patchNumList.x);
1725 if (patchInd.x != patchInd.y) {
1726 int patchCountOld1 = atomicInc(&patchNumCount[patchInd.y], (unsigned int)(patchNumList.y-1));
1727 patchDone[1] = (patchCountOld1 + 1 == patchNumList.y);
1731 patchDone[0] = WARP_ANY(WARP_FULL_MASK, patchDone[0]);
1732 patchDone[1] = WARP_ANY(WARP_FULL_MASK, patchDone[1]);
1735 // Patch 1 is done, write onto host-mapped memory
1736 CudaPatchRecord patch = cudaPatches[patchInd.x];
1737 int start = patch.atomStart;
1738 int end = start + patch.numAtoms;
1739 for (int i=start+wid;i < end;i+=WARPSIZE) {
1740 mapForces[i] = make_float4(devForce_x[i],
1741 devForce_y[i], devForce_z[i], devForce_w[i]);
1743 mapForcesSlow[i] = make_float4(devForceSlow_x[i],
1744 devForceSlow_y[i], devForceSlow_z[i], devForceSlow_w[i]);
1750 CudaPatchRecord patch = cudaPatches[patchInd.y];
1751 int start = patch.atomStart;
1752 int end = start + patch.numAtoms;
1753 for (int i=start+wid;i < end;i+=WARPSIZE) {
1754 mapForces[i] = make_float4(devForce_x[i],
1755 devForce_y[i], devForce_z[i], devForce_w[i]);
1757 mapForcesSlow[i] = make_float4(devForceSlow_x[i],
1758 devForceSlow_y[i], devForceSlow_z[i], devForceSlow_w[i]);
1763 if (patchDone[0] || patchDone[1]) {
1764 // Make sure mapForces and mapForcesSlow are up-to-date
1765 WARP_SYNC(WARP_FULL_MASK);
1766 __threadfence_system();
1767 // Add patch into "patchReadyQueue"
1770 int ind = atomicAdd(&tileListStat->patchReadyQueueCount, 1);
1771 // int ind = atomicInc((unsigned int *)&mapPatchReadyQueue[numPatches], numPatches-1);
1772 mapPatchReadyQueue[ind] = patchInd.x;
1775 int ind = atomicAdd(&tileListStat->patchReadyQueueCount, 1);
1776 // int ind = atomicInc((unsigned int *)&mapPatchReadyQueue[numPatches], numPatches-1);
1777 mapPatchReadyQueue[ind] = patchInd.y;
1783 if (doStreaming && outputOrder != NULL && threadIdx.x % WARPSIZE == 0) {
1784 int index = atomicAdd(&tileListStat->outputOrderIndex, 1);
1785 outputOrder[index] = itileList;
1787 } // if (itileList < numTileLists)
1791 // Finish up - reduce virials from nonbonded kernel
1793 #define REDUCENONBONDEDVIRIALKERNEL_NUM_WARP 32
// Accumulates the nonbonded virial tensor over all atoms.
//
// Each thread handles atom i = ibase + threadIdx.x and forms the nine products
// force.{x,y,z} * pos.{x,y,z}. The products are summed over the thread block
// with cub::BlockReduce, and thread 0 atomically adds the block totals into
// virialEnergy->virial[0..8] in the order xx, xy, xz, yx, yy, yz, zx, zy, zz
// (first letter = force component, second letter = position component).
// The same nine products are then formed with forceSlow and added into
// virialEnergy->virialSlow[0..8].
//
// Parameters:
//   doSlow          - when true, devForcesSlow[i] is loaded into forceSlow
//   atomStorageSize - upper bound for the atom index i
//   xyzq            - NOTE(review): not referenced in the lines visible in this
//                     listing; presumably the source of `pos` - confirm
//   devForces       - per-atom forces (float4), read at index i
//   devForcesSlow   - per-atom slow forces (float4), read only when doSlow
//   virialEnergy    - output; virial[] and virialSlow[] are updated via atomicAdd
//
// NOTE(review): BlockReduce is specialized for
// REDUCENONBONDEDVIRIALKERNEL_NUM_WARP*WARPSIZE threads, so the kernel
// presumably has to be launched with exactly that blockDim.x - confirm at the
// launch site.
// NOTE(review): the declaration/initialisation of `pos`, the zeroing referred to
// by the "Set to zero" comment, and any doSlow guard around the slow section
// are not visible in this listing - verify against the full file.
1794 __global__ void reduceNonbondedVirialKernel(const bool doSlow,
1795 const int atomStorageSize,
1796 const float4* __restrict__ xyzq,
1797 const float4* __restrict__ devForces, const float4* __restrict__ devForcesSlow,
1798 VirialEnergy* __restrict__ virialEnergy) {
// ibase depends only on blockIdx/blockDim/gridDim, so it is identical for every
// thread of a block: all threads execute the same number of iterations and
// reach each BlockReduce / BLOCK_SYNC below together.
1800 for (int ibase = blockIdx.x*blockDim.x;ibase < atomStorageSize;ibase += blockDim.x*gridDim.x)
// Atom index handled by this thread in the current iteration; it may be
// >= atomStorageSize in the last iteration, hence the bounds check below.
1802 int i = ibase + threadIdx.x;
1804 // Set to zero to avoid nan*0
1809 float4 force, forceSlow;
// Only in-range threads read from global memory.
1816 if (i < atomStorageSize) {
1818 force = devForces[i];
1819 if (doSlow) forceSlow = devForcesSlow[i];
1821 // Reduce across the entire thread block
// Per-thread contributions: all nine force-component x position-component products.
1822 float vxxt = force.x*pos.x;
1823 float vxyt = force.x*pos.y;
1824 float vxzt = force.x*pos.z;
1825 float vyxt = force.y*pos.x;
1826 float vyyt = force.y*pos.y;
1827 float vyzt = force.y*pos.z;
1828 float vzxt = force.z*pos.x;
1829 float vzyt = force.z*pos.y;
1830 float vzzt = force.z*pos.z;
1831 // atomicAdd(&virialEnergy->virial[0], (double)vxx);
1832 // atomicAdd(&virialEnergy->virial[1], (double)vxy);
1833 // atomicAdd(&virialEnergy->virial[2], (double)vxz);
1834 // atomicAdd(&virialEnergy->virial[3], (double)vyx);
1835 // atomicAdd(&virialEnergy->virial[4], (double)vyy);
1836 // atomicAdd(&virialEnergy->virial[5], (double)vyz);
1837 // atomicAdd(&virialEnergy->virial[6], (double)vzx);
1838 // atomicAdd(&virialEnergy->virial[7], (double)vzy);
1839 // atomicAdd(&virialEnergy->virial[8], (double)vzz);
// Block-wide float sums. A single TempStorage in shared memory is reused by
// every reduction, which is why each Sum() is followed by BLOCK_SYNC before the
// next one (BLOCK_SYNC is defined elsewhere; presumably a block barrier).
1841 typedef cub::BlockReduce<float, REDUCENONBONDEDVIRIALKERNEL_NUM_WARP*WARPSIZE> BlockReduce;
1842 __shared__ typename BlockReduce::TempStorage tempStorage;
1843 volatile float vxx = BlockReduce(tempStorage).Sum(vxxt); BLOCK_SYNC;
1844 volatile float vxy = BlockReduce(tempStorage).Sum(vxyt); BLOCK_SYNC;
1845 volatile float vxz = BlockReduce(tempStorage).Sum(vxzt); BLOCK_SYNC;
1846 volatile float vyx = BlockReduce(tempStorage).Sum(vyxt); BLOCK_SYNC;
1847 volatile float vyy = BlockReduce(tempStorage).Sum(vyyt); BLOCK_SYNC;
1848 volatile float vyz = BlockReduce(tempStorage).Sum(vyzt); BLOCK_SYNC;
1849 volatile float vzx = BlockReduce(tempStorage).Sum(vzxt); BLOCK_SYNC;
1850 volatile float vzy = BlockReduce(tempStorage).Sum(vzyt); BLOCK_SYNC;
1851 volatile float vzz = BlockReduce(tempStorage).Sum(vzzt); BLOCK_SYNC;
// cub::BlockReduce only defines the aggregate in thread 0, so that thread alone
// publishes the block totals; the float sums are widened to double for the
// global accumulation.
1852 if (threadIdx.x == 0) {
1853 atomicAdd(&virialEnergy->virial[0], (double)vxx);
1854 atomicAdd(&virialEnergy->virial[1], (double)vxy);
1855 atomicAdd(&virialEnergy->virial[2], (double)vxz);
1856 atomicAdd(&virialEnergy->virial[3], (double)vyx);
1857 atomicAdd(&virialEnergy->virial[4], (double)vyy);
1858 atomicAdd(&virialEnergy->virial[5], (double)vyz);
1859 atomicAdd(&virialEnergy->virial[6], (double)vzx);
1860 atomicAdd(&virialEnergy->virial[7], (double)vzy);
1861 atomicAdd(&virialEnergy->virial[8], (double)vzz);
1865 // if (isnan(forceSlow.x) || isnan(forceSlow.y) || isnan(forceSlow.z))
// Slow-force counterpart: same nine products, using forceSlow instead of force.
1866 float vxxSlowt = forceSlow.x*pos.x;
1867 float vxySlowt = forceSlow.x*pos.y;
1868 float vxzSlowt = forceSlow.x*pos.z;
1869 float vyxSlowt = forceSlow.y*pos.x;
1870 float vyySlowt = forceSlow.y*pos.y;
1871 float vyzSlowt = forceSlow.y*pos.z;
1872 float vzxSlowt = forceSlow.z*pos.x;
1873 float vzySlowt = forceSlow.z*pos.y;
1874 float vzzSlowt = forceSlow.z*pos.z;
1875 // atomicAdd(&virialEnergy->virialSlow[0], (double)vxxSlow);
1876 // atomicAdd(&virialEnergy->virialSlow[1], (double)vxySlow);
1877 // atomicAdd(&virialEnergy->virialSlow[2], (double)vxzSlow);
1878 // atomicAdd(&virialEnergy->virialSlow[3], (double)vyxSlow);
1879 // atomicAdd(&virialEnergy->virialSlow[4], (double)vyySlow);
1880 // atomicAdd(&virialEnergy->virialSlow[5], (double)vyzSlow);
1881 // atomicAdd(&virialEnergy->virialSlow[6], (double)vzxSlow);
1882 // atomicAdd(&virialEnergy->virialSlow[7], (double)vzySlow);
1883 // atomicAdd(&virialEnergy->virialSlow[8], (double)vzzSlow);
// Same reduction scheme as above, reusing the same shared tempStorage.
1884 volatile float vxxSlow = BlockReduce(tempStorage).Sum(vxxSlowt); BLOCK_SYNC;
1885 volatile float vxySlow = BlockReduce(tempStorage).Sum(vxySlowt); BLOCK_SYNC;
1886 volatile float vxzSlow = BlockReduce(tempStorage).Sum(vxzSlowt); BLOCK_SYNC;
1887 volatile float vyxSlow = BlockReduce(tempStorage).Sum(vyxSlowt); BLOCK_SYNC;
1888 volatile float vyySlow = BlockReduce(tempStorage).Sum(vyySlowt); BLOCK_SYNC;
1889 volatile float vyzSlow = BlockReduce(tempStorage).Sum(vyzSlowt); BLOCK_SYNC;
1890 volatile float vzxSlow = BlockReduce(tempStorage).Sum(vzxSlowt); BLOCK_SYNC;
1891 volatile float vzySlow = BlockReduce(tempStorage).Sum(vzySlowt); BLOCK_SYNC;
1892 volatile float vzzSlow = BlockReduce(tempStorage).Sum(vzzSlowt); BLOCK_SYNC;
// Thread 0 publishes the slow-force block totals into virialSlow[0..8].
1893 if (threadIdx.x == 0) {
1894 atomicAdd(&virialEnergy->virialSlow[0], (double)vxxSlow);
1895 atomicAdd(&virialEnergy->virialSlow[1], (double)vxySlow);
1896 atomicAdd(&virialEnergy->virialSlow[2], (double)vxzSlow);
1897 atomicAdd(&virialEnergy->virialSlow[3], (double)vyxSlow);
1898 atomicAdd(&virialEnergy->virialSlow[4], (double)vyySlow);
1899 atomicAdd(&virialEnergy->virialSlow[5], (double)vyzSlow);
1900 atomicAdd(&virialEnergy->virialSlow[6], (double)vzxSlow);
1901 atomicAdd(&virialEnergy->virialSlow[7], (double)vzySlow);
1902 atomicAdd(&virialEnergy->virialSlow[8], (double)vzzSlow);
// Block size of reduceVirialEnergyKernel, expressed in warps. Every cub::BlockReduce
// in the kernel is instantiated for REDUCEVIRIALENERGYKERNEL_NUM_WARP*WARPSIZE threads,
// so the kernel is expected to be launched with exactly that many threads per block.
1909 #define REDUCEVIRIALENERGYKERNEL_NUM_WARP 32
//
// Reduces the per-tile-list records tileListVirialEnergy[0 .. numTileLists) into the
// single accumulator *virialEnergy.
//
// Each thread takes one record per pass of the outer loop, the block sums the
// per-thread values with cub::BlockReduce, and thread 0 adds the block total to
// *virialEnergy with atomicAdd (so several blocks may contribute concurrently).
//
// Parameters:
//   doEnergy, doVirial, doSlow - select which quantities are reduced.
//       NOTE(review): the conditionals that test these flags are not among the
//       statements shown below; presumably doVirial guards the virial sections,
//       doSlow the *Slow parts and doEnergy the energy sections - confirm.
//   numTileLists          - number of valid entries in tileListVirialEnergy.
//   tileListVirialEnergy  - input records, one per tile list.
//   virialEnergy          - output accumulator; this kernel only adds to it.
//
1910 __global__ void reduceVirialEnergyKernel(
1911 const bool doEnergy, const bool doVirial, const bool doSlow,
1912 const int numTileLists,
1913 const TileListVirialEnergy* __restrict__ tileListVirialEnergy,
1914 VirialEnergy* __restrict__ virialEnergy) {
// ibase is the first record index handled by this block in the current pass; the
// loop advances by the total number of threads in the grid. The loop condition
// depends only on block-uniform values, so all threads of a block run the same
// number of passes and reach the same block-wide reductions.
1916 for (int ibase = blockIdx.x*blockDim.x;ibase < numTileLists;ibase += blockDim.x*gridDim.x)
// One record per thread.
1918 int itileList = ibase + threadIdx.x;
1919 TileListVirialEnergy ve;
1920 if (itileList < numTileLists) {
1921 ve = tileListVirialEnergy[itileList];
// Threads without a valid record use an all-zero one, so they can still take part in
// the block-wide reductions without changing the sums.
1923 // Set to zero to avoid nan*0
1931 ve.forceSlowx = 0.0f;
1932 ve.forceSlowy = 0.0f;
1933 ve.forceSlowz = 0.0f;
1937 ve.energyVdw_s = 0.0;
1938 ve.energyElec = 0.0;
1939 ve.energySlow = 0.0;
1940 ve.energyElec_s = 0.0;
1941 ve.energySlow_s = 0.0;
1944 ve.energyVdw_ti_1 = 0.0;
1945 ve.energyVdw_ti_2 = 0.0;
1946 ve.energyElec_ti_1 = 0.0;
1947 ve.energyElec_ti_2 = 0.0;
1948 ve.energySlow_ti_1 = 0.0;
1949 ve.energySlow_ti_2 = 0.0;
1950 // ve.energyGBIS = 0.0;
// ---- Virial from force{x,y,z} ----
// The nine products force_a * sh_b (a, b in {x, y, z}) are formed per thread and
// summed over the block in single precision.
1955 typedef cub::BlockReduce<float, REDUCEVIRIALENERGYKERNEL_NUM_WARP*WARPSIZE> BlockReduce;
1956 __shared__ typename BlockReduce::TempStorage tempStorage;
1957 float vxxt = ve.forcex*ve.shx;
1958 float vxyt = ve.forcex*ve.shy;
1959 float vxzt = ve.forcex*ve.shz;
1960 float vyxt = ve.forcey*ve.shx;
1961 float vyyt = ve.forcey*ve.shy;
1962 float vyzt = ve.forcey*ve.shz;
1963 float vzxt = ve.forcez*ve.shx;
1964 float vzyt = ve.forcez*ve.shy;
1965 float vzzt = ve.forcez*ve.shz;
// The same tempStorage is reused by every Sum() call; cub requires a block barrier
// between such reuses, which is what BLOCK_SYNC is presumably for (macro defined
// outside this kernel - TODO confirm).
1966 volatile float vxx = BlockReduce(tempStorage).Sum(vxxt); BLOCK_SYNC;
1967 volatile float vxy = BlockReduce(tempStorage).Sum(vxyt); BLOCK_SYNC;
1968 volatile float vxz = BlockReduce(tempStorage).Sum(vxzt); BLOCK_SYNC;
1969 volatile float vyx = BlockReduce(tempStorage).Sum(vyxt); BLOCK_SYNC;
1970 volatile float vyy = BlockReduce(tempStorage).Sum(vyyt); BLOCK_SYNC;
1971 volatile float vyz = BlockReduce(tempStorage).Sum(vyzt); BLOCK_SYNC;
1972 volatile float vzx = BlockReduce(tempStorage).Sum(vzxt); BLOCK_SYNC;
1973 volatile float vzy = BlockReduce(tempStorage).Sum(vzyt); BLOCK_SYNC;
1974 volatile float vzz = BlockReduce(tempStorage).Sum(vzzt); BLOCK_SYNC;
// cub::BlockReduce::Sum returns the block total only to thread 0, so only that thread
// publishes. The block totals are widened to double and stored in the order
// xx, xy, xz, yx, yy, yz, zx, zy, zz.
1975 if (threadIdx.x == 0) {
1976 atomicAdd(&virialEnergy->virial[0], (double)vxx);
1977 atomicAdd(&virialEnergy->virial[1], (double)vxy);
1978 atomicAdd(&virialEnergy->virial[2], (double)vxz);
1979 atomicAdd(&virialEnergy->virial[3], (double)vyx);
1980 atomicAdd(&virialEnergy->virial[4], (double)vyy);
1981 atomicAdd(&virialEnergy->virial[5], (double)vyz);
1982 atomicAdd(&virialEnergy->virial[6], (double)vzx);
1983 atomicAdd(&virialEnergy->virial[7], (double)vzy);
1984 atomicAdd(&virialEnergy->virial[8], (double)vzz);
// ---- Virial from forceSlow{x,y,z} ----
// Same computation with the forceSlow components, accumulated into virialSlow[].
// NOTE(review): BlockReduce, tempStorage and the v** names are declared again here, so
// this section must live in its own scope (presumably an `if (doSlow)` body) - confirm.
1988 typedef cub::BlockReduce<float, REDUCEVIRIALENERGYKERNEL_NUM_WARP*WARPSIZE> BlockReduce;
1989 __shared__ typename BlockReduce::TempStorage tempStorage;
1990 float vxxt = ve.forceSlowx*ve.shx;
1991 float vxyt = ve.forceSlowx*ve.shy;
1992 float vxzt = ve.forceSlowx*ve.shz;
1993 float vyxt = ve.forceSlowy*ve.shx;
1994 float vyyt = ve.forceSlowy*ve.shy;
1995 float vyzt = ve.forceSlowy*ve.shz;
1996 float vzxt = ve.forceSlowz*ve.shx;
1997 float vzyt = ve.forceSlowz*ve.shy;
1998 float vzzt = ve.forceSlowz*ve.shz;
1999 volatile float vxx = BlockReduce(tempStorage).Sum(vxxt); BLOCK_SYNC;
2000 volatile float vxy = BlockReduce(tempStorage).Sum(vxyt); BLOCK_SYNC;
2001 volatile float vxz = BlockReduce(tempStorage).Sum(vxzt); BLOCK_SYNC;
2002 volatile float vyx = BlockReduce(tempStorage).Sum(vyxt); BLOCK_SYNC;
2003 volatile float vyy = BlockReduce(tempStorage).Sum(vyyt); BLOCK_SYNC;
2004 volatile float vyz = BlockReduce(tempStorage).Sum(vyzt); BLOCK_SYNC;
2005 volatile float vzx = BlockReduce(tempStorage).Sum(vzxt); BLOCK_SYNC;
2006 volatile float vzy = BlockReduce(tempStorage).Sum(vzyt); BLOCK_SYNC;
2007 volatile float vzz = BlockReduce(tempStorage).Sum(vzzt); BLOCK_SYNC;
2008 if (threadIdx.x == 0) {
2009 atomicAdd(&virialEnergy->virialSlow[0], (double)vxx);
2010 atomicAdd(&virialEnergy->virialSlow[1], (double)vxy);
2011 atomicAdd(&virialEnergy->virialSlow[2], (double)vxz);
2012 atomicAdd(&virialEnergy->virialSlow[3], (double)vyx);
2013 atomicAdd(&virialEnergy->virialSlow[4], (double)vyy);
2014 atomicAdd(&virialEnergy->virialSlow[5], (double)vyz);
2015 atomicAdd(&virialEnergy->virialSlow[6], (double)vzx);
2016 atomicAdd(&virialEnergy->virialSlow[7], (double)vzy);
2017 atomicAdd(&virialEnergy->virialSlow[8], (double)vzz);
// ---- Energies ----
// Energies are reduced in double precision (BlockReduce<double, ...>), unlike the
// virial sections above which reduce in float. The *_s and *_ti_1 / *_ti_2 fields are
// reduced unconditionally here, which is what the existing remark below refers to.
2023 typedef cub::BlockReduce<double, REDUCEVIRIALENERGYKERNEL_NUM_WARP*WARPSIZE> BlockReduce;
2024 /* Maybe we should guard the TI and FEP energies, since those are not to be calculated on regular MDs */
2025 __shared__ typename BlockReduce::TempStorage tempStorage;
2026 volatile double energyVdw = BlockReduce(tempStorage).Sum(ve.energyVdw); BLOCK_SYNC;
2027 volatile double energyVdw_s = BlockReduce(tempStorage).Sum(ve.energyVdw_s); BLOCK_SYNC;
2028 volatile double energyElec = BlockReduce(tempStorage).Sum(ve.energyElec); BLOCK_SYNC;
2029 volatile double energyElec_s = BlockReduce(tempStorage).Sum(ve.energyElec_s); BLOCK_SYNC;
2030 volatile double energyVdw_ti_1 = BlockReduce(tempStorage).Sum(ve.energyVdw_ti_1); BLOCK_SYNC;
2031 volatile double energyVdw_ti_2 = BlockReduce(tempStorage).Sum(ve.energyVdw_ti_2); BLOCK_SYNC;
2032 volatile double energyElec_ti_1= BlockReduce(tempStorage).Sum(ve.energyElec_ti_1); BLOCK_SYNC;
2033 volatile double energyElec_ti_2= BlockReduce(tempStorage).Sum(ve.energyElec_ti_2); BLOCK_SYNC;
2034 if (threadIdx.x == 0){
2035 atomicAdd(&virialEnergy->energyVdw, (double)energyVdw);
2036 atomicAdd(&virialEnergy->energyVdw_s, (double)energyVdw_s);
2037 atomicAdd(&virialEnergy->energyElec, (double)energyElec);
2038 atomicAdd(&virialEnergy->energyElec_s, (double)energyElec_s);
2039 atomicAdd(&virialEnergy->energyVdw_ti_1, (double)energyVdw_ti_1);
2040 atomicAdd(&virialEnergy->energyVdw_ti_2, (double)energyVdw_ti_2);
2041 atomicAdd(&virialEnergy->energyElec_ti_1, (double)energyElec_ti_1);
2042 atomicAdd(&virialEnergy->energyElec_ti_2, (double)energyElec_ti_2);
// Slow energy terms. These reuse the BlockReduce type and tempStorage declared for the
// energy section above (nothing is redeclared here).
// NOTE(review): presumably guarded by doSlow - confirm.
2045 volatile double energySlow = BlockReduce(tempStorage).Sum(ve.energySlow); BLOCK_SYNC;
2046 volatile double energySlow_s = BlockReduce(tempStorage).Sum(ve.energySlow_s); BLOCK_SYNC;
2047 volatile double energySlow_ti_1 = BlockReduce(tempStorage).Sum(ve.energySlow_ti_1); BLOCK_SYNC;
2048 volatile double energySlow_ti_2 = BlockReduce(tempStorage).Sum(ve.energySlow_ti_2); BLOCK_SYNC;
2049 if (threadIdx.x == 0) {
2050 atomicAdd(&virialEnergy->energySlow, (double)energySlow);
2051 atomicAdd(&virialEnergy->energySlow_s, (double)energySlow_s);
2052 atomicAdd(&virialEnergy->energySlow_ti_1,(double)energySlow_ti_1);
2053 atomicAdd(&virialEnergy->energySlow_ti_2,(double)energySlow_ti_2);
// energyGBIS is not reduced here (the code below is disabled); reduceGBISEnergyKernel
// performs that reduction.
2057 // double energyGBIS = BlockReduce(tempStorage).Sum(ve.energyGBIS); BLOCK_SYNC;
2058 // if (threadIdx.x == 0) atomicAdd(&virialEnergy->energyGBIS, (double)energyGBIS);
// Block size of reduceGBISEnergyKernel, expressed in warps. The kernel must be launched
// with exactly REDUCEGBISENERGYKERNEL_NUM_WARP*WARPSIZE threads per block, because its
// cub::BlockReduce is instantiated for that thread count.
#define REDUCEGBISENERGYKERNEL_NUM_WARP 32
//
// Sums the energyGBIS field of tileListVirialEnergy[0 .. numTileLists) and adds the
// result to virialEnergy->energyGBIS.
//
// Each thread reads one record per pass (threads past the end contribute 0.0), the
// block reduces the values in double precision with cub::BlockReduce, and thread 0
// publishes the block total with atomicAdd so that several blocks can accumulate into
// the same output.
//
// Parameters:
//   numTileLists          - number of valid entries in tileListVirialEnergy.
//   tileListVirialEnergy  - input records, one per tile list.
//   virialEnergy          - output accumulator; only energyGBIS is touched.
//
__global__ void reduceGBISEnergyKernel(const int numTileLists,
  const TileListVirialEnergy* __restrict__ tileListVirialEnergy,
  VirialEnergy* __restrict__ virialEnergy) {

  // The loop bound depends only on block-uniform values, so every thread of a block
  // executes the same number of passes and reaches the block-wide reduction together.
  for (int ibase = blockIdx.x*blockDim.x;ibase < numTileLists;ibase += blockDim.x*gridDim.x)
  {
    int itileList = ibase + threadIdx.x;
    double energyGBISt = 0.0;
    if (itileList < numTileLists) {
      energyGBISt = tileListVirialEnergy[itileList].energyGBIS;
    }

    // Size the reduction with this kernel's own block-size macro. The previous code used
    // REDUCEVIRIALENERGYKERNEL_NUM_WARP, which only worked because both macros are 32.
    typedef cub::BlockReduce<double, REDUCEGBISENERGYKERNEL_NUM_WARP*WARPSIZE> BlockReduce;
    __shared__ typename BlockReduce::TempStorage tempStorage;
    // BLOCK_SYNC keeps tempStorage from being reused by the next pass while other
    // threads may still be reading it.
    volatile double energyGBIS = BlockReduce(tempStorage).Sum(energyGBISt); BLOCK_SYNC;
    // Only thread 0 receives the block total from cub::BlockReduce::Sum.
    if (threadIdx.x == 0) atomicAdd(&virialEnergy->energyGBIS, (double)energyGBIS);
  }

}
2087 // ##############################################################################################
2088 // ##############################################################################################
2089 // ##############################################################################################
//
// Constructor. Stores the device ID, the reference to the nonbonded tables and the
// streaming flag, makes deviceID the current CUDA device, and puts the buffers owned by
// this object into the "not allocated" state (NULL pointer, size 0) so that later
// reallocate_* calls and the destructor's NULL checks behave correctly.
//
// No device or host memory is allocated by the statements shown here.
//
2091 CudaComputeNonbondedKernel::CudaComputeNonbondedKernel(int deviceID, CudaNonbondedTables& cudaNonbondedTables,
2092 bool doStreaming) : deviceID(deviceID), cudaNonbondedTables(cudaNonbondedTables), doStreaming(doStreaming) {
2094 cudaCheck(cudaSetDevice(deviceID));
2096 d_exclusionsByAtom = NULL;
2098 overflowExclusions = NULL;
2099 overflowExclusionsSize = 0;
2101 exclIndexMaxDiff = NULL;
2102 exclIndexMaxDiffSize = 0;
2110 patchNumCount = NULL;
2111 patchNumCountSize = 0;
2113 patchReadyQueue = NULL;
2114 patchReadyQueueSize = 0;
// Structure-of-arrays force buffers (one float array per component).
2116 force_x = force_y = force_z = force_w = NULL;
2118 forceSlow_x = forceSlow_y = forceSlow_z = forceSlow_w = NULL;
2123 drudeAtomAlpha = nullptr;
2124 drudeAtomAlphaSize = 0;
//
// (Re)allocates the structure-of-arrays force buffers force_{x,y,z,w} and
// forceSlow_{x,y,z,w} so that each can hold atomStorageSize floats. All allocations
// use an over-allocation factor of 1.4.
//
// Two layouts appear below:
//   1. eight independent device allocations, one per component;
//   2. one allocation of atomStorageSize*8 floats owned by force_x, with the other seven
//      pointers set to consecutive atomStorageSize-sized slices of it.
// NOTE(review): the preprocessor condition choosing between the two layouts is not
// among the statements shown here - confirm which one is compiled.
//
2127 void CudaComputeNonbondedKernel::reallocate_forceSOA(int atomStorageSize)
// ---- Layout 1: one allocation per component ----
2130 size_t forceSizeCurrent;
2132 // reallocate_device will update forceSizeCurrent, so we need to reset it to the current
2133 // value for each array
2134 forceSizeCurrent = forceSize;
2135 reallocate_device<float>(&force_x, &forceSizeCurrent, atomStorageSize, 1.4f);
2136 forceSizeCurrent = forceSize;
2137 reallocate_device<float>(&force_y, &forceSizeCurrent, atomStorageSize, 1.4f);
2138 forceSizeCurrent = forceSize;
2139 reallocate_device<float>(&force_z, &forceSizeCurrent, atomStorageSize, 1.4f);
2140 forceSizeCurrent = forceSize;
2141 reallocate_device<float>(&force_w, &forceSizeCurrent, atomStorageSize, 1.4f);
// NOTE(review): in the statements shown, the updated size is never written back to
// forceSize / forceSlowSize - confirm that this happens on a line not shown here.
2144 size_t forceSlowSizeCurrent;
2145 forceSlowSizeCurrent = forceSlowSize;
2146 reallocate_device<float>(&forceSlow_x, &forceSlowSizeCurrent, atomStorageSize, 1.4f);
2147 forceSlowSizeCurrent = forceSlowSize;
2148 reallocate_device<float>(&forceSlow_y, &forceSlowSizeCurrent, atomStorageSize, 1.4f);
2149 forceSlowSizeCurrent = forceSlowSize;
2150 reallocate_device<float>(&forceSlow_z, &forceSlowSizeCurrent, atomStorageSize, 1.4f);
2151 forceSlowSizeCurrent = forceSlowSize;
2152 reallocate_device<float>(&forceSlow_w, &forceSlowSizeCurrent, atomStorageSize, 1.4f);
// ---- Layout 2: one packed allocation ----
// force_x owns a buffer of atomStorageSize*8 floats; the remaining pointers are offsets
// into it, in the order x, y, z, w, slow x, slow y, slow z, slow w. The four components
// of each group are therefore contiguous, which is what allows a caller to clear
// atomStorageSize*4 floats starting at force_x or forceSlow_x in one call.
// The slice pointers are recomputed on every call because both the base pointer and
// atomStorageSize may have changed.
2154 reallocate_device<float>(&force_x, &forceSize, atomStorageSize*8, 1.4f);
2155 force_y = force_x + atomStorageSize;
2156 force_z = force_y + atomStorageSize;
2157 force_w = force_z + atomStorageSize;
2158 forceSlow_x = force_w + atomStorageSize;
2159 forceSlow_y = forceSlow_x + atomStorageSize;
2160 forceSlow_z = forceSlow_y + atomStorageSize;
2161 forceSlow_w = forceSlow_z + atomStorageSize;
//
// Destructor. Makes deviceID the current CUDA device and releases every buffer whose
// pointer is non-NULL. patchReadyQueue is host memory (deallocate_host); all other
// buffers released here are device memory (deallocate_device).
//
2165 CudaComputeNonbondedKernel::~CudaComputeNonbondedKernel() {
2166 cudaCheck(cudaSetDevice(deviceID));
2167 if (overflowExclusions != NULL) deallocate_device<unsigned int>(&overflowExclusions);
2168 if (exclIndexMaxDiff != NULL) deallocate_device<int2>(&exclIndexMaxDiff);
2169 if (atomIndex != NULL) deallocate_device<int>(&atomIndex);
2170 if (vdwTypes != NULL) deallocate_device<int>(&vdwTypes);
2171 if (patchNumCount != NULL) deallocate_device<unsigned int>(&patchNumCount);
2172 if (patchReadyQueue != NULL) deallocate_host<int>(&patchReadyQueue);
// Force buffers, per-component layout: each of the eight arrays is freed on its own.
// NOTE(review): these eight frees are only valid when each component was allocated
// separately. With the single packed allocation made by reallocate_forceSOA,
// force_y .. forceSlow_w point into force_x's buffer and must not be freed
// individually - confirm these lines are compiled only for the matching layout.
2174 if (force_x != NULL) deallocate_device<float>(&force_x);
2175 if (force_y != NULL) deallocate_device<float>(&force_y);
2176 if (force_z != NULL) deallocate_device<float>(&force_z);
2177 if (force_w != NULL) deallocate_device<float>(&force_w);
2178 if (forceSlow_x != NULL) deallocate_device<float>(&forceSlow_x);
2179 if (forceSlow_y != NULL) deallocate_device<float>(&forceSlow_y);
2180 if (forceSlow_z != NULL) deallocate_device<float>(&forceSlow_z);
2181 if (forceSlow_w != NULL) deallocate_device<float>(&forceSlow_w);
// Force buffers, packed layout: force_x owns the whole allocation, so a single free
// releases all eight component arrays. Presumably the alternative preprocessor branch
// to the eight frees above - confirm.
2183 if (force_x != NULL) deallocate_device<float>(&force_x);
2185 if (isDrude != nullptr) deallocate_device(&isDrude);
2186 if (drudeAtomAlpha != nullptr) deallocate_device(&drudeAtomAlpha);
//
// Uploads the per-atom VdW types, exclusion index/max-diff pairs and atom indices to
// the device.
//
// Parameters:
//   atomStorageSize     - number of elements copied from each host array.
//   h_vdwTypes          - host source for vdwTypes.
//   h_exclIndexMaxDiff  - host source for exclIndexMaxDiff.
//   h_atomIndex         - host source for atomIndex.
//   stream              - CUDA stream passed to the three host-to-device copies.
//
// The device arrays are first grown, if needed, to hold atomStorageSize elements, using
// the OVERALLOC factor to reduce the number of future reallocations.
// NOTE(review): whether copy_HtoD returns before the copy has completed depends on its
// implementation (not visible here); callers should presumably keep the host arrays
// alive until `stream` has been synchronized - confirm.
//
2189 void CudaComputeNonbondedKernel::updateVdwTypesExcl(const int atomStorageSize, const int* h_vdwTypes,
2190 const int2* h_exclIndexMaxDiff, const int* h_atomIndex, cudaStream_t stream) {
2192 reallocate_device<int>(&vdwTypes, &vdwTypesSize, atomStorageSize, OVERALLOC);
2193 reallocate_device<int2>(&exclIndexMaxDiff, &exclIndexMaxDiffSize, atomStorageSize, OVERALLOC);
2194 reallocate_device<int>(&atomIndex, &atomIndexSize, atomStorageSize, OVERALLOC);
2196 copy_HtoD<int>(h_vdwTypes, vdwTypes, atomStorageSize, stream);
2197 copy_HtoD<int2>(h_exclIndexMaxDiff, exclIndexMaxDiff, atomStorageSize, stream);
2198 copy_HtoD<int>(h_atomIndex, atomIndex, atomStorageSize, stream);
//
// Returns the host pointer patchReadyQueue.
//
// As the error message states, this accessor is only meaningful for a streaming kernel;
// on a non-streaming kernel the call is fatal (NAMD_die).
// NOTE(review): the condition guarding the NAMD_die call (presumably !doStreaming) is
// not among the statements shown here - confirm.
//
2201 int* CudaComputeNonbondedKernel::getPatchReadyQueue() {
2203 NAMD_die("CudaComputeNonbondedKernel::getPatchReadyQueue() called on non-streaming kernel");
2205 return patchReadyQueue;
//
// Converts forces from structure-of-arrays form (fx, fy, fz, fw and the fSlow*
// counterparts) into array-of-float4 form (f and fSlow), and resets the source arrays
// to zero.
//
// One thread handles one element: tid = blockIdx.x*blockDim.x + threadIdx.x.
//
// Template parameter:
//   doSlow - compile-time switch; presumably selects whether the fSlow arrays are
//            processed as well (the guarding condition is not shown here - confirm).
//
// NOTE(review): the end of the parameter list and the bounds check on tid are not among
// the statements shown here; a guard of the form `tid < n` is expected around the body
// - confirm.
//
2208 template <int doSlow>
2209 __global__ void transposeForcesKernel(float4 *f, float4 *fSlow,
2210 float *fx, float *fy, float *fz, float *fw,
2211 float *fSlowx, float *fSlowy, float *fSlowz, float *fSloww,
2214 int tid = blockIdx.x*blockDim.x + threadIdx.x;
// Pack the four components of element tid into one float4, then clear the
// structure-of-arrays entries (presumably so the next force evaluation accumulates
// from zero).
2216 f[tid] = make_float4(fx[tid], fy[tid], fz[tid], fw[tid]);
2217 fx[tid] = 0.f; fy[tid] = 0.f; fz[tid] = 0.f; fw[tid] = 0.f;
// Same for the slow forces.
2219 fSlow[tid] = make_float4(fSlowx[tid], fSlowy[tid], fSlowz[tid], fSloww[tid]);
2220 fSlowx[tid] = 0.f; fSlowy[tid] = 0.f; fSlowz[tid] = 0.f; fSloww[tid] = 0.f;
2227 void CudaComputeNonbondedKernel::nonbondedForce(CudaTileListKernel& tlKernel,
2228 const int atomStorageSize, const bool atomsChanged, const bool doMinimize,
2229 const bool doPairlist, const bool doEnergy, const bool doVirial,
2230 const bool doSlow, const bool doAlch, const bool doAlchVdwForceSwitching,
2231 const bool doFEP, const bool doTI,
2232 const bool doNbThole, const bool doTable,
2233 const float3 lata, const float3 latb, const float3 latc,
2234 const float4* h_xyzq, const float cutoff2,
2235 const CudaNBConstants nbConstants,
2236 float4* d_forces, float4* d_forcesSlow,
2237 float4* h_forces, float4* h_forcesSlow, AlchData *srcFlags,
2238 bool lambdaWindowUpdated, char *part,
2239 bool CUDASOAintegrator, bool useDeviceMigration,
2240 const float drudeNbtholeCut2,
2241 cudaStream_t stream) {
2243 #ifdef NODEGROUP_FORCE_REGISTER
2244 if (!atomsChanged && !CUDASOAintegrator) copy_HtoD<float4>(h_xyzq, tlKernel.get_xyzq(), atomStorageSize, stream);
2246 if (!doPairlist) copy_HtoD<float4>(h_xyzq, tlKernel.get_xyzq(), atomStorageSize, stream);
2250 // Copy partition to device. This is not necessary if both CUDASOAintegrator and useDeviceMigration
2252 if (doPairlist && (!CUDASOAintegrator || !useDeviceMigration)) {
2253 copy_HtoD< char>(part, tlKernel.get_part(), atomStorageSize, stream);
2255 //Copies flags to constant memory
2256 if(lambdaWindowUpdated) cudaCheck(cudaMemcpyToSymbol(alchflags, srcFlags, sizeof(AlchData)));
2259 // XXX TODO: Get rid of the clears
2261 // clear_device_array<float4>(d_forces, atomStorageSize, stream);
2262 // if (doSlow) clear_device_array<float4>(d_forcesSlow, atomStorageSize, stream);
2264 // This needs to go.
2265 if (doStreaming) tlKernel.clearTileListStat(stream);
2266 if(atomsChanged || doMinimize){
2267 clear_device_array<float>(force_x, atomStorageSize*4, stream);
2268 if(doSlow) clear_device_array<float>(forceSlow_x, atomStorageSize*4, stream);
2272 // --- streaming ----
2273 float4* m_forces = NULL;
2274 float4* m_forcesSlow = NULL;
2275 int* m_patchReadyQueue = NULL;
2277 unsigned int* patchNumCountPtr = NULL;
2279 numPatches = tlKernel.getNumPatches();
2280 if (reallocate_device<unsigned int>(&patchNumCount, &patchNumCountSize, numPatches)) {
2281 // If re-allocated, clear array
2282 clear_device_array<unsigned int>(patchNumCount, numPatches, stream);
2284 patchNumCountPtr = patchNumCount;
2285 bool re = reallocate_host<int>(&patchReadyQueue, &patchReadyQueueSize, numPatches, cudaHostAllocMapped);
2287 // If re-allocated, re-set to "-1"
2288 for (int i=0;i < numPatches;i++) patchReadyQueue[i] = -1;
2290 cudaCheck(cudaHostGetDevicePointer(&m_patchReadyQueue, patchReadyQueue, 0));
2291 cudaCheck(cudaHostGetDevicePointer(&m_forces, h_forces, 0));
2292 cudaCheck(cudaHostGetDevicePointer(&m_forcesSlow, h_forcesSlow, 0));
2294 // -----------------
2296 if (doVirial || doEnergy) {
2297 tlKernel.setTileListVirialEnergyLength(tlKernel.getNumTileLists());
2302 int* outputOrderPtr = tlKernel.getOutputOrder();
2304 int nwarp = NONBONDKERNEL_NUM_WARP;
2305 int nthread = WARPSIZE*nwarp;
2308 const int doActualNbThole = doNbThole ? int(cudaNonbondedTables.getNumPotentialNbtholeTerms() > 0) : int(false);
2314 int options = doEnergy + (doVirial << 1) + (doSlow << 2) +
2315 (doPairlist << 3) + (doAlch << 4) + (doFEP << 5) + (doTI << 6) + (doStreaming << 7) + (doTable << 8) + (doAlchVdwForceSwitching << 9) + (doActualNbThole << 10);
2318 while (start < tlKernel.getNumTileLists()) {
2320 int nleft = tlKernel.getNumTileLists() - start;
2321 int nblock = min(deviceCUDA->getMaxNumBlocks(), (nleft-1)/nwarp+1);
2323 #define CALL(DOENERGY, DOVIRIAL, DOSLOW, DOPAIRLIST, DOALCH, DOFEP, DOTI, DOSTREAMING, DOALCHWDWFORCESWITCHING) \
2324 nonbondedForceKernel<DOENERGY, DOVIRIAL, DOSLOW, DOPAIRLIST, DOALCH, DOFEP, DOTI, DOSTREAMING, DOALCHWDWFORCESWITCHING> \
2325 <<< nblock, nthread, shMemSize, stream >>> \
2326 (start, tlKernel.getNumTileLists(), tlKernel.getTileLists(), tlKernel.getTileExcls(), tlKernel.getTileJatomStart(), \
2327 cudaNonbondedTables.getVdwCoefTableWidth(), cudaNonbondedTables.getVdwCoefTable(), \
2328 vdwTypes, lata, latb, latc, tlKernel.get_xyzq(), cutoff2, nbConstants, \
2329 cudaNonbondedTables.getVdwCoefTableTex(), cudaNonbondedTables.getForceTableTex(), cudaNonbondedTables.getEnergyTableTex(), \
2330 atomStorageSize, tlKernel.get_plcutoff2(), tlKernel.getPatchPairs(), atomIndex, exclIndexMaxDiff, overflowExclusions, \
2331 tlKernel.getTileListDepth(), tlKernel.getTileListOrder(), tlKernel.getJtiles(), tlKernel.getTileListStatDevPtr(), \
2332 tlKernel.getBoundingBoxes(), d_forces, d_forcesSlow, \
2333 force_x, force_y, force_z, force_w, \
2334 forceSlow_x, forceSlow_y, forceSlow_z, forceSlow_w, \
2335 numPatches, patchNumCountPtr, tlKernel.getCudaPatches(), m_forces, m_forcesSlow, m_patchReadyQueue, \
2336 outputOrderPtr, tlKernel.getTileListVirialEnergy(), tlKernel.get_part()); called=true
2338 bool called = false;
2341 if (!doEnergy && !doVirial && !doSlow && !doPairlist) CALL(0, 0, 0, 0, 0, 0, 0, 1, 0);
2342 if (!doEnergy && !doVirial && doSlow && !doPairlist) CALL(0, 0, 1, 0, 0, 0, 0, 1, 0);
2343 if (!doEnergy && doVirial && !doSlow && !doPairlist) CALL(0, 1, 0, 0, 0, 0, 0, 1, 0);
2344 if (!doEnergy && doVirial && doSlow && !doPairlist) CALL(0, 1, 1, 0, 0, 0, 0, 1, 0);
2345 if ( doEnergy && !doVirial && !doSlow && !doPairlist) CALL(1, 0, 0, 0, 0, 0, 0, 1, 0);
2346 if ( doEnergy && !doVirial && doSlow && !doPairlist) CALL(1, 0, 1, 0, 0, 0, 0, 1, 0);
2347 if ( doEnergy && doVirial && !doSlow && !doPairlist) CALL(1, 1, 0, 0, 0, 0, 0, 1, 0);
2348 if ( doEnergy && doVirial && doSlow && !doPairlist) CALL(1, 1, 1, 0, 0, 0, 0, 1, 0);
2351 if (!doEnergy && !doVirial && !doSlow && doPairlist) CALL(0, 0, 0, 1, 0, 0, 0, 1, 0);
2352 if (!doEnergy && !doVirial && doSlow && doPairlist) CALL(0, 0, 1, 1, 0, 0, 0, 1, 0);
2353 if (!doEnergy && doVirial && !doSlow && doPairlist) CALL(0, 1, 0, 1, 0, 0, 0, 1, 0);
2354 if (!doEnergy && doVirial && doSlow && doPairlist) CALL(0, 1, 1, 1, 0, 0, 0, 1, 0);
2355 if ( doEnergy && !doVirial && !doSlow && doPairlist) CALL(1, 0, 0, 1, 0, 0, 0, 1, 0);
2356 if ( doEnergy && !doVirial && doSlow && doPairlist) CALL(1, 0, 1, 1, 0, 0, 0, 1, 0);
2357 if ( doEnergy && doVirial && !doSlow && doPairlist) CALL(1, 1, 0, 1, 0, 0, 0, 1, 0);
2358 if ( doEnergy && doVirial && doSlow && doPairlist) CALL(1, 1, 1, 1, 0, 0, 0, 1, 0);
2361 if (doAlchVdwForceSwitching) {
2362 if (!doEnergy && !doVirial && !doSlow && !doPairlist) CALL(0, 0, 0, 0, 1, 1, 0, 1, 1);
2363 if (!doEnergy && !doVirial && doSlow && !doPairlist) CALL(0, 0, 1, 0, 1, 1, 0, 1, 1);
2364 if (!doEnergy && doVirial && !doSlow && !doPairlist) CALL(0, 1, 0, 0, 1, 1, 0, 1, 1);
2365 if (!doEnergy && doVirial && doSlow && !doPairlist) CALL(0, 1, 1, 0, 1, 1, 0, 1, 1);
2366 if ( doEnergy && !doVirial && !doSlow && !doPairlist) CALL(1, 0, 0, 0, 1, 1, 0, 1, 1);
2367 if ( doEnergy && !doVirial && doSlow && !doPairlist) CALL(1, 0, 1, 0, 1, 1, 0, 1, 1);
2368 if ( doEnergy && doVirial && !doSlow && !doPairlist) CALL(1, 1, 0, 0, 1, 1, 0, 1, 1);
2369 if ( doEnergy && doVirial && doSlow && !doPairlist) CALL(1, 1, 1, 0, 1, 1, 0, 1, 1);
2371 if (!doEnergy && !doVirial && !doSlow && doPairlist) CALL(0, 0, 0, 1, 1, 1, 0, 1, 1);
2372 if (!doEnergy && !doVirial && doSlow && doPairlist) CALL(0, 0, 1, 1, 1, 1, 0, 1, 1);
2373 if (!doEnergy && doVirial && !doSlow && doPairlist) CALL(0, 1, 0, 1, 1, 1, 0, 1, 1);
2374 if (!doEnergy && doVirial && doSlow && doPairlist) CALL(0, 1, 1, 1, 1, 1, 0, 1, 1);
2375 if ( doEnergy && !doVirial && !doSlow && doPairlist) CALL(1, 0, 0, 1, 1, 1, 0, 1, 1);
2376 if ( doEnergy && !doVirial && doSlow && doPairlist) CALL(1, 0, 1, 1, 1, 1, 0, 1, 1);
2377 if ( doEnergy && doVirial && !doSlow && doPairlist) CALL(1, 1, 0, 1, 1, 1, 0, 1, 1);
2378 if ( doEnergy && doVirial && doSlow && doPairlist) CALL(1, 1, 1, 1, 1, 1, 0, 1, 1);
2380 if (!doEnergy && !doVirial && !doSlow && !doPairlist) CALL(0, 0, 0, 0, 1, 1, 0, 1, 0);
2381 if (!doEnergy && !doVirial && doSlow && !doPairlist) CALL(0, 0, 1, 0, 1, 1, 0, 1, 0);
2382 if (!doEnergy && doVirial && !doSlow && !doPairlist) CALL(0, 1, 0, 0, 1, 1, 0, 1, 0);
2383 if (!doEnergy && doVirial && doSlow && !doPairlist) CALL(0, 1, 1, 0, 1, 1, 0, 1, 0);
2384 if ( doEnergy && !doVirial && !doSlow && !doPairlist) CALL(1, 0, 0, 0, 1, 1, 0, 1, 0);
2385 if ( doEnergy && !doVirial && doSlow && !doPairlist) CALL(1, 0, 1, 0, 1, 1, 0, 1, 0);
2386 if ( doEnergy && doVirial && !doSlow && !doPairlist) CALL(1, 1, 0, 0, 1, 1, 0, 1, 0);
2387 if ( doEnergy && doVirial && doSlow && !doPairlist) CALL(1, 1, 1, 0, 1, 1, 0, 1, 0);
2389 if (!doEnergy && !doVirial && !doSlow && doPairlist) CALL(0, 0, 0, 1, 1, 1, 0, 1, 0);
2390 if (!doEnergy && !doVirial && doSlow && doPairlist) CALL(0, 0, 1, 1, 1, 1, 0, 1, 0);
2391 if (!doEnergy && doVirial && !doSlow && doPairlist) CALL(0, 1, 0, 1, 1, 1, 0, 1, 0);
2392 if (!doEnergy && doVirial && doSlow && doPairlist) CALL(0, 1, 1, 1, 1, 1, 0, 1, 0);
2393 if ( doEnergy && !doVirial && !doSlow && doPairlist) CALL(1, 0, 0, 1, 1, 1, 0, 1, 0);
2394 if ( doEnergy && !doVirial && doSlow && doPairlist) CALL(1, 0, 1, 1, 1, 1, 0, 1, 0);
2395 if ( doEnergy && doVirial && !doSlow && doPairlist) CALL(1, 1, 0, 1, 1, 1, 0, 1, 0);
2396 if ( doEnergy && doVirial && doSlow && doPairlist) CALL(1, 1, 1, 1, 1, 1, 0, 1, 0);
2397 } // doAlchVdwForceSwitching
2400 if (doAlchVdwForceSwitching) {
2401 if (!doEnergy && !doVirial && !doSlow && !doPairlist) CALL(0, 0, 0, 0, 1, 0, 1, 1, 1);
2402 if (!doEnergy && !doVirial && doSlow && !doPairlist) CALL(0, 0, 1, 0, 1, 0, 1, 1, 1);
2403 if (!doEnergy && doVirial && !doSlow && !doPairlist) CALL(0, 1, 0, 0, 1, 0, 1, 1, 1);
2404 if (!doEnergy && doVirial && doSlow && !doPairlist) CALL(0, 1, 1, 0, 1, 0, 1, 1, 1);
2405 if ( doEnergy && !doVirial && !doSlow && !doPairlist) CALL(1, 0, 0, 0, 1, 0, 1, 1, 1);
2406 if ( doEnergy && !doVirial && doSlow && !doPairlist) CALL(1, 0, 1, 0, 1, 0, 1, 1, 1);
2407 if ( doEnergy && doVirial && !doSlow && !doPairlist) CALL(1, 1, 0, 0, 1, 0, 1, 1, 1);
2408 if ( doEnergy && doVirial && doSlow && !doPairlist) CALL(1, 1, 1, 0, 1, 0, 1, 1, 1);
2410 if (!doEnergy && !doVirial && !doSlow && doPairlist) CALL(0, 0, 0, 1, 1, 0, 1, 1, 1);
2411 if (!doEnergy && !doVirial && doSlow && doPairlist) CALL(0, 0, 1, 1, 1, 0, 1, 1, 1);
2412 if (!doEnergy && doVirial && !doSlow && doPairlist) CALL(0, 1, 0, 1, 1, 0, 1, 1, 1);
2413 if (!doEnergy && doVirial && doSlow && doPairlist) CALL(0, 1, 1, 1, 1, 0, 1, 1, 1);
2414 if ( doEnergy && !doVirial && !doSlow && doPairlist) CALL(1, 0, 0, 1, 1, 0, 1, 1, 1);
2415 if ( doEnergy && !doVirial && doSlow && doPairlist) CALL(1, 0, 1, 1, 1, 0, 1, 1, 1);
2416 if ( doEnergy && doVirial && !doSlow && doPairlist) CALL(1, 1, 0, 1, 1, 0, 1, 1, 1);
2417 if ( doEnergy && doVirial && doSlow && doPairlist) CALL(1, 1, 1, 1, 1, 0, 1, 1, 1);
2419 if (!doEnergy && !doVirial && !doSlow && !doPairlist) CALL(0, 0, 0, 0, 1, 0, 1, 1, 0);
2420 if (!doEnergy && !doVirial && doSlow && !doPairlist) CALL(0, 0, 1, 0, 1, 0, 1, 1, 0);
2421 if (!doEnergy && doVirial && !doSlow && !doPairlist) CALL(0, 1, 0, 0, 1, 0, 1, 1, 0);
2422 if (!doEnergy && doVirial && doSlow && !doPairlist) CALL(0, 1, 1, 0, 1, 0, 1, 1, 0);
2423 if ( doEnergy && !doVirial && !doSlow && !doPairlist) CALL(1, 0, 0, 0, 1, 0, 1, 1, 0);
2424 if ( doEnergy && !doVirial && doSlow && !doPairlist) CALL(1, 0, 1, 0, 1, 0, 1, 1, 0);
2425 if ( doEnergy && doVirial && !doSlow && !doPairlist) CALL(1, 1, 0, 0, 1, 0, 1, 1, 0);
2426 if ( doEnergy && doVirial && doSlow && !doPairlist) CALL(1, 1, 1, 0, 1, 0, 1, 1, 0);
2428 if (!doEnergy && !doVirial && !doSlow && doPairlist) CALL(0, 0, 0, 1, 1, 0, 1, 1, 0);
2429 if (!doEnergy && !doVirial && doSlow && doPairlist) CALL(0, 0, 1, 1, 1, 0, 1, 1, 0);
2430 if (!doEnergy && doVirial && !doSlow && doPairlist) CALL(0, 1, 0, 1, 1, 0, 1, 1, 0);
2431 if (!doEnergy && doVirial && doSlow && doPairlist) CALL(0, 1, 1, 1, 1, 0, 1, 1, 0);
2432 if ( doEnergy && !doVirial && !doSlow && doPairlist) CALL(1, 0, 0, 1, 1, 0, 1, 1, 0);
2433 if ( doEnergy && !doVirial && doSlow && doPairlist) CALL(1, 0, 1, 1, 1, 0, 1, 1, 0);
2434 if ( doEnergy && doVirial && !doSlow && doPairlist) CALL(1, 1, 0, 1, 1, 0, 1, 1, 0);
2435 if ( doEnergy && doVirial && doSlow && doPairlist) CALL(1, 1, 1, 1, 1, 0, 1, 1, 0);
2436 } // doAlchVdwForceSwitching
2443 if (!doEnergy && !doVirial && !doSlow && !doPairlist) CALL(0, 0, 0, 0, 0, 0, 0, 0, 0);
2444 if (!doEnergy && !doVirial && doSlow && !doPairlist) CALL(0, 0, 1, 0, 0, 0, 0, 0, 0);
2445 if (!doEnergy && doVirial && !doSlow && !doPairlist) CALL(0, 1, 0, 0, 0, 0, 0, 0, 0);
2446 if (!doEnergy && doVirial && doSlow && !doPairlist) CALL(0, 1, 1, 0, 0, 0, 0, 0, 0);
2447 if ( doEnergy && !doVirial && !doSlow && !doPairlist) CALL(1, 0, 0, 0, 0, 0, 0, 0, 0);
2448 if ( doEnergy && !doVirial && doSlow && !doPairlist) CALL(1, 0, 1, 0, 0, 0, 0, 0, 0);
2449 if ( doEnergy && doVirial && !doSlow && !doPairlist) CALL(1, 1, 0, 0, 0, 0, 0, 0, 0);
2450 if ( doEnergy && doVirial && doSlow && !doPairlist) CALL(1, 1, 1, 0, 0, 0, 0, 0, 0);
2453 if (!doEnergy && !doVirial && !doSlow && doPairlist) CALL(0, 0, 0, 1, 0, 0, 0, 0, 0);
2454 if (!doEnergy && !doVirial && doSlow && doPairlist) CALL(0, 0, 1, 1, 0, 0, 0, 0, 0);
2455 if (!doEnergy && doVirial && !doSlow && doPairlist) CALL(0, 1, 0, 1, 0, 0, 0, 0, 0);
2456 if (!doEnergy && doVirial && doSlow && doPairlist) CALL(0, 1, 1, 1, 0, 0, 0, 0, 0);
2457 if ( doEnergy && !doVirial && !doSlow && doPairlist) CALL(1, 0, 0, 1, 0, 0, 0, 0, 0);
2458 if ( doEnergy && !doVirial && doSlow && doPairlist) CALL(1, 0, 1, 1, 0, 0, 0, 0, 0);
2459 if ( doEnergy && doVirial && !doSlow && doPairlist) CALL(1, 1, 0, 1, 0, 0, 0, 0, 0);
2460 if ( doEnergy && doVirial && doSlow && doPairlist) CALL(1, 1, 1, 1, 0, 0, 0, 0, 0);
2463 if (doAlchVdwForceSwitching) {
2464 if (!doEnergy && !doVirial && !doSlow && !doPairlist) CALL(0, 0, 0, 0, 1, 1, 0, 0, 1);
2465 if (!doEnergy && !doVirial && doSlow && !doPairlist) CALL(0, 0, 1, 0, 1, 1, 0, 0, 1);
2466 if (!doEnergy && doVirial && !doSlow && !doPairlist) CALL(0, 1, 0, 0, 1, 1, 0, 0, 1);
2467 if (!doEnergy && doVirial && doSlow && !doPairlist) CALL(0, 1, 1, 0, 1, 1, 0, 0, 1);
2468 if ( doEnergy && !doVirial && !doSlow && !doPairlist) CALL(1, 0, 0, 0, 1, 1, 0, 0, 1);
2469 if ( doEnergy && !doVirial && doSlow && !doPairlist) CALL(1, 0, 1, 0, 1, 1, 0, 0, 1);
2470 if ( doEnergy && doVirial && !doSlow && !doPairlist) CALL(1, 1, 0, 0, 1, 1, 0, 0, 1);
2471 if ( doEnergy && doVirial && doSlow && !doPairlist) CALL(1, 1, 1, 0, 1, 1, 0, 0, 1);
2473 if (!doEnergy && !doVirial && !doSlow && doPairlist) CALL(0, 0, 0, 1, 1, 1, 0, 0, 1);
2474 if (!doEnergy && !doVirial && doSlow && doPairlist) CALL(0, 0, 1, 1, 1, 1, 0, 0, 1);
2475 if (!doEnergy && doVirial && !doSlow && doPairlist) CALL(0, 1, 0, 1, 1, 1, 0, 0, 1);
2476 if (!doEnergy && doVirial && doSlow && doPairlist) CALL(0, 1, 1, 1, 1, 1, 0, 0, 1);
2477 if ( doEnergy && !doVirial && !doSlow && doPairlist) CALL(1, 0, 0, 1, 1, 1, 0, 0, 1);
2478 if ( doEnergy && !doVirial && doSlow && doPairlist) CALL(1, 0, 1, 1, 1, 1, 0, 0, 1);
2479 if ( doEnergy && doVirial && !doSlow && doPairlist) CALL(1, 1, 0, 1, 1, 1, 0, 0, 1);
2480 if ( doEnergy && doVirial && doSlow && doPairlist) CALL(1, 1, 1, 1, 1, 1, 0, 0, 1);
2482 if (!doEnergy && !doVirial && !doSlow && !doPairlist) CALL(0, 0, 0, 0, 1, 1, 0, 0, 0);
2483 if (!doEnergy && !doVirial && doSlow && !doPairlist) CALL(0, 0, 1, 0, 1, 1, 0, 0, 0);
2484 if (!doEnergy && doVirial && !doSlow && !doPairlist) CALL(0, 1, 0, 0, 1, 1, 0, 0, 0);
2485 if (!doEnergy && doVirial && doSlow && !doPairlist) CALL(0, 1, 1, 0, 1, 1, 0, 0, 0);
2486 if ( doEnergy && !doVirial && !doSlow && !doPairlist) CALL(1, 0, 0, 0, 1, 1, 0, 0, 0);
2487 if ( doEnergy && !doVirial && doSlow && !doPairlist) CALL(1, 0, 1, 0, 1, 1, 0, 0, 0);
2488 if ( doEnergy && doVirial && !doSlow && !doPairlist) CALL(1, 1, 0, 0, 1, 1, 0, 0, 0);
2489 if ( doEnergy && doVirial && doSlow && !doPairlist) CALL(1, 1, 1, 0, 1, 1, 0, 0, 0);
2491 if (!doEnergy && !doVirial && !doSlow && doPairlist) CALL(0, 0, 0, 1, 1, 1, 0, 0, 0);
2492 if (!doEnergy && !doVirial && doSlow && doPairlist) CALL(0, 0, 1, 1, 1, 1, 0, 0, 0);
2493 if (!doEnergy && doVirial && !doSlow && doPairlist) CALL(0, 1, 0, 1, 1, 1, 0, 0, 0);
2494 if (!doEnergy && doVirial && doSlow && doPairlist) CALL(0, 1, 1, 1, 1, 1, 0, 0, 0);
2495 if ( doEnergy && !doVirial && !doSlow && doPairlist) CALL(1, 0, 0, 1, 1, 1, 0, 0, 0);
2496 if ( doEnergy && !doVirial && doSlow && doPairlist) CALL(1, 0, 1, 1, 1, 1, 0, 0, 0);
2497 if ( doEnergy && doVirial && !doSlow && doPairlist) CALL(1, 1, 0, 1, 1, 1, 0, 0, 0);
2498 if ( doEnergy && doVirial && doSlow && doPairlist) CALL(1, 1, 1, 1, 1, 1, 0, 0, 0);
2502 if (doAlchVdwForceSwitching) {
2503 if (!doEnergy && !doVirial && !doSlow && !doPairlist) CALL(0, 0, 0, 0, 1, 0, 1, 0, 1);
2504 if (!doEnergy && !doVirial && doSlow && !doPairlist) CALL(0, 0, 1, 0, 1, 0, 1, 0, 1);
2505 if (!doEnergy && doVirial && !doSlow && !doPairlist) CALL(0, 1, 0, 0, 1, 0, 1, 0, 1);
2506 if (!doEnergy && doVirial && doSlow && !doPairlist) CALL(0, 1, 1, 0, 1, 0, 1, 0, 1);
2507 if ( doEnergy && !doVirial && !doSlow && !doPairlist) CALL(1, 0, 0, 0, 1, 0, 1, 0, 1);
2508 if ( doEnergy && !doVirial && doSlow && !doPairlist) CALL(1, 0, 1, 0, 1, 0, 1, 0, 1);
2509 if ( doEnergy && doVirial && !doSlow && !doPairlist) CALL(1, 1, 0, 0, 1, 0, 1, 0, 1);
2510 if ( doEnergy && doVirial && doSlow && !doPairlist) CALL(1, 1, 1, 0, 1, 0, 1, 0, 1);
2512 if (!doEnergy && !doVirial && !doSlow && doPairlist) CALL(0, 0, 0, 1, 1, 0, 1, 0, 1);
2513 if (!doEnergy && !doVirial && doSlow && doPairlist) CALL(0, 0, 1, 1, 1, 0, 1, 0, 1);
2514 if (!doEnergy && doVirial && !doSlow && doPairlist) CALL(0, 1, 0, 1, 1, 0, 1, 0, 1);
2515 if (!doEnergy && doVirial && doSlow && doPairlist) CALL(0, 1, 1, 1, 1, 0, 1, 0, 1);
2516 if ( doEnergy && !doVirial && !doSlow && doPairlist) CALL(1, 0, 0, 1, 1, 0, 1, 0, 1);
2517 if ( doEnergy && !doVirial && doSlow && doPairlist) CALL(1, 0, 1, 1, 1, 0, 1, 0, 1);
2518 if ( doEnergy && doVirial && !doSlow && doPairlist) CALL(1, 1, 0, 1, 1, 0, 1, 0, 1);
2519 if ( doEnergy && doVirial && doSlow && doPairlist) CALL(1, 1, 1, 1, 1, 0, 1, 0, 1);
2521 if (!doEnergy && !doVirial && !doSlow && !doPairlist) CALL(0, 0, 0, 0, 1, 0, 1, 0, 0);
2522 if (!doEnergy && !doVirial && doSlow && !doPairlist) CALL(0, 0, 1, 0, 1, 0, 1, 0, 0);
2523 if (!doEnergy && doVirial && !doSlow && !doPairlist) CALL(0, 1, 0, 0, 1, 0, 1, 0, 0);
2524 if (!doEnergy && doVirial && doSlow && !doPairlist) CALL(0, 1, 1, 0, 1, 0, 1, 0, 0);
2525 if ( doEnergy && !doVirial && !doSlow && !doPairlist) CALL(1, 0, 0, 0, 1, 0, 1, 0, 0);
2526 if ( doEnergy && !doVirial && doSlow && !doPairlist) CALL(1, 0, 1, 0, 1, 0, 1, 0, 0);
2527 if ( doEnergy && doVirial && !doSlow && !doPairlist) CALL(1, 1, 0, 0, 1, 0, 1, 0, 0);
2528 if ( doEnergy && doVirial && doSlow && !doPairlist) CALL(1, 1, 1, 0, 1, 0, 1, 0, 0);
2530 if (!doEnergy && !doVirial && !doSlow && doPairlist) CALL(0, 0, 0, 1, 1, 0, 1, 0, 0);
2531 if (!doEnergy && !doVirial && doSlow && doPairlist) CALL(0, 0, 1, 1, 1, 0, 1, 0, 0);
2532 if (!doEnergy && doVirial && !doSlow && doPairlist) CALL(0, 1, 0, 1, 1, 0, 1, 0, 0);
2533 if (!doEnergy && doVirial && doSlow && doPairlist) CALL(0, 1, 1, 1, 1, 0, 1, 0, 0);
2534 if ( doEnergy && !doVirial && !doSlow && doPairlist) CALL(1, 0, 0, 1, 1, 0, 1, 0, 0);
2535 if ( doEnergy && !doVirial && doSlow && doPairlist) CALL(1, 0, 1, 1, 1, 0, 1, 0, 0);
2536 if ( doEnergy && doVirial && !doSlow && doPairlist) CALL(1, 1, 0, 1, 1, 0, 1, 0, 0);
2537 if ( doEnergy && doVirial && doSlow && doPairlist) CALL(1, 1, 1, 1, 1, 0, 1, 0, 0);
2544 NAMD_die("CudaComputeNonbondedKernel::nonbondedForce, none of the kernels called");
// CALL(...): expands to one launch of the nonbondedForceKernel template.
//
// The eleven macro arguments are forwarded, in order, as the template
// arguments of nonbondedForceKernel:
//   DOENERGY, DOVIRIAL, DOSLOW, DOPAIRLIST, DOALCH, DOFEP, DOTI,
//   DOSTREAMING, DOTABLE, DOALCHWDWFORCESWITCHING, DONBTHOLE
//
// The launch configuration (nblock, nthread, shMemSize, stream) and every
// kernel argument are NOT macro parameters: they are picked up by name from
// the scope in which CALL is expanded, so all of those identifiers must be
// visible at each use site.
//
// The expansion is a bare launch expression with no trailing semicolon; each
// use site supplies its own ';'.
//
// In the case table that follows this macro, case label N passes bit k of N
// as argument k+1 (bit 0 -> DOENERGY ... bit 8 -> DOTABLE); the last two
// arguments are 0 for every label up to 511.
//
// NOTE(review): "WDW" in DOALCHWDWFORCESWITCHING looks like a typo for "VDW"
// (the generator script quoted later in this file names the flag
// doAlchVdwForceSwitching). It is only a macro parameter name, so the
// spelling does not affect the expansion.
// NOTE(review): the macro itself performs no launch error check -- confirm
// that the caller checks for launch errors after the dispatch.
// NOTE(review): the CALL(...) uses that precede this definition pass nine
// arguments, so they presumably rely on a different definition of CALL or
// are compiled out -- verify against the surrounding preprocessor guards.
2550 #define CALL(DOENERGY, DOVIRIAL, DOSLOW, DOPAIRLIST, DOALCH, DOFEP, DOTI, DOSTREAMING, DOTABLE, DOALCHWDWFORCESWITCHING, DONBTHOLE) \
2551 nonbondedForceKernel<DOENERGY, DOVIRIAL, DOSLOW, DOPAIRLIST, DOALCH, DOFEP, DOTI, DOSTREAMING, DOTABLE, DOALCHWDWFORCESWITCHING, DONBTHOLE> \
2552 <<< nblock, nthread, shMemSize, stream >>> \
2553 (start, tlKernel.getNumTileLists(), tlKernel.getTileLists(), tlKernel.getTileExcls(), tlKernel.getTileJatomStart(), \
2554 cudaNonbondedTables.getVdwCoefTableWidth(), cudaNonbondedTables.getVdwCoefTable(), \
2555 vdwTypes, lata, latb, latc, tlKernel.get_xyzq(), cutoff2, nbConstants, \
2556 cudaNonbondedTables.getVdwCoefTableTex(), cudaNonbondedTables.getForceTableTex(), cudaNonbondedTables.getEnergyTableTex(), \
2557 atomStorageSize, tlKernel.get_plcutoff2(), tlKernel.getPatchPairs(), atomIndex, exclIndexMaxDiff, overflowExclusions, \
2558 tlKernel.getTileListDepth(), tlKernel.getTileListOrder(), tlKernel.getJtiles(), tlKernel.getTileListStatDevPtr(), \
2559 tlKernel.getBoundingBoxes(), d_forces, d_forcesSlow, \
2560 force_x, force_y, force_z, force_w, \
2561 forceSlow_x, forceSlow_y, forceSlow_z, forceSlow_w, \
2562 numPatches, patchNumCountPtr, tlKernel.getCudaPatches(), m_forces, m_forcesSlow, m_patchReadyQueue, \
2563 outputOrderPtr, tlKernel.getTileListVirialEnergy(), tlKernel.get_part(), \
2564 cudaNonbondedTables.getDrudeNbTholeTijTableTex(), cudaNonbondedTables.getDrudeNbTholeTijTable(), \
2565 isDrude, drudeAtomAlpha, drudeNbtholeCut2)
2573 case 0: CALL(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); break;
2574 case 1: CALL(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); break;
2575 case 2: CALL(0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0); break;
2576 case 3: CALL(1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0); break;
2577 case 4: CALL(0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0); break;
2578 case 5: CALL(1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0); break;
2579 case 6: CALL(0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0); break;
2580 case 7: CALL(1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0); break;
2581 case 8: CALL(0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); break;
2582 case 9: CALL(1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); break;
2583 case 10: CALL(0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0); break;
2584 case 11: CALL(1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0); break;
2585 case 12: CALL(0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0); break;
2586 case 13: CALL(1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0); break;
2587 case 14: CALL(0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0); break;
2588 case 15: CALL(1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0); break;
2591 case 16: CALL(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0); break;
2592 case 17: CALL(1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0); break;
2593 case 18: CALL(0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0); break;
2594 case 19: CALL(1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0); break;
2595 case 20: CALL(0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0); break;
2596 case 21: CALL(1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0); break;
2597 case 22: CALL(0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0); break;
2598 case 23: CALL(1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0); break;
2599 case 24: CALL(0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0); break;
2600 case 25: CALL(1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0); break;
2601 case 26: CALL(0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0); break;
2602 case 27: CALL(1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0); break;
2603 case 28: CALL(0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0); break;
2604 case 29: CALL(1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0); break;
2605 case 30: CALL(0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0); break;
2606 case 31: CALL(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0); break;
2608 case 32: CALL(0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0); break;
2609 case 33: CALL(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0); break;
2610 case 34: CALL(0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0); break;
2611 case 35: CALL(1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0); break;
2612 case 36: CALL(0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0); break;
2613 case 37: CALL(1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0); break;
2614 case 38: CALL(0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0); break;
2615 case 39: CALL(1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0); break;
2616 case 40: CALL(0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0); break;
2617 case 41: CALL(1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0); break;
2618 case 42: CALL(0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0); break;
2619 case 43: CALL(1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0); break;
2620 case 44: CALL(0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0); break;
2621 case 45: CALL(1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0); break;
2622 case 46: CALL(0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0); break;
2623 case 47: CALL(1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0); break;
2625 case 48: CALL(0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0); break;
2626 case 49: CALL(1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0); break;
2627 case 50: CALL(0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0); break;
2628 case 51: CALL(1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0); break;
2629 case 52: CALL(0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0); break;
2630 case 53: CALL(1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0); break;
2631 case 54: CALL(0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0); break;
2632 case 55: CALL(1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0); break;
2633 case 56: CALL(0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0); break;
2634 case 57: CALL(1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0); break;
2635 case 58: CALL(0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0); break;
2636 case 59: CALL(1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0); break;
2637 case 60: CALL(0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0); break;
2638 case 61: CALL(1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0); break;
2639 case 62: CALL(0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0); break;
2640 case 63: CALL(1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0); break;
2642 case 64: CALL(0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0); break;
2643 case 65: CALL(1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0); break;
2644 case 66: CALL(0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0); break;
2645 case 67: CALL(1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0); break;
2646 case 68: CALL(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0); break;
2647 case 69: CALL(1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0); break;
2648 case 70: CALL(0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0); break;
2649 case 71: CALL(1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0); break;
2650 case 72: CALL(0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0); break;
2651 case 73: CALL(1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0); break;
2652 case 74: CALL(0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0); break;
2653 case 75: CALL(1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0); break;
2654 case 76: CALL(0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0); break;
2655 case 77: CALL(1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0); break;
2656 case 78: CALL(0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0); break;
2657 case 79: CALL(1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0); break;
2659 case 80: CALL(0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0); break;
2660 case 81: CALL(1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0); break;
2661 case 82: CALL(0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0); break;
2662 case 83: CALL(1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0); break;
2663 case 84: CALL(0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0); break;
2664 case 85: CALL(1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0); break;
2665 case 86: CALL(0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0); break;
2666 case 87: CALL(1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0); break;
2667 case 88: CALL(0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0); break;
2668 case 89: CALL(1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0); break;
2669 case 90: CALL(0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0); break;
2670 case 91: CALL(1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0); break;
2671 case 92: CALL(0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0); break;
2672 case 93: CALL(1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0); break;
2673 case 94: CALL(0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0); break;
2674 case 95: CALL(1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0); break;
2676 case 96: CALL(0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0); break;
2677 case 97: CALL(1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0); break;
2678 case 98: CALL(0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0); break;
2679 case 99: CALL(1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0); break;
2680 case 100: CALL(0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0); break;
2681 case 101: CALL(1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0); break;
2682 case 102: CALL(0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0); break;
2683 case 103: CALL(1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0); break;
2684 case 104: CALL(0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0); break;
2685 case 105: CALL(1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0); break;
2686 case 106: CALL(0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0); break;
2687 case 107: CALL(1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0); break;
2688 case 108: CALL(0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0); break;
2689 case 109: CALL(1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0); break;
2690 case 110: CALL(0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0); break;
2691 case 111: CALL(1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0); break;
2693 case 112: CALL(0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0); break;
2694 case 113: CALL(1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0); break;
2695 case 114: CALL(0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0); break;
2696 case 115: CALL(1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0); break;
2697 case 116: CALL(0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0); break;
2698 case 117: CALL(1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0); break;
2699 case 118: CALL(0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0); break;
2700 case 119: CALL(1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0); break;
2701 case 120: CALL(0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0); break;
2702 case 121: CALL(1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0); break;
2703 case 122: CALL(0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0); break;
2704 case 123: CALL(1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0); break;
2705 case 124: CALL(0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0); break;
2706 case 125: CALL(1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0); break;
2707 case 126: CALL(0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); break;
2708 case 127: CALL(1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); break;
2711 case 128: CALL(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0); break;
2712 case 129: CALL(1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0); break;
2713 case 130: CALL(0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0); break;
2714 case 131: CALL(1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0); break;
2715 case 132: CALL(0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0); break;
2716 case 133: CALL(1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0); break;
2717 case 134: CALL(0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0); break;
2718 case 135: CALL(1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0); break;
2719 case 136: CALL(0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0); break;
2720 case 137: CALL(1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0); break;
2721 case 138: CALL(0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0); break;
2722 case 139: CALL(1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0); break;
2723 case 140: CALL(0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0); break;
2724 case 141: CALL(1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0); break;
2725 case 142: CALL(0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0); break;
2726 case 143: CALL(1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0); break;
2729 case 144: CALL(0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0); break;
2730 case 145: CALL(1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0); break;
2731 case 146: CALL(0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0); break;
2732 case 147: CALL(1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0); break;
2733 case 148: CALL(0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0); break;
2734 case 149: CALL(1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0); break;
2735 case 150: CALL(0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0); break;
2736 case 151: CALL(1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0); break;
2737 case 152: CALL(0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0); break;
2738 case 153: CALL(1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0); break;
2739 case 154: CALL(0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0); break;
2740 case 155: CALL(1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0); break;
2741 case 156: CALL(0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0); break;
2742 case 157: CALL(1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0); break;
2743 case 158: CALL(0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0); break;
2744 case 159: CALL(1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0); break;
2746 case 160: CALL(0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0); break;
2747 case 161: CALL(1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0); break;
2748 case 162: CALL(0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0); break;
2749 case 163: CALL(1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0); break;
2750 case 164: CALL(0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0); break;
2751 case 165: CALL(1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0); break;
2752 case 166: CALL(0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0); break;
2753 case 167: CALL(1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0); break;
2754 case 168: CALL(0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0); break;
2755 case 169: CALL(1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0); break;
2756 case 170: CALL(0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0); break;
2757 case 171: CALL(1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0); break;
2758 case 172: CALL(0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0); break;
2759 case 173: CALL(1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0); break;
2760 case 174: CALL(0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0); break;
2761 case 175: CALL(1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0); break;
2763 case 176: CALL(0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0); break;
2764 case 177: CALL(1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0); break;
2765 case 178: CALL(0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0); break;
2766 case 179: CALL(1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0); break;
2767 case 180: CALL(0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0); break;
2768 case 181: CALL(1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0); break;
2769 case 182: CALL(0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0); break;
2770 case 183: CALL(1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0); break;
2771 case 184: CALL(0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0); break;
2772 case 185: CALL(1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0); break;
2773 case 186: CALL(0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0); break;
2774 case 187: CALL(1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0); break;
2775 case 188: CALL(0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0); break;
2776 case 189: CALL(1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0); break;
2777 case 190: CALL(0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0); break;
2778 case 191: CALL(1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0); break;
2780 case 192: CALL(0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0); break;
2781 case 193: CALL(1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0); break;
2782 case 194: CALL(0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0); break;
2783 case 195: CALL(1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0); break;
2784 case 196: CALL(0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0); break;
2785 case 197: CALL(1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0); break;
2786 case 198: CALL(0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0); break;
2787 case 199: CALL(1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0); break;
2788 case 200: CALL(0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0); break;
2789 case 201: CALL(1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0); break;
2790 case 202: CALL(0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0); break;
2791 case 203: CALL(1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0); break;
2792 case 204: CALL(0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0); break;
2793 case 205: CALL(1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0); break;
2794 case 206: CALL(0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0); break;
2795 case 207: CALL(1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0); break;
2797 case 208: CALL(0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0); break;
2798 case 209: CALL(1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0); break;
2799 case 210: CALL(0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0); break;
2800 case 211: CALL(1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0); break;
2801 case 212: CALL(0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0); break;
2802 case 213: CALL(1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0); break;
2803 case 214: CALL(0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0); break;
2804 case 215: CALL(1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0); break;
2805 case 216: CALL(0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0); break;
2806 case 217: CALL(1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0); break;
2807 case 218: CALL(0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0); break;
2808 case 219: CALL(1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0); break;
2809 case 220: CALL(0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0); break;
2810 case 221: CALL(1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0); break;
2811 case 222: CALL(0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0); break;
2812 case 223: CALL(1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0); break;
2814 case 224: CALL(0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0); break;
2815 case 225: CALL(1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0); break;
2816 case 226: CALL(0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0); break;
2817 case 227: CALL(1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0); break;
2818 case 228: CALL(0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0); break;
2819 case 229: CALL(1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0); break;
2820 case 230: CALL(0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0); break;
2821 case 231: CALL(1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0); break;
2822 case 232: CALL(0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0); break;
2823 case 233: CALL(1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0); break;
2824 case 234: CALL(0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0); break;
2825 case 235: CALL(1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0); break;
2826 case 236: CALL(0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0); break;
2827 case 237: CALL(1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0); break;
2828 case 238: CALL(0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0); break;
2829 case 239: CALL(1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0); break;
2831 case 240: CALL(0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0); break;
2832 case 241: CALL(1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0); break;
2833 case 242: CALL(0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0); break;
2834 case 243: CALL(1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0); break;
2835 case 244: CALL(0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0); break;
2836 case 245: CALL(1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0); break;
2837 case 246: CALL(0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0); break;
2838 case 247: CALL(1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0); break;
2839 case 248: CALL(0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0); break;
2840 case 249: CALL(1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0); break;
2841 case 250: CALL(0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0); break;
2842 case 251: CALL(1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0); break;
2843 case 252: CALL(0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0); break;
2844 case 253: CALL(1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0); break;
2845 case 254: CALL(0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0); break;
2846 case 255: CALL(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0); break;
2849 case 256: CALL(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0); break;
2850 case 257: CALL(1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0); break;
2851 case 258: CALL(0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0); break;
2852 case 259: CALL(1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0); break;
2853 case 260: CALL(0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0); break;
2854 case 261: CALL(1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0); break;
2855 case 262: CALL(0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0); break;
2856 case 263: CALL(1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0); break;
2857 case 264: CALL(0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0); break;
2858 case 265: CALL(1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0); break;
2859 case 266: CALL(0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0); break;
2860 case 267: CALL(1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0); break;
2861 case 268: CALL(0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0); break;
2862 case 269: CALL(1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0); break;
2863 case 270: CALL(0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0); break;
2864 case 271: CALL(1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0); break;
2867 case 272: CALL(0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0); break;
2868 case 273: CALL(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0); break;
2869 case 274: CALL(0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0); break;
2870 case 275: CALL(1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0); break;
2871 case 276: CALL(0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0); break;
2872 case 277: CALL(1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0); break;
2873 case 278: CALL(0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0); break;
2874 case 279: CALL(1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0); break;
2875 case 280: CALL(0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0); break;
2876 case 281: CALL(1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0); break;
2877 case 282: CALL(0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0); break;
2878 case 283: CALL(1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0); break;
2879 case 284: CALL(0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0); break;
2880 case 285: CALL(1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0); break;
2881 case 286: CALL(0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0); break;
2882 case 287: CALL(1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0); break;
2884 case 288: CALL(0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0); break;
2885 case 289: CALL(1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0); break;
2886 case 290: CALL(0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0); break;
2887 case 291: CALL(1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0); break;
2888 case 292: CALL(0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0); break;
2889 case 293: CALL(1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0); break;
2890 case 294: CALL(0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0); break;
2891 case 295: CALL(1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0); break;
2892 case 296: CALL(0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0); break;
2893 case 297: CALL(1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0); break;
2894 case 298: CALL(0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0); break;
2895 case 299: CALL(1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0); break;
2896 case 300: CALL(0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0); break;
2897 case 301: CALL(1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0); break;
2898 case 302: CALL(0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0); break;
2899 case 303: CALL(1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0); break;
2902 case 304: CALL(0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0); break;
2903 case 305: CALL(1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0); break;
2904 case 306: CALL(0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0); break;
2905 case 307: CALL(1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0); break;
2906 case 308: CALL(0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0); break;
2907 case 309: CALL(1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0); break;
2908 case 310: CALL(0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0); break;
2909 case 311: CALL(1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0); break;
2910 case 312: CALL(0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0); break;
2911 case 313: CALL(1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0); break;
2912 case 314: CALL(0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0); break;
2913 case 315: CALL(1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0); break;
2914 case 316: CALL(0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0); break;
2915 case 317: CALL(1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0); break;
2916 case 318: CALL(0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0); break;
2917 case 319: CALL(1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0); break;
2920 case 320: CALL(0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0); break;
2921 case 321: CALL(1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0); break;
2922 case 322: CALL(0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0); break;
2923 case 323: CALL(1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0); break;
2924 case 324: CALL(0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0); break;
2925 case 325: CALL(1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0); break;
2926 case 326: CALL(0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0); break;
2927 case 327: CALL(1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0); break;
2928 case 328: CALL(0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0); break;
2929 case 329: CALL(1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0); break;
2930 case 330: CALL(0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0); break;
2931 case 331: CALL(1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0); break;
2932 case 332: CALL(0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0); break;
2933 case 333: CALL(1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0); break;
2934 case 334: CALL(0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0); break;
2935 case 335: CALL(1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0); break;
2938 case 336: CALL(0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0); break;
2939 case 337: CALL(1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0); break;
2940 case 338: CALL(0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0); break;
2941 case 339: CALL(1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0); break;
2942 case 340: CALL(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0); break;
2943 case 341: CALL(1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0); break;
2944 case 342: CALL(0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0); break;
2945 case 343: CALL(1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0); break;
2946 case 344: CALL(0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0); break;
2947 case 345: CALL(1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0); break;
2948 case 346: CALL(0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0); break;
2949 case 347: CALL(1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0); break;
2950 case 348: CALL(0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0); break;
2951 case 349: CALL(1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0); break;
2952 case 350: CALL(0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0); break;
2953 case 351: CALL(1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0); break;
2956 case 352: CALL(0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0); break;
2957 case 353: CALL(1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0); break;
2958 case 354: CALL(0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0); break;
2959 case 355: CALL(1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0); break;
2960 case 356: CALL(0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0); break;
2961 case 357: CALL(1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0); break;
2962 case 358: CALL(0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0); break;
2963 case 359: CALL(1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0); break;
2964 case 360: CALL(0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0); break;
2965 case 361: CALL(1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0); break;
2966 case 362: CALL(0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0); break;
2967 case 363: CALL(1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0); break;
2968 case 364: CALL(0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0); break;
2969 case 365: CALL(1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0); break;
2970 case 366: CALL(0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0); break;
2971 case 367: CALL(1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0); break;
2973 case 368: CALL(0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0); break;
2974 case 369: CALL(1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0); break;
2975 case 370: CALL(0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0); break;
2976 case 371: CALL(1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0); break;
2977 case 372: CALL(0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0); break;
2978 case 373: CALL(1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0); break;
2979 case 374: CALL(0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0); break;
2980 case 375: CALL(1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0); break;
2981 case 376: CALL(0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0); break;
2982 case 377: CALL(1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0); break;
2983 case 378: CALL(0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0); break;
2984 case 379: CALL(1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0); break;
2985 case 380: CALL(0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0); break;
2986 case 381: CALL(1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0); break;
2987 case 382: CALL(0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0); break;
2988 case 383: CALL(1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0); break;
2991 case 384: CALL(0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0); break;
2992 case 385: CALL(1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0); break;
2993 case 386: CALL(0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0); break;
2994 case 387: CALL(1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0); break;
2995 case 388: CALL(0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0); break;
2996 case 389: CALL(1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0); break;
2997 case 390: CALL(0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0); break;
2998 case 391: CALL(1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0); break;
2999 case 392: CALL(0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0); break;
3000 case 393: CALL(1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0); break;
3001 case 394: CALL(0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0); break;
3002 case 395: CALL(1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0); break;
3003 case 396: CALL(0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0); break;
3004 case 397: CALL(1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0); break;
3005 case 398: CALL(0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0); break;
3006 case 399: CALL(1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0); break;
3009 case 400: CALL(0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0); break;
3010 case 401: CALL(1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0); break;
3011 case 402: CALL(0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0); break;
3012 case 403: CALL(1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0); break;
3013 case 404: CALL(0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0); break;
3014 case 405: CALL(1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0); break;
3015 case 406: CALL(0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0); break;
3016 case 407: CALL(1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0); break;
3017 case 408: CALL(0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0); break;
3018 case 409: CALL(1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0); break;
3019 case 410: CALL(0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0); break;
3020 case 411: CALL(1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0); break;
3021 case 412: CALL(0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0); break;
3022 case 413: CALL(1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0); break;
3023 case 414: CALL(0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0); break;
3024 case 415: CALL(1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0); break;
3026 case 416: CALL(0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0); break;
3027 case 417: CALL(1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0); break;
3028 case 418: CALL(0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0); break;
3029 case 419: CALL(1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0); break;
3030 case 420: CALL(0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0); break;
3031 case 421: CALL(1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0); break;
3032 case 422: CALL(0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0); break;
3033 case 423: CALL(1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0); break;
3034 case 424: CALL(0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0); break;
3035 case 425: CALL(1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0); break;
3036 case 426: CALL(0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0); break;
3037 case 427: CALL(1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0); break;
3038 case 428: CALL(0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0); break;
3039 case 429: CALL(1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0); break;
3040 case 430: CALL(0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0); break;
3041 case 431: CALL(1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0); break;
3044 case 432: CALL(0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0); break;
3045 case 433: CALL(1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0); break;
3046 case 434: CALL(0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0); break;
3047 case 435: CALL(1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0); break;
3048 case 436: CALL(0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0); break;
3049 case 437: CALL(1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0); break;
3050 case 438: CALL(0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0); break;
3051 case 439: CALL(1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0); break;
3052 case 440: CALL(0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0); break;
3053 case 441: CALL(1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0); break;
3054 case 442: CALL(0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0); break;
3055 case 443: CALL(1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0); break;
3056 case 444: CALL(0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0); break;
3057 case 445: CALL(1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0); break;
3058 case 446: CALL(0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0); break;
3059 case 447: CALL(1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0); break;
3062 case 448: CALL(0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0); break;
3063 case 449: CALL(1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0); break;
3064 case 450: CALL(0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0); break;
3065 case 451: CALL(1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0); break;
3066 case 452: CALL(0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0); break;
3067 case 453: CALL(1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0); break;
3068 case 454: CALL(0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0); break;
3069 case 455: CALL(1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0); break;
3070 case 456: CALL(0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0); break;
3071 case 457: CALL(1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0); break;
3072 case 458: CALL(0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0); break;
3073 case 459: CALL(1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0); break;
3074 case 460: CALL(0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0); break;
3075 case 461: CALL(1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0); break;
3076 case 462: CALL(0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0); break;
3077 case 463: CALL(1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0); break;
3080 case 464: CALL(0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0); break;
3081 case 465: CALL(1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0); break;
3082 case 466: CALL(0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0); break;
3083 case 467: CALL(1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0); break;
3084 case 468: CALL(0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0); break;
3085 case 469: CALL(1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0); break;
3086 case 470: CALL(0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0); break;
3087 case 471: CALL(1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0); break;
3088 case 472: CALL(0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0); break;
3089 case 473: CALL(1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0); break;
3090 case 474: CALL(0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0); break;
3091 case 475: CALL(1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0); break;
3092 case 476: CALL(0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0); break;
3093 case 477: CALL(1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0); break;
3094 case 478: CALL(0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0); break;
3095 case 479: CALL(1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0); break;
3098 case 480: CALL(0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0); break;
3099 case 481: CALL(1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0); break;
3100 case 482: CALL(0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0); break;
3101 case 483: CALL(1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0); break;
3102 case 484: CALL(0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0); break;
3103 case 485: CALL(1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0); break;
3104 case 486: CALL(0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0); break;
3105 case 487: CALL(1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0); break;
3106 case 488: CALL(0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0); break;
3107 case 489: CALL(1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0); break;
3108 case 490: CALL(0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0); break;
3109 case 491: CALL(1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0); break;
3110 case 492: CALL(0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0); break;
3111 case 493: CALL(1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0); break;
3112 case 494: CALL(0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0); break;
3113 case 495: CALL(1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0); break;
3115 case 496: CALL(0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0); break;
3116 case 497: CALL(1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0); break;
3117 case 498: CALL(0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0); break;
3118 case 499: CALL(1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0); break;
3119 case 500: CALL(0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0); break;
3120 case 501: CALL(1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0); break;
3121 case 502: CALL(0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0); break;
3122 case 503: CALL(1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0); break;
3123 case 504: CALL(0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0); break;
3124 case 505: CALL(1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0); break;
3125 case 506: CALL(0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0); break;
3126 case 507: CALL(1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0); break;
3127 case 508: CALL(0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0); break;
3128 case 509: CALL(1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0); break;
3129 case 510: CALL(0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0); break;
3130 case 511: CALL(1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0); break;
3133 * Haochuan: cases 512 through 2047 were generated by the following Python script; option combinations it flags as incompatible are emitted as commented-out cases
3136 #!/usr/bin/env python3
3137 def gen_call(option: int):
3138 doEnergy = option & 1
3139 doVirial = (option >> 1) & 1
3140 doSlow = (option >> 2) & 1
3141 doPairlist = (option >> 3) & 1
3142 doAlch = (option >> 4) & 1
3143 doFEP = (option >> 5) & 1
3144 doTI = (option >> 6) & 1
3145 doStreaming = (option >> 7) & 1
3146 doTable = (option >> 8) & 1
3147 doAlchVdwForceSwitching = (option >> 9) & 1
3148 doNbThole = (option >> 10) & 1
3149 incompatible = False
3150 incompatible = incompatible | (doFEP and doTI)
3151 incompatible = incompatible | (doAlch and ((not doFEP) and (not doTI)))
3152 incompatible = incompatible | ((not doAlch) and (doFEP or doTI or doAlchVdwForceSwitching))
3153 incompatible = incompatible | ((not doTable) and (doAlch or doTI or doFEP or doAlchVdwForceSwitching))
3154 incompatible = incompatible | (doAlch and doNbThole)
3155 incompatible = incompatible | (doFEP and doNbThole)
3156 incompatible = incompatible | (doTI and doNbThole)
3157 incompatible = incompatible | (doAlchVdwForceSwitching and doNbThole)
3160 print(f' // case {option}: CALL({doEnergy}, {doVirial}, {doSlow}, {doPairlist}, {doAlch}, {doFEP}, {doTI}, {doStreaming}, {doTable}, {doAlchVdwForceSwitching}, {doNbThole}); break;')
3162 print(f' case {option}: CALL({doEnergy}, {doVirial}, {doSlow}, {doPairlist}, {doAlch}, {doFEP}, {doTI}, {doStreaming}, {doTable}, {doAlchVdwForceSwitching}, {doNbThole}); break;')
3166 for i in range(512, 2048):
3171 // case 512: CALL(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0); break;
3172 // case 513: CALL(1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0); break;
3173 // case 514: CALL(0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0); break;
3174 // case 515: CALL(1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0); break;
3175 // case 516: CALL(0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0); break;
3176 // case 517: CALL(1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0); break;
3177 // case 518: CALL(0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0); break;
3178 // case 519: CALL(1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0); break;
3179 // case 520: CALL(0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0); break;
3180 // case 521: CALL(1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0); break;
3181 // case 522: CALL(0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0); break;
3182 // case 523: CALL(1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0); break;
3183 // case 524: CALL(0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0); break;
3184 // case 525: CALL(1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0); break;
3185 // case 526: CALL(0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0); break;
3186 // case 527: CALL(1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0); break;
3187 // case 528: CALL(0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0); break;
3188 // case 529: CALL(1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0); break;
3189 // case 530: CALL(0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0); break;
3190 // case 531: CALL(1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0); break;
3191 // case 532: CALL(0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0); break;
3192 // case 533: CALL(1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0); break;
3193 // case 534: CALL(0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0); break;
3194 // case 535: CALL(1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0); break;
3195 // case 536: CALL(0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0); break;
3196 // case 537: CALL(1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0); break;
3197 // case 538: CALL(0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0); break;
3198 // case 539: CALL(1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0); break;
3199 // case 540: CALL(0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0); break;
3200 // case 541: CALL(1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0); break;
3201 // case 542: CALL(0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0); break;
3202 // case 543: CALL(1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0); break;
3203 // case 544: CALL(0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0); break;
3204 // case 545: CALL(1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0); break;
3205 // case 546: CALL(0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0); break;
3206 // case 547: CALL(1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0); break;
3207 // case 548: CALL(0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0); break;
3208 // case 549: CALL(1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0); break;
3209 // case 550: CALL(0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0); break;
3210 // case 551: CALL(1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0); break;
3211 // case 552: CALL(0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0); break;
3212 // case 553: CALL(1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0); break;
3213 // case 554: CALL(0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0); break;
3214 // case 555: CALL(1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0); break;
3215 // case 556: CALL(0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0); break;
3216 // case 557: CALL(1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0); break;
3217 // case 558: CALL(0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0); break;
3218 // case 559: CALL(1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0); break;
3219 // case 560: CALL(0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0); break;
3220 // case 561: CALL(1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0); break;
3221 // case 562: CALL(0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0); break;
3222 // case 563: CALL(1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0); break;
3223 // case 564: CALL(0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0); break;
3224 // case 565: CALL(1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0); break;
3225 // case 566: CALL(0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0); break;
3226 // case 567: CALL(1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0); break;
3227 // case 568: CALL(0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0); break;
3228 // case 569: CALL(1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0); break;
3229 // case 570: CALL(0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0); break;
3230 // case 571: CALL(1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0); break;
3231 // case 572: CALL(0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0); break;
3232 // case 573: CALL(1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0); break;
3233 // case 574: CALL(0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0); break;
3234 // case 575: CALL(1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0); break;
3235 // case 576: CALL(0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0); break;
3236 // case 577: CALL(1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0); break;
3237 // case 578: CALL(0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0); break;
3238 // case 579: CALL(1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0); break;
3239 // case 580: CALL(0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0); break;
3240 // case 581: CALL(1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0); break;
3241 // case 582: CALL(0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0); break;
3242 // case 583: CALL(1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0); break;
3243 // case 584: CALL(0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0); break;
3244 // case 585: CALL(1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0); break;
3245 // case 586: CALL(0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0); break;
3246 // case 587: CALL(1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0); break;
3247 // case 588: CALL(0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0); break;
3248 // case 589: CALL(1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0); break;
3249 // case 590: CALL(0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0); break;
3250 // case 591: CALL(1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0); break;
3251 // case 592: CALL(0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0); break;
3252 // case 593: CALL(1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0); break;
3253 // case 594: CALL(0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0); break;
3254 // case 595: CALL(1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0); break;
3255 // case 596: CALL(0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0); break;
3256 // case 597: CALL(1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0); break;
3257 // case 598: CALL(0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0); break;
3258 // case 599: CALL(1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0); break;
3259 // case 600: CALL(0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0); break;
3260 // case 601: CALL(1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0); break;
3261 // case 602: CALL(0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0); break;
3262 // case 603: CALL(1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0); break;
3263 // case 604: CALL(0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0); break;
3264 // case 605: CALL(1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0); break;
3265 // case 606: CALL(0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0); break;
3266 // case 607: CALL(1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0); break;
3267 // case 608: CALL(0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0); break;
3268 // case 609: CALL(1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0); break;
3269 // case 610: CALL(0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0); break;
3270 // case 611: CALL(1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0); break;
3271 // case 612: CALL(0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0); break;
3272 // case 613: CALL(1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0); break;
3273 // case 614: CALL(0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0); break;
3274 // case 615: CALL(1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0); break;
3275 // case 616: CALL(0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0); break;
3276 // case 617: CALL(1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0); break;
3277 // case 618: CALL(0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0); break;
3278 // case 619: CALL(1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0); break;
3279 // case 620: CALL(0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0); break;
3280 // case 621: CALL(1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0); break;
3281 // case 622: CALL(0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0); break;
3282 // case 623: CALL(1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0); break;
3283 // case 624: CALL(0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0); break;
3284 // case 625: CALL(1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0); break;
3285 // case 626: CALL(0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0); break;
3286 // case 627: CALL(1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0); break;
3287 // case 628: CALL(0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0); break;
3288 // case 629: CALL(1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0); break;
3289 // case 630: CALL(0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0); break;
3290 // case 631: CALL(1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0); break;
3291 // case 632: CALL(0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0); break;
3292 // case 633: CALL(1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0); break;
3293 // case 634: CALL(0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0); break;
3294 // case 635: CALL(1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0); break;
3295 // case 636: CALL(0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0); break;
3296 // case 637: CALL(1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0); break;
3297 // case 638: CALL(0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0); break;
3298 // case 639: CALL(1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0); break;
3299 // case 640: CALL(0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0); break;
3300 // case 641: CALL(1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0); break;
3301 // case 642: CALL(0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0); break;
3302 // case 643: CALL(1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0); break;
3303 // case 644: CALL(0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0); break;
3304 // case 645: CALL(1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0); break;
3305 // case 646: CALL(0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0); break;
3306 // case 647: CALL(1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0); break;
3307 // case 648: CALL(0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0); break;
3308 // case 649: CALL(1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0); break;
3309 // case 650: CALL(0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0); break;
3310 // case 651: CALL(1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0); break;
3311 // case 652: CALL(0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0); break;
3312 // case 653: CALL(1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0); break;
3313 // case 654: CALL(0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0); break;
3314 // case 655: CALL(1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0); break;
3315 // case 656: CALL(0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0); break;
3316 // case 657: CALL(1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0); break;
3317 // case 658: CALL(0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0); break;
3318 // case 659: CALL(1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0); break;
3319 // case 660: CALL(0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0); break;
3320 // case 661: CALL(1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0); break;
3321 // case 662: CALL(0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0); break;
3322 // case 663: CALL(1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0); break;
3323 // case 664: CALL(0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0); break;
3324 // case 665: CALL(1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0); break;
3325 // case 666: CALL(0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0); break;
3326 // case 667: CALL(1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0); break;
3327 // case 668: CALL(0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0); break;
3328 // case 669: CALL(1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0); break;
3329 // case 670: CALL(0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0); break;
3330 // case 671: CALL(1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0); break;
3331 // case 672: CALL(0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0); break;
3332 // case 673: CALL(1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0); break;
3333 // case 674: CALL(0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0); break;
3334 // case 675: CALL(1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0); break;
3335 // case 676: CALL(0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0); break;
3336 // case 677: CALL(1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0); break;
3337 // case 678: CALL(0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0); break;
3338 // case 679: CALL(1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0); break;
3339 // case 680: CALL(0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0); break;
3340 // case 681: CALL(1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0); break;
3341 // case 682: CALL(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0); break;
3342 // case 683: CALL(1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0); break;
3343 // case 684: CALL(0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0); break;
3344 // case 685: CALL(1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0); break;
3345 // case 686: CALL(0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0); break;
3346 // case 687: CALL(1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0); break;
3347 // case 688: CALL(0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0); break;
3348 // case 689: CALL(1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0); break;
3349 // case 690: CALL(0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0); break;
3350 // case 691: CALL(1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0); break;
3351 // case 692: CALL(0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0); break;
3352 // case 693: CALL(1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0); break;
3353 // case 694: CALL(0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0); break;
3354 // case 695: CALL(1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0); break;
3355 // case 696: CALL(0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0); break;
3356 // case 697: CALL(1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0); break;
3357 // case 698: CALL(0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0); break;
3358 // case 699: CALL(1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0); break;
3359 // case 700: CALL(0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0); break;
3360 // case 701: CALL(1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0); break;
3361 // case 702: CALL(0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0); break;
3362 // case 703: CALL(1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0); break;
3363 // case 704: CALL(0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0); break;
3364 // case 705: CALL(1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0); break;
3365 // case 706: CALL(0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0); break;
3366 // case 707: CALL(1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0); break;
3367 // case 708: CALL(0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0); break;
3368 // case 709: CALL(1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0); break;
3369 // case 710: CALL(0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0); break;
3370 // case 711: CALL(1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0); break;
3371 // case 712: CALL(0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0); break;
3372 // case 713: CALL(1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0); break;
3373 // case 714: CALL(0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0); break;
3374 // case 715: CALL(1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0); break;
3375 // case 716: CALL(0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0); break;
3376 // case 717: CALL(1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0); break;
3377 // case 718: CALL(0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0); break;
3378 // case 719: CALL(1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0); break;
3379 // case 720: CALL(0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0); break;
3380 // case 721: CALL(1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0); break;
3381 // case 722: CALL(0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0); break;
3382 // case 723: CALL(1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0); break;
3383 // case 724: CALL(0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0); break;
3384 // case 725: CALL(1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0); break;
3385 // case 726: CALL(0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0); break;
3386 // case 727: CALL(1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0); break;
3387 // case 728: CALL(0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0); break;
3388 // case 729: CALL(1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0); break;
3389 // case 730: CALL(0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0); break;
3390 // case 731: CALL(1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0); break;
3391 // case 732: CALL(0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0); break;
3392 // case 733: CALL(1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0); break;
3393 // case 734: CALL(0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0); break;
3394 // case 735: CALL(1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0); break;
3395 // case 736: CALL(0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0); break;
3396 // case 737: CALL(1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0); break;
3397 // case 738: CALL(0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0); break;
3398 // case 739: CALL(1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0); break;
3399 // case 740: CALL(0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0); break;
3400 // case 741: CALL(1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0); break;
3401 // case 742: CALL(0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0); break;
3402 // case 743: CALL(1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0); break;
3403 // case 744: CALL(0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0); break;
3404 // case 745: CALL(1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0); break;
3405 // case 746: CALL(0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0); break;
3406 // case 747: CALL(1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0); break;
3407 // case 748: CALL(0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0); break;
3408 // case 749: CALL(1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0); break;
3409 // case 750: CALL(0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0); break;
3410 // case 751: CALL(1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0); break;
3411 // case 752: CALL(0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0); break;
3412 // case 753: CALL(1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0); break;
3413 // case 754: CALL(0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0); break;
3414 // case 755: CALL(1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0); break;
3415 // case 756: CALL(0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0); break;
3416 // case 757: CALL(1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0); break;
3417 // case 758: CALL(0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0); break;
3418 // case 759: CALL(1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0); break;
3419 // case 760: CALL(0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0); break;
3420 // case 761: CALL(1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0); break;
3421 // case 762: CALL(0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0); break;
3422 // case 763: CALL(1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0); break;
3423 // case 764: CALL(0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0); break;
3424 // case 765: CALL(1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0); break;
3425 // case 766: CALL(0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0); break;
3426 // case 767: CALL(1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0); break;
3427 // case 768: CALL(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0); break;
3428 // case 769: CALL(1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0); break;
3429 // case 770: CALL(0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0); break;
3430 // case 771: CALL(1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0); break;
3431 // case 772: CALL(0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0); break;
3432 // case 773: CALL(1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0); break;
3433 // case 774: CALL(0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0); break;
3434 // case 775: CALL(1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0); break;
3435 // case 776: CALL(0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0); break;
3436 // case 777: CALL(1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0); break;
3437 // case 778: CALL(0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0); break;
3438 // case 779: CALL(1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0); break;
3439 // case 780: CALL(0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0); break;
3440 // case 781: CALL(1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0); break;
3441 // case 782: CALL(0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0); break;
3442 // case 783: CALL(1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0); break;
3443 // case 784: CALL(0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0); break;
3444 // case 785: CALL(1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0); break;
3445 // case 786: CALL(0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0); break;
3446 // case 787: CALL(1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0); break;
3447 // case 788: CALL(0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0); break;
3448 // case 789: CALL(1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0); break;
3449 // case 790: CALL(0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0); break;
3450 // case 791: CALL(1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0); break;
3451 // case 792: CALL(0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0); break;
3452 // case 793: CALL(1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0); break;
3453 // case 794: CALL(0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0); break;
3454 // case 795: CALL(1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0); break;
3455 // case 796: CALL(0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0); break;
3456 // case 797: CALL(1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0); break;
3457 // case 798: CALL(0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0); break;
3458 // case 799: CALL(1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0); break;
3459 // case 800: CALL(0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0); break;
3460 // case 801: CALL(1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0); break;
3461 // case 802: CALL(0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0); break;
3462 // case 803: CALL(1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0); break;
3463 // case 804: CALL(0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0); break;
3464 // case 805: CALL(1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0); break;
3465 // case 806: CALL(0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0); break;
3466 // case 807: CALL(1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0); break;
3467 // case 808: CALL(0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0); break;
3468 // case 809: CALL(1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0); break;
3469 // case 810: CALL(0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0); break;
3470 // case 811: CALL(1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0); break;
3471 // case 812: CALL(0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0); break;
3472 // case 813: CALL(1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0); break;
3473 // case 814: CALL(0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0); break;
3474 // case 815: CALL(1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0); break;
3475 case 816: CALL(0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0); break;
3476 case 817: CALL(1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0); break;
3477 case 818: CALL(0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0); break;
3478 case 819: CALL(1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0); break;
3479 case 820: CALL(0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0); break;
3480 case 821: CALL(1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0); break;
3481 case 822: CALL(0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0); break;
3482 case 823: CALL(1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0); break;
3483 case 824: CALL(0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0); break;
3484 case 825: CALL(1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0); break;
3485 case 826: CALL(0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0); break;
3486 case 827: CALL(1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0); break;
3487 case 828: CALL(0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0); break;
3488 case 829: CALL(1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0); break;
3489 case 830: CALL(0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0); break;
3490 case 831: CALL(1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0); break;
3491 // case 832: CALL(0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0); break;
3492 // case 833: CALL(1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0); break;
3493 // case 834: CALL(0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0); break;
3494 // case 835: CALL(1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0); break;
3495 // case 836: CALL(0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0); break;
3496 // case 837: CALL(1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0); break;
3497 // case 838: CALL(0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0); break;
3498 // case 839: CALL(1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0); break;
3499 // case 840: CALL(0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0); break;
3500 // case 841: CALL(1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0); break;
3501 // case 842: CALL(0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0); break;
3502 // case 843: CALL(1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0); break;
3503 // case 844: CALL(0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0); break;
3504 // case 845: CALL(1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0); break;
3505 // case 846: CALL(0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0); break;
3506 // case 847: CALL(1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0); break;
3507 case 848: CALL(0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0); break;
3508 case 849: CALL(1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0); break;
3509 case 850: CALL(0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0); break;
3510 case 851: CALL(1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0); break;
3511 case 852: CALL(0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0); break;
3512 case 853: CALL(1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0); break;
3513 case 854: CALL(0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0); break;
3514 case 855: CALL(1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0); break;
3515 case 856: CALL(0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0); break;
3516 case 857: CALL(1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0); break;
3517 case 858: CALL(0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0); break;
3518 case 859: CALL(1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0); break;
3519 case 860: CALL(0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0); break;
3520 case 861: CALL(1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0); break;
3521 case 862: CALL(0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0); break;
3522 case 863: CALL(1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0); break;
3523 // case 864: CALL(0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0); break;
3524 // case 865: CALL(1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0); break;
3525 // case 866: CALL(0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0); break;
3526 // case 867: CALL(1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0); break;
3527 // case 868: CALL(0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0); break;
3528 // case 869: CALL(1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0); break;
3529 // case 870: CALL(0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0); break;
3530 // case 871: CALL(1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0); break;
3531 // case 872: CALL(0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0); break;
3532 // case 873: CALL(1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0); break;
3533 // case 874: CALL(0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0); break;
3534 // case 875: CALL(1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0); break;
3535 // case 876: CALL(0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0); break;
3536 // case 877: CALL(1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0); break;
3537 // case 878: CALL(0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0); break;
3538 // case 879: CALL(1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0); break;
3539 // case 880: CALL(0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0); break;
3540 // case 881: CALL(1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0); break;
3541 // case 882: CALL(0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0); break;
3542 // case 883: CALL(1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0); break;
3543 // case 884: CALL(0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0); break;
3544 // case 885: CALL(1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0); break;
3545 // case 886: CALL(0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0); break;
3546 // case 887: CALL(1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0); break;
3547 // case 888: CALL(0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0); break;
3548 // case 889: CALL(1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0); break;
3549 // case 890: CALL(0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0); break;
3550 // case 891: CALL(1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0); break;
3551 // case 892: CALL(0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0); break;
3552 // case 893: CALL(1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0); break;
3553 // case 894: CALL(0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0); break;
3554 // case 895: CALL(1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0); break;
3555 // case 896: CALL(0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0); break;
3556 // case 897: CALL(1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0); break;
3557 // case 898: CALL(0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0); break;
3558 // case 899: CALL(1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0); break;
3559 // case 900: CALL(0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0); break;
3560 // case 901: CALL(1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0); break;
3561 // case 902: CALL(0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0); break;
3562 // case 903: CALL(1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0); break;
3563 // case 904: CALL(0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0); break;
3564 // case 905: CALL(1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0); break;
3565 // case 906: CALL(0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0); break;
3566 // case 907: CALL(1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0); break;
3567 // case 908: CALL(0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0); break;
3568 // case 909: CALL(1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0); break;
3569 // case 910: CALL(0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0); break;
3570 // case 911: CALL(1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0); break;
3571 // case 912: CALL(0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0); break;
3572 // case 913: CALL(1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0); break;
3573 // case 914: CALL(0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0); break;
3574 // case 915: CALL(1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0); break;
3575 // case 916: CALL(0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0); break;
3576 // case 917: CALL(1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0); break;
3577 // case 918: CALL(0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0); break;
3578 // case 919: CALL(1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0); break;
3579 // case 920: CALL(0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0); break;
3580 // case 921: CALL(1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0); break;
3581 // case 922: CALL(0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0); break;
3582 // case 923: CALL(1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0); break;
3583 // case 924: CALL(0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0); break;
3584 // case 925: CALL(1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0); break;
3585 // case 926: CALL(0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0); break;
3586 // case 927: CALL(1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0); break;
3587 // case 928: CALL(0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0); break;
3588 // case 929: CALL(1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0); break;
3589 // case 930: CALL(0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0); break;
3590 // case 931: CALL(1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0); break;
3591 // case 932: CALL(0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0); break;
3592 // case 933: CALL(1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0); break;
3593 // case 934: CALL(0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0); break;
3594 // case 935: CALL(1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0); break;
3595 // case 936: CALL(0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0); break;
3596 // case 937: CALL(1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0); break;
3597 // case 938: CALL(0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0); break;
3598 // case 939: CALL(1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0); break;
3599 // case 940: CALL(0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0); break;
3600 // case 941: CALL(1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0); break;
3601 // case 942: CALL(0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0); break;
3602 // case 943: CALL(1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0); break;
3603 case 944: CALL(0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0); break;
3604 case 945: CALL(1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0); break;
3605 case 946: CALL(0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0); break;
3606 case 947: CALL(1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0); break;
3607 case 948: CALL(0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0); break;
3608 case 949: CALL(1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0); break;
3609 case 950: CALL(0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0); break;
3610 case 951: CALL(1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0); break;
3611 case 952: CALL(0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0); break;
3612 case 953: CALL(1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0); break;
3613 case 954: CALL(0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0); break;
3614 case 955: CALL(1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0); break;
3615 case 956: CALL(0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0); break;
3616 case 957: CALL(1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0); break;
3617 case 958: CALL(0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0); break;
3618 case 959: CALL(1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0); break;
3619 // case 960: CALL(0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0); break;
3620 // case 961: CALL(1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0); break;
3621 // case 962: CALL(0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0); break;
3622 // case 963: CALL(1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0); break;
3623 // case 964: CALL(0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0); break;
3624 // case 965: CALL(1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0); break;
3625 // case 966: CALL(0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0); break;
3626 // case 967: CALL(1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0); break;
3627 // case 968: CALL(0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0); break;
3628 // case 969: CALL(1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0); break;
3629 // case 970: CALL(0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0); break;
3630 // case 971: CALL(1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0); break;
3631 // case 972: CALL(0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0); break;
3632 // case 973: CALL(1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0); break;
3633 // case 974: CALL(0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0); break;
3634 // case 975: CALL(1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0); break;
3635 case 976: CALL(0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0); break;
3636 case 977: CALL(1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0); break;
3637 case 978: CALL(0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0); break;
3638 case 979: CALL(1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0); break;
3639 case 980: CALL(0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0); break;
3640 case 981: CALL(1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0); break;
3641 case 982: CALL(0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0); break;
3642 case 983: CALL(1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0); break;
3643 case 984: CALL(0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0); break;
3644 case 985: CALL(1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0); break;
3645 case 986: CALL(0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0); break;
3646 case 987: CALL(1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0); break;
3647 case 988: CALL(0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0); break;
3648 case 989: CALL(1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0); break;
3649 case 990: CALL(0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0); break;
3650 case 991: CALL(1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0); break;
3651 // case 992: CALL(0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0); break;
3652 // case 993: CALL(1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0); break;
3653 // case 994: CALL(0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0); break;
3654 // case 995: CALL(1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0); break;
3655 // case 996: CALL(0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0); break;
3656 // case 997: CALL(1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0); break;
3657 // case 998: CALL(0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0); break;
3658 // case 999: CALL(1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0); break;
3659 // case 1000: CALL(0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0); break;
3660 // case 1001: CALL(1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0); break;
3661 // case 1002: CALL(0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0); break;
3662 // case 1003: CALL(1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0); break;
3663 // case 1004: CALL(0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0); break;
3664 // case 1005: CALL(1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0); break;
3665 // case 1006: CALL(0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0); break;
3666 // case 1007: CALL(1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0); break;
3667 // case 1008: CALL(0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0); break;
3668 // case 1009: CALL(1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0); break;
3669 // case 1010: CALL(0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0); break;
3670 // case 1011: CALL(1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0); break;
3671 // case 1012: CALL(0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0); break;
3672 // case 1013: CALL(1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0); break;
3673 // case 1014: CALL(0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0); break;
3674 // case 1015: CALL(1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0); break;
3675 // case 1016: CALL(0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0); break;
3676 // case 1017: CALL(1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0); break;
3677 // case 1018: CALL(0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0); break;
3678 // case 1019: CALL(1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0); break;
3679 // case 1020: CALL(0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0); break;
3680 // case 1021: CALL(1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0); break;
3681 // case 1022: CALL(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0); break;
3682 // case 1023: CALL(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0); break;
3683 case 1024: CALL(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); break;
3684 case 1025: CALL(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); break;
3685 case 1026: CALL(0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1); break;
3686 case 1027: CALL(1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1); break;
3687 case 1028: CALL(0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1); break;
3688 case 1029: CALL(1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1); break;
3689 case 1030: CALL(0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1); break;
3690 case 1031: CALL(1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1); break;
3691 case 1032: CALL(0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1); break;
3692 case 1033: CALL(1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1); break;
3693 case 1034: CALL(0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1); break;
3694 case 1035: CALL(1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1); break;
3695 case 1036: CALL(0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1); break;
3696 case 1037: CALL(1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1); break;
3697 case 1038: CALL(0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1); break;
3698 case 1039: CALL(1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1); break;
3699 // case 1040: CALL(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1); break;
3700 // case 1041: CALL(1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1); break;
3701 // case 1042: CALL(0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1); break;
3702 // case 1043: CALL(1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1); break;
3703 // case 1044: CALL(0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1); break;
3704 // case 1045: CALL(1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1); break;
3705 // case 1046: CALL(0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1); break;
3706 // case 1047: CALL(1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1); break;
3707 // case 1048: CALL(0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1); break;
3708 // case 1049: CALL(1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1); break;
3709 // case 1050: CALL(0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1); break;
3710 // case 1051: CALL(1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1); break;
3711 // case 1052: CALL(0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1); break;
3712 // case 1053: CALL(1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1); break;
3713 // case 1054: CALL(0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1); break;
3714 // case 1055: CALL(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1); break;
3715 // case 1056: CALL(0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1); break;
3716 // case 1057: CALL(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1); break;
3717 // case 1058: CALL(0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1); break;
3718 // case 1059: CALL(1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1); break;
3719 // case 1060: CALL(0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1); break;
3720 // case 1061: CALL(1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1); break;
3721 // case 1062: CALL(0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1); break;
3722 // case 1063: CALL(1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1); break;
3723 // case 1064: CALL(0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1); break;
3724 // case 1065: CALL(1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1); break;
3725 // case 1066: CALL(0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1); break;
3726 // case 1067: CALL(1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1); break;
3727 // case 1068: CALL(0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1); break;
3728 // case 1069: CALL(1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1); break;
3729 // case 1070: CALL(0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1); break;
3730 // case 1071: CALL(1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1); break;
3731 // case 1072: CALL(0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1); break;
3732 // case 1073: CALL(1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1); break;
3733 // case 1074: CALL(0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1); break;
3734 // case 1075: CALL(1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1); break;
3735 // case 1076: CALL(0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1); break;
3736 // case 1077: CALL(1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1); break;
3737 // case 1078: CALL(0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1); break;
3738 // case 1079: CALL(1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1); break;
3739 // case 1080: CALL(0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1); break;
3740 // case 1081: CALL(1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1); break;
3741 // case 1082: CALL(0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1); break;
3742 // case 1083: CALL(1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1); break;
3743 // case 1084: CALL(0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1); break;
3744 // case 1085: CALL(1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1); break;
3745 // case 1086: CALL(0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1); break;
3746 // case 1087: CALL(1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1); break;
3747 // case 1088: CALL(0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1); break;
3748 // case 1089: CALL(1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1); break;
3749 // case 1090: CALL(0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1); break;
3750 // case 1091: CALL(1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1); break;
3751 // case 1092: CALL(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1); break;
3752 // case 1093: CALL(1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1); break;
3753 // case 1094: CALL(0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1); break;
3754 // case 1095: CALL(1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1); break;
3755 // case 1096: CALL(0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1); break;
3756 // case 1097: CALL(1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1); break;
3757 // case 1098: CALL(0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1); break;
3758 // case 1099: CALL(1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1); break;
3759 // case 1100: CALL(0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1); break;
3760 // case 1101: CALL(1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1); break;
3761 // case 1102: CALL(0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1); break;
3762 // case 1103: CALL(1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1); break;
3763 // case 1104: CALL(0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1); break;
3764 // case 1105: CALL(1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1); break;
3765 // case 1106: CALL(0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1); break;
3766 // case 1107: CALL(1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1); break;
3767 // case 1108: CALL(0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1); break;
3768 // case 1109: CALL(1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1); break;
3769 // case 1110: CALL(0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1); break;
3770 // case 1111: CALL(1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1); break;
3771 // case 1112: CALL(0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1); break;
3772 // case 1113: CALL(1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1); break;
3773 // case 1114: CALL(0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1); break;
3774 // case 1115: CALL(1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1); break;
3775 // case 1116: CALL(0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1); break;
3776 // case 1117: CALL(1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1); break;
3777 // case 1118: CALL(0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1); break;
3778 // case 1119: CALL(1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1); break;
3779 // case 1120: CALL(0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1); break;
3780 // case 1121: CALL(1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1); break;
3781 // case 1122: CALL(0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1); break;
3782 // case 1123: CALL(1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1); break;
3783 // case 1124: CALL(0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1); break;
3784 // case 1125: CALL(1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1); break;
3785 // case 1126: CALL(0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1); break;
3786 // case 1127: CALL(1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1); break;
3787 // case 1128: CALL(0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1); break;
3788 // case 1129: CALL(1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1); break;
3789 // case 1130: CALL(0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1); break;
3790 // case 1131: CALL(1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1); break;
3791 // case 1132: CALL(0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1); break;
3792 // case 1133: CALL(1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1); break;
3793 // case 1134: CALL(0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1); break;
3794 // case 1135: CALL(1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1); break;
3795 // case 1136: CALL(0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1); break;
3796 // case 1137: CALL(1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1); break;
3797 // case 1138: CALL(0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1); break;
3798 // case 1139: CALL(1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1); break;
3799 // case 1140: CALL(0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1); break;
3800 // case 1141: CALL(1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1); break;
3801 // case 1142: CALL(0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1); break;
3802 // case 1143: CALL(1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1); break;
3803 // case 1144: CALL(0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1); break;
3804 // case 1145: CALL(1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1); break;
3805 // case 1146: CALL(0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1); break;
3806 // case 1147: CALL(1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1); break;
3807 // case 1148: CALL(0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1); break;
3808 // case 1149: CALL(1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1); break;
3809 // case 1150: CALL(0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1); break;
3810 // case 1151: CALL(1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1); break;
3811 case 1152: CALL(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1); break;
3812 case 1153: CALL(1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1); break;
3813 case 1154: CALL(0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1); break;
3814 case 1155: CALL(1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1); break;
3815 case 1156: CALL(0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1); break;
3816 case 1157: CALL(1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1); break;
3817 case 1158: CALL(0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1); break;
3818 case 1159: CALL(1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1); break;
3819 case 1160: CALL(0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1); break;
3820 case 1161: CALL(1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1); break;
3821 case 1162: CALL(0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1); break;
3822 case 1163: CALL(1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1); break;
3823 case 1164: CALL(0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1); break;
3824 case 1165: CALL(1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1); break;
3825 case 1166: CALL(0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1); break;
3826 case 1167: CALL(1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1); break;
3827 // case 1168: CALL(0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1); break;
3828 // case 1169: CALL(1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1); break;
3829 // case 1170: CALL(0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1); break;
3830 // case 1171: CALL(1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1); break;
3831 // case 1172: CALL(0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1); break;
3832 // case 1173: CALL(1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1); break;
3833 // case 1174: CALL(0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1); break;
3834 // case 1175: CALL(1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1); break;
3835 // case 1176: CALL(0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1); break;
3836 // case 1177: CALL(1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1); break;
3837 // case 1178: CALL(0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1); break;
3838 // case 1179: CALL(1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1); break;
3839 // case 1180: CALL(0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1); break;
3840 // case 1181: CALL(1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1); break;
3841 // case 1182: CALL(0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1); break;
3842 // case 1183: CALL(1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1); break;
3843 // case 1184: CALL(0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1); break;
3844 // case 1185: CALL(1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1); break;
3845 // case 1186: CALL(0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1); break;
3846 // case 1187: CALL(1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1); break;
3847 // case 1188: CALL(0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1); break;
3848 // case 1189: CALL(1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1); break;
3849 // case 1190: CALL(0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1); break;
3850 // case 1191: CALL(1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1); break;
3851 // case 1192: CALL(0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1); break;
3852 // case 1193: CALL(1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1); break;
3853 // case 1194: CALL(0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1); break;
3854 // case 1195: CALL(1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1); break;
3855 // case 1196: CALL(0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1); break;
3856 // case 1197: CALL(1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1); break;
3857 // case 1198: CALL(0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1); break;
3858 // case 1199: CALL(1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1); break;
3859 // case 1200: CALL(0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1); break;
3860 // case 1201: CALL(1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1); break;
3861 // case 1202: CALL(0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1); break;
3862 // case 1203: CALL(1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1); break;
3863 // case 1204: CALL(0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1); break;
3864 // case 1205: CALL(1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1); break;
3865 // case 1206: CALL(0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1); break;
3866 // case 1207: CALL(1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1); break;
3867 // case 1208: CALL(0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1); break;
3868 // case 1209: CALL(1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1); break;
3869 // case 1210: CALL(0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1); break;
3870 // case 1211: CALL(1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1); break;
3871 // case 1212: CALL(0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1); break;
3872 // case 1213: CALL(1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1); break;
3873 // case 1214: CALL(0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1); break;
3874 // case 1215: CALL(1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1); break;
3875 // case 1216: CALL(0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1); break;
3876 // case 1217: CALL(1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1); break;
3877 // case 1218: CALL(0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1); break;
3878 // case 1219: CALL(1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1); break;
3879 // case 1220: CALL(0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1); break;
3880 // case 1221: CALL(1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1); break;
3881 // case 1222: CALL(0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1); break;
3882 // case 1223: CALL(1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1); break;
3883 // case 1224: CALL(0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1); break;
3884 // case 1225: CALL(1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1); break;
3885 // case 1226: CALL(0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1); break;
3886 // case 1227: CALL(1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1); break;
3887 // case 1228: CALL(0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1); break;
3888 // case 1229: CALL(1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1); break;
3889 // case 1230: CALL(0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1); break;
3890 // case 1231: CALL(1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1); break;
3891 // case 1232: CALL(0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1); break;
3892 // case 1233: CALL(1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1); break;
3893 // case 1234: CALL(0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1); break;
3894 // case 1235: CALL(1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1); break;
3895 // case 1236: CALL(0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1); break;
3896 // case 1237: CALL(1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1); break;
3897 // case 1238: CALL(0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1); break;
3898 // case 1239: CALL(1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1); break;
3899 // case 1240: CALL(0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1); break;
3900 // case 1241: CALL(1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1); break;
3901 // case 1242: CALL(0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1); break;
3902 // case 1243: CALL(1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1); break;
3903 // case 1244: CALL(0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1); break;
3904 // case 1245: CALL(1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1); break;
3905 // case 1246: CALL(0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1); break;
3906 // case 1247: CALL(1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1); break;
3907 // case 1248: CALL(0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1); break;
3908 // case 1249: CALL(1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1); break;
3909 // case 1250: CALL(0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1); break;
3910 // case 1251: CALL(1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1); break;
3911 // case 1252: CALL(0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1); break;
3912 // case 1253: CALL(1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1); break;
3913 // case 1254: CALL(0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1); break;
3914 // case 1255: CALL(1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1); break;
3915 // case 1256: CALL(0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1); break;
3916 // case 1257: CALL(1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1); break;
3917 // case 1258: CALL(0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1); break;
3918 // case 1259: CALL(1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1); break;
3919 // case 1260: CALL(0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1); break;
3920 // case 1261: CALL(1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1); break;
3921 // case 1262: CALL(0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1); break;
3922 // case 1263: CALL(1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1); break;
3923 // case 1264: CALL(0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1); break;
3924 // case 1265: CALL(1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1); break;
3925 // case 1266: CALL(0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1); break;
3926 // case 1267: CALL(1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1); break;
3927 // case 1268: CALL(0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1); break;
3928 // case 1269: CALL(1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1); break;
3929 // case 1270: CALL(0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1); break;
3930 // case 1271: CALL(1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1); break;
3931 // case 1272: CALL(0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1); break;
3932 // case 1273: CALL(1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1); break;
3933 // case 1274: CALL(0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1); break;
3934 // case 1275: CALL(1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1); break;
3935 // case 1276: CALL(0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1); break;
3936 // case 1277: CALL(1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1); break;
3937 // case 1278: CALL(0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1); break;
3938 // case 1279: CALL(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1); break;
3939 case 1280: CALL(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1); break;
3940 case 1281: CALL(1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1); break;
3941 case 1282: CALL(0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1); break;
3942 case 1283: CALL(1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1); break;
3943 case 1284: CALL(0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1); break;
3944 case 1285: CALL(1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1); break;
3945 case 1286: CALL(0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1); break;
3946 case 1287: CALL(1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1); break;
3947 case 1288: CALL(0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1); break;
3948 case 1289: CALL(1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1); break;
3949 case 1290: CALL(0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1); break;
3950 case 1291: CALL(1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1); break;
3951 case 1292: CALL(0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1); break;
3952 case 1293: CALL(1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1); break;
3953 case 1294: CALL(0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1); break;
3954 case 1295: CALL(1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1); break;
3955 // case 1296: CALL(0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1); break;
3956 // case 1297: CALL(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1); break;
3957 // case 1298: CALL(0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1); break;
3958 // case 1299: CALL(1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1); break;
3959 // case 1300: CALL(0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1); break;
3960 // case 1301: CALL(1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1); break;
3961 // case 1302: CALL(0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1); break;
3962 // case 1303: CALL(1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1); break;
3963 // case 1304: CALL(0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1); break;
3964 // case 1305: CALL(1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1); break;
3965 // case 1306: CALL(0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1); break;
3966 // case 1307: CALL(1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1); break;
3967 // case 1308: CALL(0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1); break;
3968 // case 1309: CALL(1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1); break;
3969 // case 1310: CALL(0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1); break;
3970 // case 1311: CALL(1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1); break;
3971 // case 1312: CALL(0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1); break;
3972 // case 1313: CALL(1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1); break;
3973 // case 1314: CALL(0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1); break;
3974 // case 1315: CALL(1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1); break;
3975 // case 1316: CALL(0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1); break;
3976 // case 1317: CALL(1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1); break;
3977 // case 1318: CALL(0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1); break;
3978 // case 1319: CALL(1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1); break;
3979 // case 1320: CALL(0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1); break;
3980 // case 1321: CALL(1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1); break;
3981 // case 1322: CALL(0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1); break;
3982 // case 1323: CALL(1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1); break;
3983 // case 1324: CALL(0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1); break;
3984 // case 1325: CALL(1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1); break;
3985 // case 1326: CALL(0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1); break;
3986 // case 1327: CALL(1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1); break;
3987 // case 1328: CALL(0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1); break;
3988 // case 1329: CALL(1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1); break;
3989 // case 1330: CALL(0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1); break;
3990 // case 1331: CALL(1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1); break;
3991 // case 1332: CALL(0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1); break;
3992 // case 1333: CALL(1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1); break;
3993 // case 1334: CALL(0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1); break;
3994 // case 1335: CALL(1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1); break;
3995 // case 1336: CALL(0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1); break;
3996 // case 1337: CALL(1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1); break;
3997 // case 1338: CALL(0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1); break;
3998 // case 1339: CALL(1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1); break;
3999 // case 1340: CALL(0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1); break;
4000 // case 1341: CALL(1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1); break;
4001 // case 1342: CALL(0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1); break;
4002 // case 1343: CALL(1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1); break;
4003 // case 1344: CALL(0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1); break;
4004 // case 1345: CALL(1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1); break;
4005 // case 1346: CALL(0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1); break;
4006 // case 1347: CALL(1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1); break;
4007 // case 1348: CALL(0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1); break;
4008 // case 1349: CALL(1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1); break;
4009 // case 1350: CALL(0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1); break;
4010 // case 1351: CALL(1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1); break;
4011 // case 1352: CALL(0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1); break;
4012 // case 1353: CALL(1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1); break;
4013 // case 1354: CALL(0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1); break;
4014 // case 1355: CALL(1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1); break;
4015 // case 1356: CALL(0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1); break;
4016 // case 1357: CALL(1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1); break;
4017 // case 1358: CALL(0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1); break;
4018 // case 1359: CALL(1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1); break;
4019 // case 1360: CALL(0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1); break;
4020 // case 1361: CALL(1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1); break;
4021 // case 1362: CALL(0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1); break;
4022 // case 1363: CALL(1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1); break;
4023 // case 1364: CALL(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1); break;
4024 // case 1365: CALL(1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1); break;
4025 // case 1366: CALL(0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1); break;
4026 // case 1367: CALL(1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1); break;
4027 // case 1368: CALL(0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1); break;
4028 // case 1369: CALL(1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1); break;
4029 // case 1370: CALL(0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1); break;
4030 // case 1371: CALL(1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1); break;
4031 // case 1372: CALL(0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1); break;
4032 // case 1373: CALL(1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1); break;
4033 // case 1374: CALL(0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1); break;
4034 // case 1375: CALL(1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1); break;
4035 // case 1376: CALL(0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1); break;
4036 // case 1377: CALL(1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1); break;
4037 // case 1378: CALL(0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1); break;
4038 // case 1379: CALL(1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1); break;
4039 // case 1380: CALL(0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1); break;
4040 // case 1381: CALL(1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1); break;
4041 // case 1382: CALL(0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1); break;
4042 // case 1383: CALL(1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1); break;
4043 // case 1384: CALL(0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1); break;
4044 // case 1385: CALL(1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1); break;
4045 // case 1386: CALL(0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1); break;
4046 // case 1387: CALL(1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1); break;
4047 // case 1388: CALL(0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1); break;
4048 // case 1389: CALL(1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1); break;
4049 // case 1390: CALL(0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1); break;
4050 // case 1391: CALL(1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1); break;
4051 // case 1392: CALL(0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1); break;
4052 // case 1393: CALL(1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1); break;
4053 // case 1394: CALL(0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1); break;
4054 // case 1395: CALL(1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1); break;
4055 // case 1396: CALL(0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1); break;
4056 // case 1397: CALL(1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1); break;
4057 // case 1398: CALL(0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1); break;
4058 // case 1399: CALL(1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1); break;
4059 // case 1400: CALL(0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1); break;
4060 // case 1401: CALL(1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1); break;
4061 // case 1402: CALL(0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1); break;
4062 // case 1403: CALL(1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1); break;
4063 // case 1404: CALL(0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1); break;
4064 // case 1405: CALL(1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1); break;
4065 // case 1406: CALL(0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1); break;
4066 // case 1407: CALL(1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1); break;
4067 case 1408: CALL(0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1); break;
4068 case 1409: CALL(1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1); break;
4069 case 1410: CALL(0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1); break;
4070 case 1411: CALL(1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1); break;
4071 case 1412: CALL(0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1); break;
4072 case 1413: CALL(1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1); break;
4073 case 1414: CALL(0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1); break;
4074 case 1415: CALL(1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1); break;
4075 case 1416: CALL(0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1); break;
4076 case 1417: CALL(1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1); break;
4077 case 1418: CALL(0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1); break;
4078 case 1419: CALL(1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1); break;
4079 case 1420: CALL(0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1); break;
4080 case 1421: CALL(1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1); break;
4081 case 1422: CALL(0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1); break;
4082 case 1423: CALL(1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1); break;
4083 // case 1424: CALL(0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1); break;
4084 // case 1425: CALL(1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1); break;
4085 // case 1426: CALL(0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1); break;
4086 // case 1427: CALL(1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1); break;
4087 // case 1428: CALL(0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1); break;
4088 // case 1429: CALL(1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1); break;
4089 // case 1430: CALL(0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1); break;
4090 // case 1431: CALL(1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1); break;
4091 // case 1432: CALL(0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1); break;
4092 // case 1433: CALL(1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1); break;
4093 // case 1434: CALL(0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1); break;
4094 // case 1435: CALL(1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1); break;
4095 // case 1436: CALL(0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1); break;
4096 // case 1437: CALL(1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1); break;
4097 // case 1438: CALL(0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1); break;
4098 // case 1439: CALL(1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1); break;
4099 // case 1440: CALL(0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1); break;
4100 // case 1441: CALL(1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1); break;
4101 // case 1442: CALL(0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1); break;
4102 // case 1443: CALL(1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1); break;
4103 // case 1444: CALL(0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1); break;
4104 // case 1445: CALL(1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1); break;
4105 // case 1446: CALL(0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1); break;
4106 // case 1447: CALL(1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1); break;
4107 // case 1448: CALL(0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1); break;
4108 // case 1449: CALL(1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1); break;
4109 // case 1450: CALL(0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1); break;
4110 // case 1451: CALL(1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1); break;
4111 // case 1452: CALL(0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1); break;
4112 // case 1453: CALL(1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1); break;
4113 // case 1454: CALL(0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1); break;
4114 // case 1455: CALL(1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1); break;
4115 // case 1456: CALL(0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1); break;
4116 // case 1457: CALL(1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1); break;
4117 // case 1458: CALL(0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1); break;
4118 // case 1459: CALL(1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1); break;
4119 // case 1460: CALL(0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1); break;
4120 // case 1461: CALL(1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1); break;
4121 // case 1462: CALL(0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1); break;
4122 // case 1463: CALL(1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1); break;
4123 // case 1464: CALL(0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1); break;
4124 // case 1465: CALL(1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1); break;
4125 // case 1466: CALL(0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1); break;
4126 // case 1467: CALL(1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1); break;
4127 // case 1468: CALL(0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1); break;
4128 // case 1469: CALL(1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1); break;
4129 // case 1470: CALL(0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1); break;
4130 // case 1471: CALL(1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1); break;
4131 // case 1472: CALL(0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1); break;
4132 // case 1473: CALL(1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1); break;
4133 // case 1474: CALL(0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1); break;
4134 // case 1475: CALL(1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1); break;
4135 // case 1476: CALL(0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1); break;
4136 // case 1477: CALL(1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1); break;
4137 // case 1478: CALL(0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1); break;
4138 // case 1479: CALL(1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1); break;
4139 // case 1480: CALL(0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1); break;
4140 // case 1481: CALL(1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1); break;
4141 // case 1482: CALL(0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1); break;
4142 // case 1483: CALL(1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1); break;
4143 // case 1484: CALL(0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1); break;
4144 // case 1485: CALL(1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1); break;
4145 // case 1486: CALL(0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1); break;
4146 // case 1487: CALL(1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1); break;
4147 // case 1488: CALL(0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1); break;
4148 // case 1489: CALL(1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1); break;
4149 // case 1490: CALL(0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1); break;
4150 // case 1491: CALL(1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1); break;
4151 // case 1492: CALL(0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1); break;
4152 // case 1493: CALL(1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1); break;
4153 // case 1494: CALL(0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1); break;
4154 // case 1495: CALL(1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1); break;
4155 // case 1496: CALL(0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1); break;
4156 // case 1497: CALL(1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1); break;
4157 // case 1498: CALL(0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1); break;
4158 // case 1499: CALL(1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1); break;
4159 // case 1500: CALL(0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1); break;
4160 // case 1501: CALL(1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1); break;
4161 // case 1502: CALL(0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1); break;
4162 // case 1503: CALL(1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1); break;
4163 // case 1504: CALL(0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1); break;
4164 // case 1505: CALL(1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1); break;
4165 // case 1506: CALL(0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1); break;
4166 // case 1507: CALL(1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1); break;
4167 // case 1508: CALL(0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1); break;
4168 // case 1509: CALL(1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1); break;
4169 // case 1510: CALL(0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1); break;
4170 // case 1511: CALL(1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1); break;
4171 // case 1512: CALL(0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1); break;
4172 // case 1513: CALL(1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1); break;
4173 // case 1514: CALL(0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1); break;
4174 // case 1515: CALL(1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1); break;
4175 // case 1516: CALL(0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1); break;
4176 // case 1517: CALL(1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1); break;
4177 // case 1518: CALL(0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1); break;
4178 // case 1519: CALL(1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1); break;
4179 // case 1520: CALL(0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1); break;
4180 // case 1521: CALL(1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1); break;
4181 // case 1522: CALL(0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1); break;
4182 // case 1523: CALL(1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1); break;
4183 // case 1524: CALL(0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1); break;
4184 // case 1525: CALL(1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1); break;
4185 // case 1526: CALL(0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1); break;
4186 // case 1527: CALL(1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1); break;
4187 // case 1528: CALL(0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1); break;
4188 // case 1529: CALL(1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1); break;
4189 // case 1530: CALL(0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1); break;
4190 // case 1531: CALL(1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1); break;
4191 // case 1532: CALL(0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1); break;
4192 // case 1533: CALL(1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1); break;
4193 // case 1534: CALL(0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1); break;
4194 // case 1535: CALL(1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1); break;
4195 // case 1536: CALL(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1); break;
4196 // case 1537: CALL(1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1); break;
4197 // case 1538: CALL(0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1); break;
4198 // case 1539: CALL(1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1); break;
4199 // case 1540: CALL(0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1); break;
4200 // case 1541: CALL(1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1); break;
4201 // case 1542: CALL(0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1); break;
4202 // case 1543: CALL(1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1); break;
4203 // case 1544: CALL(0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1); break;
4204 // case 1545: CALL(1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1); break;
4205 // case 1546: CALL(0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1); break;
4206 // case 1547: CALL(1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1); break;
4207 // case 1548: CALL(0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1); break;
4208 // case 1549: CALL(1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1); break;
4209 // case 1550: CALL(0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1); break;
4210 // case 1551: CALL(1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1); break;
4211 // case 1552: CALL(0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1); break;
4212 // case 1553: CALL(1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1); break;
4213 // case 1554: CALL(0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1); break;
4214 // case 1555: CALL(1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1); break;
4215 // case 1556: CALL(0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1); break;
4216 // case 1557: CALL(1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1); break;
4217 // case 1558: CALL(0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1); break;
4218 // case 1559: CALL(1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1); break;
4219 // case 1560: CALL(0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1); break;
4220 // case 1561: CALL(1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1); break;
4221 // case 1562: CALL(0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1); break;
4222 // case 1563: CALL(1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1); break;
4223 // case 1564: CALL(0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1); break;
4224 // case 1565: CALL(1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1); break;
4225 // case 1566: CALL(0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1); break;
4226 // case 1567: CALL(1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1); break;
4227 // case 1568: CALL(0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1); break;
4228 // case 1569: CALL(1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1); break;
4229 // case 1570: CALL(0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1); break;
4230 // case 1571: CALL(1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1); break;
4231 // case 1572: CALL(0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1); break;
4232 // case 1573: CALL(1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1); break;
4233 // case 1574: CALL(0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1); break;
4234 // case 1575: CALL(1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1); break;
4235 // case 1576: CALL(0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1); break;
4236 // case 1577: CALL(1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1); break;
4237 // case 1578: CALL(0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1); break;
4238 // case 1579: CALL(1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1); break;
4239 // case 1580: CALL(0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1); break;
4240 // case 1581: CALL(1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1); break;
4241 // case 1582: CALL(0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1); break;
4242 // case 1583: CALL(1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1); break;
4243 // case 1584: CALL(0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1); break;
4244 // case 1585: CALL(1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1); break;
4245 // case 1586: CALL(0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1); break;
4246 // case 1587: CALL(1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1); break;
4247 // case 1588: CALL(0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1); break;
4248 // case 1589: CALL(1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1); break;
4249 // case 1590: CALL(0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1); break;
4250 // case 1591: CALL(1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1); break;
4251 // case 1592: CALL(0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1); break;
4252 // case 1593: CALL(1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1); break;
4253 // case 1594: CALL(0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1); break;
4254 // case 1595: CALL(1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1); break;
4255 // case 1596: CALL(0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1); break;
4256 // case 1597: CALL(1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1); break;
4257 // case 1598: CALL(0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1); break;
4258 // case 1599: CALL(1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1); break;
4259 // case 1600: CALL(0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1); break;
4260 // case 1601: CALL(1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1); break;
4261 // case 1602: CALL(0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1); break;
4262 // case 1603: CALL(1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1); break;
4263 // case 1604: CALL(0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1); break;
4264 // case 1605: CALL(1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1); break;
4265 // case 1606: CALL(0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1); break;
4266 // case 1607: CALL(1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1); break;
4267 // case 1608: CALL(0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1); break;
4268 // case 1609: CALL(1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1); break;
4269 // case 1610: CALL(0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1); break;
4270 // case 1611: CALL(1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1); break;
4271 // case 1612: CALL(0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1); break;
4272 // case 1613: CALL(1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1); break;
4273 // case 1614: CALL(0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1); break;
4274 // case 1615: CALL(1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1); break;
4275 // case 1616: CALL(0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1); break;
4276 // case 1617: CALL(1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1); break;
4277 // case 1618: CALL(0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1); break;
4278 // case 1619: CALL(1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1); break;
4279 // case 1620: CALL(0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1); break;
4280 // case 1621: CALL(1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1); break;
4281 // case 1622: CALL(0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1); break;
4282 // case 1623: CALL(1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1); break;
4283 // case 1624: CALL(0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1); break;
4284 // case 1625: CALL(1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1); break;
4285 // case 1626: CALL(0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1); break;
4286 // case 1627: CALL(1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1); break;
4287 // case 1628: CALL(0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1); break;
4288 // case 1629: CALL(1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1); break;
4289 // case 1630: CALL(0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1); break;
4290 // case 1631: CALL(1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1); break;
4291 // case 1632: CALL(0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1); break;
4292 // case 1633: CALL(1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1); break;
4293 // case 1634: CALL(0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1); break;
4294 // case 1635: CALL(1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1); break;
4295 // case 1636: CALL(0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1); break;
4296 // case 1637: CALL(1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1); break;
4297 // case 1638: CALL(0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1); break;
4298 // case 1639: CALL(1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1); break;
4299 // case 1640: CALL(0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1); break;
4300 // case 1641: CALL(1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1); break;
4301 // case 1642: CALL(0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1); break;
4302 // case 1643: CALL(1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1); break;
4303 // case 1644: CALL(0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1); break;
4304 // case 1645: CALL(1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1); break;
4305 // case 1646: CALL(0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1); break;
4306 // case 1647: CALL(1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1); break;
4307 // case 1648: CALL(0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1); break;
4308 // case 1649: CALL(1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1); break;
4309 // case 1650: CALL(0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1); break;
4310 // case 1651: CALL(1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1); break;
4311 // case 1652: CALL(0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1); break;
4312 // case 1653: CALL(1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1); break;
4313 // case 1654: CALL(0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1); break;
4314 // case 1655: CALL(1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1); break;
4315 // case 1656: CALL(0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1); break;
4316 // case 1657: CALL(1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1); break;
4317 // case 1658: CALL(0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1); break;
4318 // case 1659: CALL(1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1); break;
4319 // case 1660: CALL(0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1); break;
4320 // case 1661: CALL(1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1); break;
4321 // case 1662: CALL(0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1); break;
4322 // case 1663: CALL(1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1); break;
4323 // case 1664: CALL(0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1); break;
4324 // case 1665: CALL(1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1); break;
4325 // case 1666: CALL(0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1); break;
4326 // case 1667: CALL(1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1); break;
4327 // case 1668: CALL(0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1); break;
4328 // case 1669: CALL(1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1); break;
4329 // case 1670: CALL(0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1); break;
4330 // case 1671: CALL(1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1); break;
4331 // case 1672: CALL(0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1); break;
4332 // case 1673: CALL(1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1); break;
4333 // case 1674: CALL(0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1); break;
4334 // case 1675: CALL(1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1); break;
4335 // case 1676: CALL(0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1); break;
4336 // case 1677: CALL(1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1); break;
4337 // case 1678: CALL(0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1); break;
4338 // case 1679: CALL(1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1); break;
4339 // case 1680: CALL(0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1); break;
4340 // case 1681: CALL(1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1); break;
4341 // case 1682: CALL(0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1); break;
4342 // case 1683: CALL(1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1); break;
4343 // case 1684: CALL(0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1); break;
4344 // case 1685: CALL(1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1); break;
4345 // case 1686: CALL(0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1); break;
4346 // case 1687: CALL(1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1); break;
4347 // case 1688: CALL(0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1); break;
4348 // case 1689: CALL(1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1); break;
4349 // case 1690: CALL(0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1); break;
4350 // case 1691: CALL(1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1); break;
4351 // case 1692: CALL(0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1); break;
4352 // case 1693: CALL(1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1); break;
4353 // case 1694: CALL(0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1); break;
4354 // case 1695: CALL(1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1); break;
4355 // case 1696: CALL(0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1); break;
4356 // case 1697: CALL(1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1); break;
4357 // case 1698: CALL(0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1); break;
4358 // case 1699: CALL(1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1); break;
4359 // case 1700: CALL(0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1); break;
4360 // case 1701: CALL(1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1); break;
4361 // case 1702: CALL(0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1); break;
4362 // case 1703: CALL(1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1); break;
4363 // case 1704: CALL(0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1); break;
4364 // case 1705: CALL(1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1); break;
4365 // case 1706: CALL(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1); break;
4366 // case 1707: CALL(1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1); break;
4367 // case 1708: CALL(0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1); break;
4368 // case 1709: CALL(1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1); break;
4369 // case 1710: CALL(0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1); break;
4370 // case 1711: CALL(1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1); break;
4371 // case 1712: CALL(0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1); break;
4372 // case 1713: CALL(1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1); break;
4373 // case 1714: CALL(0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1); break;
4374 // case 1715: CALL(1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1); break;
4375 // case 1716: CALL(0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1); break;
4376 // case 1717: CALL(1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1); break;
4377 // case 1718: CALL(0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1); break;
4378 // case 1719: CALL(1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1); break;
4379 // case 1720: CALL(0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1); break;
4380 // case 1721: CALL(1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1); break;
4381 // case 1722: CALL(0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1); break;
4382 // case 1723: CALL(1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1); break;
4383 // case 1724: CALL(0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1); break;
4384 // case 1725: CALL(1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1); break;
4385 // case 1726: CALL(0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1); break;
4386 // case 1727: CALL(1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1); break;
4387 // case 1728: CALL(0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1); break;
4388 // case 1729: CALL(1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1); break;
4389 // case 1730: CALL(0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1); break;
4390 // case 1731: CALL(1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1); break;
4391 // case 1732: CALL(0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1); break;
4392 // case 1733: CALL(1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1); break;
4393 // case 1734: CALL(0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1); break;
4394 // case 1735: CALL(1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1); break;
4395 // case 1736: CALL(0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1); break;
4396 // case 1737: CALL(1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1); break;
4397 // case 1738: CALL(0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1); break;
4398 // case 1739: CALL(1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1); break;
4399 // case 1740: CALL(0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1); break;
4400 // case 1741: CALL(1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1); break;
4401 // case 1742: CALL(0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1); break;
4402 // case 1743: CALL(1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1); break;
4403 // case 1744: CALL(0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1); break;
4404 // case 1745: CALL(1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1); break;
4405 // case 1746: CALL(0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1); break;
4406 // case 1747: CALL(1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1); break;
4407 // case 1748: CALL(0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1); break;
4408 // case 1749: CALL(1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1); break;
4409 // case 1750: CALL(0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1); break;
4410 // case 1751: CALL(1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1); break;
4411 // case 1752: CALL(0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1); break;
4412 // case 1753: CALL(1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1); break;
4413 // case 1754: CALL(0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1); break;
4414 // case 1755: CALL(1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1); break;
4415 // case 1756: CALL(0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1); break;
4416 // case 1757: CALL(1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1); break;
4417 // case 1758: CALL(0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1); break;
4418 // case 1759: CALL(1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1); break;
4419 // case 1760: CALL(0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1); break;
4420 // case 1761: CALL(1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1); break;
4421 // case 1762: CALL(0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1); break;
4422 // case 1763: CALL(1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1); break;
4423 // case 1764: CALL(0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1); break;
4424 // case 1765: CALL(1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1); break;
4425 // case 1766: CALL(0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1); break;
4426 // case 1767: CALL(1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1); break;
4427 // case 1768: CALL(0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1); break;
4428 // case 1769: CALL(1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1); break;
4429 // case 1770: CALL(0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1); break;
4430 // case 1771: CALL(1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1); break;
4431 // case 1772: CALL(0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1); break;
4432 // case 1773: CALL(1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1); break;
4433 // case 1774: CALL(0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1); break;
4434 // case 1775: CALL(1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1); break;
4435 // case 1776: CALL(0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1); break;
4436 // case 1777: CALL(1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1); break;
4437 // case 1778: CALL(0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1); break;
4438 // case 1779: CALL(1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1); break;
4439 // case 1780: CALL(0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1); break;
4440 // case 1781: CALL(1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1); break;
4441 // case 1782: CALL(0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1); break;
4442 // case 1783: CALL(1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1); break;
4443 // case 1784: CALL(0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1); break;
4444 // case 1785: CALL(1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1); break;
4445 // case 1786: CALL(0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1); break;
4446 // case 1787: CALL(1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1); break;
4447 // case 1788: CALL(0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1); break;
4448 // case 1789: CALL(1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1); break;
4449 // case 1790: CALL(0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1); break;
4450 // case 1791: CALL(1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1); break;
4451 // case 1792: CALL(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1); break;
4452 // case 1793: CALL(1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1); break;
4453 // case 1794: CALL(0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1); break;
4454 // case 1795: CALL(1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1); break;
4455 // case 1796: CALL(0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1); break;
4456 // case 1797: CALL(1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1); break;
4457 // case 1798: CALL(0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1); break;
4458 // case 1799: CALL(1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1); break;
4459 // case 1800: CALL(0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1); break;
4460 // case 1801: CALL(1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1); break;
4461 // case 1802: CALL(0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1); break;
4462 // case 1803: CALL(1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1); break;
4463 // case 1804: CALL(0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1); break;
4464 // case 1805: CALL(1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1); break;
4465 // case 1806: CALL(0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1); break;
4466 // case 1807: CALL(1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1); break;
4467 // case 1808: CALL(0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1); break;
4468 // case 1809: CALL(1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1); break;
4469 // case 1810: CALL(0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1); break;
4470 // case 1811: CALL(1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1); break;
4471 // case 1812: CALL(0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1); break;
4472 // case 1813: CALL(1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1); break;
4473 // case 1814: CALL(0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1); break;
4474 // case 1815: CALL(1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1); break;
4475 // case 1816: CALL(0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1); break;
4476 // case 1817: CALL(1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1); break;
4477 // case 1818: CALL(0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1); break;
4478 // case 1819: CALL(1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1); break;
4479 // case 1820: CALL(0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1); break;
4480 // case 1821: CALL(1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1); break;
4481 // case 1822: CALL(0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1); break;
4482 // case 1823: CALL(1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1); break;
4483 // case 1824: CALL(0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1); break;
4484 // case 1825: CALL(1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1); break;
4485 // case 1826: CALL(0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1); break;
4486 // case 1827: CALL(1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1); break;
4487 // case 1828: CALL(0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1); break;
4488 // case 1829: CALL(1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1); break;
4489 // case 1830: CALL(0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1); break;
4490 // case 1831: CALL(1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1); break;
4491 // case 1832: CALL(0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1); break;
4492 // case 1833: CALL(1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1); break;
4493 // case 1834: CALL(0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1); break;
4494 // case 1835: CALL(1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1); break;
4495 // case 1836: CALL(0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1); break;
4496 // case 1837: CALL(1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1); break;
4497 // case 1838: CALL(0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1); break;
4498 // case 1839: CALL(1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1); break;
4499 // case 1840: CALL(0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1); break;
4500 // case 1841: CALL(1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1); break;
4501 // case 1842: CALL(0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1); break;
4502 // case 1843: CALL(1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1); break;
4503 // case 1844: CALL(0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1); break;
4504 // case 1845: CALL(1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1); break;
4505 // case 1846: CALL(0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1); break;
4506 // case 1847: CALL(1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1); break;
4507 // case 1848: CALL(0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1); break;
4508 // case 1849: CALL(1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1); break;
4509 // case 1850: CALL(0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1); break;
4510 // case 1851: CALL(1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1); break;
4511 // case 1852: CALL(0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1); break;
4512 // case 1853: CALL(1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1); break;
4513 // case 1854: CALL(0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1); break;
4514 // case 1855: CALL(1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1); break;
4515 // case 1856: CALL(0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1); break;
4516 // case 1857: CALL(1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1); break;
4517 // case 1858: CALL(0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1); break;
4518 // case 1859: CALL(1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1); break;
4519 // case 1860: CALL(0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1); break;
4520 // case 1861: CALL(1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1); break;
4521 // case 1862: CALL(0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1); break;
4522 // case 1863: CALL(1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1); break;
4523 // case 1864: CALL(0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1); break;
4524 // case 1865: CALL(1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1); break;
4525 // case 1866: CALL(0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1); break;
4526 // case 1867: CALL(1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1); break;
4527 // case 1868: CALL(0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1); break;
4528 // case 1869: CALL(1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1); break;
4529 // case 1870: CALL(0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1); break;
4530 // case 1871: CALL(1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1); break;
4531 // case 1872: CALL(0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1); break;
4532 // case 1873: CALL(1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1); break;
4533 // case 1874: CALL(0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1); break;
4534 // case 1875: CALL(1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1); break;
4535 // case 1876: CALL(0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1); break;
4536 // case 1877: CALL(1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1); break;
4537 // case 1878: CALL(0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1); break;
4538 // case 1879: CALL(1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1); break;
4539 // case 1880: CALL(0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1); break;
4540 // case 1881: CALL(1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1); break;
4541 // case 1882: CALL(0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1); break;
4542 // case 1883: CALL(1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1); break;
4543 // case 1884: CALL(0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1); break;
4544 // case 1885: CALL(1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1); break;
4545 // case 1886: CALL(0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1); break;
4546 // case 1887: CALL(1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1); break;
4547 // case 1888: CALL(0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1); break;
4548 // case 1889: CALL(1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1); break;
4549 // case 1890: CALL(0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1); break;
4550 // case 1891: CALL(1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1); break;
4551 // case 1892: CALL(0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1); break;
4552 // case 1893: CALL(1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1); break;
4553 // case 1894: CALL(0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1); break;
4554 // case 1895: CALL(1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1); break;
4555 // case 1896: CALL(0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1); break;
4556 // case 1897: CALL(1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1); break;
4557 // case 1898: CALL(0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1); break;
4558 // case 1899: CALL(1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1); break;
4559 // case 1900: CALL(0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1); break;
4560 // case 1901: CALL(1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1); break;
4561 // case 1902: CALL(0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1); break;
4562 // case 1903: CALL(1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1); break;
4563 // case 1904: CALL(0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1); break;
4564 // case 1905: CALL(1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1); break;
4565 // case 1906: CALL(0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1); break;
4566 // case 1907: CALL(1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1); break;
4567 // case 1908: CALL(0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1); break;
4568 // case 1909: CALL(1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1); break;
4569 // case 1910: CALL(0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1); break;
4570 // case 1911: CALL(1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1); break;
4571 // case 1912: CALL(0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1); break;
4572 // case 1913: CALL(1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1); break;
4573 // case 1914: CALL(0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1); break;
4574 // case 1915: CALL(1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1); break;
4575 // case 1916: CALL(0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1); break;
4576 // case 1917: CALL(1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1); break;
4577 // case 1918: CALL(0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1); break;
4578 // case 1919: CALL(1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1); break;
4579 // case 1920: CALL(0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1); break;
4580 // case 1921: CALL(1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1); break;
4581 // case 1922: CALL(0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1); break;
4582 // case 1923: CALL(1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1); break;
4583 // case 1924: CALL(0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1); break;
4584 // case 1925: CALL(1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1); break;
4585 // case 1926: CALL(0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1); break;
4586 // case 1927: CALL(1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1); break;
4587 // case 1928: CALL(0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1); break;
4588 // case 1929: CALL(1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1); break;
4589 // case 1930: CALL(0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1); break;
4590 // case 1931: CALL(1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1); break;
4591 // case 1932: CALL(0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1); break;
4592 // case 1933: CALL(1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1); break;
4593 // case 1934: CALL(0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1); break;
4594 // case 1935: CALL(1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1); break;
4595 // case 1936: CALL(0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1); break;
4596 // case 1937: CALL(1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1); break;
4597 // case 1938: CALL(0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1); break;
4598 // case 1939: CALL(1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1); break;
4599 // case 1940: CALL(0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1); break;
4600 // case 1941: CALL(1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1); break;
4601 // case 1942: CALL(0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1); break;
4602 // case 1943: CALL(1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1); break;
4603 // case 1944: CALL(0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1); break;
4604 // case 1945: CALL(1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1); break;
4605 // case 1946: CALL(0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1); break;
4606 // case 1947: CALL(1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1); break;
4607 // case 1948: CALL(0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1); break;
4608 // case 1949: CALL(1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1); break;
4609 // case 1950: CALL(0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1); break;
4610 // case 1951: CALL(1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1); break;
4611 // case 1952: CALL(0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1); break;
4612 // case 1953: CALL(1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1); break;
4613 // case 1954: CALL(0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1); break;
4614 // case 1955: CALL(1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1); break;
4615 // case 1956: CALL(0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1); break;
4616 // case 1957: CALL(1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1); break;
4617 // case 1958: CALL(0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1); break;
4618 // case 1959: CALL(1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1); break;
4619 // case 1960: CALL(0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1); break;
4620 // case 1961: CALL(1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1); break;
4621 // case 1962: CALL(0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1); break;
4622 // case 1963: CALL(1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1); break;
4623 // case 1964: CALL(0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1); break;
4624 // case 1965: CALL(1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1); break;
4625 // case 1966: CALL(0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1); break;
4626 // case 1967: CALL(1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1); break;
4627 // case 1968: CALL(0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1); break;
4628 // case 1969: CALL(1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1); break;
4629 // case 1970: CALL(0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1); break;
4630 // case 1971: CALL(1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1); break;
4631 // case 1972: CALL(0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1); break;
4632 // case 1973: CALL(1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1); break;
4633 // case 1974: CALL(0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1); break;
4634 // case 1975: CALL(1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1); break;
4635 // case 1976: CALL(0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1); break;
4636 // case 1977: CALL(1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1); break;
4637 // case 1978: CALL(0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1); break;
4638 // case 1979: CALL(1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1); break;
4639 // case 1980: CALL(0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1); break;
4640 // case 1981: CALL(1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1); break;
4641 // case 1982: CALL(0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1); break;
4642 // case 1983: CALL(1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1); break;
4643 // case 1984: CALL(0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1); break;
4644 // case 1985: CALL(1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1); break;
4645 // case 1986: CALL(0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1); break;
4646 // case 1987: CALL(1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1); break;
4647 // case 1988: CALL(0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1); break;
4648 // case 1989: CALL(1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1); break;
4649 // case 1990: CALL(0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1); break;
4650 // case 1991: CALL(1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1); break;
4651 // case 1992: CALL(0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1); break;
4652 // case 1993: CALL(1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1); break;
4653 // case 1994: CALL(0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1); break;
4654 // case 1995: CALL(1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1); break;
4655 // case 1996: CALL(0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1); break;
4656 // case 1997: CALL(1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1); break;
4657 // case 1998: CALL(0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1); break;
4658 // case 1999: CALL(1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1); break;
4659 // case 2000: CALL(0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1); break;
4660 // case 2001: CALL(1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1); break;
4661 // case 2002: CALL(0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1); break;
4662 // case 2003: CALL(1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1); break;
4663 // case 2004: CALL(0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1); break;
4664 // case 2005: CALL(1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1); break;
4665 // case 2006: CALL(0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1); break;
4666 // case 2007: CALL(1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1); break;
4667 // case 2008: CALL(0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1); break;
4668 // case 2009: CALL(1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1); break;
4669 // case 2010: CALL(0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1); break;
4670 // case 2011: CALL(1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1); break;
4671 // case 2012: CALL(0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1); break;
4672 // case 2013: CALL(1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1); break;
4673 // case 2014: CALL(0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1); break;
4674 // case 2015: CALL(1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1); break;
4675 // case 2016: CALL(0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1); break;
4676 // case 2017: CALL(1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1); break;
4677 // case 2018: CALL(0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1); break;
4678 // case 2019: CALL(1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1); break;
4679 // case 2020: CALL(0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1); break;
4680 // case 2021: CALL(1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1); break;
4681 // case 2022: CALL(0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1); break;
4682 // case 2023: CALL(1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1); break;
4683 // case 2024: CALL(0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1); break;
4684 // case 2025: CALL(1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1); break;
4685 // case 2026: CALL(0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1); break;
4686 // case 2027: CALL(1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1); break;
4687 // case 2028: CALL(0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1); break;
4688 // case 2029: CALL(1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1); break;
4689 // case 2030: CALL(0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1); break;
4690 // case 2031: CALL(1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1); break;
4691 // case 2032: CALL(0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1); break;
4692 // case 2033: CALL(1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1); break;
4693 // case 2034: CALL(0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1); break;
4694 // case 2035: CALL(1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1); break;
4695 // case 2036: CALL(0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1); break;
4696 // case 2037: CALL(1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1); break;
4697 // case 2038: CALL(0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1); break;
4698 // case 2039: CALL(1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1); break;
4699 // case 2040: CALL(0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1); break;
4700 // case 2041: CALL(1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1); break;
4701 // case 2042: CALL(0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1); break;
4702 // case 2043: CALL(1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1); break;
4703 // case 2044: CALL(0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1); break;
4704 // case 2045: CALL(1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1); break;
4705 // case 2046: CALL(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); break;
4706 // case 2047: CALL(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); break;
4711 "doEnergy=%d doVirial=%d doSlow=%d doPairlist=%d "
4712 "doAlch=%d doFEP=%d doTI=%d doStreaming=%d doTable=%d "
4714 doEnergy, doVirial, doSlow, doPairlist, doAlch, doFEP, doTI,
4715 doStreaming, doTable, options);
4718 std::string call_options;
4719 call_options += "doEnergy = " + std::to_string(int(doEnergy));
4720 call_options += ", doVirial = " + std::to_string(int(doVirial));
4721 call_options += ", doSlow = " + std::to_string(int(doSlow));
4722 call_options += ", doPairlist = " + std::to_string(int(doPairlist));
4723 call_options += ", doAlch = " + std::to_string(int(doAlch));
4724 call_options += ", doFEP = " + std::to_string(int(doFEP));
4725 call_options += ", doTI = " + std::to_string(int(doTI));
4726 call_options += ", doStreaming = " + std::to_string(int(doStreaming));
4727 call_options += ", doTable = " + std::to_string(int(doTable));
4728 call_options += ", doAlchVdwForceSwitching = " + std::to_string(int(doAlchVdwForceSwitching));
4729 call_options += ", doNbThole = " + std::to_string(int(doNbThole));
4730 const std::string error = "CudaComputeNonbondedKernel::nonbondedForce, none of the kernels called. Options are:\n" + call_options;
4731 NAMD_bug(error.c_str());
4740 cudaCheck(cudaGetLastError());
4742 start += nblock*nwarp;
4744 if ( doVirial || ! doStreaming ){
4746 int grid = (atomStorageSize + block - 1)/block;
4748 transposeForcesKernel<1><<<grid, block, 0, stream>>>(d_forces, d_forcesSlow,
4749 force_x, force_y, force_z, force_w,
4750 forceSlow_x, forceSlow_y, forceSlow_z, forceSlow_w,
4753 transposeForcesKernel<0><<<grid, block, 0, stream>>>(d_forces, d_forcesSlow,
4754 force_x, force_y, force_z, force_w,
4755 forceSlow_x, forceSlow_y, forceSlow_z, forceSlow_w,
4759 cudaCheck(cudaStreamSynchronize(stream));
4761 // Haochuan: I add atomIndex to the output and also debug the slow forces.
4762 // XXX TODO: ERASE THIS AFTERWARDS
4763 // this is not numAtoms, this is something else
4764 // will print the force inside the compute and afterwards
4768 allocate_host<float4>(&h_f, atomStorageSize);
4769 allocate_host<float4>(&h_f_slow, atomStorageSize);
4770 allocate_host<int>(&h_index, atomStorageSize);
4771 copy_DtoH_sync<float4>(d_forces, h_f, atomStorageSize);
4772 copy_DtoH_sync<float4>(d_forcesSlow, h_f_slow, atomStorageSize);
4773 copy_DtoH_sync<int>(atomIndex, h_index, atomStorageSize);
4775 FILE* pos_nb_f = fopen("compute_nb_dforces.txt", "a+");
4776 fprintf(pos_nb_f, "forces after kernel\n");
4777 // I'm gonna copy back the forces and just print them
4779 for(int i = 0; i < atomStorageSize; i++){
4780 //for(int i = 83000; i < 85000; i++){
4781 fprintf(pos_nb_f, "%10d %10.5f %10.5f %10.5f\n", h_index[i], h_f[i].x,
4782 h_f[i].y, h_f[i].z);
4786 FILE* pos_slow_f = fopen("compute_slow_dforces.txt", "a+");
4787 for(int i = 0; i < atomStorageSize; i++){
4788 fprintf(pos_slow_f, "%10d %10.5f %10.5f %10.5f\n", h_index[i], h_f_slow[i].x,
4789 h_f_slow[i].y, h_f_slow[i].z);
4792 deallocate_host(&h_f_slow);
4795 deallocate_host<int>(&h_index);
4796 deallocate_host<float4>(&h_f);
4802 // Perform virial and energy reductions for non-bonded force calculation
//
// Host-side driver that enqueues up to three reduction kernels on `stream`.
// Every kernel launched here receives d_virialEnergy as its last argument.
//
//   tlKernel         source of xyzq (get_xyzq) and of the per-tile-list
//                    virial/energy buffer and its lengths
//   atomStorageSize  element count handed to reduceNonbondedVirialKernel and
//                    used to size its grid
//   doEnergy, doVirial, doSlow, doGBIS
//                    flags that select which reductions run and are forwarded
//                    to the kernels
//   d_forces, d_forcesSlow
//                    device force arrays read by reduceNonbondedVirialKernel
//   d_virialEnergy   device accumulator; one VirialEnergy element is cleared
//                    before the kernels are launched
//   stream           CUDA stream used for the clear and for every launch
//
// Each launch is followed by cudaCheck(cudaGetLastError()) so that launch
// configuration errors are reported at the call site.
//
// NOTE(review): this listing omits some source lines (the embedded line
// numbering has gaps), so block delimiters and any guard in front of the first
// launch are not visible here - confirm against the full file.
4804 void CudaComputeNonbondedKernel::reduceVirialEnergy(CudaTileListKernel& tlKernel,
4805 const int atomStorageSize, const bool doEnergy, const bool doVirial, const bool doSlow, const bool doGBIS,
4806 float4* d_forces, float4* d_forcesSlow,
4807 VirialEnergy* d_virialEnergy, cudaStream_t stream) {
// Clear the one-element accumulator whenever any reduction is requested.
4809 if (doEnergy || doVirial) {
4810 clear_device_array<VirialEnergy>(d_virialEnergy, 1, stream);
// Per-atom reduction over d_forces / d_forcesSlow and xyzq.
// Grid size is (atomStorageSize-1)/nthread+1 capped by getMaxNumBlocks();
// with truncating integer division this form evaluates to 1 (not 0) when the
// count is 0, assuming nthread > 1.
4815 int nthread = REDUCENONBONDEDVIRIALKERNEL_NUM_WARP*WARPSIZE;
4816 int nblock = min(deviceCUDA->getMaxNumBlocks(), (atomStorageSize-1)/nthread+1);
4817 reduceNonbondedVirialKernel <<< nblock, nthread, 0, stream >>>
4818 (doSlow, atomStorageSize, tlKernel.get_xyzq(), d_forces, d_forcesSlow, d_virialEnergy);
4819 cudaCheck(cudaGetLastError());
// Reduction over the tile-list virial/energy buffer; the grid is sized from
// getTileListVirialEnergyLength() with the same cap as above.
4822 if (doVirial || doEnergy)
4824 int nthread = REDUCEVIRIALENERGYKERNEL_NUM_WARP*WARPSIZE;
4825 int nblock = min(deviceCUDA->getMaxNumBlocks(), (tlKernel.getTileListVirialEnergyLength()-1)/nthread+1);
4826 reduceVirialEnergyKernel <<< nblock, nthread, 0, stream >>>
4827 (doEnergy, doVirial, doSlow, tlKernel.getTileListVirialEnergyLength(), tlKernel.getTileListVirialEnergy(), d_virialEnergy);
4828 cudaCheck(cudaGetLastError());
// GBIS energy reduction, requested only when both doGBIS and doEnergy are set.
// It reads the same getTileListVirialEnergy() buffer as the previous kernel but
// is sized by getTileListVirialEnergyGBISLength().
4831 if (doGBIS && doEnergy)
4833 int nthread = REDUCEGBISENERGYKERNEL_NUM_WARP*WARPSIZE;
4834 int nblock = min(deviceCUDA->getMaxNumBlocks(), (tlKernel.getTileListVirialEnergyGBISLength()-1)/nthread+1);
4835 reduceGBISEnergyKernel <<< nblock, nthread, 0, stream >>>
4836 (tlKernel.getTileListVirialEnergyGBISLength(), tlKernel.getTileListVirialEnergy(), d_virialEnergy);
4837 cudaCheck(cudaGetLastError());
// Uploads the exclusion bit words to the device in two places:
//   1. the first min(numExclusions, MAX_CONST_EXCLUSIONS) words are copied into
//      the __constant__ array constExclusions (at offset 0);
//   2. all numExclusions words are copied into the overflowExclusions device
//      buffer, after reallocate_device has been called for that many elements.
//
//   numExclusions   number of unsigned int words in exclusion_bits
//   exclusion_bits  source array, read as host memory by copy_HtoD_sync
//
// Both copies complete before returning (cudaMemcpyToSymbol without a stream,
// then the *_sync copy helper).
4842 void CudaComputeNonbondedKernel::bindExclusions(int numExclusions, unsigned int* exclusion_bits) {
// Clamp the constant-memory copy to the capacity of constExclusions.
4843 int nconst = ( numExclusions < MAX_CONST_EXCLUSIONS ? numExclusions : MAX_CONST_EXCLUSIONS );
4844 cudaCheck(cudaMemcpyToSymbol(constExclusions, exclusion_bits, nconst*sizeof(unsigned int), 0));
// The overflow buffer holds the full table, not only the words beyond nconst.
4846 reallocate_device<unsigned int>(&overflowExclusions, &overflowExclusionsSize, numExclusions);
4847 copy_HtoD_sync<unsigned int>(exclusion_bits, overflowExclusions, numExclusions);
// Copies num_atoms int2 entries from the host array h_data into the device
// array d_exclusionsByAtom, allocating that array on the first call only
// (i.e. when the pointer is still NULL).
//
//   h_data     host array of int2 entries, one per atom
//   num_atoms  number of entries to allocate (first call) and to copy
//
// NOTE(review): because the buffer is never resized, a later call with a
// num_atoms larger than the one used for the initial allocation would copy
// past the end of d_exclusionsByAtom. Presumably callers always pass the same
// global atom count - confirm.
4851 void CudaComputeNonbondedKernel::setExclusionsByAtom(int2* h_data, const int num_atoms) {
4852 // Global data structure shouldn't be reallocated
4853 if (d_exclusionsByAtom == NULL) allocate_device<int2>(&d_exclusionsByAtom, num_atoms);
4854 copy_HtoD_sync<int2>(h_data, d_exclusionsByAtom, num_atoms);
// Kernel that, for every patch, gathers per-atom data from the "global"
// arrays into the arrays indexed by the patch's bufferOffsetNBPad offset.
//
// Work distribution: each block starts at patch blockIdx.x and advances by
// gridDim.x; within a patch, threads start at threadIdx.x and advance by
// blockDim.x over the atoms.
//
// For atom slot i of a patch (offset = bufferOffset, offsetNB = bufferOffsetNBPad):
//   order                    = patchSortOrder[offset + i]
//   id                       = global_id[offset + order]
//   vdwTypes[offsetNB + i]   = global_vdwTypes[offset + order]
//   atomIndex[offsetNB + i]  = id
//   exclusions[offsetNB + i] = exclusionsByAtom[id] with .x and .y swapped
//   part[offsetNB + i]       = global_partition[offset + order]
//
// NOTE(review): this listing omits some source lines (the embedded line
// numbering has gaps). The declarations of the output parameters (vdwTypes,
// atomIndex, exclusions, part), any block-level barriers, and any use of the
// kDoAlch template flag are not visible here - confirm against the full file.
4859 template<bool kDoAlch>
4860 __global__ void updateVdwTypesExclKernel(
4861 const int numPatches,
4862 const CudaLocalRecord* localRecords,
4863 const int* global_vdwTypes,
4864 const int* global_id,
4865 const int* patchSortOrder,
4866 const int2* exclusionsByAtom,
4867 const int* global_partition,
// Shared-memory staging copy of the current patch's record, also viewed as an
// array of 32-bit words so several threads can fill it.
4873 __shared__ CudaLocalRecord s_record;
4874 using AccessType = int32_t;
4875 AccessType* s_record_buffer = (AccessType*) &s_record;
4877 for (int patchIndex = blockIdx.x; patchIndex < numPatches; patchIndex += gridDim.x) {
4878 // Read in the CudaLocalRecord using multiple threads. This should
// The word count uses integer division, so this assumes sizeof(CudaLocalRecord)
// is a multiple of sizeof(int32_t) - TODO confirm.
4880 for (int i = threadIdx.x; i < sizeof(CudaLocalRecord)/sizeof(AccessType); i += blockDim.x) {
4881 s_record_buffer[i] = ((AccessType*) &(localRecords[patchIndex]))[i];
// NOTE(review): the reads of s_record below are only safe after a block-wide
// barrier following the cooperative copy above; the barrier is presumably on
// a line omitted from this listing - confirm.
4885 const int numAtoms = s_record.numAtoms;
4886 const int offset = s_record.bufferOffset;
4887 const int offsetNB = s_record.bufferOffsetNBPad;
4889 for (int i = threadIdx.x; i < numAtoms; i += blockDim.x) {
// Source index goes through the patch sort order; destination index is the
// unsorted slot i in the NB-padded layout.
4890 const int order = patchSortOrder[offset + i];
4891 const int id = global_id[offset + order];
4892 vdwTypes [offsetNB + i] = global_vdwTypes[offset + order];
4893 atomIndex [offsetNB + i] = id;
// The two int2 components are deliberately written crosswise (.x <- .y, .y <- .x).
4894 exclusions[offsetNB + i].x = exclusionsByAtom[id].y;
4895 exclusions[offsetNB + i].y = exclusionsByAtom[id].x;
// NOTE(review): presumably guarded by kDoAlch on an omitted line - confirm.
4897 part [offsetNB + i] = global_partition[offset + order];
// Host-side wrapper that sizes the member buffers vdwTypes, exclIndexMaxDiff
// and atomIndex for atomStorageSize elements and then launches
// updateVdwTypesExclKernel to fill them (plus tlKernel.get_part()) from the
// device arrays passed in.
//
//   tlKernel         provides the partition output buffer via get_part()
//   numPatches       number of patches; also used as the grid size
//   atomStorageSize  element count requested for the three member buffers
//   alchOn           presumably selects the <true>/<false> kernel instantiation;
//                    the branch itself is on lines omitted from this listing
//   localRecords     per-patch CudaLocalRecord array passed to the kernel
//   d_vdwTypes, d_id, d_sortOrder, d_partition
//                    device input arrays forwarded to the kernel
//
// NOTE(review): this listing omits some source lines (the embedded line
// numbering has gaps), including the rest of the parameter list and the
// delimiters around the two launches.
// NOTE(review): no cudaCheck(cudaGetLastError()) is visible after either
// launch, unlike the other kernel launches in this file - confirm whether a
// launch error check is intended here.
4905 void CudaComputeNonbondedKernel::updateVdwTypesExclOnGPU(CudaTileListKernel& tlKernel,
4906 const int numPatches, const int atomStorageSize, const bool alchOn,
4907 CudaLocalRecord* localRecords,
4908 const int* d_vdwTypes, const int* d_id, const int* d_sortOrder,
4909 const int* d_partition,
// Resize the output buffers; OVERALLOC (1.2f) is passed as the last argument.
4912 reallocate_device<int>(&vdwTypes, &vdwTypesSize, atomStorageSize, OVERALLOC);
4913 reallocate_device<int2>(&exclIndexMaxDiff, &exclIndexMaxDiffSize, atomStorageSize, OVERALLOC);
4914 reallocate_device<int>(&atomIndex, &atomIndexSize, atomStorageSize, OVERALLOC);
// One block per patch, 512 threads per block.
// NOTE(review): numPatches == 0 would request a zero-sized grid - confirm
// callers never pass 0.
4916 const int numBlocks = numPatches;
4917 const int numThreads = 512;
// Instantiation with kDoAlch = true.
4920 updateVdwTypesExclKernel<true><<<numBlocks, numThreads, 0, stream>>>(
4921 numPatches, localRecords,
4922 d_vdwTypes, d_id, d_sortOrder, d_exclusionsByAtom, d_partition,
4923 vdwTypes, atomIndex, exclIndexMaxDiff, tlKernel.get_part()
// Instantiation with kDoAlch = false; the arguments are identical.
4926 updateVdwTypesExclKernel<false><<<numBlocks, numThreads, 0, stream>>>(
4927 numPatches, localRecords,
4928 d_vdwTypes, d_id, d_sortOrder, d_exclusionsByAtom, d_partition,
4929 vdwTypes, atomIndex, exclIndexMaxDiff, tlKernel.get_part()
// Sizes the member device buffers drudeAtomAlpha and isDrude for
// atomStorageSize elements (passing OVERALLOC to reallocate_device) and copies
// the corresponding host arrays into them on `stream`.
//
//   atomStorageSize   number of elements to copy into each buffer
//   h_drudeAtomAlpha  host array of float values copied into drudeAtomAlpha
//   h_isDrude         host array of int values copied into isDrude
//   stream            CUDA stream passed to both copies
//
// NOTE(review): copy_HtoD is the non-"_sync" helper, so the copies are
// presumably asynchronous and the host arrays would need to stay valid until
// the stream reaches them - confirm.
4934 void CudaComputeNonbondedKernel::updateDrudeData(
4935 const int atomStorageSize, const float* h_drudeAtomAlpha,
4936 const int *h_isDrude, cudaStream_t stream) {
4937 reallocate_device(&drudeAtomAlpha, &drudeAtomAlphaSize, atomStorageSize, OVERALLOC);
4938 reallocate_device(&isDrude, &isDrudeSize, atomStorageSize, OVERALLOC);
4939 copy_HtoD(h_drudeAtomAlpha, drudeAtomAlpha, atomStorageSize, stream);
4940 copy_HtoD(h_isDrude, isDrude, atomStorageSize, stream);