1 #include "SimParameters.h"
3 #include <hip/hip_runtime.h>
4 #include <hipcub/hipcub.hpp>
8 #include "HipDefines.h"
9 #include "CudaComputeNonbondedKernel.hip.h"
10 #include "CudaTileListKernel.hip.h"
11 #include "DeviceCUDA.h"
12 #include "CudaComputeNonbondedInteractions.h"
17 #define __thread __declspec(thread)
19 extern __thread DeviceCUDA *deviceCUDA;
// Warps per thread block. NONBONDKERNEL_NUM_WARP feeds the __launch_bounds__ of
// nonbondedForceKernel and the leading extent of several of its __shared__ arrays.
21 #define NONBONDKERNEL_NUM_WARP 1
// NOTE(review): the three REDUCE* counts are presumably for reduction kernels
// that are not visible in this excerpt.
22 #define REDUCENONBONDEDVIRIALKERNEL_NUM_WARP 4
23 #define REDUCEVIRIALENERGYKERNEL_NUM_WARP 4
24 #define REDUCEGBISENERGYKERNEL_NUM_WARP 4
// Number of BOUNDINGBOXSIZE-wide sweeps that fit into one warp.
// NOTE(review): the expansion is not parenthesized, so using the macro as an
// operand of a higher-precedence expression (e.g. `x % NONBONDKERNEL_SWEEPS_PER_TILE`)
// would associate incorrectly. The uses visible here (array extent, constexpr
// initializer) are unaffected.
26 #define NONBONDKERNEL_SWEEPS_PER_TILE WARPSIZE/BOUNDINGBOXSIZE
// NOTE(review): presumably a buffer over-allocation factor; it is not used in the visible code.
28 #define OVERALLOC 1.2f
30 void NAMD_die(const char *);
31 void NAMD_bug(const char *);
33 #define MAX_CONST_EXCLUSIONS 2048 // cache size is 8k
34 __constant__ unsigned int constExclusions[MAX_CONST_EXCLUSIONS];
37 __constant__ AlchData alchflags;
// Two-lane float vector built with the Clang ext_vector_type extension.
41 typedef float __attribute__((ext_vector_type(2))) Native_float2_;
// fast_float3 storage, exposed through two anonymous structs.
// NOTE(review): the enclosing struct/union header is not visible in this excerpt;
// presumably both anonymous structs alias the same 12 bytes through a union — confirm.
// View 1: packed (xy pair, z) so that x and y can be processed as one 2-wide vector.
45 struct __attribute__((packed)) { Native_float2_ dxy; float dz; };
// View 2: conventional scalar components.
46 struct { float x, y, z; };
50 fast_float3() = default;
// Construct from three scalars.
53 fast_float3(float x_, float y_, float z_) : dxy{ x_, y_ }, dz{ z_ } {}
// Construct from an xy pair plus a z scalar.
56 fast_float3(Native_float2_ xy_, float z_) : dxy{ xy_ }, dz{ z_ } {}
// Conversion to float3 using the scalar view.
59 operator float3() const
61 return float3{ x, y, z };
// Assignment from a float4 (body not visible in this excerpt).
64 __forceinline__ __host__ __device__
65 fast_float3& operator=(float4 y)
// Guards the layout: exactly three floats with no padding.
73 static_assert(sizeof(fast_float3) == 12);
// Component-wise product of two fast_float3 values; the xy pair is multiplied
// as one 2-wide vector and z as a scalar.
75 __forceinline__ __host__ __device__
76 fast_float3 operator*(fast_float3 x, fast_float3 y)
78 return fast_float3{ x.dxy * y.dxy, x.dz * y.dz };
// Scales every component of x by the scalar y (vector on the left).
81 __forceinline__ __host__ __device__
82 fast_float3 operator*(fast_float3 x, float y)
84 return fast_float3{ x.dxy * y, x.dz * y };
// Scales every component of y by the scalar x (scalar on the left).
87 __forceinline__ __host__ __device__
88 fast_float3 operator*(float x, fast_float3 y)
90 return fast_float3{ x * y.dxy, x * y.dz };
// Component-wise sum of two fast_float3 values.
93 __forceinline__ __host__ __device__
94 fast_float3 operator+(fast_float3 x, fast_float3 y)
96 return fast_float3{ x.dxy + y.dxy, x.dz + y.dz };
// Component-wise difference x - y of two fast_float3 values.
99 __forceinline__ __host__ __device__
100 fast_float3 operator-(fast_float3 x, fast_float3 y)
102 return fast_float3{ x.dxy - y.dxy, x.dz - y.dz };
// Broadcasts a single scalar into all three components.
106 static __forceinline__ __host__ __device__ fast_float3 make_fast_float3(const float x)
108 return fast_float3{ x, x, x };
// Copies the x, y, z components of a float3 into a fast_float3.
111 static __forceinline__ __host__ __device__ fast_float3 make_fast_float3(const float3 x)
113 return fast_float3{ x.x, x.y, x.z };
// Copies the x, y, z components of a float4 into a fast_float3; the w component is dropped.
116 static __forceinline__ __host__ __device__ fast_float3 make_fast_float3(const float4 x)
118 return fast_float3{ x.x, x.y, x.z };
// Squared Euclidean length of a: squares the components, then sums them.
121 static __forceinline__ __host__ __device__ float norm2(fast_float3 a)
123 fast_float3 b = a * a;
124 return (b.x + b.y + b.z);
// Widens a fast_float3 to a float4 with w set to zero.
127 static __forceinline__ __host__ __device__ float4 make_float4(fast_float3 a)
129 return make_float4(a.x, a.y, a.z, 0.f);
133 #ifndef USE_TABLE_ARRAYS
// Linearly interpolates a float4 table that is read through a texture object.
// k is scaled by FORCE_ENERGY_TABLE_SIZE and shifted by -0.5 to obtain a
// fractional table position; entries i0 and i0 + 1 are blended with weight a.
134 __device__ __forceinline__
135 float4 sampleTableTex(cudaTextureObject_t tex, float k) {
136 const int tableSize = FORCE_ENERGY_TABLE_SIZE;
137 const float x = k * (float)tableSize - 0.5f;
138 const float f = floorf(x);
// Fractional part: interpolation weight toward the upper entry.
139 const float a = x - f;
140 const unsigned int i = (unsigned int)f;
// Clamp the lower index to tableSize - 1.
// NOTE(review): i1 can then equal tableSize, so the table presumably holds
// tableSize + 1 entries — confirm against the allocation.
141 const int i0 = i < tableSize - 1 ? i : tableSize - 1;
142 const int i1 = i0 + 1;
143 const float4 t0 = tex1Dfetch<float4>(tex, i0);
144 const float4 t1 = tex1Dfetch<float4>(tex, i1);
// Per-component lerp: t0 + a * (t1 - t0).
146 a * (t1.x - t0.x) + t0.x,
147 a * (t1.y - t0.y) + t0.y,
148 a * (t1.z - t0.z) + t0.z,
149 a * (t1.w - t0.w) + t0.w);
// Same interpolation as sampleTableTex, but the result is returned with the
// x and z components swapped (z, y, x, w).
152 __device__ __forceinline__
153 float4 sampleTableTexInv(cudaTextureObject_t tex, float k) {
154 const int tableSize = FORCE_ENERGY_TABLE_SIZE;
155 const float x = k * (float)tableSize - 0.5f;
156 const float f = floorf(x);
// Fractional part: interpolation weight toward the upper entry.
157 const float a = x - f;
158 const unsigned int i = (unsigned int)f;
// Clamp the lower index to tableSize - 1.
// NOTE(review): i1 can then equal tableSize, so the table presumably holds
// tableSize + 1 entries — confirm against the allocation.
159 const int i0 = i < tableSize - 1 ? i : tableSize - 1;
160 const int i1 = i0 + 1;
161 const float4 t0 = tex1Dfetch<float4>(tex, i0);
162 const float4 t1 = tex1Dfetch<float4>(tex, i1);
// Relies on float4 arithmetic operators that are declared outside this excerpt.
163 float4 t2 = (t1 - t0) * a + t0;
164 return make_float4(t2.z, t2.y, t2.x, t2.w);
171 // HIP implementation of tex1D has lower performance than this custom implementation of
174 //#define USE_TABLE_ARRAYS
// Returns a reference to element (idx + offset) of buffer. The byte offset is
// formed explicitly from idx * sizeof(T) and offset * sizeof(T) in unsigned int
// arithmetic and added to the base pointer viewed as const char*.
// NOTE(review): because the products are computed in unsigned int, a byte
// offset of 4 GiB or more would wrap; presumably the tables are far smaller — confirm.
178 static __forceinline__ __device__ const T& fast_load(const T* buffer, unsigned int idx, unsigned int offset = 0)
180 return *reinterpret_cast<const T*>(reinterpret_cast<const char*>(buffer) + idx * static_cast<unsigned int>(sizeof(T)) + offset * static_cast<unsigned int>(sizeof(T)));
// Linearly interpolates a float4 table held in a plain device array.
// k is scaled by FORCE_ENERGY_TABLE_SIZE and shifted by -0.5 to obtain a
// fractional table position; entries i0 and i0 + 1 are blended with weight a.
183 __device__ __forceinline__
184 float4 tableLookup(const float4* table, const float k)
186 const int tableSize = FORCE_ENERGY_TABLE_SIZE;
187 const float x = k * static_cast<float>(tableSize) - 0.5f;
188 const float f = floorf(x);
// Fractional part: interpolation weight toward the upper entry.
189 const float a = x - f;
190 const int i = static_cast<int>(f);
// Only the upper bound is clamped.
// NOTE(review): for x < 0 the signed index is negative and becomes a huge
// unsigned value when passed to fast_load; presumably callers guarantee
// k * tableSize >= 0.5 — confirm. i1 can also equal tableSize, so the table
// presumably holds tableSize + 1 entries.
191 const int i0 = i < tableSize - 1 ? i : tableSize - 1;
192 const int i1 = i0 + 1;
193 // const float4 t0 = __ldg(&table[i0]);
194 // const float4 t1 = __ldg(&table[i1]);
195 const float4 t0 = fast_load(table, i0);
196 const float4 t1 = fast_load(table, i1);
// Per-component lerp: t0 + a * (t1 - t0).
198 a * (t1.x - t0.x) + t0.x,
199 a * (t1.y - t0.y) + t0.y,
200 a * (t1.z - t0.z) + t0.z,
201 a * (t1.w - t0.w) + t0.w);
// Same interpolation as tableLookup, but the result is returned with the
// x and z components swapped (z, y, x, w).
204 __device__ __forceinline__
205 float4 tableLookupInv(const float4* table, const float k)
207 constexpr int tableSize = FORCE_ENERGY_TABLE_SIZE;
208 const float x = k * static_cast<float>(tableSize) - 0.5f;
209 const float f = floorf(x);
// Fractional part: interpolation weight toward the upper entry.
210 const float a = x - f;
211 const int i = static_cast<int>(f);
// Only the upper bound is clamped.
// NOTE(review): for x < 0 the signed index is negative and becomes a huge
// unsigned value when passed to fast_load; presumably callers guarantee
// k * tableSize >= 0.5 — confirm.
212 const int i0 = i < tableSize - 1 ? i : tableSize - 1;
213 const int i1 = i0 + 1;
214 // const float4 t0 = __ldg(&table[i0]);
215 // const float4 t1 = __ldg(&table[i1]);
216 const float4 t0 = fast_load(table, i0);
217 const float4 t1 = fast_load(table, i1);
// Relies on float4 arithmetic operators that are declared outside this excerpt.
218 float4 t2 = (t1 - t0) * a + t0;
219 return make_float4(t2.z, t2.y, t2.x, t2.w);
// Table-based pair interaction for one (i, j) atom pair.
// Looks up the Lennard-Jones coefficients at index vdwtypej + vdwtypei, samples
// the force (and, when doEnergy, energy) tables at 1/sqrt(r2), and accumulates
// the resulting force with opposite signs into iforce/jforce and
// iforceSlow/jforceSlow, and the energies into energyVdw/energyElec/energySlow.
// The tables are passed as raw arrays or as texture objects depending on
// USE_TABLE_ARRAYS.
// NOTE(review): several statements and preprocessor lines of this function are
// not visible in this excerpt; the notes below describe only the visible lines.
223 template<bool doEnergy, bool doSlow>
224 __device__ __forceinline__
225 void calcForceEnergy(const float r2, const float qi, const float qj,
226 const float dx, const float dy, const float dz,
227 const int vdwtypei, const int vdwtypej,
228 #ifdef USE_TABLE_ARRAYS
229 const float2* __restrict__ vdwCoefTable,
230 const float4* __restrict__ forceTable,
231 const float4* __restrict__ energyTable,
233 cudaTextureObject_t vdwCoefTableTex,
234 cudaTextureObject_t forceTableTex,
235 cudaTextureObject_t energyTableTex,
237 fast_float3& iforce, fast_float3& iforceSlow, fast_float3& jforce, fast_float3& jforceSlow,
238 float& energyVdw, float& energyElec, float& energySlow) {
// NOTE(review): the two type values are simply added, so one of them is
// presumably pre-multiplied by the table width by the caller — confirm.
240 int vdwIndex = vdwtypej + vdwtypei;
242 #if defined(USE_TABLE_ARRAYS)
243 // float2 ljab = vdwCoefTable[vdwIndex];
244 float2 ljab = fast_load(vdwCoefTable, vdwIndex);
246 float2 ljab = tex1Dfetch<float2>(vdwCoefTableTex, vdwIndex);
248 float rinv = __frsqrt_rn(r2);
// The force table is fetched with x and z swapped (the *Inv variants); the
// energy table is fetched in its natural order.
251 #if defined(USE_TABLE_ARRAYS)
252 float4 fi = tableLookupInv(forceTable, rinv);
253 if (doEnergy) ei = tableLookup(energyTable, rinv);
255 float4 fi = sampleTableTexInv(forceTableTex, rinv);
256 if (doEnergy) ei = sampleTableTex(energyTableTex, rinv);
259 float fSlow = qi * qj;
260 fast_float3 f3(fi.x, fi.y, fi.z); // no wasted registers, we already allocated stuff
261 fast_float3 lj(ljab.x, ljab.y, fSlow);
262 // we want to operate on reverse data here, the order of the float4 needs to be
263 // fast_float3 fi = f.z, f.y, f.x;
264 // fast_float3 lj = ljab.x, ljab.y, fSlow
265 // so if we do fi = fi * lj;
266 // f is the norm(fi) // x y z
268 // float f = ljab.x * fi.z + ljab.y * fi.y + fSlow * fi.x;
// Scalar force factor: sum of the three components of f3.
270 float f = f3.x + f3.y + f3.z;
273 energyVdw += ljab.x * ei.z + ljab.y * ei.y;
274 energyElec += fSlow * ei.x;
275 if (doSlow) energySlow += fSlow * ei.w;
277 if (doSlow) fSlow *= fi.w;
279 // dx, dy, and dz are already stored in contiguous registers -> wrap them in a fast_float3 and operate
280 fast_float3 dr(dx, dy, dz);
281 fast_float3 fr = dr *f;
282 // float fx = dx * f;
283 // float fy = dy * f;
284 // float fz = dz * f;
// Equal and opposite contributions for atoms i and j.
291 iforce = iforce + fr;
292 jforce = jforce - fr;
295 fast_float3 sr = dr*fSlow;
296 // float fxSlow = dx * fSlow;
297 // float fySlow = dy * fSlow;
298 // float fzSlow = dz * fSlow;
299 // iforceSlow.x += sr.x;
300 // iforceSlow.y += sr.y;
301 // iforceSlow.z += sr.z;
302 // jforceSlow.x -= sr.x;
303 // jforceSlow.y -= sr.y;
304 // jforceSlow.z -= sr.z;
305 iforceSlow = iforceSlow + sr;
306 jforceSlow = jforceSlow - sr;
310 // TODO: NbThole FEP/TI. Is this really useful?
// Screened (Thole-type) electrostatic correction for one (i, j) atom pair.
//   tij        : pair parameter read at index vdwtypej + vdwtypei
//   aa         : tij * (alpha_i * alpha_j)^(-1/6)
//   auaa       : aa * r, with r = 1 / rinv
//   energyElec : += qi*qj / r * ( -(1 + auaa/2) * exp(-auaa) )   (line 342)
//   force      : f = qi*qj / r^3 * (1 + auaa + auaa^2/2) * exp(-auaa),
//                added to iforce and subtracted from jforce along (dx, dy, dz).
// The tij table is passed as a raw array or a texture object depending on
// USE_TABLE_ARRAYS.
// Fix: the damping factor previously used rsqrtf(aprod), i.e. aprod^(-1/2),
// which contradicts the -1/6 power stated in the comment below; it now uses
// cbrtf(rsqrtf(aprod)).
311 template <bool doEnergy>
312 __device__ __forceinline__
313 void calcForceEnergyNbThole(
314 const int vdwtypei, const int vdwtypej,
315 const float r2, const float qi, const float qj,
316 const float dx, const float dy, const float dz,
317 #ifdef USE_TABLE_ARRAYS
318 const float* __restrict drudeNbTholeTijTable,
320 cudaTextureObject_t drudeNbTholeTijTableTex,
322 const float alpha_i, const float alpha_j,
323 float& energyElec, fast_float3& iforce, fast_float3& jforce)
325 #ifdef USE_TABLE_ARRAYS
326 const float tij = drudeNbTholeTijTable[vdwtypej + vdwtypei];
328 const float tij = tex1Dfetch<float>(drudeNbTholeTijTableTex, vdwtypej + vdwtypei);
331 const float aprod = alpha_i * alpha_j;
332 const float rinv = rsqrtf(r2);
333 // cbrtf(rsqrtf(aprod)) = powf(aprod, -1.f/6)
334 const float aa = tij * cbrtf(rsqrtf(aprod));
335 // qi and qj are already scaled by sqrt(COULOMB * ComputeNonbondedUtil::scaling *
336 // ComputeNonbondedUtil::dielectric_1), respectively.
337 const float qqaa = qi * qj;
// aa / rinv == aa * r
338 const float auaa = aa / rinv;
339 const float expauaa = expf(-auaa);
340 float polyauaa = 1.0f + 0.5f * auaa;
342 energyElec += qqaa * rinv * (- polyauaa * expauaa);
// Reuse polyauaa to build the force polynomial 1 + auaa + auaa^2 / 2.
344 polyauaa = 1.0f + auaa*polyauaa;
345 const float rinv3 = rinv * rinv * rinv;
346 const float f = qqaa * rinv3 * (polyauaa*expauaa);
347 fast_float3 dr(dx, dy, dz);
348 fast_float3 fr = dr *f;
// Equal and opposite contributions for atoms i and j.
349 iforce = iforce + fr;
350 jforce = jforce - fr;
// Analytic (non-tabulated) pair interaction for one (i, j) atom pair.
// Looks up the Lennard-Jones coefficients at index vdwtypej + vdwtypei and
// delegates the force-magnitude and energy evaluation to
// cudaNBForceMagCalc_VdwEnergySwitch_PMEC1, then accumulates the slow force
// with opposite signs into iforceSlow/jforceSlow.
// NOTE(review): the declarations of f/fSlow and the fast-force accumulation
// (original lines 386-407) are not visible in this excerpt.
354 template<bool doEnergy, bool doSlow>
355 __device__ __forceinline__
356 void calcForceEnergyMath(const float r2, const float qi, const float qj,
357 const float dx, const float dy, const float dz,
358 const int vdwtypei, const int vdwtypej,
359 #if defined(USE_TABLE_ARRAYS)
360 const float2* __restrict__ vdwCoefTable,
362 cudaTextureObject_t vdwCoefTableTex,
364 fast_float3& iforce, fast_float3& iforceSlow, fast_float3& jforce, fast_float3& jforceSlow,
365 float& energyVdw, float& energyElec, float& energySlow,
366 const CudaNBConstants nbConstants) {
368 int vdwIndex = vdwtypej + vdwtypei;
370 #if defined(USE_TABLE_ARRAYS)
371 float2 ljab = vdwCoefTable[vdwIndex];
373 float2 ljab = tex1Dfetch<float2>(vdwCoefTableTex, vdwIndex);
377 float rinv = rsqrtf(r2);
379 float charge = qi * qj;
// Produces the scalar force factors f and fSlow and updates the energies in place.
381 cudaNBForceMagCalc_VdwEnergySwitch_PMEC1<doEnergy, doSlow>(
382 r2, rinv, charge, ljab, nbConstants,
383 f, fSlow, energyVdw, energyElec, energySlow);
384 // dx, dy, and dz are already stored in contiguous registers -> wrap them in a fast_float3 and operate
385 fast_float3 ff(dx, dy, dz);
388 // float fx = dx * f;
389 // float fy = dy * f;
390 // float fz = dz * f;
391 fast_float3 iff = make_fast_float3(iforce);
392 fast_float3 jff = make_fast_float3(jforce);
// Slow-force contribution, applied with opposite signs to atoms i and j.
408 float fxSlow = dx * fSlow;
409 float fySlow = dy * fSlow;
410 float fzSlow = dz * fSlow;
411 iforceSlow.x += fxSlow;
412 iforceSlow.y += fySlow;
413 iforceSlow.z += fzSlow;
414 jforceSlow.x -= fxSlow;
415 jforceSlow.y -= fySlow;
416 jforceSlow.z -= fzSlow;
// Pair interaction for alchemical free-energy perturbation (FEP).
// From the partition values p1/p2 the pair is classified as ref / alch / dec /
// up / down; per-pair lambdas are selected from the constant-memory alchflags
// record. Van der Waals terms for alchemical pairs are evaluated analytically
// (force switching when vdwForceSwitch, otherwise potential switching; a
// shifted r^2 when shift), while electrostatics use the interpolation tables.
// Energies are accumulated for both lambda states (the *_s outputs use the
// "2" lambdas).
// NOTE(review): several lines of this function (the p1/p2 parameter line,
// some declarations, branch headers and closing braces) are not visible in
// this excerpt; notes below describe only the visible lines.
420 /* JM: Special __device__ function to compute VDW forces for alchemy.
421 * Partially swiped from ComputeNonbondedFEP.C
423 template<bool doEnergy, bool doSlow, bool shift, bool vdwForceSwitch>
424 __device__ __forceinline__
425 void calcForceEnergyFEP(const float r2, const float qi, const float qj,
426 const float dx, const float dy, const float dz,
427 const int vdwtypei, const int vdwtypej,
429 /*const AlchData &alchflags, */
430 #ifdef USE_TABLE_ARRAYS
431 const float2* __restrict__ vdwCoefTable,
432 const float4* __restrict__ forceTable,
433 const float4* __restrict__ energyTable,
435 cudaTextureObject_t vdwCoefTableTex,
436 cudaTextureObject_t forceTableTex,
437 cudaTextureObject_t energyTableTex,
439 fast_float3& iforce, fast_float3& iforceSlow, fast_float3& jforce, fast_float3& jforceSlow,
440 float& energyVdw, float &energyVdw_s, float& energyElec, float& energySlow,
441 float& energyElec_s, float& energySlow_s) {
444 int vdwIndex = vdwtypej + vdwtypei;
445 #if defined(USE_TABLE_ARRAYS)
446 float2 ljab = fast_load(vdwCoefTable, vdwIndex);
448 float2 ljab = tex1Dfetch<float2>(vdwCoefTableTex, vdwIndex); //ljab.x is A and ljab.y is B
451 float myVdwLambda = 0.0f;
452 float myVdwLambda2 = 0.0f;
453 float myElecLambda = 0.0f;
454 float myElecLambda2 = 0.0f;
455 float rinv = __frsqrt_rn(r2);
457 float alch_vdw_energy = 0.0f;
458 float alch_vdw_energy_2 = 0.0f;
459 float alch_vdw_force = 0.0f;
460 float fSlow = qi * qj;
// Unlike calcForceEnergy, the force table is read in its natural component order here.
463 #ifdef USE_TABLE_ARRAYS
464 float4 fi = tableLookup(forceTable, rinv);
465 if (doEnergy) ei = tableLookup(energyTable, rinv);
467 float4 fi = sampleTableTex(forceTableTex, rinv);
468 if (doEnergy) ei = sampleTableTex(energyTableTex, rinv);
472 //John said that there is a better way to avoid divergences here
473 //alch: true if => 1-0, 1-1, 2-0, 2-2
474 //dec: true if => 1-1, 2-2 && decouple
475 //up: true if => 1-0 && 1,1
476 //down: true if => 2-0, && 2,2
// The flags are 0/1 integers so they can be used as arithmetic masks below.
477 int ref = (p1 == 0 && p2 == 0);
478 int alch = (!ref && !(p1 == 1 && p2 ==2) && !(p1 == 2 && p2 == 1));
479 int dec = (alch && (p1 == p2) && alchflags.alchDecouple);
480 int up = (alch && (p1 == 1 || p2 == 1) && !dec);
481 int down = (alch && (p1 == 2 || p2 == 2) && !dec);
486 /*--------------- VDW SPECIAL ALCH FORCES (Swiped from ComputeNonbondedFEP.C) ---------------*/
// Branch-free selection: exactly the matching lambda survives the 0/1 masks.
488 myVdwLambda = alchflags.vdwLambdaUp*(up) + alchflags.vdwLambdaDown*(down) + 1.f*(ref || dec);
489 myVdwLambda2 = alchflags.vdwLambda2Up*(up) + alchflags.vdwLambda2Down*(down) + 1.f*(ref || dec);
490 myElecLambda = alchflags.elecLambdaUp*(up) + alchflags.elecLambdaDown*(down) + 1.f*(ref || dec);
491 myElecLambda2 = alchflags.elecLambda2Up*(up) + alchflags.elecLambda2Down*(down) + 1.f*(ref || dec);
494 if (vdwForceSwitch) {
496 float switchdist6_1, switchdist6_2;
497 const float cutoff6 = alchflags.cutoff2 * alchflags.cutoff2 * alchflags.cutoff2;
499 //Templated parameter. No control divergence here
501 const float myVdwShift = alchflags.vdwShiftUp*up + alchflags.vdwShiftDown*(!up);
502 const float myVdwShift2 = alchflags.vdwShift2Up*up + alchflags.vdwShift2Down*(!up);
503 r2_1 = __fdividef(1.f,(r2 + myVdwShift));
504 r2_2 = __fdividef(1.f,(r2 + myVdwShift2));
505 switchdist6_1 = alchflags.switchdist2 + myVdwShift;
506 switchdist6_1 = switchdist6_1 * switchdist6_1 * switchdist6_1;
507 switchdist6_2 = alchflags.switchdist2 + myVdwShift2;
508 switchdist6_2 = switchdist6_2 * switchdist6_2 * switchdist6_2;
512 switchdist6_1 = alchflags.switchdist2 * alchflags.switchdist2 * alchflags.switchdist2;
513 switchdist6_2 = switchdist6_1;
515 const float r6_1 = r2_1*r2_1*r2_1;
516 const float r6_2 = r2_2*r2_2*r2_2;
517 if (r2 <= alchflags.switchdist2) {
518 const float U1 = ljab.x*r6_1*r6_1 - ljab.y*r6_1; // NB: unscaled, shorthand only!
519 const float U2 = ljab.x*r6_2*r6_2 - ljab.y*r6_2;
520 // A == ljab.x, B == ljab.y
// Constant energy offset: -A/(cutoff6*switchdist6) + B/sqrt(cutoff6*switchdist6).
521 const float dU_1 = -ljab.x / (cutoff6 * switchdist6_1) - (-ljab.y * rsqrtf(cutoff6 * switchdist6_1));
522 const float dU_2 = -ljab.x / (cutoff6 * switchdist6_2) - (-ljab.y * rsqrtf(cutoff6 * switchdist6_2));
523 alch_vdw_energy = myVdwLambda * (U1 + dU_1);
524 alch_vdw_energy_2 = myVdwLambda2 * (U2 + dU_2);
526 //Multiplied by -1.0 to match CPU values
527 alch_vdw_force =-1.f*myVdwLambda*((12.f*U1 + 6.f*ljab.y*r6_1)*r2_1);
529 const float r3_1 = sqrtf(r6_1);
530 const float r3_2 = sqrtf(r6_2);
531 const float inv_cutoff6 = 1.0f / cutoff6;
532 const float inv_cutoff3 = rsqrtf(cutoff6);
533 // A == ljab.x, B == ljab.y
534 const float k_vdwa_1 = ljab.x / (1.0f - switchdist6_1 * inv_cutoff6);
535 const float k_vdwb_1 = ljab.y / (1.0f - sqrtf(switchdist6_1 * inv_cutoff6));
536 const float k_vdwa_2 = ljab.x / (1.0f - switchdist6_2 * inv_cutoff6);
537 const float k_vdwb_2 = ljab.y / (1.0f - sqrtf(switchdist6_2 * inv_cutoff6));
538 const float tmpa_1 = r6_1 - inv_cutoff6;
539 const float tmpb_1 = r3_1 - inv_cutoff3;
540 const float tmpa_2 = r6_2 - inv_cutoff6;
541 const float tmpb_2 = r3_2 - inv_cutoff3;
542 alch_vdw_energy = myVdwLambda * (k_vdwa_1 * tmpa_1 * tmpa_1 - k_vdwb_1 * tmpb_1 * tmpb_1);
543 alch_vdw_energy_2 = myVdwLambda2 * (k_vdwa_2 * tmpa_2 * tmpa_2 - k_vdwb_2 * tmpb_2 * tmpb_2);
544 //Multiplied by -1.0 to match CPU values
545 alch_vdw_force = -1.0f * myVdwLambda * (6.0f * r2_1 * (2.0f * k_vdwa_1 * tmpa_1 * r6_1 - k_vdwb_1 * tmpb_1 * r3_1));
546 } // r2 <= alchflags.switchdist2
548 // potential switching
549 const float diff = alchflags.cutoff2 - r2;
// Comparison results (0 or 1) are used as multipliers to select a value without branching.
551 const float switchmul = (alchflags.switchfactor*(diff)*(diff)*(alchflags.cutoff2 - 3.f*alchflags.switchdist2 + 2.f*r2))*(r2 > alchflags.switchdist2) + (1.f)*(r2 <= alchflags.switchdist2);
552 const float switchmul2 = (12.f*alchflags.switchfactor*(diff)*(r2 - alchflags.switchdist2))*(r2 > alchflags.switchdist2) + (0.f) * (r2 <= alchflags.switchdist2);
554 //Templated parameter. No control divergence here
556 //This templated parameter lets me get away with not making 2 divisions. But for myVdwShift != 0, how do I do this?
557 const float myVdwShift = alchflags.vdwShiftUp*up + alchflags.vdwShiftDown*(!up);
558 const float myVdwShift2 = alchflags.vdwShift2Up*up + alchflags.vdwShift2Down*(!up);
559 //r2_1 = 1.0/(r2 + myVdwShift);
560 //r2_2 = 1.0/(r2 + myVdwShift2);
561 r2_1 = __fdividef(1.f,(r2 + myVdwShift));
562 r2_2 = __fdividef(1.f,(r2 + myVdwShift2));
568 const float r6_1 = r2_1*r2_1*r2_1;
569 const float r6_2 = r2_2*r2_2*r2_2;
570 const float U1 = ljab.x*r6_1*r6_1 - ljab.y*r6_1; // NB: unscaled, shorthand only!
571 const float U2 = ljab.x*r6_2*r6_2 - ljab.y*r6_2;
572 alch_vdw_energy = myVdwLambda*switchmul*U1;
573 alch_vdw_energy_2 = myVdwLambda2*switchmul*U2;
575 //Multiplied by -1.0 to match CPU values
576 alch_vdw_force =-1.f*myVdwLambda*((switchmul*(12.f*U1 + 6.f*ljab.y*r6_1)*r2_1+ switchmul2*U1));
580 /*-----------------------------------------------------------*/
583 //All energies should be scaled by the corresponding lambda
// Tabulated VDW energy is kept for ref/dec pairs; alchemical pairs use the analytic value.
584 energyVdw += (ljab.x * ei.z + ljab.y * ei.y)*(ref || dec) + alch_vdw_energy*(alch && !dec);
585 energyElec += (fSlow * ei.x)*myElecLambda;
586 energyVdw_s += (ljab.x * ei.z + ljab.y * ei.y)*(ref || dec) + alch_vdw_energy_2*(alch && !dec);
587 energyElec_s += (fSlow * ei.x)*myElecLambda2;
589 energySlow += (fSlow * ei.w)*myElecLambda;
590 energySlow_s += (fSlow * ei.w)*myElecLambda2;
594 if (doSlow) fSlow *= fi.w;
596 //We should include the regular VDW forces if not dealing with alch pairs
// NOTE(review): the tabulated VDW mask here is (!alch || dec) whereas
// calcForceEnergyTI uses (ref || dec); the two differ for pairs that are
// neither ref nor alch — confirm this is intended.
597 f = (f + ((ljab.x * fi.z + ljab.y * fi.y)*(!alch || dec)))*myElecLambda
598 + alch_vdw_force*(alch && !dec);
612 /*There's stuff that needs to be added here, when FAST AND NOSHORT macros are on*/
613 fSlow = myElecLambda*fSlow;
// Slow-force contribution, applied with opposite signs to atoms i and j.
614 float fxSlow = dx * fSlow;
615 float fySlow = dy * fSlow;
616 float fzSlow = dz * fSlow;
617 iforceSlow.x += fxSlow;
618 iforceSlow.y += fySlow;
619 iforceSlow.z += fzSlow;
620 jforceSlow.x -= fxSlow;
621 jforceSlow.y -= fySlow;
622 jforceSlow.z -= fzSlow;
// Pair interaction for alchemical thermodynamic integration (TI).
// From the partition values p1/p2 the pair is classified as ref / alch / dec /
// up / down; per-pair lambdas are selected from the constant-memory alchflags
// record. Van der Waals terms for alchemical pairs are evaluated analytically
// (force switching when vdwForceSwitch, otherwise potential switching; a
// shifted r^2 when shift), together with the derivative term alch_vdw_dUdl.
// The *_ti_1 outputs accumulate contributions of "up" pairs and the *_ti_2
// outputs those of "down" pairs.
// NOTE(review): several lines of this function (the p1/p2 parameter line,
// some declarations, branch headers and closing braces) are not visible in
// this excerpt; notes below describe only the visible lines.
626 /* JM: Special __device__ function to compute VDW forces for TI.
629 template<bool doEnergy, bool doSlow, bool shift, bool vdwForceSwitch>
630 __device__ __forceinline__
631 void calcForceEnergyTI(const float r2, const float qi, const float qj,
632 const float dx, const float dy, const float dz,
633 const int vdwtypei, const int vdwtypej,
635 #ifdef USE_TABLE_ARRAYS
636 const float2* __restrict__ vdwCoefTable,
637 const float4* __restrict__ forceTable,
638 const float4* __restrict__ energyTable,
640 cudaTextureObject_t vdwCoefTableTex,
641 cudaTextureObject_t forceTableTex,
642 cudaTextureObject_t energyTableTex,
644 fast_float3& iforce, fast_float3& iforceSlow, fast_float3& jforce, fast_float3& jforceSlow,
645 float& energyVdw, float& energyVdw_ti_1, float& energyVdw_ti_2,
646 float& energyElec, float& energyElec_ti_1, float& energyElec_ti_2,
647 float& energySlow, float& energySlow_ti_1, float& energySlow_ti_2) {
649 int vdwIndex = vdwtypej + vdwtypei;
650 #if defined(USE_TABLE_ARRAYS)
651 float2 ljab = fast_load(vdwCoefTable, vdwIndex);
653 float2 ljab = tex1Dfetch<float2>(vdwCoefTableTex, vdwIndex); //ljab.x is A and ljab.y is B
656 /* JM: For TI, we have to deal ALCH1 OR ALCH2 during ComputeNonbondedBase2
657 * ALCH1 for appearing terms;
658 * ALCH2 for disappearing terms;
659 * Instead of the _s energy terms, we need to calculate:
661 * vdwEnergy_ti_1 and _2 for VDW energies. For those we need to add the special terms calculated on
662 * ComputeNonbondedTI.C
664 * elecEnergy_ti_1 and _2 for electrostatic energy. No correction needed here though.
668 float myVdwLambda = 0.0f;
669 float myElecLambda = 0.0f;
670 float rinv = __frsqrt_rn(r2);
672 float alch_vdw_energy = 0.0f;
673 float alch_vdw_force = 0.0f;
674 float alch_vdw_dUdl = 0.0f;
675 float fSlow = qi * qj;
677 #if defined(USE_TABLE_ARRAYS)
678 float4 fi = tableLookup(forceTable, rinv);
679 if (doEnergy) ei = tableLookup(energyTable, rinv);
681 float4 fi = sampleTableTex(forceTableTex, rinv);
682 if (doEnergy) ei = sampleTableTex(energyTableTex, rinv);
685 //John said that there is a better way to avoid divergences here
686 //alch: true if => 1-0, 1-1, 2-0, 2-2
687 //dec: true if => 1-1, 2-2 && decouple
688 //up: true if => 1-0 && 1,1
689 //down: true if => 2-0, && 2,2
690 int ref = (p1 == 0 && p2 == 0);
691 int alch = (!ref && !(p1 == 1 && p2 ==2) && !(p1 == 2 && p2 == 1));
692 int dec = (alch && (p1 == p2) && alchflags.alchDecouple);
693 int up = (alch && (p1 == 1 || p2 == 1) && !dec);
694 int down = (alch && (p1 == 2 || p2 == 2) && !dec);
699 /*--------------- VDW SPECIAL ALCH STUFF (Swiped from ComputeNonbondedTI.C) ---------------*/
700 myVdwLambda = alchflags.vdwLambdaUp*(up) + alchflags.vdwLambdaDown*(down) + 1.f*(ref || dec);
701 myElecLambda = alchflags.elecLambdaUp*(up) + alchflags.elecLambdaDown*(down) + 1.f*(ref || dec);
703 if (vdwForceSwitch) {
704 const float cutoff6 = alchflags.cutoff2 * alchflags.cutoff2 * alchflags.cutoff2;
707 const float myVdwShift = alchflags.vdwShiftUp*up + alchflags.vdwShiftDown*(!up);
708 r2_1 = __fdividef(1.f,(r2 + myVdwShift));
709 switchdist6 = alchflags.switchdist2 + myVdwShift;
710 switchdist6 = switchdist6 * switchdist6 * switchdist6;
713 switchdist6 = alchflags.switchdist2 * alchflags.switchdist2 * alchflags.switchdist2;
715 const float r6_1 = r2_1*r2_1*r2_1;
716 if (r2 <= alchflags.switchdist2) {
717 const float U = ljab.x*r6_1*r6_1 - ljab.y*r6_1;
718 const float dU = -ljab.x / (cutoff6 * switchdist6) - (-ljab.y * rsqrtf(cutoff6 * switchdist6));
719 alch_vdw_force = -1.f*(myVdwLambda*((12.f*U + 6.f*ljab.y*r6_1)*r2_1));
720 alch_vdw_energy = myVdwLambda * (U + dU);
721 alch_vdw_dUdl = U + myVdwLambda * alchflags.alchVdwShiftCoeff * (6.f*U + 3.f*ljab.y*r6_1)*r2_1 + dU;
723 const float r3_1 = sqrtf(r6_1);
724 const float inv_cutoff6 = 1.0f / cutoff6;
725 const float inv_cutoff3 = sqrtf(inv_cutoff6);
726 const float k_vdwa_1 = ljab.x / (1.0f - switchdist6 * inv_cutoff6);
727 const float k_vdwb_1 = ljab.y / (1.0f - sqrtf(switchdist6 * inv_cutoff6));
728 const float tmpa_1 = r6_1 - inv_cutoff6;
729 const float tmpb_1 = r3_1 - inv_cutoff3;
730 const float U = k_vdwa_1 * tmpa_1 * tmpa_1 - k_vdwb_1 * tmpb_1 * tmpb_1;
731 alch_vdw_force = -1.0f * myVdwLambda * (6.0f * r2_1 * (2.0f * k_vdwa_1 * tmpa_1 * r6_1 - k_vdwb_1 * tmpb_1 * r3_1));
732 alch_vdw_energy = myVdwLambda * U;
733 alch_vdw_dUdl = U + myVdwLambda * alchflags.alchVdwShiftCoeff * (3.0f * r2_1 * (2.0f * k_vdwa_1 * tmpa_1 * r6_1 - k_vdwb_1 * tmpb_1 * r3_1));
734 } // r2 <= alchflags.switchdist2
736 // potential switching
737 const float diff = alchflags.cutoff2 - r2;
738 const float switchmul = (r2 > alchflags.switchdist2 ? alchflags.switchfactor*(diff)*(diff) \
739 *(alchflags.cutoff2 - 3.f*alchflags.switchdist2 + 2.f*r2) : 1.f);
741 const float switchmul2 = (r2 > alchflags.switchdist2 ? \
742 12.f*alchflags.switchfactor*(diff) \
743 *(r2 - alchflags.switchdist2) : 0.f);
744 //Templated parameter. No control divergence here
746 const float myVdwShift = alchflags.vdwShiftUp*up + alchflags.vdwShiftDown*(!up);
747 r2_1 = __fdividef(1.f,(r2 + myVdwShift));
748 }else r2_1 = rinv*rinv;
750 const float r6_1 = r2_1*r2_1*r2_1;
751 const float U = ljab.x*r6_1*r6_1 - ljab.y*r6_1; // NB: unscaled! for shorthand only!
752 alch_vdw_energy = myVdwLambda*switchmul*U;
753 //Multiplied by -1.0 to match CPU values
754 alch_vdw_force = -1.f*(myVdwLambda*(switchmul*(12.f*U + 6.f*ljab.y*r6_1)*r2_1 \
756 alch_vdw_dUdl = (switchmul*(U + myVdwLambda*alchflags.alchVdwShiftCoeff \
757 *(6.f*U + 3.f*ljab.y*r6_1)*r2_1));
760 /*-------------------------------------------------------------------------*/
763 //All energies should be scaled by the corresponding lambda
764 energyVdw += (ljab.x * ei.z + ljab.y * ei.y)*(ref || dec) + alch_vdw_energy*(alch && !dec);
765 energyElec += (fSlow * ei.x)*myElecLambda;
// The up/down flags (0 or 1) route each contribution to the _ti_1 or _ti_2 accumulator.
767 energyVdw_ti_1 += alch_vdw_dUdl*up;
768 energyVdw_ti_2 += alch_vdw_dUdl*down;
769 energyElec_ti_1 += (fSlow * ei.x)*up;
770 energyElec_ti_2 += (fSlow * ei.x)*down;
773 energySlow += (fSlow * ei.w)*myElecLambda;
775 energySlow_ti_1 += (fSlow * ei.w)*up;
776 energySlow_ti_2 += (fSlow * ei.w)*down;
781 if (doSlow) fSlow *= fi.w;
782 //We should include the regular VDW forces if not dealing with alch pairs
783 f = (f + ((ljab.x * fi.z + ljab.y * fi.y)*(ref || dec)))*myElecLambda
784 + alch_vdw_force*(alch && !dec);
798 /*There's stuff that needs to be added here, when FAST AND NOSHORT macros are on*/
799 fSlow = myElecLambda*fSlow; /* FAST(NOSHORT(+alch_vdw_force))*/ //Those should also be zeroed
// Slow-force contribution, applied with opposite signs to atoms i and j.
800 float fxSlow = dx * fSlow;
801 float fySlow = dy * fSlow;
802 float fzSlow = dz * fSlow;
803 iforceSlow.x += fxSlow;
804 iforceSlow.y += fySlow;
805 iforceSlow.z += fzSlow;
806 jforceSlow.x -= fxSlow;
807 jforceSlow.y -= fySlow;
808 jforceSlow.z -= fzSlow;
// Atomically accumulates one atom's force (and slow force) into the separate
// x/y/z force arrays at index pos. T is any type exposing .x, .y and .z.
// When built for HIP newer than 3.3, the atomics are skipped if all three
// components are exactly zero; otherwise they are issued unconditionally.
// NOTE(review): the #else/#endif lines and the guard around the slow-force
// stores are not visible in this excerpt; presumably the slow stores are
// conditional on doSlow — confirm.
812 template<bool doSlow, typename T>
813 __device__ __forceinline__
814 void storeForces(const int pos, const T force, const T forceSlow,
815 float* __restrict__ devForces_x,
816 float* __restrict__ devForces_y,
817 float* __restrict__ devForces_z,
818 float* __restrict__ devForcesSlow_x,
819 float* __restrict__ devForcesSlow_y,
820 float* __restrict__ devForcesSlow_z)
822 #if defined(NAMD_HIP) && ((HIP_VERSION_MAJOR == 3) && (HIP_VERSION_MINOR > 3) || (HIP_VERSION_MAJOR > 3))
// Zero-skipping variant.
823 if (force.x != 0.0f || force.y != 0.0f || force.z != 0.0f) {
824 atomicAdd(&devForces_x[pos], force.x);
825 atomicAdd(&devForces_y[pos], force.y);
826 atomicAdd(&devForces_z[pos], force.z);
829 if (forceSlow.x != 0.0f || forceSlow.y != 0.0f || forceSlow.z != 0.0f) {
830 atomicAdd(&devForcesSlow_x[pos], forceSlow.x);
831 atomicAdd(&devForcesSlow_y[pos], forceSlow.y);
832 atomicAdd(&devForcesSlow_z[pos], forceSlow.z);
// Unconditional variant.
836 atomicAdd(&devForces_x[pos], force.x);
837 atomicAdd(&devForces_y[pos], force.y);
838 atomicAdd(&devForces_z[pos], force.z);
840 atomicAdd(&devForcesSlow_x[pos], forceSlow.x);
841 atomicAdd(&devForcesSlow_y[pos], forceSlow.y);
842 atomicAdd(&devForcesSlow_z[pos], forceSlow.z);
// float4 overload of storeForces: atomically accumulates the x/y/z components
// of force (and forceSlow) into the separate force arrays at index pos; the w
// components are not used in the visible lines.
// When built for HIP newer than 3.3, the atomics are skipped if all three
// components are exactly zero; otherwise they are issued unconditionally.
// NOTE(review): the #else/#endif lines and the guard around the slow-force
// stores are not visible in this excerpt; presumably the slow stores are
// conditional on doSlow — confirm.
847 template<bool doSlow>
848 __device__ __forceinline__
849 void storeForces(const int pos, const float4 force, const float4 forceSlow,
850 float* __restrict__ devForces_x,
851 float* __restrict__ devForces_y,
852 float* __restrict__ devForces_z,
853 float* __restrict__ devForcesSlow_x,
854 float* __restrict__ devForcesSlow_y,
855 float* __restrict__ devForcesSlow_z)
857 #if defined(NAMD_HIP) && ((HIP_VERSION_MAJOR == 3) && (HIP_VERSION_MINOR > 3) || (HIP_VERSION_MAJOR > 3))
// Zero-skipping variant.
858 if (force.x != 0.0f || force.y != 0.0f || force.z != 0.0f) {
859 atomicAdd(&devForces_x[pos], force.x);
860 atomicAdd(&devForces_y[pos], force.y);
861 atomicAdd(&devForces_z[pos], force.z);
864 if (forceSlow.x != 0.0f || forceSlow.y != 0.0f || forceSlow.z != 0.0f) {
865 atomicAdd(&devForcesSlow_x[pos], forceSlow.x);
866 atomicAdd(&devForcesSlow_y[pos], forceSlow.y);
867 atomicAdd(&devForcesSlow_z[pos], forceSlow.z);
// Unconditional variant.
871 atomicAdd(&devForces_x[pos], force.x);
872 atomicAdd(&devForces_y[pos], force.y);
873 atomicAdd(&devForces_z[pos], force.z);
875 atomicAdd(&devForcesSlow_x[pos], forceSlow.x);
876 atomicAdd(&devForcesSlow_y[pos], forceSlow.y);
877 atomicAdd(&devForcesSlow_z[pos], forceSlow.z);
882 //#define USE_NEW_EXCL_METHOD
885 // Returns the lower estimate for the distance between a bounding box and a set of atoms
// The value computed is a squared distance. Per axis, the box extent
// (a.wx / a.wy / a.wz) is subtracted from |a - b| and the result is clamped at
// zero, so a point inside the box along an axis contributes nothing.
// NOTE(review): presumably a.x/y/z is the box center, a.w* its half-widths and
// b a representative position — confirm against BoundingBox.
887 __device__ __forceinline__ float distsq(const BoundingBox a, const float4 b) {
888 float dx = max(0.0f, fabsf(a.x - b.x) - a.wx);
889 float dy = max(0.0f, fabsf(a.y - b.y) - a.wy);
890 float dz = max(0.0f, fabsf(a.z - b.z) - a.wz);
891 float r2 = dx*dx + dy*dy + dz*dz;
895 #define LARGE_FLOAT (float)(1.0e10)
898 // Nonbonded force kernel
900 template <bool doEnergy, bool doVirial, bool doSlow, bool doPairlist, bool doAlch, bool doFEP, bool doTI, bool doStreaming, bool doTable, bool doAlchVdwForceSwitching, bool doNbThole>
902 __launch_bounds__(WARPSIZE*NONBONDKERNEL_NUM_WARP,
903 doPairlist ? (10) : (doEnergy ? (10) : (10) ))
904 nonbondedForceKernel(
905 const int start, const int numTileLists,
906 const TileList* __restrict__ tileLists, TileExcl* __restrict__ tileExcls,
907 const int* __restrict__ tileJatomStart,
908 const int vdwCoefTableWidth,
909 #if defined(USE_TABLE_ARRAYS)
910 const float2* __restrict__ vdwCoefTable,
912 cudaTextureObject_t vdwCoefTableTex,
914 const int* __restrict__ vdwTypes,
915 const float3 lata, const float3 latb, const float3 latc,
916 const float4* __restrict__ xyzq, const float cutoff2, const CudaNBConstants nbConstants,
917 #ifdef USE_TABLE_ARRAYS
918 const float4* __restrict__ forceTable, const float4* __restrict__ energyTable,
920 cudaTextureObject_t forceTableTex, cudaTextureObject_t energyTableTex,
924 float plcutoff2, const PatchPairRecord* __restrict__ patchPairs,
925 const int* __restrict__ atomIndex,
926 const int2* __restrict__ exclIndexMaxDiff, const unsigned int* __restrict__ overflowExclusions,
927 unsigned int* __restrict__ tileListDepth, int* __restrict__ tileListOrder,
928 int* __restrict__ jtiles, TileListStat* __restrict__ tileListStat,
929 const BoundingBox* __restrict__ boundingBoxes,
930 #ifdef USE_NEW_EXCL_METHOD
931 const int* __restrict__ minmaxExclAtom,
934 float * __restrict__ devForce_x,
935 float * __restrict__ devForce_y,
936 float * __restrict__ devForce_z,
937 float * __restrict__ devForce_w,
938 float * __restrict__ devForceSlow_x,
939 float * __restrict__ devForceSlow_y,
940 float * __restrict__ devForceSlow_z,
941 float * __restrict__ devForceSlow_w,
942 // ---- USE_STREAMING_FORCES ----
943 const int numPatches,
944 unsigned int* __restrict__ patchNumCount,
945 const CudaPatchRecord* __restrict__ cudaPatches,
946 float4* __restrict__ mapForces, float4* __restrict__ mapForcesSlow,
947 int* __restrict__ mapPatchReadyQueue,
948 int* __restrict__ outputOrder,
949 // ------------------------------
950 TileListVirialEnergy* __restrict__ virialEnergy,
952 char* __restrict__ p,
953 // ---- doNbThole ----
954 #ifndef USE_TABLE_ARRAYS
955 cudaTextureObject_t drudeNbTholeTijTableTex,
957 #ifdef USE_TABLE_ARRAYS
958 const float* __restrict drudeNbTholeTijTable,
960 const int* __restrict isDrude,
961 const float* __restrict drudeAtomAlpha,
962 const float drudeNbtholeCut2
965 // Single warp takes care of one list of tiles
966 // for (int itileList = (threadIdx.x + blockDim.x*blockIdx.x)/WARPSIZE;itileList < numTileLists;itileList += blockDim.x*gridDim.x/WARPSIZE)
967 // int itileList = start + threadIdx.x/WARPSIZE + blockDim.x/WARPSIZE*blockIdx.x;
968 // The line above is the CUDA-exclusive version. HIPification in this case is selecting based on specific parameters
969 // In the hip scheme: Each warp gets an itile and 2 jtiles
970 // if NONBONDEDKERNEL_NUM_WARP == 1, each warp does 2 tiles, so we do 2* blockIdx
971 // int itileList = start + (threadIdx.x/WARPSIZE + NONBONDKERNEL_NUM_WARP*blockIdx.x);
973 int itileList = start + (WARPSIZE == 64 ? blockIdx.x : (threadIdx.x/BOUNDINGBOXSIZE + NONBONDKERNEL_NUM_WARP*blockIdx.x));
974 if (itileList < numTileLists)
976 fast_float3 iforce, iforceSlow;
977 fast_float3 jforce, jforceSlow;
978 float energyVdw, energyElec, energySlow;
980 float energyVdw_s, energyElec_s, energySlow_s;
982 float energyVdw_ti_1, energyVdw_ti_2, energyElec_ti_1, energyElec_ti_2, energySlow_ti_1, energySlow_ti_2;
984 unsigned int itileListLen;
987 char part1, part2, p2;
988 // ***** Drude/NbThole start
989 int drude_i, drude_j_tmp, drude_j;
990 float alpha_i, alpha_j_tmp, alpha_j;
991 // ***** Drude/NbThole end
992 bool doShift = doAlch && (alchflags.alchVdwShiftCoeff != 0.0f);
993 __shared__ float4 s_xyzq[NONBONDKERNEL_NUM_WARP][BOUNDINGBOXSIZE];
994 __shared__ int s_vdwtypej[NONBONDKERNEL_NUM_WARP][BOUNDINGBOXSIZE];
995 __shared__ float4 s_jforce[NONBONDKERNEL_SWEEPS_PER_TILE][BOUNDINGBOXSIZE];
996 __shared__ float4 s_jforceSlow[NONBONDKERNEL_SWEEPS_PER_TILE][BOUNDINGBOXSIZE];
997 __shared__ int s_jatomIndex[NONBONDKERNEL_NUM_WARP][BOUNDINGBOXSIZE];
999 // Warp index (0...WARPSIZE-1)
1000 // JM: For BoundingBoxSize32 and WARPSIZE == 64, each warp gets two tile lists!
1002 const int wid = threadIdx.x % BOUNDINGBOXSIZE;
1003 const int iwarp = 0; // one workgroup does two sweeps
1004 const int isweep = threadIdx.x / BOUNDINGBOXSIZE; // 2 sweeps for 32-sized bounding boxes
1005 constexpr int nsweep = NONBONDKERNEL_SWEEPS_PER_TILE;
1007 // Start computation
1009 TileList tmp = tileLists[itileList];
1010 int iatomStart = tmp.iatomStart;
1011 int jtileStart = tmp.jtileStart;
1012 int jtileEnd = tmp.jtileEnd;
1013 patchInd = tmp.patchInd;
1014 patchNumList = tmp.patchNumList;
1016 float shx = tmp.offsetXYZ.x*lata.x + tmp.offsetXYZ.y*latb.x + tmp.offsetXYZ.z*latc.x;
1017 float shy = tmp.offsetXYZ.x*lata.y + tmp.offsetXYZ.y*latb.y + tmp.offsetXYZ.z*latc.y;
1018 float shz = tmp.offsetXYZ.x*lata.z + tmp.offsetXYZ.y*latb.z + tmp.offsetXYZ.z*latc.z;
1020 // DH - set zeroShift flag if magnitude of shift vector is zero
1021 bool zeroShift = ! (shx*shx + shy*shy + shz*shz > 0);
1023 int iatomSize, iatomFreeSize, jatomSize, jatomFreeSize;
1025 PatchPairRecord PPStmp = patchPairs[itileList];
1026 iatomSize = PPStmp.iatomSize;
1027 iatomFreeSize = PPStmp.iatomFreeSize;
1028 jatomSize = PPStmp.jatomSize;
1029 jatomFreeSize = PPStmp.jatomFreeSize;
1032 // Write to global memory here to avoid register spilling
1035 virialEnergy[itileList].shx = shx;
1036 virialEnergy[itileList].shy = shy;
1037 virialEnergy[itileList].shz = shz;
1041 // Load i-atom data (and shift coordinates)
1043 float4 xyzq_i = xyzq[iatomStart + wid];
1044 if (doAlch) part1 = p[iatomStart + wid];
1047 drude_i = isDrude[iatomStart + wid];
1048 alpha_i = drudeAtomAlpha[iatomStart + wid];
1053 xyz_i.x = xyzq_i.x; // no wasted registers here, just aliasing as float4s are vectorized
1056 int vdwtypei = vdwTypes[iatomStart + wid]*vdwCoefTableWidth;
1058 // Load the i-tile bounding box and shift it by (shx, shy, shz)
1059 BoundingBox boundingBoxI;
1061 boundingBoxI = boundingBoxes[iatomStart/BOUNDINGBOXSIZE];
1062 boundingBoxI.x += shx;
1063 boundingBoxI.y += shy;
1064 boundingBoxI.z += shz;
1067 // Get i-atom global index
1068 #ifdef USE_NEW_EXCL_METHOD
1069 int iatomIndex, minExclAtom, maxExclAtom;
1074 #ifdef USE_NEW_EXCL_METHOD
1075 iatomIndex = atomIndex[iatomStart + wid];
1076 int2 tmp = minmaxExclAtom[iatomStart + wid];
1077 minExclAtom = tmp.x;
1078 maxExclAtom = tmp.y;
1080 iatomIndex = atomIndex[iatomStart + wid];
1084 // i-forces in registers
1090 // float3 iforceSlow;
1092 iforceSlow.x = 0.0f;
1093 iforceSlow.y = 0.0f;
1094 iforceSlow.z = 0.0f;
1097 // float energyVdw, energyElec, energySlow;
1101 energyVdw_ti_1 = 0.0f;
1102 energyVdw_ti_2 = 0.0f;
1104 energyElec_ti_1 = 0.0f;
1105 energyElec_ti_2 = 0.0f;
1106 energyElec_s = 0.0f;
1109 energySlow_s = 0.0f;
1110 energySlow_ti_1 = 0.0f;
1111 energySlow_ti_2 = 0.0f;
1115 // Number of exclusions
1116 // NOTE: Lowest bit is used as indicator bit for tile pairs:
1117 // bit 0 tile has no atoms within pairlist cutoff
1118 // bit 1 tile has atoms within pairlist cutoff
1120 if (doPairlist) nexcluded = 0;
1122 // Number of i loops and free atoms
1125 int nloopi = min(iatomSize - iatomStart, BOUNDINGBOXSIZE);
1126 nfreei = max(iatomFreeSize - iatomStart, 0);
1127 if (wid >= nloopi) {
1128 xyzq_i.x = -LARGE_FLOAT;
1129 xyzq_i.y = -LARGE_FLOAT;
1130 xyzq_i.z = -LARGE_FLOAT;
1135 // int itileListLen;
1136 // int minJatomStart;
1138 // minJatomStart = tileJatomStart[jtileStart];
1142 // Exclusion index and maxdiff
1143 int iexclIndex, iexclMaxdiff;
1145 int2 tmp = exclIndexMaxDiff[iatomStart + wid];
1147 iexclMaxdiff = tmp.y;
1149 // fetch two tiles at once here
1150 for (int jtile=jtileStart;jtile <= jtileEnd;jtile++) {
1151 // Load j-atom starting index and exclusion mask
1152 int jatomStart = tileJatomStart[jtile];
1154 float4 xyzq_j = xyzq[jatomStart + wid];
1155 NAMD_WARP_SYNC(WARP_FULL_MASK);
1156 if (doAlch) p2 = p[jatomStart + wid];
1160 drude_j_tmp = isDrude[jatomStart + wid];
1161 alpha_j_tmp = drudeAtomAlpha[jatomStart + wid];
1164 // Check for early bail
1166 float r2bb = distsq(boundingBoxI, xyzq_j);
1167 if (NAMD_WARP_ALL(WARP_FULL_MASK, r2bb > plcutoff2)) continue;
1169 WarpMask excl = (doPairlist) ? 0 : tileExcls[jtile].excl[wid];
1170 int vdwtypej = vdwTypes[jatomStart + wid];
1171 s_vdwtypej[iwarp][wid] = vdwtypej;
1173 // Get j-atom global index
1175 s_jatomIndex[iwarp][wid] = atomIndex[jatomStart + wid];
1178 // Number of j loops and free atoms
1181 int nloopj = min(jatomSize - jatomStart, BOUNDINGBOXSIZE);
1182 nfreej = max(jatomFreeSize - jatomStart, 0);
1183 //if (nfreei == 0 && nfreej == 0) continue;
1184 if (wid >= nloopj) {
1185 xyzq_j.x = LARGE_FLOAT;
1186 xyzq_j.y = LARGE_FLOAT;
1187 xyzq_j.z = LARGE_FLOAT;
1190 s_xyzq[iwarp][wid] = xyzq_j;
1192 // DH - self requires that zeroShift is also set
1193 const bool self = zeroShift && (iatomStart == jatomStart);
1194 const int modval = (self) ? 2*BOUNDINGBOXSIZE-1 : BOUNDINGBOXSIZE-1;
1196 s_jforce[isweep][wid] = make_float4(0.0f, 0.0f, 0.0f, 0.f);
1198 s_jforceSlow[isweep][wid] = make_float4(0.0f, 0.0f, 0.0f, 0.f);
1199 NAMD_WARP_SYNC(WARP_FULL_MASK);
1203 // NOTE: Pairlist update, we must also include the diagonal since this is used
1205 // Clear the lowest (indicator) bit
1208 // For self tiles, do the diagonal term (t=0).
1209 // NOTE: No energies are computed here, since this self-diagonal term is only for GBIS phase 2
1210 if (self && !isweep) {
1211 int j = (0 + wid) & modval;
1212 xyzq_j = s_xyzq[iwarp][j];
1213 float dx = xyzq_j.x - xyzq_i.x;
1214 float dy = xyzq_j.y - xyzq_i.y;
1215 float dz = xyzq_j.z - xyzq_i.z;
1216 float r2 = dx*dx + dy*dy + dz*dz;
1218 if (j < BOUNDINGBOXSIZE && r2 < plcutoff2) {
1219 // We have atom pair within the pairlist cutoff => Set indicator bit
1223 for (int t = (self && !isweep) ? isweep+nsweep : isweep;t < BOUNDINGBOXSIZE; t+=nsweep) {
1224 int j = (t + wid) & modval;
1226 // NOTE: __shfl() operation can give non-sense here because j may be >= WARPSIZE.
1227 // However, if (j < WARPSIZE ..) below makes sure that these non-sense
1228 // results are not used
1229 if (doAlch) part2 = WARP_SHUFFLE(WARP_FULL_MASK, p2, j, WARPSIZE);
1231 drude_j = WARP_SHUFFLE(WARP_FULL_MASK, drude_j_tmp, j, WARPSIZE);
1232 alpha_j = WARP_SHUFFLE(WARP_FULL_MASK, alpha_j_tmp, j, WARPSIZE);
1235 excl >>= nsweep;// moves it two sweeps
1236 if (j < BOUNDINGBOXSIZE) {
1237 xyzq_j = s_xyzq[iwarp][j];
1238 float dx = xyzq_j.x - xyzq_i.x;
1239 float dy = xyzq_j.y - xyzq_i.y;
1240 float dz = xyzq_j.z - xyzq_i.z;
1241 float r2 = dx*dx + dy*dy + dz*dz;
1242 if (r2 < plcutoff2) {
1243 // We have atom pair within the pairlist cutoff => Set indicator bit
1245 if (j < nfreej || wid < nfreei) {
1246 bool excluded = false;
1247 int indexdiff = s_jatomIndex[iwarp][j] - iatomIndex;
1248 if ( abs(indexdiff) <= iexclMaxdiff) {
1249 indexdiff += iexclIndex;
1250 int indexword = ((unsigned int) indexdiff) >> 5;
1252 indexword = overflowExclusions[indexword];
1255 excluded = ((indexword & (1<<(indexdiff&31))) != 0);
1257 if (excluded) nexcluded += 2;
1258 if (!excluded) excl |= (WarpMask)1 << (BOUNDINGBOXSIZE-(nsweep - isweep)); // good luck understanding this
1260 if(!excluded && r2 < cutoff2){
1261 jforce = float4(s_jforce[isweep][j]);
1262 if(doSlow) jforceSlow = float4(s_jforceSlow[isweep][j]);
1265 calcForceEnergyFEP<doEnergy, doSlow, true, doAlchVdwForceSwitching>(
1266 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1267 vdwtypei, s_vdwtypej[iwarp][j], part1, part2,
1268 #ifdef USE_TABLE_ARRAYS
1273 #ifdef USE_TABLE_ARRAYS
1274 forceTable, energyTable,
1276 forceTableTex, energyTableTex,
1280 energyVdw, energyVdw_s,
1281 energyElec, energySlow, energyElec_s, energySlow_s);
1283 calcForceEnergyTI<doEnergy, doSlow, true, doAlchVdwForceSwitching>(
1284 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1285 vdwtypei, s_vdwtypej[iwarp][j], part1, part2,
1286 #ifdef USE_TABLE_ARRAYS
1291 #ifdef USE_TABLE_ARRAYS
1292 forceTable, energyTable,
1294 forceTableTex, energyTableTex,
1298 energyVdw, energyVdw_ti_1,
1299 energyVdw_ti_2, energyElec, energyElec_ti_1, energyElec_ti_2,
1300 energySlow, energySlow_ti_1, energySlow_ti_2);
1304 calcForceEnergyFEP<doEnergy, doSlow, false, doAlchVdwForceSwitching>(
1305 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1306 vdwtypei, s_vdwtypej[iwarp][j], part1, part2,
1307 #ifdef USE_TABLE_ARRAYS
1312 #ifdef USE_TABLE_ARRAYS
1313 forceTable, energyTable,
1315 forceTableTex, energyTableTex,
1319 energyVdw, energyVdw_s,
1320 energyElec, energySlow, energyElec_s, energySlow_s);
1322 calcForceEnergyTI<doEnergy, doSlow, false, doAlchVdwForceSwitching>(
1323 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1324 vdwtypei, s_vdwtypej[iwarp][j], part1, part2,
1325 #ifdef USE_TABLE_ARRAYS
1330 #ifdef USE_TABLE_ARRAYS
1331 forceTable, energyTable,
1333 forceTableTex, energyTableTex,
1337 energyVdw, energyVdw_ti_1,
1338 energyVdw_ti_2, energyElec, energyElec_ti_1, energyElec_ti_2,
1339 energySlow, energySlow_ti_1, energySlow_ti_2);
1342 s_jforce[isweep][j] = make_float4(jforce.x, jforce.y, jforce.z, 1.f);
1343 if(doSlow) s_jforceSlow[isweep][j] = make_float4(jforceSlow.x, jforceSlow.y, jforceSlow.z, 1.f);
1344 }//if !excluded && r2 < cutoff2
1346 if (!excluded && r2 < cutoff2) {
1347 jforce = float4(s_jforce[isweep][j]);
1348 if(doSlow) jforceSlow = float4(s_jforceSlow[isweep][j]);
1350 calcForceEnergy<doEnergy, doSlow>(
1351 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1352 vdwtypei, s_vdwtypej[iwarp][j],
1353 #ifdef USE_TABLE_ARRAYS
1358 #ifdef USE_TABLE_ARRAYS
1359 forceTable, energyTable,
1361 forceTableTex, energyTableTex,
1366 energyElec, energySlow);
1368 calcForceEnergyMath<doEnergy, doSlow>(
1369 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1370 vdwtypei, s_vdwtypej[iwarp][j],
1371 #ifdef USE_TABLE_ARRAYS
1379 energyElec, energySlow, nbConstants);
1383 if (r2 < drudeNbtholeCut2) {
1384 // If drude_i >= 0, then the particle i is DRUD and drude_i is the NB index of its mother atom.
1385 // If drude_j >= 0, then the particle j is DRUD and drude_j is the NB index of its mother atom.
1386 calcForceEnergyNbThole<doEnergy>(
1387 drude_i >= 0 ? vdwTypes[drude_i]*vdwCoefTableWidth : vdwtypei,
1388 drude_j >= 0 ? vdwTypes[drude_j] : s_vdwtypej[iwarp][j],
1389 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1390 #ifdef USE_TABLE_ARRAYS
1391 drudeNbTholeTijTable,
1393 drudeNbTholeTijTableTex,
1396 energyElec, iforce, jforce);
1399 s_jforce[isweep][j] = make_float4(jforce.x, jforce.y, jforce.z, 1.f);
1400 if(doSlow) s_jforceSlow[isweep][j] = make_float4(jforceSlow.x, jforceSlow.y, jforceSlow.z, 1.f);
1406 NAMD_WARP_SYNC(WARP_FULL_MASK);
1409 // Just compute forces
1411 // Clear the first bit
1412 excl = excl & (~(WarpMask)1);
1415 int ind_sweep = isweep + wid;
1417 for (int t = 0; t < BOUNDINGBOXSIZE;t+= nsweep) {
1419 int j = (ind_sweep + t) & (BOUNDINGBOXSIZE-1);
1420 part2 = WARP_SHUFFLE(WARP_FULL_MASK, p2, j, WARPSIZE);
1423 int j = (ind_sweep + t) & (BOUNDINGBOXSIZE-1);
1424 drude_j = WARP_SHUFFLE(WARP_FULL_MASK, drude_j_tmp, j, WARPSIZE);
1425 alpha_j = WARP_SHUFFLE(WARP_FULL_MASK, alpha_j_tmp, j, WARPSIZE);
1429 int j = (ind_sweep + t) & (BOUNDINGBOXSIZE-1);
1430 xyzq_j = (float4)s_xyzq[iwarp][j];
1434 fast_float3 dr = xyz_j - xyz_i;
1435 // float dx = xyzq_j.x - xyzq_i.x;
1436 // float dy = xyzq_j.y - xyzq_i.y;
1437 // float dz = xyzq_j.z - xyzq_i.z;
1438 // float r2 = dx*dx + dy*dy + dz*dz;
1442 float r2 = norm2(dr);
1444 if(r2 < cutoff2){ // (r2 < cutoff2)
1445 jforce = float4(s_jforce[isweep][j]);
1446 if(doSlow) jforceSlow = float4(s_jforceSlow[isweep][j]);
1449 calcForceEnergyFEP<doEnergy, doSlow, true, doAlchVdwForceSwitching>(
1450 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1451 vdwtypei, s_vdwtypej[iwarp][j], part1, part2,
1452 #ifdef USE_TABLE_ARRAYS
1457 #ifdef USE_TABLE_ARRAYS
1458 forceTable, energyTable,
1460 forceTableTex, energyTableTex,
1464 energyVdw, energyVdw_s,
1465 energyElec, energySlow, energyElec_s, energySlow_s);
1467 calcForceEnergyTI<doEnergy, doSlow, true, doAlchVdwForceSwitching>(
1468 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1469 vdwtypei, s_vdwtypej[iwarp][j], part1, part2,
1470 #ifdef USE_TABLE_ARRAYS
1475 #ifdef USE_TABLE_ARRAYS
1476 forceTable, energyTable,
1478 forceTableTex, energyTableTex,
1482 energyVdw, energyVdw_ti_1,
1483 energyVdw_ti_2, energyElec, energyElec_ti_1, energyElec_ti_2,
1484 energySlow, energySlow_ti_1, energySlow_ti_2);
1488 calcForceEnergyFEP<doEnergy, doSlow, false, doAlchVdwForceSwitching>(
1489 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1490 vdwtypei, s_vdwtypej[iwarp][j], part1, part2,
1491 #ifdef USE_TABLE_ARRAYS
1496 #ifdef USE_TABLE_ARRAYS
1497 forceTable, energyTable,
1499 forceTableTex, energyTableTex,
1503 energyVdw, energyVdw_s,
1504 energyElec, energySlow, energyElec_s, energySlow_s);
1506 calcForceEnergyTI<doEnergy, doSlow, false, doAlchVdwForceSwitching>(
1507 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1508 vdwtypei, s_vdwtypej[iwarp][j], part1, part2,
1509 #ifdef USE_TABLE_ARRAYS
1514 #ifdef USE_TABLE_ARRAYS
1515 forceTable, energyTable,
1517 forceTableTex, energyTableTex,
1521 energyVdw, energyVdw_ti_1,
1522 energyVdw_ti_2, energyElec, energyElec_ti_1, energyElec_ti_2,
1523 energySlow, energySlow_ti_1, energySlow_ti_2);
1526 s_jforce[isweep][j] = make_float4(jforce.x, jforce.y, jforce.z, 1.f);
1527 if(doSlow) s_jforceSlow[isweep][j] = make_float4(jforceSlow.x, jforceSlow.y, jforceSlow.z, 1.f);
1531 jforce = float4(s_jforce[isweep][j]);
1532 if(doSlow) jforceSlow = float4(s_jforceSlow[isweep][j]);
1534 calcForceEnergy<doEnergy, doSlow>(
1535 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1536 vdwtypei, s_vdwtypej[iwarp][j],
1537 #ifdef USE_TABLE_ARRAYS
1542 #ifdef USE_TABLE_ARRAYS
1543 forceTable, energyTable,
1545 forceTableTex, energyTableTex,
1549 energyVdw, energyElec, energySlow);
1552 calcForceEnergyMath<doEnergy, doSlow>(
1553 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1554 vdwtypei, s_vdwtypej[iwarp][j],
1555 #ifdef USE_TABLE_ARRAYS
1563 energyElec, energySlow, nbConstants);
1566 if (r2 < drudeNbtholeCut2) {
1567 // If drude_i >= 0, then the particle i is DRUD and drude_i is the NB index of its mother atom.
1568 // If drude_j >= 0, then the particle j is DRUD and drude_j is the NB index of its mother atom.
1569 calcForceEnergyNbThole<doEnergy>(
1570 drude_i >= 0 ? vdwTypes[drude_i]*vdwCoefTableWidth : vdwtypei,
1571 drude_j >= 0 ? vdwTypes[drude_j] : s_vdwtypej[iwarp][j],
1572 r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
1573 #ifdef USE_TABLE_ARRAYS
1574 drudeNbTholeTijTable,
1576 drudeNbTholeTijTableTex,
1579 energyElec, iforce, jforce);
1582 s_jforce[isweep][j] = make_float4(jforce.x, jforce.y, jforce.z, 0.f);
1583 if(doSlow) s_jforceSlow[isweep][j] = make_float4(jforceSlow.x, jforceSlow.y, jforceSlow.z, 0.f);
1588 NAMD_WARP_SYNC(WARP_FULL_MASK);
1592 // Write j-forces - shuffle them before storing to global memory
1593 jforce = s_jforce[isweep][wid];
1594 if(doSlow) jforceSlow = s_jforceSlow[isweep][wid];
1595 float4 jforceAccum = make_float4(jforce.x, jforce.y, jforce.z, 1.f);
1596 float4 jforceSlowAccum;
1597 if (doSlow) jforceSlowAccum = make_float4(jforceSlow.x, jforceSlow.y, jforceSlow.z, 1.f);
1599 jforce.x += WARP_SHUFFLE(WARP_FULL_MASK, jforce.x, wid + BOUNDINGBOXSIZE, WARPSIZE);
1600 jforce.y += WARP_SHUFFLE(WARP_FULL_MASK, jforce.y, wid + BOUNDINGBOXSIZE, WARPSIZE);
1601 jforce.z += WARP_SHUFFLE(WARP_FULL_MASK, jforce.z, wid + BOUNDINGBOXSIZE, WARPSIZE);
1603 jforceSlow.x += WARP_SHUFFLE(WARP_FULL_MASK, jforceSlow.x, wid + BOUNDINGBOXSIZE, WARPSIZE);
1604 jforceSlow.y += WARP_SHUFFLE(WARP_FULL_MASK, jforceSlow.y, wid + BOUNDINGBOXSIZE, WARPSIZE);
1605 jforceSlow.z += WARP_SHUFFLE(WARP_FULL_MASK, jforceSlow.z, wid + BOUNDINGBOXSIZE, WARPSIZE);
1608 for(int k = 1 ; k < nsweep; k++){
1609 jforceAccum.x += WARP_SHUFFLE(WARP_FULL_MASK, jforce.x, wid+BOUNDINGBOXSIZE*k, WARPSIZE);
1610 jforceAccum.y += WARP_SHUFFLE(WARP_FULL_MASK, jforce.y, wid+BOUNDINGBOXSIZE*k, WARPSIZE);
1611 jforceAccum.z += WARP_SHUFFLE(WARP_FULL_MASK, jforce.z, wid+BOUNDINGBOXSIZE*k, WARPSIZE);
1613 jforceSlowAccum.x += WARP_SHUFFLE(WARP_FULL_MASK, jforceSlow.x, wid+BOUNDINGBOXSIZE*k, WARPSIZE);
1614 jforceSlowAccum.y += WARP_SHUFFLE(WARP_FULL_MASK, jforceSlow.y, wid+BOUNDINGBOXSIZE*k, WARPSIZE);
1615 jforceSlowAccum.z += WARP_SHUFFLE(WARP_FULL_MASK, jforceSlow.z, wid+BOUNDINGBOXSIZE*k, WARPSIZE);
1620 storeForces<doSlow, float4>(jatomStart + wid, jforceAccum, jforceSlowAccum,
1621 devForce_x, devForce_y, devForce_z,
1622 devForceSlow_x, devForceSlow_y, devForceSlow_z);
1627 // We have to calculate the global flags of this part here, which is
1628 WarpMask res_excl = excl;
1629 for(int k = 1 ; k < nsweep; k++){
1630 // shuffle the integers to make sure we have complete flags
1631 res_excl |= WARP_SHUFFLE(WARP_FULL_MASK, excl, wid + BOUNDINGBOXSIZE*k, WARPSIZE);
1633 // after that, everyone grabs the value from the lower warp
1634 int anyexcl = (65536 | NAMD_WARP_ANY(WARP_FULL_MASK, res_excl != 0));
1635 // Mark this jtile as non-empty:
1636 // VdW: 1 if tile has atom pairs within pairlist cutoff and some of these atoms interact
1637 // GBIS: 65536 if tile has atom pairs within pairlist cutoff but not necessarily interacting (i.e. these atoms are fixed or excluded)
1638 if (!isweep && anyexcl){ // lower threads in warp updates the values
1639 if (wid) jtiles[jtile] = anyexcl;
1641 tileExcls[jtile].excl[wid] = res_excl;
1643 // lower 16 bits number of tiles with atom pairs within pairlist cutoff that interact
1644 // upper 16 bits number of tiles with atom pairs within pairlist cutoff (but not necessarily interacting)
1645 // low sweep has the correct value for this, so all good
1646 itileListLen += anyexcl;
1647 // NOTE, this minJatomStart is only stored once for the first tile list entry
1648 // minJatomStart = min(minJatomStart, jatomStart);
1656 // shfl before writing - lower tile gets high tile value
1657 float3 iforceAccum = iforce;
1658 float3 iforceSlowAccum = iforceSlow;
1659 for(int k = 1 ; k < nsweep; k++){
1660 iforceAccum.x += WARP_SHUFFLE(WARP_FULL_MASK, iforce.x, wid+BOUNDINGBOXSIZE*k, WARPSIZE);
1661 iforceAccum.y += WARP_SHUFFLE(WARP_FULL_MASK, iforce.y, wid+BOUNDINGBOXSIZE*k, WARPSIZE);
1662 iforceAccum.z += WARP_SHUFFLE(WARP_FULL_MASK, iforce.z, wid+BOUNDINGBOXSIZE*k, WARPSIZE);
1664 iforceSlowAccum.x += WARP_SHUFFLE(WARP_FULL_MASK, iforceSlow.x, wid+BOUNDINGBOXSIZE*k, WARPSIZE);
1665 iforceSlowAccum.y += WARP_SHUFFLE(WARP_FULL_MASK, iforceSlow.y, wid+BOUNDINGBOXSIZE*k, WARPSIZE);
1666 iforceSlowAccum.z += WARP_SHUFFLE(WARP_FULL_MASK, iforceSlow.z, wid+BOUNDINGBOXSIZE*k, WARPSIZE);
1670 storeForces<doSlow, float3>(iatomStart + wid, iforceAccum, iforceSlowAccum,
1671 devForce_x, devForce_y, devForce_z,
1672 devForceSlow_x, devForceSlow_y, devForceSlow_z);
1676 // Done with computation
1678 // Save pairlist stuff
1680 if (wid == 0 && isweep == 0){
1681 // minJatomStart is in range [0 ... atomStorageSize-1]
1682 //int atom0 = (minJatomStart)/WARPSIZE;
1684 // int storageOffset = atomStorageSize/WARPSIZE;
1685 // int itileListLen = 0;
1686 // for (int jtile=jtileStart;jtile <= jtileEnd;jtile++) itileListLen += jtiles[jtile];
1687 // Store 0 if itileListLen == 0
1688 // tileListDepth[itileList] = (itileListLen > 0)*(itileListLen*storageOffset + atom0);
1689 // race condition here if we have the warp fetching two tiles from the same list
1690 tileListDepth[itileList] = itileListLen;
1691 tileListOrder[itileList] = itileList;
1692 // Number of active tilelists with tile with atom pairs within pairlist cutoff that interact
1693 if ((itileListLen & 65535) > 0) atomicAdd(&tileListStat->numTileLists, 1);
1694 // Number of active tilelists with tiles with atom pairs within pairlist cutoff (but not necessarily interacting)
1695 if (itileListLen > 0) atomicAdd(&tileListStat->numTileListsGBIS, 1);
1696 // NOTE: always numTileListsGBIS >= numTileLists
1699 typedef cub::WarpReduce<int> WarpReduceInt;
1700 __shared__ typename WarpReduceInt::TempStorage tempStorage[NONBONDKERNEL_NUM_WARP];
1701 const int warpId = threadIdx.x / WARPSIZE;
1702 // Remove indicator bit
1704 volatile int nexcludedWarp = WarpReduceInt(tempStorage[warpId]).Sum(nexcluded);
1705 if (threadIdx.x % WARPSIZE == 0){
1706 atomicAdd(&tileListStat->numExcluded, nexcludedWarp);
1711 typedef cub::WarpReduce<float> WarpReduce;
1712 __shared__ typename WarpReduce::TempStorage tempStorage[NONBONDKERNEL_NUM_WARP];
1713 const int warpId = threadIdx.x / WARPSIZE;
1714 volatile float iforcexSum = WarpReduce(tempStorage[warpId]).Sum(iforce.x);
1715 NAMD_WARP_SYNC(WARP_FULL_MASK);
1716 volatile float iforceySum = WarpReduce(tempStorage[warpId]).Sum(iforce.y);
1717 NAMD_WARP_SYNC(WARP_FULL_MASK);
1718 volatile float iforcezSum = WarpReduce(tempStorage[warpId]).Sum(iforce.z);
1719 NAMD_WARP_SYNC(WARP_FULL_MASK);
1721 virialEnergy[itileList].forcex = iforcexSum;
1722 virialEnergy[itileList].forcey = iforceySum;
1723 virialEnergy[itileList].forcez = iforcezSum;
1727 iforcexSum = WarpReduce(tempStorage[warpId]).Sum(iforceSlow.x);
1728 NAMD_WARP_SYNC(WARP_FULL_MASK);
1729 iforceySum = WarpReduce(tempStorage[warpId]).Sum(iforceSlow.y);
1730 NAMD_WARP_SYNC(WARP_FULL_MASK);
1731 iforcezSum = WarpReduce(tempStorage[warpId]).Sum(iforceSlow.z);
1732 NAMD_WARP_SYNC(WARP_FULL_MASK);
1734 virialEnergy[itileList].forceSlowx = iforcexSum;
1735 virialEnergy[itileList].forceSlowy = iforceySum;
1736 virialEnergy[itileList].forceSlowz = iforcezSum;
1743 // NOTE: We must hand write these warp-wide reductions to avoid excess register spillage
1744 // (Why does CUB suck here?)
1746 for (int i=WARPSIZE/2;i >= 1;i/=2) {
1747 energyVdw += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energyVdw, i, WARPSIZE);
1748 energyElec += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energyElec, i, WARPSIZE);
1749 if(doFEP) energyVdw_s += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energyVdw_s, i, WARPSIZE);
1750 if(doFEP) energyElec_s += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energyElec_s, i, WARPSIZE);
1752 energyVdw_ti_1 += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energyVdw_ti_1, i, WARPSIZE);
1753 energyVdw_ti_2 += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energyVdw_ti_2, i, WARPSIZE);
1754 energyElec_ti_1 += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energyElec_ti_1, i, WARPSIZE);
1755 energyElec_ti_2 += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energyElec_ti_2, i, WARPSIZE);
1758 energySlow += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energySlow, i, WARPSIZE);
1759 if(doFEP) energySlow_s += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energySlow_s, i, WARPSIZE);
1761 energySlow_ti_1 += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energySlow_ti_1, i, WARPSIZE);
1762 energySlow_ti_2 += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energySlow_ti_2, i, WARPSIZE);
1767 if (threadIdx.x % WARPSIZE == 0) {
1768 virialEnergy[itileList].energyVdw = energyVdw;
1769 virialEnergy[itileList].energyElec = energyElec;
1770 if (doFEP) virialEnergy[itileList].energyVdw_s = energyVdw_s;
1771 if (doFEP) virialEnergy[itileList].energyElec_s = energyElec_s;
1773 virialEnergy[itileList].energyVdw_ti_1 = energyVdw_ti_1;
1774 virialEnergy[itileList].energyVdw_ti_2 = energyVdw_ti_2;
1775 virialEnergy[itileList].energyElec_ti_1 = energyElec_ti_1;
1776 virialEnergy[itileList].energyElec_ti_2 = energyElec_ti_2;
1779 virialEnergy[itileList].energySlow = energySlow;
1780 if(doFEP) virialEnergy[itileList].energySlow_s = energySlow_s;
1782 virialEnergy[itileList].energySlow_ti_1 = energySlow_ti_1;
1783 virialEnergy[itileList].energySlow_ti_2 = energySlow_ti_2;
1788 // XXX TODO: Disable streaming and see what happens
1791 // Make sure devForces and devForcesSlow have been written into device memory
1792 NAMD_WARP_SYNC(WARP_FULL_MASK);
1795 int patchDone[2] = {false, false};
1796 const int wid = threadIdx.x % WARPSIZE;
1798 int patchCountOld0 = atomicInc(&patchNumCount[patchInd.x], (unsigned int)(patchNumList.x-1));
1799 patchDone[0] = (patchCountOld0 + 1 == patchNumList.x);
1800 if (patchInd.x != patchInd.y) {
1801 int patchCountOld1 = atomicInc(&patchNumCount[patchInd.y], (unsigned int)(patchNumList.y-1));
1802 patchDone[1] = (patchCountOld1 + 1 == patchNumList.y);
1806 patchDone[0] = NAMD_WARP_ANY(WARP_FULL_MASK, patchDone[0]);
1807 patchDone[1] = NAMD_WARP_ANY(WARP_FULL_MASK, patchDone[1]);
1810 // Patch 1 is done, write onto host-mapped memory
1811 CudaPatchRecord patch = cudaPatches[patchInd.x];
1812 int start = patch.atomStart;
1813 int end = start + patch.numAtoms;
1814 for (int i=start+wid;i < end;i+=WARPSIZE) {
1815 mapForces[i] = make_float4(devForce_x[i],
1816 devForce_y[i], devForce_z[i], devForce_w[i]);
1818 mapForcesSlow[i] = make_float4(devForceSlow_x[i],
1819 devForceSlow_y[i], devForceSlow_z[i], devForceSlow_w[i]);
1825 CudaPatchRecord patch = cudaPatches[patchInd.y];
1826 int start = patch.atomStart;
1827 int end = start + patch.numAtoms;
1828 for (int i=start+wid;i < end;i+=WARPSIZE) {
1829 mapForces[i] = make_float4(devForce_x[i],
1830 devForce_y[i], devForce_z[i], devForce_w[i]);
1832 mapForcesSlow[i] = make_float4(devForceSlow_x[i],
1833 devForceSlow_y[i], devForceSlow_z[i], devForceSlow_w[i]);
1838 if (patchDone[0] || patchDone[1]) {
1839 // Make sure mapForces and mapForcesSlow are up-to-date
1840 NAMD_WARP_SYNC(WARP_FULL_MASK);
1841 __threadfence_system();
1842 // Add patch into "patchReadyQueue"
1845 int ind = atomicAdd(&tileListStat->patchReadyQueueCount, 1);
1846 // int ind = atomicInc((unsigned int *)&mapPatchReadyQueue[numPatches], numPatches-1);
1847 mapPatchReadyQueue[ind] = patchInd.x;
1850 int ind = atomicAdd(&tileListStat->patchReadyQueueCount, 1);
1851 // int ind = atomicInc((unsigned int *)&mapPatchReadyQueue[numPatches], numPatches-1);
1852 mapPatchReadyQueue[ind] = patchInd.y;
1858 if (doStreaming && outputOrder != NULL && threadIdx.x % WARPSIZE == 0) {
1859 int index = atomicAdd(&tileListStat->outputOrderIndex, 1);
1860 outputOrder[index] = itileList;
1862 } // if (itileList < numTileLists)
1866 // Finish up - reduce virials from nonbonded kernel
// reduceNonbondedVirialKernel
//
// Sums the nine products force.{x,y,z} * pos.{x,y,z} over the atoms handled by
// each thread block and adds the block totals into virialEnergy[bin].virial[0..8].
// The same nine products built from forceSlow are added into
// virialEnergy[bin].virialSlow[0..8].
//
// Parameters (as used by the visible code):
//   doSlow          - when true, devForcesSlow[i] is loaded in addition to devForces[i]
//   atomStorageSize - upper bound for the block loop and for the per-thread loads
//   xyzq            - per-atom float4 input
//                     NOTE(review): presumably the source of `pos`; that load is not
//                     visible in this listing - confirm
//   devForces       - per-atom forces; only the x, y, z components are used here
//   devForcesSlow   - per-atom slow forces; read only when doSlow is set
//   virialEnergy    - output array indexed by bin = blockIdx.x % ATOMIC_BINS
//
// NOTE(review): BlockReduce is instantiated with
// REDUCENONBONDEDVIRIALKERNEL_NUM_WARP*WARPSIZE as its block-size argument, so the
// kernel presumably has to be launched with exactly that many threads per block -
// confirm at the launch site.
1868 __global__ void reduceNonbondedVirialKernel(const bool doSlow,
1869 const int atomStorageSize,
1870 const float4* __restrict__ xyzq,
1871 const float4* __restrict__ devForces, const float4* __restrict__ devForcesSlow,
1872 VirialEnergy* __restrict__ virialEnergy) {
// ibase depends only on blockIdx/blockDim/gridDim, so it has the same value for
// every thread of a block and all threads of a block run the same iterations.
1874 for (int ibase = blockIdx.x*blockDim.x;ibase < atomStorageSize;ibase += blockDim.x*gridDim.x)
// Index of the atom handled by this thread in the current iteration
1876 int i = ibase + threadIdx.x;
1878 // Set to zero to avoid nan*0
1883 float4 force, forceSlow;
// Only threads whose atom index is in range read from the input arrays
1890 if (i < atomStorageSize) {
1892 force = devForces[i];
1893 if (doSlow) forceSlow = devForcesSlow[i];
1895 // Reduce across the entire thread block
// Per-thread contributions: every force component times every position component
1896 float vxxt = force.x*pos.x;
1897 float vxyt = force.x*pos.y;
1898 float vxzt = force.x*pos.z;
1899 float vyxt = force.y*pos.x;
1900 float vyyt = force.y*pos.y;
1901 float vyzt = force.y*pos.z;
1902 float vzxt = force.z*pos.x;
1903 float vzyt = force.z*pos.y;
1904 float vzzt = force.z*pos.z;
// Output bin is chosen from the block index
1906 const int bin = blockIdx.x % ATOMIC_BINS;
1908 typedef cub::BlockReduce<float, REDUCENONBONDEDVIRIALKERNEL_NUM_WARP*WARPSIZE> BlockReduce;
1909 __shared__ typename BlockReduce::TempStorage tempStorage;
// The single tempStorage is reused by all nine reductions; each Sum is followed
// by BLOCK_SYNC (NOTE(review): presumably a block-wide barrier - macro defined elsewhere).
1910 float vxx = BlockReduce(tempStorage).Sum(vxxt); BLOCK_SYNC;
1911 float vxy = BlockReduce(tempStorage).Sum(vxyt); BLOCK_SYNC;
1912 float vxz = BlockReduce(tempStorage).Sum(vxzt); BLOCK_SYNC;
1913 float vyx = BlockReduce(tempStorage).Sum(vyxt); BLOCK_SYNC;
1914 float vyy = BlockReduce(tempStorage).Sum(vyyt); BLOCK_SYNC;
1915 float vyz = BlockReduce(tempStorage).Sum(vyzt); BLOCK_SYNC;
1916 float vzx = BlockReduce(tempStorage).Sum(vzxt); BLOCK_SYNC;
1917 float vzy = BlockReduce(tempStorage).Sum(vzyt); BLOCK_SYNC;
1918 float vzz = BlockReduce(tempStorage).Sum(vzzt); BLOCK_SYNC;
// Thread 0 adds the block totals, converted to double, into the selected bin.
// (Per the CUB documentation, BlockReduce::Sum returns a defined value only in thread 0.)
1919 if (threadIdx.x == 0) {
1920 atomicAdd(&virialEnergy[bin].virial[0], (double)vxx);
1921 atomicAdd(&virialEnergy[bin].virial[1], (double)vxy);
1922 atomicAdd(&virialEnergy[bin].virial[2], (double)vxz);
1923 atomicAdd(&virialEnergy[bin].virial[3], (double)vyx);
1924 atomicAdd(&virialEnergy[bin].virial[4], (double)vyy);
1925 atomicAdd(&virialEnergy[bin].virial[5], (double)vyz);
1926 atomicAdd(&virialEnergy[bin].virial[6], (double)vzx);
1927 atomicAdd(&virialEnergy[bin].virial[7], (double)vzy);
1928 atomicAdd(&virialEnergy[bin].virial[8], (double)vzz);
1932 // if (isnan(forceSlow.x) || isnan(forceSlow.y) || isnan(forceSlow.z))
// Same nine products, built from the slow force components
1933 float vxxSlowt = forceSlow.x*pos.x;
1934 float vxySlowt = forceSlow.x*pos.y;
1935 float vxzSlowt = forceSlow.x*pos.z;
1936 float vyxSlowt = forceSlow.y*pos.x;
1937 float vyySlowt = forceSlow.y*pos.y;
1938 float vyzSlowt = forceSlow.y*pos.z;
1939 float vzxSlowt = forceSlow.z*pos.x;
1940 float vzySlowt = forceSlow.z*pos.y;
1941 float vzzSlowt = forceSlow.z*pos.z;
// Block-wide sums of the slow products, reusing the same tempStorage
1942 float vxxSlow = BlockReduce(tempStorage).Sum(vxxSlowt); BLOCK_SYNC;
1943 float vxySlow = BlockReduce(tempStorage).Sum(vxySlowt); BLOCK_SYNC;
1944 float vxzSlow = BlockReduce(tempStorage).Sum(vxzSlowt); BLOCK_SYNC;
1945 float vyxSlow = BlockReduce(tempStorage).Sum(vyxSlowt); BLOCK_SYNC;
1946 float vyySlow = BlockReduce(tempStorage).Sum(vyySlowt); BLOCK_SYNC;
1947 float vyzSlow = BlockReduce(tempStorage).Sum(vyzSlowt); BLOCK_SYNC;
1948 float vzxSlow = BlockReduce(tempStorage).Sum(vzxSlowt); BLOCK_SYNC;
1949 float vzySlow = BlockReduce(tempStorage).Sum(vzySlowt); BLOCK_SYNC;
1950 float vzzSlow = BlockReduce(tempStorage).Sum(vzzSlowt); BLOCK_SYNC;
// Thread 0 adds the slow block totals into virialSlow[0..8] of the same bin
1951 if (threadIdx.x == 0) {
1952 atomicAdd(&virialEnergy[bin].virialSlow[0], (double)vxxSlow);
1953 atomicAdd(&virialEnergy[bin].virialSlow[1], (double)vxySlow);
1954 atomicAdd(&virialEnergy[bin].virialSlow[2], (double)vxzSlow);
1955 atomicAdd(&virialEnergy[bin].virialSlow[3], (double)vyxSlow);
1956 atomicAdd(&virialEnergy[bin].virialSlow[4], (double)vyySlow);
1957 atomicAdd(&virialEnergy[bin].virialSlow[5], (double)vyzSlow);
1958 atomicAdd(&virialEnergy[bin].virialSlow[6], (double)vzxSlow);
1959 atomicAdd(&virialEnergy[bin].virialSlow[7], (double)vzySlow);
1960 atomicAdd(&virialEnergy[bin].virialSlow[8], (double)vzzSlow);
// Reduces the per-tile-list records in tileListVirialEnergy into the binned
// accumulators virialEnergy[bin] (virial, slow virial and energy terms).
//
// Parameters:
//   doEnergy, doVirial, doSlow - runtime switches for this reduction.
//       NOTE(review): presumably they gate the energy / virial / slow
//       sections below - confirm against the guarding conditionals.
//   numTileLists         - number of valid entries in tileListVirialEnergy.
//   tileListVirialEnergy - per-tile-list input records (read only).
//   virialEnergy         - output accumulators, indexed by bin, updated with atomicAdd.
//
// The BlockReduce instantiations are sized for
// REDUCEVIRIALENERGYKERNEL_NUM_WARP*WARPSIZE threads.
// NOTE(review): assumes the launch uses exactly that block size - confirm at the launch site.
1967 __global__ void reduceVirialEnergyKernel(
1968 const bool doEnergy, const bool doVirial, const bool doSlow,
1969 const int numTileLists,
1970 const TileListVirialEnergy* __restrict__ tileListVirialEnergy,
1971 VirialEnergy* __restrict__ virialEnergy) {
// Each block starts at entry blockIdx.x*blockDim.x and advances by
// blockDim.x*gridDim.x; thread t of the block handles entry ibase + t.
1973 for (int ibase = blockIdx.x*blockDim.x;ibase < numTileLists;ibase += blockDim.x*gridDim.x)
1975 int itileList = ibase + threadIdx.x;
1976 TileListVirialEnergy ve;
// Only in-range threads load a record from global memory.
1977 if (itileList < numTileLists) {
1978 ve = tileListVirialEnergy[itileList];
// Zeroed fields, so that a thread without a valid record adds nothing to the block sums.
1980 // Set to zero to avoid nan*0
1988 ve.forceSlowx = 0.0f;
1989 ve.forceSlowy = 0.0f;
1990 ve.forceSlowz = 0.0f;
1994 ve.energyVdw_s = 0.0;
1995 ve.energyElec = 0.0;
1996 ve.energySlow = 0.0;
1997 ve.energyElec_s = 0.0;
1998 ve.energySlow_s = 0.0;
2001 ve.energyVdw_ti_1 = 0.0;
2002 ve.energyVdw_ti_2 = 0.0;
2003 ve.energyElec_ti_1 = 0.0;
2004 ve.energyElec_ti_2 = 0.0;
2005 ve.energySlow_ti_1 = 0.0;
2006 ve.energySlow_ti_2 = 0.0;
2007 // ve.energyGBIS = 0.0;
// Blocks are mapped onto ATOMIC_BINS accumulators by blockIdx.x, so the
// atomicAdd traffic below is spread over several addresses.
2011 const int bin = blockIdx.x % ATOMIC_BINS;
// --- Virial: single-precision block reduction ---
2014 typedef cub::BlockReduce<float, REDUCEVIRIALENERGYKERNEL_NUM_WARP*WARPSIZE> BlockReduce;
2015 __shared__ typename BlockReduce::TempStorage tempStorage;
// Per-thread 3x3 products of (forcex, forcey, forcez) with (shx, shy, shz).
2016 float vxxt = ve.forcex*ve.shx;
2017 float vxyt = ve.forcex*ve.shy;
2018 float vxzt = ve.forcex*ve.shz;
2019 float vyxt = ve.forcey*ve.shx;
2020 float vyyt = ve.forcey*ve.shy;
2021 float vyzt = ve.forcey*ve.shz;
2022 float vzxt = ve.forcez*ve.shx;
2023 float vzyt = ve.forcez*ve.shy;
2024 float vzzt = ve.forcez*ve.shz;
// One block-wide sum per component. The same tempStorage is reused for each
// call, hence the BLOCK_SYNC after every reduction
// (NOTE(review): BLOCK_SYNC is defined elsewhere; presumably a block barrier - confirm).
2025 float vxx = BlockReduce(tempStorage).Sum(vxxt); BLOCK_SYNC;
2026 float vxy = BlockReduce(tempStorage).Sum(vxyt); BLOCK_SYNC;
2027 float vxz = BlockReduce(tempStorage).Sum(vxzt); BLOCK_SYNC;
2028 float vyx = BlockReduce(tempStorage).Sum(vyxt); BLOCK_SYNC;
2029 float vyy = BlockReduce(tempStorage).Sum(vyyt); BLOCK_SYNC;
2030 float vyz = BlockReduce(tempStorage).Sum(vyzt); BLOCK_SYNC;
2031 float vzx = BlockReduce(tempStorage).Sum(vzxt); BLOCK_SYNC;
2032 float vzy = BlockReduce(tempStorage).Sum(vzyt); BLOCK_SYNC;
2033 float vzz = BlockReduce(tempStorage).Sum(vzzt); BLOCK_SYNC;
// Only thread 0 publishes the block totals, widened to double for the atomicAdd.
2034 if (threadIdx.x == 0) {
2035 atomicAdd(&virialEnergy[bin].virial[0], (double)vxx);
2036 atomicAdd(&virialEnergy[bin].virial[1], (double)vxy);
2037 atomicAdd(&virialEnergy[bin].virial[2], (double)vxz);
2038 atomicAdd(&virialEnergy[bin].virial[3], (double)vyx);
2039 atomicAdd(&virialEnergy[bin].virial[4], (double)vyy);
2040 atomicAdd(&virialEnergy[bin].virial[5], (double)vyz);
2041 atomicAdd(&virialEnergy[bin].virial[6], (double)vzx);
2042 atomicAdd(&virialEnergy[bin].virial[7], (double)vzy);
2043 atomicAdd(&virialEnergy[bin].virial[8], (double)vzz);
// --- Slow virial: same scheme, using (forceSlowx, forceSlowy, forceSlowz) ---
2047 typedef cub::BlockReduce<float, REDUCEVIRIALENERGYKERNEL_NUM_WARP*WARPSIZE> BlockReduce;
2048 __shared__ typename BlockReduce::TempStorage tempStorage;
2049 float vxxt = ve.forceSlowx*ve.shx;
2050 float vxyt = ve.forceSlowx*ve.shy;
2051 float vxzt = ve.forceSlowx*ve.shz;
2052 float vyxt = ve.forceSlowy*ve.shx;
2053 float vyyt = ve.forceSlowy*ve.shy;
2054 float vyzt = ve.forceSlowy*ve.shz;
2055 float vzxt = ve.forceSlowz*ve.shx;
2056 float vzyt = ve.forceSlowz*ve.shy;
2057 float vzzt = ve.forceSlowz*ve.shz;
2058 float vxx = BlockReduce(tempStorage).Sum(vxxt); BLOCK_SYNC;
2059 float vxy = BlockReduce(tempStorage).Sum(vxyt); BLOCK_SYNC;
2060 float vxz = BlockReduce(tempStorage).Sum(vxzt); BLOCK_SYNC;
2061 float vyx = BlockReduce(tempStorage).Sum(vyxt); BLOCK_SYNC;
2062 float vyy = BlockReduce(tempStorage).Sum(vyyt); BLOCK_SYNC;
2063 float vyz = BlockReduce(tempStorage).Sum(vyzt); BLOCK_SYNC;
2064 float vzx = BlockReduce(tempStorage).Sum(vzxt); BLOCK_SYNC;
2065 float vzy = BlockReduce(tempStorage).Sum(vzyt); BLOCK_SYNC;
2066 float vzz = BlockReduce(tempStorage).Sum(vzzt); BLOCK_SYNC;
// Thread 0 accumulates the slow-virial block totals into virialSlow[0..8].
2067 if (threadIdx.x == 0) {
2068 atomicAdd(&virialEnergy[bin].virialSlow[0], (double)vxx);
2069 atomicAdd(&virialEnergy[bin].virialSlow[1], (double)vxy);
2070 atomicAdd(&virialEnergy[bin].virialSlow[2], (double)vxz);
2071 atomicAdd(&virialEnergy[bin].virialSlow[3], (double)vyx);
2072 atomicAdd(&virialEnergy[bin].virialSlow[4], (double)vyy);
2073 atomicAdd(&virialEnergy[bin].virialSlow[5], (double)vyz);
2074 atomicAdd(&virialEnergy[bin].virialSlow[6], (double)vzx);
2075 atomicAdd(&virialEnergy[bin].virialSlow[7], (double)vzy);
2076 atomicAdd(&virialEnergy[bin].virialSlow[8], (double)vzz);
// --- Energies: double-precision block reduction ---
2082 typedef cub::BlockReduce<double, REDUCEVIRIALENERGYKERNEL_NUM_WARP*WARPSIZE> BlockReduce;
2083 /* Maybe we should guard the TI and FEP energies, since those are not to be calculated on regular MDs */
2084 __shared__ typename BlockReduce::TempStorage tempStorage;
// Vdw / Elec energies, their "_s" counterparts and the "_ti_1" / "_ti_2" terms
// are all reduced unconditionally at this point (see the remark above).
2085 double energyVdw = BlockReduce(tempStorage).Sum(ve.energyVdw); BLOCK_SYNC;
2086 double energyVdw_s = BlockReduce(tempStorage).Sum(ve.energyVdw_s); BLOCK_SYNC;
2087 double energyElec = BlockReduce(tempStorage).Sum(ve.energyElec); BLOCK_SYNC;
2088 double energyElec_s = BlockReduce(tempStorage).Sum(ve.energyElec_s); BLOCK_SYNC;
2089 double energyVdw_ti_1 = BlockReduce(tempStorage).Sum(ve.energyVdw_ti_1); BLOCK_SYNC;
2090 double energyVdw_ti_2 = BlockReduce(tempStorage).Sum(ve.energyVdw_ti_2); BLOCK_SYNC;
2091 double energyElec_ti_1 = BlockReduce(tempStorage).Sum(ve.energyElec_ti_1); BLOCK_SYNC;
2092 double energyElec_ti_2 = BlockReduce(tempStorage).Sum(ve.energyElec_ti_2); BLOCK_SYNC;
// Thread 0 adds the block totals to the bin; the values are already double, so no cast.
2093 if (threadIdx.x == 0){
2094 atomicAdd(&virialEnergy[bin].energyVdw, energyVdw);
2095 atomicAdd(&virialEnergy[bin].energyVdw_s, energyVdw_s);
2096 atomicAdd(&virialEnergy[bin].energyElec, energyElec);
2097 atomicAdd(&virialEnergy[bin].energyElec_s, energyElec_s);
2098 atomicAdd(&virialEnergy[bin].energyVdw_ti_1, energyVdw_ti_1);
2099 atomicAdd(&virialEnergy[bin].energyVdw_ti_2, energyVdw_ti_2);
2100 atomicAdd(&virialEnergy[bin].energyElec_ti_1, energyElec_ti_1);
2101 atomicAdd(&virialEnergy[bin].energyElec_ti_2, energyElec_ti_2);
// Slow energy terms, reduced with the same double-precision BlockReduce and tempStorage.
2104 double energySlow = BlockReduce(tempStorage).Sum(ve.energySlow); BLOCK_SYNC;
2105 double energySlow_s = BlockReduce(tempStorage).Sum(ve.energySlow_s); BLOCK_SYNC;
2106 double energySlow_ti_1 = BlockReduce(tempStorage).Sum(ve.energySlow_ti_1); BLOCK_SYNC;
2107 double energySlow_ti_2 = BlockReduce(tempStorage).Sum(ve.energySlow_ti_2); BLOCK_SYNC;
2108 if (threadIdx.x == 0) {
2109 atomicAdd(&virialEnergy[bin].energySlow, energySlow);
2110 atomicAdd(&virialEnergy[bin].energySlow_s, energySlow_s);
2111 atomicAdd(&virialEnergy[bin].energySlow_ti_1, energySlow_ti_1);
2112 atomicAdd(&virialEnergy[bin].energySlow_ti_2, energySlow_ti_2);
// Sums the energyGBIS field of the per-tile-list records into virialEnergy[bin].energyGBIS.
//
// Parameters:
//   numTileLists         - number of valid entries in tileListVirialEnergy.
//   tileListVirialEnergy - per-tile-list input records (read only).
//   virialEnergy         - binned output accumulators, updated with atomicAdd.
//
// NOTE(review): BlockReduce is sized for REDUCEGBISENERGYKERNEL_NUM_WARP*WARPSIZE
// threads; assumes the launch uses exactly that block size - confirm at the launch site.
2121 __global__ void reduceGBISEnergyKernel(const int numTileLists,
2122 const TileListVirialEnergy* __restrict__ tileListVirialEnergy,
2123 VirialEnergy* __restrict__ virialEnergy) {
// Each block starts at entry blockIdx.x*blockDim.x and advances by blockDim.x*gridDim.x.
2125 for (int ibase = blockIdx.x*blockDim.x;ibase < numTileLists;ibase += blockDim.x*gridDim.x)
2127 int itileList = ibase + threadIdx.x;
// Threads past the end of the input keep 0.0 and so do not affect the block sum.
2128 double energyGBISt = 0.0;
2129 if (itileList < numTileLists) {
2130 energyGBISt = tileListVirialEnergy[itileList].energyGBIS;
// Blocks are mapped onto ATOMIC_BINS accumulators by blockIdx.x.
2133 const int bin = blockIdx.x % ATOMIC_BINS;
2135 typedef cub::BlockReduce<double, REDUCEGBISENERGYKERNEL_NUM_WARP*WARPSIZE> BlockReduce;
2136 __shared__ typename BlockReduce::TempStorage tempStorage;
// Block-wide sum; thread 0 then adds the block total to the selected bin.
2137 double energyGBIS = BlockReduce(tempStorage).Sum(energyGBISt); BLOCK_SYNC;
2138 if (threadIdx.x == 0) atomicAdd(&virialEnergy[bin].energyGBIS, energyGBIS);
// Collapses the binned accumulators virialEnergy[0..] into the first element:
// thread t reads virialEnergy[t], the values are summed with cub::WarpReduce,
// and thread 0 stores each total through virialEnergy-> (i.e. element 0).
//
// NOTE(review): since bin = threadIdx.x indexes virialEnergy directly, this
// presumably expects a single block of ATOMIC_BINS threads - confirm at the launch site.
// NOTE(review): the same WarpReduce tempStorage is reused for consecutive Sum()
// calls with no barrier visible in between; the CUB documentation asks for a
// warp-wide barrier before temporary storage is reused - confirm this is safe
// for the backend in use.
2142 __global__ void reduceNonbondedBinsKernel(
2143 const bool doVirial,
2144 const bool doEnergy,
2147 VirialEnergy* __restrict__ virialEnergy) {
// One bin per thread.
2149 const int bin = threadIdx.x;
// Logical warp width is ATOMIC_BINS, but never less than 2.
2151 typedef cub::WarpReduce<double, (ATOMIC_BINS > 1 ? ATOMIC_BINS : 2)> WarpReduce;
2152 __shared__ typename WarpReduce::TempStorage tempStorage;
// --- Virial: sum the nine components over all bins ---
2155 double vxx = WarpReduce(tempStorage).Sum(virialEnergy[bin].virial[0]);
2156 double vxy = WarpReduce(tempStorage).Sum(virialEnergy[bin].virial[1]);
2157 double vxz = WarpReduce(tempStorage).Sum(virialEnergy[bin].virial[2]);
2158 double vyx = WarpReduce(tempStorage).Sum(virialEnergy[bin].virial[3]);
2159 double vyy = WarpReduce(tempStorage).Sum(virialEnergy[bin].virial[4]);
2160 double vyz = WarpReduce(tempStorage).Sum(virialEnergy[bin].virial[5]);
2161 double vzx = WarpReduce(tempStorage).Sum(virialEnergy[bin].virial[6]);
2162 double vzy = WarpReduce(tempStorage).Sum(virialEnergy[bin].virial[7]);
2163 double vzz = WarpReduce(tempStorage).Sum(virialEnergy[bin].virial[8]);
// Thread 0 overwrites element 0 with the totals (plain stores, not atomics).
2164 if (threadIdx.x == 0) {
2165 virialEnergy->virial[0] = vxx;
2166 virialEnergy->virial[1] = vxy;
2167 virialEnergy->virial[2] = vxz;
2168 virialEnergy->virial[3] = vyx;
2169 virialEnergy->virial[4] = vyy;
2170 virialEnergy->virial[5] = vyz;
2171 virialEnergy->virial[6] = vzx;
2172 virialEnergy->virial[7] = vzy;
2173 virialEnergy->virial[8] = vzz;
// --- Slow virial: same scheme on virialSlow[0..8] ---
2177 double vxxSlow = WarpReduce(tempStorage).Sum(virialEnergy[bin].virialSlow[0]);
2178 double vxySlow = WarpReduce(tempStorage).Sum(virialEnergy[bin].virialSlow[1]);
2179 double vxzSlow = WarpReduce(tempStorage).Sum(virialEnergy[bin].virialSlow[2]);
2180 double vyxSlow = WarpReduce(tempStorage).Sum(virialEnergy[bin].virialSlow[3]);
2181 double vyySlow = WarpReduce(tempStorage).Sum(virialEnergy[bin].virialSlow[4]);
2182 double vyzSlow = WarpReduce(tempStorage).Sum(virialEnergy[bin].virialSlow[5]);
2183 double vzxSlow = WarpReduce(tempStorage).Sum(virialEnergy[bin].virialSlow[6]);
2184 double vzySlow = WarpReduce(tempStorage).Sum(virialEnergy[bin].virialSlow[7]);
2185 double vzzSlow = WarpReduce(tempStorage).Sum(virialEnergy[bin].virialSlow[8]);
2186 if (threadIdx.x == 0) {
2187 virialEnergy->virialSlow[0] = vxxSlow;
2188 virialEnergy->virialSlow[1] = vxySlow;
2189 virialEnergy->virialSlow[2] = vxzSlow;
2190 virialEnergy->virialSlow[3] = vyxSlow;
2191 virialEnergy->virialSlow[4] = vyySlow;
2192 virialEnergy->virialSlow[5] = vyzSlow;
2193 virialEnergy->virialSlow[6] = vzxSlow;
2194 virialEnergy->virialSlow[7] = vzySlow;
2195 virialEnergy->virialSlow[8] = vzzSlow;
// --- Energies: Vdw / Elec, their "_s" counterparts and the "_ti_1" / "_ti_2" terms ---
2201 double energyVdw = WarpReduce(tempStorage).Sum(virialEnergy[bin].energyVdw);
2202 double energyVdw_s = WarpReduce(tempStorage).Sum(virialEnergy[bin].energyVdw_s);
2203 double energyElec = WarpReduce(tempStorage).Sum(virialEnergy[bin].energyElec);
2204 double energyElec_s = WarpReduce(tempStorage).Sum(virialEnergy[bin].energyElec_s);
2205 double energyVdw_ti_1 = WarpReduce(tempStorage).Sum(virialEnergy[bin].energyVdw_ti_1);
2206 double energyVdw_ti_2 = WarpReduce(tempStorage).Sum(virialEnergy[bin].energyVdw_ti_2);
2207 double energyElec_ti_1 = WarpReduce(tempStorage).Sum(virialEnergy[bin].energyElec_ti_1);
2208 double energyElec_ti_2 = WarpReduce(tempStorage).Sum(virialEnergy[bin].energyElec_ti_2);
2209 if (threadIdx.x == 0) {
2210 virialEnergy->energyVdw = energyVdw;
2211 virialEnergy->energyVdw_s = energyVdw_s;
2212 virialEnergy->energyElec = energyElec;
2213 virialEnergy->energyElec_s = energyElec_s;
2214 virialEnergy->energyVdw_ti_1 = energyVdw_ti_1;
2215 virialEnergy->energyVdw_ti_2 = energyVdw_ti_2;
2216 virialEnergy->energyElec_ti_1 = energyElec_ti_1;
2217 virialEnergy->energyElec_ti_2 = energyElec_ti_2;
// --- Slow energies ---
2220 double energySlow = WarpReduce(tempStorage).Sum(virialEnergy[bin].energySlow);
2221 double energySlow_s = WarpReduce(tempStorage).Sum(virialEnergy[bin].energySlow_s);
2222 double energySlow_ti_1 = WarpReduce(tempStorage).Sum(virialEnergy[bin].energySlow_ti_1);
2223 double energySlow_ti_2 = WarpReduce(tempStorage).Sum(virialEnergy[bin].energySlow_ti_2);
2224 if (threadIdx.x == 0) {
2225 virialEnergy->energySlow = energySlow;
2226 virialEnergy->energySlow_s = energySlow_s;
2227 virialEnergy->energySlow_ti_1 = energySlow_ti_1;
2228 virialEnergy->energySlow_ti_2 = energySlow_ti_2;
// --- GBIS energy ---
2232 double energyGBIS = WarpReduce(tempStorage).Sum(virialEnergy[bin].energyGBIS);
2233 if (threadIdx.x == 0) {
2234 virialEnergy->energyGBIS = energyGBIS;
2240 // ##############################################################################################
2241 // ##############################################################################################
2242 // ##############################################################################################
// Constructs the kernel wrapper for one device.
//
// Parameters:
//   deviceID            - device passed to cudaSetDevice; stored in the member of the same name.
//   cudaNonbondedTables - table object, kept by the member of the same name.
//   doStreaming         - stored flag selecting the streaming code paths.
//
// No device or host memory is allocated here: the buffer pointers start out
// null and their size counters start at zero.
2244 CudaComputeNonbondedKernel::CudaComputeNonbondedKernel(int deviceID, CudaNonbondedTables& cudaNonbondedTables,
2245 bool doStreaming) : deviceID(deviceID), cudaNonbondedTables(cudaNonbondedTables), doStreaming(doStreaming) {
// Make this object's device current; cudaCheck wraps the returned status.
2247 cudaCheck(cudaSetDevice(deviceID));
2249 overflowExclusions = NULL;
2250 overflowExclusionsSize = 0;
2252 exclIndexMaxDiff = NULL;
2253 exclIndexMaxDiffSize = 0;
2261 patchNumCount = NULL;
2262 patchNumCountSize = 0;
2264 patchReadyQueue = NULL;
2265 patchReadyQueueSize = 0;
// Per-component force arrays (x, y, z, w) for the regular and the slow forces.
2267 force_x = force_y = force_z = force_w = NULL;
2269 forceSlow_x = forceSlow_y = forceSlow_z = forceSlow_w = NULL;
2274 drudeAtomAlpha = nullptr;
2275 drudeAtomAlphaSize = 0;
// (Re)allocates the per-component force arrays so that each of force_{x,y,z,w}
// and forceSlow_{x,y,z,w} can hold atomStorageSize floats.
//
// Two alternative layouts appear below.
// NOTE(review): they are mutually exclusive; presumably a preprocessor
// conditional selects one - confirm which variant is compiled.
2278 void CudaComputeNonbondedKernel::reallocate_forceSOA(int atomStorageSize)
// Variant 1: eight independent allocations. All four force_* calls share
// forceSize and all four forceSlow_* calls share forceSlowSize as size counter.
2281 reallocate_device<float>(&force_x, &forceSize, atomStorageSize, 1.4f);
2282 reallocate_device<float>(&force_y, &forceSize, atomStorageSize, 1.4f);
2283 reallocate_device<float>(&force_z, &forceSize, atomStorageSize, 1.4f);
2284 reallocate_device<float>(&force_w, &forceSize, atomStorageSize, 1.4f);
2285 reallocate_device<float>(&forceSlow_x, &forceSlowSize, atomStorageSize, 1.4f);
2286 reallocate_device<float>(&forceSlow_y, &forceSlowSize, atomStorageSize, 1.4f);
2287 reallocate_device<float>(&forceSlow_z, &forceSlowSize, atomStorageSize, 1.4f);
2288 reallocate_device<float>(&forceSlow_w, &forceSlowSize, atomStorageSize, 1.4f);
// Variant 2: one allocation of 8*atomStorageSize floats owned by force_x; the
// other seven pointers are consecutive atomStorageSize-float slices of it, in
// the order force y, z, w followed by forceSlow x, y, z, w.
// (1.4f is presumably an over-allocation factor - confirm in reallocate_device.)
2290 reallocate_device<float>(&force_x, &forceSize, atomStorageSize*8, 1.4f);
2291 force_y = force_x + atomStorageSize;
2292 force_z = force_y + atomStorageSize;
2293 force_w = force_z + atomStorageSize;
2294 forceSlow_x = force_w + atomStorageSize;
2295 forceSlow_y = forceSlow_x + atomStorageSize;
2296 forceSlow_z = forceSlow_y + atomStorageSize;
2297 forceSlow_w = forceSlow_z + atomStorageSize;
// Releases every buffer owned by this object. Each pointer is checked against
// null first, so buffers that were never allocated are skipped.
2301 CudaComputeNonbondedKernel::~CudaComputeNonbondedKernel() {
// Make the owning device current before freeing.
2302 cudaCheck(cudaSetDevice(deviceID));
2303 if (overflowExclusions != NULL) deallocate_device<unsigned int>(&overflowExclusions);
2304 if (exclIndexMaxDiff != NULL) deallocate_device<int2>(&exclIndexMaxDiff);
2305 if (atomIndex != NULL) deallocate_device<int>(&atomIndex);
2306 if (vdwTypes != NULL) deallocate_device<int>(&vdwTypes);
2307 if (patchNumCount != NULL) deallocate_device<unsigned int>(&patchNumCount);
// patchReadyQueue is the only buffer here released with deallocate_host.
2308 if (patchReadyQueue != NULL) deallocate_host<int>(&patchReadyQueue);
// Force arrays, alternative 1: eight separately owned arrays.
// NOTE(review): this and the single force_x release further down are
// presumably selected by the same preprocessor conditional as the two layouts
// in reallocate_forceSOA - confirm they stay in sync.
2310 if (force_x != NULL) deallocate_device<float>(&force_x);
2311 if (force_y != NULL) deallocate_device<float>(&force_y);
2312 if (force_z != NULL) deallocate_device<float>(&force_z);
2313 if (force_w != NULL) deallocate_device<float>(&force_w);
2314 if (forceSlow_x != NULL) deallocate_device<float>(&forceSlow_x);
2315 if (forceSlow_y != NULL) deallocate_device<float>(&forceSlow_y);
2316 if (forceSlow_z != NULL) deallocate_device<float>(&forceSlow_z);
2317 if (forceSlow_w != NULL) deallocate_device<float>(&forceSlow_w);
// Force arrays, alternative 2: only force_x is released, matching the layout
// in which the other seven pointers alias into the force_x allocation.
2319 if (force_x != NULL) deallocate_device<float>(&force_x);
2321 if (isDrude != nullptr) deallocate_device(&isDrude);
2322 if (drudeAtomAlpha != nullptr) deallocate_device(&drudeAtomAlpha);
// Uploads the per-atom vdW type, exclusion index/max-diff and atom index
// arrays to the device.
//
// Parameters:
//   atomStorageSize    - number of elements copied from each host array.
//   h_vdwTypes         - host source for vdwTypes.
//   h_exclIndexMaxDiff - host source for exclIndexMaxDiff.
//   h_atomIndex        - host source for atomIndex.
//   stream             - stream handed to the three copy_HtoD calls.
//
// NOTE(review): whether copy_HtoD returns before the copy completes is not
// visible here; presumably the host arrays must stay valid until `stream` has
// been synchronized - confirm in copy_HtoD.
2325 void CudaComputeNonbondedKernel::updateVdwTypesExcl(const int atomStorageSize, const int* h_vdwTypes,
2326 const int2* h_exclIndexMaxDiff, const int* h_atomIndex, cudaStream_t stream) {
// Resize the device buffers for atomStorageSize elements, passing OVERALLOC as the growth factor.
2328 reallocate_device<int>(&vdwTypes, &vdwTypesSize, atomStorageSize, OVERALLOC);
2329 reallocate_device<int2>(&exclIndexMaxDiff, &exclIndexMaxDiffSize, atomStorageSize, OVERALLOC);
2330 reallocate_device<int>(&atomIndex, &atomIndexSize, atomStorageSize, OVERALLOC);
// Host-to-device copies, all issued on the caller's stream.
2332 copy_HtoD<int>(h_vdwTypes, vdwTypes, atomStorageSize, stream);
2333 copy_HtoD<int2>(h_exclIndexMaxDiff, exclIndexMaxDiff, atomStorageSize, stream);
2334 copy_HtoD<int>(h_atomIndex, atomIndex, atomStorageSize, stream);
// Returns the patchReadyQueue pointer.
// The error message below shows that calling this on a non-streaming kernel is
// treated as fatal (NOTE(review): presumably guarded by a check on doStreaming - confirm).
2337 int* CudaComputeNonbondedKernel::getPatchReadyQueue() {
2339 NAMD_die("CudaComputeNonbondedKernel::getPatchReadyQueue() called on non-streaming kernel");
2341 return patchReadyQueue;
// Packs the per-component force arrays into float4 records and resets the
// component arrays to zero.
//
// Template parameter:
//   doSlow - NOTE(review): presumably enables the fSlow section at compile time - confirm.
// Parameters:
//   f, fSlow       - float4 outputs; element tid receives (x, y, z, w).
//   fx, fy, fz, fw - component inputs for f; element tid is zeroed after being read.
//   fSlowx, fSlowy, fSlowz, fSloww - component inputs for fSlow, handled the same way.
//
// Thread mapping: one element per thread, tid = blockIdx.x*blockDim.x + threadIdx.x.
// NOTE(review): presumably the accesses are bounds-checked against an element
// count argument - confirm.
2344 template <int doSlow>
2345 __global__ void transposeForcesKernel(float4 *f, float4 *fSlow,
2346 float *fx, float *fy, float *fz, float *fw,
2347 float *fSlowx, float *fSlowy, float *fSlowz, float *fSloww,
2350 int tid = blockIdx.x*blockDim.x + threadIdx.x;
// Gather the four components into one float4, then clear them.
2352 f[tid] = make_float4(fx[tid], fy[tid], fz[tid], fw[tid]);
2353 fx[tid] = 0.f; fy[tid] = 0.f; fz[tid] = 0.f; fw[tid] = 0.f;
// Same for the slow force components.
2355 fSlow[tid] = make_float4(fSlowx[tid], fSlowy[tid], fSlowz[tid], fSloww[tid]);
2356 fSlowx[tid] = 0.f; fSlowy[tid] = 0.f; fSlowz[tid] = 0.f; fSloww[tid] = 0.f;
2363 void CudaComputeNonbondedKernel::nonbondedForce(CudaTileListKernel& tlKernel,
2364 const int atomStorageSize, const bool atomsChanged, const bool doMinimize,
2365 const bool doPairlist,
2366 const bool doEnergy, const bool doVirial, const bool doSlow, const bool doAlch,
2367 const bool doAlchVdwForceSwitching,
2368 const bool doFEP, const bool doTI,
2369 const bool doNbThole, const bool doTable,
2370 const float3 lata, const float3 latb, const float3 latc,
2371 const float4* h_xyzq, const float cutoff2,
2372 const CudaNBConstants nbConstants,
2373 float4* d_forces, float4* d_forcesSlow,
2374 float4* h_forces, float4* h_forcesSlow, AlchData *srcFlags,
2375 bool lambdaWindowUpdated, char *part,
2376 bool CUDASOAintegrator, bool useDeviceMigration,
2377 const float drudeNbtholeCut2,
2378 cudaStream_t stream) {
2380 #ifdef NODEGROUP_FORCE_REGISTER
2381 if (!atomsChanged && !CUDASOAintegrator) copy_HtoD<float4>(h_xyzq, tlKernel.get_xyzq(), atomStorageSize, stream);
2383 if (!doPairlist) copy_HtoD<float4>(h_xyzq, tlKernel.get_xyzq(), atomStorageSize, stream);
2387 // Copy partition to device. This is not necessary if both CUDASOAintegrator and useDeviceMigration
2389 if (doPairlist && (!CUDASOAintegrator || !useDeviceMigration)) {
2390 copy_HtoD< char>(part, tlKernel.get_part(), atomStorageSize, stream);
2392 //Copies flags to constant memory
2393 if(lambdaWindowUpdated) cudaCheck(cudaMemcpyToSymbol(alchflags, srcFlags, sizeof(AlchData)));
2396 // XXX TODO: Get rid of the clears
2398 if (doStreaming) tlKernel.clearTileListStat(stream);
2399 if(atomsChanged || doMinimize){
2400 clear_device_array<float>(force_x, atomStorageSize*4, stream);
2401 if(doSlow) clear_device_array<float>(forceSlow_x, atomStorageSize*4, stream);
2405 // --- streaming ----
2406 float4* m_forces = NULL;
2407 float4* m_forcesSlow = NULL;
2408 int* m_patchReadyQueue = NULL;
2410 unsigned long long *calculatedPairs;
2411 unsigned long long *skippedPairs;
2412 unsigned int* patchNumCountPtr = NULL;
2415 numPatches = tlKernel.getNumPatches();
2416 if (reallocate_device<unsigned int>(&patchNumCount, &patchNumCountSize, numPatches)) {
2417 // If re-allocated, clear array
2418 clear_device_array<unsigned int>(patchNumCount, numPatches, stream);
2420 patchNumCountPtr = patchNumCount;
2421 bool re = reallocate_host<int>(&patchReadyQueue, &patchReadyQueueSize, numPatches, cudaHostAllocMapped);
2423 // If re-allocated, re-set to "-1"
2424 for (int i=0;i < numPatches;i++) patchReadyQueue[i] = -1;
2426 cudaCheck(cudaHostGetDevicePointer((void**)&m_patchReadyQueue, patchReadyQueue, 0));
2427 cudaCheck(cudaHostGetDevicePointer((void**)&m_forces, h_forces, 0));
2428 cudaCheck(cudaHostGetDevicePointer((void**)&m_forcesSlow, h_forcesSlow, 0));
2430 // -----------------
2432 if (doVirial || doEnergy) {
2433 tlKernel.setTileListVirialEnergyLength(tlKernel.getNumTileLists());
2438 int* outputOrderPtr = tlKernel.getOutputOrder();
2440 int nwarp = NONBONDKERNEL_NUM_WARP;
2441 int nthread = WARPSIZE*nwarp;
2444 const int doActualNbThole = doNbThole ? int(cudaNonbondedTables.getNumPotentialNbtholeTerms() > 0) : int(false);
2450 int options = doEnergy + (doVirial << 1) + (doSlow << 2) +
2451 (doPairlist << 3) + (doAlch << 4) + (doFEP << 5) + (doTI << 6) + (doStreaming << 7) + (doTable << 8) + (doAlchVdwForceSwitching << 9) + (doActualNbThole << 10);
2454 while (start < tlKernel.getNumTileLists()) {
2456 int nleft = tlKernel.getNumTileLists() - start;
2457 int nblock = min(deviceCUDA->getMaxNumBlocks(), (nleft-1)/nwarp+1);
2461 #ifdef USE_TABLE_ARRAYS
2462 #define VDW_TABLE_PARAMS cudaNonbondedTables.getVdwCoefTable()
2463 #define TABLE_PARAMS \
2464 cudaNonbondedTables.getForceTable(), cudaNonbondedTables.getEnergyTable()
2466 #define VDW_TABLE_PARAMS cudaNonbondedTables.getVdwCoefTableTex()
2467 #define TABLE_PARAMS \
2468 cudaNonbondedTables.getForceTableTex(), cudaNonbondedTables.getEnergyTableTex()
2472 #define CALL(DOENERGY, DOVIRIAL, DOSLOW, DOPAIRLIST, DOALCH, DOFEP, DOTI, DOSTREAMING, DOALCHWDWFORCESWITCHING) \
2473 nonbondedForceKernel<DOENERGY, DOVIRIAL, DOSLOW, DOPAIRLIST, DOALCH, DOFEP, DOTI, DOSTREAMING, DOALCHWDWFORCESWITCHING > \
2474 <<< nblock, nthread, shMemSize, stream >>> \
2475 (start, tlKernel.getNumTileLists(), tlKernel.getTileLists(), tlKernel.getTileExcls(), tlKernel.getTileJatomStart(), \
2476 cudaNonbondedTables.getVdwCoefTableWidth(), \
2478 vdwTypes, lata, latb, latc, tlKernel.get_xyzq(), cutoff2, nbConstants, \
2480 tlKernel.get_plcutoff2(), tlKernel.getPatchPairs(), atomIndex, exclIndexMaxDiff, overflowExclusions, \
2481 tlKernel.getTileListDepth(), tlKernel.getTileListOrder(), tlKernel.getJtiles(), tlKernel.getTileListStatDevPtr(), \
2482 tlKernel.getBoundingBoxes(), d_forces, d_forcesSlow, \
2483 force_x, force_y, force_z, force_w, \
2484 forceSlow_x, forceSlow_y, forceSlow_z, forceSlow_w, \
2485 numPatches, patchNumCountPtr, tlKernel.getCudaPatches(), m_forces, m_forcesSlow, m_patchReadyQueue, \
2486 outputOrderPtr, tlKernel.getTileListVirialEnergy(), tlKernel.get_part(), calculatedPairs, skippedPairs); called=true
2488 bool called = false;
2491 if (!doEnergy && !doVirial && !doSlow && !doPairlist) CALL(0, 0, 0, 0, 0, 0, 0, 1, 0);
2492 if (!doEnergy && !doVirial && doSlow && !doPairlist) CALL(0, 0, 1, 0, 0, 0, 0, 1, 0);
2493 if (!doEnergy && doVirial && !doSlow && !doPairlist) CALL(0, 1, 0, 0, 0, 0, 0, 1, 0);
2494 if (!doEnergy && doVirial && doSlow && !doPairlist) CALL(0, 1, 1, 0, 0, 0, 0, 1, 0);
2495 if ( doEnergy && !doVirial && !doSlow && !doPairlist) CALL(1, 0, 0, 0, 0, 0, 0, 1, 0);
2496 if ( doEnergy && !doVirial && doSlow && !doPairlist) CALL(1, 0, 1, 0, 0, 0, 0, 1, 0);
2497 if ( doEnergy && doVirial && !doSlow && !doPairlist) CALL(1, 1, 0, 0, 0, 0, 0, 1, 0);
2498 if ( doEnergy && doVirial && doSlow && !doPairlist) CALL(1, 1, 1, 0, 0, 0, 0, 1, 0);
2501 if (!doEnergy && !doVirial && !doSlow && doPairlist) CALL(0, 0, 0, 1, 0, 0, 0, 1, 0);
2502 if (!doEnergy && !doVirial && doSlow && doPairlist) CALL(0, 0, 1, 1, 0, 0, 0, 1, 0);
2503 if (!doEnergy && doVirial && !doSlow && doPairlist) CALL(0, 1, 0, 1, 0, 0, 0, 1, 0);
2504 if (!doEnergy && doVirial && doSlow && doPairlist) CALL(0, 1, 1, 1, 0, 0, 0, 1, 0);
2505 if ( doEnergy && !doVirial && !doSlow && doPairlist) CALL(1, 0, 0, 1, 0, 0, 0, 1, 0);
2506 if ( doEnergy && !doVirial && doSlow && doPairlist) CALL(1, 0, 1, 1, 0, 0, 0, 1, 0);
2507 if ( doEnergy && doVirial && !doSlow && doPairlist) CALL(1, 1, 0, 1, 0, 0, 0, 1, 0);
2508 if ( doEnergy && doVirial && doSlow && doPairlist) CALL(1, 1, 1, 1, 0, 0, 0, 1, 0);
2511 if (doAlchVdwForceSwitching) {
2512 if (!doEnergy && !doVirial && !doSlow && !doPairlist) CALL(0, 0, 0, 0, 1, 1, 0, 1, 1);
2513 if (!doEnergy && !doVirial && doSlow && !doPairlist) CALL(0, 0, 1, 0, 1, 1, 0, 1, 1);
2514 if (!doEnergy && doVirial && !doSlow && !doPairlist) CALL(0, 1, 0, 0, 1, 1, 0, 1, 1);
2515 if (!doEnergy && doVirial && doSlow && !doPairlist) CALL(0, 1, 1, 0, 1, 1, 0, 1, 1);
2516 if ( doEnergy && !doVirial && !doSlow && !doPairlist) CALL(1, 0, 0, 0, 1, 1, 0, 1, 1);
2517 if ( doEnergy && !doVirial && doSlow && !doPairlist) CALL(1, 0, 1, 0, 1, 1, 0, 1, 1);
2518 if ( doEnergy && doVirial && !doSlow && !doPairlist) CALL(1, 1, 0, 0, 1, 1, 0, 1, 1);
2519 if ( doEnergy && doVirial && doSlow && !doPairlist) CALL(1, 1, 1, 0, 1, 1, 0, 1, 1);
2521 if (!doEnergy && !doVirial && !doSlow && doPairlist) CALL(0, 0, 0, 1, 1, 1, 0, 1, 1);
2522 if (!doEnergy && !doVirial && doSlow && doPairlist) CALL(0, 0, 1, 1, 1, 1, 0, 1, 1);
2523 if (!doEnergy && doVirial && !doSlow && doPairlist) CALL(0, 1, 0, 1, 1, 1, 0, 1, 1);
2524 if (!doEnergy && doVirial && doSlow && doPairlist) CALL(0, 1, 1, 1, 1, 1, 0, 1, 1);
2525 if ( doEnergy && !doVirial && !doSlow && doPairlist) CALL(1, 0, 0, 1, 1, 1, 0, 1, 1);
2526 if ( doEnergy && !doVirial && doSlow && doPairlist) CALL(1, 0, 1, 1, 1, 1, 0, 1, 1);
2527 if ( doEnergy && doVirial && !doSlow && doPairlist) CALL(1, 1, 0, 1, 1, 1, 0, 1, 1);
2528 if ( doEnergy && doVirial && doSlow && doPairlist) CALL(1, 1, 1, 1, 1, 1, 0, 1, 1);
2530 if (!doEnergy && !doVirial && !doSlow && !doPairlist) CALL(0, 0, 0, 0, 1, 1, 0, 1, 0);
2531 if (!doEnergy && !doVirial && doSlow && !doPairlist) CALL(0, 0, 1, 0, 1, 1, 0, 1, 0);
2532 if (!doEnergy && doVirial && !doSlow && !doPairlist) CALL(0, 1, 0, 0, 1, 1, 0, 1, 0);
2533 if (!doEnergy && doVirial && doSlow && !doPairlist) CALL(0, 1, 1, 0, 1, 1, 0, 1, 0);
2534 if ( doEnergy && !doVirial && !doSlow && !doPairlist) CALL(1, 0, 0, 0, 1, 1, 0, 1, 0);
2535 if ( doEnergy && !doVirial && doSlow && !doPairlist) CALL(1, 0, 1, 0, 1, 1, 0, 1, 0);
2536 if ( doEnergy && doVirial && !doSlow && !doPairlist) CALL(1, 1, 0, 0, 1, 1, 0, 1, 0);
2537 if ( doEnergy && doVirial && doSlow && !doPairlist) CALL(1, 1, 1, 0, 1, 1, 0, 1, 0);
2539 if (!doEnergy && !doVirial && !doSlow && doPairlist) CALL(0, 0, 0, 1, 1, 1, 0, 1, 0);
2540 if (!doEnergy && !doVirial && doSlow && doPairlist) CALL(0, 0, 1, 1, 1, 1, 0, 1, 0);
2541 if (!doEnergy && doVirial && !doSlow && doPairlist) CALL(0, 1, 0, 1, 1, 1, 0, 1, 0);
2542 if (!doEnergy && doVirial && doSlow && doPairlist) CALL(0, 1, 1, 1, 1, 1, 0, 1, 0);
2543 if ( doEnergy && !doVirial && !doSlow && doPairlist) CALL(1, 0, 0, 1, 1, 1, 0, 1, 0);
2544 if ( doEnergy && !doVirial && doSlow && doPairlist) CALL(1, 0, 1, 1, 1, 1, 0, 1, 0);
2545 if ( doEnergy && doVirial && !doSlow && doPairlist) CALL(1, 1, 0, 1, 1, 1, 0, 1, 0);
2546 if ( doEnergy && doVirial && doSlow && doPairlist) CALL(1, 1, 1, 1, 1, 1, 0, 1, 0);
2549 if (doAlchVdwForceSwitching) {
2550 if (!doEnergy && !doVirial && !doSlow && !doPairlist) CALL(0, 0, 0, 0, 1, 0, 1, 1, 1);
2551 if (!doEnergy && !doVirial && doSlow && !doPairlist) CALL(0, 0, 1, 0, 1, 0, 1, 1, 1);
2552 if (!doEnergy && doVirial && !doSlow && !doPairlist) CALL(0, 1, 0, 0, 1, 0, 1, 1, 1);
2553 if (!doEnergy && doVirial && doSlow && !doPairlist) CALL(0, 1, 1, 0, 1, 0, 1, 1, 1);
2554 if ( doEnergy && !doVirial && !doSlow && !doPairlist) CALL(1, 0, 0, 0, 1, 0, 1, 1, 1);
2555 if ( doEnergy && !doVirial && doSlow && !doPairlist) CALL(1, 0, 1, 0, 1, 0, 1, 1, 1);
2556 if ( doEnergy && doVirial && !doSlow && !doPairlist) CALL(1, 1, 0, 0, 1, 0, 1, 1, 1);
2557 if ( doEnergy && doVirial && doSlow && !doPairlist) CALL(1, 1, 1, 0, 1, 0, 1, 1, 1);
2559 if (!doEnergy && !doVirial && !doSlow && doPairlist) CALL(0, 0, 0, 1, 1, 0, 1, 1, 1);
2560 if (!doEnergy && !doVirial && doSlow && doPairlist) CALL(0, 0, 1, 1, 1, 0, 1, 1, 1);
2561 if (!doEnergy && doVirial && !doSlow && doPairlist) CALL(0, 1, 0, 1, 1, 0, 1, 1, 1);
2562 if (!doEnergy && doVirial && doSlow && doPairlist) CALL(0, 1, 1, 1, 1, 0, 1, 1, 1);
2563 if ( doEnergy && !doVirial && !doSlow && doPairlist) CALL(1, 0, 0, 1, 1, 0, 1, 1, 1);
2564 if ( doEnergy && !doVirial && doSlow && doPairlist) CALL(1, 0, 1, 1, 1, 0, 1, 1, 1);
2565 if ( doEnergy && doVirial && !doSlow && doPairlist) CALL(1, 1, 0, 1, 1, 0, 1, 1, 1);
2566 if ( doEnergy && doVirial && doSlow && doPairlist) CALL(1, 1, 1, 1, 1, 0, 1, 1, 1);
2568 if (!doEnergy && !doVirial && !doSlow && !doPairlist) CALL(0, 0, 0, 0, 1, 0, 1, 1, 0);
2569 if (!doEnergy && !doVirial && doSlow && !doPairlist) CALL(0, 0, 1, 0, 1, 0, 1, 1, 0);
2570 if (!doEnergy && doVirial && !doSlow && !doPairlist) CALL(0, 1, 0, 0, 1, 0, 1, 1, 0);
2571 if (!doEnergy && doVirial && doSlow && !doPairlist) CALL(0, 1, 1, 0, 1, 0, 1, 1, 0);
2572 if ( doEnergy && !doVirial && !doSlow && !doPairlist) CALL(1, 0, 0, 0, 1, 0, 1, 1, 0);
2573 if ( doEnergy && !doVirial && doSlow && !doPairlist) CALL(1, 0, 1, 0, 1, 0, 1, 1, 0);
2574 if ( doEnergy && doVirial && !doSlow && !doPairlist) CALL(1, 1, 0, 0, 1, 0, 1, 1, 0);
2575 if ( doEnergy && doVirial && doSlow && !doPairlist) CALL(1, 1, 1, 0, 1, 0, 1, 1, 0);
2577 if (!doEnergy && !doVirial && !doSlow && doPairlist) CALL(0, 0, 0, 1, 1, 0, 1, 1, 0);
2578 if (!doEnergy && !doVirial && doSlow && doPairlist) CALL(0, 0, 1, 1, 1, 0, 1, 1, 0);
2579 if (!doEnergy && doVirial && !doSlow && doPairlist) CALL(0, 1, 0, 1, 1, 0, 1, 1, 0);
2580 if (!doEnergy && doVirial && doSlow && doPairlist) CALL(0, 1, 1, 1, 1, 0, 1, 1, 0);
2581 if ( doEnergy && !doVirial && !doSlow && doPairlist) CALL(1, 0, 0, 1, 1, 0, 1, 1, 0);
2582 if ( doEnergy && !doVirial && doSlow && doPairlist) CALL(1, 0, 1, 1, 1, 0, 1, 1, 0);
2583 if ( doEnergy && doVirial && !doSlow && doPairlist) CALL(1, 1, 0, 1, 1, 0, 1, 1, 0);
2584 if ( doEnergy && doVirial && doSlow && doPairlist) CALL(1, 1, 1, 1, 1, 0, 1, 1, 0);
2591 if (!doEnergy && !doVirial && !doSlow && !doPairlist) CALL(0, 0, 0, 0, 0, 0, 0, 0, 0);
2592 if (!doEnergy && !doVirial && doSlow && !doPairlist) CALL(0, 0, 1, 0, 0, 0, 0, 0, 0);
2593 if (!doEnergy && doVirial && !doSlow && !doPairlist) CALL(0, 1, 0, 0, 0, 0, 0, 0, 0);
2594 if (!doEnergy && doVirial && doSlow && !doPairlist) CALL(0, 1, 1, 0, 0, 0, 0, 0, 0);
2595 if ( doEnergy && !doVirial && !doSlow && !doPairlist) CALL(1, 0, 0, 0, 0, 0, 0, 0, 0);
2596 if ( doEnergy && !doVirial && doSlow && !doPairlist) CALL(1, 0, 1, 0, 0, 0, 0, 0, 0);
2597 if ( doEnergy && doVirial && !doSlow && !doPairlist) CALL(1, 1, 0, 0, 0, 0, 0, 0, 0);
2598 if ( doEnergy && doVirial && doSlow && !doPairlist) CALL(1, 1, 1, 0, 0, 0, 0, 0, 0);
2601 if (!doEnergy && !doVirial && !doSlow && doPairlist) CALL(0, 0, 0, 1, 0, 0, 0, 0, 0);
2602 if (!doEnergy && !doVirial && doSlow && doPairlist) CALL(0, 0, 1, 1, 0, 0, 0, 0, 0);
2603 if (!doEnergy && doVirial && !doSlow && doPairlist) CALL(0, 1, 0, 1, 0, 0, 0, 0, 0);
2604 if (!doEnergy && doVirial && doSlow && doPairlist) CALL(0, 1, 1, 1, 0, 0, 0, 0, 0);
2605 if ( doEnergy && !doVirial && !doSlow && doPairlist) CALL(1, 0, 0, 1, 0, 0, 0, 0, 0);
2606 if ( doEnergy && !doVirial && doSlow && doPairlist) CALL(1, 0, 1, 1, 0, 0, 0, 0, 0);
2607 if ( doEnergy && doVirial && !doSlow && doPairlist) CALL(1, 1, 0, 1, 0, 0, 0, 0, 0);
2608 if ( doEnergy && doVirial && doSlow && doPairlist) CALL(1, 1, 1, 1, 0, 0, 0, 0, 0);
2611 if (doAlchVdwForceSwitching) {
2612 if (!doEnergy && !doVirial && !doSlow && !doPairlist) CALL(0, 0, 0, 0, 1, 1, 0, 0, 1);
2613 if (!doEnergy && !doVirial && doSlow && !doPairlist) CALL(0, 0, 1, 0, 1, 1, 0, 0, 1);
2614 if (!doEnergy && doVirial && !doSlow && !doPairlist) CALL(0, 1, 0, 0, 1, 1, 0, 0, 1);
2615 if (!doEnergy && doVirial && doSlow && !doPairlist) CALL(0, 1, 1, 0, 1, 1, 0, 0, 1);
2616 if ( doEnergy && !doVirial && !doSlow && !doPairlist) CALL(1, 0, 0, 0, 1, 1, 0, 0, 1);
2617 if ( doEnergy && !doVirial && doSlow && !doPairlist) CALL(1, 0, 1, 0, 1, 1, 0, 0, 1);
2618 if ( doEnergy && doVirial && !doSlow && !doPairlist) CALL(1, 1, 0, 0, 1, 1, 0, 0, 1);
2619 if ( doEnergy && doVirial && doSlow && !doPairlist) CALL(1, 1, 1, 0, 1, 1, 0, 0, 1);
2621 if (!doEnergy && !doVirial && !doSlow && doPairlist) CALL(0, 0, 0, 1, 1, 1, 0, 0, 1);
2622 if (!doEnergy && !doVirial && doSlow && doPairlist) CALL(0, 0, 1, 1, 1, 1, 0, 0, 1);
2623 if (!doEnergy && doVirial && !doSlow && doPairlist) CALL(0, 1, 0, 1, 1, 1, 0, 0, 1);
2624 if (!doEnergy && doVirial && doSlow && doPairlist) CALL(0, 1, 1, 1, 1, 1, 0, 0, 1);
2625 if ( doEnergy && !doVirial && !doSlow && doPairlist) CALL(1, 0, 0, 1, 1, 1, 0, 0, 1);
2626 if ( doEnergy && !doVirial && doSlow && doPairlist) CALL(1, 0, 1, 1, 1, 1, 0, 0, 1);
2627 if ( doEnergy && doVirial && !doSlow && doPairlist) CALL(1, 1, 0, 1, 1, 1, 0, 0, 1);
2628 if ( doEnergy && doVirial && doSlow && doPairlist) CALL(1, 1, 1, 1, 1, 1, 0, 0, 1);
2630 if (!doEnergy && !doVirial && !doSlow && !doPairlist) CALL(0, 0, 0, 0, 1, 1, 0, 0, 0);
2631 if (!doEnergy && !doVirial && doSlow && !doPairlist) CALL(0, 0, 1, 0, 1, 1, 0, 0, 0);
2632 if (!doEnergy && doVirial && !doSlow && !doPairlist) CALL(0, 1, 0, 0, 1, 1, 0, 0, 0);
2633 if (!doEnergy && doVirial && doSlow && !doPairlist) CALL(0, 1, 1, 0, 1, 1, 0, 0, 0);
2634 if ( doEnergy && !doVirial && !doSlow && !doPairlist) CALL(1, 0, 0, 0, 1, 1, 0, 0, 0);
2635 if ( doEnergy && !doVirial && doSlow && !doPairlist) CALL(1, 0, 1, 0, 1, 1, 0, 0, 0);
2636 if ( doEnergy && doVirial && !doSlow && !doPairlist) CALL(1, 1, 0, 0, 1, 1, 0, 0, 0);
2637 if ( doEnergy && doVirial && doSlow && !doPairlist) CALL(1, 1, 1, 0, 1, 1, 0, 0, 0);
2639 if (!doEnergy && !doVirial && !doSlow && doPairlist) CALL(0, 0, 0, 1, 1, 1, 0, 0, 0);
2640 if (!doEnergy && !doVirial && doSlow && doPairlist) CALL(0, 0, 1, 1, 1, 1, 0, 0, 0);
2641 if (!doEnergy && doVirial && !doSlow && doPairlist) CALL(0, 1, 0, 1, 1, 1, 0, 0, 0);
2642 if (!doEnergy && doVirial && doSlow && doPairlist) CALL(0, 1, 1, 1, 1, 1, 0, 0, 0);
2643 if ( doEnergy && !doVirial && !doSlow && doPairlist) CALL(1, 0, 0, 1, 1, 1, 0, 0, 0);
2644 if ( doEnergy && !doVirial && doSlow && doPairlist) CALL(1, 0, 1, 1, 1, 1, 0, 0, 0);
2645 if ( doEnergy && doVirial && !doSlow && doPairlist) CALL(1, 1, 0, 1, 1, 1, 0, 0, 0);
2646 if ( doEnergy && doVirial && doSlow && doPairlist) CALL(1, 1, 1, 1, 1, 1, 0, 0, 0);
2649 if (doAlchVdwForceSwitching) {
2650 if (!doEnergy && !doVirial && !doSlow && !doPairlist) CALL(0, 0, 0, 0, 1, 0, 1, 0, 1);
2651 if (!doEnergy && !doVirial && doSlow && !doPairlist) CALL(0, 0, 1, 0, 1, 0, 1, 0, 1);
2652 if (!doEnergy && doVirial && !doSlow && !doPairlist) CALL(0, 1, 0, 0, 1, 0, 1, 0, 1);
2653 if (!doEnergy && doVirial && doSlow && !doPairlist) CALL(0, 1, 1, 0, 1, 0, 1, 0, 1);
2654 if ( doEnergy && !doVirial && !doSlow && !doPairlist) CALL(1, 0, 0, 0, 1, 0, 1, 0, 1);
2655 if ( doEnergy && !doVirial && doSlow && !doPairlist) CALL(1, 0, 1, 0, 1, 0, 1, 0, 1);
2656 if ( doEnergy && doVirial && !doSlow && !doPairlist) CALL(1, 1, 0, 0, 1, 0, 1, 0, 1);
2657 if ( doEnergy && doVirial && doSlow && !doPairlist) CALL(1, 1, 1, 0, 1, 0, 1, 0, 1);
2659 if (!doEnergy && !doVirial && !doSlow && doPairlist) CALL(0, 0, 0, 1, 1, 0, 1, 0, 1);
2660 if (!doEnergy && !doVirial && doSlow && doPairlist) CALL(0, 0, 1, 1, 1, 0, 1, 0, 1);
2661 if (!doEnergy && doVirial && !doSlow && doPairlist) CALL(0, 1, 0, 1, 1, 0, 1, 0, 1);
2662 if (!doEnergy && doVirial && doSlow && doPairlist) CALL(0, 1, 1, 1, 1, 0, 1, 0, 1);
2663 if ( doEnergy && !doVirial && !doSlow && doPairlist) CALL(1, 0, 0, 1, 1, 0, 1, 0, 1);
2664 if ( doEnergy && !doVirial && doSlow && doPairlist) CALL(1, 0, 1, 1, 1, 0, 1, 0, 1);
2665 if ( doEnergy && doVirial && !doSlow && doPairlist) CALL(1, 1, 0, 1, 1, 0, 1, 0, 1);
2666 if ( doEnergy && doVirial && doSlow && doPairlist) CALL(1, 1, 1, 1, 1, 0, 1, 0, 1);
2668 if (!doEnergy && !doVirial && !doSlow && !doPairlist) CALL(0, 0, 0, 0, 1, 0, 1, 0, 0);
2669 if (!doEnergy && !doVirial && doSlow && !doPairlist) CALL(0, 0, 1, 0, 1, 0, 1, 0, 0);
2670 if (!doEnergy && doVirial && !doSlow && !doPairlist) CALL(0, 1, 0, 0, 1, 0, 1, 0, 0);
2671 if (!doEnergy && doVirial && doSlow && !doPairlist) CALL(0, 1, 1, 0, 1, 0, 1, 0, 0);
2672 if ( doEnergy && !doVirial && !doSlow && !doPairlist) CALL(1, 0, 0, 0, 1, 0, 1, 0, 0);
2673 if ( doEnergy && !doVirial && doSlow && !doPairlist) CALL(1, 0, 1, 0, 1, 0, 1, 0, 0);
2674 if ( doEnergy && doVirial && !doSlow && !doPairlist) CALL(1, 1, 0, 0, 1, 0, 1, 0, 0);
2675 if ( doEnergy && doVirial && doSlow && !doPairlist) CALL(1, 1, 1, 0, 1, 0, 1, 0, 0);
2677 if (!doEnergy && !doVirial && !doSlow && doPairlist) CALL(0, 0, 0, 1, 1, 0, 1, 0, 0);
2678 if (!doEnergy && !doVirial && doSlow && doPairlist) CALL(0, 0, 1, 1, 1, 0, 1, 0, 0);
2679 if (!doEnergy && doVirial && !doSlow && doPairlist) CALL(0, 1, 0, 1, 1, 0, 1, 0, 0);
2680 if (!doEnergy && doVirial && doSlow && doPairlist) CALL(0, 1, 1, 1, 1, 0, 1, 0, 0);
2681 if ( doEnergy && !doVirial && !doSlow && doPairlist) CALL(1, 0, 0, 1, 1, 0, 1, 0, 0);
2682 if ( doEnergy && !doVirial && doSlow && doPairlist) CALL(1, 0, 1, 1, 1, 0, 1, 0, 0);
2683 if ( doEnergy && doVirial && !doSlow && doPairlist) CALL(1, 1, 0, 1, 1, 0, 1, 0, 0);
2684 if ( doEnergy && doVirial && doSlow && doPairlist) CALL(1, 1, 1, 1, 1, 0, 1, 0, 0);
2691 NAMD_die("CudaComputeNonbondedKernel::nonbondedForce, none of the kernels called");
// Argument fragments that name the nonbonded lookup tables. Each macro
// expands to getter call(s) on cudaNonbondedTables, so it can only be used
// where an object of that name is in scope. TABLE_PARAMS expands to two
// comma-separated arguments (force table, then energy table).
// This first group, under USE_TABLE_ARRAYS, uses the plain get*Table() getters.
// NOTE(review): "THOLE_TABLE_PARMAS" looks like a misspelling of "PARAMS"; the
// spelling is kept because the CALL launch macro refers to it by this exact name.
2696 #ifdef USE_TABLE_ARRAYS
2697 #define VDW_TABLE_PARAMS cudaNonbondedTables.getVdwCoefTable()
2698 #define TABLE_PARAMS \
2699 cudaNonbondedTables.getForceTable(), cudaNonbondedTables.getEnergyTable()
2700 #define THOLE_TABLE_PARMAS cudaNonbondedTables.getDrudeNbTholeTijTable()
// The same three fragments, built from the get*TableTex() getters instead.
// NOTE(review): presumably the alternative selected when USE_TABLE_ARRAYS is
// not defined -- confirm against the surrounding conditional directives.
2702 #define VDW_TABLE_PARAMS cudaNonbondedTables.getVdwCoefTableTex()
2703 #define TABLE_PARAMS \
2704 cudaNonbondedTables.getForceTableTex(), cudaNonbondedTables.getEnergyTableTex()
2705 #define THOLE_TABLE_PARMAS cudaNonbondedTables.getDrudeNbTholeTijTableTex()
// CALL(...): launches one instantiation of nonbondedForceKernel.
// The eleven macro arguments are passed straight through, in the same order,
// as the kernel's template arguments. Everything else -- the launch
// configuration (nblock, nthread, shMemSize, stream) and every kernel
// argument -- is picked up by name from the surrounding scope, so the macro
// can only be expanded where those identifiers are visible.
// The expansion carries no trailing semicolon; the call site supplies it.
// In the case table that follows, the case label read as a binary number
// spells out the argument list from left to right (bit 0 = DOENERGY,
// bit 1 = DOVIRIAL, bit 2 = DOSLOW, and so on).
// NOTE(review): "DOALCHWDWFORCESWITCHING" is presumably meant as "...VDW...";
// it is only a macro parameter name, so the spelling has no effect outside
// this definition.
2708 #define CALL(DOENERGY, DOVIRIAL, DOSLOW, DOPAIRLIST, DOALCH, DOFEP, DOTI, DOSTREAMING, DOTABLE, DOALCHWDWFORCESWITCHING, DONBTHOLE) \
2709 nonbondedForceKernel<DOENERGY, DOVIRIAL, DOSLOW, DOPAIRLIST, DOALCH, DOFEP, DOTI, DOSTREAMING, DOTABLE, DOALCHWDWFORCESWITCHING, DONBTHOLE> \
2710 <<< nblock, nthread, shMemSize, stream >>> \
2711 (start, tlKernel.getNumTileLists(), tlKernel.getTileLists(), tlKernel.getTileExcls(), tlKernel.getTileJatomStart(), \
2712 cudaNonbondedTables.getVdwCoefTableWidth(), \
2714 vdwTypes, lata, latb, latc, tlKernel.get_xyzq(), cutoff2, nbConstants, \
2716 tlKernel.get_plcutoff2(), tlKernel.getPatchPairs(), atomIndex, exclIndexMaxDiff, overflowExclusions, \
2717 tlKernel.getTileListDepth(), tlKernel.getTileListOrder(), tlKernel.getJtiles(), tlKernel.getTileListStatDevPtr(), \
2718 tlKernel.getBoundingBoxes(), \
2719 force_x, force_y, force_z, force_w, \
2720 forceSlow_x, forceSlow_y, forceSlow_z, forceSlow_w, \
2721 numPatches, patchNumCountPtr, tlKernel.getCudaPatches(), m_forces, m_forcesSlow, m_patchReadyQueue, \
2722 outputOrderPtr, tlKernel.getTileListVirialEnergy(), tlKernel.get_part(), \
2723 THOLE_TABLE_PARMAS, \
2724 isDrude, drudeAtomAlpha, drudeNbtholeCut2)
2731 case 0: CALL(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); break;
2732 case 1: CALL(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); break;
2733 case 2: CALL(0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0); break;
2734 case 3: CALL(1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0); break;
2735 case 4: CALL(0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0); break;
2736 case 5: CALL(1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0); break;
2737 case 6: CALL(0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0); break;
2738 case 7: CALL(1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0); break;
2739 case 8: CALL(0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); break;
2740 case 9: CALL(1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); break;
2741 case 10: CALL(0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0); break;
2742 case 11: CALL(1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0); break;
2743 case 12: CALL(0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0); break;
2744 case 13: CALL(1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0); break;
2745 case 14: CALL(0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0); break;
2746 case 15: CALL(1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0); break;
2749 case 16: CALL(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0); break;
2750 case 17: CALL(1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0); break;
2751 case 18: CALL(0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0); break;
2752 case 19: CALL(1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0); break;
2753 case 20: CALL(0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0); break;
2754 case 21: CALL(1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0); break;
2755 case 22: CALL(0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0); break;
2756 case 23: CALL(1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0); break;
2757 case 24: CALL(0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0); break;
2758 case 25: CALL(1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0); break;
2759 case 26: CALL(0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0); break;
2760 case 27: CALL(1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0); break;
2761 case 28: CALL(0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0); break;
2762 case 29: CALL(1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0); break;
2763 case 30: CALL(0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0); break;
2764 case 31: CALL(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0); break;
2766 case 32: CALL(0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0); break;
2767 case 33: CALL(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0); break;
2768 case 34: CALL(0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0); break;
2769 case 35: CALL(1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0); break;
2770 case 36: CALL(0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0); break;
2771 case 37: CALL(1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0); break;
2772 case 38: CALL(0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0); break;
2773 case 39: CALL(1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0); break;
2774 case 40: CALL(0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0); break;
2775 case 41: CALL(1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0); break;
2776 case 42: CALL(0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0); break;
2777 case 43: CALL(1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0); break;
2778 case 44: CALL(0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0); break;
2779 case 45: CALL(1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0); break;
2780 case 46: CALL(0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0); break;
2781 case 47: CALL(1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0); break;
2783 case 48: CALL(0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0); break;
2784 case 49: CALL(1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0); break;
2785 case 50: CALL(0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0); break;
2786 case 51: CALL(1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0); break;
2787 case 52: CALL(0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0); break;
2788 case 53: CALL(1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0); break;
2789 case 54: CALL(0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0); break;
2790 case 55: CALL(1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0); break;
2791 case 56: CALL(0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0); break;
2792 case 57: CALL(1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0); break;
2793 case 58: CALL(0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0); break;
2794 case 59: CALL(1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0); break;
2795 case 60: CALL(0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0); break;
2796 case 61: CALL(1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0); break;
2797 case 62: CALL(0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0); break;
2798 case 63: CALL(1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0); break;
2800 case 64: CALL(0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0); break;
2801 case 65: CALL(1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0); break;
2802 case 66: CALL(0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0); break;
2803 case 67: CALL(1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0); break;
2804 case 68: CALL(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0); break;
2805 case 69: CALL(1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0); break;
2806 case 70: CALL(0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0); break;
2807 case 71: CALL(1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0); break;
2808 case 72: CALL(0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0); break;
2809 case 73: CALL(1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0); break;
2810 case 74: CALL(0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0); break;
2811 case 75: CALL(1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0); break;
2812 case 76: CALL(0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0); break;
2813 case 77: CALL(1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0); break;
2814 case 78: CALL(0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0); break;
2815 case 79: CALL(1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0); break;
2817 case 80: CALL(0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0); break;
2818 case 81: CALL(1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0); break;
2819 case 82: CALL(0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0); break;
2820 case 83: CALL(1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0); break;
2821 case 84: CALL(0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0); break;
2822 case 85: CALL(1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0); break;
2823 case 86: CALL(0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0); break;
2824 case 87: CALL(1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0); break;
2825 case 88: CALL(0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0); break;
2826 case 89: CALL(1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0); break;
2827 case 90: CALL(0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0); break;
2828 case 91: CALL(1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0); break;
2829 case 92: CALL(0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0); break;
2830 case 93: CALL(1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0); break;
2831 case 94: CALL(0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0); break;
2832 case 95: CALL(1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0); break;
2834 case 96: CALL(0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0); break;
2835 case 97: CALL(1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0); break;
2836 case 98: CALL(0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0); break;
2837 case 99: CALL(1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0); break;
2838 case 100: CALL(0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0); break;
2839 case 101: CALL(1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0); break;
2840 case 102: CALL(0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0); break;
2841 case 103: CALL(1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0); break;
2842 case 104: CALL(0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0); break;
2843 case 105: CALL(1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0); break;
2844 case 106: CALL(0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0); break;
2845 case 107: CALL(1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0); break;
2846 case 108: CALL(0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0); break;
2847 case 109: CALL(1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0); break;
2848 case 110: CALL(0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0); break;
2849 case 111: CALL(1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0); break;
2851 case 112: CALL(0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0); break;
2852 case 113: CALL(1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0); break;
2853 case 114: CALL(0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0); break;
2854 case 115: CALL(1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0); break;
2855 case 116: CALL(0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0); break;
2856 case 117: CALL(1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0); break;
2857 case 118: CALL(0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0); break;
2858 case 119: CALL(1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0); break;
2859 case 120: CALL(0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0); break;
2860 case 121: CALL(1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0); break;
2861 case 122: CALL(0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0); break;
2862 case 123: CALL(1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0); break;
2863 case 124: CALL(0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0); break;
2864 case 125: CALL(1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0); break;
2865 case 126: CALL(0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); break;
2866 case 127: CALL(1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); break;
2869 case 128: CALL(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0); break;
2870 case 129: CALL(1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0); break;
2871 case 130: CALL(0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0); break;
2872 case 131: CALL(1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0); break;
2873 case 132: CALL(0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0); break;
2874 case 133: CALL(1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0); break;
2875 case 134: CALL(0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0); break;
2876 case 135: CALL(1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0); break;
2877 case 136: CALL(0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0); break;
2878 case 137: CALL(1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0); break;
2879 case 138: CALL(0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0); break;
2880 case 139: CALL(1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0); break;
2881 case 140: CALL(0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0); break;
2882 case 141: CALL(1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0); break;
2883 case 142: CALL(0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0); break;
2884 case 143: CALL(1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0); break;
2887 case 144: CALL(0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0); break;
2888 case 145: CALL(1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0); break;
2889 case 146: CALL(0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0); break;
2890 case 147: CALL(1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0); break;
2891 case 148: CALL(0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0); break;
2892 case 149: CALL(1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0); break;
2893 case 150: CALL(0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0); break;
2894 case 151: CALL(1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0); break;
2895 case 152: CALL(0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0); break;
2896 case 153: CALL(1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0); break;
2897 case 154: CALL(0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0); break;
2898 case 155: CALL(1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0); break;
2899 case 156: CALL(0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0); break;
2900 case 157: CALL(1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0); break;
2901 case 158: CALL(0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0); break;
2902 case 159: CALL(1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0); break;
2904 case 160: CALL(0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0); break;
2905 case 161: CALL(1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0); break;
2906 case 162: CALL(0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0); break;
2907 case 163: CALL(1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0); break;
2908 case 164: CALL(0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0); break;
2909 case 165: CALL(1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0); break;
2910 case 166: CALL(0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0); break;
2911 case 167: CALL(1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0); break;
2912 case 168: CALL(0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0); break;
2913 case 169: CALL(1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0); break;
2914 case 170: CALL(0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0); break;
2915 case 171: CALL(1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0); break;
2916 case 172: CALL(0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0); break;
2917 case 173: CALL(1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0); break;
2918 case 174: CALL(0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0); break;
2919 case 175: CALL(1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0); break;
2921 case 176: CALL(0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0); break;
2922 case 177: CALL(1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0); break;
2923 case 178: CALL(0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0); break;
2924 case 179: CALL(1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0); break;
2925 case 180: CALL(0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0); break;
2926 case 181: CALL(1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0); break;
2927 case 182: CALL(0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0); break;
2928 case 183: CALL(1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0); break;
2929 case 184: CALL(0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0); break;
2930 case 185: CALL(1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0); break;
2931 case 186: CALL(0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0); break;
2932 case 187: CALL(1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0); break;
2933 case 188: CALL(0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0); break;
2934 case 189: CALL(1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0); break;
2935 case 190: CALL(0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0); break;
2936 case 191: CALL(1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0); break;
2938 case 192: CALL(0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0); break;
2939 case 193: CALL(1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0); break;
2940 case 194: CALL(0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0); break;
2941 case 195: CALL(1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0); break;
2942 case 196: CALL(0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0); break;
2943 case 197: CALL(1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0); break;
2944 case 198: CALL(0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0); break;
2945 case 199: CALL(1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0); break;
2946 case 200: CALL(0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0); break;
2947 case 201: CALL(1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0); break;
2948 case 202: CALL(0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0); break;
2949 case 203: CALL(1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0); break;
2950 case 204: CALL(0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0); break;
2951 case 205: CALL(1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0); break;
2952 case 206: CALL(0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0); break;
2953 case 207: CALL(1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0); break;
2955 case 208: CALL(0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0); break;
2956 case 209: CALL(1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0); break;
2957 case 210: CALL(0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0); break;
2958 case 211: CALL(1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0); break;
2959 case 212: CALL(0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0); break;
2960 case 213: CALL(1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0); break;
2961 case 214: CALL(0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0); break;
2962 case 215: CALL(1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0); break;
2963 case 216: CALL(0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0); break;
2964 case 217: CALL(1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0); break;
2965 case 218: CALL(0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0); break;
2966 case 219: CALL(1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0); break;
2967 case 220: CALL(0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0); break;
2968 case 221: CALL(1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0); break;
2969 case 222: CALL(0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0); break;
2970 case 223: CALL(1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0); break;
2972 case 224: CALL(0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0); break;
2973 case 225: CALL(1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0); break;
2974 case 226: CALL(0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0); break;
2975 case 227: CALL(1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0); break;
2976 case 228: CALL(0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0); break;
2977 case 229: CALL(1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0); break;
2978 case 230: CALL(0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0); break;
2979 case 231: CALL(1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0); break;
2980 case 232: CALL(0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0); break;
2981 case 233: CALL(1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0); break;
2982 case 234: CALL(0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0); break;
2983 case 235: CALL(1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0); break;
2984 case 236: CALL(0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0); break;
2985 case 237: CALL(1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0); break;
2986 case 238: CALL(0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0); break;
2987 case 239: CALL(1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0); break;
2989 case 240: CALL(0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0); break;
2990 case 241: CALL(1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0); break;
2991 case 242: CALL(0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0); break;
2992 case 243: CALL(1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0); break;
2993 case 244: CALL(0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0); break;
2994 case 245: CALL(1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0); break;
2995 case 246: CALL(0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0); break;
2996 case 247: CALL(1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0); break;
2997 case 248: CALL(0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0); break;
2998 case 249: CALL(1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0); break;
2999 case 250: CALL(0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0); break;
3000 case 251: CALL(1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0); break;
3001 case 252: CALL(0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0); break;
3002 case 253: CALL(1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0); break;
3003 case 254: CALL(0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0); break;
3004 case 255: CALL(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0); break;
3007 case 256: CALL(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0); break;
3008 case 257: CALL(1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0); break;
3009 case 258: CALL(0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0); break;
3010 case 259: CALL(1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0); break;
3011 case 260: CALL(0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0); break;
3012 case 261: CALL(1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0); break;
3013 case 262: CALL(0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0); break;
3014 case 263: CALL(1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0); break;
3015 case 264: CALL(0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0); break;
3016 case 265: CALL(1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0); break;
3017 case 266: CALL(0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0); break;
3018 case 267: CALL(1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0); break;
3019 case 268: CALL(0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0); break;
3020 case 269: CALL(1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0); break;
3021 case 270: CALL(0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0); break;
3022 case 271: CALL(1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0); break;
3025 case 272: CALL(0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0); break;
3026 case 273: CALL(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0); break;
3027 case 274: CALL(0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0); break;
3028 case 275: CALL(1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0); break;
3029 case 276: CALL(0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0); break;
3030 case 277: CALL(1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0); break;
3031 case 278: CALL(0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0); break;
3032 case 279: CALL(1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0); break;
3033 case 280: CALL(0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0); break;
3034 case 281: CALL(1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0); break;
3035 case 282: CALL(0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0); break;
3036 case 283: CALL(1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0); break;
3037 case 284: CALL(0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0); break;
3038 case 285: CALL(1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0); break;
3039 case 286: CALL(0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0); break;
3040 case 287: CALL(1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0); break;
3042 case 288: CALL(0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0); break;
3043 case 289: CALL(1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0); break;
3044 case 290: CALL(0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0); break;
3045 case 291: CALL(1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0); break;
3046 case 292: CALL(0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0); break;
3047 case 293: CALL(1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0); break;
3048 case 294: CALL(0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0); break;
3049 case 295: CALL(1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0); break;
3050 case 296: CALL(0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0); break;
3051 case 297: CALL(1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0); break;
3052 case 298: CALL(0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0); break;
3053 case 299: CALL(1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0); break;
3054 case 300: CALL(0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0); break;
3055 case 301: CALL(1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0); break;
3056 case 302: CALL(0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0); break;
3057 case 303: CALL(1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0); break;
3060 case 304: CALL(0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0); break;
3061 case 305: CALL(1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0); break;
3062 case 306: CALL(0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0); break;
3063 case 307: CALL(1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0); break;
3064 case 308: CALL(0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0); break;
3065 case 309: CALL(1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0); break;
3066 case 310: CALL(0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0); break;
3067 case 311: CALL(1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0); break;
3068 case 312: CALL(0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0); break;
3069 case 313: CALL(1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0); break;
3070 case 314: CALL(0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0); break;
3071 case 315: CALL(1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0); break;
3072 case 316: CALL(0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0); break;
3073 case 317: CALL(1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0); break;
3074 case 318: CALL(0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0); break;
3075 case 319: CALL(1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0); break;
3078 case 320: CALL(0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0); break;
3079 case 321: CALL(1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0); break;
3080 case 322: CALL(0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0); break;
3081 case 323: CALL(1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0); break;
3082 case 324: CALL(0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0); break;
3083 case 325: CALL(1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0); break;
3084 case 326: CALL(0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0); break;
3085 case 327: CALL(1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0); break;
3086 case 328: CALL(0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0); break;
3087 case 329: CALL(1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0); break;
3088 case 330: CALL(0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0); break;
3089 case 331: CALL(1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0); break;
3090 case 332: CALL(0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0); break;
3091 case 333: CALL(1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0); break;
3092 case 334: CALL(0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0); break;
3093 case 335: CALL(1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0); break;
3096 case 336: CALL(0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0); break;
3097 case 337: CALL(1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0); break;
3098 case 338: CALL(0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0); break;
3099 case 339: CALL(1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0); break;
3100 case 340: CALL(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0); break;
3101 case 341: CALL(1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0); break;
3102 case 342: CALL(0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0); break;
3103 case 343: CALL(1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0); break;
3104 case 344: CALL(0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0); break;
3105 case 345: CALL(1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0); break;
3106 case 346: CALL(0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0); break;
3107 case 347: CALL(1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0); break;
3108 case 348: CALL(0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0); break;
3109 case 349: CALL(1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0); break;
3110 case 350: CALL(0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0); break;
3111 case 351: CALL(1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0); break;
3114 case 352: CALL(0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0); break;
3115 case 353: CALL(1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0); break;
3116 case 354: CALL(0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0); break;
3117 case 355: CALL(1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0); break;
3118 case 356: CALL(0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0); break;
3119 case 357: CALL(1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0); break;
3120 case 358: CALL(0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0); break;
3121 case 359: CALL(1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0); break;
3122 case 360: CALL(0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0); break;
3123 case 361: CALL(1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0); break;
3124 case 362: CALL(0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0); break;
3125 case 363: CALL(1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0); break;
3126 case 364: CALL(0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0); break;
3127 case 365: CALL(1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0); break;
3128 case 366: CALL(0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0); break;
3129 case 367: CALL(1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0); break;
3131 case 368: CALL(0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0); break;
3132 case 369: CALL(1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0); break;
3133 case 370: CALL(0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0); break;
3134 case 371: CALL(1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0); break;
3135 case 372: CALL(0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0); break;
3136 case 373: CALL(1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0); break;
3137 case 374: CALL(0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0); break;
3138 case 375: CALL(1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0); break;
3139 case 376: CALL(0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0); break;
3140 case 377: CALL(1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0); break;
3141 case 378: CALL(0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0); break;
3142 case 379: CALL(1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0); break;
3143 case 380: CALL(0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0); break;
3144 case 381: CALL(1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0); break;
3145 case 382: CALL(0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0); break;
3146 case 383: CALL(1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0); break;
3149 case 384: CALL(0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0); break;
3150 case 385: CALL(1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0); break;
3151 case 386: CALL(0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0); break;
3152 case 387: CALL(1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0); break;
3153 case 388: CALL(0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0); break;
3154 case 389: CALL(1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0); break;
3155 case 390: CALL(0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0); break;
3156 case 391: CALL(1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0); break;
3157 case 392: CALL(0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0); break;
3158 case 393: CALL(1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0); break;
3159 case 394: CALL(0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0); break;
3160 case 395: CALL(1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0); break;
3161 case 396: CALL(0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0); break;
3162 case 397: CALL(1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0); break;
3163 case 398: CALL(0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0); break;
3164 case 399: CALL(1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0); break;
3167 case 400: CALL(0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0); break;
3168 case 401: CALL(1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0); break;
3169 case 402: CALL(0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0); break;
3170 case 403: CALL(1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0); break;
3171 case 404: CALL(0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0); break;
3172 case 405: CALL(1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0); break;
3173 case 406: CALL(0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0); break;
3174 case 407: CALL(1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0); break;
3175 case 408: CALL(0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0); break;
3176 case 409: CALL(1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0); break;
3177 case 410: CALL(0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0); break;
3178 case 411: CALL(1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0); break;
3179 case 412: CALL(0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0); break;
3180 case 413: CALL(1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0); break;
3181 case 414: CALL(0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0); break;
3182 case 415: CALL(1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0); break;
3184 case 416: CALL(0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0); break;
3185 case 417: CALL(1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0); break;
3186 case 418: CALL(0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0); break;
3187 case 419: CALL(1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0); break;
3188 case 420: CALL(0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0); break;
3189 case 421: CALL(1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0); break;
3190 case 422: CALL(0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0); break;
3191 case 423: CALL(1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0); break;
3192 case 424: CALL(0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0); break;
3193 case 425: CALL(1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0); break;
3194 case 426: CALL(0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0); break;
3195 case 427: CALL(1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0); break;
3196 case 428: CALL(0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0); break;
3197 case 429: CALL(1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0); break;
3198 case 430: CALL(0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0); break;
3199 case 431: CALL(1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0); break;
3202 case 432: CALL(0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0); break;
3203 case 433: CALL(1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0); break;
3204 case 434: CALL(0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0); break;
3205 case 435: CALL(1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0); break;
3206 case 436: CALL(0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0); break;
3207 case 437: CALL(1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0); break;
3208 case 438: CALL(0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0); break;
3209 case 439: CALL(1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0); break;
3210 case 440: CALL(0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0); break;
3211 case 441: CALL(1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0); break;
3212 case 442: CALL(0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0); break;
3213 case 443: CALL(1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0); break;
3214 case 444: CALL(0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0); break;
3215 case 445: CALL(1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0); break;
3216 case 446: CALL(0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0); break;
3217 case 447: CALL(1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0); break;
3220 case 448: CALL(0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0); break;
3221 case 449: CALL(1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0); break;
3222 case 450: CALL(0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0); break;
3223 case 451: CALL(1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0); break;
3224 case 452: CALL(0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0); break;
3225 case 453: CALL(1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0); break;
3226 case 454: CALL(0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0); break;
3227 case 455: CALL(1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0); break;
3228 case 456: CALL(0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0); break;
3229 case 457: CALL(1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0); break;
3230 case 458: CALL(0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0); break;
3231 case 459: CALL(1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0); break;
3232 case 460: CALL(0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0); break;
3233 case 461: CALL(1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0); break;
3234 case 462: CALL(0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0); break;
3235 case 463: CALL(1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0); break;
3238 case 464: CALL(0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0); break;
3239 case 465: CALL(1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0); break;
3240 case 466: CALL(0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0); break;
3241 case 467: CALL(1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0); break;
3242 case 468: CALL(0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0); break;
3243 case 469: CALL(1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0); break;
3244 case 470: CALL(0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0); break;
3245 case 471: CALL(1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0); break;
3246 case 472: CALL(0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0); break;
3247 case 473: CALL(1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0); break;
3248 case 474: CALL(0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0); break;
3249 case 475: CALL(1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0); break;
3250 case 476: CALL(0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0); break;
3251 case 477: CALL(1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0); break;
3252 case 478: CALL(0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0); break;
3253 case 479: CALL(1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0); break;
3256 case 480: CALL(0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0); break;
3257 case 481: CALL(1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0); break;
3258 case 482: CALL(0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0); break;
3259 case 483: CALL(1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0); break;
3260 case 484: CALL(0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0); break;
3261 case 485: CALL(1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0); break;
3262 case 486: CALL(0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0); break;
3263 case 487: CALL(1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0); break;
3264 case 488: CALL(0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0); break;
3265 case 489: CALL(1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0); break;
3266 case 490: CALL(0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0); break;
3267 case 491: CALL(1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0); break;
3268 case 492: CALL(0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0); break;
3269 case 493: CALL(1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0); break;
3270 case 494: CALL(0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0); break;
3271 case 495: CALL(1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0); break;
3273 case 496: CALL(0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0); break;
3274 case 497: CALL(1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0); break;
3275 case 498: CALL(0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0); break;
3276 case 499: CALL(1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0); break;
3277 case 500: CALL(0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0); break;
3278 case 501: CALL(1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0); break;
3279 case 502: CALL(0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0); break;
3280 case 503: CALL(1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0); break;
3281 case 504: CALL(0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0); break;
3282 case 505: CALL(1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0); break;
3283 case 506: CALL(0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0); break;
3284 case 507: CALL(1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0); break;
3285 case 508: CALL(0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0); break;
3286 case 509: CALL(1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0); break;
3287 case 510: CALL(0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0); break;
3288 case 511: CALL(1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0); break;
3291 * Haochuan: cases 512 through 2047 were generated by the following Python script (flag combinations the script marks as incompatible appear below as commented-out cases)
3294 #!/usr/bin/env python3
3295 def gen_call(option: int):
3296 doEnergy = option & 1
3297 doVirial = (option >> 1) & 1
3298 doSlow = (option >> 2) & 1
3299 doPairlist = (option >> 3) & 1
3300 doAlch = (option >> 4) & 1
3301 doFEP = (option >> 5) & 1
3302 doTI = (option >> 6) & 1
3303 doStreaming = (option >> 7) & 1
3304 doTable = (option >> 8) & 1
3305 doAlchVdwForceSwitching = (option >> 9) & 1
3306 doNbThole = (option >> 10) & 1
3307 incompatible = False
3308 incompatible = incompatible | (doFEP and doTI)
3309 incompatible = incompatible | (doAlch and ((not doFEP) and (not doTI)))
3310 incompatible = incompatible | ((not doAlch) and (doFEP or doTI or doAlchVdwForceSwitching))
3311 incompatible = incompatible | ((not doTable) and (doAlch or doTI or doFEP or doAlchVdwForceSwitching))
3312 incompatible = incompatible | (doAlch and doNbThole)
3313 incompatible = incompatible | (doFEP and doNbThole)
3314 incompatible = incompatible | (doTI and doNbThole)
3315 incompatible = incompatible | (doAlchVdwForceSwitching and doNbThole)
3318 print(f' // case {option}: CALL({doEnergy}, {doVirial}, {doSlow}, {doPairlist}, {doAlch}, {doFEP}, {doTI}, {doStreaming}, {doTable}, {doAlchVdwForceSwitching}, {doNbThole}); break;')
3320 print(f' case {option}: CALL({doEnergy}, {doVirial}, {doSlow}, {doPairlist}, {doAlch}, {doFEP}, {doTI}, {doStreaming}, {doTable}, {doAlchVdwForceSwitching}, {doNbThole}); break;')
3324 for i in range(512, 2048):
3329 // case 512: CALL(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0); break;
3330 // case 513: CALL(1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0); break;
3331 // case 514: CALL(0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0); break;
3332 // case 515: CALL(1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0); break;
3333 // case 516: CALL(0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0); break;
3334 // case 517: CALL(1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0); break;
3335 // case 518: CALL(0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0); break;
3336 // case 519: CALL(1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0); break;
3337 // case 520: CALL(0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0); break;
3338 // case 521: CALL(1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0); break;
3339 // case 522: CALL(0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0); break;
3340 // case 523: CALL(1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0); break;
3341 // case 524: CALL(0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0); break;
3342 // case 525: CALL(1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0); break;
3343 // case 526: CALL(0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0); break;
3344 // case 527: CALL(1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0); break;
3345 // case 528: CALL(0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0); break;
3346 // case 529: CALL(1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0); break;
3347 // case 530: CALL(0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0); break;
3348 // case 531: CALL(1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0); break;
3349 // case 532: CALL(0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0); break;
3350 // case 533: CALL(1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0); break;
3351 // case 534: CALL(0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0); break;
3352 // case 535: CALL(1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0); break;
3353 // case 536: CALL(0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0); break;
3354 // case 537: CALL(1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0); break;
3355 // case 538: CALL(0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0); break;
3356 // case 539: CALL(1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0); break;
3357 // case 540: CALL(0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0); break;
3358 // case 541: CALL(1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0); break;
3359 // case 542: CALL(0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0); break;
3360 // case 543: CALL(1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0); break;
3361 // case 544: CALL(0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0); break;
3362 // case 545: CALL(1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0); break;
3363 // case 546: CALL(0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0); break;
3364 // case 547: CALL(1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0); break;
3365 // case 548: CALL(0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0); break;
3366 // case 549: CALL(1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0); break;
3367 // case 550: CALL(0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0); break;
3368 // case 551: CALL(1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0); break;
3369 // case 552: CALL(0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0); break;
3370 // case 553: CALL(1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0); break;
3371 // case 554: CALL(0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0); break;
3372 // case 555: CALL(1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0); break;
3373 // case 556: CALL(0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0); break;
3374 // case 557: CALL(1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0); break;
3375 // case 558: CALL(0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0); break;
3376 // case 559: CALL(1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0); break;
3377 // case 560: CALL(0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0); break;
3378 // case 561: CALL(1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0); break;
3379 // case 562: CALL(0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0); break;
3380 // case 563: CALL(1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0); break;
3381 // case 564: CALL(0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0); break;
3382 // case 565: CALL(1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0); break;
3383 // case 566: CALL(0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0); break;
3384 // case 567: CALL(1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0); break;
3385 // case 568: CALL(0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0); break;
3386 // case 569: CALL(1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0); break;
3387 // case 570: CALL(0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0); break;
3388 // case 571: CALL(1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0); break;
3389 // case 572: CALL(0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0); break;
3390 // case 573: CALL(1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0); break;
3391 // case 574: CALL(0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0); break;
3392 // case 575: CALL(1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0); break;
3393 // case 576: CALL(0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0); break;
3394 // case 577: CALL(1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0); break;
3395 // case 578: CALL(0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0); break;
3396 // case 579: CALL(1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0); break;
3397 // case 580: CALL(0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0); break;
3398 // case 581: CALL(1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0); break;
3399 // case 582: CALL(0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0); break;
3400 // case 583: CALL(1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0); break;
3401 // case 584: CALL(0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0); break;
3402 // case 585: CALL(1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0); break;
3403 // case 586: CALL(0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0); break;
3404 // case 587: CALL(1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0); break;
3405 // case 588: CALL(0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0); break;
3406 // case 589: CALL(1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0); break;
3407 // case 590: CALL(0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0); break;
3408 // case 591: CALL(1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0); break;
3409 // case 592: CALL(0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0); break;
3410 // case 593: CALL(1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0); break;
3411 // case 594: CALL(0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0); break;
3412 // case 595: CALL(1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0); break;
3413 // case 596: CALL(0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0); break;
3414 // case 597: CALL(1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0); break;
3415 // case 598: CALL(0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0); break;
3416 // case 599: CALL(1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0); break;
3417 // case 600: CALL(0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0); break;
3418 // case 601: CALL(1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0); break;
3419 // case 602: CALL(0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0); break;
3420 // case 603: CALL(1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0); break;
3421 // case 604: CALL(0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0); break;
3422 // case 605: CALL(1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0); break;
3423 // case 606: CALL(0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0); break;
3424 // case 607: CALL(1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0); break;
3425 // case 608: CALL(0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0); break;
3426 // case 609: CALL(1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0); break;
3427 // case 610: CALL(0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0); break;
3428 // case 611: CALL(1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0); break;
3429 // case 612: CALL(0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0); break;
3430 // case 613: CALL(1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0); break;
3431 // case 614: CALL(0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0); break;
3432 // case 615: CALL(1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0); break;
3433 // case 616: CALL(0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0); break;
3434 // case 617: CALL(1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0); break;
3435 // case 618: CALL(0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0); break;
3436 // case 619: CALL(1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0); break;
3437 // case 620: CALL(0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0); break;
3438 // case 621: CALL(1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0); break;
3439 // case 622: CALL(0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0); break;
3440 // case 623: CALL(1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0); break;
3441 // case 624: CALL(0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0); break;
3442 // case 625: CALL(1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0); break;
3443 // case 626: CALL(0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0); break;
3444 // case 627: CALL(1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0); break;
3445 // case 628: CALL(0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0); break;
3446 // case 629: CALL(1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0); break;
3447 // case 630: CALL(0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0); break;
3448 // case 631: CALL(1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0); break;
3449 // case 632: CALL(0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0); break;
3450 // case 633: CALL(1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0); break;
3451 // case 634: CALL(0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0); break;
3452 // case 635: CALL(1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0); break;
3453 // case 636: CALL(0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0); break;
3454 // case 637: CALL(1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0); break;
3455 // case 638: CALL(0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0); break;
3456 // case 639: CALL(1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0); break;
3457 // case 640: CALL(0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0); break;
3458 // case 641: CALL(1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0); break;
3459 // case 642: CALL(0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0); break;
3460 // case 643: CALL(1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0); break;
3461 // case 644: CALL(0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0); break;
3462 // case 645: CALL(1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0); break;
3463 // case 646: CALL(0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0); break;
3464 // case 647: CALL(1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0); break;
3465 // case 648: CALL(0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0); break;
3466 // case 649: CALL(1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0); break;
3467 // case 650: CALL(0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0); break;
3468 // case 651: CALL(1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0); break;
3469 // case 652: CALL(0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0); break;
3470 // case 653: CALL(1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0); break;
3471 // case 654: CALL(0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0); break;
3472 // case 655: CALL(1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0); break;
3473 // case 656: CALL(0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0); break;
3474 // case 657: CALL(1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0); break;
3475 // case 658: CALL(0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0); break;
3476 // case 659: CALL(1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0); break;
3477 // case 660: CALL(0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0); break;
3478 // case 661: CALL(1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0); break;
3479 // case 662: CALL(0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0); break;
3480 // case 663: CALL(1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0); break;
3481 // case 664: CALL(0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0); break;
3482 // case 665: CALL(1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0); break;
3483 // case 666: CALL(0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0); break;
3484 // case 667: CALL(1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0); break;
3485 // case 668: CALL(0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0); break;
3486 // case 669: CALL(1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0); break;
3487 // case 670: CALL(0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0); break;
3488 // case 671: CALL(1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0); break;
3489 // case 672: CALL(0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0); break;
3490 // case 673: CALL(1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0); break;
3491 // case 674: CALL(0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0); break;
3492 // case 675: CALL(1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0); break;
3493 // case 676: CALL(0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0); break;
3494 // case 677: CALL(1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0); break;
3495 // case 678: CALL(0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0); break;
3496 // case 679: CALL(1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0); break;
3497 // case 680: CALL(0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0); break;
3498 // case 681: CALL(1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0); break;
3499 // case 682: CALL(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0); break;
3500 // case 683: CALL(1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0); break;
3501 // case 684: CALL(0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0); break;
3502 // case 685: CALL(1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0); break;
3503 // case 686: CALL(0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0); break;
3504 // case 687: CALL(1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0); break;
3505 // case 688: CALL(0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0); break;
3506 // case 689: CALL(1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0); break;
3507 // case 690: CALL(0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0); break;
3508 // case 691: CALL(1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0); break;
3509 // case 692: CALL(0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0); break;
3510 // case 693: CALL(1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0); break;
3511 // case 694: CALL(0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0); break;
3512 // case 695: CALL(1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0); break;
3513 // case 696: CALL(0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0); break;
3514 // case 697: CALL(1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0); break;
3515 // case 698: CALL(0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0); break;
3516 // case 699: CALL(1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0); break;
3517 // case 700: CALL(0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0); break;
3518 // case 701: CALL(1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0); break;
3519 // case 702: CALL(0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0); break;
3520 // case 703: CALL(1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0); break;
3521 // case 704: CALL(0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0); break;
3522 // case 705: CALL(1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0); break;
3523 // case 706: CALL(0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0); break;
3524 // case 707: CALL(1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0); break;
3525 // case 708: CALL(0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0); break;
3526 // case 709: CALL(1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0); break;
3527 // case 710: CALL(0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0); break;
3528 // case 711: CALL(1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0); break;
3529 // case 712: CALL(0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0); break;
3530 // case 713: CALL(1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0); break;
3531 // case 714: CALL(0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0); break;
3532 // case 715: CALL(1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0); break;
3533 // case 716: CALL(0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0); break;
3534 // case 717: CALL(1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0); break;
3535 // case 718: CALL(0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0); break;
3536 // case 719: CALL(1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0); break;
3537 // case 720: CALL(0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0); break;
3538 // case 721: CALL(1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0); break;
3539 // case 722: CALL(0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0); break;
3540 // case 723: CALL(1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0); break;
3541 // case 724: CALL(0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0); break;
3542 // case 725: CALL(1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0); break;
3543 // case 726: CALL(0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0); break;
3544 // case 727: CALL(1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0); break;
3545 // case 728: CALL(0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0); break;
3546 // case 729: CALL(1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0); break;
3547 // case 730: CALL(0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0); break;
3548 // case 731: CALL(1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0); break;
3549 // case 732: CALL(0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0); break;
3550 // case 733: CALL(1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0); break;
3551 // case 734: CALL(0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0); break;
3552 // case 735: CALL(1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0); break;
3553 // case 736: CALL(0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0); break;
3554 // case 737: CALL(1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0); break;
3555 // case 738: CALL(0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0); break;
3556 // case 739: CALL(1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0); break;
3557 // case 740: CALL(0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0); break;
3558 // case 741: CALL(1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0); break;
3559 // case 742: CALL(0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0); break;
3560 // case 743: CALL(1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0); break;
3561 // case 744: CALL(0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0); break;
3562 // case 745: CALL(1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0); break;
3563 // case 746: CALL(0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0); break;
3564 // case 747: CALL(1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0); break;
3565 // case 748: CALL(0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0); break;
3566 // case 749: CALL(1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0); break;
3567 // case 750: CALL(0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0); break;
3568 // case 751: CALL(1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0); break;
3569 // case 752: CALL(0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0); break;
3570 // case 753: CALL(1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0); break;
3571 // case 754: CALL(0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0); break;
3572 // case 755: CALL(1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0); break;
3573 // case 756: CALL(0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0); break;
3574 // case 757: CALL(1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0); break;
3575 // case 758: CALL(0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0); break;
3576 // case 759: CALL(1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0); break;
3577 // case 760: CALL(0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0); break;
3578 // case 761: CALL(1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0); break;
3579 // case 762: CALL(0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0); break;
3580 // case 763: CALL(1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0); break;
3581 // case 764: CALL(0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0); break;
3582 // case 765: CALL(1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0); break;
3583 // case 766: CALL(0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0); break;
3584 // case 767: CALL(1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0); break;
3585 // case 768: CALL(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0); break;
3586 // case 769: CALL(1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0); break;
3587 // case 770: CALL(0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0); break;
3588 // case 771: CALL(1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0); break;
3589 // case 772: CALL(0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0); break;
3590 // case 773: CALL(1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0); break;
3591 // case 774: CALL(0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0); break;
3592 // case 775: CALL(1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0); break;
3593 // case 776: CALL(0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0); break;
3594 // case 777: CALL(1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0); break;
3595 // case 778: CALL(0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0); break;
3596 // case 779: CALL(1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0); break;
3597 // case 780: CALL(0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0); break;
3598 // case 781: CALL(1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0); break;
3599 // case 782: CALL(0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0); break;
3600 // case 783: CALL(1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0); break;
3601 // case 784: CALL(0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0); break;
3602 // case 785: CALL(1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0); break;
3603 // case 786: CALL(0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0); break;
3604 // case 787: CALL(1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0); break;
3605 // case 788: CALL(0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0); break;
3606 // case 789: CALL(1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0); break;
3607 // case 790: CALL(0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0); break;
3608 // case 791: CALL(1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0); break;
3609 // case 792: CALL(0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0); break;
3610 // case 793: CALL(1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0); break;
3611 // case 794: CALL(0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0); break;
3612 // case 795: CALL(1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0); break;
3613 // case 796: CALL(0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0); break;
3614 // case 797: CALL(1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0); break;
3615 // case 798: CALL(0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0); break;
3616 // case 799: CALL(1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0); break;
3617 // case 800: CALL(0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0); break;
3618 // case 801: CALL(1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0); break;
3619 // case 802: CALL(0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0); break;
3620 // case 803: CALL(1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0); break;
3621 // case 804: CALL(0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0); break;
3622 // case 805: CALL(1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0); break;
3623 // case 806: CALL(0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0); break;
3624 // case 807: CALL(1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0); break;
3625 // case 808: CALL(0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0); break;
3626 // case 809: CALL(1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0); break;
3627 // case 810: CALL(0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0); break;
3628 // case 811: CALL(1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0); break;
3629 // case 812: CALL(0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0); break;
3630 // case 813: CALL(1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0); break;
3631 // case 814: CALL(0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0); break;
3632 // case 815: CALL(1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0); break;
3633 case 816: CALL(0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0); break;
3634 case 817: CALL(1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0); break;
3635 case 818: CALL(0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0); break;
3636 case 819: CALL(1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0); break;
3637 case 820: CALL(0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0); break;
3638 case 821: CALL(1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0); break;
3639 case 822: CALL(0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0); break;
3640 case 823: CALL(1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0); break;
3641 case 824: CALL(0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0); break;
3642 case 825: CALL(1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0); break;
3643 case 826: CALL(0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0); break;
3644 case 827: CALL(1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0); break;
3645 case 828: CALL(0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0); break;
3646 case 829: CALL(1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0); break;
3647 case 830: CALL(0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0); break;
3648 case 831: CALL(1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0); break;
3649 // case 832: CALL(0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0); break;
3650 // case 833: CALL(1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0); break;
3651 // case 834: CALL(0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0); break;
3652 // case 835: CALL(1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0); break;
3653 // case 836: CALL(0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0); break;
3654 // case 837: CALL(1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0); break;
3655 // case 838: CALL(0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0); break;
3656 // case 839: CALL(1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0); break;
3657 // case 840: CALL(0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0); break;
3658 // case 841: CALL(1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0); break;
3659 // case 842: CALL(0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0); break;
3660 // case 843: CALL(1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0); break;
3661 // case 844: CALL(0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0); break;
3662 // case 845: CALL(1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0); break;
3663 // case 846: CALL(0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0); break;
3664 // case 847: CALL(1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0); break;
3665 case 848: CALL(0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0); break;
3666 case 849: CALL(1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0); break;
3667 case 850: CALL(0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0); break;
3668 case 851: CALL(1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0); break;
3669 case 852: CALL(0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0); break;
3670 case 853: CALL(1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0); break;
3671 case 854: CALL(0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0); break;
3672 case 855: CALL(1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0); break;
3673 case 856: CALL(0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0); break;
3674 case 857: CALL(1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0); break;
3675 case 858: CALL(0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0); break;
3676 case 859: CALL(1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0); break;
3677 case 860: CALL(0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0); break;
3678 case 861: CALL(1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0); break;
3679 case 862: CALL(0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0); break;
3680 case 863: CALL(1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0); break;
3681 // case 864: CALL(0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0); break;
3682 // case 865: CALL(1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0); break;
3683 // case 866: CALL(0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0); break;
3684 // case 867: CALL(1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0); break;
3685 // case 868: CALL(0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0); break;
3686 // case 869: CALL(1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0); break;
3687 // case 870: CALL(0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0); break;
3688 // case 871: CALL(1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0); break;
3689 // case 872: CALL(0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0); break;
3690 // case 873: CALL(1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0); break;
3691 // case 874: CALL(0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0); break;
3692 // case 875: CALL(1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0); break;
3693 // case 876: CALL(0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0); break;
3694 // case 877: CALL(1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0); break;
3695 // case 878: CALL(0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0); break;
3696 // case 879: CALL(1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0); break;
3697 // case 880: CALL(0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0); break;
3698 // case 881: CALL(1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0); break;
3699 // case 882: CALL(0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0); break;
3700 // case 883: CALL(1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0); break;
3701 // case 884: CALL(0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0); break;
3702 // case 885: CALL(1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0); break;
3703 // case 886: CALL(0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0); break;
3704 // case 887: CALL(1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0); break;
3705 // case 888: CALL(0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0); break;
3706 // case 889: CALL(1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0); break;
3707 // case 890: CALL(0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0); break;
3708 // case 891: CALL(1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0); break;
3709 // case 892: CALL(0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0); break;
3710 // case 893: CALL(1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0); break;
3711 // case 894: CALL(0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0); break;
3712 // case 895: CALL(1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0); break;
3713 // case 896: CALL(0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0); break;
3714 // case 897: CALL(1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0); break;
3715 // case 898: CALL(0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0); break;
3716 // case 899: CALL(1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0); break;
3717 // case 900: CALL(0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0); break;
3718 // case 901: CALL(1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0); break;
3719 // case 902: CALL(0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0); break;
3720 // case 903: CALL(1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0); break;
3721 // case 904: CALL(0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0); break;
3722 // case 905: CALL(1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0); break;
3723 // case 906: CALL(0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0); break;
3724 // case 907: CALL(1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0); break;
3725 // case 908: CALL(0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0); break;
3726 // case 909: CALL(1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0); break;
3727 // case 910: CALL(0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0); break;
3728 // case 911: CALL(1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0); break;
3729 // case 912: CALL(0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0); break;
3730 // case 913: CALL(1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0); break;
3731 // case 914: CALL(0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0); break;
3732 // case 915: CALL(1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0); break;
3733 // case 916: CALL(0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0); break;
3734 // case 917: CALL(1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0); break;
3735 // case 918: CALL(0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0); break;
3736 // case 919: CALL(1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0); break;
3737 // case 920: CALL(0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0); break;
3738 // case 921: CALL(1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0); break;
3739 // case 922: CALL(0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0); break;
3740 // case 923: CALL(1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0); break;
3741 // case 924: CALL(0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0); break;
3742 // case 925: CALL(1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0); break;
3743 // case 926: CALL(0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0); break;
3744 // case 927: CALL(1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0); break;
3745 // case 928: CALL(0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0); break;
3746 // case 929: CALL(1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0); break;
3747 // case 930: CALL(0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0); break;
3748 // case 931: CALL(1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0); break;
3749 // case 932: CALL(0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0); break;
3750 // case 933: CALL(1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0); break;
3751 // case 934: CALL(0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0); break;
3752 // case 935: CALL(1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0); break;
3753 // case 936: CALL(0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0); break;
3754 // case 937: CALL(1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0); break;
3755 // case 938: CALL(0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0); break;
3756 // case 939: CALL(1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0); break;
3757 // case 940: CALL(0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0); break;
3758 // case 941: CALL(1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0); break;
3759 // case 942: CALL(0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0); break;
3760 // case 943: CALL(1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0); break;
3761 case 944: CALL(0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0); break;
3762 case 945: CALL(1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0); break;
3763 case 946: CALL(0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0); break;
3764 case 947: CALL(1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0); break;
3765 case 948: CALL(0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0); break;
3766 case 949: CALL(1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0); break;
3767 case 950: CALL(0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0); break;
3768 case 951: CALL(1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0); break;
3769 case 952: CALL(0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0); break;
3770 case 953: CALL(1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0); break;
3771 case 954: CALL(0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0); break;
3772 case 955: CALL(1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0); break;
3773 case 956: CALL(0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0); break;
3774 case 957: CALL(1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0); break;
3775 case 958: CALL(0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0); break;
3776 case 959: CALL(1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0); break;
3777 // case 960: CALL(0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0); break;
3778 // case 961: CALL(1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0); break;
3779 // case 962: CALL(0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0); break;
3780 // case 963: CALL(1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0); break;
3781 // case 964: CALL(0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0); break;
3782 // case 965: CALL(1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0); break;
3783 // case 966: CALL(0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0); break;
3784 // case 967: CALL(1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0); break;
3785 // case 968: CALL(0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0); break;
3786 // case 969: CALL(1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0); break;
3787 // case 970: CALL(0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0); break;
3788 // case 971: CALL(1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0); break;
3789 // case 972: CALL(0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0); break;
3790 // case 973: CALL(1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0); break;
3791 // case 974: CALL(0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0); break;
3792 // case 975: CALL(1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0); break;
3793 case 976: CALL(0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0); break;
3794 case 977: CALL(1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0); break;
3795 case 978: CALL(0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0); break;
3796 case 979: CALL(1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0); break;
3797 case 980: CALL(0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0); break;
3798 case 981: CALL(1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0); break;
3799 case 982: CALL(0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0); break;
3800 case 983: CALL(1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0); break;
3801 case 984: CALL(0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0); break;
3802 case 985: CALL(1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0); break;
3803 case 986: CALL(0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0); break;
3804 case 987: CALL(1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0); break;
3805 case 988: CALL(0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0); break;
3806 case 989: CALL(1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0); break;
3807 case 990: CALL(0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0); break;
3808 case 991: CALL(1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0); break;
3809 // case 992: CALL(0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0); break;
3810 // case 993: CALL(1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0); break;
3811 // case 994: CALL(0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0); break;
3812 // case 995: CALL(1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0); break;
3813 // case 996: CALL(0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0); break;
3814 // case 997: CALL(1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0); break;
3815 // case 998: CALL(0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0); break;
3816 // case 999: CALL(1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0); break;
3817 // case 1000: CALL(0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0); break;
3818 // case 1001: CALL(1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0); break;
3819 // case 1002: CALL(0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0); break;
3820 // case 1003: CALL(1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0); break;
3821 // case 1004: CALL(0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0); break;
3822 // case 1005: CALL(1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0); break;
3823 // case 1006: CALL(0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0); break;
3824 // case 1007: CALL(1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0); break;
3825 // case 1008: CALL(0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0); break;
3826 // case 1009: CALL(1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0); break;
3827 // case 1010: CALL(0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0); break;
3828 // case 1011: CALL(1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0); break;
3829 // case 1012: CALL(0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0); break;
3830 // case 1013: CALL(1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0); break;
3831 // case 1014: CALL(0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0); break;
3832 // case 1015: CALL(1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0); break;
3833 // case 1016: CALL(0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0); break;
3834 // case 1017: CALL(1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0); break;
3835 // case 1018: CALL(0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0); break;
3836 // case 1019: CALL(1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0); break;
3837 // case 1020: CALL(0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0); break;
3838 // case 1021: CALL(1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0); break;
3839 // case 1022: CALL(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0); break;
3840 // case 1023: CALL(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0); break;
3841 case 1024: CALL(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); break;
3842 case 1025: CALL(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); break;
3843 case 1026: CALL(0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1); break;
3844 case 1027: CALL(1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1); break;
3845 case 1028: CALL(0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1); break;
3846 case 1029: CALL(1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1); break;
3847 case 1030: CALL(0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1); break;
3848 case 1031: CALL(1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1); break;
3849 case 1032: CALL(0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1); break;
3850 case 1033: CALL(1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1); break;
3851 case 1034: CALL(0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1); break;
3852 case 1035: CALL(1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1); break;
3853 case 1036: CALL(0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1); break;
3854 case 1037: CALL(1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1); break;
3855 case 1038: CALL(0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1); break;
3856 case 1039: CALL(1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1); break;
3857 // case 1040: CALL(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1); break;
3858 // case 1041: CALL(1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1); break;
3859 // case 1042: CALL(0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1); break;
3860 // case 1043: CALL(1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1); break;
3861 // case 1044: CALL(0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1); break;
3862 // case 1045: CALL(1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1); break;
3863 // case 1046: CALL(0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1); break;
3864 // case 1047: CALL(1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1); break;
3865 // case 1048: CALL(0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1); break;
3866 // case 1049: CALL(1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1); break;
3867 // case 1050: CALL(0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1); break;
3868 // case 1051: CALL(1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1); break;
3869 // case 1052: CALL(0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1); break;
3870 // case 1053: CALL(1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1); break;
3871 // case 1054: CALL(0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1); break;
3872 // case 1055: CALL(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1); break;
3873 // case 1056: CALL(0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1); break;
3874 // case 1057: CALL(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1); break;
3875 // case 1058: CALL(0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1); break;
3876 // case 1059: CALL(1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1); break;
3877 // case 1060: CALL(0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1); break;
3878 // case 1061: CALL(1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1); break;
3879 // case 1062: CALL(0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1); break;
3880 // case 1063: CALL(1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1); break;
3881 // case 1064: CALL(0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1); break;
3882 // case 1065: CALL(1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1); break;
3883 // case 1066: CALL(0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1); break;
3884 // case 1067: CALL(1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1); break;
3885 // case 1068: CALL(0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1); break;
3886 // case 1069: CALL(1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1); break;
3887 // case 1070: CALL(0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1); break;
3888 // case 1071: CALL(1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1); break;
3889 // case 1072: CALL(0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1); break;
3890 // case 1073: CALL(1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1); break;
3891 // case 1074: CALL(0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1); break;
3892 // case 1075: CALL(1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1); break;
3893 // case 1076: CALL(0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1); break;
3894 // case 1077: CALL(1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1); break;
3895 // case 1078: CALL(0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1); break;
3896 // case 1079: CALL(1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1); break;
3897 // case 1080: CALL(0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1); break;
3898 // case 1081: CALL(1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1); break;
3899 // case 1082: CALL(0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1); break;
3900 // case 1083: CALL(1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1); break;
3901 // case 1084: CALL(0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1); break;
3902 // case 1085: CALL(1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1); break;
3903 // case 1086: CALL(0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1); break;
3904 // case 1087: CALL(1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1); break;
3905 // case 1088: CALL(0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1); break;
3906 // case 1089: CALL(1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1); break;
3907 // case 1090: CALL(0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1); break;
3908 // case 1091: CALL(1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1); break;
3909 // case 1092: CALL(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1); break;
3910 // case 1093: CALL(1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1); break;
3911 // case 1094: CALL(0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1); break;
3912 // case 1095: CALL(1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1); break;
3913 // case 1096: CALL(0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1); break;
3914 // case 1097: CALL(1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1); break;
3915 // case 1098: CALL(0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1); break;
3916 // case 1099: CALL(1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1); break;
3917 // case 1100: CALL(0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1); break;
3918 // case 1101: CALL(1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1); break;
3919 // case 1102: CALL(0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1); break;
3920 // case 1103: CALL(1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1); break;
3921 // case 1104: CALL(0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1); break;
3922 // case 1105: CALL(1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1); break;
3923 // case 1106: CALL(0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1); break;
3924 // case 1107: CALL(1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1); break;
3925 // case 1108: CALL(0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1); break;
3926 // case 1109: CALL(1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1); break;
3927 // case 1110: CALL(0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1); break;
3928 // case 1111: CALL(1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1); break;
3929 // case 1112: CALL(0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1); break;
3930 // case 1113: CALL(1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1); break;
3931 // case 1114: CALL(0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1); break;
3932 // case 1115: CALL(1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1); break;
3933 // case 1116: CALL(0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1); break;
3934 // case 1117: CALL(1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1); break;
3935 // case 1118: CALL(0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1); break;
3936 // case 1119: CALL(1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1); break;
3937 // case 1120: CALL(0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1); break;
3938 // case 1121: CALL(1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1); break;
3939 // case 1122: CALL(0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1); break;
3940 // case 1123: CALL(1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1); break;
3941 // case 1124: CALL(0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1); break;
3942 // case 1125: CALL(1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1); break;
3943 // case 1126: CALL(0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1); break;
3944 // case 1127: CALL(1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1); break;
3945 // case 1128: CALL(0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1); break;
3946 // case 1129: CALL(1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1); break;
3947 // case 1130: CALL(0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1); break;
3948 // case 1131: CALL(1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1); break;
3949 // case 1132: CALL(0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1); break;
3950 // case 1133: CALL(1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1); break;
3951 // case 1134: CALL(0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1); break;
3952 // case 1135: CALL(1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1); break;
3953 // case 1136: CALL(0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1); break;
3954 // case 1137: CALL(1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1); break;
3955 // case 1138: CALL(0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1); break;
3956 // case 1139: CALL(1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1); break;
3957 // case 1140: CALL(0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1); break;
3958 // case 1141: CALL(1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1); break;
3959 // case 1142: CALL(0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1); break;
3960 // case 1143: CALL(1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1); break;
3961 // case 1144: CALL(0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1); break;
3962 // case 1145: CALL(1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1); break;
3963 // case 1146: CALL(0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1); break;
3964 // case 1147: CALL(1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1); break;
3965 // case 1148: CALL(0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1); break;
3966 // case 1149: CALL(1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1); break;
3967 // case 1150: CALL(0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1); break;
3968 // case 1151: CALL(1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1); break;
3969 case 1152: CALL(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1); break;
3970 case 1153: CALL(1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1); break;
3971 case 1154: CALL(0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1); break;
3972 case 1155: CALL(1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1); break;
3973 case 1156: CALL(0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1); break;
3974 case 1157: CALL(1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1); break;
3975 case 1158: CALL(0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1); break;
3976 case 1159: CALL(1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1); break;
3977 case 1160: CALL(0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1); break;
3978 case 1161: CALL(1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1); break;
3979 case 1162: CALL(0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1); break;
3980 case 1163: CALL(1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1); break;
3981 case 1164: CALL(0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1); break;
3982 case 1165: CALL(1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1); break;
3983 case 1166: CALL(0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1); break;
3984 case 1167: CALL(1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1); break;
3985 // case 1168: CALL(0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1); break;
3986 // case 1169: CALL(1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1); break;
3987 // case 1170: CALL(0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1); break;
3988 // case 1171: CALL(1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1); break;
3989 // case 1172: CALL(0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1); break;
3990 // case 1173: CALL(1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1); break;
3991 // case 1174: CALL(0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1); break;
3992 // case 1175: CALL(1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1); break;
3993 // case 1176: CALL(0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1); break;
3994 // case 1177: CALL(1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1); break;
3995 // case 1178: CALL(0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1); break;
3996 // case 1179: CALL(1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1); break;
3997 // case 1180: CALL(0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1); break;
3998 // case 1181: CALL(1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1); break;
3999 // case 1182: CALL(0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1); break;
4000 // case 1183: CALL(1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1); break;
4001 // case 1184: CALL(0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1); break;
4002 // case 1185: CALL(1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1); break;
4003 // case 1186: CALL(0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1); break;
4004 // case 1187: CALL(1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1); break;
4005 // case 1188: CALL(0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1); break;
4006 // case 1189: CALL(1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1); break;
4007 // case 1190: CALL(0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1); break;
4008 // case 1191: CALL(1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1); break;
4009 // case 1192: CALL(0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1); break;
4010 // case 1193: CALL(1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1); break;
4011 // case 1194: CALL(0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1); break;
4012 // case 1195: CALL(1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1); break;
4013 // case 1196: CALL(0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1); break;
4014 // case 1197: CALL(1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1); break;
4015 // case 1198: CALL(0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1); break;
4016 // case 1199: CALL(1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1); break;
4017 // case 1200: CALL(0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1); break;
4018 // case 1201: CALL(1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1); break;
4019 // case 1202: CALL(0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1); break;
4020 // case 1203: CALL(1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1); break;
4021 // case 1204: CALL(0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1); break;
4022 // case 1205: CALL(1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1); break;
4023 // case 1206: CALL(0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1); break;
4024 // case 1207: CALL(1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1); break;
4025 // case 1208: CALL(0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1); break;
4026 // case 1209: CALL(1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1); break;
4027 // case 1210: CALL(0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1); break;
4028 // case 1211: CALL(1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1); break;
4029 // case 1212: CALL(0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1); break;
4030 // case 1213: CALL(1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1); break;
4031 // case 1214: CALL(0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1); break;
4032 // case 1215: CALL(1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1); break;
4033 // case 1216: CALL(0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1); break;
4034 // case 1217: CALL(1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1); break;
4035 // case 1218: CALL(0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1); break;
4036 // case 1219: CALL(1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1); break;
4037 // case 1220: CALL(0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1); break;
4038 // case 1221: CALL(1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1); break;
4039 // case 1222: CALL(0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1); break;
4040 // case 1223: CALL(1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1); break;
4041 // case 1224: CALL(0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1); break;
4042 // case 1225: CALL(1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1); break;
4043 // case 1226: CALL(0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1); break;
4044 // case 1227: CALL(1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1); break;
4045 // case 1228: CALL(0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1); break;
4046 // case 1229: CALL(1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1); break;
4047 // case 1230: CALL(0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1); break;
4048 // case 1231: CALL(1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1); break;
4049 // case 1232: CALL(0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1); break;
4050 // case 1233: CALL(1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1); break;
4051 // case 1234: CALL(0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1); break;
4052 // case 1235: CALL(1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1); break;
4053 // case 1236: CALL(0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1); break;
4054 // case 1237: CALL(1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1); break;
4055 // case 1238: CALL(0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1); break;
4056 // case 1239: CALL(1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1); break;
4057 // case 1240: CALL(0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1); break;
4058 // case 1241: CALL(1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1); break;
4059 // case 1242: CALL(0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1); break;
4060 // case 1243: CALL(1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1); break;
4061 // case 1244: CALL(0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1); break;
4062 // case 1245: CALL(1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1); break;
4063 // case 1246: CALL(0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1); break;
4064 // case 1247: CALL(1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1); break;
4065 // case 1248: CALL(0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1); break;
4066 // case 1249: CALL(1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1); break;
4067 // case 1250: CALL(0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1); break;
4068 // case 1251: CALL(1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1); break;
4069 // case 1252: CALL(0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1); break;
4070 // case 1253: CALL(1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1); break;
4071 // case 1254: CALL(0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1); break;
4072 // case 1255: CALL(1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1); break;
4073 // case 1256: CALL(0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1); break;
4074 // case 1257: CALL(1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1); break;
4075 // case 1258: CALL(0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1); break;
4076 // case 1259: CALL(1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1); break;
4077 // case 1260: CALL(0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1); break;
4078 // case 1261: CALL(1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1); break;
4079 // case 1262: CALL(0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1); break;
4080 // case 1263: CALL(1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1); break;
4081 // case 1264: CALL(0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1); break;
4082 // case 1265: CALL(1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1); break;
4083 // case 1266: CALL(0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1); break;
4084 // case 1267: CALL(1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1); break;
4085 // case 1268: CALL(0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1); break;
4086 // case 1269: CALL(1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1); break;
4087 // case 1270: CALL(0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1); break;
4088 // case 1271: CALL(1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1); break;
4089 // case 1272: CALL(0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1); break;
4090 // case 1273: CALL(1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1); break;
4091 // case 1274: CALL(0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1); break;
4092 // case 1275: CALL(1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1); break;
4093 // case 1276: CALL(0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1); break;
4094 // case 1277: CALL(1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1); break;
4095 // case 1278: CALL(0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1); break;
4096 // case 1279: CALL(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1); break;
4097 case 1280: CALL(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1); break;
4098 case 1281: CALL(1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1); break;
4099 case 1282: CALL(0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1); break;
4100 case 1283: CALL(1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1); break;
4101 case 1284: CALL(0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1); break;
4102 case 1285: CALL(1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1); break;
4103 case 1286: CALL(0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1); break;
4104 case 1287: CALL(1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1); break;
4105 case 1288: CALL(0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1); break;
4106 case 1289: CALL(1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1); break;
4107 case 1290: CALL(0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1); break;
4108 case 1291: CALL(1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1); break;
4109 case 1292: CALL(0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1); break;
4110 case 1293: CALL(1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1); break;
4111 case 1294: CALL(0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1); break;
4112 case 1295: CALL(1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1); break;
4113 // case 1296: CALL(0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1); break;
4114 // case 1297: CALL(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1); break;
4115 // case 1298: CALL(0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1); break;
4116 // case 1299: CALL(1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1); break;
4117 // case 1300: CALL(0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1); break;
4118 // case 1301: CALL(1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1); break;
4119 // case 1302: CALL(0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1); break;
4120 // case 1303: CALL(1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1); break;
4121 // case 1304: CALL(0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1); break;
4122 // case 1305: CALL(1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1); break;
4123 // case 1306: CALL(0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1); break;
4124 // case 1307: CALL(1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1); break;
4125 // case 1308: CALL(0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1); break;
4126 // case 1309: CALL(1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1); break;
4127 // case 1310: CALL(0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1); break;
4128 // case 1311: CALL(1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1); break;
4129 // case 1312: CALL(0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1); break;
4130 // case 1313: CALL(1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1); break;
4131 // case 1314: CALL(0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1); break;
4132 // case 1315: CALL(1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1); break;
4133 // case 1316: CALL(0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1); break;
4134 // case 1317: CALL(1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1); break;
4135 // case 1318: CALL(0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1); break;
4136 // case 1319: CALL(1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1); break;
4137 // case 1320: CALL(0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1); break;
4138 // case 1321: CALL(1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1); break;
4139 // case 1322: CALL(0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1); break;
4140 // case 1323: CALL(1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1); break;
4141 // case 1324: CALL(0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1); break;
4142 // case 1325: CALL(1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1); break;
4143 // case 1326: CALL(0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1); break;
4144 // case 1327: CALL(1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1); break;
4145 // case 1328: CALL(0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1); break;
4146 // case 1329: CALL(1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1); break;
4147 // case 1330: CALL(0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1); break;
4148 // case 1331: CALL(1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1); break;
4149 // case 1332: CALL(0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1); break;
4150 // case 1333: CALL(1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1); break;
4151 // case 1334: CALL(0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1); break;
4152 // case 1335: CALL(1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1); break;
4153 // case 1336: CALL(0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1); break;
4154 // case 1337: CALL(1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1); break;
4155 // case 1338: CALL(0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1); break;
4156 // case 1339: CALL(1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1); break;
4157 // case 1340: CALL(0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1); break;
4158 // case 1341: CALL(1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1); break;
4159 // case 1342: CALL(0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1); break;
4160 // case 1343: CALL(1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1); break;
4161 // case 1344: CALL(0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1); break;
4162 // case 1345: CALL(1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1); break;
4163 // case 1346: CALL(0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1); break;
4164 // case 1347: CALL(1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1); break;
4165 // case 1348: CALL(0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1); break;
4166 // case 1349: CALL(1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1); break;
4167 // case 1350: CALL(0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1); break;
4168 // case 1351: CALL(1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1); break;
4169 // case 1352: CALL(0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1); break;
4170 // case 1353: CALL(1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1); break;
4171 // case 1354: CALL(0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1); break;
4172 // case 1355: CALL(1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1); break;
4173 // case 1356: CALL(0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1); break;
4174 // case 1357: CALL(1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1); break;
4175 // case 1358: CALL(0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1); break;
4176 // case 1359: CALL(1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1); break;
4177 // case 1360: CALL(0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1); break;
4178 // case 1361: CALL(1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1); break;
4179 // case 1362: CALL(0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1); break;
4180 // case 1363: CALL(1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1); break;
4181 // case 1364: CALL(0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1); break;
4182 // case 1365: CALL(1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1); break;
4183 // case 1366: CALL(0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1); break;
4184 // case 1367: CALL(1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1); break;
4185 // case 1368: CALL(0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1); break;
4186 // case 1369: CALL(1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1); break;
4187 // case 1370: CALL(0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1); break;
4188 // case 1371: CALL(1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1); break;
4189 // case 1372: CALL(0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1); break;
4190 // case 1373: CALL(1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1); break;
4191 // case 1374: CALL(0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1); break;
4192 // case 1375: CALL(1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1); break;
4193 // case 1376: CALL(0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1); break;
4194 // case 1377: CALL(1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1); break;
4195 // case 1378: CALL(0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1); break;
4196 // case 1379: CALL(1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1); break;
4197 // case 1380: CALL(0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1); break;
4198 // case 1381: CALL(1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1); break;
4199 // case 1382: CALL(0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1); break;
4200 // case 1383: CALL(1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1); break;
4201 // case 1384: CALL(0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1); break;
4202 // case 1385: CALL(1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1); break;
4203 // case 1386: CALL(0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1); break;
4204 // case 1387: CALL(1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1); break;
4205 // case 1388: CALL(0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1); break;
4206 // case 1389: CALL(1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1); break;
4207 // case 1390: CALL(0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1); break;
4208 // case 1391: CALL(1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1); break;
4209 // case 1392: CALL(0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1); break;
4210 // case 1393: CALL(1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1); break;
4211 // case 1394: CALL(0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1); break;
4212 // case 1395: CALL(1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1); break;
4213 // case 1396: CALL(0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1); break;
4214 // case 1397: CALL(1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1); break;
4215 // case 1398: CALL(0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1); break;
4216 // case 1399: CALL(1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1); break;
4217 // case 1400: CALL(0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1); break;
4218 // case 1401: CALL(1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1); break;
4219 // case 1402: CALL(0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1); break;
4220 // case 1403: CALL(1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1); break;
4221 // case 1404: CALL(0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1); break;
4222 // case 1405: CALL(1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1); break;
4223 // case 1406: CALL(0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1); break;
4224 // case 1407: CALL(1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1); break;
4225 case 1408: CALL(0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1); break;
4226 case 1409: CALL(1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1); break;
4227 case 1410: CALL(0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1); break;
4228 case 1411: CALL(1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1); break;
4229 case 1412: CALL(0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1); break;
4230 case 1413: CALL(1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1); break;
4231 case 1414: CALL(0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1); break;
4232 case 1415: CALL(1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1); break;
4233 case 1416: CALL(0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1); break;
4234 case 1417: CALL(1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1); break;
4235 case 1418: CALL(0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1); break;
4236 case 1419: CALL(1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1); break;
4237 case 1420: CALL(0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1); break;
4238 case 1421: CALL(1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1); break;
4239 case 1422: CALL(0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1); break;
4240 case 1423: CALL(1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1); break;
4241 // case 1424: CALL(0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1); break;
4242 // case 1425: CALL(1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1); break;
4243 // case 1426: CALL(0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1); break;
4244 // case 1427: CALL(1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1); break;
4245 // case 1428: CALL(0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1); break;
4246 // case 1429: CALL(1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1); break;
4247 // case 1430: CALL(0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1); break;
4248 // case 1431: CALL(1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1); break;
4249 // case 1432: CALL(0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1); break;
4250 // case 1433: CALL(1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1); break;
4251 // case 1434: CALL(0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1); break;
4252 // case 1435: CALL(1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1); break;
4253 // case 1436: CALL(0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1); break;
4254 // case 1437: CALL(1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1); break;
4255 // case 1438: CALL(0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1); break;
4256 // case 1439: CALL(1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1); break;
4257 // case 1440: CALL(0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1); break;
4258 // case 1441: CALL(1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1); break;
4259 // case 1442: CALL(0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1); break;
4260 // case 1443: CALL(1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1); break;
4261 // case 1444: CALL(0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1); break;
4262 // case 1445: CALL(1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1); break;
4263 // case 1446: CALL(0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1); break;
4264 // case 1447: CALL(1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1); break;
4265 // case 1448: CALL(0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1); break;
4266 // case 1449: CALL(1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1); break;
4267 // case 1450: CALL(0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1); break;
4268 // case 1451: CALL(1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1); break;
4269 // case 1452: CALL(0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1); break;
4270 // case 1453: CALL(1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1); break;
4271 // case 1454: CALL(0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1); break;
4272 // case 1455: CALL(1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1); break;
4273 // case 1456: CALL(0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1); break;
4274 // case 1457: CALL(1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1); break;
4275 // case 1458: CALL(0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1); break;
4276 // case 1459: CALL(1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1); break;
4277 // case 1460: CALL(0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1); break;
4278 // case 1461: CALL(1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1); break;
4279 // case 1462: CALL(0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1); break;
4280 // case 1463: CALL(1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1); break;
4281 // case 1464: CALL(0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1); break;
4282 // case 1465: CALL(1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1); break;
4283 // case 1466: CALL(0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1); break;
4284 // case 1467: CALL(1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1); break;
4285 // case 1468: CALL(0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1); break;
4286 // case 1469: CALL(1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1); break;
4287 // case 1470: CALL(0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1); break;
4288 // case 1471: CALL(1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1); break;
4289 // case 1472: CALL(0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1); break;
4290 // case 1473: CALL(1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1); break;
4291 // case 1474: CALL(0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1); break;
4292 // case 1475: CALL(1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1); break;
4293 // case 1476: CALL(0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1); break;
4294 // case 1477: CALL(1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1); break;
4295 // case 1478: CALL(0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1); break;
4296 // case 1479: CALL(1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1); break;
4297 // case 1480: CALL(0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1); break;
4298 // case 1481: CALL(1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1); break;
4299 // case 1482: CALL(0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1); break;
4300 // case 1483: CALL(1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1); break;
4301 // case 1484: CALL(0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1); break;
4302 // case 1485: CALL(1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1); break;
4303 // case 1486: CALL(0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1); break;
4304 // case 1487: CALL(1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1); break;
4305 // case 1488: CALL(0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1); break;
4306 // case 1489: CALL(1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1); break;
4307 // case 1490: CALL(0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1); break;
4308 // case 1491: CALL(1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1); break;
4309 // case 1492: CALL(0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1); break;
4310 // case 1493: CALL(1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1); break;
4311 // case 1494: CALL(0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1); break;
4312 // case 1495: CALL(1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1); break;
4313 // case 1496: CALL(0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1); break;
4314 // case 1497: CALL(1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1); break;
4315 // case 1498: CALL(0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1); break;
4316 // case 1499: CALL(1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1); break;
4317 // case 1500: CALL(0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1); break;
4318 // case 1501: CALL(1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1); break;
4319 // case 1502: CALL(0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1); break;
4320 // case 1503: CALL(1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1); break;
4321 // case 1504: CALL(0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1); break;
4322 // case 1505: CALL(1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1); break;
4323 // case 1506: CALL(0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1); break;
4324 // case 1507: CALL(1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1); break;
4325 // case 1508: CALL(0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1); break;
4326 // case 1509: CALL(1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1); break;
4327 // case 1510: CALL(0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1); break;
4328 // case 1511: CALL(1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1); break;
4329 // case 1512: CALL(0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1); break;
4330 // case 1513: CALL(1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1); break;
4331 // case 1514: CALL(0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1); break;
4332 // case 1515: CALL(1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1); break;
4333 // case 1516: CALL(0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1); break;
4334 // case 1517: CALL(1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1); break;
4335 // case 1518: CALL(0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1); break;
4336 // case 1519: CALL(1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1); break;
4337 // case 1520: CALL(0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1); break;
4338 // case 1521: CALL(1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1); break;
4339 // case 1522: CALL(0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1); break;
4340 // case 1523: CALL(1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1); break;
4341 // case 1524: CALL(0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1); break;
4342 // case 1525: CALL(1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1); break;
4343 // case 1526: CALL(0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1); break;
4344 // case 1527: CALL(1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1); break;
4345 // case 1528: CALL(0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1); break;
4346 // case 1529: CALL(1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1); break;
4347 // case 1530: CALL(0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1); break;
4348 // case 1531: CALL(1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1); break;
4349 // case 1532: CALL(0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1); break;
4350 // case 1533: CALL(1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1); break;
4351 // case 1534: CALL(0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1); break;
4352 // case 1535: CALL(1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1); break;
4353 // case 1536: CALL(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1); break;
4354 // case 1537: CALL(1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1); break;
4355 // case 1538: CALL(0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1); break;
4356 // case 1539: CALL(1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1); break;
4357 // case 1540: CALL(0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1); break;
4358 // case 1541: CALL(1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1); break;
4359 // case 1542: CALL(0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1); break;
4360 // case 1543: CALL(1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1); break;
4361 // case 1544: CALL(0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1); break;
4362 // case 1545: CALL(1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1); break;
4363 // case 1546: CALL(0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1); break;
4364 // case 1547: CALL(1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1); break;
4365 // case 1548: CALL(0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1); break;
4366 // case 1549: CALL(1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1); break;
4367 // case 1550: CALL(0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1); break;
4368 // case 1551: CALL(1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1); break;
4369 // case 1552: CALL(0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1); break;
4370 // case 1553: CALL(1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1); break;
4371 // case 1554: CALL(0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1); break;
4372 // case 1555: CALL(1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1); break;
4373 // case 1556: CALL(0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1); break;
4374 // case 1557: CALL(1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1); break;
4375 // case 1558: CALL(0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1); break;
4376 // case 1559: CALL(1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1); break;
4377 // case 1560: CALL(0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1); break;
4378 // case 1561: CALL(1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1); break;
4379 // case 1562: CALL(0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1); break;
4380 // case 1563: CALL(1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1); break;
4381 // case 1564: CALL(0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1); break;
4382 // case 1565: CALL(1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1); break;
4383 // case 1566: CALL(0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1); break;
4384 // case 1567: CALL(1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1); break;
4385 // case 1568: CALL(0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1); break;
4386 // case 1569: CALL(1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1); break;
4387 // case 1570: CALL(0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1); break;
4388 // case 1571: CALL(1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1); break;
4389 // case 1572: CALL(0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1); break;
4390 // case 1573: CALL(1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1); break;
4391 // case 1574: CALL(0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1); break;
4392 // case 1575: CALL(1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1); break;
4393 // case 1576: CALL(0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1); break;
4394 // case 1577: CALL(1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1); break;
4395 // case 1578: CALL(0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1); break;
4396 // case 1579: CALL(1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1); break;
4397 // case 1580: CALL(0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1); break;
4398 // case 1581: CALL(1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1); break;
4399 // case 1582: CALL(0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1); break;
4400 // case 1583: CALL(1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1); break;
4401 // case 1584: CALL(0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1); break;
4402 // case 1585: CALL(1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1); break;
4403 // case 1586: CALL(0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1); break;
4404 // case 1587: CALL(1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1); break;
4405 // case 1588: CALL(0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1); break;
4406 // case 1589: CALL(1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1); break;
4407 // case 1590: CALL(0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1); break;
4408 // case 1591: CALL(1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1); break;
4409 // case 1592: CALL(0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1); break;
4410 // case 1593: CALL(1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1); break;
4411 // case 1594: CALL(0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1); break;
4412 // case 1595: CALL(1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1); break;
4413 // case 1596: CALL(0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1); break;
4414 // case 1597: CALL(1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1); break;
4415 // case 1598: CALL(0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1); break;
4416 // case 1599: CALL(1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1); break;
4417 // case 1600: CALL(0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1); break;
4418 // case 1601: CALL(1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1); break;
4419 // case 1602: CALL(0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1); break;
4420 // case 1603: CALL(1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1); break;
4421 // case 1604: CALL(0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1); break;
4422 // case 1605: CALL(1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1); break;
4423 // case 1606: CALL(0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1); break;
4424 // case 1607: CALL(1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1); break;
4425 // case 1608: CALL(0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1); break;
4426 // case 1609: CALL(1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1); break;
4427 // case 1610: CALL(0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1); break;
4428 // case 1611: CALL(1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1); break;
4429 // case 1612: CALL(0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1); break;
4430 // case 1613: CALL(1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1); break;
4431 // case 1614: CALL(0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1); break;
4432 // case 1615: CALL(1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1); break;
4433 // case 1616: CALL(0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1); break;
4434 // case 1617: CALL(1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1); break;
4435 // case 1618: CALL(0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1); break;
4436 // case 1619: CALL(1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1); break;
4437 // case 1620: CALL(0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1); break;
4438 // case 1621: CALL(1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1); break;
4439 // case 1622: CALL(0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1); break;
4440 // case 1623: CALL(1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1); break;
4441 // case 1624: CALL(0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1); break;
4442 // case 1625: CALL(1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1); break;
4443 // case 1626: CALL(0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1); break;
4444 // case 1627: CALL(1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1); break;
4445 // case 1628: CALL(0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1); break;
4446 // case 1629: CALL(1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1); break;
4447 // case 1630: CALL(0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1); break;
4448 // case 1631: CALL(1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1); break;
4449 // case 1632: CALL(0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1); break;
4450 // case 1633: CALL(1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1); break;
4451 // case 1634: CALL(0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1); break;
4452 // case 1635: CALL(1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1); break;
4453 // case 1636: CALL(0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1); break;
4454 // case 1637: CALL(1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1); break;
4455 // case 1638: CALL(0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1); break;
4456 // case 1639: CALL(1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1); break;
4457 // case 1640: CALL(0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1); break;
4458 // case 1641: CALL(1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1); break;
4459 // case 1642: CALL(0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1); break;
4460 // case 1643: CALL(1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1); break;
4461 // case 1644: CALL(0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1); break;
4462 // case 1645: CALL(1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1); break;
4463 // case 1646: CALL(0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1); break;
4464 // case 1647: CALL(1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1); break;
4465 // case 1648: CALL(0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1); break;
4466 // case 1649: CALL(1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1); break;
4467 // case 1650: CALL(0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1); break;
4468 // case 1651: CALL(1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1); break;
4469 // case 1652: CALL(0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1); break;
4470 // case 1653: CALL(1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1); break;
4471 // case 1654: CALL(0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1); break;
4472 // case 1655: CALL(1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1); break;
4473 // case 1656: CALL(0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1); break;
4474 // case 1657: CALL(1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1); break;
4475 // case 1658: CALL(0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1); break;
4476 // case 1659: CALL(1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1); break;
4477 // case 1660: CALL(0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1); break;
4478 // case 1661: CALL(1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1); break;
4479 // case 1662: CALL(0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1); break;
4480 // case 1663: CALL(1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1); break;
4481 // case 1664: CALL(0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1); break;
4482 // case 1665: CALL(1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1); break;
4483 // case 1666: CALL(0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1); break;
4484 // case 1667: CALL(1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1); break;
4485 // case 1668: CALL(0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1); break;
4486 // case 1669: CALL(1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1); break;
4487 // case 1670: CALL(0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1); break;
4488 // case 1671: CALL(1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1); break;
4489 // case 1672: CALL(0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1); break;
4490 // case 1673: CALL(1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1); break;
4491 // case 1674: CALL(0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1); break;
4492 // case 1675: CALL(1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1); break;
4493 // case 1676: CALL(0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1); break;
4494 // case 1677: CALL(1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1); break;
4495 // case 1678: CALL(0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1); break;
4496 // case 1679: CALL(1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1); break;
4497 // case 1680: CALL(0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1); break;
4498 // case 1681: CALL(1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1); break;
4499 // case 1682: CALL(0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1); break;
4500 // case 1683: CALL(1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1); break;
4501 // case 1684: CALL(0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1); break;
4502 // case 1685: CALL(1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1); break;
4503 // case 1686: CALL(0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1); break;
4504 // case 1687: CALL(1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1); break;
4505 // case 1688: CALL(0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1); break;
4506 // case 1689: CALL(1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1); break;
4507 // case 1690: CALL(0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1); break;
4508 // case 1691: CALL(1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1); break;
4509 // case 1692: CALL(0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1); break;
4510 // case 1693: CALL(1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1); break;
4511 // case 1694: CALL(0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1); break;
4512 // case 1695: CALL(1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1); break;
4513 // case 1696: CALL(0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1); break;
4514 // case 1697: CALL(1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1); break;
4515 // case 1698: CALL(0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1); break;
4516 // case 1699: CALL(1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1); break;
4517 // case 1700: CALL(0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1); break;
4518 // case 1701: CALL(1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1); break;
4519 // case 1702: CALL(0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1); break;
4520 // case 1703: CALL(1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1); break;
4521 // case 1704: CALL(0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1); break;
4522 // case 1705: CALL(1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1); break;
4523 // case 1706: CALL(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1); break;
4524 // case 1707: CALL(1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1); break;
4525 // case 1708: CALL(0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1); break;
4526 // case 1709: CALL(1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1); break;
4527 // case 1710: CALL(0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1); break;
4528 // case 1711: CALL(1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1); break;
4529 // case 1712: CALL(0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1); break;
4530 // case 1713: CALL(1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1); break;
4531 // case 1714: CALL(0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1); break;
4532 // case 1715: CALL(1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1); break;
4533 // case 1716: CALL(0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1); break;
4534 // case 1717: CALL(1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1); break;
4535 // case 1718: CALL(0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1); break;
4536 // case 1719: CALL(1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1); break;
4537 // case 1720: CALL(0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1); break;
4538 // case 1721: CALL(1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1); break;
4539 // case 1722: CALL(0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1); break;
4540 // case 1723: CALL(1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1); break;
4541 // case 1724: CALL(0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1); break;
4542 // case 1725: CALL(1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1); break;
4543 // case 1726: CALL(0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1); break;
4544 // case 1727: CALL(1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1); break;
4545 // case 1728: CALL(0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1); break;
4546 // case 1729: CALL(1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1); break;
4547 // case 1730: CALL(0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1); break;
4548 // case 1731: CALL(1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1); break;
4549 // case 1732: CALL(0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1); break;
4550 // case 1733: CALL(1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1); break;
4551 // case 1734: CALL(0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1); break;
4552 // case 1735: CALL(1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1); break;
4553 // case 1736: CALL(0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1); break;
4554 // case 1737: CALL(1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1); break;
4555 // case 1738: CALL(0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1); break;
4556 // case 1739: CALL(1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1); break;
4557 // case 1740: CALL(0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1); break;
4558 // case 1741: CALL(1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1); break;
4559 // case 1742: CALL(0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1); break;
4560 // case 1743: CALL(1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1); break;
4561 // case 1744: CALL(0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1); break;
4562 // case 1745: CALL(1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1); break;
4563 // case 1746: CALL(0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1); break;
4564 // case 1747: CALL(1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1); break;
4565 // case 1748: CALL(0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1); break;
4566 // case 1749: CALL(1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1); break;
4567 // case 1750: CALL(0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1); break;
4568 // case 1751: CALL(1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1); break;
4569 // case 1752: CALL(0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1); break;
4570 // case 1753: CALL(1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1); break;
4571 // case 1754: CALL(0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1); break;
4572 // case 1755: CALL(1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1); break;
4573 // case 1756: CALL(0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1); break;
4574 // case 1757: CALL(1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1); break;
4575 // case 1758: CALL(0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1); break;
4576 // case 1759: CALL(1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1); break;
4577 // case 1760: CALL(0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1); break;
4578 // case 1761: CALL(1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1); break;
4579 // case 1762: CALL(0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1); break;
4580 // case 1763: CALL(1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1); break;
4581 // case 1764: CALL(0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1); break;
4582 // case 1765: CALL(1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1); break;
4583 // case 1766: CALL(0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1); break;
4584 // case 1767: CALL(1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1); break;
4585 // case 1768: CALL(0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1); break;
4586 // case 1769: CALL(1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1); break;
4587 // case 1770: CALL(0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1); break;
4588 // case 1771: CALL(1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1); break;
4589 // case 1772: CALL(0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1); break;
4590 // case 1773: CALL(1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1); break;
4591 // case 1774: CALL(0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1); break;
4592 // case 1775: CALL(1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1); break;
4593 // case 1776: CALL(0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1); break;
4594 // case 1777: CALL(1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1); break;
4595 // case 1778: CALL(0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1); break;
4596 // case 1779: CALL(1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1); break;
4597 // case 1780: CALL(0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1); break;
4598 // case 1781: CALL(1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1); break;
4599 // case 1782: CALL(0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1); break;
4600 // case 1783: CALL(1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1); break;
4601 // case 1784: CALL(0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1); break;
4602 // case 1785: CALL(1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1); break;
4603 // case 1786: CALL(0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1); break;
4604 // case 1787: CALL(1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1); break;
4605 // case 1788: CALL(0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1); break;
4606 // case 1789: CALL(1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1); break;
4607 // case 1790: CALL(0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1); break;
4608 // case 1791: CALL(1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1); break;
4609 // case 1792: CALL(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1); break;
4610 // case 1793: CALL(1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1); break;
4611 // case 1794: CALL(0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1); break;
4612 // case 1795: CALL(1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1); break;
4613 // case 1796: CALL(0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1); break;
4614 // case 1797: CALL(1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1); break;
4615 // case 1798: CALL(0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1); break;
4616 // case 1799: CALL(1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1); break;
4617 // case 1800: CALL(0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1); break;
4618 // case 1801: CALL(1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1); break;
4619 // case 1802: CALL(0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1); break;
4620 // case 1803: CALL(1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1); break;
4621 // case 1804: CALL(0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1); break;
4622 // case 1805: CALL(1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1); break;
4623 // case 1806: CALL(0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1); break;
4624 // case 1807: CALL(1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1); break;
4625 // case 1808: CALL(0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1); break;
4626 // case 1809: CALL(1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1); break;
4627 // case 1810: CALL(0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1); break;
4628 // case 1811: CALL(1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1); break;
4629 // case 1812: CALL(0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1); break;
4630 // case 1813: CALL(1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1); break;
4631 // case 1814: CALL(0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1); break;
4632 // case 1815: CALL(1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1); break;
4633 // case 1816: CALL(0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1); break;
4634 // case 1817: CALL(1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1); break;
4635 // case 1818: CALL(0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1); break;
4636 // case 1819: CALL(1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1); break;
4637 // case 1820: CALL(0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1); break;
4638 // case 1821: CALL(1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1); break;
4639 // case 1822: CALL(0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1); break;
4640 // case 1823: CALL(1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1); break;
4641 // case 1824: CALL(0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1); break;
4642 // case 1825: CALL(1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1); break;
4643 // case 1826: CALL(0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1); break;
4644 // case 1827: CALL(1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1); break;
4645 // case 1828: CALL(0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1); break;
4646 // case 1829: CALL(1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1); break;
4647 // case 1830: CALL(0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1); break;
4648 // case 1831: CALL(1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1); break;
4649 // case 1832: CALL(0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1); break;
4650 // case 1833: CALL(1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1); break;
4651 // case 1834: CALL(0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1); break;
4652 // case 1835: CALL(1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1); break;
4653 // case 1836: CALL(0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1); break;
4654 // case 1837: CALL(1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1); break;
4655 // case 1838: CALL(0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1); break;
4656 // case 1839: CALL(1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1); break;
4657 // case 1840: CALL(0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1); break;
4658 // case 1841: CALL(1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1); break;
4659 // case 1842: CALL(0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1); break;
4660 // case 1843: CALL(1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1); break;
4661 // case 1844: CALL(0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1); break;
4662 // case 1845: CALL(1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1); break;
4663 // case 1846: CALL(0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1); break;
4664 // case 1847: CALL(1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1); break;
4665 // case 1848: CALL(0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1); break;
4666 // case 1849: CALL(1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1); break;
4667 // case 1850: CALL(0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1); break;
4668 // case 1851: CALL(1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1); break;
4669 // case 1852: CALL(0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1); break;
4670 // case 1853: CALL(1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1); break;
4671 // case 1854: CALL(0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1); break;
4672 // case 1855: CALL(1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1); break;
4673 // case 1856: CALL(0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1); break;
4674 // case 1857: CALL(1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1); break;
4675 // case 1858: CALL(0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1); break;
4676 // case 1859: CALL(1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1); break;
4677 // case 1860: CALL(0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1); break;
4678 // case 1861: CALL(1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1); break;
4679 // case 1862: CALL(0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1); break;
4680 // case 1863: CALL(1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1); break;
4681 // case 1864: CALL(0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1); break;
4682 // case 1865: CALL(1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1); break;
4683 // case 1866: CALL(0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1); break;
4684 // case 1867: CALL(1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1); break;
4685 // case 1868: CALL(0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1); break;
4686 // case 1869: CALL(1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1); break;
4687 // case 1870: CALL(0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1); break;
4688 // case 1871: CALL(1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1); break;
4689 // case 1872: CALL(0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1); break;
4690 // case 1873: CALL(1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1); break;
4691 // case 1874: CALL(0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1); break;
4692 // case 1875: CALL(1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1); break;
4693 // case 1876: CALL(0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1); break;
4694 // case 1877: CALL(1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1); break;
4695 // case 1878: CALL(0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1); break;
4696 // case 1879: CALL(1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1); break;
4697 // case 1880: CALL(0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1); break;
4698 // case 1881: CALL(1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1); break;
4699 // case 1882: CALL(0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1); break;
4700 // case 1883: CALL(1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1); break;
4701 // case 1884: CALL(0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1); break;
4702 // case 1885: CALL(1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1); break;
4703 // case 1886: CALL(0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1); break;
4704 // case 1887: CALL(1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1); break;
4705 // case 1888: CALL(0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1); break;
4706 // case 1889: CALL(1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1); break;
4707 // case 1890: CALL(0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1); break;
4708 // case 1891: CALL(1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1); break;
4709 // case 1892: CALL(0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1); break;
4710 // case 1893: CALL(1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1); break;
4711 // case 1894: CALL(0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1); break;
4712 // case 1895: CALL(1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1); break;
4713 // case 1896: CALL(0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1); break;
4714 // case 1897: CALL(1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1); break;
4715 // case 1898: CALL(0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1); break;
4716 // case 1899: CALL(1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1); break;
4717 // case 1900: CALL(0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1); break;
4718 // case 1901: CALL(1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1); break;
4719 // case 1902: CALL(0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1); break;
4720 // case 1903: CALL(1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1); break;
4721 // case 1904: CALL(0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1); break;
4722 // case 1905: CALL(1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1); break;
4723 // case 1906: CALL(0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1); break;
4724 // case 1907: CALL(1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1); break;
4725 // case 1908: CALL(0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1); break;
4726 // case 1909: CALL(1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1); break;
4727 // case 1910: CALL(0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1); break;
4728 // case 1911: CALL(1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1); break;
4729 // case 1912: CALL(0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1); break;
4730 // case 1913: CALL(1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1); break;
4731 // case 1914: CALL(0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1); break;
4732 // case 1915: CALL(1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1); break;
4733 // case 1916: CALL(0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1); break;
4734 // case 1917: CALL(1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1); break;
4735 // case 1918: CALL(0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1); break;
4736 // case 1919: CALL(1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1); break;
4737 // case 1920: CALL(0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1); break;
4738 // case 1921: CALL(1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1); break;
4739 // case 1922: CALL(0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1); break;
4740 // case 1923: CALL(1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1); break;
4741 // case 1924: CALL(0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1); break;
4742 // case 1925: CALL(1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1); break;
4743 // case 1926: CALL(0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1); break;
4744 // case 1927: CALL(1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1); break;
4745 // case 1928: CALL(0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1); break;
4746 // case 1929: CALL(1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1); break;
4747 // case 1930: CALL(0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1); break;
4748 // case 1931: CALL(1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1); break;
4749 // case 1932: CALL(0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1); break;
4750 // case 1933: CALL(1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1); break;
4751 // case 1934: CALL(0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1); break;
4752 // case 1935: CALL(1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1); break;
4753 // case 1936: CALL(0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1); break;
4754 // case 1937: CALL(1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1); break;
4755 // case 1938: CALL(0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1); break;
4756 // case 1939: CALL(1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1); break;
4757 // case 1940: CALL(0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1); break;
4758 // case 1941: CALL(1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1); break;
4759 // case 1942: CALL(0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1); break;
4760 // case 1943: CALL(1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1); break;
4761 // case 1944: CALL(0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1); break;
4762 // case 1945: CALL(1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1); break;
4763 // case 1946: CALL(0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1); break;
4764 // case 1947: CALL(1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1); break;
4765 // case 1948: CALL(0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1); break;
4766 // case 1949: CALL(1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1); break;
4767 // case 1950: CALL(0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1); break;
4768 // case 1951: CALL(1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1); break;
4769 // case 1952: CALL(0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1); break;
4770 // case 1953: CALL(1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1); break;
4771 // case 1954: CALL(0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1); break;
4772 // case 1955: CALL(1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1); break;
4773 // case 1956: CALL(0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1); break;
4774 // case 1957: CALL(1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1); break;
4775 // case 1958: CALL(0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1); break;
4776 // case 1959: CALL(1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1); break;
4777 // case 1960: CALL(0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1); break;
4778 // case 1961: CALL(1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1); break;
4779 // case 1962: CALL(0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1); break;
4780 // case 1963: CALL(1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1); break;
4781 // case 1964: CALL(0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1); break;
4782 // case 1965: CALL(1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1); break;
4783 // case 1966: CALL(0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1); break;
4784 // case 1967: CALL(1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1); break;
4785 // case 1968: CALL(0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1); break;
4786 // case 1969: CALL(1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1); break;
4787 // case 1970: CALL(0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1); break;
4788 // case 1971: CALL(1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1); break;
4789 // case 1972: CALL(0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1); break;
4790 // case 1973: CALL(1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1); break;
4791 // case 1974: CALL(0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1); break;
4792 // case 1975: CALL(1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1); break;
4793 // case 1976: CALL(0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1); break;
4794 // case 1977: CALL(1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1); break;
4795 // case 1978: CALL(0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1); break;
4796 // case 1979: CALL(1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1); break;
4797 // case 1980: CALL(0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1); break;
4798 // case 1981: CALL(1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1); break;
4799 // case 1982: CALL(0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1); break;
4800 // case 1983: CALL(1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1); break;
4801 // case 1984: CALL(0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1); break;
4802 // case 1985: CALL(1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1); break;
4803 // case 1986: CALL(0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1); break;
4804 // case 1987: CALL(1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1); break;
4805 // case 1988: CALL(0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1); break;
4806 // case 1989: CALL(1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1); break;
4807 // case 1990: CALL(0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1); break;
4808 // case 1991: CALL(1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1); break;
4809 // case 1992: CALL(0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1); break;
4810 // case 1993: CALL(1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1); break;
4811 // case 1994: CALL(0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1); break;
4812 // case 1995: CALL(1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1); break;
4813 // case 1996: CALL(0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1); break;
4814 // case 1997: CALL(1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1); break;
4815 // case 1998: CALL(0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1); break;
4816 // case 1999: CALL(1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1); break;
4817 // case 2000: CALL(0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1); break;
4818 // case 2001: CALL(1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1); break;
4819 // case 2002: CALL(0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1); break;
4820 // case 2003: CALL(1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1); break;
4821 // case 2004: CALL(0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1); break;
4822 // case 2005: CALL(1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1); break;
4823 // case 2006: CALL(0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1); break;
4824 // case 2007: CALL(1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1); break;
4825 // case 2008: CALL(0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1); break;
4826 // case 2009: CALL(1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1); break;
4827 // case 2010: CALL(0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1); break;
4828 // case 2011: CALL(1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1); break;
4829 // case 2012: CALL(0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1); break;
4830 // case 2013: CALL(1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1); break;
4831 // case 2014: CALL(0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1); break;
4832 // case 2015: CALL(1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1); break;
4833 // case 2016: CALL(0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1); break;
4834 // case 2017: CALL(1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1); break;
4835 // case 2018: CALL(0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1); break;
4836 // case 2019: CALL(1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1); break;
4837 // case 2020: CALL(0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1); break;
4838 // case 2021: CALL(1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1); break;
4839 // case 2022: CALL(0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1); break;
4840 // case 2023: CALL(1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1); break;
4841 // case 2024: CALL(0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1); break;
4842 // case 2025: CALL(1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1); break;
4843 // case 2026: CALL(0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1); break;
4844 // case 2027: CALL(1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1); break;
4845 // case 2028: CALL(0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1); break;
4846 // case 2029: CALL(1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1); break;
4847 // case 2030: CALL(0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1); break;
4848 // case 2031: CALL(1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1); break;
4849 // case 2032: CALL(0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1); break;
4850 // case 2033: CALL(1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1); break;
4851 // case 2034: CALL(0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1); break;
4852 // case 2035: CALL(1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1); break;
4853 // case 2036: CALL(0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1); break;
4854 // case 2037: CALL(1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1); break;
4855 // case 2038: CALL(0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1); break;
4856 // case 2039: CALL(1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1); break;
4857 // case 2040: CALL(0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1); break;
4858 // case 2041: CALL(1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1); break;
4859 // case 2042: CALL(0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1); break;
4860 // case 2043: CALL(1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1); break;
4861 // case 2044: CALL(0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1); break;
4862 // case 2045: CALL(1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1); break;
4863 // case 2046: CALL(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); break;
4864 // case 2047: CALL(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); break;
4869 "doEnergy=%d doVirial=%d doSlow=%d doPairlist=%d "
4870 "doAlch=%d doFEP=%d doTI=%d doStreaming=%d doTable=%d "
4872 doEnergy, doVirial, doSlow, doPairlist, doAlch, doFEP, doTI,
4873 doStreaming, doTable, options);
4876 std::string call_options;
4877 call_options += "doEnergy = " + std::to_string(int(doEnergy));
4878 call_options += ", doVirial = " + std::to_string(int(doVirial));
4879 call_options += ", doSlow = " + std::to_string(int(doSlow));
4880 call_options += ", doPairlist = " + std::to_string(int(doPairlist));
4881 call_options += ", doAlch = " + std::to_string(int(doAlch));
4882 call_options += ", doFEP = " + std::to_string(int(doFEP));
4883 call_options += ", doTI = " + std::to_string(int(doTI));
4884 call_options += ", doStreaming = " + std::to_string(int(doStreaming));
4885 call_options += ", doTable = " + std::to_string(int(doTable));
4886 call_options += ", doAlchVdwForceSwitching = " + std::to_string(int(doAlchVdwForceSwitching));
4887 call_options += ", doNbThole = " + std::to_string(int(doNbThole));
4888 const std::string error = "CudaComputeNonbondedKernel::nonbondedForce, none of the kernels called. Options are:\n" + call_options;
4889 NAMD_bug(error.c_str());
4898 cudaCheck(cudaGetLastError());
4900 start += nblock*nwarp;
4903 if ( doVirial || ! doStreaming ){
4905 int grid = (atomStorageSize + block - 1)/block;
4907 transposeForcesKernel<1><<<grid, block, 0, stream>>>(d_forces, d_forcesSlow,
4908 force_x, force_y, force_z, force_w,
4909 forceSlow_x, forceSlow_y, forceSlow_z, forceSlow_w,
4912 transposeForcesKernel<0><<<grid, block, 0, stream>>>(d_forces, d_forcesSlow,
4913 force_x, force_y, force_z, force_w,
4914 forceSlow_x, forceSlow_y, forceSlow_z, forceSlow_w,
4920 // Perform virial and energy reductions for non-bonded force calculation
//
// Enqueues, on `stream`, the kernels that fold virial/energy contributions
// into d_virialEnergy.
//
//   tlKernel        - source of get_xyzq() and of the tile-list virial/energy
//                     buffer together with its (GBIS and non-GBIS) lengths
//   atomStorageSize - element count handed to reduceNonbondedVirialKernel
//   doEnergy, doVirial, doSlow, doGBIS
//                   - flags that select which launches happen and that are
//                     forwarded to the kernels
//   d_forces, d_forcesSlow
//                   - device force arrays passed to reduceNonbondedVirialKernel
//   d_virialEnergy  - device array of ATOMIC_BINS VirialEnergy entries; after
//                     the final bin reduction the result is in entry 0
//   stream          - stream used for the clear and for every launch
4922 void CudaComputeNonbondedKernel::reduceVirialEnergy(CudaTileListKernel& tlKernel,
4923 const int atomStorageSize, const bool doEnergy, const bool doVirial, const bool doSlow, const bool doGBIS,
4924 float4* d_forces, float4* d_forcesSlow,
4925 VirialEnergy* d_virialEnergy, cudaStream_t stream) {
// Clear all ATOMIC_BINS entries when either energy or virial is requested.
4927 if (doEnergy || doVirial) {
4928 clear_device_array<VirialEnergy>(d_virialEnergy, ATOMIC_BINS, stream);
// Per-atom virial reduction. (atomStorageSize-1)/nthread+1 rounds up for
// atomStorageSize >= 1; the block count is capped at getMaxNumBlocks().
4933 int nthread = REDUCENONBONDEDVIRIALKERNEL_NUM_WARP*WARPSIZE;
4934 int nblock = min(deviceCUDA->getMaxNumBlocks(), (atomStorageSize-1)/nthread+1);
4935 reduceNonbondedVirialKernel <<< nblock, nthread, 0, stream >>>
4936 (doSlow, atomStorageSize, tlKernel.get_xyzq(), d_forces, d_forcesSlow, d_virialEnergy);
4937 cudaCheck(cudaGetLastError());
// Reduction over the tile-list virial/energy buffer; sized by
// getTileListVirialEnergyLength() with the same round-up-and-cap rule.
4940 if (doVirial || doEnergy)
4942 int nthread = REDUCEVIRIALENERGYKERNEL_NUM_WARP*WARPSIZE;
4943 int nblock = min(deviceCUDA->getMaxNumBlocks(), (tlKernel.getTileListVirialEnergyLength()-1)/nthread+1);
4944 reduceVirialEnergyKernel <<< nblock, nthread, 0, stream >>>
4945 (doEnergy, doVirial, doSlow, tlKernel.getTileListVirialEnergyLength(), tlKernel.getTileListVirialEnergy(), d_virialEnergy);
4946 cudaCheck(cudaGetLastError());
// GBIS energy reduction runs only when both GBIS and energy are requested.
// It receives the same getTileListVirialEnergy() buffer, but sized by the
// GBIS-specific length.
4949 if (doGBIS && doEnergy)
4951 int nthread = REDUCEGBISENERGYKERNEL_NUM_WARP*WARPSIZE;
4952 int nblock = min(deviceCUDA->getMaxNumBlocks(), (tlKernel.getTileListVirialEnergyGBISLength()-1)/nthread+1);
4953 reduceGBISEnergyKernel <<< nblock, nthread, 0, stream >>>
4954 (tlKernel.getTileListVirialEnergyGBISLength(), tlKernel.getTileListVirialEnergy(), d_virialEnergy);
4955 cudaCheck(cudaGetLastError());
4958 if (ATOMIC_BINS > 1)
4960 // Reduce d_virialEnergy[ATOMIC_BINS] in-place (results are in d_virialEnergy[0])
// Launched as a single block with one thread per bin.
// NOTE(review): unlike the launches above, this one is not followed by
// cudaCheck(cudaGetLastError()) - confirm whether that is intentional.
4961 reduceNonbondedBinsKernel<<<1, ATOMIC_BINS, 0, stream>>>(doVirial, doEnergy, doSlow, doGBIS, d_virialEnergy);
// Uploads the exclusion bit words to the device.
//
//   numExclusions  - number of unsigned int words to upload
//   exclusion_bits - host array holding those words
//
// The member buffer overflowExclusions (capacity tracked in
// overflowExclusionsSize) is passed through reallocate_device for
// numExclusions elements, then filled with copy_HtoD_sync.
// NOTE(review): presumably reallocate_device only grows the buffer when the
// current capacity is too small - confirm against its definition.
4965 void CudaComputeNonbondedKernel::bindExclusions(int numExclusions, unsigned int* exclusion_bits) {
4966 reallocate_device<unsigned int>(&overflowExclusions, &overflowExclusionsSize, numExclusions);
4967 copy_HtoD_sync<unsigned int>(exclusion_bits, overflowExclusions, numExclusions);
// Copies the per-atom exclusion table to the device.
//
//   h_data    - host array of num_atoms int2 entries
//   num_atoms - number of entries to copy
//
// d_exclusionsByAtom is allocated only while it is still NULL; subsequent
// calls reuse the existing allocation and just overwrite its contents.
// NOTE(review): since the buffer is never resized, a later call with a larger
// num_atoms would copy past the first allocation - presumably num_atoms is
// fixed for the lifetime of the object; confirm against callers.
4971 void CudaComputeNonbondedKernel::setExclusionsByAtom(int2* h_data, const int num_atoms) {
4972 // Global data structure shouldn't be reallocated
4973 if (d_exclusionsByAtom == NULL) allocate_device<int2>(&d_exclusionsByAtom, num_atoms);
4974 copy_HtoD_sync<int2>(h_data, d_exclusionsByAtom, num_atoms);
// Fills the per-atom non-bonded arrays (vdwTypes, atomIndex, exclusions, part)
// from the global per-atom arrays, patch by patch.
//
// Launch layout: each block walks patches starting at blockIdx.x and stepping
// by gridDim.x; within a patch the threads of the block step over atoms by
// blockDim.x. One CudaLocalRecord is staged in static shared memory per block.
//
//   numPatches       - number of entries in localRecords
//   localRecords     - per-patch records supplying numAtoms, bufferOffset and
//                      bufferOffsetNBPad
//   global_vdwTypes, global_id, global_partition
//                    - source arrays, indexed by bufferOffset + sorted order
//   patchSortOrder   - indexed by bufferOffset + i; yields the within-patch
//                      index used to read the global_* arrays
//   exclusionsByAtom - table indexed by the atom's global id
//
// NOTE(review): kDoAlch presumably controls whether `part` is written -
// confirm against the full kernel body.
4979 template<bool kDoAlch>
4980 __global__ void updateVdwTypesExclKernel(
4981 const int numPatches,
4982 const CudaLocalRecord* localRecords,
4983 const int* global_vdwTypes,
4984 const int* global_id,
4985 const int* patchSortOrder,
4986 const int2* exclusionsByAtom,
4987 const int* global_partition,
// The record is viewed as an array of 32-bit words so that several threads
// can copy it cooperatively.
4993 __shared__ CudaLocalRecord s_record;
4994 using AccessType = int32_t;
4995 AccessType* s_record_buffer = (AccessType*) &s_record;
4997 for (int patchIndex = blockIdx.x; patchIndex < numPatches; patchIndex += gridDim.x) {
4998 // Read in the CudaLocalRecord using multiple threads. This should
// Each thread copies every blockDim.x-th 32-bit word of the record.
5000 for (int i = threadIdx.x; i < sizeof(CudaLocalRecord)/sizeof(AccessType); i += blockDim.x) {
5001 s_record_buffer[i] = ((AccessType*) &(localRecords[patchIndex]))[i];
// Every thread reads s_record from here on, so the cooperative copy above has
// to be complete block-wide before this point.
5005 const int numAtoms = s_record.numAtoms;
5006 const int offset = s_record.bufferOffset;
5007 const int offsetNB = s_record.bufferOffsetNBPad;
// Inputs are addressed with `offset`, outputs with `offsetNB`.
5009 for (int i = threadIdx.x; i < numAtoms; i += blockDim.x) {
5010 const int order = patchSortOrder[offset + i];
5011 const int id = global_id[offset + order];
5012 vdwTypes [offsetNB + i] = global_vdwTypes[offset + order];
5013 atomIndex [offsetNB + i] = id;
// The two int2 components are swapped on the way out: output .x takes the
// table's .y and output .y takes the table's .x.
5014 exclusions[offsetNB + i].x = exclusionsByAtom[id].y;
5015 exclusions[offsetNB + i].y = exclusionsByAtom[id].x;
5017 part [offsetNB + i] = global_partition[offset + order];
// Host wrapper that sizes the member device buffers vdwTypes,
// exclIndexMaxDiff and atomIndex for atomStorageSize entries and then launches
// updateVdwTypesExclKernel to populate them, along with tlKernel.get_part().
//
//   tlKernel        - supplies the `part` output buffer via get_part()
//   numPatches      - number of patches; also used as the grid size
//   atomStorageSize - element count requested for the three member buffers
//   alchOn          - NOTE(review): presumably chooses between the <true> and
//                     <false> kernel instantiations below - confirm
//   localRecords    - per-patch records forwarded to the kernel
//   d_vdwTypes, d_id, d_sortOrder, d_partition
//                   - device input arrays forwarded to the kernel
5025 void CudaComputeNonbondedKernel::updateVdwTypesExclOnGPU(CudaTileListKernel& tlKernel,
5026 const int numPatches, const int atomStorageSize, const bool alchOn,
5027 CudaLocalRecord* localRecords,
5028 const int* d_vdwTypes, const int* d_id, const int* d_sortOrder,
5029 const int* d_partition,
// OVERALLOC is passed to reallocate_device as an extra sizing argument.
5032 reallocate_device<int>(&vdwTypes, &vdwTypesSize, atomStorageSize, OVERALLOC);
5033 reallocate_device<int2>(&exclIndexMaxDiff, &exclIndexMaxDiffSize, atomStorageSize, OVERALLOC);
5034 reallocate_device<int>(&atomIndex, &atomIndexSize, atomStorageSize, OVERALLOC);
// One block per patch, 512 threads per block.
5036 const int numBlocks = numPatches;
5037 const int numThreads = 512;
// The two launches below differ only in the kDoAlch template argument.
// NOTE(review): neither launch is followed by cudaCheck(cudaGetLastError()) -
// confirm whether launch errors are checked elsewhere.
5040 updateVdwTypesExclKernel<true><<<numBlocks, numThreads, 0, stream>>>(
5041 numPatches, localRecords,
5042 d_vdwTypes, d_id, d_sortOrder, d_exclusionsByAtom, d_partition,
5043 vdwTypes, atomIndex, exclIndexMaxDiff, tlKernel.get_part()
5046 updateVdwTypesExclKernel<false><<<numBlocks, numThreads, 0, stream>>>(
5047 numPatches, localRecords,
5048 d_vdwTypes, d_id, d_sortOrder, d_exclusionsByAtom, d_partition,
5049 vdwTypes, atomIndex, exclIndexMaxDiff, tlKernel.get_part()
// Uploads the per-atom Drude arrays to the device.
//
//   atomStorageSize  - number of elements sized for and copied in each array
//   h_drudeAtomAlpha - host array of float values, copied into drudeAtomAlpha
//   h_isDrude        - host array of int values, copied into isDrude
//   stream           - stream handed to both copy_HtoD calls
//
// Both member buffers go through reallocate_device with the OVERALLOC
// argument before the copies are issued.
// NOTE(review): the copies take a stream, so they are presumably asynchronous
// and the host arrays must stay valid until the stream has completed them -
// confirm against copy_HtoD.
5054 void CudaComputeNonbondedKernel::updateDrudeData(
5055 const int atomStorageSize, const float* h_drudeAtomAlpha,
5056 const int *h_isDrude, cudaStream_t stream) {
5057 reallocate_device(&drudeAtomAlpha, &drudeAtomAlphaSize, atomStorageSize, OVERALLOC);
5058 reallocate_device(&isDrude, &isDrudeSize, atomStorageSize, OVERALLOC);
5059 copy_HtoD(h_drudeAtomAlpha, drudeAtomAlpha, atomStorageSize, stream);
5060 copy_HtoD(h_isDrude, isDrude, atomStorageSize, stream);