00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #define VMDORBUSESSE 1
00025
00026
00027 #include <math.h>
00028 #include <stdio.h>
00029 #if VMDORBUSESSE & defined(__SSE2__)
00030 #include <emmintrin.h>
00031 #endif
00032 #include "Orbital.h"
00033 #include "DrawMolecule.h"
00034 #include "utilities.h"
00035 #include "Inform.h"
00036 #include "WKFThreads.h"
00037 #include "WKFUtils.h"
00038 #if defined(VMDCUDA)
00039 #include "CUDAKernels.h"
00040 #endif
00041 #if defined(VMDOPENCL)
00042 #include "OpenCLUtils.h"
00043 #include "OpenCLKernels.h"
00044 #endif
00045
00046 #define ANGS_TO_BOHR 1.88972612478289694072f
00047
00049 Orbital::Orbital(const float *pos,
00050 const float *wfn,
00051 const float *barray,
00052 const basis_atom_t *bset,
00053 const int *types,
00054 const int *asort,
00055 const int *abasis,
00056 const float **norm,
00057 const int *nshells,
00058 const int *nprimshell,
00059 const int *shelltypes,
00060 int natoms, int ntypes, int numwave, int numbasis,
00061 int orbid) :
00062 atomid(-1), shellid(-1),
00063 numatoms(natoms), atompos(pos),
00064 num_wave_f(numwave),
00065 num_basis_funcs(numbasis),
00066 basis_array(barray),
00067 numtypes(ntypes),
00068 basis_set(bset),
00069 atom_types(types),
00070 atom_sort(asort),
00071 atom_basis(abasis),
00072 norm_factors(norm),
00073 num_shells_per_atom(nshells),
00074 num_prim_per_shell(nprimshell),
00075 shell_types(shelltypes)
00076 {
00077 grid_data = NULL;
00078 origin[0] = origin[1] = origin[2] = 0.0;
00079
00080
00081
00082
00083 normalize_wavefunction(wfn + num_wave_f*orbid);
00084
00085
00086 }
00087
00089 Orbital::~Orbital() {
00090 if (wave_f) delete [] wave_f;
00091 }
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102 void Orbital::normalize_wavefunction(const float *wfn) {
00103 #ifdef DEBUGORBS
00104 char shellname[8] = {'S', 'P', 'D', 'F', 'G', 'H', 'I', 'K'};
00105 #endif
00106 int i, j, k;
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118 wave_f = new float[num_wave_f];
00119 int ifunc = 0;
00120 for (i=0; i<numatoms; i++) {
00121 const basis_atom_t *basis_atom = &basis_set[atom_types[i]];
00122 for (j=0; j<basis_atom->numshells; j++) {
00123 int stype = basis_atom->shell[j].type;
00124 #ifdef DEBUGORBS
00125 printf("atom %i/%i, %i/%i %c-shell\n", i+1, numatoms, j+1, basis_atom->numshells, shellname[stype]);
00126 #endif
00127 for (k=0; k<basis_atom->shell[j].num_cart_func; k++) {
00128 wave_f[ifunc] = wfn[ifunc] * norm_factors[stype][k];
00129
00130 #ifdef DEBUGORBS
00131 printf("%3i %c %2i wave_f[%3i]=% f norm=%.3f normwave=% f\n",
00132 i, shellname[stype], k, ifunc, wfn[ifunc],
00133 norm_factors[stype][k], wave_f[ifunc]);
00134 #endif
00135 ifunc++;
00136 }
00137 }
00138 }
00139 }
00140
00141
00142
00143
00144
00145
00146 int Orbital::set_grid_to_bbox(const float *pos, float padding,
00147 float resolution) {
00148 int i = 0;
00149 float xyzdim[3];
00150
00151
00152
00153 origin[0] = xyzdim[0] = pos[0];
00154 origin[1] = xyzdim[1] = pos[1];
00155 origin[2] = xyzdim[2] = pos[2];
00156
00157
00158
00159
00160 for(i=1; i<numatoms; i++) {
00161 if (pos[3*i ] < origin[0]) origin[0] = pos[3*i];
00162 if (pos[3*i+1] < origin[1]) origin[1] = pos[3*i+1];
00163 if (pos[3*i+2] < origin[2]) origin[2] = pos[3*i+2];
00164 if (pos[3*i ] > xyzdim[0]) xyzdim[0] = pos[3*i];
00165 if (pos[3*i+1] > xyzdim[1]) xyzdim[1] = pos[3*i+1];
00166 if (pos[3*i+2] > xyzdim[2]) xyzdim[2] = pos[3*i+2];
00167 }
00168
00169
00170 origin[0] -= padding;
00171 origin[1] -= padding;
00172 origin[2] -= padding;
00173 gridsize[0] = xyzdim[0] + padding - origin[0];
00174 gridsize[1] = xyzdim[1] + padding - origin[1];
00175 gridsize[2] = xyzdim[2] + padding - origin[2];
00176
00177 set_resolution(resolution);
00178
00179 return TRUE;
00180 }
00181
00182
00183
00184
00185
00186
00187 void Orbital::set_grid(float newori[3], float newdim[3], float newvoxelsize) {
00188 origin[0] = newori[0];
00189 origin[1] = newori[1];
00190 origin[2] = newori[2];
00191 gridsize[0] = newdim[0];
00192 gridsize[1] = newdim[1];
00193 gridsize[2] = newdim[2];
00194 set_resolution(newvoxelsize);
00195 }
00196
00197
00198 void Orbital::set_resolution(float resolution) {
00199 voxelsize = resolution;
00200 int i;
00201 for (i=0; i<3; i++) {
00202 numvoxels[i] = (int)(gridsize[i]/voxelsize) + 1;
00203 gridsize[i] = voxelsize*(numvoxels[i]-1);
00204 }
00205 }
00206
00207 #define XNEG 0
00208 #define YNEG 1
00209 #define ZNEG 2
00210 #define XPOS 3
00211 #define YPOS 4
00212 #define ZPOS 5
00213
00214
00215
00216
00217 int Orbital::check_plane(int dir, float threshold, int minstepsize,
00218 int &stepsize) {
00219 bool repeat=0;
00220 int u, v, w, nu, nv;
00221
00222
00223
00224 u = (dir+1)%3;
00225 v = (dir+2)%3;
00226 w = dir%3;
00227
00228
00229
00230
00231
00232
00233 do {
00234 int success = 0;
00235 int offset = 0;
00236 int gridstep = stepsize;
00237
00238 if (repeat) {
00239
00240
00241
00242 offset = stepsize;
00243 gridstep = 2*stepsize;
00244 }
00245
00246
00247 float grid[3];
00248 grid[w] = origin[w] + (dir/3)*(numvoxels[w]-1) * voxelsize;
00249
00250
00251 for (nu=0; nu<numvoxels[u]; nu+=gridstep) {
00252 grid[u] = origin[u] + nu * voxelsize;
00253
00254 for (nv=0; nv<numvoxels[v]; nv+=gridstep) {
00255 grid[v] = origin[v] + nv * voxelsize;
00256
00257 if (fabs(evaluate_grid_point(grid[0], grid[1], grid[2])) > threshold) {
00258 success = 1;
00259 break;
00260 }
00261 }
00262 if (success) break;
00263 }
00264
00265 if (success) {
00266
00267
00268
00269 if (!(dir/3)) origin[w] -= stepsize*voxelsize;
00270 numvoxels[w] += stepsize;
00271 if (stepsize<=minstepsize) {
00272
00273 return 1;
00274 }
00275 stepsize /=2;
00276 repeat = 1;
00277
00278
00279 } else {
00280
00281 if (!(dir/3)) origin[w] += stepsize*voxelsize;
00282 numvoxels[w] -= stepsize;
00283
00284 repeat = 0;
00285 if (numvoxels[w] <= 1) {
00286
00287
00288
00289 numvoxels[w] = stepsize;
00290 if (!(dir/3)) origin[w] -= stepsize*voxelsize;
00291 stepsize /=2;
00292 repeat = 1;
00293
00294 }
00295 }
00296
00297 } while (repeat);
00298
00299 return 0;
00300 }
00301
00302
00303
00304
00305
00306
00307
00308
00309
00310
00311
00312
00313
00314
00315
00316
00317
00318
00319
00320
00321
00322
00323 void Orbital::find_optimal_grid(float threshold,
00324 int minstepsize, int maxstepsize) {
00325 int optimal[6] = {0, 0, 0, 0, 0, 0};
00326 int stepsize[6];
00327 int i;
00328 for (i=0; i<6; i++) stepsize[i] = maxstepsize;
00329
00330 #ifdef DEBUGORBS
00331 printf("origin = {%f %f %f}\n", origin[0], origin[1], origin[2]);
00332 printf("gridsize = {%f %f %f}\n", gridsize[0], gridsize[1], gridsize[2]);
00333 #endif
00334
00335
00336
00337
00338 int iter = 0;
00339 while ( !optimal[0] || !optimal[1] || !optimal[2] ||
00340 !optimal[3] || !optimal[4] || !optimal[5] )
00341 {
00342 if (iter>100) {
00343 msgInfo << "WARNING: Could not optimize orbital grid boundaries in"
00344 << iter << "steps!" << sendmsg;
00345 break;
00346 }
00347 iter++;
00348
00349
00350
00351 if (!optimal[XNEG])
00352 optimal[XNEG] = check_plane(XNEG, threshold, minstepsize, stepsize[XNEG]);
00353
00354 if (!optimal[XPOS])
00355 optimal[XPOS] = check_plane(XPOS, threshold, minstepsize, stepsize[XPOS]);
00356
00357 if (!optimal[YNEG])
00358 optimal[YNEG] = check_plane(YNEG, threshold, minstepsize, stepsize[YNEG]);
00359
00360 if (!optimal[YPOS])
00361 optimal[YPOS] = check_plane(YPOS, threshold, minstepsize, stepsize[YPOS]);
00362
00363 if (!optimal[ZNEG])
00364 optimal[ZNEG] = check_plane(ZNEG, threshold, minstepsize, stepsize[ZNEG]);
00365
00366 if (!optimal[ZPOS])
00367 optimal[ZPOS] = check_plane(ZPOS, threshold, minstepsize, stepsize[ZPOS]);
00368
00369 #ifdef DEBUGORBS
00370 printf("origin {%f %f %f}\n", origin[0], origin[1], origin[2]);
00371 printf("ngrid {%i %i %i}\n", numvoxels[0], numvoxels[1], numvoxels[2]);
00372 printf("stepsize {%i %i %i %i %i %i}\n", stepsize[0], stepsize[1], stepsize[2],
00373 stepsize[3], stepsize[4], stepsize[5]);
00374 #endif
00375 }
00376
00377
00378 gridsize[0] = numvoxels[0]*voxelsize;
00379 gridsize[1] = numvoxels[1]*voxelsize;
00380 gridsize[2] = numvoxels[2]*voxelsize;
00381 }
00382
00383
00384
00385 int Orbital::calculate_mo(DrawMolecule *mol, int density) {
00386 wkf_timerhandle timer;
00387
00388 timer=wkf_timer_create();
00389 wkf_timer_start(timer);
00390
00391 #ifdef DEBUGORBS
00392 printf("num_wave_f=%i\n", num_wave_f);
00393 #endif
00394
00395 #ifdef DEBUGORBS
00396 int i=0;
00397 for (i=0; i<num_wave_f; i++) {
00398 printf("wave_f[%i] = %f\n", i, wave_f[i]);
00399 }
00400 #endif
00401
00402
00403
00404 int numgridpoints = numvoxels[0] * numvoxels[1] * numvoxels[2];
00405 grid_data = new float[numgridpoints];
00406
00407
00408
00409 #ifdef DEBUGORBS
00410 printf("Calculating %ix%ix%i orbital grid. \n", numvoxels[0], numvoxels[1], numvoxels[2]);
00411 #endif
00412
00413
00414 int rc=-1;
00415
00416
00417 #if defined(VMDCUDA)
00418
00419
00420 if ((max_shell_type() <= G_SHELL) &&
00421 (max_primitives() <= 32) &&
00422 (!getenv("VMDNOCUDA"))) {
00423 rc = vmd_cuda_evaluate_orbital_grid(mol->cuda_devpool(),
00424 numatoms, wave_f, num_wave_f,
00425 basis_array, num_basis_funcs,
00426 atompos, atom_basis,
00427 num_shells_per_atom,
00428 num_prim_per_shell,
00429 shell_types, total_shells(),
00430 numvoxels, voxelsize,
00431 origin, density, grid_data);
00432 }
00433 #endif
00434 #if defined(VMDOPENCL)
00435
00436
00437 if (rc!=0 &&
00438 (max_shell_type() <= G_SHELL) &&
00439 (max_primitives() <= 32) &&
00440 (!getenv("VMDNOOPENCL"))) {
00441
00442 #if 1
00443
00444 static vmd_opencl_orbital_handle *orbh = NULL;
00445 static cl_context clctx = NULL;
00446 static cl_command_queue clcmdq = NULL;
00447 static cl_device_id *cldevs = NULL;
00448 if (orbh == NULL) {
00449 printf("Attaching OpenCL device:\n");
00450 wkf_timer_start(timer);
00451 cl_int clerr = CL_SUCCESS;
00452
00453 cl_platform_id clplatid = vmd_cl_get_platform_index(0);
00454 cl_context_properties clctxprops[] = {(cl_context_properties) CL_CONTEXT_PLATFORM, (cl_context_properties) clplatid, (cl_context_properties) 0};
00455 clctx = clCreateContextFromType(clctxprops, CL_DEVICE_TYPE_GPU, NULL, NULL, &clerr);
00456
00457 size_t parmsz;
00458 clerr |= clGetContextInfo(clctx, CL_CONTEXT_DEVICES, 0, NULL, &parmsz);
00459 if (clerr != CL_SUCCESS) return -1;
00460 cldevs = (cl_device_id *) malloc(parmsz);
00461 if (clerr != CL_SUCCESS) return -1;
00462 clerr |= clGetContextInfo(clctx, CL_CONTEXT_DEVICES, parmsz, cldevs, NULL);
00463 if (clerr != CL_SUCCESS) return -1;
00464 clcmdq = clCreateCommandQueue(clctx, cldevs[0], 0, &clerr);
00465 if (clerr != CL_SUCCESS) return -1;
00466 wkf_timer_stop(timer);
00467 printf(" OpenCL context creation time: %.3f sec\n", wkf_timer_time(timer));
00468
00469 wkf_timer_start(timer);
00470 orbh = vmd_opencl_create_orbital_handle(clctx, clcmdq, cldevs);
00471 wkf_timer_stop(timer);
00472 printf(" OpenCL kernel compilation time: %.3f sec\n", wkf_timer_time(timer));
00473
00474 wkf_timer_start(timer);
00475 }
00476 #endif
00477
00478 rc = vmd_opencl_evaluate_orbital_grid(mol->cuda_devpool(), orbh,
00479 numatoms, wave_f, num_wave_f,
00480 basis_array, num_basis_funcs,
00481 atompos, atom_basis,
00482 num_shells_per_atom,
00483 num_prim_per_shell,
00484 shell_types, total_shells(),
00485 numvoxels, voxelsize,
00486 origin, density, grid_data);
00487
00488 #if 0
00489
00490 vmd_opencl_destroy_orbital_handle(parms.orbh);
00491 clReleaseCommandQueue(clcmdq);
00492 clReleaseContext(clctx);
00493 free(cldevs);
00494 #endif
00495 }
00496 #endif
00497 #if 0
00498 int numprocs = 1;
00499 if (getenv("VMDDUMPORBITALS")) {
00500 write_orbital_data(getenv("VMDDUMPORBITALS"), numatoms,
00501 wave_f, num_wave_f, basis_array, num_basis,
00502 atompos, atom_basis, num_shells_per_atom,
00503 num_prim_per_shell, shell_types,
00504 num_shells, numvoxels, voxelsize, origin);
00505
00506 read_calc_orbitals(devpool, getenv("VMDDUMPORBITALS"));
00507 }
00508 #endif
00509 if (rc!=0) rc = evaluate_grid_fast(numatoms, wave_f, basis_array,
00510 atompos, atom_basis,
00511 num_shells_per_atom, num_prim_per_shell,
00512 shell_types, numvoxels, voxelsize,
00513 origin, density, grid_data);
00514
00515 if (rc!=0) {
00516 msgErr << "Error computing orbital grid" << sendmsg;
00517 delete [] grid_data;
00518 grid_data=NULL;
00519 return FALSE;
00520 }
00521
00522 wkf_timer_stop(timer);
00523
00524 #if 0
00525 {
00526 double gflops = (numgridpoints * flops_per_gridpoint()) / (wkf_timer_time(timer) * 1000000000.0);
00527
00528 char strbuf[1024];
00529 sprintf(strbuf, "Orbital calc. time %.3f secs, %.2f gridpoints/sec, %.2f GFLOPS",
00530 wkf_timer_time(timer),
00531 (((double) numgridpoints) / wkf_timer_time(timer)),
00532 gflops);
00533 msgInfo << strbuf << sendmsg;
00534 }
00535 #endif
00536
00537 wkf_timer_destroy(timer);
00538 return TRUE;
00539 }
00540
00541
00542
00543
00544
00545
00546
00547
00548
00549
00550
00551
00552
00553
00554
00555
00556
00557
00558
00559
00560
00561
00562
00563
00564
00565
00566
00567
00568
00569 float Orbital::evaluate_grid_point(float grid_x, float grid_y, float grid_z) {
00570 int at;
00571 int prim, shell;
00572
00573
00574 float value = 0.0;
00575
00576
00577 int ifunc = 0;
00578 int shell_counter = 0;
00579
00580
00581 for (at=0; at<numatoms; at++) {
00582 int maxshell = num_shells_per_atom[at];
00583 int prim_counter = atom_basis[at];
00584
00585
00586 float xdist = (grid_x - atompos[3*at ])*ANGS_TO_BOHR;
00587 float ydist = (grid_y - atompos[3*at+1])*ANGS_TO_BOHR;
00588 float zdist = (grid_z - atompos[3*at+2])*ANGS_TO_BOHR;
00589 float dist2 = xdist*xdist + ydist*ydist + zdist*zdist;
00590
00591
00592
00593
00594
00595
00596 for (shell=0; shell < maxshell; shell++) {
00597 float contracted_gto = 0.0f;
00598
00599
00600
00601 int maxprim = num_prim_per_shell[shell_counter];
00602 int shelltype = shell_types[shell_counter];
00603 for (prim=0; prim < maxprim; prim++) {
00604 float exponent = basis_array[prim_counter ];
00605 float contract_coeff = basis_array[prim_counter + 1];
00606 contracted_gto += contract_coeff * expf(-exponent*dist2);
00607 prim_counter += 2;
00608 }
00609
00610
00611 float tmpshell=0;
00612
00613
00614 int i, j;
00615 float xdp, ydp, zdp;
00616 float xdiv = 1.0f / xdist;
00617 for (j=0, zdp=1.0f; j<=shelltype; j++, zdp*=zdist) {
00618 int imax = shelltype - j;
00619 for (i=0, ydp=1.0f, xdp=powf(xdist, float(imax)); i<=imax; i++, ydp*=ydist, xdp*=xdiv) {
00620 tmpshell += wave_f[ifunc++] * xdp * ydp * zdp;
00621 }
00622 }
00623 value += tmpshell * contracted_gto;
00624
00625 shell_counter++;
00626 }
00627 }
00628
00629
00630 return value;
00631 }
00632
00633
00634
00635
00636
00637 int Orbital::max_primitives(void) {
00638 int maxprim=-1;
00639
00640 int shell_counter = 0;
00641 for (int at=0; at<numatoms; at++) {
00642 for (int shell=0; shell < num_shells_per_atom[at]; shell++) {
00643 int numprim = num_prim_per_shell[shell_counter];
00644 if (numprim > maxprim)
00645 maxprim = numprim;
00646 }
00647 }
00648
00649 return maxprim;
00650 }
00651
00652
00653
00654
00655
00656 int Orbital::max_shell_type(void) {
00657 int maxshell=-1;
00658
00659 int shell_counter = 0;
00660 for (int at=0; at<numatoms; at++) {
00661 for (int shell=0; shell < num_shells_per_atom[at]; shell++) {
00662 int shelltype = shell_types[shell_counter];
00663 shell_counter++;
00664 if (shelltype > maxshell)
00665 maxshell=shelltype;
00666 }
00667 }
00668
00669 return maxshell;
00670 }
00671
00672
00673
00674
00675
00676
00677 int Orbital::max_wave_f_count(void) {
00678 int maxcount=0;
00679
00680 int shell_counter = 0;
00681 for (int at=0; at<numatoms; at++) {
00682 for (int shell=0; shell < num_shells_per_atom[at]; shell++) {
00683 int shelltype = shell_types[shell_counter];
00684 int i, j;
00685 int count=0;
00686 for (i=0; i<=shelltype; i++) {
00687 int jmax = shelltype - i;
00688 for (j=0; j<=jmax; j++) {
00689 count++;
00690 }
00691 }
00692 shell_counter++;
00693 if (count > maxcount)
00694 maxcount=count;
00695 }
00696 }
00697
00698 return maxcount;
00699 }
00700
00701
00702
00703
00704
00705 double Orbital::flops_per_gridpoint() {
00706 double flops=0.0;
00707
00708 int shell_counter = 0;
00709 for (int at=0; at<numatoms; at++) {
00710 flops += 7;
00711
00712 for (int shell=0; shell < num_shells_per_atom[at]; shell++) {
00713 for (int prim=0; prim < num_prim_per_shell[shell_counter]; prim++)
00714 flops += 4;
00715
00716 int shelltype = shell_types[shell_counter];
00717
00718 switch (shelltype) {
00719
00720 case S_SHELL: flops += 2; break;
00721 case P_SHELL: flops += 8; break;
00722 case D_SHELL: flops += 17; break;
00723 case F_SHELL: flops += 30; break;
00724 case G_SHELL: flops += 50; break;
00725
00726
00727 default:
00728 int i, j;
00729 for (i=0; i<=shelltype; i++) {
00730 int jmax = shelltype - i;
00731 flops += 1;
00732 for (j=0; j<=jmax; j++) {
00733 flops += 6;
00734 }
00735 }
00736 break;
00737 }
00738
00739 shell_counter++;
00740 }
00741 }
00742
00743 return flops;
00744 }
00745
00746
00747
00748
00749
00750
00751
00752
00753
00754
00755
00756 static const float MAXNUMF = 3.4028234663852885981170418348451692544e38f;
00757 static const float MAXLOGF = 88.72283905206835f;
00758 static const float MINLOGF = -103.278929903431851103f;
00759 static const float LOG2EF = 1.44269504088896341f;
00760 static const float C1 = 0.693359375f;
00761 static const float C2 = -2.12194440e-4f;
00762
00763 static inline float cephesfastexpf(float x) {
00764 float z;
00765 int n;
00766
00767 if(x > MAXLOGF)
00768 return MAXNUMF;
00769
00770 if(x < MINLOGF)
00771 return 0.0;
00772
00773
00774 z = floorf( LOG2EF * x + 0.5f );
00775 x -= z * C1;
00776 x -= z * C2;
00777 n = (int) z;
00778
00779 z = x * x;
00780
00781 z = ((((( 1.9875691500E-4f * x + 1.3981999507E-3f) * x
00782 + 8.3334519073E-3f) * x + 4.1665795894E-2f) * x
00783 + 1.6666665459E-1f) * x + 5.0000001201E-1f) * z + x + 1.0f;
00784
00785 x = ldexpf(z, n);
00786 return x;
00787 }
00788
00789
00790
00791
00792
00793
00794
00795
00796
00797
00798
00799
00800
00801
00802
00803
00804
00805
00806
00807
00808
00809
00810
00811
00812
00813
00814
00815
00816
00817
00818
00819
00820
00821
00822
00823
00824
00825
00826
00827
00828
00829
00830
00831 #define MLOG2EF -1.44269504088896f
00832
00833
00834
00835
00836
00837 #define SCEXP0 1.0000000000000000f
00838 #define SCEXP1 0.6987082824680118f
00839 #define SCEXP2 0.2633174272827404f
00840 #define SCEXP3 0.0923611991471395f
00841 #define SCEXP4 0.0277520543324108f
00842
00843
00844 #define EXPOBIAS 127
00845 #define EXPOSHIFT 23
00846
00847
00848 #define ACUTOFF -10
00849
00850 typedef union flint_t {
00851 float f;
00852 int n;
00853 } flint;
00854
00855 float aexpfnx(float x) {
00856
00857 float mb;
00858 int mbflr;
00859 float d;
00860 float sy;
00861 flint scalfac;
00862
00863 if (x < ACUTOFF) return 0.f;
00864
00865 mb = x * MLOG2EF;
00866 mbflr = (int) mb;
00867 d = mbflr - mb;
00868 sy = SCEXP0 + d*(SCEXP1 + d*(SCEXP2 + d*(SCEXP3 + d*SCEXP4)));
00869
00870 scalfac.n = (EXPOBIAS - mbflr) << EXPOSHIFT;
00871 return (sy * scalfac.f);
00872 }
00873
00874
00875
00876
00877
00878
00879 #define S_SHELL 0
00880 #define P_SHELL 1
00881 #define D_SHELL 2
00882 #define F_SHELL 3
00883 #define G_SHELL 4
00884 #define H_SHELL 5
00885
00886 int evaluate_grid(int numatoms,
00887 const float *wave_f, const float *basis_array,
00888 const float *atompos,
00889 const int *atom_basis,
00890 const int *num_shells_per_atom,
00891 const int *num_prim_per_shell,
00892 const int *shell_types,
00893 const int *numvoxels,
00894 float voxelsize,
00895 const float *origin,
00896 int density,
00897 float * orbitalgrid) {
00898 if (!orbitalgrid)
00899 return -1;
00900
00901 int nx, ny, nz;
00902
00903
00904 int numgridxy = numvoxels[0]*numvoxels[1];
00905 for (nz=0; nz<numvoxels[2]; nz++) {
00906 float grid_x, grid_y, grid_z;
00907 grid_z = origin[2] + nz * voxelsize;
00908 for (ny=0; ny<numvoxels[1]; ny++) {
00909 grid_y = origin[1] + ny * voxelsize;
00910 int gaddrzy = ny*numvoxels[0] + nz*numgridxy;
00911 for (nx=0; nx<numvoxels[0]; nx++) {
00912 grid_x = origin[0] + nx * voxelsize;
00913
00914
00915
00916 int at;
00917 int prim, shell;
00918
00919
00920 float value = 0.0;
00921
00922
00923 int ifunc = 0;
00924 int shell_counter = 0;
00925
00926
00927 for (at=0; at<numatoms; at++) {
00928 int maxshell = num_shells_per_atom[at];
00929 int prim_counter = atom_basis[at];
00930
00931
00932 float xdist = (grid_x - atompos[3*at ])*ANGS_TO_BOHR;
00933 float ydist = (grid_y - atompos[3*at+1])*ANGS_TO_BOHR;
00934 float zdist = (grid_z - atompos[3*at+2])*ANGS_TO_BOHR;
00935
00936 float xdist2 = xdist*xdist;
00937 float ydist2 = ydist*ydist;
00938 float zdist2 = zdist*zdist;
00939 float xdist3 = xdist2*xdist;
00940 float ydist3 = ydist2*ydist;
00941 float zdist3 = zdist2*zdist;
00942
00943 float dist2 = xdist2 + ydist2 + zdist2;
00944
00945
00946
00947
00948
00949
00950 for (shell=0; shell < maxshell; shell++) {
00951 float contracted_gto = 0.0f;
00952
00953
00954
00955
00956
00957
00958
00959
00960
00961 int maxprim = num_prim_per_shell[shell_counter];
00962 int shelltype = shell_types[shell_counter];
00963 for (prim=0; prim<maxprim; prim++) {
00964 float exponent = basis_array[prim_counter ];
00965 float contract_coeff = basis_array[prim_counter + 1];
00966
00967
00968
00969
00970 #if defined(__GNUC__) && !defined(__ICC)
00971
00972
00973 contracted_gto += contract_coeff * aexpfnx(-exponent*dist2);
00974 #elif defined(__ICC)
00975
00976
00977
00978
00979 contracted_gto += contract_coeff * cephesfastexpf(-exponent*dist2);
00980 #else
00981
00982
00983 contracted_gto += contract_coeff * expf(-exponent*dist2);
00984 #endif
00985 prim_counter += 2;
00986 }
00987
00988
00989 float tmpshell=0;
00990 switch (shelltype) {
00991 case S_SHELL:
00992 value += wave_f[ifunc++] * contracted_gto;
00993 break;
00994
00995 case P_SHELL:
00996 tmpshell += wave_f[ifunc++] * xdist;
00997 tmpshell += wave_f[ifunc++] * ydist;
00998 tmpshell += wave_f[ifunc++] * zdist;
00999 value += tmpshell * contracted_gto;
01000 break;
01001
01002 case D_SHELL:
01003 tmpshell += wave_f[ifunc++] * xdist2;
01004 tmpshell += wave_f[ifunc++] * xdist * ydist;
01005 tmpshell += wave_f[ifunc++] * ydist2;
01006 tmpshell += wave_f[ifunc++] * xdist * zdist;
01007 tmpshell += wave_f[ifunc++] * ydist * zdist;
01008 tmpshell += wave_f[ifunc++] * zdist2;
01009 value += tmpshell * contracted_gto;
01010 break;
01011
01012 case F_SHELL:
01013 tmpshell += wave_f[ifunc++] * xdist3;
01014 tmpshell += wave_f[ifunc++] * xdist2 * ydist;
01015 tmpshell += wave_f[ifunc++] * ydist2 * xdist;
01016 tmpshell += wave_f[ifunc++] * ydist3;
01017 tmpshell += wave_f[ifunc++] * xdist2 * zdist;
01018 tmpshell += wave_f[ifunc++] * xdist * ydist * zdist;
01019 tmpshell += wave_f[ifunc++] * ydist2 * zdist;
01020 tmpshell += wave_f[ifunc++] * zdist2 * xdist;
01021 tmpshell += wave_f[ifunc++] * zdist2 * ydist;
01022 tmpshell += wave_f[ifunc++] * zdist3;
01023 value += tmpshell * contracted_gto;
01024 break;
01025
01026 case G_SHELL:
01027 tmpshell += wave_f[ifunc++] * xdist2 * xdist2;
01028 tmpshell += wave_f[ifunc++] * xdist3 * ydist;
01029 tmpshell += wave_f[ifunc++] * xdist2 * ydist2;
01030 tmpshell += wave_f[ifunc++] * ydist3 * xdist;
01031 tmpshell += wave_f[ifunc++] * ydist2 * ydist2;
01032 tmpshell += wave_f[ifunc++] * xdist3 * zdist;
01033 tmpshell += wave_f[ifunc++] * xdist2 * ydist * zdist;
01034 tmpshell += wave_f[ifunc++] * ydist2 * xdist * zdist;
01035 tmpshell += wave_f[ifunc++] * ydist3 * zdist;
01036 tmpshell += wave_f[ifunc++] * xdist2 * zdist2;
01037 tmpshell += wave_f[ifunc++] * zdist2 * xdist * ydist;
01038 tmpshell += wave_f[ifunc++] * ydist2 * zdist2;
01039 tmpshell += wave_f[ifunc++] * zdist3 * xdist;
01040 tmpshell += wave_f[ifunc++] * zdist3 * ydist;
01041 tmpshell += wave_f[ifunc++] * zdist2 * zdist2;
01042 value += tmpshell * contracted_gto;
01043 break;
01044
01045 default:
01046 #if 1
01047
01048 int i, j;
01049 float xdp, ydp, zdp;
01050 float xdiv = 1.0f / xdist;
01051 for (j=0, zdp=1.0f; j<=shelltype; j++, zdp*=zdist) {
01052 int imax = shelltype - j;
01053 for (i=0, ydp=1.0f, xdp=powf(xdist, float(imax)); i<=imax; i++, ydp*=ydist, xdp*=xdiv) {
01054 tmpshell += wave_f[ifunc++] * xdp * ydp * zdp;
01055 }
01056 }
01057 value += tmpshell * contracted_gto;
01058 #else
01059 int i, j, k;
01060 for (k=0; k<=shelltype; k++) {
01061 for (j=0; j<=shelltype; j++) {
01062 for (i=0; i<=shelltype; i++) {
01063 if (i+j+k==shelltype) {
01064 value += wave_f[ifunc++] * contracted_gto
01065 * pow(xdist,i) * pow(ydist,j) * pow(zdist,k);
01066 }
01067 }
01068 }
01069 }
01070 #endif
01071 }
01072
01073 shell_counter++;
01074 }
01075 }
01076
01077
01078 if (density) {
01079 float orbdensity = value * value;
01080 if (value < 0.0)
01081 orbdensity = -orbdensity;
01082 orbitalgrid[gaddrzy + nx] = orbdensity;
01083 } else {
01084 orbitalgrid[gaddrzy + nx] = value;
01085 }
01086 }
01087 }
01088 }
01089
01090 return 0;
01091 }
01092
01093
01094
01095 #if VMDORBUSESSE & defined(__SSE2__)
01096
01097
01098
01099
01100
01101
01102
01103 #ifdef _MSC_VER
01104 # define ALIGN16_BEG __declspec(align(16))
01105 # define ALIGN16_END
01106 #else
01107 # define ALIGN16_BEG
01108 # define ALIGN16_END __attribute__((aligned(16)))
01109 #endif
01110
01111 #define _PS_CONST(Name, Val) \
01112 static const ALIGN16_BEG float _ps_##Name[4] ALIGN16_END = { Val, Val, Val, Val }
01113 #define _PI32_CONST(Name, Val) \
01114 static const ALIGN16_BEG int _pi32_##Name[4] ALIGN16_END = { Val, Val, Val, Val }
01115
01116 _PS_CONST(exp_hi, 88.3762626647949f);
01117 _PS_CONST(exp_lo, -88.3762626647949f);
01118
01119 _PS_CONST(cephes_LOG2EF, 1.44269504088896341);
01120 _PS_CONST(cephes_exp_C1, 0.693359375);
01121 _PS_CONST(cephes_exp_C2, -2.12194440e-4);
01122
01123 _PS_CONST(cephes_exp_p0, 1.9875691500E-4);
01124 _PS_CONST(cephes_exp_p1, 1.3981999507E-3);
01125 _PS_CONST(cephes_exp_p2, 8.3334519073E-3);
01126 _PS_CONST(cephes_exp_p3, 4.1665795894E-2);
01127 _PS_CONST(cephes_exp_p4, 1.6666665459E-1);
01128 _PS_CONST(cephes_exp_p5, 5.0000001201E-1);
01129 _PS_CONST(one, 1.0);
01130 _PS_CONST(half, 0.5);
01131
01132 _PI32_CONST(0x7f, 0x7f);
01133
01134 typedef union xmm_mm_union {
01135 __m128 xmm;
01136 __m64 mm[2];
01137 } xmm_mm_union;
01138
01139 #define COPY_XMM_TO_MM(xmm_, mm0_, mm1_) { \
01140 xmm_mm_union u; u.xmm = xmm_; \
01141 mm0_ = u.mm[0]; \
01142 mm1_ = u.mm[1]; \
01143 }
01144
01145 #define COPY_MM_TO_XMM(mm0_, mm1_, xmm_) { \
01146 xmm_mm_union u; u.mm[0]=mm0_; u.mm[1]=mm1_; xmm_ = u.xmm; \
01147 }
01148
01149 __m128 exp_ps(__m128 x) {
01150 __m128 tmp = _mm_setzero_ps(), fx;
01151 __m64 mm0, mm1;
01152
01153 x = _mm_min_ps(x, *(__m128*)_ps_exp_hi);
01154 x = _mm_max_ps(x, *(__m128*)_ps_exp_lo);
01155
01156
01157 fx = _mm_mul_ps(x, *(__m128*)_ps_cephes_LOG2EF);
01158 fx = _mm_add_ps(fx,*(__m128*)_ps_half);
01159
01160
01161
01162 tmp = _mm_movehl_ps(tmp, fx);
01163 mm0 = _mm_cvttps_pi32(fx);
01164 mm1 = _mm_cvttps_pi32(tmp);
01165
01166 tmp = _mm_cvtpi32x2_ps(mm0, mm1);
01167
01168 __m128 mask = _mm_cmpgt_ps(tmp, fx);
01169 mask = _mm_and_ps(mask, *(__m128*)_ps_one);
01170 fx = _mm_sub_ps(tmp, mask);
01171
01172 tmp = _mm_mul_ps(fx, *(__m128*)_ps_cephes_exp_C1);
01173 __m128 z = _mm_mul_ps(fx, *(__m128*)_ps_cephes_exp_C2);
01174 x = _mm_sub_ps(x, tmp);
01175 x = _mm_sub_ps(x, z);
01176
01177 z = _mm_mul_ps(x,x);
01178
01179 __m128 y = *(__m128*)_ps_cephes_exp_p0;
01180 y = _mm_mul_ps(y, x);
01181 y = _mm_add_ps(y, *(__m128*)_ps_cephes_exp_p1);
01182 y = _mm_mul_ps(y, x);
01183 y = _mm_add_ps(y, *(__m128*)_ps_cephes_exp_p2);
01184 y = _mm_mul_ps(y, x);
01185 y = _mm_add_ps(y, *(__m128*)_ps_cephes_exp_p3);
01186 y = _mm_mul_ps(y, x);
01187 y = _mm_add_ps(y, *(__m128*)_ps_cephes_exp_p4);
01188 y = _mm_mul_ps(y, x);
01189 y = _mm_add_ps(y, *(__m128*)_ps_cephes_exp_p5);
01190 y = _mm_mul_ps(y, z);
01191 y = _mm_add_ps(y, x);
01192 y = _mm_add_ps(y, *(__m128*)_ps_one);
01193
01194
01195 z = _mm_movehl_ps(z, fx);
01196 mm0 = _mm_cvttps_pi32(fx);
01197 mm1 = _mm_cvttps_pi32(z);
01198 mm0 = _mm_add_pi32(mm0, *(__m64*)_pi32_0x7f);
01199 mm1 = _mm_add_pi32(mm1, *(__m64*)_pi32_0x7f);
01200 mm0 = _mm_slli_pi32(mm0, 23);
01201 mm1 = _mm_slli_pi32(mm1, 23);
01202
01203 __m128 pow2n;
01204 COPY_MM_TO_XMM(mm0, mm1, pow2n);
01205
01206 y = _mm_mul_ps(y, pow2n);
01207 _mm_empty();
01208 return y;
01209 }
01210
01211
01212
01213
01214
01215
01216
01217
01218
01219 #if defined(__GNUC__) && ! defined(__INTEL_COMPILER)
01220 #define __align(X) __attribute__((aligned(X) ))
01221 #if (__GNUC__ < 4)
01222 #define MISSING_mm_cvtsd_f64
01223 #endif
01224 #else
01225 #define __align(X) __declspec(align(X) )
01226 #endif
01227
01228 typedef union SSEreg_t {
01229 __m128 f;
01230 __m128i i;
01231 struct {
01232 int r0, r1, r2, r3;
01233 } reg;
01234 } SSEreg;
01235
01236 #define MLOG2EF -1.44269504088896f
01237
01238
01239
01240
01241
01242 #define SCEXP0 1.0000000000000000f
01243 #define SCEXP1 0.6987082824680118f
01244 #define SCEXP2 0.2633174272827404f
01245 #define SCEXP3 0.0923611991471395f
01246 #define SCEXP4 0.0277520543324108f
01247
01248
01249 #define EXPOBIAS 127
01250 #define EXPOSHIFT 23
01251
01252
01253 #define ACUTOFF -10
01254
01255 __m128 aexpfnxsse(__m128 x) {
01256 __align(16) SSEreg scal;
01257 __align(16) SSEreg n;
01258 __align(16) SSEreg y;
01259
01260 scal.f = _mm_cmpge_ps(x, _mm_set_ps1(ACUTOFF));
01261
01262
01263 if (_mm_movemask_ps(scal.f) == 0) {
01264 return _mm_setzero_ps();
01265 }
01266
01267
01268
01269
01270
01271
01272
01273
01274 y.f = _mm_mul_ps(x, _mm_set_ps1(MLOG2EF));
01275 n.i = _mm_cvttps_epi32(y.f);
01276 x = _mm_cvtepi32_ps(n.i);
01277 x = _mm_sub_ps(x, y.f);
01278
01279
01280
01281
01282
01283 y.f = _mm_mul_ps(x, _mm_set_ps1(SCEXP4));
01284 y.f = _mm_add_ps(y.f, _mm_set_ps1(SCEXP3));
01285 y.f = _mm_mul_ps(y.f, x);
01286 y.f = _mm_add_ps(y.f, _mm_set_ps1(SCEXP2));
01287 y.f = _mm_mul_ps(y.f, x);
01288 y.f = _mm_add_ps(y.f, _mm_set_ps1(SCEXP1));
01289 y.f = _mm_mul_ps(y.f, x);
01290 y.f = _mm_add_ps(y.f, _mm_set_ps1(SCEXP0));
01291
01292
01293
01294
01295
01296
01297 n.i = _mm_sub_epi32(_mm_set1_epi32(EXPOBIAS), n.i);
01298 n.i = _mm_slli_epi32(n.i, EXPOSHIFT);
01299 scal.f = _mm_and_ps(scal.f, n.f);
01300 y.f = _mm_mul_ps(y.f, scal.f);
01301
01302 return y.f;
01303 }
01304
01305
01306 int evaluate_grid_sse(int numatoms,
01307 const float *wave_f, const float *basis_array,
01308 const float *atompos,
01309 const int *atom_basis,
01310 const int *num_shells_per_atom,
01311 const int *num_prim_per_shell,
01312 const int *shell_types,
01313 const int *numvoxels,
01314 float voxelsize,
01315 const float *origin,
01316 int density,
01317 float * orbitalgrid) {
01318 if (!orbitalgrid)
01319 return -1;
01320
01321 int nx, ny, nz;
01322 __attribute__((aligned(16))) float sxdelta[4];
01323 for (nx=0; nx<4; nx++)
01324 sxdelta[nx] = ((float) nx) * voxelsize * ANGS_TO_BOHR;
01325
01326
01327
01328 int numgridxy = numvoxels[0]*numvoxels[1];
01329 for (nz=0; nz<numvoxels[2]; nz++) {
01330 float grid_x, grid_y, grid_z;
01331 grid_z = origin[2] + nz * voxelsize;
01332 for (ny=0; ny<numvoxels[1]; ny++) {
01333 grid_y = origin[1] + ny * voxelsize;
01334 int gaddrzy = ny*numvoxels[0] + nz*numgridxy;
01335 for (nx=0; nx<numvoxels[0]; nx+=4) {
01336 grid_x = origin[0] + nx * voxelsize;
01337
01338
01339
01340 int at;
01341 int prim, shell;
01342
01343
01344 __m128 value = _mm_setzero_ps();
01345
01346
01347 int ifunc = 0;
01348 int shell_counter = 0;
01349
01350
01351 for (at=0; at<numatoms; at++) {
01352 int maxshell = num_shells_per_atom[at];
01353 int prim_counter = atom_basis[at];
01354
01355
01356 float sxdist = (grid_x - atompos[3*at ])*ANGS_TO_BOHR;
01357 float sydist = (grid_y - atompos[3*at+1])*ANGS_TO_BOHR;
01358 float szdist = (grid_z - atompos[3*at+2])*ANGS_TO_BOHR;
01359
01360 float sydist2 = sydist*sydist;
01361 float szdist2 = szdist*szdist;
01362 float yzdist2 = sydist2 + szdist2;
01363
01364 __m128 xdelta = _mm_load_ps(&sxdelta[0]);
01365 __m128 xdist = _mm_load_ps1(&sxdist);
01366 xdist = _mm_add_ps(xdist, xdelta);
01367 __m128 ydist = _mm_load_ps1(&sydist);
01368 __m128 zdist = _mm_load_ps1(&szdist);
01369 __m128 xdist2 = _mm_mul_ps(xdist, xdist);
01370 __m128 ydist2 = _mm_mul_ps(ydist, ydist);
01371 __m128 zdist2 = _mm_mul_ps(zdist, zdist);
01372 __m128 dist2 = _mm_load_ps1(&yzdist2);
01373 dist2 = _mm_add_ps(dist2, xdist2);
01374
01375
01376
01377
01378
01379
01380 for (shell=0; shell < maxshell; shell++) {
01381 __m128 contracted_gto = _mm_setzero_ps();
01382
01383
01384
01385
01386
01387
01388
01389
01390
01391 int maxprim = num_prim_per_shell[shell_counter];
01392 int shelltype = shell_types[shell_counter];
01393 for (prim=0; prim<maxprim; prim++) {
01394
01395 float exponent = -basis_array[prim_counter ];
01396 float contract_coeff = basis_array[prim_counter + 1];
01397
01398
01399 __m128 expval = _mm_mul_ps(_mm_load_ps1(&exponent), dist2);
01400
01401 #if 1
01402 __m128 retval = aexpfnxsse(expval);
01403 #else
01404 __m128 retval = exp_ps(expval);
01405 #endif
01406 __m128 ctmp = _mm_mul_ps(_mm_load_ps1(&contract_coeff), retval);
01407 contracted_gto = _mm_add_ps(contracted_gto, ctmp);
01408 prim_counter += 2;
01409 }
01410
01411
01412 __m128 tmpshell = _mm_setzero_ps();
01413 switch (shelltype) {
01414 case S_SHELL:
01415 value = _mm_add_ps(value, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), contracted_gto));
01416 break;
01417
01418 case P_SHELL:
01419 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), xdist));
01420 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), ydist));
01421 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), zdist));
01422 value = _mm_add_ps(value, _mm_mul_ps(tmpshell, contracted_gto));
01423 break;
01424
01425 case D_SHELL:
01426 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), xdist2));
01427 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), _mm_mul_ps(xdist, ydist)));
01428 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), ydist2));
01429 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), _mm_mul_ps(xdist, zdist)));
01430 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), _mm_mul_ps(ydist, zdist)));
01431 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), zdist2));
01432 value = _mm_add_ps(value, _mm_mul_ps(tmpshell, contracted_gto));
01433 break;
01434
01435 case F_SHELL:
01436 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), _mm_mul_ps(xdist2, xdist)));
01437 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), _mm_mul_ps(xdist2, ydist)));
01438 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), _mm_mul_ps(ydist2, xdist)));
01439 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), _mm_mul_ps(ydist2, ydist)));
01440 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), _mm_mul_ps(xdist2, zdist)));
01441 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), _mm_mul_ps(_mm_mul_ps(xdist, ydist), zdist)));
01442 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), _mm_mul_ps(ydist2, zdist)));
01443 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), _mm_mul_ps(zdist2, xdist)));
01444 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), _mm_mul_ps(zdist2, ydist)));
01445 tmpshell = _mm_add_ps(tmpshell, _mm_mul_ps(_mm_load_ps1(&wave_f[ifunc++]), _mm_mul_ps(zdist2, zdist)));
01446 value = _mm_add_ps(value, _mm_mul_ps(tmpshell, contracted_gto));
01447 break;
01448
01449 #if 0
01450 default:
01451
01452 int i, j;
01453 float xdp, ydp, zdp;
01454 float xdiv = 1.0f / xdist;
01455 for (j=0, zdp=1.0f; j<=shelltype; j++, zdp*=zdist) {
01456 int imax = shelltype - j;
01457 for (i=0, ydp=1.0f, xdp=pow(xdist, imax); i<=imax; i++, ydp*=ydist, xdp*=xdiv) {
01458 tmpshell += wave_f[ifunc++] * xdp * ydp * zdp;
01459 }
01460 }
01461 value += tmpshell * contracted_gto;
01462 #endif
01463 }
01464
01465 shell_counter++;
01466 }
01467 }
01468
01469
01470 if (density) {
01471 __m128 mask = _mm_cmplt_ps(value, _mm_setzero_ps());
01472 __m128 sqdensity = _mm_mul_ps(value, value);
01473 __m128 orbdensity = sqdensity;
01474 __m128 nsqdensity = _mm_and_ps(sqdensity, mask);
01475 orbdensity = _mm_sub_ps(orbdensity, nsqdensity);
01476 orbdensity = _mm_sub_ps(orbdensity, nsqdensity);
01477 _mm_storeu_ps(&orbitalgrid[gaddrzy + nx], orbdensity);
01478 } else {
01479 _mm_storeu_ps(&orbitalgrid[gaddrzy + nx], value);
01480 }
01481 }
01482 }
01483 }
01484
01485 return 0;
01486 }
01487
01488 #endif
01489
01490
01491
01492
01493
01494
01495 typedef struct {
01496 int numatoms;
01497 const float *wave_f;
01498 const float *basis_array;
01499 const float *atompos;
01500 const int *atom_basis;
01501 const int *num_shells_per_atom;
01502 const int *num_prim_per_shell;
01503 const int *shell_types;
01504 const int *numvoxels;
01505 float voxelsize;
01506 int density;
01507 const float *origin;
01508 float *orbitalgrid;
01509 } orbthrparms;
01510
01511
01512 extern "C" void * orbitalthread(void *voidparms) {
01513 int numvoxels[3];
01514 float origin[3];
01515 orbthrparms *parms = NULL;
01516 wkf_threadlaunch_getdata(voidparms, (void **) &parms);
01517
01518 numvoxels[0] = parms->numvoxels[0];
01519 numvoxels[1] = parms->numvoxels[1];
01520 numvoxels[2] = 1;
01521
01522 origin[0] = parms->origin[0];
01523 origin[1] = parms->origin[1];
01524
01525
01526 int planesize = numvoxels[0] * numvoxels[1];
01527 wkf_tasktile_t tile;
01528 while (wkf_threadlaunch_next_tile(voidparms, 2, &tile) != WKF_SCHED_DONE) {
01529 int k;
01530 for (k=tile.start; k<tile.end; k++) {
01531 origin[2] = parms->origin[2] + parms->voxelsize * k;
01532 #if VMDORBUSESSE & defined(__SSE2__)
01533 evaluate_grid_sse(parms->numatoms,
01534 parms->wave_f, parms->basis_array,
01535 parms->atompos,
01536 parms->atom_basis,
01537 parms->num_shells_per_atom,
01538 parms->num_prim_per_shell,
01539 parms->shell_types,
01540 numvoxels,
01541 parms->voxelsize,
01542 origin,
01543 parms->density,
01544 parms->orbitalgrid + planesize*k);
01545 #else
01546 evaluate_grid(parms->numatoms,
01547 parms->wave_f, parms->basis_array,
01548 parms->atompos,
01549 parms->atom_basis,
01550 parms->num_shells_per_atom,
01551 parms->num_prim_per_shell,
01552 parms->shell_types,
01553 numvoxels,
01554 parms->voxelsize,
01555 origin,
01556 parms->density,
01557 parms->orbitalgrid + planesize*k);
01558 #endif
01559 }
01560 }
01561
01562 return NULL;
01563 }
01564
01565
01566 int evaluate_grid_fast(int numatoms,
01567 const float *wave_f,
01568 const float *basis_array,
01569 const float *atompos,
01570 const int *atom_basis,
01571 const int *num_shells_per_atom,
01572 const int *num_prim_per_shell,
01573 const int *shell_types,
01574 const int *numvoxels,
01575 float voxelsize,
01576 const float *origin,
01577 int density,
01578 float * orbitalgrid) {
01579 int rc=0;
01580 orbthrparms parms;
01581
01582 parms.numatoms = numatoms;
01583 parms.wave_f = wave_f;
01584 parms.basis_array = basis_array;
01585 parms.atompos = atompos;
01586 parms.atom_basis = atom_basis;
01587 parms.num_shells_per_atom = num_shells_per_atom;
01588 parms.num_prim_per_shell = num_prim_per_shell;
01589 parms.shell_types = shell_types;
01590 parms.numvoxels = numvoxels;
01591 parms.voxelsize = voxelsize;
01592 parms.origin = origin;
01593 parms.density = density;
01594 parms.orbitalgrid = orbitalgrid;
01595
01596 #if defined(VMDTHREADS)
01597 int numprocs = wkf_thread_numprocessors();
01598 #else
01599 int numprocs = 1;
01600 #endif
01601
01602
01603 wkf_tasktile_t tile;
01604 tile.start = 0;
01605 tile.end = numvoxels[2];
01606 rc = wkf_threadlaunch(numprocs, &parms, orbitalthread, &tile);
01607
01608 return rc;
01609 }
01610
01611
01612 void Orbital::print_wavefunction() {
01613
01614
01615 #if !(defined(_MSC_VER) || defined(ARCH_IRIX6) || defined(ARCH_IRIX6_64))
01616 char shellname[6] = {'S', 'P', 'D', 'F', 'G', 'H'};
01617 int ifunc = 0;
01618 int at;
01619 int shell;
01620 for (at=0; at<numatoms; at++) {
01621 for (shell=0; shell < num_shells_per_atom[at]; shell++) {
01622 int shelltype = basis_set[at].shell[shell].type;
01623
01624
01625 int i, j, iang=0;
01626 float xdist=2.0;
01627 float ydist=2.0;
01628 float zdist=2.0;
01629 float xdp, ydp, zdp;
01630 float xdiv = 1.0f / xdist;
01631 for (j=0, zdp=1.0f; j<=shelltype; j++, zdp*=zdist) {
01632 int imax = shelltype - j;
01633 for (i=0, ydp=1.0f, xdp=pow(xdist, imax); i<=imax; i++, ydp*=ydist, xdp*=xdiv) {
01634 printf("%3i %c", at, shellname[shelltype]);
01635 int k, m=0;
01636 char buf[20]; buf[0] = '\0';
01637 for (k=0; k<(int)log2f(xdp); k++, m++) sprintf(buf+m, "x");
01638 for (k=0; k<(int)log2f(ydp); k++, m++) sprintf(buf+m, "y");
01639 for (k=0; k<(int)log2f(zdp); k++, m++) sprintf(buf+m, "z");
01640
01641 printf("%-5s (%1.0f%1.0f%1.0f) wave_f[%3i] = % 11.6f\n", buf,
01642 log2f(xdp), log2f(ydp), log2f(zdp), ifunc, wave_f[ifunc]);
01643
01644 iang++;
01645 ifunc++;
01646 }
01647 }
01648 }
01649 }
01650 #endif
01651
01652 }