/*
 * Copyright (C) 2004-2006 by Wei Wang.  All rights reserved.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "preconditioner.h"
#include "utilities.h"
#include "helper.h"

/*
 * Scratch vector used by the routines in this file.  It is allocated by
 * preconditioner_init() with 3 * natoms entries, and only when the requested
 * degree is positive; preconditioner_solve() passes it to Nmultiply() as the
 * output buffer; preconditioner_destroy() frees it.
 * NOTE(review): this is one file-scope pointer, not a per-descriptor field,
 * so presumably only one preconditioner with degree > 0 may be live at a
 * time -- confirm against the callers.
 */
static MD_Double *workspace = NULL;

/* Computes v = N*u; file-local helper called from preconditioner_solve(). */
static void Nmultiply(struct Preconditioner_Tag *precond,
                      const MD_Double *u, MD_Double *v);


/*
 * Fill in a preconditioner descriptor and allocate its private storage.
 *
 *   precond    descriptor to initialise (must not be NULL)
 *   natoms     number of atoms (>= 0)
 *   rcut       cutoff radius (>= 0)
 *   pos, diag  caller-owned arrays; only the pointers are stored
 *   systemsize stored by value
 *   excllist   exclusion lists; only the pointer is stored
 *   maxneibrs  capacity of each per-atom neighbor row (>= 0)
 *   neibrlist, nneibrs
 *              coarse-grained neighbor list and its per-atom counts; only
 *              the pointers are stored
 *   degree     number of correction sweeps (>= 0)
 *
 * When degree > 0 the function allocates the per-atom neighbor counts, one
 * neighbor row of maxneibrs entries per atom, and the file-scope workspace
 * of 3 * natoms entries.  With degree == 0 nothing is allocated.  A short
 * summary is printed to stdout.  Always returns OK.
 */
MD_Errcode preconditioner_init(struct Preconditioner_Tag *precond,
                               const MD_Int natoms, const MD_Double rcut,
                               MD_Dvec *pos, MD_Double *diag,
                               MD_Dvec systemsize, MD_Int **excllist,
                               const MD_Int maxneibrs, MD_Int **neibrlist,
                               MD_Int *nneibrs, const MD_Int degree)
{
  MD_Int atom;

  assert(NULL != precond);
  assert(natoms >= 0 && rcut >= 0.0 && NULL != pos && NULL != diag &&
         maxneibrs >= 0 && degree >= 0);

  /* scalar parameters */
  precond->natoms     = natoms;
  precond->maxneibrs  = maxneibrs;
  precond->degree     = degree;
  precond->rcut       = rcut;
  precond->systemsize = systemsize;

  /* borrowed pointers: the caller keeps ownership of all of these */
  precond->ppos             = pos;
  precond->pdiag            = diag;
  precond->pexcllist        = excllist;
  precond->coarse_neibrlist = neibrlist;
  precond->coarse_nneibrs   = nneibrs;

  if (precond->degree > 0) {
    const size_t atom_count = (size_t)precond->natoms;
    const size_t row_length = (size_t)precond->maxneibrs;

    precond->nneibrs = my_calloc(atom_count, sizeof(MD_Int),
                                 "# of neighbors array");
    precond->neibrlist = my_calloc(atom_count, sizeof(MD_Int *),
                                   "neighborlist in preconditioner");
    atom = 0;
    while (atom < precond->natoms) {
      precond->neibrlist[atom] = my_calloc(row_length, sizeof(MD_Int),
                                           "neibri");
      atom++;
    }
    /* three components per atom */
    workspace = my_calloc(atom_count * 3, sizeof(MD_Double), "workspace");
  }

  printf("Preconditioner specification: \n");
  printf("  cutoff radius : %f\n", precond->rcut);
  printf("  inverse degree: %d\n", precond->degree);
  printf("  max. neibrs   : %d\n", maxneibrs);

  return OK;
}


/*
 * Release the storage allocated by preconditioner_init() and zero the
 * descriptor.
 *
 * Only descriptors with degree > 0 own any memory: the per-atom neighbor
 * rows, the row-pointer array, the neighbor-count array and the file-scope
 * workspace.  The arrays handed to preconditioner_init() by the caller are
 * not touched.
 *
 * The workspace pointer is reset to NULL after being freed.  It is a
 * file-scope variable that outlives the descriptor, so leaving it dangling
 * would turn a later destroy of another descriptor into a double free and a
 * later solve into a write to freed memory.
 *
 * Always returns OK.
 */
MD_Errcode preconditioner_destroy(struct Preconditioner_Tag *precond)
{
  if (precond->degree > 0) {
    MD_Int i;

    /* the row pointers live inside neibrlist, so free them first */
    if (NULL != precond->neibrlist) {
      for (i = 0; i < precond->natoms; i++) free(precond->neibrlist[i]);
    }
    free(precond->neibrlist);
    free(precond->nneibrs);

    free(workspace);
    workspace = NULL;  /* free(NULL) is a no-op, so a repeated free is safe */
  }
  /* leaves degree == 0, so calling destroy again on this descriptor frees nothing */
  memset(precond, 0, sizeof(struct Preconditioner_Tag));
  return OK;
}


/*
 * Build the preconditioner's own neighbor list from the coarse-grained one.
 *
 * For every atom i, each candidate j from the coarse list is kept when
 *   - j does not appear in i's exclusion list, and
 *   - the squared length of pos[i] - pos[j], after SIMPLE_BOUND_VEC has been
 *     applied with the system size, is smaller than rcut^2.
 * Kept indices are written to precond->neibrlist[i] and their number to
 * precond->nneibrs[i].  Nothing is done when degree is 0.
 *
 * Each row must end up with strictly fewer than maxneibrs entries; this is
 * asserted before every store, so a row is never written past its end.
 *
 * NOTE(review): the exclusion scan walks forward until it meets an entry
 * >= j and has no other stop condition, so each exclusion list is presumably
 * sorted ascending and terminated by a sentinel larger than any atom index
 * -- confirm against the code that builds excllist.
 */
void preconditioner_setup(struct Preconditioner_Tag *precond)
{
  const MD_Double rcut2 = precond->rcut * precond->rcut;
  const MD_Dvec *pos = precond->ppos;
  const MD_Int *nneibr = precond->coarse_nneibrs;
  MD_Int **neibrlist   = precond->coarse_neibrlist;
  MD_Int **excllist    = precond->pexcllist;
  MD_Int *mynneibrs    = precond->nneibrs;
  MD_Int **myneibrlist = precond->neibrlist;
  MD_Int *myneibrlisti, *excli;
  const MD_Dvec systemsize = precond->systemsize;
  MD_Dvec posi, rij;
  const MD_Int natoms = precond->natoms;
  MD_Int i, j, nn, jj, in;
  MD_Int *ineibr;
#ifdef DEBUG_PRECONDITIONER
  MD_Int maxneibrs = 0;
  MD_Int totneibrs = 0;
#endif

  if (0 == precond->degree) return; /* no need to prepare */
  ASSERT(NULL != excllist);
  for (i = 0; i < natoms; i++) {
    nn = nneibr[i];
    ineibr = neibrlist[i];
    posi = pos[i];
    in = 0;                       /* neighbors kept so far for atom i */
    myneibrlisti = myneibrlist[i];
    for (jj = 0; jj < nn; jj++) {
      j = ineibr[jj];
      /* restart the scan for every candidate: the coarse list is not
       * assumed to be ordered */
      excli = excllist[i];
      while (*excli < j) excli++;
      if (j == *excli) continue;  /* excluded pair */
      MD_vec_substract(posi, pos[j], rij);
      SIMPLE_BOUND_VEC(rij, systemsize);
      if (MD_vec_dot(rij, rij) < rcut2) {
        /* hard boundary: check the capacity BEFORE the store, so that even
         * a zero-length row is never written to */
        ASSERT(in + 1 < precond->maxneibrs);
        myneibrlisti[in++] = j;
      }
    }
    mynneibrs[i] = in;
#ifdef DEBUG_PRECONDITIONER
    if (in > maxneibrs) maxneibrs = in;
    totneibrs += mynneibrs[i];
#endif
  }

#ifdef DEBUG_PRECONDITIONER
  printf("max. neibrs is %d, avg neibrs = %f\n", maxneibrs, totneibrs / (1.0 * natoms));
#endif
}


/*
 * Apply the off-diagonal operator: v = N*u.
 *
 * u and v each hold 3 * natoms values, three consecutive components per
 * atom.  v is cleared first.  For every atom i and every j stored in the
 * preconditioner's neighbor row of i, the 3x3 block
 *
 *     N_ij = 1/r^3 * (I - 3 * r r^T / r^2),   r = pos[i] - pos[j]
 *
 * (r taken after SIMPLE_BOUND_VEC with the system size) is applied twice:
 * to u_j, accumulating into v_i, and to u_i, accumulating into v_j.
 *
 * NOTE(review): since both directions are handled per stored entry, the
 * neighbor list presumably contains each pair only once -- confirm against
 * the coarse list that preconditioner_setup() filters.
 *
 * Internal linkage comes from the static forward declaration at the top of
 * the file.
 */
void Nmultiply(struct Preconditioner_Tag *precond,
               const MD_Double *u, MD_Double *v)
{
  const MD_Int natoms = precond->natoms;
  const MD_Int veclen = natoms * 3;
  const MD_Dvec *pos = precond->ppos;
  const MD_Int *counts = precond->nneibrs;
  MD_Int **rows = precond->neibrlist;
  const MD_Dvec systemsize = precond->systemsize;
  MD_Int i, k;

  for (k = 0; k < veclen; k++) {
    v[k] = 0.0;
  }

  for (i = 0; i < natoms; i++) {
    const MD_Double *ui = u + 3*i;
    MD_Double *vi = v + 3*i;
    const MD_Int nn = counts[i];
    const MD_Int *row = rows[i];
    MD_Dvec posi, rij;
    /* contributions to v_i are summed in locals and stored once per atom */
    MD_Double sum_x = 0.0, sum_y = 0.0, sum_z = 0.0;
    MD_Int jj;

    posi = pos[i];
    for (jj = 0; jj < nn; jj++) {
      const MD_Int j = row[jj];
      const MD_Double *uj = u + 3*j;
      MD_Double *vj = v + 3*j;
      MD_Double inv_rsqr, inv_rcube, proj_j, proj_i;

      MD_vec_substract(posi, pos[j], rij);
      SIMPLE_BOUND_VEC(rij, systemsize);
      inv_rsqr = 1.0/MD_vec_dot(rij, rij);
      inv_rcube = inv_rsqr * sqrt(inv_rsqr);

      /* 3 * (r . u) / r^2 for each of the two vectors involved */
      proj_j = 3.0 * (rij.x*uj[X] + rij.y*uj[Y] + rij.z*uj[Z]) * inv_rsqr;
      proj_i = 3.0 * (rij.x*ui[X] + rij.y*ui[Y] + rij.z*ui[Z]) * inv_rsqr;

      /* row i: N_ij * u_j */
      sum_x += (uj[X] - proj_j * rij.x) * inv_rcube;
      sum_y += (uj[Y] - proj_j * rij.y) * inv_rcube;
      sum_z += (uj[Z] - proj_j * rij.z) * inv_rcube;

      /* row j: N_ij * u_i, using the same block */
      vj[X] += (ui[X] - proj_i * rij.x) * inv_rcube;
      vj[Y] += (ui[Y] - proj_i * rij.y) * inv_rcube;
      vj[Z] += (ui[Z] - proj_i * rij.z) * inv_rcube;
    }

    /* "+=" because earlier atoms may already have written into v_i */
    vi[X] += sum_x;
    vi[Y] += sum_y;
    vi[Z] += sum_z;
  }
}


/*
 * Apply the approximate inverse of (D + N) to rhs and store the result in
 * solution.
 *
 * D is diagonal (precond->pdiag) and N is symmetric with a zero diagonal,
 * so (D + N) = (I + N*inv(D)) * D and its inverse has the expansion
 *
 *       x  = inv(D) * (I - N*inv(D) + (N*inv(D))^2 - ...) * y
 *
 * Cutting the series off after a fixed number of terms is the same as
 * running the recurrence
 *
 *       x0 = inv(D) * y
 *       x1 = inv(D) * (y - N*x0)
 *       x2 = inv(D) * (y - N*x1)
 *       ...
 *
 * and stopping at x_degree.  The preconditioners realised this way are
 *
 *       M0^{-1} = inv(D)
 *       M1^{-1} = inv(D) * (I - N*inv(D))
 *       M2^{-1} = inv(D) * (I - N*inv(D) + (-N*inv(D))^2)
 *       ...
 *
 * Two things to keep in mind:
 *   (1) what is applied to y is the inverse of Mi, never Mi itself;
 *   (2) every Mi is symmetric by construction.
 *
 * rhs and solution each hold 3 * natoms values.  The file-scope workspace
 * receives N*x in every sweep; it is not touched when degree is 0.
 */
void preconditioner_solve(struct Preconditioner_Tag *precond,
                          const MD_Double *rhs, MD_Double *solution)
{
  const MD_Double *d = precond->pdiag;
  const MD_Int n = precond->natoms * 3;
  const MD_Int sweeps = precond->degree;
  MD_Int k, done;

  /* x0 = inv(D) * y */
  for (k = 0; k < n; k++) {
    solution[k] = rhs[k] / d[k];
  }

  /* x_m = inv(D) * (y - N * x_{m-1}),  m = 1 .. degree */
  done = 0;
  while (done < sweeps) {
    Nmultiply(precond, solution, workspace);
    for (k = 0; k < n; k++) {
      solution[k] = (rhs[k] - workspace[k])/d[k];
    }
    done++;
  }
}

