/*************************************************************************/
/*                                                                       */
/*  Copyright (c) 1994 Stanford University                               */
/*                                                                       */
/*  All rights reserved.                                                 */
/*                                                                       */
/*  Permission is given to use, copy, and modify this software for any   */
/*  non-commercial purpose as long as this copyright notice is not       */
/*  removed.  All other uses, including redistribution in whole or in    */
/*  part, are forbidden without prior written permission.                */
/*                                                                       */
/*  This software is provided with absolutely no warranty and no         */
/*  support.                                                             */
/*                                                                       */
/* --------------------------------------------------------------------- */
/*                                                                       */
/*  Modifications of the original Barnes-Hut code (as taken from         */
/*  Stanford's SPLASH-2 distribution) to allow use on Alewife and        */
/*  with CRL are copyright:                                              */
/*                                                                       */
/*  Copyright (C) 1995 Massachusetts Institute of Technology             */
/*                                                                       */
/*************************************************************************/

#include "code.h"
#include <math.h>

/* forward declarations for the tree-construction routines defined
 * later in this file
 */
void    loadtree(bodyptr, unsigned);
bool    intcoord(int [], vector);
int     subindex(int [], int);
cellptr SubdivideLeaf(leafptr, cellptr, unsigned, unsigned);
void    hackcofm(int, unsigned);

/* node allocation (make*) and initialization (Init*) helpers; the
 * unsigned argument is the id of the calling process
 */
cellptr makecell(unsigned);
leafptr makeleaf(unsigned);
cellptr InitCell(cellptr, unsigned);
leafptr InitLeaf(cellptr, unsigned);


/* initialize tree structure for hack force calculation.
 *
 * Resets this process's cell and leaf table counts, inserts every
 * body listed in Local.mybodytab that has nonzero mass into the tree
 * rooted at g_root (via loadtree()), waits at a barrier, computes
 * centers of mass with hackcofm(), and waits at a second barrier.
 *
 * ProcessId: id of the calling process. Process 0 additionally records
 *            the root cell as entry 0 of its cell table; since
 *            hackcofm() walks that table from last to first, the root
 *            is the last cell process 0 handles.
 *
 * A body whose mass is exactly 0.0 is not inserted; a diagnostic naming
 * the process and the body's address is written to stderr instead.
 */

void maketree(unsigned ProcessId)
{
  int     pidx;
  bodyptr p;

  Local.myncell = 0;
  Local.mynleaf = 0;

  if (ProcessId == 0)
  {
#if defined(USE_CRL)
    Local.mycelltab[Local.myncell++] = rgn_rid(g_root);
#else
    Local.mycelltab[Local.myncell++] = g_root;
#endif
  }

  for (pidx=0; pidx<Local.mynbody; pidx++)
  {
#if defined(USE_CRL)
    /* loadtree() expects p mapped with a write operation in progress */
    p = (bodyptr) rgn_map(Local.mybodytab[pidx]);
    rgn_start_write(p);
#else
    p = Local.mybodytab[pidx];
#endif

    if (Mass(p) != 0.0)
      loadtree(p, ProcessId);
    else
      /* ProcessId is unsigned and p is a pointer: print them with
       * matching conversions instead of %d and a truncating int cast
       */
      fprintf(stderr, "Process %u found body %p to have zero mass\n",
              ProcessId, (void *) p);

#if defined(USE_CRL)
    rgn_end_write(p);
    rgn_unmap(p);
#endif
  }

  /* barrier between tree insertion and the center-of-mass pass */
#if defined(USE_CRL)
  rgn_barrier();
#else
  shm_barrier();
#endif

  hackcofm(0, ProcessId);

  /* barrier after the center-of-mass pass */
#if defined(USE_CRL)
  rgn_barrier();
#else
  shm_barrier();
#endif
}


/* descend tree and insert particle
 *
 * p:         body to insert; Parent(p), Level(p) and ChildNum(p) are
 *            written here once a leaf has been found or created
 *            (with USE_CRL, p is a mapped region with a write
 *            operation in progress)
 * ProcessId: id of the calling process, passed through to InitLeaf()
 *            and SubdivideLeaf()
 *
 * Starting at g_root, the loop moves down one node per iteration,
 * choosing the child slot with subindex() from p's integerized
 * position.  Depending on what the slot holds:
 *   - nothing:        a new leaf containing p is created in the slot
 *   - a leaf w/ room: p is appended to the leaf
 *   - a full leaf:    the leaf is replaced by a new cell via
 *                     SubdivideLeaf() and the descent continues
 *   - a cell:         the descent continues into that cell
 *
 * Every modification re-checks the slot after obtaining exclusive
 * access to curr (a CRL write operation, or curr->cell_lock in the
 * non-CRL build), because the slot is first examined without it.
 *
 * error() is called if level reaches 0 before p has been inserted.
 * The return value of intcoord() is ignored, so an out-of-bounds
 * position is not detected here.
 */

void loadtree(bodyptr p, unsigned ProcessId)
{
  int     level, flag;
  int     xp[NDIM];
  int     kidIndex;
  cellptr curr;
  cellptr next;
  cellptr new;
  leafptr le;

  intcoord(xp, Pos(p));

  /* the SPLASH-2 version of barnes goes through a bunch of
   * machinations at this point that look like an attempt to verify
   * that p could plausibly be a member of a tree rooted at the root
   * node that used to get passed in, or something like that. since
   * this code does nothing except complain if an inappropriate node
   * is found (and none ever seems to be), i leave it out in this
   * version.
   */

#if defined(USE_CRL)
  curr = (cellptr) rgn_map(rgn_rid(g_root));
  rgn_start_read(curr);
#else
  curr = g_root;
#endif

  /* kidIndex is selected with curr's own level; afterwards level holds
   * Level(curr) >> 1, which is the value stored into Level(p) when p
   * is inserted directly below curr
   */
  level    = Level(curr);
  kidIndex = subindex(xp, level);
  level  >>= 1;

  /* flag stays TRUE until p has been placed in a leaf */
  flag = TRUE;
  while (flag)
  {
    if (level == 0)
      error("not enough levels in tree\n");

    /* INVARIANT:
     *  curr is mapped with a read in progress
     */

    if (Subp(curr)[kidIndex] == 0)
    {
      /* no child exists for the desired kidIndex yet, so restart
       * operation on curr as a write so we can try to insert the new
       * node
       */
#if defined(USE_CRL)
      rgn_end_read(curr);
      rgn_start_write(curr);
#else
      acquire_lock(&(curr->cell_lock));
#endif

      if (Subp(curr)[kidIndex] == 0)
      {
	/* still no child for the desired kidIndex, so go ahead and
	 * insert new node
	 */
	le = InitLeaf((cellptr) curr, ProcessId);

#if defined(USE_CRL)
	Parent(p)   = rgn_rid(le);
#else
	Parent(p)   = (nodeptr) le;
#endif
	Level(p)    = level;
	ChildNum(p) = le->num_bodies;

	ChildNum(le) = kidIndex;
#if defined(USE_CRL)
	Bodyp(le)[le->num_bodies++] = rgn_rid(p);
	Subp(curr)[kidIndex] = rgn_rid(le);
#else
	Bodyp(le)[le->num_bodies++] = p;
	Subp(curr)[kidIndex] = (nodeptr) le;
#endif

#if defined(USE_CRL)
	rgn_end_write(le);
	rgn_unmap(le);
	rgn_end_write(curr);
	rgn_unmap(curr);
#endif

	flag = FALSE;
      }
      else
      {
	/* somebody beat us to the punch inserting a node for the
	 * desired kidIndex, so restart operation on curr as a read
	 * and continue
	 */
#if defined(USE_CRL)
	rgn_end_write(curr);
	rgn_start_read(curr);
#endif
      }

#if !defined(USE_CRL)
      release_lock(&(curr->cell_lock));
#endif
    }

    /* INVARIANT: exactly one of:
     *  flag is FALSE, nothing mapped
     *  flag is TRUE, curr is mapped with a read in progress
     */

    if (flag)
    {
#if defined(USE_CRL)
      next = (cellptr) rgn_map(Subp(curr)[kidIndex]);
      rgn_end_read(curr);
      rgn_start_read(next);
#else
      next = (cellptr) Subp(curr)[kidIndex];
#endif

      if (Type(next) == LEAF)
      {
	/* reached a leaf, so lock parent (by initiating a write
	 * operation) so we can try to insert the new node
	 */
#if defined(USE_CRL)
	rgn_end_read(next);
	rgn_unmap(next);
	rgn_start_write(curr);
	next = (cellptr) rgn_map(Subp(curr)[kidIndex]);
	rgn_start_write(next);
#else
	acquire_lock(&(curr->cell_lock));
	next = (cellptr) Subp(curr)[kidIndex];
#endif

	/* INVARIANT:
	 *  curr and next mapped with writes in progress
	 */

	/* the child slot was re-read above with curr held exclusively,
	 * so its type has to be checked again
	 */
	if (Type(next) == LEAF)
	{
	  /* child is still a leaf, try to insert
	   */
	  le = (leafptr) next;

	  if (le->num_bodies == MAX_BODIES_PER_LEAF)
	  {
	    /* ack, leaf is full, so subdivide it (and go around the
	     * big while loop again)
	     */
	    new = SubdivideLeaf(le, curr, level, ProcessId);
#if defined(USE_CRL)
	    Subp(curr)[kidIndex] = rgn_rid(new);
#else
	    Subp(curr)[kidIndex] = (nodeptr) new;
#endif

#if defined(USE_CRL)
	    rgn_end_write(curr);
#endif

	    /* SubdivideLeaf() takes care of ending write operation on
	     * next and unmapping it
	     */

	    next = new;
#if defined(USE_CRL)
	    rgn_start_read(next);
#endif
	  }
	  else
	  {
	    /* insert body into leaf
	     */
#if defined(USE_CRL)
	    Parent(p)   = rgn_rid(le);
#else
	    Parent(p)   = (nodeptr) le;
#endif
	    Level(p)    = level;
	    ChildNum(p) = le->num_bodies;
#if defined(USE_CRL)
	    Bodyp(le)[le->num_bodies++] = rgn_rid(p);
#else
	    Bodyp(le)[le->num_bodies++] = p;
#endif

#if defined(USE_CRL)
	    rgn_end_write(curr);
	    rgn_unmap(curr);
	    rgn_end_write(next);
	    rgn_unmap(next);
#endif

	    flag = FALSE;
	  }
	}
	else
	{
	  /* child is no longer a leaf, somebody must have beat us to
	   * the punch
	   */
#if defined(USE_CRL)
	  rgn_end_write(curr);
	  rgn_end_write(next);
	  rgn_start_read(next);
#endif
	}

#if !defined(USE_CRL)
	release_lock(&(curr->cell_lock));
#endif
      }
    }

    /* INVARIANT: exactly one of:
     *  flag is FALSE, nothing mapped
     *  flag is TRUE, curr is mapped, next is mapped with read in progress
     */

    if (flag)
    {
      /* move one level down the tree
       */
#if defined(USE_CRL)
      rgn_unmap(curr);
#endif
      curr     = next;
      kidIndex = subindex(xp, level);
      level    = level >> 1;
    }
  }
}


/* compute integerized coordinates
 *
 * xp: output, one integer coordinate per dimension
 * rp: position to convert
 *
 * Each component of rp is expressed as a fraction of the box
 * described by rmin and rsize.  When that fraction lies in [0, 1) the
 * matching entry of xp is set to floor(IMAX * fraction); otherwise the
 * entry is left untouched.
 *
 * returns: TRUE unless rp was out of bounds in at least one dimension
 */

bool intcoord(int xp[], vector rp)
{
  bool all_inside = TRUE;
  int  axis;

  for (axis = 0; axis < NDIM; axis++)
  {
    double frac = (rp[axis] - rmin[axis]) / rsize;

    if ((frac >= 0.0) && (frac < 1.0))
    {
      xp[axis] = floor(IMAX * frac);
    }
    else
    {
      /* outside the box along this axis: xp[axis] is not written */
      all_inside = FALSE;
    }
  }

  return all_inside;
}


/* determine which subcell to select
 *
 * x: integerized coordinates, one entry per dimension
 * l: bit mask tested against every coordinate
 *
 * Dimension k contributes NSUB >> (k + 1) to the result whenever its
 * masked bit differs from whether the previous dimension contributed
 * (dimension 0 is compared against "did not contribute").
 *
 * returns: the sum of those contributions
 */

int subindex(int x[NDIM], int l)
{
  int dim;
  int slot = 0;
  int contributed = 0;   /* 1 if the previous dimension added to slot */

  for (dim = 0; dim < NDIM; dim++)
  {
    int bit_set = ((x[dim] & l) != 0);

    /* exclusive-or of this dimension's bit with the previous outcome */
    contributed = (bit_set != contributed);

    if (contributed)
      slot += NSUB >> (dim + 1);
  }

  return slot;
}


/* replace a leaf by a new cell and redistribute the leaf's bodies
 * among leaves below that cell
 *
 * le:        leaf to subdivide; it is emptied and reused as the child
 *            leaf that receives the first body
 * parent:    cell under which the new cell is created (passed to
 *            InitCell())
 * level:     mask handed to subindex() to pick each body's slot in the
 *            new cell; bodies and the reused leaf get level >> 1
 * ProcessId: id of the calling process, passed to InitCell() and
 *            InitLeaf()
 *
 * returns: the new cell.  This function does not store the new cell
 * in parent's child table; the caller does that.  With USE_CRL the
 * write operation on the returned cell has been ended but the cell is
 * still mapped.
 *
 * bodies[0] is accessed unconditionally, so le must hold at least one
 * body (the caller in this file only subdivides a full leaf).
 *
 * le and parent are mapped regions with write operations in progress
 * (before returning, end write operation on le and unmap it)
 */
cellptr SubdivideLeaf(leafptr le, cellptr parent, unsigned level, unsigned ProcessId)
{
  int     i, index;
  int     num_bodies;
  int     xp[NDIM];
  cellptr c;
#if defined(USE_CRL)
  rid_t   bodies[MAX_BODIES_PER_LEAF];
#else
  bodyptr bodies[MAX_BODIES_PER_LEAF];
#endif
  bodyptr p;

  /* first copy leaf's bodies to temp array, so we can reuse the leaf
   */
  num_bodies = le->num_bodies;
  for (i=0; i<num_bodies; i++)
  {
    bodies[i] = Bodyp(le)[i];
    Bodyp(le)[i] = 0;
  }
  le->num_bodies = 0;

  /* create the parent cell for this subtree
   */
  c = InitCell(parent, ProcessId);
  /* the new cell takes over the slot number the leaf had */
  ChildNum(c) = ChildNum(le);

  /* do first particle separately, so we can reuse le
   */
#if defined(USE_CRL)
  p = (bodyptr) rgn_map(bodies[0]);
  rgn_start_write(p);
#else
  p = bodies[0];
#endif

  /* hang the (now empty) leaf below the new cell, in the slot chosen
   * by the first body's position
   */
  intcoord(xp, Pos(p));
  index = subindex(xp, level);
#if defined(USE_CRL)
  Subp(c)[index] = rgn_rid(le);
#else
  Subp(c)[index] = (nodeptr) le;
#endif
  ChildNum(le)   = index;
#if defined(USE_CRL)
  Parent(le)     = rgn_rid(c);
#else
  Parent(le)     = (nodeptr) c;
#endif
  Level(le)      = level >> 1;

  /* set stuff for body
   */
#if defined(USE_CRL)
  Parent(p)   = rgn_rid(le);
#else
  Parent(p)   = (nodeptr) le;
#endif
  ChildNum(p) = le->num_bodies;
  Level(p)    = level >> 1;

  /* insert the body
   */
#if defined(USE_CRL)
  Bodyp(le)[le->num_bodies++] = rgn_rid(p);
#else
  Bodyp(le)[le->num_bodies++] = p;
#endif

#if defined(USE_CRL)
  rgn_end_write(le);
  rgn_unmap(le);
  rgn_end_write(p);
  rgn_unmap(p);
#endif

  /* now handle the rest
   */
  for (i=1; i<num_bodies; i++)
  {
#if defined(USE_CRL)
    p = (bodyptr) rgn_map(bodies[i]);
    rgn_start_write(p);
#else
    p = bodies[i];
#endif

    intcoord(xp, Pos(p));
    index = subindex(xp, level);

    if (Subp(c)[index] == 0)
    {
      /* slot is empty: create a fresh leaf for it */
      le = InitLeaf(c, ProcessId);
      ChildNum(le)   = index;
#if defined(USE_CRL)
      Subp(c)[index] = rgn_rid(le);
#else
      Subp(c)[index] = (nodeptr) le;
#endif
    }
    else
    {
      /* slot already holds a leaf filled earlier in this function */
#if defined(USE_CRL)
      le = (leafptr) rgn_map(Subp(c)[index]);
      rgn_start_write(le);
#else
      le = (leafptr) Subp(c)[index];
#endif
    }

#if defined(USE_CRL)
    Parent(p)   = rgn_rid(le);
#else
    Parent(p)   = (nodeptr) le;
#endif
    ChildNum(p) = le->num_bodies;
    Level(p)    = level >> 1;
#if defined(USE_CRL)
    Bodyp(le)[le->num_bodies++] = rgn_rid(p);
#else
    Bodyp(le)[le->num_bodies++] = p;
#endif

#if defined(USE_CRL)
    rgn_end_write(le);
    rgn_unmap(le);
    rgn_end_write(p);
    rgn_unmap(p);
#endif
  }

  /* end the write that InitCell() left in progress; c stays mapped */
#if defined(USE_CRL)
  rgn_end_write(c);
#endif

  return c;
}


/* descend tree finding center-of-mass coordinates
 *
 * nc:        not referenced in this function
 * ProcessId: not referenced in this function
 *
 * First pass: for every leaf in Local.myleaftab, Mass and Cost become
 * the sums over the leaf's bodies and Pos becomes the mass-weighted
 * mean of the bodies' positions.
 *
 * Second pass: for every cell in Local.mycelltab, the same quantities
 * are accumulated from the cell's non-empty children, after waiting
 * for each child's Done flag; the child's Done flag is cleared once
 * its values have been consumed.
 *
 * With QUADPOLE defined, Quad is accumulated as well in both passes.
 * Each processed leaf or cell has its own Done flag set to TRUE.
 */

void hackcofm(int nc, unsigned ProcessId)
{
  int     i;
  int     lidx, cidx;
  nodeptr r;
  leafptr l;
  bodyptr p;
  cellptr q;
  vector  tmpv;
#ifdef QUADPOLE
  vector  dr;
  real    drsq;
  matrix  drdr, Idrsq, tmpm;
#endif

  /* Leaves and cells are visited from the last table entry to the
   * first, i.e. in reverse of the order in which they were created;
   * this way, we look at child cells before parents
   */

  for (lidx=(Local.mynleaf-1); lidx>=0; lidx--)
  {
#if defined(USE_CRL)
    l = (leafptr) rgn_map(Local.myleaftab[lidx]);
    rgn_start_write(l);
#else
    l = Local.myleaftab[lidx];
#endif

    Mass(l) = 0.0;
    Cost(l) = 0;
    CLRV(Pos(l));

    /* accumulate total mass, total cost and mass-weighted position */
    for (i=0; i<l->num_bodies; i++)
    {
#if defined(USE_CRL)
      p = (bodyptr) rgn_map(Bodyp(l)[i]);
      rgn_start_read(p);
#else
      p = Bodyp(l)[i];
#endif

      Mass(l) += Mass(p);
      Cost(l) += Cost(p);
      MULVS(tmpv, Pos(p), Mass(p));
      ADDV(Pos(l), Pos(l), tmpv);

#if defined(USE_CRL)
      rgn_end_read(p);
      rgn_unmap(p);
#endif
    }
    /* divide by the total mass to get the center of mass */
    DIVVS(Pos(l), Pos(l), Mass(l));

#ifdef QUADPOLE
    /* per body: add Mass(p) * (3 * dr dr - |dr|^2 * I), with dr taken
     * relative to the leaf's center of mass (assuming OUTVP is the
     * outer product and SETMI the identity -- confirm in the macro
     * definitions)
     */
    CLRM(Quad(l));
    for (i=0; i<l->num_bodies; i++)
    {
#if defined(USE_CRL)
      p = (bodyptr) rgn_map(Bodyp(l)[i]);
      rgn_start_read(p);
#else
      p = Bodyp(l)[i];
#endif

      SUBV(dr, Pos(p), Pos(l));
      OUTVP(drdr, dr, dr);
      DOTVP(drsq, dr, dr);
      SETMI(Idrsq);
      MULMS(Idrsq, Idrsq, drsq);
      MULMS(tmpm, drdr, 3.0);
      SUBM(tmpm, tmpm, Idrsq);
      MULMS(tmpm, tmpm, Mass(p));
      ADDM(Quad(l), Quad(l), tmpm);

#if defined(USE_CRL)
      rgn_end_read(p);
      rgn_unmap(p);
#endif
    }
#endif

    /* mark the leaf as ready for the cell pass below */
    Done(l) = TRUE;

#if defined(USE_CRL)
    rgn_end_write(l);
    rgn_unmap(l);
#endif
  }

  for (cidx=(Local.myncell-1); cidx>=0; cidx--)
  {
#if defined(USE_CRL)
    q = rgn_map(Local.mycelltab[cidx]);
    rgn_start_write(q);
#else
    q = Local.mycelltab[cidx];
#endif

    Mass(q) = 0.0;
    Cost(q) = 0;
    CLRV(Pos(q));

    for (i=0; i<NSUB; i++)
    {
      if (Subp(q)[i] != 0)
      {
#if defined(USE_CRL)
	r = rgn_map(Subp(q)[i]);
	rgn_start_write(r);
#else
	r = Subp(q)[i];
#endif

	/* wait until the child has been marked Done.  With USE_CRL the
	 * write operation is ended and restarted on every iteration
	 * (presumably so whoever computes r can get access -- confirm).
	 * NOTE(review): in the non-CRL build this is an empty busy-wait
	 * that depends on Done(r) being re-read each time around --
	 * confirm the field's declaration guarantees that.
	 */
	while (!Done(r))
	{
#if defined(USE_CRL)
	  rgn_end_write(r);
	  rgn_start_write(r);
#endif
	}

	Mass(q) += Mass(r);
	Cost(q) += Cost(r);
	MULVS(tmpv, Pos(r), Mass(r));
	ADDV(Pos(q), Pos(q), tmpv);
	/* child's values have been consumed; clear its flag */
	Done(r) = FALSE;

#if defined(USE_CRL)
	rgn_end_write(r);
	rgn_unmap(r);
#endif
      }
    }
    /* divide by the total mass to get the center of mass */
    DIVVS(Pos(q), Pos(q), Mass(q));

#ifdef QUADPOLE
    /* same accumulation as for leaves, plus each child's own Quad */
    CLRM(Quad(q));
    for (i=0; i<NSUB; i++)
    {
      if (Subp(q)[i] != 0)
      {
#if defined(USE_CRL)
	r = rgn_map(Subp(q)[i]);
	rgn_start_read(r);
#else
	r = Subp(q)[i];
#endif

	SUBV(dr, Pos(r), Pos(q));
	OUTVP(drdr, dr, dr);
	DOTVP(drsq, dr, dr);
	SETMI(Idrsq);
	MULMS(Idrsq, Idrsq, drsq);
	MULMS(tmpm, drdr, 3.0);
	SUBM(tmpm, tmpm, Idrsq);
	MULMS(tmpm, tmpm, Mass(r));
	ADDM(tmpm, tmpm, Quad(r));
	ADDM(Quad(q), Quad(q), tmpm);

#if defined(USE_CRL)
	rgn_end_read(r);
	rgn_unmap(r);
#endif
      }
    }
#endif

    /* mark the cell as ready for whoever handles its parent */
    Done(q) = TRUE;

#if defined(USE_CRL)
    rgn_end_write(q);
    rgn_unmap(q);
#endif
  }
}


/* obtain a cell from makecell() and link it to its parent
 *
 * parent:    NULL, or a mapped region with an operation in progress
 *            that allows Level(parent) to be read
 * ProcessId: id of the calling process, passed to makecell()
 *
 * With a parent, the new cell's Level is half the parent's and its
 * Parent refers to the parent; without one, Level is IMAX >> 1 and
 * Parent is 0.  ChildNum is always set to 0.
 *
 * returns: the new cell, with write operation in progress
 */
cellptr InitCell(cellptr parent, unsigned ProcessId)
{
  cellptr newcell = makecell(ProcessId);

  ChildNum(newcell) = 0;

  if (parent != NULL)
  {
    Level(newcell)  = Level(parent) >> 1;
#if defined(USE_CRL)
    Parent(newcell) = rgn_rid(parent);
#else
    Parent(newcell) = (nodeptr) parent;
#endif
  }
  else
  {
    /* no parent: top-level values */
    Level(newcell)  = IMAX >> 1;
    Parent(newcell) = 0;
  }

  return newcell;
}


/* obtain a leaf from makeleaf() and link it to its parent
 *
 * parent:    NULL, or a mapped region with an operation in progress
 *            that allows Level(parent) to be read
 * ProcessId: id of the calling process, passed to makeleaf()
 *
 * With a parent, the new leaf's Level is half the parent's and its
 * Parent refers to the parent; without one, Level is IMAX >> 1 and
 * Parent is 0.  ChildNum is always set to 0.
 *
 * returns: the new leaf, with write operation in progress
 */
leafptr InitLeaf(cellptr parent, unsigned ProcessId)
{
  leafptr newleaf = makeleaf(ProcessId);

  ChildNum(newleaf) = 0;

  if (parent != NULL)
  {
    Level(newleaf)  = Level(parent) >> 1;
#if defined(USE_CRL)
    Parent(newleaf) = rgn_rid(parent);
#else
    Parent(newleaf) = (nodeptr) parent;
#endif
  }
  else
  {
    /* no parent: top-level values */
    Level(newleaf)  = IMAX >> 1;
    Parent(newleaf) = 0;
  }

  return newleaf;
}


/* allocate new cell (returns cell with write operation in progress)
 */

cellptr makecell(unsigned ProcessId)
{
  int     i;
  int     mycell;
#if defined(USE_CRL)
  rid_t   c_tmp;
#else
  cellptr c_tmp;
#endif
  cellptr c;

  if (Local.mynumcell == maxmycell)
    error("makecell: Proc %d needs more than %d cells; increase fcells\n",
	  ProcessId, maxmycell);

  mycell = Local.mynumcell++;
#if defined(USE_CRL)
  c_tmp = Local.ctab[mycell];
#else
  c_tmp = &(Local.ctab[mycell]);
#endif
  Local.mycelltab[Local.myncell++] = c_tmp;

#if defined(USE_CRL)
  c = (cellptr) rgn_map(c_tmp);
  rgn_start_write(c);
#else
  c = c_tmp;
#endif

  Type(c) = CELL;
  Done(c) = FALSE;
  Mass(c) = 0.0;

  for (i=0; i<NSUB; i++)
    Subp(c)[i] = 0;

  return c;
}


/* allocate new leaf (returns leaf with write operation in progress)
 */

leafptr makeleaf(unsigned ProcessId)
{
  int     i;
  int     myleaf;
#if defined(USE_CRL)
  rid_t   le_tmp;
#else
  leafptr le_tmp;
#endif
  leafptr le;

  if (Local.mynumleaf == maxmyleaf)
    error("makeleaf: Proc %d needs more than %d leaves; increase fleaves\n",
	  ProcessId, maxmyleaf);

  myleaf = Local.mynumleaf++;
#if defined(USE_CRL)
  le_tmp = Local.ltab[myleaf];
#else
  le_tmp = &(Local.ltab[myleaf]);
#endif
  Local.myleaftab[Local.mynleaf++] = le_tmp;

#if defined(USE_CRL)
  le = (leafptr) rgn_map(le_tmp);
  rgn_start_write(le);
#else
  le = le_tmp;
#endif

  Type(le) = LEAF;
  Done(le) = FALSE;
  Mass(le) = 0.0;
  le->num_bodies = 0;

  for (i=0; i<MAX_BODIES_PER_LEAF; i++)
    Bodyp(le)[i] = 0;

  return le;
}
