/**************************************************************************
*                                                                         *
*  Author      : Dr. Thomas Brandes, GMD, SCAI.LAB                        *
*  Copyright   : GMD St. Augustin, Germany                                *
*  Date        : Jan 92                                                   *
*  Last Update : Oct 93                                                   *
*                                                                         *
*  This Module is part of the DALIB                                       *
*                                                                         *
*  MODULE : reduction.m4                                                  *
*                                                                         *
*  Function: Realization of Global Reductions between Processors          *
*                                                                         *
*  Export :  DALIB                                                        *
*  ===============                                                        *
*                                                                         *
*  Export :  FORTRAN Interface                                            *
*  ===========================                                            *
*                                                                         *
*   void FUNCTION(dalib_reduction) (char *data, int *op)                  *
*                                                                         *
*   void FUNCTION(dalib_pos_reduction) (char *data, int *op)              * 
*   void FUNCTION(dalib_loc_exchange) (char *data, int *size)             * 
*                                                                         *
*   void FUNCTION(dalib_top_reduction) (int *topid, int *topdim,          *
*                             char *data, int *size)                      *
*                                                                         *
**************************************************************************/

#include "dalib.h"

#undef DEBUG

/*******************************************************************
*                                                                  *
*  following routines are imported from (n)combiners.m4            *
*                                                                  *
*******************************************************************/

extern dalib_routine *dalib_get_reduction_fn (/* int op */);  
extern dalib_routine *dalib_get_n_reduction_fn (/* int op */);  

extern dalib_routine *dalib_get_pos_reduction_fn ();  /* from combiners.m4 */

/*******************************************************************
*                                                                  *
*  FORTRAN - Interface                                             *
*                                                                  *
*******************************************************************/

     /*********************************************************
     *                                                        *
     *  dalib_nreduction (char *dat, int *op, int *counter)   *
     *                                                        *
     *********************************************************/

void FUNCTION(dalib_nreduction) (dat, op, counter) 
unsigned char *dat;
int *op;
int *counter;
 
{ int gid;
  int no_elems;      /* vector length of reduction */
  int elem_size;     /* element size of reduction  */

  dalib_routine *f_n_reduction;

  gid = dalib_context_group ();

  no_elems  = *counter;
  elem_size = dalib_get_reduction_size (*op);

  f_n_reduction = dalib_get_n_reduction_fn (*op);
  dalib_set_n_reduction_elems (no_elems);           /* must be set */

  dalib_group_reduce (dat, no_elems * elem_size, f_n_reduction, gid);

} /* dalib_nreduction */

     /*********************************************************
     *                                                        *
     *  dalib_reduction (char *dat, int *op)                  *
     *                                                        *
     *********************************************************/

void FUNCTION(dalib_reduction) (dat, op) 

void *dat;   /* can be of any type, e.g. REAL, INTEGER, ... */
int  *op;

{ int gid;
  int elem_size;

  dalib_routine *f_reduction;    /* reduction function */

  elem_size   = dalib_get_reduction_size (*op);
  f_reduction = dalib_get_reduction_fn   (*op);

  gid = dalib_context_group ();

  dalib_group_reduce (dat, elem_size, f_reduction, gid);

} /* dalib_reduction */

/*******************************************************************
*                                                                  *
*  subroutine DALIB_BARRIER ()                                     *
*                                                                  *
*   - barrier on the active processor subset                       *
*                                                                  *
*******************************************************************/

void FUNCTION(dalib_barrier) () 

{ int group_id;

  group_id = dalib_context_group ();

  dalib_group_barrier (group_id);

} /* dalib_barrier */

     /*********************************************************
     *                                                        *
     *  dalib_top_nreduction (int *topid, int *dim,           *
     *                      char *dat, int *op, int *counter) *
     *                                                        *
     *********************************************************/

void dalib_top_nreduction (topid, dim, dat, op, no_elems)
int topid, dim;
unsigned char *dat;
int op, no_elems;

{ int gid;
  int elem_size;

  dalib_routine *f_n_reduction;

  gid = dalib_top_subgroup (topid, dim);

  elem_size     = dalib_get_reduction_size (op);
  f_n_reduction = dalib_get_n_reduction_fn (op);

  dalib_set_n_reduction_elems (no_elems);           /* must be set */

#ifdef DEBUG
  printf ("reductions of %d x %d, gid = %d, dat = %d\n",
          no_elems, elem_size, gid, dat);
#endif

  dalib_group_reduce (dat, elem_size * no_elems, f_n_reduction, gid);

} /* dalib_top_nreduction */

     /*********************************************************
     *                                                        *
     *  dalib_top_reduction (int *topid, int *dim,            *
     *                      char *dat, int *op, int *counter) *
     *                                                        *
     *********************************************************/

void FUNCTION(dalib_top_reduction) (topid, dim, dat, op) 

int  *topid, *dim;
void *dat;           /* reduction data */
int  *op;

{ int gid;           /* processor group involved in reduction */
  int real_topid;    /* identifier for topology               */
  int elem_size;

  dalib_routine *f_reduction;    /* reduction function */

  elem_size   = dalib_get_reduction_size (*op);
  f_reduction = dalib_get_reduction_fn   (*op);

  real_topid = *topid;

  if (real_topid <= MAX_RANK)

     /* is a default topology, get the topology from the context */

     real_topid = dalib_context_default_top (real_topid);

  gid = dalib_top_subgroup (real_topid, *dim);

  dalib_group_reduce (dat, elem_size, f_reduction, gid);

} /* dalib_top_reduction */

     /**************************************************************
     *                                                             *
     *  dalib_array_dim_reduction (array_info *array_id, int *dim, *
     *                             char *dat, int *op)             *
     *                                                             *
     **************************************************************/

void FUNCTION(dalib_array_dim_reduction) (array_id, dim, dat, op) 

array_info *array_id;

int        *dim;        /* reduction dimension */
void       *dat;        /* reduction data      */
int        *op;         /* reduction operatior */

{ int gid;
  int topid, topdim;
  int elem_size;

  dalib_routine *f_reduction;    /* reduction function */

  elem_size   = dalib_get_reduction_size (*op);
  f_reduction = dalib_get_reduction_fn   (*op);

  dalib_array_map_query (*array_id, *dim, &topid, &topdim);

  if (topid == 0) return;  /* replicated, no reduction */

  gid = dalib_top_subgroup (topid, topdim);

  dalib_group_reduce (dat, elem_size, f_reduction, gid);

} /* dalib_array_dim_reduction */

     /*********************************************************
     *                                                        *
     *  dalib_top_scan (int *topid, int *dim,                 *
     *                  char *dat, int *op, int *counter)     *
     *                                                        *
     *********************************************************/

void FUNCTION(dalib_top_scan) (topid, dim, dat, op, mask, up) 

int  *topid, *dim;
void *dat;
int  *mask;
int  *op;
int *up;

{ int gid;
  int elem_size;

  dalib_routine *f_reduction;    /* reduction function */

  elem_size   = dalib_get_reduction_size (*op);
  f_reduction = dalib_get_reduction_fn   (*op);

  gid = dalib_top_subgroup (*topid, *dim);

  if (*up == 1)  /* prefix, otherwise suffix */

    dalib_group_scan_up (dat, elem_size, f_reduction, gid, *mask);  

   else

    dalib_group_scan_down (dat, elem_size, f_reduction, gid, *mask);

} /* FUNCTION(dalib_top_scan) */ 

     /*********************************************************
     *                                                        *
     *  dalib_pos_reduction (char *dat, int *op)              *
     *                                                        *
     *********************************************************/

static dalib_routine *f_pos_reduction;

static void global_pos_reduction (data, hdata)

{ f_pos_reduction (data, hdata, data+8, hdata+8);
}

int pos_pid;    /* global used for info on position */

void FUNCTION(dalib_pos_reduction) (dat, op) 

void *dat;
int  *op;

{ char *posdat;  /* temporary space for data + its position */
  char *pdat;

  int k, gid;
  int elem_size;

  f_pos_reduction = dalib_get_pos_reduction_fn (*op);
  elem_size   = dalib_get_reduction_size (op);   /* same for pos */

#ifdef DEBUG
   printf ("%d: dalib_pos_reduction, op = %d\n", pcb.i, *op);
#endif

   /* pack dat and position into posdat */

   posdat = (char *) dalib_malloc (elem_size + 8, "process pos reduction");

   * ((int *) posdat) = pcb.i;
   pdat = (char *) dat;
   for (k=0; k<elem_size; k++) posdat[k+8] = pdat[k];

   /* reduction */

   gid = dalib_context_group ();
   dalib_group_reduce (posdat, elem_size+8, global_pos_reduction, gid);

   /* unpack dat and position into posdat */

   pos_pid = * ((int *) posdat);
   for (k=0; k<elem_size; k++)
      pdat[k] = posdat[k+8];

#ifdef DEBUG
   printf ("%d: dalib_pos_reduction, pos = %d\n", pcb.i, pos_pid);
#endif

   dalib_free (posdat, elem_size + 8);

} /* FUNCTION(dalib_pos_reduction) */ 

     /*********************************************************
     *                                                        *
     *  dalib_loc_exchange (char *data, int *size)            *
     *                                                        *
     *********************************************************/

void FUNCTION(dalib_loc_exchange) (data, size) 

void *data;
int  *size;

{ int gid;

  gid = dalib_context_group ();
  dalib_group_bcast (data, *size, pos_pid, gid);

} /* dalib_loc_exchange */

/*****************************************************************************
*                                                                            *
*  FUNCTION(dalib_top_section_reduction) (int *topid, int *topdim,           * 
*                                         section_info *section_id, int *op) *
*                                                                            *
*  - reduction of a (replicated) array section along a topology dimension    *
*                                                                            *
*****************************************************************************/

void FUNCTION(dalib_top_section_reduction) (topid, topdim, section_id, op) 

section_info *section_id;
int *topid, *topdim, *op;

{ int bytes, section_size;
  int is_new;
  int elem_size;

  void *reduction_data;

  extern dalib_routine *dalib_get_n_reduction_fn ();  /* from ncombiners.m4 */
  dalib_routine *f_n_reduction;

  int gid;

  elem_size     = dalib_get_reduction_size(*op);  /* bytes for one element */
  f_n_reduction = dalib_get_n_reduction_fn(*op);  /* get vector reduction  */

  gid = dalib_top_subgroup (*topid, *topdim);

  /* get the data of the section, copy in necessary */

  dalib_secarray_get_data (*section_id, 1, 
                           &section_size, &bytes,
                           &reduction_data, &is_new);

  dalib_set_n_reduction_elems (section_size);     /* set vector length     */

  if (bytes != elem_size)

     { dalib_internal_error ("top_section_reduction: size mismatch");
       printf ("reduction needs %d bytes for one elem, data has %d bytes\n",
                elem_size, bytes);
       dalib_stop ();
     }

#ifdef DEBUG
  printf ("top_section_reduction, top = (%d of %d), %d x %d bytes\n",
           *topid, *topdim, section_size, bytes);
#endif

  dalib_group_reduce (reduction_data, elem_size * section_size, 
                      f_n_reduction, gid);

  if (is_new)

     dalib_secarray_unpack (*section_id, reduction_data);

} /* dalib_top_section_reduction */
