/**************************************************************************
*                                                                         *
*  Author      : Dr. Thomas Brandes, GMD, SCAI.LAB                        *
*  Copyright   : GMD St. Augustin, Germany                                *
*  Date        : Jun 94                                                   *
*  Last Update : Nov 98                                                   *
*                                                                         *
*  This Module is part of the DALIB                                       *
*                                                                         *
*  Module      : group_ops.m4                                             *
*                                                                         *
*  Function: Definition of Processor Groups                               *
*                                                                         *
*  Export :  internal Interface                                           *
*  ============================                                           *
*                                                                         *
*  Updates:                                                               *
*  ========                                                               *
*                                                                         *
*   11/1998     sending/receiving with new routines gsend/greceive        *
*                                                                         *
**************************************************************************/

#include <stdio.h> 
#include "dalib.h"

#ifdef MPI
#include <mpi.h>
#endif

#undef DEBUG
#define CHECK

/*******************************************************************
*                                                                  *
*  void dalib_group_bcast (char *data, int d_len,                  *
*                          int  source, int group_id)              *
*                                                                  *
*  - broadcast of data (contiguous block of d_len bytes) in group  *
*  - source is position in group (1<=source<=group_size) that      *
*    owns the valid data                                           *
*                                                                  *
*******************************************************************/

void dalib_group_bcast (data, d_len, source, group_id)

char *data;    /* pointer to the data element              */
int  d_len;    /* size of the data element in bytes        */
int  source;   /* process source sends data of d_len bytes */
int  group_id; /* group for which broadcast is done        */

/*   Broadcast of d_len bytes at data within the group group_id.

     source is relative (1 <= source <= size) for the topology; an
     invalid source is reported via dalib_internal_error followed by
     dalib_stop.  A process that is not member of the group returns
     without any communication.

     Communication Patterns for broadcast of process source

     1    2    3    4    5   src   7    8    9   10  
       <----------------------
       ------------------------------------->
       -----------------> 
       ------->            -------->
       ->        ->        ->        ->        ->

     Step 1: if source is not position 1, source sends the data to 1.
     Step 2: position 1 is the root of a binary tree; the distance to
             the partner is halved in every round.  source is never
             addressed as receiver in step 2, it already owns the data
             and only forwards it.

*/

{ int steph, distance;

  int NP;              /* 1, ..., NP relative ids of group */
  int NId;             /* my position in the group         */
  int Npartner;        /* partner position in the group    */

  /*  getting values of the group  */

#if defined(VT)
  dalib_trace_on (TRACE_GROUP_BCAST);
#endif

  NP = dalib_group_size (group_id);

  if ((source < 1) || (source > NP))

     { char msg[128];

       sprintf (msg, "group_bcast: invalid source (%d), groupsize(%d) = %d",
                source, group_id, NP);
       dalib_internal_error (msg);
       dalib_stop ();
     }

  NId = dalib_group_position (group_id, pcb.i); 

  /* 0 : not in group, 1<=NId<=size */

  if (!NId)

     { /* process is not in the group: nothing to do, but the trace 
          region opened above must be closed before leaving        */

#if defined(VT)
       dalib_trace_off (TRACE_GROUP_BCAST);
#endif
       return;
     }

#ifdef DEBUG
  printf ("%d: group_bcast (gid = %d, NP = %d, NId = %d), len=%d, d=%d\n",
           pcb.i, group_id, NP, NId, d_len, source);
#endif

        /***************************************
        *                                      *
        *  source  -> 1,  1  <-  source        *
        *                                      *
        ***************************************/

  if (source != 1)   /* send value to process 1 */

    { if (NId==source) 

         { /* I am the source, sending to first task in group */

           dalib_gsend (group_id, 1, data, d_len, 0); 
         }

      if (NId == 1) 

         { /* I am the first task in the group, recv from source */

           dalib_greceive (group_id, source, data, d_len);    
         }
    }

        /***************************************
        *                                      *
        *  distance = smallest power of 2 that *
        *             is not less than NP      *
        *                                      *
        ***************************************/

  distance = 1;
  while (distance < NP)    /* log NP (base 2) loop */
    distance = 2*distance;

  while (distance > 1)
    { steph = distance;
      distance = distance / 2;

           /***************************************
           *                                      *
           *  send the data to next processors    *
           *                                      *
           ***************************************/

      if ( ((NId-1) % steph) == 0)

         { Npartner = NId + distance;

           /* if partner son exists and not source, send the value */

           if ( (Npartner <= NP) && (Npartner != source) )

             { 
#ifdef DEBUG
               /* dalib_group_element is the call the other routines of
                  this module use to print the partner of a position  */

               printf ("%d: bcast send data to %d [group %d: %d -> %d]\n", 
                       pcb.i, dalib_group_element (group_id, Npartner),
                       group_id, NId, Npartner);
#endif

               dalib_gsend (group_id, Npartner, data, d_len, 0);
             }
         }

           /***************************************
           *                                      *
           *  recv the data from previous procs   *
           *                                      *
           ***************************************/

      if ( ((NId-1) % steph) == distance)

         { Npartner = NId - distance;

           if (NId != source)

             { /* receive only if I am not the source of broadcast */

#ifdef DEBUG
               printf ("%d: recvs data from %d\n", 
                       pcb.i, dalib_group_element (group_id, Npartner));
#endif
               dalib_greceive (group_id, Npartner, data, d_len);
             }
         }
    }

  /* now all processes have in data the broadcast value */

#if defined(VT)
  dalib_trace_off (TRACE_GROUP_BCAST);
#endif

}  /* dalib_group_bcast */

/*******************************************************************
*                                                                  *
*  dalib_group_reduce (data, d_len, f, group_id)                   *
*                                                                  *
*******************************************************************/

void dalib_group_reduce (data, d_len, f_reduction, group_id)

char *data;             /* in: own contribution, out: reduced value */
int  d_len;             /* length of data in bytes                  */
void (*f_reduction) (); /* operator, called as f_reduction (a, b)   */
int  group_id;          /* group in which the reduction takes place */

/*   Tree reduction towards group position 1, followed by a broadcast
     of the result from position 1 (dalib_group_bcast).

     Communication pattern of the reduction rounds

     0    1    2    3    4    5    6    7    8    9   10
            <-        <-        <-        <-        <-
            <-------            <--------
            <-----------------
            <-------------------------------------

        broadcast of processor 1

       <-   --->---->---->---->---->---->---->--->

     (original note: process id 0 is exclusively reserved for the host)

     In the round with distance d a position p with (p-1) mod 2d == 0
     receives the buffer of p+d (if that position exists) and hands it
     together with its own data to f_reduction; a position p with
     (p-1) mod 2d == d sends its data to p-d.

     A process that is not member of the group returns immediately.

*/

{ int group_size;      /* number of positions in the group      */
  int my_pos;          /* own position, 0 if not a member       */
  int peer;            /* position of the communication partner */
  int dist;            /* partner distance of the current round */
  int span;            /* 2 * dist                              */
  int offset;          /* (my_pos-1) modulo span                */

  char *recv_buf;      /* takes the contribution of the partner */

  group_size = dalib_group_size (group_id);
  my_pos     = dalib_group_position (group_id, pcb.i);

  if (my_pos == 0) return;

#ifdef DEBUG
   printf ("%d: group_reduce (gid = %d, NP = %d, NId = %d), len=%d\n",
            pcb.i, group_id, group_size, my_pos, d_len);
#endif

  recv_buf = (char *) dalib_malloc (d_len, "dalib_group_reduce");

  /* dist runs through 1, 2, 4, ... while it is less than group_size */

  for (dist = 1; dist < group_size; dist = span)

    { span   = dist + dist;
      offset = (my_pos - 1) % span;

      if (offset == 0)

         { /* receiving side of this round, if the partner exists */

           peer = my_pos + dist;

           if (peer <= group_size)

             {
#ifdef DEBUG
               printf ("%d: reduction recvs data from %d\n", 
                       pcb.i, dalib_group_element (group_id, peer));
#endif
               dalib_greceive (group_id, peer, recv_buf, d_len);

               f_reduction (data, recv_buf);
             }
         }

       else if (offset == dist)

         { /* sending side of this round */

           peer = my_pos - dist;

#ifdef DEBUG
           printf ("%d: reduction send data to %d\n",
                   pcb.i, dalib_group_element (group_id, peer));
#endif
           dalib_gsend (group_id, peer, data, d_len, 0);
         }
    }

  dalib_free (recv_buf, d_len);

  /* position 1 distributes its data to all members of the group */

  dalib_group_bcast (data, d_len, 1, group_id);

}  /* dalib_group_reduce */

/*******************************************************************
*                                                                  *
*  dalib_group_scan_up (data, d_len, f, group_id, mask)            *
*                                                                  *
*******************************************************************/
 
void dalib_group_scan_up (data, d_len, f_reduction, group_id, mask)
 
char *data;             /* pointer to the data to be reduced       */
int  d_len;             /* size of the data element in bytes       */
void (*f_reduction) (); /* reduction operator                      */
int  group_id;          /* group for which reduction is done       */
int  mask;              /* true if processor has contribution      */

/*   Scan over the positions of the group in two phases.

     Phase 1 (bottom up), communication pattern:

       1    2    3    4    5    6    7    8    9   10
         <-        <-        <-        <-        <-
         <-------            <--------
         <-----------------
         <-------------------------------------

       Both partners of a round exchange their sdata; the left one 
       folds the received value into sdata, the right one into data.

     Phase 2 (top down): same tree in reverse order; the left partner
       passes its hdata on, the right partner folds the received value 
       into data and into its own hdata.

     NOTE(review): assuming f_reduction (a, b) combines b into a and
     a buffer of zero bytes is neutral for it, data of position p ends
     up with the combination of the contributions of positions 1..p
     -- confirm against the callers.

     mask is not evaluated in this routine.

     A process that is not member of the group returns immediately.

*/

{ int steph, distance;

  int NP;              /* 1, ..., NP relative ids of group */
  int NId;             /* my position in the group         */
  int Npartner;        /* partner position in the group    */

  char *hdata;         /* help buffer for data from others */
  char *sdata;         /* buffer with scanned data         */

  /*  getting values of the group  */

  NP  = dalib_group_size (group_id);
  NId = dalib_group_position (group_id, pcb.i);

  /* return if process is not in the group */

  if (!NId) return;

#ifdef DEBUG
   printf ("%d: group_scan_up (gid=%d,size=%d,me=%d), len=%d\n",
            pcb.i, group_id, NP, NId, d_len);
#endif

  hdata = (char *) dalib_malloc (d_len, "group_scan_up");
  sdata = (char *) dalib_malloc (d_len, "group_scan_up");

  dalib_memcopy (sdata, data, d_len);

        /***************************************
        *                                      *
        *  bottom up: distance = 1, 2, 4, ...  *
        *                                      *
        ***************************************/

  distance = 1;
  while (distance < NP)    /* log NP (base 2) loop */

    { steph = 2 * distance;  /* 2, 4, 8, 16, 32, 64, ... */

           /***************************************
           *                                      *
           *  exchange with the next processor    *
           *                                      *
           ***************************************/

      if ( ((NId-1) % steph) == 0)

         { Npartner = NId + distance;

           /* exchange only if the partner exists */

           if (Npartner <= NP)

             { 
#ifdef DEBUG
               printf ("%d recvs data from %d\n", 
                       pcb.i, dalib_group_element (group_id, Npartner));
#endif
               dalib_greceive (group_id, Npartner, hdata, d_len);

               /* partner gets sdata as it was before this round */

               dalib_gsend    (group_id, Npartner, sdata, d_len, 0);

               f_reduction (sdata, hdata);

               /* sdata =  a(NId)...... a(NId+steph-1)  */

             }
         }

           /***************************************
           *                                      *
           *  exchange with the previous proc     *
           *                                      *
           ***************************************/

      if ( ((NId-1) % steph) == distance)

         { Npartner = NId - distance;

#ifdef DEBUG
           printf ("%d send data to %d\n", 
                    pcb.i, dalib_group_element (group_id, Npartner));
#endif

           /* send first, then receive: the partner does it in the 
              opposite order                                       */

           dalib_gsend (group_id, Npartner, sdata, d_len, 0);

           dalib_greceive (group_id, Npartner, hdata, d_len);

           f_reduction (data, hdata);
         }

      distance = steph;
    }

  if (NId == 1)

    {  /* first processor sets its hdata to zero */

       for (steph=0; steph<d_len; steph++)
            hdata [steph] = '\0';

    }

  /* now top down for providing the correct results    */ 

  while (distance > 1)
    { steph = distance;
      distance = distance / 2;
 
           /***************************************
           *                                      *
           *  send the data to next processors    *
           *                                      *
           ***************************************/
 
      if ( ((NId-1) % steph) == 0)
 
         { Npartner = NId + distance;
 
           /* if partner exists, send hdata to it */
 
           if (Npartner <= NP) 
 
             { 

#ifdef DEBUG
               printf ("%d sends hdata to %d\n", 
                       pcb.i, dalib_group_element (group_id, Npartner));
#endif
               dalib_gsend (group_id, Npartner, hdata, d_len, 0);
             }
         }
 
           /***************************************
           *                                      *
           *  recv the data from previous procs   *
           *                                      *
           ***************************************/
 
      if ( ((NId-1) % steph) == distance)
 
         { Npartner = NId - distance;
 
#ifdef DEBUG
           printf ("%d recvs data from %d\n", 
                   pcb.i, dalib_group_element (group_id, Npartner));
#endif
           dalib_greceive (group_id, Npartner, sdata, d_len);

           f_reduction (data,  sdata);
           f_reduction (hdata, sdata);
 
         }
    }

  /* buffers come from dalib_malloc, so they are released with its
     counterpart dalib_free (same pairing as in dalib_group_reduce) */

  dalib_free (sdata, d_len);
  dalib_free (hdata, d_len);
 
}  /* dalib_group_scan_up */


/*******************************************************************
*                                                                  *
*  dalib_group_scan_down (data, d_len, f, group_id, mask)          *
*                                                                  *
*******************************************************************/
 
void dalib_group_scan_down (data, d_len, f_reduction, group_id, mask)
 
char *data;             /* pointer to the data to be reduced       */
int  d_len;             /* size of the data element in bytes       */
void (*f_reduction) (); /* reduction operator                      */
int  group_id;          /* group for which reduction is done       */
int  mask;              /* true if processor has contribution      */

/*   Scan over the positions of the group in two phases.

     Phase 1 (bottom up), communication pattern:

       1    2    3    4    5    6    7    8    9   10
         <-        <-        <-        <-        <-
         <-------            <--------
         <-----------------
         <-------------------------------------

       Same communication as the tree reduction: the left partner of 
       a round folds the data of the right partner into its data.

     Phase 2 (top down): the left partner sends its sdata, receives 
       the data of the right partner and folds it into sdata; the
       right partner receives sdata, answers with its data and then 
       folds the received sdata into data.

     NOTE(review): assuming f_reduction (a, b) combines b into a and
     a buffer of zero bytes is neutral for it, data of position p ends
     up with the combination of the contributions of positions p..NP
     -- confirm against the callers.

     mask is not evaluated in this routine.

     A process that is not member of the group returns immediately.

*/

{ int steph, distance;

  int NP;              /* 1, ..., NP relative ids of group */
  int NId;             /* my position in the group         */
  int Npartner;        /* partner position in the group    */

  char *hdata;         /* help buffer for data from others */
  char *sdata;         /* buffer with scanned data         */

  /*  getting values of the group  */

  NP  = dalib_group_size (group_id);
  NId = dalib_group_position (group_id, pcb.i);

  /* return if process is not in the group */

  if (!NId) return;

#ifdef DEBUG
   printf ("%d: group_scan_down (gid=%d,size=%d,me=%d), len=%d\n",
            pcb.i, group_id, NP, NId, d_len);
#endif

  hdata = (char *) dalib_malloc (d_len, "group_scan_down");
  sdata = (char *) dalib_malloc (d_len, "group_scan_down");

  dalib_memcopy (sdata, data, d_len);

        /***************************************
        *                                      *
        *  bottom up: distance = 1, 2, 4, ...  *
        *                                      *
        ***************************************/

  distance = 1;
  while (distance < NP)    /* log NP (base 2) loop */

    { steph = 2 * distance;  /* 2, 4, 8, 16, 32, 64, ... */

           /***************************************
           *                                      *
           *  recv the data from next processors  *
           *                                      *
           ***************************************/

      if ( ((NId-1) % steph) == 0)

         { Npartner = NId + distance;

           /* receive only if the partner exists */

           if (Npartner <= NP)

             { 

#ifdef DEBUG
               printf ("%d recvs data from %d\n", 
                   pcb.i, dalib_group_element (group_id, Npartner));
#endif
               dalib_greceive (group_id, Npartner, hdata, d_len);

               f_reduction (data, hdata);

             }
         }

           /***************************************
           *                                      *
           *  send the data to previous procs     *
           *                                      *
           ***************************************/

      if ( ((NId-1) % steph) == distance)

         { Npartner = NId - distance;

#ifdef DEBUG
           printf ("%d send data to %d\n", 
                   pcb.i, dalib_group_element (group_id, Npartner));
#endif

           dalib_gsend (group_id, Npartner, data, d_len, 0);

         }

      distance = steph;
    }

  if (NId == 1)

    {  /* first processor sets its sdata to zero */

       for (steph=0; steph<d_len; steph++)
            sdata [steph] = '\0';

    }

  /* now top down for providing the correct results    */ 

  while (distance > 1)
    { steph = distance;
      distance = distance / 2;
 
           /***************************************
           *                                      *
           *  exchange with the next processor    *
           *                                      *
           ***************************************/
 
      if ( ((NId-1) % steph) == 0)
 
         { Npartner = NId + distance;
 
           /* exchange only if the partner exists */
 
           if (Npartner <= NP) 
 
             { 
#ifdef DEBUG
               printf ("%d sends hdata to %d\n", 
                        pcb.i, dalib_group_element (group_id, Npartner));
#endif
               dalib_gsend (group_id, Npartner, sdata, d_len, 0);

               /* dalib_greceive takes four arguments, as everywhere 
                  else in this module                                */

               dalib_greceive (group_id, Npartner, hdata, d_len);

               f_reduction (sdata, hdata);
              
             }
         }
 
           /***************************************
           *                                      *
           *  exchange with the previous proc     *
           *                                      *
           ***************************************/
 
      if ( ((NId-1) % steph) == distance)
 
         { Npartner = NId - distance;
 
#ifdef DEBUG
           printf ("%d recvs data from %d\n", 
                   pcb.i, dalib_group_element (group_id, Npartner));
#endif
           dalib_greceive (group_id, Npartner, sdata, d_len);

           /* answer with the own result of phase 1; it has to be sent
              before the received sdata is folded into data          */

           dalib_gsend    (group_id, Npartner, data, d_len, 0);

           f_reduction (data, sdata);

         }
    }

  /* buffers come from dalib_malloc, so they are released with its
     counterpart dalib_free (same pairing as in dalib_group_reduce) */

  dalib_free (sdata, d_len);
  dalib_free (hdata, d_len);
 
}  /* dalib_group_scan_down */

/*******************************************************************
*                                                                  *
*  dalib_group_concat (data, d_len, group_id)                      *
*                                                                  *
*******************************************************************/

void dalib_group_concat (data, d_len, group_id)

char *data;             /* buffer with one slot of d_len bytes per  */
                        /* position of the group                    */
int  d_len;             /* size of one slot in bytes                */
int  group_id;          /* group whose contributions are collected  */

/*   Collects the slots of all positions of the group in data.

     size (data) = d_len * NP  (NP number of processors in the group)

     Note: every processor has its data at the right position, i.e.
           position p owns the slot at data + (p-1) * d_len

     Communication pattern of the collecting rounds

     0    1    2    3    4    5    6    7    8    9   10
            <-        <-        <-        <-        <-
            <-------            <--------
            <-----------------
            <-------------------------------------

        broadcast of processor 1

       <-   --->---->---->---->---->---->---->--->

     (original note: process id 0 is exclusively reserved for the host)

     In the round with distance d the sender p transfers the slots 
     p, ..., p+d-1, cut off at position NP, to position p-d.  Finally
     position 1 broadcasts all NP slots with dalib_group_bcast.

     A process that is not member of the group returns immediately.

*/

{ int group_size;      /* number of positions in the group      */
  int my_pos;          /* own position, 0 if not a member       */
  int peer;            /* position of the communication partner */
  int dist;            /* partner distance of the current round */
  int span;            /* 2 * dist                              */
  int offset;          /* (my_pos-1) modulo span                */
  int slots;           /* number of slots moved in this round   */
  int size;            /* number of bytes moved in this round   */

  group_size = dalib_group_size (group_id);
  my_pos     = dalib_group_position (group_id, pcb.i);

  if (my_pos == 0) return;

#ifdef DEBUG
   printf ("%d: group_concat (gid = %d, NP = %d, NId = %d), data_len=%d\n",
            pcb.i, group_id, group_size, my_pos, d_len);
#endif

  /* dist runs through 1, 2, 4, ... while it is less than group_size */

  for (dist = 1; dist < group_size; dist = span)

    { span   = dist + dist;
      offset = (my_pos - 1) % span;

      if (offset == 0)

         { /* receiving side of this round, if the partner exists */

           peer = my_pos + dist;

           if (peer <= group_size)

             { /* partner delivers dist slots unless the group ends 
                  before                                           */

               slots = dist;
               if (peer + dist > group_size) 
                  slots = group_size - peer + 1;

               size = slots * d_len;

#ifdef DEBUG
               printf ("%d: recvs data (%d bytes) from %d\n", 
                       pcb.i, size, dalib_group_element (group_id, peer));
#endif
               dalib_greceive (group_id, peer, 
                               data + (peer-1) * d_len, size);
             }
         }

       else if (offset == dist)

         { /* sending side of this round */

           peer = my_pos - dist;

           slots = dist;
           if (my_pos + dist > group_size) 
              slots = group_size - my_pos + 1;

           size = slots * d_len;

#ifdef DEBUG
           printf ("%d send data (%d bytes) to %d\n", 
                   pcb.i, size, dalib_group_element (group_id, peer));
#endif
           dalib_gsend (group_id, peer, data + (my_pos-1) * d_len, size, 0);
         }
    }

  /* position 1 distributes the complete buffer in the group */

  dalib_group_bcast (data, group_size * d_len, 1, group_id);

}  /* dalib_group_concat */

/*******************************************************************
*                                                                  *
*  void dalib_group_barrier (int group_id)                         *
*                                                                  *
*    - realization of a barrier in the given group                 *
*                                                                  *
*******************************************************************/

void dummy_reduce (d1, d2)

INTEGER d1;   /* first operand,  ignored */
INTEGER d2;   /* second operand, ignored */

/*   Reduction operator without any effect; dalib_group_barrier
     installs it as operator for dalib_group_reduce.

     NOTE(review): dalib_group_reduce invokes its operator with two
     buffer pointers, whereas the parameters are declared here as 
     INTEGER values; nothing is read from them -- confirm before 
     this routine gets a body.
*/

{
  /* intentionally empty */

} /* dummy_reduce */

void dalib_group_barrier (group_id)

int group_id;   /* group whose processes are synchronized */

/*   Barrier for the processes of the group group_id.

     If group_id is the group returned by dalib_group_all, the native
     barrier is used where the build provides one (CENJU: CJbarrier,
     MPI: MPI_Barrier on MPI_COMM_WORLD).  In all other cases a
     reduction with the empty operator dummy_reduce is carried out in
     the group via dalib_group_reduce.
*/

{ INTEGER data;                  /* dummy value of the reduction */

  int  elem_size;
  dalib_routine *f_reduction;    /* reduction function */

  if (group_id == dalib_group_all())

    { 

       /* shmem_barrier_all (); might be called on SGI, but 
          the right initialization is required                  */

#if defined(CENJU)

       CJbarrier ((long *) 0);
       return;

#elif defined(MPI)

       MPI_Barrier(MPI_COMM_WORLD);
       return;
#endif

     }

  /* fallback: reduction on the context group */

  elem_size   = sizeof(data);
  f_reduction = dummy_reduce;

  data = 0;  /* should not deal with undefined data */

  /* dalib_group_reduce takes the buffer as char *; the explicit cast
     keeps the argument compatible with the K&R style definition   */

  dalib_group_reduce ((char *) &data, elem_size, f_reduction, group_id);

} /* dalib_group_barrier */

