/**************************************************************************
*                                                                         *
*  Author      : Dr. Thomas Brandes, GMD, SCAI.LAB                        *
*  Copyright   : GMD St. Augustin, Germany                                *
*  Date        : Jun 94                                                   *
*  Last Update : Mar 98                                                   *
*                                                                         *
*  This Module is part of the DALIB                                       *
*                                                                         *
*  Module      : groups.m4                                                *
*                                                                         *
*  Function: Definition of Processor Groups                               *
*                                                                         *
*  Export :  internal Interface                                           *
*  ============================                                           *
*                                                                         *
*    void dalib_groups_init (NP)                                          *
*                                                                         *
*       - initializes groups 'all' and 'nodes', no other groups           *
*                                                                         *
*    int dalib_group_create (int size, int first, int step)               *
*                                                                         *
*       - create group with size members, first, first+step, ....         *
*                                                                         *
*    int dalib_group_all ()                                               *
*                                                                         *
*       - special groups for all nodes with host and only nodes           *
*                                                                         *
*    void dalib_group_bcast (char *data, int d_len, int source,           *
*                            int group_id                     )           *
*                                                                         *
*    void dalib_group_reduce (char *data, int d_len, func, int group_id)  *
*                                                                         *
*       - apply a reduction function to data of all group members         *
*         (note: afterwards the result is available for all procs)        *
*                                                                         *
*    UPDATES:                                                             *
*    ========                                                             *
*                                                                         *
*    03/98  :  groups now central data structure for processor subsets    *
*                                                                         *
**************************************************************************/

#include <stdio.h> 
#include "dalib.h"

/* DEBUG is switched off: the printf traces and dalib_group_print that
   are guarded by "#ifdef DEBUG" in this file are not compiled.        */
#undef DEBUG
/* CHECK is switched on: dalib_group_check_valid and the calls to it
   that are guarded by "#ifdef CHECK" are compiled.                    */
#define CHECK

     /*********************************************************
     *                                                        *
     *  Definition of the Data Structures                     *
     *                                                        *
     *********************************************************/

/* Descriptor of one processor group.  The members of a group are the
   processes first, first+step, ..., first+(size-1)*step.  The struct
   is copied byte-wise by dalib_pack_group / dalib_unpack_group.       */

typedef struct

  { int size;     /* size of the group (number of member processes)    */
    int first;    /* first proc in group, its absolute task id         */
    int step;     /* first + k * step, k < size, are other procs       */

  } group_entry;

/* table of all defined groups, the index is the group identification */
static group_entry *groups [MAX_GROUPS];
/* number of entries of groups that are in use (0 .. group_top-1)     */
static int         group_top = 0;

/* identification returned by dalib_group_all, set in dalib_groups_init */
static int         all_group;

     /*********************************************************
     *                                                        *
     *  Error Messages                                        *
     *                                                        *
     *    0 <= group_id < group_top                           *
     *                                                        *
     *********************************************************/

#ifdef CHECK

static void dalib_group_check_valid (group_id)

int group_id;   /* group identification to be verified */

/* Calls dalib_internal_error with an explanatory message unless
   0 <= group_id < group_top; a legal identification passes silently. */

{ char msg[80];

  if ((0 <= group_id) && (group_id < group_top)) return;

  sprintf (msg,
           "dalib_group_check_valid, %d is illegal (must be in 0:%d)\n",
           group_id, group_top - 1);

  dalib_internal_error (msg);

} /* dalib_group_check_valid */

#endif 

     /*********************************************************
     *                                                        *
     *  Printing out all groups                               *
     *                                                        *
     *********************************************************/

#ifdef DEBUG

static void dalib_group_print ()

/* Prints one line per table entry: the id of the calling process
   (pcb.i), the table position counted from 1, the size of the group
   and its first, second and last member id.                          */

{ int pos;
  group_entry *g;

  for (pos = 0; pos < group_top; pos++)

    { g = groups[pos];

      printf ("%d : group %d of size %d = %d, %d, ..., %d\n",
              pcb.i, pos + 1, g->size,
              g->first, g->first + g->step,
              g->first + (g->size - 1) * g->step);
    }

} /* dalib_group_print */
#endif

/**************************************************************************
*                                                                         *
*  void dalib_group_define (int size, int first, int step)                *
*                                                                         *
*    - make a new group entry for processors first, first+step, ...       *
*                                                                         *
**************************************************************************/

static void dalib_group_define (size, first, step)

int size;    /* number of processes in the new group           */
int first;   /* absolute task id of the first process          */
int step;    /* distance between two consecutive process ids   */

/* Appends the entry (size, first, step) to the group table.  The new
   group gets the identification that group_top had before the call.

   The table has a fixed number of slots; if all of them are in use an
   internal error is raised and no entry is made (formerly the table
   was silently overrun).                                             */

{ group_entry *entry;

  if (group_top >= (int) (sizeof (groups) / sizeof (groups[0])))

    { dalib_internal_error ("dalib_group_define: too many groups");
      dalib_stop ();
      return;   /* only reached if dalib_stop comes back */
    }

  entry = (group_entry *) dalib_malloc (sizeof (group_entry),
                                           "dalib_group_define");
  entry->first = first;
  entry->size  = size ;
  entry->step  = step ;

  groups[group_top] = entry;

#ifdef DEBUG
  printf ("%d: new group (id=%d) defined, Pn=%d, P1=%d, Ps=%d\n",
           pcb.i, group_top, size, first, step);
#endif

  group_top ++;

} /* dalib_group_define */

/**************************************************************************
*                                                                         *
*   int dalib_find_group (size, first, step)                              *
*                                                                         *
*    - searching a group entry in the table for specified processor set   *
*    - returns group_top if entry not found                               *
*                                                                         *
**************************************************************************/

static int dalib_find_group (size, first, step)

int size;    /* wanted number of processes      */
int first;   /* wanted first process id         */
int step;    /* wanted stride                   */

/* Linear search through the table for an entry with exactly this
   (size, first, step) triple.  Returns its position, or group_top if
   there is no such entry.                                            */

{ int pos;
  group_entry *g;

  for (pos = 0; pos < group_top; pos++)

    { g = groups[pos];

      if ((g->size == size) && (g->first == first) && (g->step == step))
         return pos;
    }

  return group_top;

} /* dalib_find_group */

/**************************************************************************
*                                                                         *
*  int dalib_group_create (int size, int first, int step)                 *
*                                                                         *
*    - make a new group entry for processors first, first+step, ...       *
*    - new entry only created if processor group does not exist           *
*    - returns internal identification of the new group                   *
*                                                                         *
**************************************************************************/

int dalib_group_create (size, first, step)

int size;    /* number of processes in the group               */
int first;   /* absolute task id of the first process          */
int step;    /* stride between process ids, must be positive   */

/* Returns the identification of the group first, first+step, ...
   with size members.  A new table entry is made only if no entry with
   the same (size, first, step) triple exists so far.

   The parameters are declared explicitly; the definition formerly
   relied on implicit int, which is not valid C since C99.            */

{ int group_id;

  if (step <= 0)

    { dalib_internal_error ("group of step <= 0 is illegal");
      dalib_stop ();
    }

  group_id = dalib_find_group (size, first, step);

  /* not found : group_id is group_top, which is exactly the position
     that dalib_group_define fills next                               */

  if (group_id == group_top)
     dalib_group_define (size, first, step);

#ifdef DEBUG
  printf ("%d: group_create (Pn=%d,P1,=%d,Ps=%d) is group id %d\n",
           pcb.i, size, first, step, group_id);
#endif

  return (group_id);

} /* dalib_group_create */

/**************************************************************************
*                                                                         *
*  int dalib_subgroup_create (int group_id,                               *
*                             int size, int first, int step)              *
*                                                                         *
*   - creates a subprocessor group for a given processor group            *
*                                                                         *
**************************************************************************/

int dalib_subgroup_create (group_id, size, first, step)       

int group_id;  /* group the subgroup is taken from                      */
int size;      /* number of processes in the subgroup                   */
int first;     /* position of the first member, relative to the group,
                  counted from 1                                        */
int step;      /* stride of the subgroup in relative positions          */

/* Translates the relative description (size, first, step) into
   absolute process ids and returns the identification delivered by
   dalib_group_create for the resulting group.                          */

{ group_entry *group;

  int sub_first;   /* absolute task id of the first subgroup member */
  int sub_step;    /* absolute stride of the subgroup               */

#ifdef CHECK
  dalib_group_check_valid (group_id);
#endif

  group = groups[group_id];

  /* Relative position r of a group is the process
     group->first + (r-1) * group->step (see dalib_group_abspos), so
     the offset of the first member must be scaled with the stride of
     the GROUP.  The former code scaled it with the stride of the
     subgroup, which is only right for first == 1 or equal strides.   */

  sub_first = group->first + (first - 1) * group->step;
  sub_step  = group->step * step;

  /* NOTE: it is not verified that the subgroup fits within the group */

#ifdef DEBUG
  printf ("%d: define subgroup of group %d: P1=%d, Ps=%d, Pn=%d\n",
           pcb.i, group_id, sub_first, sub_step, size);
#endif

  return dalib_group_create (size, sub_first, sub_step);

} /* dalib_subgroup_create */

/**************************************************************************
*                                                                         *
*  void dalib_group_set (int first, int rank, int n[], int inc[],         *
*                        int *size, int p_ids[])                          *
*                                                                         *
*  - first, n[0], ..., n[rank-1], inc[0], ..., inc[rank-1]                *
*    specifies processor set first + k0*inc[0] +  ...                     *
*  - lists the size = n[0] * ... * n[rank-1] processors in p_ids          *
*                                                                         *
**************************************************************************/

static void dalib_group_set (first, rank, n, inc, size, p_ids)

int first;    /* id belonging to index 0 in every dimension           */
int rank;     /* number of dimensions                                 */
int n[];      /* n[d]   : number of indexes in dimension d            */
int inc[];    /* inc[d] : id increment for one step in dimension d    */
int *size;    /* out : number of ids written to p_ids                 */
int p_ids[];  /* out : the ids, dimension 0 runs fastest              */

/* Lists the processor set  first + k0*inc[0] + ... + kr*inc[rank-1],
   0 <= kd < n[d], for any rank >= 0 (formerly only the ranks 2, 3 and
   4 were handled, every other rank silently gave an empty set).

   The order is the one of the former nested loops: the index of
   dimension 0 varies fastest.  A negative rank or a non-positive
   extent gives the empty set, rank 0 gives the single id first.      */

{ int total;     /* number of ids of the set                  */
  int k;         /* linear position in p_ids                  */
  int d;         /* dimension                                 */
  int rest;      /* part of k that is not yet decomposed      */
  int id;        /* id that belongs to position k             */

  *size = 0;

  if (rank < 0) return;

  total = 1;

  for (d = 0; d < rank; d++)
    { if (n[d] <= 0) return;     /* empty dimension, empty set */
      total *= n[d];
    }

  for (k = 0; k < total; k++)

    { /* decompose k into the indexes (k0, ..., kr), k0 fastest */

      rest = k;
      id   = first;

      for (d = 0; d < rank; d++)
        { id   += (rest % n[d]) * inc[d];
          rest /= n[d];
        }

      p_ids[k] = id;
    }

  *size = total;

} /* dalib_group_set */

/**************************************************************************
*                                                                         *
*  int dalib_subgroup_make (int group_id, int first, int rank,            *
*                           int n[], int inc[])                           *
*                                                                         *
*  - make a subgroup within group_id by the specified processors          *
*                                                                         *
**************************************************************************/

int dalib_subgroup_make (group_id, first, rank, n, inc)

int group_id;  /* group in which the subgroup is built          */
int first;     /* passed on as 'first' to dalib_subgroup_create */
int rank;      /* number of entries in n and inc                */
int n[];       /* n[d]   : extent of dimension d                */
int inc[];     /* inc[d] : increment of dimension d             */

/* Builds a subgroup of group_id from a multi-dimensional processor
   section.  The section is first passed to dalib_linear_compress; the
   subgroup can only be created if at most one dimension is left
   (presumably the helper merges dimensions that form one regular
   stride - confirm against its definition).  Otherwise an internal
   error is raised, the processors are listed and dalib_stop is
   called.

   Returns the group identification, or -1 if the error branch comes
   back (formerly the function ran off its end without a value).      */

{ int new_rank;
  int new_n[MAX_DIMENSIONS];
  int new_inc [MAX_DIMENSIONS];

  dalib_linear_compress (rank, n, inc, &new_rank, new_n, new_inc);

  if (new_rank == 0)

     return dalib_subgroup_create (group_id, 1, first, 1);

   else if (new_rank == 1)

     return dalib_subgroup_create (group_id, new_n[0], first, new_inc[0]);

   else 

     { int p_ids [MAXP];
       int i, size, total;

       dalib_internal_error ("make subgroup: not of lb:ub:str");

       /* p_ids has MAXP entries only, do not list a larger set */

       total = 1;
       for (i=0; i<new_rank; i++) total *= new_n[i];

       if (total <= MAXP)

         { dalib_group_set (first, new_rank, new_n, new_inc, &size, p_ids);
           printf ("processors : ");
           for (i=0; i<size; i++) printf ("%d ", p_ids[i]);
           printf ("\n");
         }

       dalib_stop ();
     }

  /* only reached if dalib_stop returns; a negative value is never a
     legal group identification (see dalib_group_check_valid)         */

  return -1;

} /* dalib_subgroup_make */

/**************************************************************************
*                                                                         *
*  void dalib_group_info (int group_id, int *size, int *first, int *step) *
*                                                                         *
*   - returns first, size, step for processor group                       *
*                                                                         *
**************************************************************************/

void dalib_group_info (group_id, size, first, step)

int group_id;  /* identification of the group            */
int *size;     /* out : number of processes in the group */
int *first;    /* out : absolute id of the first process */
int *step;     /* out : stride between process ids       */

/* Delivers the three values of the table entry of group_id.  The
   identification is verified like in dalib_group_relpos and
   dalib_group_abspos before the table is accessed.                   */

{ group_entry *group;

#ifdef CHECK
  dalib_group_check_valid (group_id);
#endif

  group = groups[group_id];

  *size  = group->size;
  *first = group->first;
  *step  = group->step;

} /* dalib_group_info */

/*******************************************************************
*                                                                  *
*  int dalib_group_relpos (int group_id, int PId)                  *
*                                                                  *
*   - returns relative position of PId in group  (1<=pos<=size)    *
*   - 0 if PId does not belong to the group                        *
*                                                                  *
*******************************************************************/

int dalib_group_relpos (group_id, pid)

int group_id;
int pid;

{ group_entry *group;

  int first, step, size;
  int NId;

#ifdef CHECK
  dalib_group_check_valid (group_id);
#endif

  group = groups[group_id];

  first = group->first;
  step  = group->step;
  size  = group->size;

  NId   = (pid - first) / step + 1;

  if (NId <= 0)    return 0;
  if (NId >  size) return 0;

  if (first + (NId-1) * step != pid) return 0;

  return NId;

} /* dalib_group_relpos */

/**************************************************************************
*                                                                         *
*   int dalib_group_abspos (int group_id, int pid)                        *
*                                                                         *
*   - 1 <= pid <= dalib_group_size (group_id) specified relative pos      *
*   - returns global processor id for the corresponding element in group  *
*                                                                         *
**************************************************************************/

int dalib_group_abspos (group_id, pid)

int group_id;  /* group in which pid is a position        */
int pid;       /* relative position, counted from 1       */

/* Returns the absolute process id of the pid-th member of the group,
   i.e. first + (pid-1) * step.  pid is not compared with the size of
   the group.

   The parameters are declared explicitly; the definition formerly
   relied on implicit int, which is not valid C since C99.            */

{ group_entry *group;

  int NId;

#ifdef CHECK
  dalib_group_check_valid (group_id);
#endif

  group = groups[group_id];

  NId = group->first + (pid-1) * group->step;

  return NId;

} /* dalib_group_abspos */

/*******************************************************************
*                                                                  *
*   int dalib_group_first (group_id)                               *
*   int dalib_group_size  (group_id)                               *
*   int dalib_group_step  (group_id)                               *
*                                                                  *
*******************************************************************/

int dalib_group_first (group_id)

int group_id;  /* identification of the group */

/* Returns the absolute id of the first process of the group; the
   identification is verified before the table is accessed.        */

{ 
#ifdef CHECK
  dalib_group_check_valid (group_id);
#endif

  return groups[group_id]->first;

} /* dalib_group_first */

int dalib_group_size (group_id)

int group_id;  /* identification of the group */

/* Returns the number of processes of the group; the identification
   is verified before the table is accessed.                        */

{ 
#ifdef CHECK
  dalib_group_check_valid (group_id);
#endif

  return groups[group_id]->size;

} /* dalib_group_size */

int dalib_group_step (group_id)

int group_id;  /* identification of the group */

/* Returns the stride between two process ids of the group; the
   identification is verified before the table is accessed.      */

{ 
#ifdef CHECK
  dalib_group_check_valid (group_id);
#endif

  return groups[group_id]->step;

} /* dalib_group_step */

/*******************************************************************
*                                                                  *
*  void dalib_groups_init (int NP)                                 *
*                                                                  *
*    - creates one group (group 0) for all processors              *
*                                                                  *
*******************************************************************/

void dalib_groups_init (NP)

int NP;  /* number of node processes */

/* With an empty table the group 1, 2, ..., NP is created and kept as
   the 'all' group.  If the table already has entries only the size of
   the 'all' group is set to NP.                                       */

{ if (group_top != 0)

     groups[all_group]->size = NP;      /* table exists, update NP */

  else

     all_group = dalib_group_create (NP, 1, 1);

#ifdef DEBUG
  dalib_group_print ();
#endif

} /* dalib_groups_init */

/**************************************************************************
*                                                                         *
*   int dalib_group_distinct (int g_id1, int g_id2)                       *
*                                                                         *
*   - returns 1 if the two groups have no processor in common             *
*                                                                         *
**************************************************************************/

int dalib_group_distinct (g_id1, g_id2)

int g_id1;   /* identification of the first group  */
int g_id2;   /* identification of the second group */

/* Returns 1 if dalib_range_size reports 0 for the section delivered
   by dalib_intersect_sections for the two groups, and 0 otherwise.

   Each group is handed over as the triple (first, last, step).

   The parameters are declared explicitly; the definition formerly
   relied on implicit int, which is not valid C since C99.            */

{ int P1[3];
  int P2[3];
  int P3[3];

  /* P[0] = first, P[1] = size, P[2] = step */

  dalib_group_info (g_id1, P1+1, P1, P1+2);
  dalib_group_info (g_id2, P2+1, P2, P2+2);

  /* replace the size by the id of the last process */

  P1[1] = P1[0] + (P1[1]-1)*P1[2];
  P2[1] = P2[0] + (P2[1]-1)*P2[2];

  dalib_intersect_sections (P1, P2, P3);
  
#ifdef DEBUG
  printf ("%d: group distinct %d:%d:%d and %d:%d:%d is %d:%d:%d\n",
         pcb.i, P1[0], P1[1], P1[2], P2[0], P2[1], P2[2], P3[0], P3[1], P3[2]);
#endif

  if (dalib_range_size (P3[0], P3[1], P3[2]) == 0) return 1;

  return 0;

} /* dalib_group_distinct */

/*******************************************************************
*                                                                  *
*  void dalib_groups_exit ()                                       *
*                                                                  *
*   - free all groups and allocated memory                         *
*                                                                  *
*******************************************************************/

void dalib_groups_exit ()

/* Releases the table entries with dalib_free, last entry first, and
   marks the table as empty.                                          */

{ int pos;

  pos = group_top;

  while (pos > 0)
    { pos--;
      dalib_free (groups[pos], sizeof(group_entry));
    }

  group_top = 0;

} /* dalib_groups_exit */

/**************************************************************************
*                                                                         *
*   int dalib_group_all ()                                                *
*                                                                         *
*    - returns group of all participating processors (should be 0)        *
*                                                                         *
**************************************************************************/

int dalib_group_all ()

/* Returns the identification stored by dalib_groups_init. */

{ int id;

  id = all_group;

  return id;

} /* dalib_group_all */

     /*************************************************************
     *   dalib_pack_group (buffer, group_id => length)            *
     *************************************************************/

void dalib_pack_group (buffer, group_id, length)

char buffer[];  /* out : receives the bytes of the table entry */
int  group_id;  /* in  : group to be packed                    */
int  *length;   /* out : number of bytes written to buffer     */

/* Copies the group_entry of group_id byte-wise into buffer.

   The validity check is now guarded by CHECK like everywhere else in
   this file: dalib_group_check_valid only exists if CHECK is defined,
   so the former unconditional call did not build without CHECK.      */

{ group_entry *entry;

#ifdef CHECK
  dalib_group_check_valid (group_id);
#endif

  entry = groups[group_id];

  *length = sizeof (group_entry);

  dalib_memcopy (buffer, entry, *length);

#ifdef DEBUG
  printf ("%d: packed group id = %d\n", pcb.i, group_id);
#endif

} /* dalib_pack_group */

     /*************************************************************
     *   dalib_unpack_group (buffer => group_id, length)          *
     *************************************************************/

void dalib_unpack_group (buffer, group_id, length)

char buffer[];   /* in  : image of a group_entry, the layout that
                          dalib_pack_group writes                   */
int  *group_id;  /* out : identification of the group in this table */
int  *length;    /* out : number of bytes taken from buffer         */

/* Copies a group_entry out of buffer and looks the group up with
   dalib_group_create, which makes a new table entry if necessary.   */

{ group_entry packed;

  dalib_memcopy (&packed, buffer, sizeof(packed));

  *length   = sizeof(packed);

  *group_id = dalib_group_create (packed.size, packed.first, packed.step);

#ifdef DEBUG
  printf ("%d: unpacked group %d (size=%d, first=%d, step=%d)\n",
          pcb.i, *group_id, packed.size, packed.first, packed.step);
#endif

} /* dalib_unpack_group */

/*******************************************************************
*                                                                  *
*  dalib_group_bcast (data, size, source, group_id)                *
*                                                                  *
*  - source is position in group (1<=source<=group_size)           *
*                                                                  *
*******************************************************************/

void dalib_group_bcast (data, d_len, source, group_id)

char *data;    /* pointer to the data element              */
int  d_len;    /* size of the data element in bytes        */
int  source;   /* process source sends data of d_len bytes */
int  group_id; /* group for which broadcast is done        */

/*   Broadcast of data from member 'source' to all members of the
     group.  A process that is not a member returns immediately.

     Communication Patterns for broadcast of process source

     source is relative (1 <= source <= size) for the topology 

     0    1    2    3    4    5   src   7    8    9   10  
            <----------------------
       <-   ------------------------------------->
            -----------------> 
            ------->            -------->
            ->        ->        ->        ->        ->

     process id 0 is exclusively reserved for the host

     If source is not the first member it sends the value to the
     first member; afterwards the value is spread from the first
     member with halving distances.  The source itself is left out
     of that second phase as receiver, it has the value already.

     With VT defined the trace region is now also closed when a
     non-member returns early (formerly dalib_trace_off was skipped).

*/

{ int steph, distance;

  group_entry *group;
  int NP;              /* 1, ..., NP relative ids of group */
  int NId;             /* my position in the group         */
  int Npartner;        /* partner position in the group    */
  int P1;              /* absolute position of first       */
  int Pme;             /* my process id                    */
  int Ps;              /* stride between two processes     */

  int Pto;             /* process id to which I send       */
  int Pfrom;           /* process id from which I receive  */

  /*  getting values of the group  */

#if defined(VT)
  dalib_trace_on (TRACE_GROUP_BCAST);
#endif

#ifdef CHECK
  dalib_group_check_valid (group_id);
#endif

  group = groups[group_id];
  NP    = group->size;
  P1    = group->first;
  Ps    = group->step;
  Pme   = pcb.i;

  if (group_id == all_group)  P1 = 1;

  NId = dalib_group_relpos (group_id, Pme); 

 /* 0 : not in group, 1<=NId<=size */

  /* return if process is not in the group, but close the trace
     region that has been opened above                           */

  if (!NId)
    {
#if defined(VT)
      dalib_trace_off (TRACE_GROUP_BCAST);
#endif
      return;
    }

#ifdef DEBUG
  printf ("%d: group_bcast (gid=%d,p1=%d,ps=%d,size=%d,me=%d), len=%d, d=%d\n",
           pcb.i, group_id, P1, Ps, NP, NId, d_len, source);
#endif

        /***************************************
        *                                      *
        *  source  -> 1,  1  <-  source        *
        *                                      *
        ***************************************/

  if (source != 1)   /* send value to process 1 */
    { if (NId==source) 
         { /* I am the source, sending to first task in group */
           Pto = P1;
           dalib_send (Pto, data, d_len, 0);     /* I send to first */
         }
      if (Pme == P1) 
         { /* I am the first task in the group, recv from source */
           Pfrom = P1 + (source-1) * Ps;
           dalib_receive (Pfrom, data, d_len);  /* 1 recvs from source */
         }
    }

        /***************************************
        *                                      *
        *  compute  distance = smallest power  *
        *  of 2 that is not less than NP       *
        *                                      *
        ***************************************/

  distance = 1;
  while (distance < NP)    /* log NP (base 2) loop */
    distance = 2*distance;

  while (distance > 1)
    { steph = distance;
      distance = distance / 2;

           /***************************************
           *                                      *
           *  send the data to next processors    *
           *                                      *
           ***************************************/

      if ( ((NId-1) % steph) == 0)

         { Npartner = NId + distance;

           /* if partner son exists and not source, send the value */

           if ( (Npartner <= NP) && (Npartner != source) )

             { Pto = P1 + (Npartner - 1) * Ps;
#ifdef DEBUG
               printf ("%d sends data to %d\n", Pme, Pto);
#endif
               dalib_send (Pto, data, d_len, 0);
             }
         }

           /***************************************
           *                                      *
           *  recv the data from previous procs   *
           *                                      *
           ***************************************/

      if ( ((NId-1) % steph) == distance)

         { Npartner = NId - distance;

           if (NId != source)

             { /* receive only if I am not the source of broadcast */

               Pfrom = P1 + (Npartner -1) * Ps;

#ifdef DEBUG
               printf ("%d: recvs data from %d\n", Pme, Pfrom);
#endif
               dalib_receive (Pfrom, data, d_len);
             }
         }
    }

  /* now all processes have in data the broadcast value */

#if defined(VT)
  dalib_trace_off (TRACE_GROUP_BCAST);
#endif

}  /* dalib_group_bcast */

/*******************************************************************
*                                                                  *
*  dalib_group_reduce (data, d_len, f, group_id)                   *
*                                                                  *
*******************************************************************/

void dalib_group_reduce (data, d_len, f_reduction, group_id)

char *data;             /* pointer to the data to be reduced       */
int  d_len;             /* size of the data element in bytes       */
void (*f_reduction) (); /* reduction operator                      */
int  group_id;          /* group for which reduction is done       */

/*   Communication Patterns for reduction of processes

     0    1    2    3    4    5    6    7    8    9   10
            <-        <-        <-        <-        <-
            <-------            <--------
            <-----------------
            <-------------------------------------

        brodcast of processor 1

       <-   --->---->---->---->---->---->---->--->

     process id 0 is exclusively reserved for the host

*/

{ int steph, distance;

  group_entry *group;
  int NP;              /* 1, ..., NP relative ids of group */
  int NId;             /* my position in the group         */
  int Npartner;        /* partner position in the group    */
  int P1;              /* absolute position of first       */
  int Pme;             /* my process id                    */
  int Ps;              /* stride between two processes     */

  int Pto;             /* process id to which I send       */
  int Pfrom;           /* process id from which I receive  */

  char *hdata;         /* help buffer for data from others */

  /*  getting values of the group  */

#ifdef CHECK
  dalib_group_check_valid (group_id);
#endif

  group = groups[group_id];
  NP    = group->size;
  P1    = group->first;
  Ps    = group->step;
  Pme   = pcb.i;

  if (group_id == all_group)  P1 = 1;

  NId = dalib_group_relpos (group_id, Pme);  /* 1 <= NId <= size */

  /* return if process is not in the group */

  if (!NId) return;

#ifdef DEBUG
   printf ("%d: group_reduce (gid=%d,size=%d,me=%d), len=%d\n",
            pcb.i, group_id, NP, NId, d_len);
#endif

  hdata = (char *) dalib_malloc (d_len, "dalib_group_reduce");

        /***************************************
        *                                      *
        *  compute  distance = log NP (base 2) *
        *                                      *
        ***************************************/

  distance = 1;
  while (distance < NP)    /* log NP (base 2) loop */

    { steph = 2 * distance;

           /***************************************
           *                                      *
           *  recv the data from next processors  *
           *                                      *
           ***************************************/

      if ( ((NId-1) % steph) == 0)

         { Npartner = NId + distance;

           /* if partner son exists and not source, send the value */

           if (Npartner <= NP)

             { Pfrom = P1 + (Npartner - 1) * Ps;
#ifdef DEBUG
               printf ("%d recvs data from %d\n", Pme, Pfrom);
#endif
               dalib_receive (Pfrom, hdata, d_len);

               f_reduction (data, hdata);

             }
         }

           /***************************************
           *                                      *
           *  send the data to previous procs     *
           *                                      *
           ***************************************/

      if ( ((NId-1) % steph) == distance)

         { Npartner = NId - distance;

           Pto = P1 + (Npartner -1) * Ps;

#ifdef DEBUG
           printf ("%d send data to %d\n", Pme, Pto);
#endif
           dalib_send (Pto, data, d_len, 0);

         }

      distance = steph;
    }

  dalib_free (hdata, d_len);

  /* now broadcast the result to all other processors of the group */

  dalib_group_bcast (data, d_len, 1, group_id);

  /* now all processes have in data the broadcast value */

}  /* dalib_group_reduce */

/*******************************************************************
*                                                                  *
*  dalib_group_scan (data, d_len, f, group_id, mask, up)           *
*                                                                  *
*******************************************************************/
 
void dalib_group_scan_up (data, d_len, f_reduction, group_id, mask)
 
char *data;             /* pointer to the data to be reduced       */
int  d_len;             /* size of the data element in bytes       */
void (*f_reduction) (); /* reduction operator                      */
int  group_id;          /* group for which reduction is done       */
int  mask;              /* true if processor has contribution      */

/*   Communication Patterns for reduction of processes

       1    2    3    4    5    6    7    8    9   10
         <-        <-        <-        <-        <-
         <-------            <--------
         <-----------------
         <-------------------------------------

*/

{ int steph, distance;

  group_entry *group;
  int NP;              /* 1, ..., NP relative ids of group */
  int NId;             /* my position in the group         */
  int Npartner;        /* partner position in the group    */
  int P1;              /* absolute position of first       */
  int Pme;             /* my process id                    */
  int Ps;              /* stride between two processes     */

  int Pto;             /* process id to which I send       */
  int Pfrom;           /* process id from which I receive  */

  char *hdata;         /* help buffer for data from others */
  char *sdata;         /* buffer with scanned data         */

  /*  getting values of the group  */

#ifdef CHECK
  dalib_group_check_valid (group_id);
#endif

  group = groups[group_id];
  NP    = group->size;
  P1    = group->first;
  Ps    = group->step;
  Pme   = pcb.i;

  if (group_id == all_group)  P1 = 1;

  NId = dalib_group_relpos (group_id, Pme);  /* 1 <= NId <= size */

  /* return if process is not in the group */

  if (!NId) return;

#ifdef DEBUG
   printf ("%d: group_scan_up (gid=%d,size=%d,me=%d), len=%d\n",
            pcb.i, group_id, NP, NId, d_len);
#endif

  hdata = (char *) dalib_malloc (d_len, "group_scan_up");
  sdata = (char *) dalib_malloc (d_len, "group_scan_up");

  dalib_memcopy (sdata, data, d_len);

        /***************************************
        *                                      *
        *  compute  distance = log NP (base 2) *
        *                                      *
        ***************************************/

  distance = 1;
  while (distance < NP)    /* log NP (base 2) loop */

    { steph = 2 * distance;  /* 2, 4, 8, 16, 32, 64, ... */

           /***************************************
           *                                      *
           *  recv the data from next processors  *
           *                                      *
           ***************************************/

      if ( ((NId-1) % steph) == 0)

         { Npartner = NId + distance;

           /* if partner son exists and not source, send the value */

           if (Npartner <= NP)

             { Pfrom = P1 + (Npartner - 1) * Ps;
#ifdef DEBUG
               printf ("%d recvs data from %d\n", Pme, Pfrom);
#endif
               dalib_receive (Pfrom, hdata, d_len);

               dalib_send    (Pfrom, sdata,  d_len, 0);

               f_reduction (sdata, hdata);

               /* sdata =  a(NId)...... a(NId+steph-1)  */

             }
         }

           /***************************************
           *                                      *
           *  send the data to previous procs     *
           *                                      *
           ***************************************/

      if ( ((NId-1) % steph) == distance)

         { Npartner = NId - distance;

           Pto = P1 + (Npartner -1) * Ps;

#ifdef DEBUG
           printf ("%d send data to %d\n", Pme, Pto);
#endif

           dalib_send (Pto, sdata, d_len, 0);

           dalib_receive (Pto, hdata, d_len);

           f_reduction (data, hdata);
         }

      distance = steph;
    }

  if (NId == 1)

    {  /* first processor sets its hdata to zero */

       for (steph=0; steph<d_len; steph++)
            hdata [steph] = '\0';

    }

  /* now top down for providing the correct results    */ 

  while (distance > 1)
    { steph = distance;
      distance = distance / 2;
 
           /***************************************
           *                                      *
           *  send the data to next processors    *
           *                                      *
           ***************************************/
 
      if ( ((NId-1) % steph) == 0)
 
         { Npartner = NId + distance;
 
           /* if partner son exists and not source, send the value */
 
           if (Npartner <= NP) 
 
             { Pto = P1 + (Npartner - 1) * Ps;
#ifdef DEBUG
               printf ("%d sends hdata to %d\n", Pme, Pto);
#endif
               dalib_send (Pto, hdata, d_len, 0);
             }
         }
 
           /***************************************
           *                                      *
           *  recv the data from previous procs   *
           *                                      *
           ***************************************/
 
      if ( ((NId-1) % steph) == distance)
 
         { Npartner = NId - distance;
 
           /* receive only if I am not the source of broadcast */
 
           Pfrom = P1 + (Npartner -1) * Ps;
 
#ifdef DEBUG
           printf ("%d recvs data from %d\n", Pme, Pfrom);
#endif
           dalib_receive (Pfrom, sdata, d_len);

           f_reduction (data,  sdata);
           f_reduction (hdata, sdata);
 
         }
    }

  free (sdata);
  free (hdata);
 
}  /* dalib_group_scan_up */


/*******************************************************************
*                                                                  *
*  dalib_group_scan_down (data, d_len, f, group_id, mask)          *
*                                                                  *
*******************************************************************/
 
void dalib_group_scan_down (data, d_len, f_reduction, group_id, mask)
 
char *data;             /* pointer to the data to be reduced       */
int  d_len;             /* size of the data element in bytes       */
void (*f_reduction) (); /* reduction operator, called as f (a, b)  */
int  group_id;          /* group for which reduction is done       */
int  mask;              /* true if processor has contribution      */
                        /* (currently not evaluated here)          */

/*   Scan over the members of a group in two phases that both use a
     binary tree over the relative positions 1 .. NP of the group.

     Communication pattern of the first (bottom-up) phase:

       1    2    3    4    5    6    7    8    9   10
         <-        <-        <-        <-        <-
         <-------            <--------
         <-----------------
         <-------------------------------------

     In this phase the higher position sends its 'data' to the lower
     one, which combines it into its own 'data' with f_reduction.

     The second (top-down) phase walks the tree in opposite direction:
     the lower position sends 'sdata' to the higher one and gets the
     'data' of the higher one back.

     Processes that are not member of the group return immediately.

     NOTE(review): f_reduction (a, b) presumably combines b into a;
     the meaning of the last argument (0) of dalib_send is not visible
     in this routine - confirm against their definitions.
*/

{ int steph, distance;

  group_entry *group;
  int NP;              /* 1, ..., NP relative ids of group */
  int NId;             /* my position in the group         */
  int Npartner;        /* partner position in the group    */
  int P1;              /* absolute position of first       */
  int Pme;             /* my process id                    */
  int Ps;              /* stride between two processes     */

  int Pto;             /* process id to which I send       */
  int Pfrom;           /* process id from which I receive  */

  char *hdata;         /* help buffer for data from others */
  char *sdata;         /* buffer with scanned data         */

  /*  getting values of the group  */

#ifdef CHECK
  dalib_group_check_valid (group_id);
#endif

  group = groups[group_id];
  NP    = group->size;
  P1    = group->first;
  Ps    = group->step;
  Pme   = pcb.i;

  /* for the group of all processes the positions start at id 1 */

  if (group_id == all_group)  P1 = 1;

  NId = dalib_group_relpos (group_id, Pme);  /* 1 <= NId <= size */

  /* return if process is not in the group */

  if (!NId) return;

#ifdef DEBUG
   printf ("%d: group_scan_down (gid=%d,size=%d,me=%d), len=%d\n",
            pcb.i, group_id, NP, NId, d_len);
#endif

  hdata = (char *) dalib_malloc (d_len, "group_scan_down");
  sdata = (char *) dalib_malloc (d_len, "group_scan_down");

  dalib_memcopy (sdata, data, d_len);

        /***************************************
        *                                      *
        *  phase 1 : bottom-up                 *
        *  distance = 1, 2, 4, ... (< NP)      *
        *                                      *
        ***************************************/

  distance = 1;
  while (distance < NP)    /* log NP (base 2) loop */

    { steph = 2 * distance;  /* 2, 4, 8, 16, 32, 64, ... */

           /***************************************
           *                                      *
           *  recv the data from next processors  *
           *                                      *
           ***************************************/

      if ( ((NId-1) % steph) == 0)

         { Npartner = NId + distance;

           /* receive only if the partner position exists */

           if (Npartner <= NP)

             { Pfrom = P1 + (Npartner - 1) * Ps;
#ifdef DEBUG
               printf ("%d recvs data from %d\n", Pme, Pfrom);
#endif
               dalib_receive (Pfrom, hdata, d_len);

               f_reduction (data, hdata);

             }
         }

           /***************************************
           *                                      *
           *  send the data to previous procs     *
           *                                      *
           ***************************************/

      if ( ((NId-1) % steph) == distance)

         { Npartner = NId - distance;

           Pto = P1 + (Npartner -1) * Ps;

#ifdef DEBUG
           printf ("%d send data to %d\n", Pme, Pto);
#endif

           dalib_send (Pto, data, d_len, 0);

         }

      distance = steph;
    }

  /* here distance is the first power of two that is >= NP */

  if (NId == 1)

    {  /* first processor sets its sdata to zero */

       for (steph=0; steph<d_len; steph++)
            sdata [steph] = '\0';

    }

  /* now top down for providing the correct results    */ 

  while (distance > 1)
    { steph = distance;
      distance = distance / 2;
 
           /***************************************
           *                                      *
           *  send the data to next processors    *
           *                                      *
           ***************************************/
 
      if ( ((NId-1) % steph) == 0)
 
         { Npartner = NId + distance;
 
           /* exchange only if the partner position exists */
 
           if (Npartner <= NP) 
 
             { Pto = P1 + (Npartner - 1) * Ps;
#ifdef DEBUG
               printf ("%d sends hdata to %d\n", Pme, Pto);
#endif
               dalib_send (Pto, sdata, d_len, 0);

               /* dalib_receive is called with (pid, buffer, length)
                  like all other receives of this file; the former
                  fourth argument belongs to dalib_send only          */

               dalib_receive (Pto, hdata, d_len);

               f_reduction (sdata, hdata);
              
             }
         }
 
           /***************************************
           *                                      *
           *  recv the data from previous procs   *
           *                                      *
           ***************************************/
 
      if ( ((NId-1) % steph) == distance)
 
         { Npartner = NId - distance;
 
           Pfrom = P1 + (Npartner -1) * Ps;
 
#ifdef DEBUG
           printf ("%d recvs data from %d\n", Pme, Pfrom);
#endif
           /* receive first, then send: the partner does it the
              other way round                                     */

           dalib_receive (Pfrom, sdata, d_len);

           dalib_send    (Pfrom, data, d_len, 0);

           /* the operator takes exactly two arguments, as in all
              other calls of f_reduction                           */

           f_reduction (data, sdata);

         }
    }

  free (sdata);
  free (hdata);
 
}  /* dalib_group_scan_down */

/*******************************************************************
*                                                                  *
*  dalib_group_concat (data, d_len, group_id)                      *
*                                                                  *
*******************************************************************/

void dalib_group_concat (data, d_len, group_id)

char *data;             /* pointer to the data to be reduced       */
int  d_len;             /* size of the data element in bytes       */
int  group_id;          /* group for which reduction is done       */

/*   Communication Patterns for reduction of processes

     size (data) = d_len * NP  (NP number of processors in the group)

     Note: every processor has its data at the right position

     0    1    2    3    4    5    6    7    8    9   10
            <-        <-        <-        <-        <-
            <-------            <--------
            <-----------------
            <-------------------------------------

        brodcast of processor 1

       <-   --->---->---->---->---->---->---->--->

     process id 0 is exclusively reserved for the host

*/

{ int steph, distance;

  group_entry *group;
  int NP;              /* 1, ..., NP relative ids of group */
  int NId;             /* my position in the group         */
  int Npartner;        /* partner position in the group    */
  int P1;              /* absolute position of first       */
  int Pme;             /* my process id                    */
  int Ps;              /* stride between two processes     */

  int Pto;             /* process id to which I send       */
  int Pfrom;           /* process id from which I receive  */

  int size;            /* size of sended/received data     */

  /*  getting values of the group  */

#ifdef CHECK
  dalib_group_check_valid (group_id);
#endif

  group = groups[group_id];
  NP    = group->size;
  P1    = group->first;
  Ps    = group->step;
  Pme   = pcb.i;

  if (group_id == all_group)  P1 = 1;

  if (Pme != 0)
      NId   = (Pme - P1) / Ps + 1;
    else
      NId   = 0;

  /* return if process is not in the group */

  if (group_id != all_group)
     { if (NId <= 0) return;
       if (NId >  NP) return;
       if ((P1 + (NId - 1) * Ps) != Pme)  return;
     }

#ifdef DEBUG
   printf ("%d: concat (gid=%d, NP=%d, me=%d), data_len=%d\n",
            pcb.i, group_id, NP, NId, d_len);
#endif

        /***************************************
        *                                      *
        *  compute  distance = log NP (base 2) *
        *                                      *
        ***************************************/

  distance = 1;

  while (distance < NP)    /* log NP (base 2) loop */

    { steph = 2 * distance;

           /***************************************
           *                                      *
           *  recv the data from next processors  *
           *                                      *
           ***************************************/

      if ( ((NId-1) % steph) == 0)

         { Npartner = NId + distance;

           /* if partner son exists and not source, send the value */

           if (Npartner <= NP)

             { Pfrom = P1 + (Npartner - 1) * Ps;

               if (Npartner + distance <= NP)
                  size = distance * d_len;
                 else
                  size = (NP - Npartner + 1) * d_len;

#ifdef DEBUG
               printf ("%d recvs data (%d bytes) from %d\n", Pme, size, Pfrom);
#endif
               dalib_receive (Pfrom, data + (Npartner-1) * d_len, size);

             }
         }

           /***************************************
           *                                      *
           *  send the data to previous procs     *
           *                                      *
           ***************************************/

      if ( ((NId-1) % steph) == distance)

         { Npartner = NId - distance;

           Pto = P1 + (Npartner -1) * Ps;

           if (NId + distance <= NP)
             size = distance * d_len;
            else
             size = (NP - NId + 1) * d_len;

#ifdef DEBUG
           printf ("%d send data (%d bytes) to %d\n", Pme, size, Pto);
#endif
           dalib_send (Pto, data + (NId-1) * d_len, size, 0);

         }

      distance = steph;
    }

  /* now broadcast the result to all other processors of the group */

  dalib_group_bcast (data, NP * d_len, 1, group_id);

  /* now all processes have in data the broadcast value */

}  /* dalib_group_concat */
