/**************************************************************************
*                                                                         *
*  Author      : Dr. Thomas Brandes, GMD, SCAI.LAB                        *
*  Copyright   : GMD St. Augustin, Germany                                *
*  Date        : Feb 95                                                   *
*  Last Update : Oct 97                                                   *
*                                                                         *
*  This Module is part of the DALIB                                       *
*                                                                         *
*  Module      : assign.m4                                                *
*                                                                         *
*  Function    : structured assignment with section descriptors           *
*                                                                         *
*  EXPORT : FORTRAN Interface                                             *
*                                                                         *
*     void FUNCTION(dalib_assign)                                         *
*                                                                         *
*                   (array_info/section_info *target_section,             *
*                    array_info/section_info *source_section,             *
*                                                                         *
*     void FUNCTION(dalib_assign_permute)                                 *
*                                                                         *
*                   (array_info/section_info *target_section,             *
*                    array_info/section_info *source_section,             *
*                    int *p1, ..., int *p7)                               *
*                                                                         *
*  IMPORT:                                                                *
*                                                                         *
*  UPDATES:                                                               *
*                                                                         *
*   10/95  :  assignment works now for aligned arrays                     *
*   09/97  :  assignment improved for replication of distributed arrays   *
*                                                                         *
**************************************************************************/

#undef DEBUG

#include "dalib.h"
#include <stdio.h>

/*******************************************************************
*                                                                  *
*   GLOBAL DATA FOR MOVING of SECTIONS                             *
*                                                                  *
*******************************************************************/

   /* two constants used for indexing the global data */

#define SOURCE 0
#define TARGET 1 

       /************************************************
       *                                               *
       *   Information about the both sections         *
       *                                               *
       ************************************************/

static void *       local_id   [2];   /* descriptor of local section part */
static section_info section_id [2];
static array_info   array_id   [2];
static int          array_dims [2][MAX_DIMENSIONS];

/* Note : section can have fixed elements in some dimensions, so
          rank of section might be smaller than rank of array           

          array_dims[...][sec_dim]  is array dim of sec_dim (1..rank)    */

       /************************************************
       *                                               *
       *   Information about the schedules             *
       *                                               *
       ************************************************/

static int send_schedules, recv_schedules, total_schedules;

static int all_sched_ids  [MAX_DIMENSIONS];

static int send_sched_ids [MAX_RANK];
static int send_index_dims[MAX_RANK];   /* 1 <= send_index_dims[i] <= rank */
static int send_top_dims  [MAX_RANK];

static int recv_sched_ids [MAX_RANK];
static int recv_index_dims[MAX_RANK];   /* 1 <= recv_index_dims[i] <= rank */
static int recv_top_dims  [MAX_RANK];

       /************************************************
       *                                               *
       *   Information about the topologies            *
       *                                               *
       ************************************************/

static int topology [2];            /* identification of topology           */
static int topology_ids [2][MAXP];  /* processors in the topology           */
static int topology_full [2];       /* is 1 if full topology is involved    */
static int topology_ref [2];        /* reference processor in this topology */
static int topology_mask [2];       /* true if in topology                  */

       /************************************************
       *                                               *
       *   Permutation data                            *
       *                                               *
       ************************************************/

static int is_permute;
static int permute_vals [MAX_DIMENSIONS];

/*******************************************************************
*                                                                  *
*  void dalib_restrict_top_dim (kind, top_dim, top_pos)            *
*                                                                  *
*   - in topology[kind] for top_dim only top_pos plays the game    *
*   - fix in topology_mask and update topology_ref processor       *
*                                                                  *
*******************************************************************/

static void dalib_restrict_top_dim (kind, top_dim, top_pos)
int kind, top_dim, top_pos;

{ int top_id;
  int NP, NId;
  int dist;

  top_id = topology [kind];

  /* update the mask for this topology */

  dalib_top_info (top_id, top_dim, &NP, &NId);

  if (NId != top_pos)
     topology_mask [kind] = 0;    /* no longer in topology */

  /* update the reference */
  
  dist = dalib_top_distance (top_id, top_dim);

  topology_ref[kind] += (top_pos - 1) * dist;

} /* dalib_restrict_top_dim */

/*******************************************************************
*                                                                  *
*  void dalib_set_topology (SOURCE/TARGET)                         *
*                                                                  *
*******************************************************************/

static void dalib_set_topology (kind)

{ array_info template_id;

  int top_id, top_rank, top_dim, top_pos;
  int dist_kind, idim;
  int NP, NId;

  dalib_array_info (array_id[kind], &template_id, &top_id);

  topology[kind] = top_id;

  if  (top_id == 0)

     { /* replicated topology */

       int group, first;

       group = dalib_context_group ();
       first = dalib_group_first (group);

       topology_mask [kind]    = (pcb.i == first);
       topology_ids  [kind][0] = first;  /* one single processor only */
       topology_ref  [kind]    = 0;
       topology_full [kind]    = 0;

     }

   else 

     { /* node processor topology */

       int size;

       topology_mask [kind] = dalib_in_topology (top_id);
       topology_ref  [kind] = 0;
       topology_full [kind] = 1;

       dalib_group_all_elements (dalib_top_group (top_id), &size,
                                 topology_ids[kind]);

       top_rank = dalib_top_rank (top_id);

       /* check for replicated and embedded dimensions */

       for (top_dim=1; top_dim <= top_rank; top_dim++)

        { dalib_array_top_query (array_id[kind], top_dim,
                                 &dist_kind, &idim, &top_pos);

          if (dist_kind == kREPLICATED_DIM)

             { /* info about replicated dimensions is used later */

               topology_full [kind] = 0;
 
               dalib_top_info (top_id, top_dim, &NP, &NId);

               /* only first processor will play the game */

               if (NId != 1) topology_mask [kind] = 0;
  
#ifdef DEBUG
               printf ("%d: replicated dimension (topid=%d,dim=%d)\n",
                       pcb.i, top_id, top_dim);
#endif

               /* topology ref will not change */

             }

          if (dist_kind == kEMBEDDED_DIM)

             { dalib_restrict_top_dim (kind, top_dim, top_pos);
               topology_full [kind] = 0;
             }

        } /* for all dimensions of topology */

     }

#ifdef DEBUG
   printf ("%d: set topology (%d), mask = %d, ref = %d\n",
           pcb.i, kind, topology_mask[kind], topology_ref[kind]);
#endif

} /* dalib_set_topology */

/*******************************************************************
*                                                                  *
*  void dalib_set_send_schedule (int schedule_id, int pid,         *
*                                local_info local_id, int dim)     *
*                                                                  *
*******************************************************************/

void dalib_set_send_schedule (schedule_id, pid, local_id, index_dim)

int schedule_id;
int pid;
void *local_id;
int index_dim;

{ int lb, ub, str, okay;
  int N;
  int *values;

  dalib_schedule_send_range (schedule_id, pid, &okay, &lb, &ub, &str);

  if (okay)

    { dalib_local_set_range (local_id, index_dim, lb, ub, str);
      return;
    }

  dalib_schedule_send_values (schedule_id, pid, &N, &values);
  dalib_local_set_indirect (local_id, index_dim, N, values);

} /* dalib_set_send_schedule */

/*******************************************************************
*                                                                  *
*  void dalib_set_recv_schedule (int schedule_id, int pid,         *
*                                local_info local_id, int dim)     *
*                                                                  *
*******************************************************************/

void dalib_set_recv_schedule (schedule_id, pid, local_id, index_dim)

int schedule_id;
int pid;
void *local_id;
int index_dim;

{ int lb, ub, str, okay;
  int N;
  int *values;

  dalib_schedule_recv_range (schedule_id, pid, &okay, &lb, &ub, &str);

  if (okay)

    { dalib_local_set_range (local_id, index_dim, lb, ub, str);
      return;
    }

  dalib_schedule_recv_values (schedule_id, pid, &N, &values);
  dalib_local_set_indirect (local_id, index_dim, N, values);

} /* dalib_set_recv_schedule */

/*******************************************************************
*                                                                  *
*  void dalib_fix_topology (SOURCE/TARGET,                         *
*                           int index_dim, int_index_val)          *
*                                                                  *
*   - dim is an array dimension which is fixed in section          *
*     (e.g. A (....,5,....) ), 0 < index_dim <= rank               *
*                                                                  *
*   - val is the corresponding value at this dimension             *
*                                                                  *
*******************************************************************/

static void dalib_fix_topology (kind, index_dim, index_val)

int kind, index_dim, index_val;

{ int top_id, top_dim, top_pos;
  int base, stride, lb, ub;
  int dist_kind;

  int new_index_val;

  DistDim    mapping;

  dalib_array_dim_mapping (array_id[kind], index_dim,
                           &base, &stride, &lb, &ub,
                           &top_id, &mapping);

  if (top_id <= 0) return;

  dalib_dim_mapping_info (mapping, &dist_kind, &top_dim);

  if (dist_kind == kSERIAL_DIM) return;

  /* apply alignment to the index value to get correct owner */

  new_index_val = base + stride * index_val;

  dalib_distribution_owner (new_index_val, lb, ub, top_id, mapping,
                            &top_dim, &top_pos);

  dalib_restrict_top_dim (kind, top_dim, top_pos);

#ifdef DEBUG
  printf ("%d: index (%d = %d), fix topology %d, dim = %d, pos = %d\n",
           pcb.i, index_dim, index_val, top_id, top_dim, top_pos);
#endif 

  /* fix also the element in the section, but change to local address */

  new_index_val = dalib_dist_local_addr (new_index_val, lb, ub, mapping);
  new_index_val -= base;

  dalib_local_set_dim (local_id[kind], index_dim, new_index_val);

} /* dalib_fix_topology */

/*******************************************************************
*                                                                  *
*   SETTING GLOBAL DATA OF ONE SECTION                             *
*                                                                  *
*      set_section (A (N,3:N, 1, 2:N-1), kind)                     *
*                                                                  *
*         array_dims[kind] : (1,3)                                 *
*                                                                  *
*******************************************************************/

static int dalib_set_section (section, kind)
section_info section;
int kind;

{ int array_rank, sec_rank;
  int i;

  SecDimInfo *sec_dim;

  section_id [kind] = section;
  array_id   [kind] = section->array_id;
 
  /* create a local descriptor for the local section data */

  dalib_local_dsp_create (local_id + kind, section);

  array_rank     = array_id[kind]->rank;
  sec_dim        = section_id[kind]->dimensions;

#ifdef DEBUG
  printf ("%d: set section %d, kind = %d, array_rank = %d, local_id = %d\n",
          pcb.i, section, kind, array_rank, local_id[kind]);
#endif 

       /*****************************************
       *    FIX Topology of this SECTION        *
       *****************************************/

  dalib_set_topology (kind);

       /*****************************************
       *    TRAVERSE SECTION by INDEX           *
       *****************************************/

  sec_rank = 0;

  for (i=0; i < array_rank; i++)

     { if (sec_dim->is_range == 1)

          { /* is a range */

            array_dims [kind][sec_rank] = i+1;
            sec_rank++;
          }

        else 

          { /* is single element, check if dim is distributed */

            dalib_fix_topology (kind, i+1, sec_dim->global_range[0]);

          }

       sec_dim++;
     }

#ifdef DEBUG
  printf ("%d: array_dims [%d] = ( ", pcb.i, kind);
  for (i=0; i<sec_rank; i++) printf (" %d", array_dims[kind][i]);
  printf (" )\n");
#endif 

  return (sec_rank);

} /* dalib_set_section */

/*******************************************************************
*                                                                  *
*   dalib_permute_source ()                                        *
*                                                                  *
*******************************************************************/

static void dalib_permute_source (assign_rank, perm_vals)

int assign_rank, perm_vals[];

{ int i;
  int help [MAX_DIMENSIONS];

  for (i=0; i < assign_rank; i++)
      help [i] = array_dims[SOURCE][i];

  for (i=0; i < assign_rank; i++)
      array_dims[SOURCE][i] = help [perm_vals[i]];

}  /* dalib_permute_source */

/*******************************************************************
*                                                                  *
*  static void dalib_check_sections (int rank)                     *
*                                                                  *
*   - verify that sections have the same shape                     *
*   - complicated by A(1,1:N) matches B(1:N,1)                     *
*                                                                  *
*******************************************************************/

static void dalib_check_sections (rank)
int rank;

{ SecDimInfo *source_dim, *target_dim;
  int i, sn, tn;
  int error;

  error = 0;

  for (i=0; (i<rank) && (!error); i++)

   { source_dim = section_id[SOURCE]->dimensions + array_dims[SOURCE][i] - 1;
     target_dim = section_id[TARGET]->dimensions + array_dims[TARGET][i] - 1;

     sn = dalib_range_size (source_dim->global_range[0],
                            source_dim->global_range[1],
                            source_dim->global_range[2]);
    
     tn = dalib_range_size (target_dim->global_range[0],
                            target_dim->global_range[1],
                            target_dim->global_range[2]);

     if (sn != tn) error = 1;
   }

   if (error)

     { dalib_internal_error ("assign of section mismatch");
       printf ("source section (of %s): \n",
                section_id[SOURCE]->array_id->name);
       dalib_print_section_info (section_id[SOURCE]);
       printf ("target section (of %s): \n",
                section_id[TARGET]->array_id->name);
       dalib_print_section_info (section_id[TARGET]);
       printf ("source section has size %d at dim %d\n", 
                sn, array_dims[SOURCE][i-1]);
       printf ("target section has size %d at dim %d\n", 
                tn, array_dims[TARGET][i-1]);
       if (is_permute)
          { printf ("permutation :  ");
            for (i=0; i < rank; i++) printf ("%d ", permute_vals[i] + 1);
            printf ("\n");
          }
       dalib_stop ();
     }

} /* dalib_check_sections */

/*******************************************************************
*                                                                  *
*   static void dalib_assign_move (int index_dim)                  *
*                                                                  *
*   - calculate structured dimensions schedule for 0<=dim<rank     *
*   - all data is accessed globally                                *
*                                                                  *
*******************************************************************/

static void dalib_assign_move (index_dim)

{ int source_array_dim, target_array_dim;
  SecDimInfo *sec_dim;

  DistDim t_mapping, s_mapping;

  int t_size [2];
  int s_size [2];

  int *t_section;
  int *s_section;

  int s_base, s_stride;
  int t_base, t_stride;

  int t_kind, t_top, t_dim, t_NP, t_id;
  int s_kind, s_top, s_dim, s_NP, s_id;

  int schedule_id;

  int N;
  int lb, ub, str;

  /* TARGET (...,t_low:t_up:t_str,....) = SOURCE (...,s_low:s_up:s_str,...)  */

       /*****************************************
       *    SOURCE DIM                          *
       *****************************************/

  source_array_dim = array_dims[SOURCE][index_dim];

  dalib_array_dim_mapping (array_id[SOURCE], source_array_dim,
                           &s_base, &s_stride,
                           s_size, s_size+1, &s_top, &s_mapping);

  dalib_dim_mapping_info (s_mapping, &s_kind, &s_dim);

  if (s_kind == kSERIAL_DIM)
     { s_NP = 1; s_id = 1; }
   else
     dalib_top_info (s_top, s_dim, &s_NP, &s_id);

  sec_dim = section_id[SOURCE]->dimensions + source_array_dim - 1;

  s_section = sec_dim->global_range;

       /*****************************************
       *    TARGET DIM                          *
       *****************************************/

  target_array_dim = array_dims[TARGET][index_dim];

  dalib_array_dim_mapping (array_id[TARGET], target_array_dim,
                           &t_base, &t_stride,
                           t_size, t_size+1, &t_top, &t_mapping);

  dalib_dim_mapping_info (t_mapping, &t_kind, &t_dim);

  if (t_kind == kSERIAL_DIM)
     { t_NP = 1; t_id = 1; }
   else
     dalib_top_info (t_top, t_dim, &t_NP, &t_id);

  sec_dim = section_id[TARGET]->dimensions + target_array_dim - 1;

  t_section = sec_dim->global_range;

#ifdef DEBUG
  printf ("%d: assign_move %d, target (top=%d,dim=%d,N=%d:%d,kind=%d), source (top=%d,dim=%d,N=%d:%d,kind=%d)\n",
           pcb.i, index_dim, t_top, t_dim, 
                             t_size[0], t_size[1], t_kind, 
                             s_top, s_dim, 
                             s_size[0], s_size[1], s_kind);
#endif

  /* if both dimensions are serial we can immediately return */

  if (t_kind == kSERIAL_DIM)
     { t_NP = 1; t_id = 1; }
   else
     dalib_top_info (t_top, t_dim, &t_NP, &t_id);

  if ((t_NP == 1) && (s_NP == 1)) return;

  dalib_get_move_schedule

               (&schedule_id,

                t_NP, t_id, t_mapping,
                t_base, t_stride,
                t_size, t_section,

                s_NP, s_id, s_mapping,
                s_base, s_stride,
                s_size, s_section);

#ifdef DEBUG
     dalib_print_schedule (schedule_id);
#endif 

  /* in any case we have now a new schedule */

  all_sched_ids   [total_schedules] = schedule_id;
  total_schedules++;

  if (t_NP != 1)
 
   { /* target dimension is distributed, so we have many sends */

     send_sched_ids  [send_schedules] = schedule_id;
     send_index_dims [send_schedules] = source_array_dim;
     send_top_dims   [send_schedules] = t_dim;
     send_schedules ++;

   }

   else

   { /* many -> one, target is one block, so we can fix send section */

     dalib_set_send_schedule (schedule_id, 1,
                              local_id[SOURCE], source_array_dim);

   }

  if (s_NP != 1)

   { /* source dimension is distributed, so we have many receives */

     recv_sched_ids  [recv_schedules] = schedule_id;
     recv_index_dims [recv_schedules] = target_array_dim;
     recv_top_dims   [recv_schedules] = s_dim;
     recv_schedules ++;

   } 

   else 

   { /* one -> many, so we can fix section where we receive         */

     dalib_set_recv_schedule (schedule_id, 1,
                              local_id[TARGET], target_array_dim);

   } /* one -> many */

} /* dalib_move_define */

/*******************************************************************
*                                                                  *
*  CIRCULAR shifting for replication of a distributed section      *
*                                                                  *
*******************************************************************/

       /************************************************
       *                                               *
       *  dalib_do_shift_send (int pid)                *
       *                                               *
       ************************************************/

static void dalib_do_shift_send (pid)

int pid;   /* processor that gets my section in the first place */

{ 
#ifdef DEBUG
   printf ("%d: shift_send to %d\n", pcb.i, pid);
   dalib_print_local_info (local_id[SOURCE]);
#endif

  if (is_permute)
     dalib_local_perm_send (pid, local_id[SOURCE], permute_vals);
   else
     dalib_local_send (pid, local_id[SOURCE]);

} /* dalib_do_shift_send */

/*******************************************************************
*                                                                  *
*  void dalib_do_shift_recv (int pid, int p_left, int p_right)     *
*                                                                  *
*   - section to be processed is the local one of processor pid    *
*   - I get the section of processor p_left                        *
*   - I send the section to processor p_right                      *
*                                                                  *
*******************************************************************/

static void dalib_do_shift_recv (pid, p_left, p_right)

int pid, p_left, p_right;

{ int top_id;                /* topology that sends data to be replicated */
  int top_rank;              /* rank of topology                          */

  int is_in_top;             /* 1 if pid belongs to sending topology      */

  int rel_pos [3];           /* relative coordinates in topology          */

  int s_id;                  /* schedule identifier                       */

  int lb, ub, str, N;
  int pos, ind_dim, top_dim;
  int i;

#ifdef DEBUG
   printf ("%d: dalib_do_shift_recv, section from %d\n", pcb.i, pid);
#endif

  /* get relative coordinate p1  */

  top_id    = topology[SOURCE];
  top_rank  = dalib_top_rank (top_id);

  dalib_top_position (top_id, pid, &is_in_top, rel_pos);

  /* now identify the section I have to receive */

  if (is_in_top)

     { for (i=0; i<recv_schedules; i++)

         { s_id    = recv_sched_ids [i];
           ind_dim = recv_index_dims[i];
           top_dim = recv_top_dims  [i];

           pos     = rel_pos[top_dim-1];

           dalib_set_recv_schedule (s_id, pos, local_id[TARGET], ind_dim);
       
#ifdef DEBUG
  printf ("%d: top_dim = %d, pos = %d, s_id = %d, dim = %d, range = %d:%d:%d\n",
           pcb.i, top_dim, pos, s_id, ind_dim, lb, ub, str);
#endif
         }

#ifdef DEBUG
       printf ("%d: recv section (origin from %d) from %d, send to %d\n", 
               pcb.i, pid, p_left, p_right);
       dalib_print_local_info (local_id[TARGET]);
#endif

       dalib_local_recv (p_left, local_id[TARGET]);
       if (p_right > 0)   
          dalib_local_send (p_right, local_id[TARGET]); 
     }

} /* dalib_do_shift_recv */

/*******************************************************************
*                                                                  *
*  void dalib_do_send (int schedule_depth)                         *
*                                                                  *
*  - sends section[SOURCE] to topology [TARGET]                    *
*                                                                  *
*  - depth = 0, everything is fixed, so send directly              *
*  - depth > 0, fix section in one dimension, recursive call       *
*                                                                  *
*******************************************************************/

static void dalib_do_send (n)

int n;

{ int ref_pid;

  int pid, p_low, p_high;

  int lb, ub, str, N;
  int okay;

  int s_id;         /* schedule identifier */
  int index_dim;

  int top_id, top_dim, top_dist;

  ref_pid = topology_ref[TARGET];


#ifdef DEBUG
       printf ("%d: do send (nest=%d) from %d\n", pcb.i, n, ref_pid);
#endif

  if (n == 0)

     { int top_pid;

       top_pid = topology_ids[TARGET][ref_pid];

       if (is_permute)
          dalib_local_perm_send (top_pid, local_id[SOURCE], permute_vals);
        else
          dalib_local_send (top_pid, local_id[SOURCE]);

       return;
     }

  top_id   = topology[TARGET];
  top_dim  = send_top_dims[n-1];
  top_dist = dalib_top_distance (top_id, top_dim);

  index_dim = send_index_dims[n-1];
  s_id      = send_sched_ids [n-1];

  dalib_schedule_psends (s_id, &p_low, &p_high);

  for (pid = p_low; pid <= p_high; pid++)

    { dalib_set_send_schedule (s_id, pid, local_id[SOURCE], index_dim);

      topology_ref[TARGET] = ref_pid + (pid - 1) * top_dist;

      dalib_do_send (n-1);   /* recursive call */

    }

  topology_ref[TARGET] = ref_pid;

} /* dalib_do_send */

/*******************************************************************
*                                                                  *
*  RECEIVING LOOP NEST                                             *
*                                                                  *
*   note : receive data from topology related to SOURCE            *
*                                                                  *
*******************************************************************/

       /************************************************
       *                                               *
       *  do_recv  (int n)                             *
       *                                               *
       ************************************************/

static void dalib_do_recv (n)

int n;

{ int ref_pid;

  int pid, p_low, p_high;

  int s_id;         /* schedule identifier */
  int index_dim;

  int top_id, top_dim, top_dist;

  ref_pid = topology_ref[SOURCE];

#ifdef DEBUG
  printf ("%d: do recv (nest = %d) from %d\n", pcb.i, n, ref_pid);
#endif

  if (n == 0)

     { /* local section to be to received is completely fixed */

       int top_pid;

       top_pid = topology_ids[SOURCE][ref_pid];

       dalib_local_recv (top_pid, local_id[TARGET]);

       return;
     }

  top_id   = topology[SOURCE];
  top_dim  = recv_top_dims[n-1];
  top_dist = dalib_top_distance (top_id, top_dim);

  index_dim = recv_index_dims[n-1];
  s_id      = recv_sched_ids [n-1];

  dalib_schedule_precvs (s_id, &p_low, &p_high);

  for (pid = p_low; pid <= p_high; pid++)

    { dalib_set_recv_schedule (s_id, pid, local_id[TARGET], index_dim);

      topology_ref[SOURCE] = ref_pid + (pid - 1) * top_dist;

      dalib_do_recv (n-1);   /* recursive call */

    }

  topology_ref[SOURCE] = ref_pid;

} /* dalib_do_recv */

/*******************************************************************
*                                                                  *
*  static void dalib_make_local_dim (SecDimInfo *source_dim,       *
*                                    SecDimInfo *target_dim)       *
*                                                                  *
*  - restrict range of source section (from replicated array)      *
*    to the local part of the target section                       *
*                                                                  *
*******************************************************************/
 
static void dalib_make_local_dim (source_dim, target_dim)

SecDimInfo *source_dim, *target_dim;

{ int *s_range;

  /* find the part in the source that correponds to local part of target */

  s_range = source_dim->local_range;

  dalib_map1_section (target_dim->global_range, source_dim->global_range,
                      target_dim->local_range, s_range);

#ifdef DEBUG
  printf ("%d: local part of source section is %d:%d:%d\n",
           pcb.i, s_range[0], s_range[1], s_range[2]);
#endif

} /* dalib_make_local_dim */

/*******************************************************************
*                                                                  *
*  static void dalib_do_rep_assign (section_info target_section,   *
*                                   section_info source_section,   *
*                                   int rank)                      *
*                                                                  *
*  ASSIGNMENT FOR ARRAY SECTIONS: target_section = source-section  *
*                                                                  *
*  Special case: source section is replicated (no communication)   *
*                                                                  *
*  Hint: usually not called as no communication required (but      *
*        likely for inherited distributions)                       *
*                                                                  *
*******************************************************************/

static void dalib_do_rep_assign (target_section, source_section, rank)
 
section_info target_section;
section_info source_section;
int rank;
 
{ SecDimInfo *source_dim, *target_dim;

  int i;

#ifdef DEBUG
  printf ("%d: source is replicated, so make local assign\n", pcb.i);
#endif

  for (i=0; i<rank; i++)
 
   { source_dim = section_id[SOURCE]->dimensions + array_dims[SOURCE][i] - 1;
     target_dim = section_id[TARGET]->dimensions + array_dims[TARGET][i] - 1;
 
     /* in the source the global range and local range are equal,
        but we have to restrict to the local part according to target */

     dalib_make_local_dim (source_dim, target_dim);

   }

  /* local assignment : target_section = source_section */

  dalib_secarray_copy (target_section, source_section);

  dalib_section_reset (source_section);

} /* dalib_do_rep_assign */

/*******************************************************************
*                                                                  *
*  static void dalib_do_assign (section_info target_section,       *
*                               section_info source_section,       *
*                               int rank)                          *
*                                                                  *
*  ASSIGNMENT FOR ARRAY SECTIONS: target_section = source-section  *
*                                                                  *
*******************************************************************/

static void dalib_do_assign (target_section, source_section, rank)

section_info target_section;
section_info source_section;
int rank;

{ int i;

       /*****************************************
       *    check the two sections for compatib *
       *****************************************/

   dalib_check_sections (rank);

       /*****************************************
       *    special case : source replicted     *
       *****************************************/

#ifdef DEBUG
   printf ("%d: do assign, check for replicated source (source_top = %d)\n",
            pcb.i, topology[SOURCE]);
#endif

   if (topology[SOURCE] == 0)

      { dalib_do_rep_assign (target_section, source_section, rank);
        return;
      }

       /*****************************************
       *    compute the structured schedules    *
       *****************************************/

  send_schedules  = 0;
  recv_schedules  = 0;
  total_schedules = 0;   /* count all schedules for free */

  for (i=0; i<rank; i++)
     dalib_assign_move (i);

#ifdef DEBUG
  printf ("%d: dalib_assign_moves, schedules send = %d, recv =%d, total = %d\n",
           pcb.i, send_schedules, recv_schedules, total_schedules);
#endif

       /*****************************************
       *    special case : target replicted     *
       *                   source distributed   *
       *****************************************/

  /* recv_schedules == 0 : only one processor will send, so prefer
     a broadcast

     ATTENTION: the following will not work for recv_schedules == 0
     ATTENTION: works only fine if replicated among all processors

  */

  if (   (topology[TARGET] == 0) 
      && (topology_full[SOURCE])
      && (recv_schedules > 0) 
      && (dalib_context_group () == 0)                     )

    { int i, pid;
      int p_left, p_right;

      p_left  = pcb.i - 1; if (p_left  < 1    ) p_left  = pcb.p;
      p_right = pcb.i + 1; if (p_right > pcb.p) p_right = 1;

      /* every processor will send its section to the next processor */

      if (topology_mask[SOURCE])

         { if (send_schedules == 0)

              dalib_do_shift_send (pcb.i);  /* send at first to me */

            else
 
              { dalib_internal_error ("assign to replicated section");
                dalib_stop ();
              }
         }

       else

         { /* send a dummy value */

#ifdef DEBUG
           printf ("%d: I have not to send anything for shift replication\n", 
                   pcb.i);
#endif 
         }

      /* every processor receives the sections                   */

      pid = pcb.i;    /* the first time I get section from me */


      for (i=0; i<pcb.p; i++)

        { dalib_section_reset (target_section);  /* was necessary on SP2 ? */

          if (pid == pcb.i)

             { /* my section I get from myself */

              if (p_right != pid)
                 dalib_do_shift_recv (pid, pid, p_right);
               else
                 dalib_do_shift_recv (pid, pid, 0);
             }

           else

             { /* the other sections I get from my left neighbour */

               if (p_right != pid)
                  dalib_do_shift_recv (pid, p_left, p_right);
                else
                  dalib_do_shift_recv (pid, p_left, 0);
             }

          pid = pid - 1;
          if (pid < 1) pid = pcb.p;
        }
 
      /* free the schedules                                      */

      for (i=total_schedules-1; i >= 0; i--)
         dalib_free_schedule (all_sched_ids[i]);

      return;

    }
  
       /*****************************************
       *    send data of the source section     *
       *****************************************/

#ifdef DEBUG
  printf ("%d: dalib_do_assign, send data (schedules = %d), mask = %d\n",
           pcb.i, send_schedules, topology_mask[SOURCE]);
#endif 

  if (topology_mask[SOURCE])

  { /* this processor has source data to be sent */

     dalib_do_send (send_schedules);

  } /* send */

       /*****************************************
       *   receive part                         *
       *****************************************/

#ifdef DEBUG
  printf ("%d: dalib_do_assign, recv data (schedules = %d), mask = %d\n",
           pcb.i, recv_schedules, topology_mask[TARGET]);
#endif 

  if (topology_mask[TARGET])

  { /* this processor has target data to be received */

     dalib_do_recv (recv_schedules);

  } /* receive */

       /*****************************************
       *   broadcast of all replicated data     *
       *****************************************/

#ifdef DEBUG
   printf ("broadcast part\n");
#endif

   dalib_section_reset (target_section);
   dalib_replicate (target_section);

       /*****************************************
       *    free all schedules                  *
       *****************************************/

#ifdef DEBUG
   printf ("free part\n");
#endif

  for (i=total_schedules-1; i >= 0; i--)
     dalib_free_schedule (all_sched_ids[i]);

} /* dalib_do_assign */

/*******************************************************************
*                                                                  *
* void FUNCTION(dalib_assign)                                      *
*                                                                  *
*            (array_info/section_info *target_section,             *
*             array_info/section_info *source_section,             *
*                                                                  *
*******************************************************************/

void FUNCTION(dalib_assign) (target_section, source_section) 

section_info *target_section;
section_info *source_section;

{ int assign_rank; 
  section_info source_sec, target_sec;

#ifdef DEBUG
   printf ("%d: dalib_assign, section %d = section %d\n",
           pcb.i, *target_section, *source_section);
#endif

   dalib_section_full (&source_sec, *source_section);
   dalib_section_full (&target_sec, *target_section);

       /*****************************************
       *    set infos about section globally    *
       *****************************************/

  assign_rank = dalib_set_section (source_sec, SOURCE);
  if (dalib_set_section (target_sec, TARGET) != assign_rank)
     dalib_internal_error ("dalib_assign");

  is_permute = 0;

  dalib_do_assign (target_sec, source_sec, assign_rank);

  dalib_local_dsp_free (local_id[SOURCE]);
  dalib_local_dsp_free (local_id[TARGET]);

  if (dalib_is_array_info (*target_section))
     FUNCTION(dalib_section_free) (&target_sec); 
 
  if (dalib_is_array_info (*source_section))
     FUNCTION(dalib_section_free) (&source_sec); 
 
} /* dalib_assign */

/*******************************************************************
*                                                                  *
* void FUNCTION(dalib_assign_permute)                              *
*                                                                  *
*            (array_info/section_info *target_section,             *
*             array_info/section_info *source_section,             *
*             int *p1, ..., int *p7)                               *
*                                                                  *
*******************************************************************/

void FUNCTION(dalib_assign_permute) (target_section, source_section, 
                            p1, p2, p3, p4, p5, p6, p7)

section_info *target_section;
section_info *source_section;

int *p1, *p2, *p3, *p4, *p5, *p6, *p7;

{ int assign_rank;
  section_info target_sec, source_sec;

#ifdef DEBUG
   printf ("%d: dalib_assign_permute, section %d = section %d\n",
           pcb.i, *target_section, *source_section);
#endif

   dalib_section_full (&source_sec, *source_section);
   dalib_section_full (&target_sec, *target_section);
 
       /*****************************************
       *    set infos about section globally    *
       *****************************************/

  assign_rank = dalib_set_section (source_sec, SOURCE);

  if (dalib_set_section (target_sec, TARGET) != assign_rank)
     dalib_internal_error ("dalib_assign_permute");

  is_permute = 1;

  /* note : FORTRAN permutation between 1 and n, but 0 .. n-1 in C */

  switch (assign_rank)  {

    case 7 : permute_vals[6] = *p7 - 1;
    case 6 : permute_vals[5] = *p6 - 1;
    case 5 : permute_vals[4] = *p5 - 1;
    case 4 : permute_vals[3] = *p4 - 1;
    case 3 : permute_vals[2] = *p3 - 1;
    case 2 : permute_vals[1] = *p2 - 1;
    case 1 : permute_vals[0] = *p1 - 1;
    case 0 : break;

  } /* switch */

  /* permute index information of source section */

  dalib_permute_source (assign_rank, permute_vals); 

  dalib_do_assign (target_sec, source_sec, assign_rank);

  dalib_local_dsp_free (local_id[SOURCE]);
  dalib_local_dsp_free (local_id[TARGET]);

  /* delete section descriptors if they have been created */

  if (dalib_is_array_info (*target_section))
     FUNCTION(dalib_section_free) (&target_sec); 
 
  if (dalib_is_array_info (*source_section))
     FUNCTION(dalib_section_free) (&source_sec); 
 
} /* FUNCTION(dalib_assign_permute) */ 

