/**************************************************************************
*                                                                         *
*  Author      : Dr. Thomas Brandes, GMD, SCAI.LAB                        *
*                                                                         *
*  Copyright   : GMD St. Augustin, Germany                                *
*  Date        : Nov 94                                                   *
*  Last Update : Oct 97                                                   *
*                                                                         *
*  This Module is part of the DALIB                                       *
*                                                                         *
*  Module      : overlap1.m4                                              *
*                                                                         *
*  Function: Updating of overlap areas by the descriptor                  *
*                                                                         *
*  Export :  DALIB Interface                                              *
*  =========================                                              *
*                                                                         *
*  Export :  FORTRAN Interface                                            *
*  ===========================                                            *
*                                                                         *
* void FUNCTION(dalib_overlap_update) (section_id,                        *
*                        lov1, rov1, lov2, rov2, lov3, rov3,              *
*                        lov4, rov4, lov5, rov5, lov6, rov6, lov7, rov7)  *
*                                                                         *
*    - updates the corresponding area of the overlap area                 *
*    - works for serial and distributed arrays                            *
*                                                                         *
*  Open problems:                                                         *
*                                                                         *
*    - uniproc model, this module is necessary but calls topology/comm    *
*    - error handling if overlap area is too small                        *
*    - error handling if not block distribution for the overlap dim       *
*    - it should be possible to work with smaller arrays                  *
*                                                                         *
*  Updates                                                                *
*  =======                                                                *
*                                                                         *
*   10/97 : using new mapping routines of distribution                    *
*                                                                         *
**************************************************************************/

#include <stdio.h>
#include "dalib.h"

#undef DEBUG
#define CHECK

/**************************************************************************
*                                                                         *
* make a special data structure describing the section of an array        *
*                                                                         *
**************************************************************************/

   /* some data used globally for convenience

      Both pointers are assigned by set_sub_section():

      array_dimensions   : dimension descriptors of the array the section
                           belongs to (only written, never read, in the
                           visible part of this file)
      section_dimensions : dimension descriptors of the section itself,
                           read by get_ov_section()                       */

static DimInfo    *array_dimensions;
static SecDimInfo *section_dimensions;

/**************************************************************************
*                                                                         *
*   static void set_sub_section (section_id)                              *
*                                                                         *
*   - stores the dimension descriptors of the array and of the section    *
*     in array_dimensions / section_dimensions                            *
*                                                                         *
*   - for every dimension with a negative global stride the entries       *
*     [0] and [1] of global_range and local_range are exchanged and       *
*     the entries [2] are negated                                         *
*                                                                         *
*   NOTE : the section descriptor is modified in place                    *
*                                                                         *
**************************************************************************/

static void set_sub_section (section_id)

section_info section_id;

{ array_info owner;

  int dim, ndims;
  int first, last, step;

  owner = section_id->array_id;

  array_dimensions   = owner->dimensions;
  section_dimensions = section_id->dimensions;

  /* negative strides are not possible in the following routines */

  ndims = owner->rank;

  for (dim = 0; dim < ndims; dim++)

    { /* dimensions that do not run backwards stay as they are */

      if (section_dimensions[dim].global_range[2] >= 0) continue;

      /* turn around the global range */

      first = section_dimensions[dim].global_range[0];
      last  = section_dimensions[dim].global_range[1];
      step  = section_dimensions[dim].global_range[2];

      section_dimensions[dim].global_range[0] = last;
      section_dimensions[dim].global_range[1] = first;
      section_dimensions[dim].global_range[2] = -step;

      /* and in exactly the same way the local range */

      first = section_dimensions[dim].local_range[0];
      last  = section_dimensions[dim].local_range[1];
      step  = section_dimensions[dim].local_range[2];

      section_dimensions[dim].local_range[0] = last;
      section_dimensions[dim].local_range[1] = first;
      section_dimensions[dim].local_range[2] = -step;
    }

} /* set_sub_section */

/**************************************************************************
*                                                                         *
*   global data for sending and receiving data types / processors         *
*                                                                         *
**************************************************************************/

/* derived data types of the part to be sent / received; they are built
   in get_ov_section and moved / freed in do_communication              */

static dd_type global_ddt_send, global_ddt_recv;

/* result of dalib_ov_move, evaluated by get_ov_section:

   global_send_set, global_recv_set : reset to 0 and then set to 1 by
                                      dalib_ov_move
   global_send_section [0..2]       : range of the part to be sent
   global_recv_section [0..2]       : range of the part to be received

   get_ov_section takes the entries [0] and [1] as lower and upper bound */

static int     global_send_set;
static int     global_recv_set;
static int     global_send_section [3];
static int     global_recv_section [3];

/* do_communication: carries out the data movement of one overlap update

   - uses the file-global data types global_ddt_send / global_ddt_recv
   - both data types are passed to dalib_ddt_free afterwards, so they
     must not be used again after this call                            */

static void do_communication ()

{ 
#ifdef DEBUG
  printf ("overlap requires following data movement :\n");
  printf ("copy the following ddt : \n");
  dalib_ddt_print (global_ddt_send);
  printf ("to the following ddt : \n");
  dalib_ddt_print (global_ddt_recv);
#endif

  /* note the argument order: receive data type first, send data type
     second                                                           */

  dalib_ddt_move (global_ddt_recv, global_ddt_send);

  /* the data types are no longer needed */

  dalib_ddt_free (global_ddt_send);
  dalib_ddt_free (global_ddt_recv);

} /* do_communication */

/*******************************************************************
*                                                                  *
*  static void dalib_shift_section (overlap_section,               *
*                                   t_1, t_N, ov_update)           *
*                                                                  *
*   - corrects the section for circular shifting, e.g.             *
*     overlap update [1:N], 1  -> 0:N-1 are updated values         *
*                                                                  *
*   ov_update > 0 : if overlap_section[0] < t_1, overlap_section[1]*
*                   is increased by (t_1 - overlap_section[0])     *
*   ov_update < 0 : if overlap_section[1] > t_N, overlap_section[0]*
*                   is decreased by (overlap_section[1] - t_N)     *
*   ov_update = 0 : section is left unchanged                      *
*                                                                  *
*******************************************************************/

static void dalib_shift_section (overlap_section, t_1, t_N, ov_update)

int overlap_section[];
int t_1, t_N;
int ov_update;

{ if (ov_update == 0) return;     /* no overlap, nothing to correct */

  if (ov_update > 0)

   { /* left overlap: section starts before t_1, so the upper bound
        is moved up by the number of elements in front of t_1
        (updated values must come from right)                     */

     if (t_1 > overlap_section[0])
       overlap_section[1] = overlap_section[1] + (t_1 - overlap_section[0]);

     return;
   }

  /* right overlap: section ends behind t_N, so the lower bound is
     moved down by the number of elements behind t_N
     (updated values must come from left)                          */

  if (overlap_section[1] > t_N)
    overlap_section[0] = overlap_section[0] - (overlap_section[1] - t_N);

} /* dalib_shift_section */

/*******************************************************************
*                                                                  *
*  static void dalib_set_comm_parts (my_section, ov_update,        *
*                                    left_side, right_side)        *
*                                                                  *
*   my_section [0] : [1] is my range lb : ub, val = |ov_update|    *
*                                                                  *
*   ov_update > 0 : left overlap update, sending to the right      *
*                                                                  *
*       right_side = ub-val+1 : ub      (checked against lb)       *
*       left_side  = lb-val   : lb-1                               *
*                                                                  *
*   otherwise     : right overlap update, sending to the left      *
*                                                                  *
*       left_side  = lb       : lb+val-1 (checked against ub)      *
*       right_side = ub+1     : ub+val                             *
*                                                                  *
*   a failed check calls dalib_internal_error and dalib_stop       *
*                                                                  *
*******************************************************************/

static void dalib_set_comm_parts (my_section, ov_update,
                                  left_side, right_side)

int my_section[];
int ov_update;          /* was undeclared (implicit int) before */
int left_side [];
int right_side [];

{ int my_low, my_up;
  int val;               /* absolute value of ov_update */

  /*  ov_update > 0 :  update left overlap, implies sending to the right
      ov_update < 0 :  update right overlap, implies sending to the left   */

  val = ov_update;
  if (ov_update < 0) val = -ov_update;

  my_low = my_section[0];
  my_up  = my_section[1];
   
  if (ov_update > 0)

    { /* left overlap update */

     /***********************************************************
     *                                                          *
     *  - the right part of my range goes to the right proc.    *
     *                                                          *
     *         lb  ....  ub-val+1 ... ub                        *
     *         -   -   -   x   x   x   x                        *
     *         -   -   -   x   x   x   x                        *
     *                                                          *
     ***********************************************************/

      right_side [0] = my_up - val + 1;
      right_side [1] = my_up;

      if (right_side[0] < my_low)

         { /* I have not all values for my right neighbor */

           dalib_internal_error ("overlap too big");
           dalib_stop ();
         }


      /***********************************************************
      *                                                          *
      *  - my left overlap area takes the new values             *
      *                                                          *
      *         lb-val ... lb-1 lb  ... ub                       *
      *            x  x ..   x   -  ... -                        *
      *                                                          *
      ***********************************************************/
 
      left_side [0] = my_low - val;
      left_side [1] = my_low - 1;

    }

  else

   { /***********************************************************
     *                                                          *
     *  - send left part of my local part to the left proc.     *
     *                                                          *
     *         lb  .... lb+val-1  ... ub                        *
     *         x   x   x   x   -  -    -                        *
     *         x   x   x   x   -  -    -                        *
     *                                                          *
     ***********************************************************/
 
      left_side [0] = my_low;
      left_side [1] = my_low + val - 1;

      if (left_side[1] > my_up)

         { /* I have not all values for my left neighbor */

           dalib_internal_error ("overlap too big");
           dalib_stop ();
         }

      /***********************************************************
      *                                                          *
      *          lb  ....    ub  ub+1  ...  ub+val               *
      *           -  -   -   -     x    x     x                  *
      *           -  -   -   -     x    x     x                  *
      *                                                          *
      ***********************************************************/
 
      /* recv data in my right overlap area, should be enough   */
 
      right_side [0] = my_up + 1;
      right_side [1] = my_up + val;

   }
  
} /* dalib_set_comm_parts */
 
/*******************************************************************
*                                                                  *
*  void dalib_ov_move (t_1, t_N, array_section, ov_pos)            *
*                                                                  *
*   - computes the schedule for the update of an overlap area      *
*   - result is left in global_send_section / global_recv_section, *
*     global_send_set and global_recv_set are set to 1             *
*                                                                  *
*     t_1 .......................  t_N     template extent         *
*                                                                  *
*   ov_pos > 0 : left overlap  (send right side, recv left side)   *
*   otherwise  : right overlap (send left side, recv right side)   *
*                                                                  *
*   calls dalib_internal_error / dalib_stop if the extent t_1:t_N  *
*   has fewer than |ov_pos| elements                               *
*                                                                  *
*******************************************************************/
 
void dalib_ov_move (t_1, t_N, 
                    array_section, ov_pos)

int t_1, t_N;

int array_section[3];        /* section for which we calculate overlap */
int ov_pos;

{ int source_section[3];     /* section that provides the overlap data */
  int local_part[3];         /* t_1 : t_N : 1                          */

  int right_side[2];
  int left_side[2];

  int *send_side;            /* points to right_side or left_side      */
  int *recv_side;            /* points to the other one                */

  int send_section[3];
  int recv_section[3];

  int width;                 /* absolute value of ov_pos               */
  int k;

#ifdef DEBUG
  printf ("ov_move, update = %d, sec = %d:%d, in %d:%d\n",
           ov_pos, array_section[0], array_section[1], 
           t_1, t_N);
#endif

  width = (ov_pos < 0) ? -ov_pos : ov_pos;

  /* the extent must have at least as many elements as the overlap */

  if ((t_N - t_1 + 1) < width)

    { dalib_internal_error ("overlap area too small");
      printf ("dim size = %d:%d, overlap = %d\n",
               t_1, t_N, width);
      dalib_stop();
    }

  /*  ov_pos > 0 :  update left overlap, implies sending to the right
      ov_pos < 0 :  update right overlap, implies sending to the left

      the data is provided by the section shifted by -ov_pos           */

  source_section[0] = array_section[0] - ov_pos;
  source_section[1] = array_section[1] - ov_pos;
  source_section[2] = 1;

  dalib_shift_section (source_section, t_1, t_N, ov_pos);

  /* my part is the whole extent t_1 : t_N */

  local_part[0] = t_1;
  local_part[1] = t_N;
  local_part[2] = 1;

  global_send_set = 0;
  global_recv_set = 0;

  dalib_set_comm_parts (local_part, ov_pos, left_side, right_side);

  /* select which side is sent and which side is received */

  if (ov_pos > 0)

    { /* left overlap update, receive left side, send right side */

      send_side = right_side;
      recv_side = left_side;
    }

  else

    { /* right overlap update, send left side, receive right side */

      send_side = left_side;
      recv_side = right_side;
    }

  /* part to be sent */

  dalib_intersect_range_slice (source_section, send_side,
                               send_section);

  global_send_set = 1;
  for (k = 0; k < 3; k++)
     global_send_section [k] = send_section[k];

  /* part to be received in my overlap area */

  dalib_intersect_range_slice (source_section, recv_side,
                               recv_section);

  global_recv_set = 1;
  for (k = 0; k < 3; k++)
     global_recv_section [k] = recv_section[k];
  
} /* dalib_ov_move */

/**************************************************************************
*                                                                         *
* static void get_ov_section (section_id, size, ov_dim, val)              *
*                                                                         *
* let array be A [lb1:ub1, lb2:ub2, ..., lbn:ubn]                         *
*                                                                         *
* the send and the receive part are described by derived data types       *
* and moved by do_communication:                                          *
*                                                                         *
* CASE 1 : val > 0  means update of left overlap area                     *
*                                                                         *
*   send A [lb1:ub1, ...,ub-val+1:ub, ..., lbn:ubn] to right              *
*   recv A [lb1:ub1, ...,lb-val:lb-1, ..., lbn:ubn] from left             *
*                                                                         *
* CASE 2 : val < 0  means update of right overlap area                    *
*                                                                         *
*   send A [lb1:ub1, ...,lb:lb-val-1, ..., lbn:ubn] to left               *
*   recv A [lb1:ub1, ...,ub+1:ub-val, ..., lbn:ubn] from right            *
*                                                                         *
* val = 0 : nothing is done                                               *
*                                                                         *
*  note : works globally with subsection                                  *
*  note : local_range of dimension ov_dim is overwritten while the        *
*         data types are built and restored before returning              *
*                                                                         *
**************************************************************************/

static void get_ov_section (section_id, size, ov_dim, val)

section_info section_id;
int ov_dim;
int val;
int size;

{ int save_lb, save_ub;

  int global_section[3];

  int top_id, top_dim;
  int t_base, t_stride;
  int t_low, t_up, kind;

  DistDim t_mapping;

  int lb, ub;
  int NP, NId, distance;      /* only needed for the DEBUG output */

  int *local_section;

  if (val == 0) return;

  global_section[0] = section_dimensions[ov_dim].global_range[0];
  global_section[1] = section_dimensions[ov_dim].global_range[1];
  global_section[2] = 1;    /* do not care about too much data */

  /* set local_section as pointer to the local section size */

  local_section = section_dimensions[ov_dim].local_range;

  /* the local bounds are overwritten below, keep them for restoring */

  save_lb = local_section[0];
  save_ub = local_section[1];

#ifdef DEBUG
  /* section_id is a pointer, so it has to be printed with %p */
  printf ("%d: ov update, sec=%p (%d bytes), dim=%d, pos=%d\n",
           pcb.i, (void *) section_id, size, ov_dim+1, val);
#endif

  /* dimensions are counted from 1 for the mapping query */

  dalib_array_dim_mapping (section_id->array_id, ov_dim+1,
                           &t_base, &t_stride, &t_low, &t_up, 
                           &top_id, &t_mapping);

  dalib_dim_mapping_info (t_mapping, &kind, &top_dim);

  /* fixed values: one process, no distance */

  NP = 1; NId = 1;
  distance = 0;

#ifdef DEBUG
  printf ("%d: topology (id=%d,dim=%d), I am %d/%d, dist = %d\n",
          pcb.i, top_id, top_dim, NId, NP, distance);
#endif

  /* result comes back in global_send_section / global_recv_section */

  dalib_ov_move (t_low, t_up,
                 global_section, val);

#ifdef DEBUG
  printf ("computed schedule for overlap udpate\n");
#endif

  global_ddt_send = NO_DDT;
  global_ddt_recv = NO_DDT;

  if (global_send_set)

    { lb  = global_send_section[0];
      ub  = global_send_section[1];

      /* the data type is built for the section with these bounds */

      local_section [0] = lb; 
      local_section [1] = ub;

#ifdef DEBUG
      printf ("send the range %d:%d\n", lb, ub);
      dalib_print_section_info (section_id);
#endif
      dalib_make_section_ddt (&global_ddt_send, section_id, size); 
    }

  if (global_recv_set)

    { lb  = global_recv_section[0];
      ub  = global_recv_section[1];

      local_section [0] = lb; 
      local_section [1] = ub;

#ifdef DEBUG
      printf ("recv the range %d:%d\n", lb, ub);
      dalib_print_section_info (section_id);
#endif
      dalib_make_section_ddt (&global_ddt_recv, section_id, size); 
    }

  do_communication ();
 
  /* restore the local bounds of this dimension */

  local_section [0] = save_lb;
  local_section [1] = save_ub;

#ifdef DEBUG
  printf ("%d: my new local size is %d - %d\n", pcb.i,
           section_dimensions[ov_dim].local_range[0],
           section_dimensions[ov_dim].local_range[1]);
#endif
 
} /* get_ov_section */

/**************************************************************************
*                                                                         *
*   static void dalib_fictive_local_size (array_id, dim, lb, ub)          *
*                                                                         *
*   - returns in *lb : *ub the global size of dimension dim               *
*     (entries [0] and [1] of global_size) of array_id                    *
*                                                                         *
**************************************************************************/

static void dalib_fictive_local_size (array_id, dim, lb, ub)

array_info array_id;
int        dim;
int        *lb;
int        *ub;

{ /* lower bound first, then upper bound */

  lb[0] = (array_id->dimensions + dim)->global_size[0];
  ub[0] = (array_id->dimensions + dim)->global_size[1];

} /* dalib_fictive_local_size */

/**************************************************************************
*                                                                         *
*   static void extend_section (section_id, dim, left, right)             *
*                                                                         *
*   - recomputes section_id->dimensions[dim].local_range by calling       *
*     dalib_r_s_intersect with the global range and the fictive local     *
*     size, both widened by left (lower end) and right (upper end)        *
*   - marks the dimension as range (is_range = 1)                         *
*   - nothing is done if left + right == 0                                *
*   - this is absolutely necessary for updates in more dimensions         *
*                                                                         *
*  NOTE : local part of one dimension is not considered for               *
*         update in this dimension, but for all others                    *
*                                                                         *
**************************************************************************/

static void extend_section (section_id, dim, left, right)

section_info section_id;
int dim, left, right;

{ SecDimInfo *this_dim;
  int        fict_lb, fict_ub;

  if (left + right != 0)

    { this_dim = &(section_id->dimensions[dim]);

      /* the fictive local size of this dimension, and only this one */

      dalib_fictive_local_size (section_id->array_id, dim,
                                &fict_lb, &fict_ub);

      /* widen both ranges by the overlap; the result is written
         straight into the local range of the section             */

      dalib_r_s_intersect (this_dim->global_range[0] - left,
                           this_dim->global_range[1] + right,
                           this_dim->global_range[2],
                           fict_lb - left,
                           fict_ub + right,
                           &(this_dim->local_range[0]),
                           &(this_dim->local_range[1]),
                           &(this_dim->local_range[2]));

      this_dim->is_range = 1;  /* important: now lb : ub */

      /* Question: what happens to aligned arrays that have no real
                   elements but have an overlap at processor boundaries

         Solution: array_dim->local_size should be a fictive size     */

#ifdef DEBUG
      printf ("%d: local section at dim %d extended by [%d:%d] to %d:%d:%d\n",
              pcb.i, dim, left, right, this_dim->local_range[0],
              this_dim->local_range[1], this_dim->local_range[2]);
#endif
    }

} /* extend_section */

/**************************************************************************
*                                                                         *
* void FUNCTION(dalib_overlap_update) (section_id,                        *
*                        lov1, rov1, lov2, rov2, lov3, rov3,              *
*                        lov4, rov4, lov5, rov5, lov6, rov6, lov7, rov7)  *
*                                                                         *
*   - lovN / rovN : left / right overlap of dimension N to be updated     *
*   - only the first rank pairs are used (rank of the array, 1..7)        *
*   - for any other rank only set_sub_section is called                   *
*   - all arguments are passed by reference (FORTRAN interface)           *
*                                                                         *
**************************************************************************/

void FUNCTION(dalib_overlap_update) (section_id, 
                            lov1, rov1, lov2, rov2, lov3, rov3,
                            lov4, rov4, lov5, rov5, lov6, rov6, lov7, rov7)

section_info *section_id; 
int *lov1, *rov1, *lov2, *rov2, *lov3, *rov3, *lov4, *rov4,
    *lov5, *rov5, *lov6, *rov6, *lov7, *rov7;

{ int *left_ov  [7];    /* left_ov  [d] : left  overlap of dimension d+1 */
  int *right_ov [7];    /* right_ov [d] : right overlap of dimension d+1 */

  int ndims;
  int elem_size;
  int d;

  array_info array_id;

#ifdef DEBUG
  printf ("dalib_overlap_update\n");
  dalib_print_section_info (*section_id);
#endif 

  array_id = (*section_id)->array_id;

  /* Attention: overlap cannot be defined after reserving memory */

  ndims     = array_id->rank;
  elem_size = array_id->size;

  set_sub_section (*section_id);

  /* the pointers are only collected here, they are dereferenced
     for the dimensions 1 .. rank only                            */

  left_ov [0] = lov1;  right_ov [0] = rov1;
  left_ov [1] = lov2;  right_ov [1] = rov2;
  left_ov [2] = lov3;  right_ov [2] = rov3;
  left_ov [3] = lov4;  right_ov [3] = rov4;
  left_ov [4] = lov5;  right_ov [4] = rov5;
  left_ov [5] = lov6;  right_ov [5] = rov6;
  left_ov [6] = lov7;  right_ov [6] = rov7;

  if ((ndims >= 1) && (ndims <= 7))

    { /* note: new data in overlap area in one dimension will be
               added to the local range of section_id            

         all dimensions are extended first, last dimension first */

      for (d = ndims - 1; d >= 0; d--)
         extend_section (*section_id, d, *left_ov[d], *right_ov[d]);

      /* now the updates, again last dimension first; for every
         dimension the left overlap comes before the right one   */

      for (d = ndims - 1; d >= 0; d--)

        { get_ov_section (*section_id, elem_size, d, *left_ov[d]);
          get_ov_section (*section_id, elem_size, d, -(*right_ov[d]));
        }
    }

#ifdef DEBUG
  printf ("%d: overlap update finished\n", pcb.i);
#endif 

} /* FUNCTION(dalib_overlap_update) */ 
