/**************************************************************************
*                                                                         *
*  Author      : Dr. Thomas Brandes, GMD, SCAI.LAB                        *
*                                                                         *
*  Copyright   : GMD St. Augustin, Germany                                *
*  Date        : Nov 94                                                   *
*  Last Update : Oct 97                                                   *
*                                                                         *
*  This Module is part of the DALIB                                       *
*                                                                         *
*  Module      : overlap.m4                                               *
*                                                                         *
*  Function: Updating of overlap areas by the descriptor                  *
*                                                                         *
*  Export :  DALIB Interface                                              *
*  =========================                                              *
*                                                                         *
*  Export :  FORTRAN Interface                                            *
*  ===========================                                            *
*                                                                         *
* void FUNCTION(dalib_overlap_update) (section_id,                        *
*                        lov1, rov1, lov2, rov2, lov3, rov3,              *
*                        lov4, rov4, lov5, rov5, lov6, rov6, lov7, rov7)  *
*                                                                         *
*    - updates the corresponding area of the overlap area                 *
*    - works for serial and distributed arrays                            *
*                                                                         *
*  Open problems:                                                         *
*                                                                         *
*    - uniproc model, this module is necessary but calls topology/comm    *
*    - error handling if overlap area is too small                        *
*    - error handling if not block distribution for the overlap dim       *
*    - it should be possible to work with smaller arrays                  *
*                                                                         *
*  Updates                                                                *
*  =======                                                                *
*                                                                         *
*   10/97 : using new mapping routines of distribution                    *
*                                                                         *
**************************************************************************/

#include <stdio.h>
#include "dalib.h"

#undef DEBUG
#define CHECK

/**************************************************************************
*                                                                         *
* make a special data structure describing the section of an array        *
*                                                                         *
**************************************************************************/

   /* some data used globally for convenience; both pointers are
      (re)assigned by set_sub_section each time it is called      */

static DimInfo    *array_dimensions;    /* dimension table of the section's array */
static SecDimInfo *section_dimensions;  /* dimension table of the section itself  */

/**************************************************************************
*                                                                         *
*   set the section/array informations globally                           *
*                                                                         *
**************************************************************************/

static void set_sub_section (section_id)

section_info section_id;

{ array_info   array_id;

  int i, rank, lb, ub, str;

  array_id = section_id->array_id;

  array_dimensions   = array_id->dimensions;
  section_dimensions = section_id->dimensions;

  /* negative strides are not possible here */

  rank = array_id->rank;
  for (i=0; i<rank; i++)
     if (section_dimensions[i].global_range[2] < 0)
       { lb  = section_dimensions[i].global_range [1];
         ub  = section_dimensions[i].global_range [0];
         str = section_dimensions[i].global_range [2];
         section_dimensions[i].global_range [0] = lb;
         section_dimensions[i].global_range [1] = ub;
         section_dimensions[i].global_range [2] = -str;
         lb  = section_dimensions[i].local_range [1];
         ub  = section_dimensions[i].local_range [0];
         str = section_dimensions[i].local_range [2];
         section_dimensions[i].local_range [0] = lb;
         section_dimensions[i].local_range [1] = ub;
         section_dimensions[i].local_range [2] = -str;
       }
  
} /* set_sub_section */

/**************************************************************************
*                                                                         *
*   global data for sending and receiving data types / processors         *
*                                                                         *
**************************************************************************/

/* derived data types describing what is sent / received; reset to NO_DDT
   and rebuilt by get_ov_section, freed by do_communication              */

static dd_type global_ddt_send, global_ddt_recv;

/* processor ids of the communication partners, set by get_ov_section */

static int     global_p_send,   global_p_recv;

/* 1 : get_ov_section calls do_communication itself (dalib_overlap_update)
   0 : the data types are only built and handed back to the caller
       (dalib_section_overlap)                                           */

static int execute_communication;   /* 0 : only save of data */

/* do_communication ()

   Carries out the exchange described by the file-level variables
   global_ddt_send / global_ddt_recv and global_p_send / global_p_recv:

    - if this processor (pcb.i) is both target and source, the data is
      moved directly from the send to the recv data type
    - otherwise the send data type is sent to global_p_send and the
      recv data type is received from global_p_recv

   Both data types are freed afterwards in either case.               */

static void do_communication ()

{ int purely_local;

#ifdef DEBUG
  printf ("%d do communication send -> %d, recv <- %d\n",
           pcb.i, global_p_send, global_p_recv);
  printf ("send the following ddt : \n");
  dalib_ddt_print (global_ddt_send);
  printf ("recv the following ddt : \n");
  dalib_ddt_print (global_ddt_recv);
#endif

  purely_local = (global_p_send == pcb.i) && (global_p_recv == pcb.i);

  if (!purely_local)

    { /* real partners: send first, then receive */

      dalib_send_ddt    (global_p_send, global_ddt_send);
      dalib_recv_ddt_op (global_p_recv, global_ddt_recv, 0);
    }

  else

    /* both partners are this processor: no message needed */

    dalib_ddt_move (global_ddt_recv, global_ddt_send);

  dalib_ddt_free (global_ddt_send);
  dalib_ddt_free (global_ddt_recv);

} /* do_communication */

/**************************************************************************
*                                                                         *
* get_ov_section (section_id, size, ov_dim, val)                          *
*                                                                         *
*   section_id : section whose overlap area is handled                    *
*   size       : handed on to dalib_make_section_ddt (the callers pass    *
*                array_id->size)                                          *
*   ov_dim     : dimension of the overlap, counted from 0                 *
*   val        : width and side of the overlap, 0 : nothing to do         *
*                                                                         *
* let array be A [lb1:ub1, lb2:ub2, ..., lbn:ubn]                         *
*                                                                         *
* result are derived data types (global_ddt_send, global_ddt_recv) and    *
* partners (global_p_send, global_p_recv) for exactly this section:       *
*                                                                         *
* CASE 1 : val > 0  means update of left overlap area                     *
*                                                                         *
*   send A [lb1:ub1, ...,ub-val+1:ub, ..., lbn:ubn] to right              *
*   recv A [lb1:ub1, ...,lb-val:lb-1, ..., lbn:ubn] from left             *
*                                                                         *
* CASE 2 : val < 0  means update of right overlap area                    *
*                                                                         *
*   send A [lb1:ub1, ...,lb:lb-val-1, ..., lbn:ubn] to left               *
*   recv A [lb1:ub1, ...,ub+1:ub-val, ..., lbn:ubn] from right            *
*                                                                         *
*  note : works globally with subsection (set_sub_section must have       *
*         been called before, it sets section_dimensions)                 *
*                                                                         *
*  note : if execute_communication is set the exchange is done at once    *
*         (do_communication also frees the data types), otherwise the     *
*         data types are left in the global variables for the caller      *
*                                                                         *
**************************************************************************/

static void get_ov_section (section_id, size, ov_dim, val)

section_info section_id;
int ov_dim;
int val;
int size;

{ int save_lb, save_ub;

  int global_section[3];

  int top_id, top_dim;
  int t_base, t_stride;
  int t_low, t_up, kind;

  DistDim t_mapping;

  int lb, ub, str, n;
  int okay;
  int pid, p_low, p_high, pid_ref;
  int s_id;

  int NP, NId, distance;

  int *local_section;

  if (val == 0) return;

  global_section[0] = section_dimensions[ov_dim].global_range[0];
  global_section[1] = section_dimensions[ov_dim].global_range[1];
  global_section[2] = 1;    /* do not care about too much data */

  /* set local_section as pointer to the local section size */

  local_section = section_dimensions[ov_dim].local_range;

  /* the local bounds of ov_dim are overwritten temporarily while the
     data types are built, so keep them for the restore at the end    */

  save_lb = local_section[0];
  save_ub = local_section[1];

#ifdef DEBUG
  printf ("%d: ov update, sec=%d (%d bytes), dim=%d, pos=%d\n",
           pcb.i, section_id, size, ov_dim+1, val);
#endif

  /* mapping routines count dimensions from 1 */

  dalib_array_dim_mapping (section_id->array_id, ov_dim+1,
                           &t_base, &t_stride, &t_low, &t_up, 
                           &top_id, &t_mapping);

  dalib_dim_mapping_info (t_mapping, &kind, &top_dim);


  if ((top_id > 0) && (top_dim > 0))   /* not a serial dimension */

    { dalib_top_info (top_id, top_dim, &NP, &NId);
      distance = dalib_top_distance (top_id, top_dim);

      /* pid_ref + (p-1) * distance is used below as the processor id
         of position p in this topology dimension                     */

      pid_ref  = pcb.i - (NId-1) * distance;
    }

  else

    { NP = 1; NId = 1;
      pid_ref  = pcb.i;
      distance = 0;
    }

#ifdef DEBUG
  printf ("%d: topology (id=%d,dim=%d), I am %d/%d, dist = %d\n",
          pcb.i, top_id, top_dim, NId, NP, distance);
#endif

  dalib_ov_move (&s_id, NP, NId, t_mapping, t_low, t_up,
                 t_base, t_stride, global_section, val);

#ifdef DEBUG
  printf ("%d: computed schedule for overlap udpate\n", pcb.i);
  dalib_print_schedule (s_id);
#endif

  global_ddt_send = NO_DDT;
  global_ddt_recv = NO_DDT;

  /* sending part : at most one target is supported */

  dalib_schedule_psends (s_id, &p_low, &p_high);

  if (p_low < p_high)
     { dalib_internal_error ("overlap: too many targets");
       dalib_stop ();
     }

  for (pid = p_low; pid <= p_high; pid++)

    { dalib_schedule_send_range (s_id, pid, &okay, &lb, &ub, &str);
      if (!okay)
        { dalib_internal_error ("get overlap section: not a range");
          dalib_stop ();
        }
      local_section [0] = lb; 
      local_section [1] = ub;
      global_p_send = pid_ref + (pid - 1) * distance;
#ifdef DEBUG
      printf ("%d: send to %d the range %d:%d\n",
              pcb.i, global_p_send, lb, ub);
      dalib_print_section_info (section_id);
#endif
      dalib_make_section_ddt (&global_ddt_send, section_id, size); 
    }

  /* receiving part : at most one source is supported; the check has
     to look at the receive range, so it must follow the query        */

  dalib_schedule_precvs (s_id, &p_low, &p_high);

  if (p_low < p_high)
     { dalib_internal_error ("overlap: too many sources");
       dalib_stop ();
     }

  /* NOTE(review): 'okay' is not tested for the receive range (unlike
     the send range above) - confirm that this is intended            */

  for (pid = p_low; pid <= p_high; pid++)
    { dalib_schedule_recv_range (s_id, pid, &okay, &lb, &ub, &str, &n);
      local_section [0] = lb; 
      local_section [1] = ub;
      global_p_recv = pid_ref + (pid - 1) * distance;
#ifdef DEBUG
      printf ("%d: recv from %d the range %d:%d\n",
              pcb.i, global_p_recv, lb, ub);
      dalib_print_section_info (section_id);
#endif
      dalib_make_section_ddt (&global_ddt_recv, section_id, size); 
    }

  if (execute_communication) do_communication ();
 
  dalib_free_schedule (s_id);

  /* restore the local bounds of the section in this dimension */

  local_section [0] = save_lb;
  local_section [1] = save_ub;

#ifdef DEBUG
  printf ("%d: my new local size is %d - %d\n", pcb.i,
           section_dimensions[ov_dim].local_range[0],
           section_dimensions[ov_dim].local_range[1]);
#endif
 
} /* get_ov_section */

/**************************************************************************
*                                                                         *
*   static void dalib_fictive_local_size (array_id, dim => lb, ub)        *
*                                                                         *
**************************************************************************/

static void dalib_fictive_local_size (array_id, dim, lb, ub)
array_info array_id;
int dim;
int *lb, *ub;

{ int array_lb, array_ub;
  int temp_lb, temp_ub;
  int a_in_t_lb, a_in_t_ub;
  int a_f_in_t_lb, a_f_in_t_ub;

  int  base, stride, kind;
  int  top_id, top_dim;
  int  NP, NId;
  int  x, dummy;
 
  int temp_size[2];
  int local_size[2];

  DistDim mapping;

  array_lb = array_id->dimensions[dim].global_size[0];
  array_ub = array_id->dimensions[dim].global_size[1];

  dalib_array_dim_mapping (array_id, dim+1, 
                           &base, &stride, &temp_lb, &temp_ub,
                           &top_id, &mapping);

  dalib_dim_mapping_info (mapping, &kind, &top_dim);

  if (kind == kSERIAL_DIM)

     { *lb = array_lb;
       *ub = array_ub;
       return;
     }

  a_in_t_lb = array_lb * stride + base;
  a_in_t_ub = array_ub * stride + base;

  /* make a fictive size over the full template */

  if (stride > 0)
     { x = (a_in_t_lb - temp_lb + stride) / stride;
       a_f_in_t_lb = a_in_t_lb - x * stride;
       x = (temp_ub - a_in_t_ub + stride) / stride;
       a_f_in_t_ub = a_in_t_ub + x * stride;
     }
   else 
     { x = (a_in_t_ub - temp_lb - stride) / (-stride);
       a_f_in_t_ub = a_in_t_ub + x * stride;
       x = (temp_ub - a_in_t_lb - stride) / (-stride);
       a_f_in_t_lb = a_in_t_lb - x * stride;
     }

#ifdef DEBUG
  printf ("%d: fictive of %d:%d by %d*I+%d = %d:%d:%d in %d:%d is %d:%d\n",
           pcb.i, array_lb, array_ub, stride, base, a_in_t_lb, a_in_t_ub,
           stride, temp_lb, temp_ub, a_f_in_t_lb, a_f_in_t_ub);
#endif 

  /* find now the local part of fictive array in template */

  temp_size[0] = temp_lb;
  temp_size[1] = temp_ub;

  dalib_top_info (top_id, top_dim, &NP, &NId);

  dalib_distribution_size (mapping, NId, NP, temp_size, local_size);

  *lb = local_size[0];
  *ub = local_size[1];

  dalib_r_s_intersect (a_f_in_t_lb, a_f_in_t_ub, stride, 
                       *lb, *ub,   lb, ub, &dummy);

  /* map fictive part back to array lb:ub:stride -> lb':ub' */

  dalib_map_section (a_in_t_lb, stride, array_lb, 1, 
                     *lb, *ub, dummy, lb, ub, &dummy);

#ifdef DEBUG
  printf ("%d: fictive local of %d:%d by %d*I+%d in %d:%d is %d:%d\n",
           pcb.i, array_lb, array_ub, stride, base, temp_lb, temp_ub, *lb, *ub);
#endif 

} /* dalib_fictive_local_size */

/**************************************************************************
*                                                                         *
*   static void extend_section (section_id, dim, left, right)             *
*                                                                         *
*   section_id->dimensions[dim].local_range :  sec_lb : sec_ub            *
*                                                                         *
*   - afterwards sec_lb : sec_ub is the local part inclusive the          *
*     overlap of width left / right                                       *
*   - the new local range is the intersection of the global range and     *
*     of the (fictive) local size, both widened by left / right           *
*   - this is absolutely necessary for updates in more dimensions         *
*   - nothing is changed if left + right is 0                             *
*                                                                         *
*  NOTE : local part of one dimension is not considered for               *
*         update in this dimension, but for all others                    *
*                                                                         *
**************************************************************************/

static void extend_section (section_id, dim, left, right)

section_info section_id;
int dim, left, right;

{ SecDimInfo *d;
  int        own_lo, own_hi;

  if (left + right != 0)

    { d = &(section_id->dimensions[dim]);

      /* extend the local size, but only this one */

      dalib_fictive_local_size (section_id->array_id, dim, &own_lo, &own_hi);

      dalib_r_s_intersect (d->global_range[0] - left,
                           d->global_range[1] + right,
                           d->global_range[2],
                           own_lo - left,
                           own_hi + right,
                           &(d->local_range[0]),
                           &(d->local_range[1]),
                           &(d->local_range[2]));

      d->is_range = 1;  /* important: now lb : ub */

      /* Open question: aligned arrays that have no real elements on a
                        processor but have an overlap at its boundaries

         Idea         : array_dim->local_size should be a fictive size  */

#ifdef DEBUG
      printf ("%d: local section at dim %d extended by [%d:%d] to %d:%d:%d\n",
              pcb.i, dim, left, right, d->local_range[0],
              d->local_range[1], d->local_range[2]);
#endif
    }

} /* extend_section */

/**************************************************************************
*                                                                         *
* void FUNCTION(dalib_overlap_update) (section_id, lov1, rov1, lov2, rov2,*
*                              lov3, rov3, lov4, rov4, lov5, rov5,        *
*                                          lov6, rov6, lov7, rov7)        *
*                                                                         *
*   section_id : section to be updated (passed by reference)              *
*   lovK, rovK : left / right overlap of dimension K (by reference),      *
*                only the first 'rank' pairs are looked at                *
*                                                                         *
*   1. every dimension of the section is extended by its overlap          *
*   2. for every dimension the left and then the right overlap area       *
*      is updated, communication is executed at once                      *
*                                                                         *
*   Both steps run from the last dimension down to the first one.         *
*   A rank outside 1..7 matches no case, nothing is updated then.         *
*                                                                         *
**************************************************************************/

void FUNCTION(dalib_overlap_update) (section_id, 
                            lov1, rov1, lov2, rov2, lov3, rov3,
                            lov4, rov4, lov5, rov5, lov6, rov6, lov7, rov7)

section_info *section_id; 
int *lov1, *rov1, *lov2, *rov2, *lov3, *rov3, *lov4, *rov4,
    *lov5, *rov5, *lov6, *rov6, *lov7, *rov7;

{ section_info sec;
  array_info   owner;

  int ndims;
  int nbytes;

  sec = *section_id;

#if defined(VT)
  dalib_trace_on (TRACE_OVERLAP_UPDATE);
#endif

#ifdef DEBUG
  printf ("dalib_overlap_update\n");
  dalib_print_section_info (sec);
#endif 

  owner = sec->array_id;

  /* Attention: overlap cannot be defined after reserving memory */

  ndims  = owner->rank;
  nbytes = owner->size;

  set_sub_section (sec);

  execute_communication = 1;      /* do communication at once */

  /* note: new data in overlap area in one dimension will be
           added to the local range of section_id            */

  /* step 1 : extend all dimensions; the cases fall through on purpose
              so that rank r handles the dimensions r, r-1, ..., 1     */

  switch (ndims) {

     case 7 : extend_section (sec, 6, *lov7, *rov7);
              /* fall through */
     case 6 : extend_section (sec, 5, *lov6, *rov6);
              /* fall through */
     case 5 : extend_section (sec, 4, *lov5, *rov5);
              /* fall through */
     case 4 : extend_section (sec, 3, *lov4, *rov4);
              /* fall through */
     case 3 : extend_section (sec, 2, *lov3, *rov3);
              /* fall through */
     case 2 : extend_section (sec, 1, *lov2, *rov2);
              /* fall through */
     case 1 : extend_section (sec, 0, *lov1, *rov1);

  } /* switch */

  /* step 2 : update left (positive value) and right (negative value)
              overlap area of every dimension, same fall through       */

  switch (ndims) {

     case 7 : get_ov_section (sec, nbytes, 6, *lov7);
              get_ov_section (sec, nbytes, 6, -(*rov7));
              /* fall through */
     case 6 : get_ov_section (sec, nbytes, 5, *lov6);
              get_ov_section (sec, nbytes, 5, -(*rov6));
              /* fall through */
     case 5 : get_ov_section (sec, nbytes, 4, *lov5);
              get_ov_section (sec, nbytes, 4, -(*rov5));
              /* fall through */
     case 4 : get_ov_section (sec, nbytes, 3, *lov4);
              get_ov_section (sec, nbytes, 3, -(*rov4));
              /* fall through */
     case 3 : get_ov_section (sec, nbytes, 2, *lov3);
              get_ov_section (sec, nbytes, 2, -(*rov3));
              /* fall through */
     case 2 : get_ov_section (sec, nbytes, 1, *lov2);
              get_ov_section (sec, nbytes, 1, -(*rov2));
              /* fall through */
     case 1 : get_ov_section (sec, nbytes, 0, *lov1);
              get_ov_section (sec, nbytes, 0, -(*rov1));

  } /* switch */

#ifdef DEBUG
  printf ("%d: overlap update finished\n", pcb.i);
#endif 

#if defined(VT)
  dalib_trace_off (TRACE_OVERLAP_UPDATE);
#endif

} /* FUNCTION(dalib_overlap_update) */ 

/**************************************************************************
*                                                                         *
* void dalib_section_overlap (section_id, dim, overlap,                   *
*                             p_send, ddt_send, p_recv, ddt_recv)         *
*                                                                         *
*  - direct query for communication of a simple overlap                   *
*                                                                         *
*   section_id : section with the overlap                                 *
*   dim        : dimension of the overlap, counted from 1                 *
*   overlap    : handed to get_ov_section as 'val' ( > 0 : left,          *
*                < 0 : right overlap area), must not be 0                 *
*   p_send,   p_recv   : (out) processor ids of target / source           *
*   ddt_send, ddt_recv : (out) derived data types to send / receive       *
*                                                                         *
*  - no communication is executed here, the data types are only built;    *
*    they are not freed here, so the caller has to deal with them         *
*  - array without data or overlap of 0 is an internal error              *
*                                                                         *
**************************************************************************/

void dalib_section_overlap (section_id, dim, overlap,
                            p_send, ddt_send, 
                            p_recv, ddt_recv              )

section_info section_id; 
int dim, overlap;

int     *p_send, *p_recv;
dd_type *ddt_send, *ddt_recv;

{ int      size;

  array_info array_id;

  array_id = (section_id)->array_id;

  /* Attention: overlap cannot be defined after reserving memory */

  size = array_id->size;

  /* trace output only now, 'size' has no value before this point */

#ifdef DEBUG
  printf ("%d: dalib_section_overlap, sec = %d, dim/ov = %d/%d, size = %d\n",
          pcb.i, section_id, dim, overlap, size);
#endif 

  /* internal errors are followed by dalib_stop as in get_ov_section;
     going on would hand stale global data back to the caller         */

  if (array_id->data == (unsigned char *) NULL)
     { dalib_internal_error ("overlap_update: no data");
       dalib_stop ();
     }

  if (overlap == 0)
     { dalib_internal_error ("section_overlap : no overlap");
       dalib_stop ();
     }

  set_sub_section (section_id);

  /* note: new data in overlap area in one dimension will be
           added to the local range of section_id            */

  execute_communication = 0;      /* do not communicate, only build ddts */

  get_ov_section  (section_id, size, dim-1, overlap);

  /* hand over what get_ov_section has left in the global variables */

  *ddt_send = global_ddt_send;
  *ddt_recv = global_ddt_recv;

  *p_send   = global_p_send;
  *p_recv   = global_p_recv;

#ifdef DEBUG
  printf ("%d: overlap computation ready, send -> %d, recv <- %d\n",
          pcb.i, global_p_send, global_p_recv);
#endif 

} /* dalib_section_overlap */
