/**************************************************************************
*                                                                         *
*  Author      : Dr. Thomas Brandes, GMD, SCAI.LAB                        *
*  Copyright   : GMD St. Augustin, Germany                                *
*  Date        : Feb 95                                                   *
*  Last Update : Oct 97                                                   *
*                                                                         *
*  This Module is part of the DALIB / UNILIB                              *
*                                                                         *
*  Module      : shift.m4                                                 *
*                                                                         *
*  Function    : circular shifting with section descriptors               *
*                                                                         *
*  Export : FORTRAN Interface                                             *
*                                                                         *
* void FUNCTION(dalib_cshift) (target_data, source_data, shift, dim, * 
*              target_section, source_section, shift_dsp, dim_dsp)        *
*                                                                         *
*      target_section = cshift (source_section, shift, dim)               *
*                                                                         *
* void FUNCTION(dalib_eoshift) (target_data, source_data, shift, boundary, dim, * 
*                  target_section, source_section, s_dsp, b_dsp, d_dsp)   *
*                                                                         *
*      target_section = eoshift (source_section, shift, boundary, dim)    *
*                                                                         *
*       section_info *target_section, *source_section;                    *
*       int *dim, *shift;                                                 *
*       char *boundary;                                                   *
*                                                                         *
*  UPDATES:                                                               *
*                                                                         *
*    10/95  now every cshift is handled                                   *
*    10/97  correct deallocation of memory                                *
*                                                                         *
**************************************************************************/

#undef DEBUG

#define CHECK

#include "dalib.h"
#include <stdio.h>

/*******************************************************************
*                                                                  *
*  global_data :    bool circular,  char *boundary                 *
*                                                                  *
*******************************************************************/

/* Mode of the shift in progress.  is_circular is written by the two
   FORTRAN entry points (1 in dalib_cshift, 0 in dalib_eoshift);
   bound_val is written by dalib_eoshift only and is handed to
   dalib_section_fill for the positions that are shifted in.          */

static int is_circular;    /*  1 : cshift, 0 : eoshift    */
static char *bound_val;    /*  boundary value for eoshift */

/* Source operand, set up by dalib_set_shift_source:
     source_section  : section produced by dalib_section_full
     source_array    : source_section->array_id (temporarily replaced
                       by a private copy in dalib_make_save_copy)
     source_dim      : result of dalib_section_array_dim for the
                       shifted dimension (used as a 1-based index)
     source_dim_info : entry source_dim of source_section->dimensions */

static section_info source_section;
static array_info   source_array;
static int          source_dim;
static SecDimInfo   *source_dim_info;

/* Target operand, set up by dalib_set_shift_target; same layout as
   the source variables above.                                        */

static section_info target_section;
static array_info   target_array;
static int          target_dim;
static SecDimInfo   *target_dim_info;

/* Values computed by dalib_shift_init:
     shift_size : source_array->size (bytes per element, judging from
                  its use in the byte count of dalib_make_save_copy)
     shift_pos  : shift distance after normalisation / clamping
     shift_N    : dalib_range_size of the source's global range in
                  the shifted dimension                               */

static int shift_size;
static int shift_pos;
static int shift_N;

/* Private source copy made by dalib_make_save_copy when source and
   target are the same array: save_flag is 1 while the copy exists,
   save_memsize is its size in bytes (passed to dalib_free).          */

static int save_flag = 0;
static int save_memsize;

/*******************************************************************
*                                                                  *
*  dalib_set_shift_source (section_info source, int dim)           *
*                                                                  *
*******************************************************************/

/* Install 'source' as the source operand of the current shift.

   Fills the file-level variables source_section (via dalib_section_full),
   source_array, source_dim and source_dim_info.

   source : section (or array) descriptor handed in by the caller
   dim    : 1-based dimension of the section to shift along; a value
            outside 1 .. rank of the section is reported and followed
            by dalib_stop ().                                          */

static void dalib_set_shift_source (source, dim)
section_info source;
int dim;

{  int section_rank;

   dalib_section_full (&source_section, source);
   source_array = source_section->array_id;

   section_rank = dalib_section_rank (source_section);

   if ((dim < 1) || (section_rank < dim))

     { /* report the offending value together with the section */

       dalib_internal_error ("cshift/eoshift: illegal dim argument");
       printf ("dim = %d, should be >= 1 and <= %d\n", dim, section_rank);
       printf ("section for shifting : \n");
       dalib_print_section_info (source_section);
       dalib_stop ();
     }

   /* section dimension -> dimension number used to index 'dimensions' */

   source_dim      = dalib_section_array_dim (source_section, dim);
   source_dim_info = &source_section->dimensions[source_dim - 1];

} /* dalib_set_shift_source */

/*******************************************************************
*                                                                  *
*  dalib_set_shift_target (section_info target, int dim)           *
*                                                                  *
*******************************************************************/

/* Install 'target' as the target operand of the current shift.

   Fills the file-level variables target_section (via dalib_section_full),
   target_array, target_dim and target_dim_info.

   target : section (or array) descriptor handed in by the caller
   dim    : 1-based dimension of the section to shift along; a value
            outside 1 .. rank of the section is reported and followed
            by dalib_stop ().                                          */

static void dalib_set_shift_target (target, dim)
section_info target;
int dim;

{  int section_rank;

   dalib_section_full (&target_section, target);
   target_array = target_section->array_id;

   section_rank = dalib_section_rank (target_section);

   if ((dim < 1) || (section_rank < dim))

     { /* report the offending value together with the section */

       dalib_internal_error ("cshift/eoshift: illegal dim argument");
       printf ("dim = %d, should be >= 1 and <= %d\n", dim, section_rank);
       printf ("section for shifting : \n");
       dalib_print_section_info (target_section);
       dalib_stop ();
     }

   /* section dimension -> dimension number used to index 'dimensions' */

   target_dim      = dalib_section_array_dim (target_section, dim);
   target_dim_info = &target_section->dimensions[target_dim - 1];

} /* dalib_set_shift_target */

/*******************************************************************
*                                                                  *
*  dalib_make_save_copy ()                                         *
*                                                                  *
*******************************************************************/

/* Give the source its own storage when it is the same array as the
   target, so that writing the target cannot destroy elements that
   still have to be read.

   If target_array and source_array differ nothing is copied and
   save_flag is left at 0.  Otherwise a copy of the descriptor and of
   the data is made, source_array / source_section->array_id are
   redirected to the copy, and save_flag / save_memsize record what
   dalib_shift_exit has to release.                                    */

static void dalib_make_save_copy ()

{ unsigned char *private_data;
  int nbytes;

  /* output arguments of dalib_array_addressing; only 'extent' is used */

  int first_pos, extent[MAX_DIMENSIONS+1], ignored;

  save_flag = 0;

  if (target_array == source_array)

    { /* descriptor copy that shares the mapping of the target */

      source_array = (array_info) dalib_array_copy_dsp (target_array);

      source_array->AlignInfo       = target_array->AlignInfo;
      source_array->DistributeInfo  = target_array->DistributeInfo;
      source_array->dsp_status_flag = target_array->dsp_status_flag;

      /* the copy is never shared, it is treated like a distributed array */

      source_array->SharedInfo     = NO_SHARED;

      dalib_array_addressing (source_array, pcb.i,
                              &ignored, &first_pos, extent);

      nbytes = extent [source_array->rank] * source_array->size;

#ifdef DEBUG
      printf ("%d: copy of source array required, %d bytes\n",
               pcb.i, nbytes);
#endif

      /* duplicate the data of the target (== original source) */

      private_data = (unsigned char *) dalib_malloc (nbytes,
                                                     "dalib_make_save_copy");
      dalib_memcopy (private_data, target_array->f_data, nbytes);

      source_array->f_data     = private_data;
      source_section->array_id = source_array;

      /* remember what dalib_shift_exit has to free */

      save_flag    = 1;
      save_memsize = nbytes;
    }

} /* dalib_make_save_copy */

/*******************************************************************
*                                                                  *
*  dalib_shift_init (int shift)                                    *
*                                                                  *
*   - set global values shift_pos, shift_N, shift_size             *
*   - check correct sections                                       *
*                                                                  *
*******************************************************************/

/* Derive the file-level values shift_size, shift_N and shift_pos for
   the operands installed by dalib_set_shift_source / _target.

   shift : shift distance as passed by the caller

   shift_size : element size of the source array; with CHECK defined a
                different element size in the target is reported and
                followed by dalib_stop ().
   shift_N    : number of elements of the source section's global
                range in the shifted dimension.
   shift_pos  : cshift  - 'shift' reduced by multiples of shift_N
                          until |shift_pos| <= (shift_N+1)/2
                eoshift - 'shift' clamped to -shift_N .. shift_N       */

static void dalib_shift_init (shift)
int shift;

{ shift_size = source_array->size;

#ifdef CHECK
  if (shift_size != target_array->size)
    { dalib_internal_error ("shift: different sizes");
      printf ("%d: source size = %d, target size = %d\n",
              pcb.i, shift_size, target_array->size);
      dalib_stop ();
    }
#endif

  /* now set global shift_pos, shift_N */

  shift_pos = shift;
 
  shift_N = dalib_range_size (source_dim_info->global_range[0],
                              source_dim_info->global_range[1],
                              source_dim_info->global_range[2]);

  if (is_circular)

    { if (shift_N <= 0)

        { /* Empty extent: there is nothing to rotate.  Without this
             guard the reduction loops below would add/subtract 0 (or
             move away from the bound) and never terminate for a
             non-zero shift.                                           */

          shift_pos = 0;
        }

       else

        { /*  -(shift_N+1)/2 <= shift_pos <= (shift_N+1)/2  */

          int n2 = (shift_N+1) / 2;
          while (shift_pos > n2) shift_pos -= shift_N;
          while (shift_pos < -n2) shift_pos += shift_N;
        }
    }

  else

    { /*  -shift_N <= shift_pos <= shift_N  */
      if (shift_pos < -shift_N) shift_pos = -shift_N;
      if (shift_pos > shift_N)  shift_pos = shift_N;
    }

#ifdef DEBUG
  printf ("%d: dalib_shift_init, N = %d, pos = %d, size = %d\n",
          pcb.i, shift_N, shift_pos, shift_size);
#endif 

} /* dalib_shift_init */

/*******************************************************************
*                                                                  *
*  dalib_shift_exit ()                                             *
*                                                                  *
*   - free copy of source array if save_flag was set               *
*                                                                  *
*******************************************************************/

/* Release the private source copy created by dalib_make_save_copy.

   Does nothing when save_flag is 0.  Otherwise the copied data and the
   copied descriptor are freed and the source is pointed back at the
   target array (the copy is only ever made when both were the same
   array).

   save_flag is cleared afterwards: it is a file-level variable that
   only dalib_make_save_copy sets, and not every shift calls that
   routine.  Leaving it at 1 would make the next call of this function
   free the data and descriptor of whatever array is the source then.  */

void dalib_shift_exit ()

{ if (save_flag == 0) return;

  dalib_free (source_array->f_data, save_memsize);

  dalib_free (source_array, dalib_array_dsp_size (source_array->rank));

  /* undo the redirection; do not keep a pointer to freed memory */

  source_section->array_id = target_array;
  source_array             = target_array;

  save_flag = 0;

} /* dalib_shift_exit */

/*******************************************************************
*                                                                  *
*  dalib_secdim_subrange (SecDimInfo *secdim, *subsecdim,          *
*                         int lb, int ub)                          *
*                                                                  *
*   - build subsection (lb:ub) of the input dimension secdim,      *
*     which is regarded as (1:N); the result goes to subsecdim     *
*                                                                  *
*******************************************************************/

/* Write into subsecdim the global range that covers positions lb..ub
   of secdim, counting the elements of secdim's global range as 1..N.

   secdim    : dimension to take the subrange of (read only)
   subsecdim : receives is_range = 1 and the new global_range; its
               other fields are left as they are
   lb, ub    : 1-based first and last position; no bounds check is
               done here

   The stride of secdim is kept, so position k maps to
   global_range[0] + (k-1) * stride.                                   */

static void dalib_secdim_subrange (secdim, subsecdim, lb, ub)

SecDimInfo   *secdim, *subsecdim;
int          lb, ub;

{ /* assert : secdim->is_range */

  int stride;

  stride = secdim->global_range[2];

  subsecdim->is_range = 1;

  subsecdim->global_range[0] = secdim->global_range[0] + (lb - 1) * stride;
  subsecdim->global_range[1] = secdim->global_range[0] + (ub - 1) * stride;
  subsecdim->global_range[2] = stride;

} /* dalib_secdim_subrange */

/*******************************************************************
*                                                                  *
*  SHIFTING : most general case                                    *
*                                                                  *
*******************************************************************/

static void dalib_shift_assigns ()

{ /* general solution for every case */

  SecDimInfo source_dim_save;
  SecDimInfo target_dim_save;

  if (shift_pos == 0)
   
     { FUNCTION(dalib_assign) (&target_section, &source_section);
       return;
     }

  dalib_make_save_copy ();

  source_dim_save = *source_dim_info;
  target_dim_save = *target_dim_info;

  if (shift_pos > 0)

     {  /* target (1:N-pos)   = source (pos+1,N)   */
  
        dalib_secdim_subrange (&target_dim_save, 
                               target_dim_info, 1, shift_N - shift_pos);
        dalib_secdim_subrange (&source_dim_save, 
                               source_dim_info, shift_pos + 1, shift_N);

     }

   else              /* shift_pos < 0 */

     {  /* target (1-pos:N)   = source (1:N+pos)   */
  
        dalib_secdim_subrange (&target_dim_save, 
                               target_dim_info, 1 - shift_pos, shift_N);
        dalib_secdim_subrange (&source_dim_save, 
                               source_dim_info, 1, shift_N + shift_pos);

     }

   /* update local sizes of the new sections */

   dalib_section_reset (source_section);
   dalib_section_reset (target_section);

   FUNCTION(dalib_assign) (&target_section, &source_section);

   if (shift_pos > 0)

      { /* target (N-pos+1:N) = source (1:pos)     */

        dalib_secdim_subrange (&target_dim_save, target_dim_info, 
                               shift_N - shift_pos + 1, shift_N);
        dalib_secdim_subrange (&source_dim_save, 
                               source_dim_info, 1, shift_pos);
      }

    else

      { /* target (1:-pos) = source (N+pos+1:N)     */

        dalib_secdim_subrange (&target_dim_save, 
                               target_dim_info, 1, -shift_pos);
        dalib_secdim_subrange (&source_dim_save, source_dim_info, 
                               shift_N + shift_pos + 1, shift_N);
      }

    /* update local sizes of the new sections */

    dalib_section_reset (source_section);
    dalib_section_reset (target_section);

    if (is_circular)
       FUNCTION(dalib_assign) (&target_section, &source_section);
     else
       dalib_section_fill (target_section, bound_val);

    /* reset old sections */

    *source_dim_info = source_dim_save;
    *target_dim_info = target_dim_save;

    dalib_section_reset (source_section);
    dalib_section_reset (target_section);

} /* dalib_shift_assigns */

/*******************************************************************
*                                                                  *
*  dalib_secdim_localrange (SecDimInfo *secdim, *subsecdim,        *
*                           int lb, int ub)                        *
*                                                                  *
*   - build local subrange (lb:ub) of the input dimension secdim,  *
*     whose local range is regarded as (1:N)                       *
*                                                                  *
*******************************************************************/
 
/* Local counterpart of dalib_secdim_subrange: writes into subsecdim
   the local range that covers positions lb..ub of secdim, counting
   the elements of secdim's local range as 1..N.

   secdim    : dimension to take the subrange of (read only)
   subsecdim : receives is_range = 1 and the new local_range; its
               other fields are left as they are
   lb, ub    : 1-based first and last position; no bounds check is
               done here                                               */

static void dalib_secdim_localrange (secdim, subsecdim, lb, ub)
 
SecDimInfo   *secdim, *subsecdim;
int          lb, ub;
 
{ /* assert : secdim->is_range */
 
  int step;
 
  step = secdim->local_range[2];
 
  subsecdim->is_range       = 1;
  subsecdim->local_range[0] = secdim->local_range[0] + step * (lb - 1);
  subsecdim->local_range[1] = secdim->local_range[0] + step * (ub - 1);
  subsecdim->local_range[2] = step;
 
} /* dalib_secdim_localrange */

/*******************************************************************
*                                                                  *
*  SHIFTING with COMMUNICATION (only left, right neighbor)         *
*                                                                  *
*******************************************************************/

void dalib_shift_communication (p_left, p_right, size, dim, pos)

int p_left, p_right;
int dim, pos, size;
 
{ int source_lb, source_ub, source_str;
  int target_lb, target_ub, target_str;
  int N, N1;

  char *save;

  SecDimInfo source_dim_save;
  SecDimInfo target_dim_save;
 
#ifdef DEBUG
  printf ("%d: dalib_shift_communication, %d <- (dim=%d, pos=%d) -> %d\n",
           pcb.i, p_left, dim, pos, p_right);
#endif

  if (pos == 0)
  
     { dalib_secarray_copy (target_section, source_section);
       return;
     }

  source_dim_save = *source_dim_info;
  target_dim_save = *target_dim_info;
 
  source_lb  = source_dim_info->local_range[0];
  source_ub  = source_dim_info->local_range[1];
  source_str = source_dim_info->local_range[2];

  N = dalib_range_size (source_lb, source_ub, source_str);

  target_lb  = target_dim_info->local_range[0];
  target_ub  = target_dim_info->local_range[1];
  target_str = target_dim_info->local_range[2];

  N1 = dalib_range_size (target_lb, target_ub, target_str);

  if (N != N1)

     { dalib_internal_error ("shift_communication, different size");
       dalib_stop ();
     }

  /* tests are no longer necessary:

  if (source_str != 1)

     { dalib_internal_error ("cshift_communication, stride in source section");
       dalib_print_section_info (source_section);
       dalib_stop ();
     }

  if (target_str != 1)

     { dalib_internal_error ("cshift_communication, stride in target section");
       dalib_print_section_info (target_section);
       dalib_stop ();
     }

     end of old tests                   */

  if (pos > 0)

     { /************************************************
       *                                               *
       *    save = source[..,lb:lb+pos-1,...]          *
       *    target[lb:ub-pos] = source[lb+pos:ub]      *
       *    target[ub-pos+1:ub] = save                 *
       *                                               *
       ************************************************/

       dalib_secdim_localrange (&source_dim_save, source_dim_info, 1, pos);

       if (is_circular)
         { if (p_left != pcb.i)
               dalib_secarray_send (p_left, source_section);
             else
               dalib_secarray_pack (&save, source_section);
         }
        else
         { /* eoshift sends only to real left processors */
           if (p_left < pcb.i)
              dalib_secarray_send (p_left, source_section);
         }

       dalib_secdim_localrange (&source_dim_save, source_dim_info, pos+1, N);
       dalib_secdim_localrange (&target_dim_save, target_dim_info, 1, N-pos);

       dalib_secarray_copy (target_section, source_section);
 
       dalib_secdim_localrange (&target_dim_save, target_dim_info, N-pos+1, N);
 
       if (is_circular)
         { if (p_right != pcb.i)
               dalib_secarray_recv (p_right, target_section);
             else
               dalib_secarray_unpack (target_section, save);
         }
        else
         { /* eoshift receives only from real right processors */
           if (p_right > pcb.i)
              dalib_secarray_recv (p_right, target_section);
            else
              dalib_section_fill (target_section, bound_val);
         }
 
     } /* pos > 0 */

  else if (pos < 0)

     { /************************************************
       *                                               *
       *    send source[..,ub+pos+1:ub,...]            *
       *    target[lb-pos:ub] = source[lb:ub+pos]      *
       *    recv target[lb:lb-pos-1]                   *
       *                                               *
       ************************************************/

       dalib_secdim_localrange (&source_dim_save, source_dim_info, N+pos+1, N);

       if (is_circular)
         { if (p_right != pcb.i)
              dalib_secarray_send (p_right, source_section);
            else
              dalib_secarray_pack (&save, source_section);
         }
        else
         { /* eoshift sends only to real right processors */
           if (p_right > pcb.i)
              dalib_secarray_send (p_right, source_section);
         }

       dalib_secdim_localrange (&source_dim_save, source_dim_info, 1, N+pos);
       dalib_secdim_localrange (&target_dim_save, target_dim_info, -pos+1,N);

       dalib_secarray_copy (target_section, source_section);

       dalib_secdim_localrange (&target_dim_save, target_dim_info, 1, -pos);

       if (is_circular)
         { if (p_left != pcb.i)
               dalib_secarray_recv (p_left, target_section);
             else
               dalib_secarray_unpack (target_section, save);
         }
        else
         { /* eoshift receives only from real left processor */
           if (p_left < pcb.i)
              dalib_secarray_recv (p_left, target_section);
            else
              dalib_section_fill (target_section, bound_val);
         }

     }
            
    *source_dim_info = source_dim_save;
    *target_dim_info = target_dim_save;

} /* dalib_shift_communication */

/*******************************************************************
*                                                                  *
*  SHIFTING FOR SECTIONS                                           *
*                                                                  *
*  dalib_shift (target, source, shift, dim)                        *
*                                                                  *
*     target = shift (source, shift, dim)                          *
*                                                                  *
*******************************************************************/

void dalib_shift (target, source, shift, dim)

section_info target;
section_info source;
int dim, shift;

{ int source_topid, target_topid;
  int source_topdim, target_topdim;
  int base, stride, lb, ub, kind;

  int is_shift, max_shift;
  int NP, NId;
  int p_left, p_right, p_help;
  int switch_flag;
  DistDim source_mapping;

#ifdef DEBUG
  printf ("%d: dalib_shift (dim=%d, shift=%d)\n", pcb.i, dim, shift);
#endif
 
  dalib_set_shift_source (source, dim);
  dalib_set_shift_target (target, dim);

  dalib_shift_init (shift);

  if (!dalib_is_aligned (source_section, target_section))

   {  dalib_shift_assigns ();
      goto clean_up;
   }

#ifdef DEBUG
   printf ("%d: shift source and target are aligned, try efficient routines\n",
           pcb.i);
#endif

  /* we can assume that both sections are directly aligned */

  dalib_array_dim_mapping (source_array, source_dim,
                           &base, &stride, &lb, &ub, 
                           &source_topid, &source_mapping);

  dalib_dim_mapping_info (source_mapping, &kind, &source_topdim);

  if (kind == kSERIAL_DIM)

     { /* local shifting */

       dalib_shift_communication (pcb.i, pcb.i,
                                  shift_size, dim, shift_pos);
       goto clean_up;

     }

  switch_flag = 0;

  if (source_dim_info->global_range[2] < 0)
     switch_flag = 1;
  if (stride < 0)
     switch_flag = 1 - switch_flag;

  dalib_top_info (source_topid, source_topdim, &NP, &NId);
  dalib_top_neighbors (&p_left, &p_right,
                       source_topid, source_topdim);

  if (switch_flag)
     { p_help  = p_right;
       p_right = p_left;
       p_left  = p_help;
     }

  max_shift = shift_N / NP;
  if (shift_pos > 0)
     is_shift = shift_pos;
   else
     is_shift = -shift_pos;

  if (((ub - lb + 1) == shift_N) && (is_shift <= max_shift))

     { /* shift with communication */

       dalib_shift_communication (p_left, p_right,
                                  shift_size, dim, shift_pos);

       goto clean_up;

     } /* shifting with communication */
  
  /* cannot use efficient neighbour communication */

#ifdef DEBUG
  printf ("%d: shift=%d (max=%d), N=%d, dimsize=%d, failed using efficient\n", 
          pcb.i, is_shift, max_shift, shift_N, ub - lb +1);
#endif

  dalib_shift_assigns ();

clean_up:

  dalib_shift_exit ();

  if (dalib_is_array_info (target))
    FUNCTION(dalib_section_free) (&target_section);

  if (dalib_is_array_info (source))
    FUNCTION(dalib_section_free) (&source_section);

} /* dalib_shift */

/*******************************************************************
*                                                                  *
*  CIRCULAR SHIFTING FOR SECTIONS                                  *
*                                                                  *
*   void dalib_cshift (target_data, source_data, shift, dim,       *
*         target_section, source_section, shift_dsp, dim_dsp)      *
*                                                                  *
*     target_section = cshift (source_section, shift, dim)         *
*                                                                  *
*******************************************************************/

/* FORTRAN entry point for a circular shift.

   target_data, source_data       : not used in this routine
   shift                          : shift distance (always read)
   dim                            : dimension to shift along; 1 is
                                    used when dalib_present (dim)
                                    is false
   target_section, source_section : descriptors of result and operand
   shift_dsp, dim_dsp             : descriptors of shift / dim; if
                                    dalib_present reports either one,
                                    the call is rejected with an
                                    internal error and dalib_stop ()

   Sets the file-level mode to circular and calls dalib_shift.        */

void FUNCTION(dalib_cshift)

     (target_data,    source_data,    shift,     dim,
      target_section, source_section, shift_dsp, dim_dsp)

char         *target_data, *source_data;
section_info *target_section, *source_section;
section_info *shift_dsp, *dim_dsp;
int *dim, *shift;

{ int shift_dim = 1;            /* used when dim is not present */

  is_circular = 1;              /* cshift               */

  if (FUNCTION(dalib_present) (shift_dsp))
     { dalib_internal_error ("dalib_cshift : illegal shift argument");
       dalib_stop ();
     }

  if (FUNCTION(dalib_present) (dim_dsp))
     { dalib_internal_error ("dalib_cshift : illegal dim argument");
       dalib_stop ();
     }

#ifdef DEBUG
  printf ("%d: dalib_cshift (dim=%d, shift=%d)\n", pcb.i, *dim, *shift);
#endif

  if (FUNCTION(dalib_present) (dim))
     shift_dim = *dim;

  dalib_shift (*target_section, *source_section, *shift, shift_dim);
  
} /* FUNCTION(dalib_cshift) */ 

/*******************************************************************
*                                                                  *
*  END-OFF SHIFTING FOR SECTIONS                                   *
*                                                                  *
*  void FUNCTION(dalib_eoshift) (target_data, source_data,         *
*          shift, boundary, dim, target_section, source_section,   *
*          s_dsp, b_dsp, d_dsp)                                    *
*                                                                  *
*   target_section = eoshift (source_section, shift, boundary, dim)*
*                                                                  *
*******************************************************************/

/* FORTRAN entry point for an end-off shift.

   target_data, source_data       : not used in this routine
   shift                          : shift distance (always read)
   boundary                       : address of the value stored into
                                    the positions that are shifted in
   dim                            : dimension to shift along; 1 is
                                    used when dalib_present (dim)
                                    is false
   target_section, source_section : descriptors of result and operand
   s_dsp, b_dsp, d_dsp            : descriptors of shift / boundary /
                                    dim; if dalib_present reports any
                                    of them, the call is rejected with
                                    an internal error and dalib_stop ()

   Sets the file-level mode to end-off, records the boundary address
   and calls dalib_shift.

   NOTE(review): 'boundary' is stored without a presence test; what
   happens for an omitted BOUNDARY argument is not visible here.       */

void FUNCTION(dalib_eoshift)

     (target_data,    source_data,    shift, boundary, dim,
      target_section, source_section, s_dsp, b_dsp,    d_dsp)

char *target_data, *source_data;
section_info *target_section, *source_section;
int *dim, *shift;
char *boundary;
section_info *s_dsp, *b_dsp, *d_dsp;

{ int shift_dim = 1;            /* used when dim is not present */

  if (FUNCTION(dalib_present) (s_dsp))
     { dalib_internal_error ("dalib_eoshift : illegal shift argument");
       dalib_stop ();
     }

  if (FUNCTION(dalib_present) (b_dsp))
     { dalib_internal_error ("dalib_eoshift : illegal boundary argument");
       dalib_stop ();
     }

  if (FUNCTION(dalib_present) (d_dsp))
     { dalib_internal_error ("dalib_eoshift : illegal dim argument");
       dalib_stop ();
     }

  is_circular = 0;              /* eoshift               */
  bound_val   = boundary;       /* global boundary value */

#ifdef DEBUG
  printf ("%d: dalib_eoshift (dim=%d, shift=%d)\n", pcb.i, *dim, *shift);
#endif

  if (FUNCTION(dalib_present) (dim))
     shift_dim = *dim;

  dalib_shift (*target_section, *source_section, *shift, shift_dim);
 
} /* FUNCTION(dalib_eoshift) */ 
