/**************************************************************************
*                                                                         *
*  Author      : Dr. Thomas Brandes, GMD, SCAI.LAB                        *
*                                                                         *
*  Copyright   : GMD St. Augustin, Germany                                *
*  Date        : Aug 97                                                   *
*  Last Update : Aug 97                                                   *
*                                                                         *
*  This Module is part of the DALIB                                       *
*                                                                         *
*  Module      : remote.m4                                                *
*                                                                         *
*  Function: Creating descriptors for remote access of distr. data        *
*                                                                         *
*  EXPORT:                                                                *
*                                                                         *
*   a) FORTRAN interface                                                  *
*                                                                         *
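*      FUNCTION(dalib_array_rma) (array_info *array_id)                   *
*      FUNCTION(dalib_rma_read) (data, array_id, ind1, ..., ind7)        *
*      FUNCTION(dalib_rma_update) (op, data, array_id, ind1, ..., ind7)  *
*      FUNCTION(dalib_array_remote) (s_id, array, base, mask, ind1..ind7) *
*      FUNCTION(dalib_array_rem_read) (s_id, array_dsp, base_dsp)         *
*      FUNCTION(dalib_array_rem_write) (s_id, array_dsp, base_dsp)        *
*      FUNCTION(dalib_array_remote_free) (s_id)                           *
*                                                                         *
*   b) DALIB interface                                                    *
*                                                                         *
*      void dalib_remote_free (Remote remote_ptr)                         *
*      void dalib_array_remote_init (array_info array_id)                 *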
*                                                                         *
*  Attention: this module should be available in any case                 *
*                                                                         *
**************************************************************************/
 
#undef DEBUG

#include "dalib.h"

     /***********************************************
     *                                              *
     *   record describing remote addressing fac.   *
     *   (one entry per processor, see              *
     *    dalib_array_remote_init)                  *
     *                                              *
     ***********************************************/

typedef struct {

   /* filled via dalib_array_addressing in dalib_array_remote_init;
      presumably the offset of the first local element - TODO confirm */
   int first;

   /* copy of the data pointer (array_id->data) of the owning processor */
   unsigned char *data;

   /* filled via dalib_array_addressing; exact meaning of the entries is
      not visible in this module - TODO confirm against that routine   */
   int total[MAX_DIMENSIONS+1];

   } remote_access;

/* Remote memory access descriptor of an array; it is attached to the
   array descriptor as RemoteInfo by dalib_array_rma and released by
   dalib_remote_free.                                                  */

struct RemoteRecord
 
 { int rma_defined;          /* will be 1 if rma_info allocated     */

   int rma_NP;               /* number of processors that have data */

   remote_access *rma_info;  /* will have rma_NP entries only       */
 };

/******************************************************************
*                                                                 *
*  FUNCTION(dalib_array_rma) (array_info *array_id)               *
*                                                                 *
*   - defines an array to be accessed via remote memory access    *
*   - attaches a new RemoteRecord with rma_defined = 0            *
*   - will not exchange addresses (done with allocation)          *
*   - raises an internal error if compiled without RMA            *
*                                                                 *
******************************************************************/

void FUNCTION(dalib_array_rma) (array_id)

array_info *array_id;

{ Remote rma_record;

  rma_record = (Remote) dalib_malloc (sizeof (struct RemoteRecord),
                                      "dalib_array_rma");

  /* the table of remote addresses does not exist yet */

  rma_record->rma_defined = 0;

#ifdef RMA
  dalib_system_rma_init();
#else
  dalib_internal_error ("RMA (remote memory access) not available");
  dalib_stop ();
#endif

  /* make the record reachable from the array descriptor */

  (*array_id)->RemoteInfo = rma_record;

#ifdef DEBUG
  printf ("%d: array %d has been defined to have remote access\n",
           pcb.i, *array_id);
#endif

} /* dalib_array_rma */

/*******************************************************************
*                                                                  *
* void dalib_remote_free (Remote remote_ptr)                       *
*                                                                  *
*   - frees rma_info (only if rma_defined) and the record itself   *
*                                                                  *
*******************************************************************/

void dalib_remote_free (remote_ptr)

Remote remote_ptr;

{ 
  /* the addressing table exists only if rma_defined has been set;
     it holds one remote_access entry for each of the rma_NP processors */

  if (remote_ptr->rma_defined)

     { dalib_free (remote_ptr->rma_info,
                   (remote_ptr->rma_NP) * sizeof(remote_access));
     }

  dalib_free (remote_ptr, sizeof(struct RemoteRecord));

} /* dalib_remote_free */

/**************************************************************************
*                                                                         *
*  void dalib_array_remote_init (array_info array_id)                     *
*                                                                         *
*   - exchange remote addressing information                              *
*   - allocates one remote_access entry per processor of the topology     *
*   - every processor of the array's topology broadcasts its entry        *
*                                                                         *
*  NOTE(review): remote_info and remote_info_NP are not declared in       *
*  this module; presumably they should be the rma_info / rma_NP fields    *
*  of the array's RemoteRecord (with rma_defined set to 1) - confirm.     *
*                                                                         *
**************************************************************************/

void dalib_array_remote_init (array_id)

array_info array_id;

{  int size;                   /* size in bytes of one remote_access entry */
   remote_access *my_info;     /* help pointer into remote_info            */
   int NId, N0;                /* relative position / first processor id   */
   int top_id;
   array_info template_dsp;
   char *dummy;

#ifdef DEBUG
   printf ("%d: array_remote_init, dsp = %d\n", pcb.i, array_id);
#endif

   dalib_array_info (array_id, &template_dsp, &top_id);

   remote_info_NP = dalib_top_size (top_id);
   N0    = dalib_top_first (top_id);

   size = sizeof (remote_access);

   /* one entry for every processor of the topology */

   remote_info = (remote_access *)
      dalib_malloc (size * remote_info_NP, "dalib_array_remote_init");

   if (dalib_in_topology (top_id))

      { /* this processor owns data */

        /* position of this processor relative to the first one */

        NId = pcb.i - N0;

        my_info = remote_info +  NId;

        dalib_array_addressing (array_id, pcb.i, &dummy,
                                &(my_info->first), my_info->total);

        my_info->data = array_id->data;

#ifdef DEBUG
       printf ("%d: have set my info (data = %d) at relpos %d\n",
               pcb.i, my_info->data, NId);
#endif

      } /* set my info */

   /* entry NId is broadcast with N0 + NId passed as source processor */

   for (NId = 0; NId < remote_info_NP; NId++)

    {  dalib_process_broadcast (remote_info + NId, size, N0 + NId);
#ifdef DEBUG
       printf ("%d: broadcast from %d, address = %d\n",
                pcb.i, NId, remote_info[NId].data);
#endif
    }

} /* dalib_array_remote_init */

/*******************************************************************
*                                                                  *
*  dalib_shared_read (int owner, array_info *array_id,             *
*                     char *data, int offset)                      *
*                                                                  *
*   - copies one array element at 'offset' into 'data'             *
*   - MEM_SHARED: direct copy from the local data pointer          *
*   - MEM_RMA: local copy if owner == pcb.i, else dalib_rma_get    *
*                                                                  *
*******************************************************************/

static void dalib_shared_read (owner, array_id, data, offset)

int owner;
array_info *array_id;
char *data;
int offset;

{ Shared info;
  int elem_size;

#ifdef DEBUG
  printf ("%d: dalib_shared_read on owner = %d (offset = %d)\n",
           pcb.i, owner, offset);
#endif

  info = (*array_id)->SharedInfo;

  if (info == NO_SHARED)

    { dalib_internal_error ("shared read on non-shared array");
      dalib_stop ();
    }

  elem_size = (*array_id)->size;

  if (info->shared_kind == MEM_SHARED)

    { /* note : offsets will be global offsets */

      dalib_memcopy (data, (*array_id)->data + offset * elem_size, elem_size);
    }

  else if (info->shared_kind == MEM_RMA)

    {
#ifdef RMA
      /* own data is copied directly, everything else is fetched */

      if (owner != pcb.i)
         dalib_rma_get (data, owner, array_id, offset);
       else
         dalib_memcopy (data, (*array_id)->data + offset * elem_size,
                        elem_size);
#else
      dalib_internal_error ("RMA shared not available");
      dalib_stop ();
#endif
    }

  else

    { dalib_internal_error ("shared_read : no shared/remote access");
      dalib_stop ();
    }

} /* dalib_shared_read */

/*******************************************************************
*                                                                  *
*  dalib_shared_update (int owner, array_info *array_id,           *
*                       char *data, int offset)                    *
*                                                                  *
*   - stores one array element from 'data' at 'offset'             *
*   - MEM_SHARED: direct copy into the local data pointer          *
*   - MEM_RMA: local copy if owner == pcb.i, else dalib_rma_put    *
*                                                                  *
*******************************************************************/

static void dalib_shared_update (owner, array_id, data, offset)

int owner;
array_info *array_id;
char *data;
int offset;

{ Shared info;
  int elem_size;

#ifdef DEBUG
  printf ("%d: dalib_shared_update on owner = %d (offset = %d)\n",
           pcb.i, owner, offset);
#endif

  info = (*array_id)->SharedInfo;

  if (info == NO_SHARED)

    { dalib_internal_error ("shared write on non-shared array");
      dalib_stop ();
    }

  elem_size = (*array_id)->size;

  if (info->shared_kind == MEM_SHARED)

    { /* note : offsets will be global offsets */

      dalib_memcopy ((*array_id)->data + offset * elem_size, data, elem_size);
    }

  else if (info->shared_kind == MEM_RMA)

    {
#ifdef RMA
      /* own data is written directly, everything else is put remotely */

      if (owner != pcb.i)
         dalib_rma_put (data, owner, array_id, offset);
       else
         dalib_memcopy ((*array_id)->data + offset * elem_size, data,
                        elem_size);
#else
      dalib_internal_error ("RMA (remote memory access) not available");
      dalib_stop ();
#endif
    }

  else

    { dalib_internal_error ("shared_update : no shared/remote access");
      dalib_stop ();
    }

} /* dalib_shared_update */

/*******************************************************************
*                                                                  *
*  dalib_shared_nread (int owner, array_info *array_id,            *
*                      char *data, int offsets[], int n)           *
*                                                                  *
*   - gathers n elements at offsets[0..n-1] into 'data'            *
*   - MEM_SHARED: dalib_memget on the local data pointer           *
*   - MEM_RMA: dalib_memget if owner == pcb.i, else dalib_rma_nget *
*                                                                  *
*******************************************************************/

static void dalib_shared_nread (owner, array_id, data, offsets, n)

int owner, n;
array_info *array_id;
char *data;
int offsets[];

{ array_info dsp;
  Shared info;
  int elem_size;
#ifdef DEBUG
  int k;
#endif

  dsp = *array_id;

#ifdef DEBUG
  printf ("%d: dalib_shared_nread on owner = %d (%d values)\n",
           pcb.i, owner, n);
#endif

  info = dsp->SharedInfo;

  if (info == NO_SHARED)

    { dalib_internal_error ("shared multiple read on non-shared array");
      dalib_stop ();
    }

  elem_size = dsp->size;

  if (info->shared_kind == MEM_SHARED)

    { /* note : offsets will be global offsets */

#ifdef DEBUG
      printf ("%d: offsets are ", pcb.i);
      for (k=0; k<n; k++)
          printf (" %d", offsets[k]);
      printf ("\n");
#endif

      dalib_memget (data, dsp->data, offsets, n, elem_size);
    }

  else if (info->shared_kind == MEM_RMA)

    {
#ifdef DEBUG
      printf ("%d: offsets are ", pcb.i);
      for (k=0; k<n; k++)
          printf (" %d", offsets[k]);
      printf ("\n");
#endif

#ifdef RMA
      /* own data is gathered directly, everything else is fetched */

      if (owner != pcb.i)
         dalib_rma_nget (data, owner, array_id, offsets, n);
       else
         dalib_memget (data, dsp->data, offsets, n, elem_size);
#else
      dalib_internal_error ("RMA shared not available");
      dalib_stop ();
#endif
    }

  else

    { dalib_internal_error ("shared_nread : no shared/remote access");
      dalib_stop ();
    }

} /* dalib_shared_nread */

/*******************************************************************
*                                                                  *
*  dalib_shared_nwrite (int owner, array_info *array_id,           *
*                       char *data, int offsets[], int n)          *
*                                                                  *
*   - scatters n elements of 'data' to offsets[0..n-1]             *
*   - MEM_SHARED: dalib_memset on the local data pointer           *
*   - MEM_RMA: dalib_memset if owner == pcb.i, else one            *
*     dalib_rma_put per element                                    *
*                                                                  *
*******************************************************************/

static void dalib_shared_nwrite (owner, array_id, data, offsets, n)

int owner, n;
array_info *array_id;
char *data;
int offsets[];

{ Shared shared_ptr;
  array_info id;
  unsigned char *ptr;
  int size;
  int i;
#ifdef DEBUG
  int *hd;             /* only for printing data values */
#endif

#ifdef DEBUG
  printf ("%d: dalib_shared_nwrite on owner = %d (%d values)\n",
           pcb.i, owner, n);
#endif

  id = *array_id;

  shared_ptr = id->SharedInfo;

  if (shared_ptr == NO_SHARED)
 
    { dalib_internal_error ("shared multiple write on non-shared array");
      dalib_stop ();
    }
 
  size = id->size;

#ifdef DEBUG
  hd = (int *) data;   /* printing data values */
#endif

  switch (shared_ptr->shared_kind) {
 
   case MEM_SHARED :

      /* note : offsets will be global offsets */

      ptr = id->data;

#ifdef DEBUG
      printf ("%d: offsets are ", pcb.i);
      for (i=0; i<n; i++)
          printf (" %d (val = %d)", offsets[i], hd[i]);
      printf ("\n");
#endif 

      dalib_memset (ptr, offsets, data, n, size);

      break;

   case MEM_RMA:

#ifdef RMA
      /* own data is written directly, in the same way as it is done in
         dalib_shared_update and dalib_shared_nread; only values owned
         by other processors are put remotely, one element at a time   */

      if (owner == pcb.i)
          dalib_memset (id->data, offsets, data, n, size);
        else
          for (i=0; i<n; i++)
             dalib_rma_put (data+i*size, owner, array_id, offsets[i]);
#else
      dalib_internal_error ("RMA shared not available");
      dalib_stop ();
#endif
      break;
 
   default :

      dalib_internal_error ("shared_nwrite : no shared/remote access");
      dalib_stop ();

  } /* switch */

} /* dalib_shared_nwrite */

/*********************************************************************
*                                                                    *
* FUNCTION(dalib_rma_read) (char *data, array_info *array_id,        *
*                           int *ind1, int *ind2, ..., int *ind7)    *
*                                                                    *
*   - reads the element with global indices ind1..ind<rank>          *
*     into 'data' via dalib_shared_read                              *
*                                                                    *
*********************************************************************/

void FUNCTION(dalib_rma_read) (data, array_id, ind1, ind2, ind3, ind4, 
                                               ind5 ,ind6, ind7)

char *data;
array_info *array_id;
int *ind1, *ind2, *ind3, *ind4, *ind5, *ind6, *ind7;

{ int indices [MAX_DIMENSIONS];
  array_info unused_dsp;       /* second result of dalib_array_info */
  int topology;
  int owner_pos;               /* owner as returned by dalib_multidim_owner */
  int owner, offset;

  /* only the first 'rank' index arguments are dereferenced */

  switch ((*array_id)->rank) {

    case 7: indices[6] = *ind7;   /* fall through */
    case 6: indices[5] = *ind6;   /* fall through */
    case 5: indices[4] = *ind5;   /* fall through */
    case 4: indices[3] = *ind4;   /* fall through */
    case 3: indices[2] = *ind3;   /* fall through */
    case 2: indices[1] = *ind2;   /* fall through */
    case 1: indices[0] = *ind1;

  } /* end switch */

  dalib_array_info (*array_id, &unused_dsp, &topology);

  /* no special handling of the single processor case (pcb.p == 1) here */

  owner_pos = dalib_multidim_owner (*array_id, indices);
  offset    = dalib_remote_offset (*array_id, owner_pos, indices);
  owner     = dalib_top_elem (topology, owner_pos);

#ifdef DEBUG
    printf ("%d: array read, owner = %d, offset = %d\n", 
             pcb.i, owner, offset);
#endif

  dalib_shared_read (owner, array_id, data, offset);

} /* dalib_rma_read */

/*********************************************************************
*                                                                    *
* FUNCTION(dalib_rma_update) (int *op, char *data,                   *
*                             array_info *array_id,                  *
*                             int *ind1, int *ind2, ..., int *ind7)  *
*                                                                    *
*********************************************************************/

void FUNCTION(dalib_rma_update) (op, data, array_id, ind1, ind2, ind3, ind4, 
                                                     ind5 ,ind6, ind7)

char *data;
array_info *array_id;
int *ind1, *ind2, *ind3, *ind4, *ind5, *ind6, *ind7;

{ int global_indices [MAX_DIMENSIONS];
  int rank;

  int top_id;
  array_info dummy;
  int owner, offset;

  rank = (*array_id)->rank;
 
  switch (rank) {

    case 7: global_indices[6] = *ind7;
    case 6: global_indices[5] = *ind6;
    case 5: global_indices[4] = *ind5;
    case 4: global_indices[3] = *ind4;
    case 3: global_indices[2] = *ind3;
    case 2: global_indices[1] = *ind2;
    case 1: global_indices[0] = *ind1;

  } /* end switch */

  dalib_array_info (*array_id, &dummy, &top_id);
 
  if (pcb.p == 1)
 
      { owner = 1;
        offset = dalib_local_offset (*array_id, global_indices);
      }
 
    else
 
      { owner  = dalib_multidim_owner (*array_id, global_indices);
        offset = dalib_remote_offset (*array_id, owner, global_indices);
        owner  = dalib_top_elem (top_id, owner);
      }
 
  dalib_shared_update (owner, array_id, data, offset);

} /* dalib_array_write */

/*******************************************************************
*                                                                  *
*    FUNCTION(dalib_array_remote) (int *s_id,                      *
*                       char *array_vals, array_info *array_dsp,   *
*                       char *base_vals, array_info *base_dsp,     *
*                       char *mask_vals, array_info *mask_dsp,     *
*                       char *ind1_vals, array_info *ind1_dsp,     *
*                       ...                                        *
*                       char *ind7_vals, array_info *ind7_dsp)     *
*                                                                  *
*   s_id      : identification of the computed schedule            *
*               (set by dalib_new_remote)                          *
*                                                                  *
*   base_dsp   : array that will be indirectly addressed           *
*                (rank of this array is k)                         *
*                                                                  *
*   ind1_dsp   : integer array 1                                   *
*   ...                                                            *
*   indk_dsp   : integer array k                                   *
*                                                                  *
*   DO J = 0, no_indexes                                           *
*       B (IND1(J), ..., INDk(J)) <-> ARRAY(J)   ! read / write    *
*   END DO                                                         *
*                                                                  *
*   Note: array_vals, base_vals and mask_vals are not referenced   *
*         in the body of this routine.                             *
*                                                                  *
*******************************************************************/
 
void FUNCTION(dalib_array_remote)
 
         (s_id, array_vals,  array_dsp,
                base_vals,   base_dsp,
                mask_vals,   mask_dsp,
                ind1_vals,   ind1_dsp,
                ind2_vals,   ind2_dsp,
                ind3_vals,   ind3_dsp,
                ind4_vals,   ind4_dsp,
                ind5_vals,   ind5_dsp,
                ind6_vals,   ind6_dsp,
                ind7_vals,   ind7_dsp)
 
int *s_id;
 
array_info *array_dsp, *base_dsp, *mask_dsp;
char       *array_vals, *base_vals, *mask_vals;
 
array_info *ind1_dsp, *ind2_dsp, *ind3_dsp, *ind4_dsp,
           *ind5_dsp, *ind6_dsp, *ind7_dsp;
 
char       *ind1_vals, *ind2_vals, *ind3_vals, *ind4_vals,
           *ind5_vals, *ind6_vals, *ind7_vals;
 
{ int *owners, *offsets;      /* one entry for every index */
 
  int base_rank, base_topology;
  int no_indexes;
  int source_topology;        /* queried below, not used afterwards */
 
  array_info dummy, source_array;

  int **index_array;
  int *mask_data;

       /******************************************
       *   set global data                       *
       ******************************************/
 
  /* the dalib_indexes_* routines keep this information until
     dalib_indexes_free is called at the end of this routine  */

  dalib_indexes_set_source (array_dsp);
  dalib_indexes_set_base   (base_dsp);
  dalib_indexes_set_mask   (mask_dsp);
 
  base_rank = (*base_dsp)->rank;
  dalib_array_info (*base_dsp, &dummy, &base_topology);

  dalib_indexes_source_info (&source_array, &no_indexes);
  dalib_array_info (source_array, &dummy, &source_topology);
 
  /* cases fall through intentionally: for rank k the index
     arrays k, k-1, ..., 1 are registered                    */

  switch (base_rank) {
 
    case 7 : dalib_indexes_set_dim (7, ind7_dsp, ind7_vals);
    case 6 : dalib_indexes_set_dim (6, ind6_dsp, ind6_vals);
    case 5 : dalib_indexes_set_dim (5, ind5_dsp, ind5_vals);
    case 4 : dalib_indexes_set_dim (4, ind4_dsp, ind4_vals);
    case 3 : dalib_indexes_set_dim (3, ind3_dsp, ind3_vals);
    case 2 : dalib_indexes_set_dim (2, ind2_dsp, ind2_vals);
    case 1 : dalib_indexes_set_dim (1, ind1_dsp, ind1_vals);
             break;
 
    default : dalib_internal_error ("indirect_def, illegal rank");
              dalib_stop ();
 
   } /* switch */
 
#ifdef DEBUG
  printf ("%d: remote def, indexes = %d, target (rank=%d,top=%d)\n",
           pcb.i, no_indexes, base_rank, base_topology);
#endif
 
       /******************************************
       *   compute global addresses/processors   *
       ******************************************/
 
   /* no_indexes is set again here (it has already been set by
      dalib_indexes_source_info above)                          */

   dalib_indexes_info (&index_array, &mask_data, &no_indexes);

   owners  = (int *) dalib_int_malloc (no_indexes,"indexes_owners");
   offsets = (int *) dalib_int_malloc (no_indexes,"indexes_local_offsets");

   dalib_find_owners (owners, no_indexes, *base_dsp, mask_data, index_array);

   /* globally shared base array : global offsets,
      otherwise offsets relative to the owning processor */

   if (dalib_is_globally_shared (*base_dsp))
 
       dalib_make_global_offsets (offsets, no_indexes, *base_dsp,
                                  mask_data, index_array);

    else

       dalib_make_remote_offsets (offsets, no_indexes, *base_dsp,
                                  owners, mask_data, index_array);
 
       /******************************************
       *   compute the schedule                  *
       ******************************************/
 
   dalib_new_remote (s_id, base_topology, no_indexes,
                     owners, offsets);

   /* NOTE(review): owners/offsets come from dalib_int_malloc but are
      released with plain free() - confirm that this matches the
      allocator and that dalib_new_remote keeps no reference to them */

   free (offsets);
   free (owners);
 
   dalib_indexes_free ();

} /* dalib_array_remote */

/*******************************************************************
*                                                                  *
*  dalib_array_rem_read (int *s_id,                                *
*                        array_info *array_dsp,                    *
*                        array_info *base_dsp )                    *
*                                                                  *
*   - gathers the scheduled base elements into a temporary and     *
*     scatters them into the array section (order from schedule)   *
*                                                                  *
*******************************************************************/

void FUNCTION(dalib_array_rem_read) (s_id, array_dsp, base_dsp)

int *s_id;
array_info *array_dsp;
array_info *base_dsp;

{ dd_type section_ddt;        /* describes the section of array_dsp   */
  int section_elems;          /* number of elements in the section    */
  int elem_size;              /* size of one element                  */
  int contiguous;
  char *section_data;

  int topology;
  int n_procs, proc;
  int n_indexes;
  int *counts;                /* number of values for every processor */
  int *offsets;
  int *order;

  char *buffer;               /* values in the order of the schedule  */
  int done;                   /* values already read                  */
  int pid, count;

  dalib_make_secarray_ddt1 (&section_ddt, *array_dsp,
                            &section_elems, &elem_size);

#ifdef DEBUG
  printf ("%d: remote read, no_elems = %d, obj_size = %d\n", 
           pcb.i, section_elems, elem_size);
#endif

  dalib_ddt_is_contiguous (section_ddt, &contiguous, &section_data);

  buffer = dalib_malloc (section_elems * elem_size, "dalib_array_rem_read");

  dalib_remote_info (*s_id, &topology, &n_indexes,
                     &counts, &offsets, &order);

  /* attention: n_indexes can be different from section_elems */

  n_procs = dalib_top_size (topology);

  /* the values of one processor are stored one after another, in
     the buffer as well as in the offsets of the schedule         */

  done = 0;

  for (proc = 0; proc < n_procs; proc++)

    { pid   = dalib_top_elem (topology, proc);
      count = counts[proc];

      if (count > 0)
         dalib_shared_nread (pid, base_dsp, buffer + done * elem_size,
                             offsets + done, count);

      done += count;
    }

#ifdef DEBUG
  printf ("%d: all values read in temporary (%d)\n", pcb.i, buffer);
  for (proc=0; proc<section_elems; proc++)
     printf ("%d: temp[%d] = %d\n", pcb.i, proc, * ((int *) buffer + proc));
#endif

  if (!contiguous)

     { /* work on a packed copy of the section; existing values are
          only needed if not all elements will be overwritten       */

       section_data = dalib_malloc (section_elems * elem_size,
                                    "dalib_array_rem_read");

       if (n_indexes < section_elems)
          dalib_ddt_pack (section_data, section_ddt);

       dalib_memset (section_data, order, buffer, n_indexes, elem_size);
       dalib_ddt_unpack (section_ddt, section_data, 0);

       free (section_data);
     }

    else

     dalib_memset (section_data, order, buffer, n_indexes, elem_size); 

  free (buffer);
  dalib_ddt_free (section_ddt);

} /* dalib_array_rem_read */

/*******************************************************************
*                                                                  *
*  dalib_array_rem_write (int *s_id,                               *
*                         array_info *array_dsp,                   *
*                         array_info *base_dsp )                   *
*                                                                  *
*   - gathers the array section values in schedule order and       *
*     writes them to the owning processors of the base array       *
*                                                                  *
*******************************************************************/

void FUNCTION(dalib_array_rem_write) (s_id, array_dsp, base_dsp)

int *s_id;      /* schedule id, an int as in dalib_array_remote and
                   dalib_array_rem_read (was declared remote_ptr *) */
array_info *array_dsp;
array_info *base_dsp;

{ int topid;
  int NId, NP;
  dd_type source_ddt;
  char *temporary, *ht;
  int obj_size;
  int no_elems;
  int *no_top;             /* number of values for every processor */

  int *rem_offsets;
  int *order;

  int is_contiguous;
  char *source_vals;
  int no_indexes;
  int pid;
  int nvals;

  dalib_make_secarray_ddt1 (&source_ddt, *array_dsp,
                            &no_elems, &obj_size);

  dalib_ddt_is_contiguous (source_ddt, &is_contiguous, &source_vals);

  dalib_remote_info (*s_id, &topid, &no_indexes,
                     &no_top, &rem_offsets, &order);

  /* temporary gets the values in the order of the schedule */

  temporary = dalib_malloc (no_indexes * obj_size, "dalib_array_rem_write");

  if (is_contiguous)

     dalib_memget (temporary, source_vals, order, no_indexes, obj_size);

    else

     { /* non-contiguous section: take the values from a packed copy */

       source_vals = dalib_malloc (no_elems * obj_size,
                                   "dalib_array_rem_write");
       dalib_ddt_pack (source_vals, source_ddt);
       dalib_memget (temporary, source_vals, order, no_indexes, obj_size);
       free (source_vals);
     }

  ht = temporary;

  /* attention: no_indexes can be different from no_elems */

  NP = dalib_top_size (topid);

  /* the values for one processor follow each other in temporary and
     in rem_offsets, so both pointers advance by nvals per processor */

  for (NId=0;  NId<NP; NId++)

    { pid   = dalib_top_elem (topid, NId);

      nvals = no_top[NId];

      if (nvals > 0)
         dalib_shared_nwrite (pid, base_dsp, ht, rem_offsets, nvals);

      rem_offsets += nvals;
      ht          += nvals * obj_size;
    }

  free (temporary);
  dalib_ddt_free (source_ddt);
 
} /* dalib_array_rem_write */

/**************************************************************
*                                                             *
*   FUNCTION (dalib_remote_free) (remote_ptr *s_id)           *
*                                                             *
**************************************************************/
 
void FUNCTION(dalib_array_remote_free) (s_id)

remote_ptr *s_id;
 
{ /* give all the needed memory free */
 
  dalib_free_remote (*s_id);

} /* dalib_array_remote_free */
