/**************************************************************************
*                                                                         *
*  Author      : Dr. Thomas Brandes, GMD, SCAI.LAB                        *
*                                                                         *
*  Copyright   : GMD St. Augustin, Germany                                *
*  Date        : Aug 97                                                   *
*  Last Update : Aug 97                                                   *
*                                                                         *
*  This Module is part of the DALIB                                       *
*                                                                         *
*  Module      : remote.m4                                                *
*                                                                         *
*  Function: Creating descriptors for remote access of distr. data        *
*                                                                         *
*  Note : Remote can also be used for remote addressing without RMA       *
*                                                                         *
*         (this module is machine independent)                            *
*                                                                         *
*  EXPORT:                                                                *
*                                                                         *
*   a) FORTRAN interface                                                  *
*                                                                         *
*      FUNCTION(dalib_array_rma) (array_info array_id)                    *
*      FUNCTION(dalib_array_dsm) (array_info array_id)                    *
*                                                                         *
*   b) DALIB interface                                                    *
*                                                                         *
*                                                                         *
*  Attention: this module should be available in any case                 *
*                                                                         *
**************************************************************************/
 
#undef DEBUG

#include <stdlib.h>    /* import of NULL needed */

#include "dalib.h"

#ifdef GM

/* in this module we make no difference between GM and SHM */

#define SHM

#endif

     /***********************************************
     *                                              *
     *   record describing remote addressing fac.   *
     *                                              *
     ***********************************************/

/* Values for the 'kind' of remote access; the value chosen is stored */
/* in RemoteRecord.rma_kind by dalib_array_set_remote_info.           */

/* NOTE(review): not referenced in the visible part of this module;   */
/* presumably plain remote memory access - confirm against callers.   */
#define kREMOTE_SHARED       2
/* refused by dalib_array_rma when the library is compiled with RMA   */
#define kDISTRIBUTED_SHARED  3
/* kind used by dalib_array_remote_init if the array has no           */
/* RemoteInfo yet                                                     */
#define kIS_SHARED           4

/* Addressing scheme of the array part held by one processor.         */
/* dalib_array_remote_init builds one entry per processor of the      */
/* array's topology and broadcasts each entry from its processor.     */

typedef struct {

   /* data address as seen by the processor holding the data          */
   /* (set from array_id->f_data on that processor)                   */
   unsigned char *remoteAddr;
   /* NULL by default; mapped address with DSM; equal to remoteAddr   */
   /* with SHM                                                        */
   unsigned char *localAddr;      /* remote data mapped to this local addr */

   /* 'first' and 'total' are filled by dalib_array_addressing;       */
   /* dalib_remote_offset computes                                    */
   /*    offset = -first + sum (global_index[i] * total[i])           */
   /* and total[rank] * element size is used as byte length for the   */
   /* DSM mapping                                                     */
   int first;
   int total[MAX_DIMENSIONS+1];

   } remote_access;

/* Remote access record attached to an array descriptor (RemoteInfo). */
/* It is created by dalib_array_set_remote_info with rma_defined = 0; */
/* dalib_array_remote_init and dalib_remote_copy_info later set       */
/* rma_defined, rma_NP and rma_info together.                         */

struct RemoteRecord
 
 { int rma_defined;         /* will be 1 if rma_info allocated       */

   int rma_kind;            /* REMOTE/DISTRIBUTED/ANY of them        */

   /* rma_NP and rma_info are only read while rma_defined is set     */

   int rma_NP;              /* number of processors that have data   */

   remote_access *rma_info; /* will have rma_NP entries only         */

#ifdef SHM
   /* NO_SHARED until dalib_array_remote_malloc stores the handle;   */
   /* reset to NO_SHARED by dalib_array_remote_free                  */
   Shared SharedInfo;       /* in case that remote access is via SHM */
#endif

 };

/******************************************************************
*                                                                 *
*  static void dalib_array_set_remote_info (array_info array_id,  *
*                                           int kind)             *
*                                                                 *
*   - creates/initializes RemoteInfo for array_id                 *
*                                                                 *
******************************************************************/

static void dalib_array_set_remote_info (array_id, kind)
 
array_info array_id;
int        kind;
 
{ Remote rma_pointer;

#ifdef DEBUG
  printf ("%d: dalib_array_set_remote_info for array pcb = %p\n", 
          pcb.i, array_id);
#endif 

  rma_pointer  = (Remote) dalib_malloc (sizeof (struct RemoteRecord), 
                                               "dalib_array_rma");

  rma_pointer->rma_defined = 0;
  rma_pointer->rma_kind    = kind;

#ifdef SHM
  rma_pointer->SharedInfo = NO_SHARED;
#endif

  /* Remote can also be used for remote addressing without RMA */

  array_id->RemoteInfo = rma_pointer;

} /* dalib_array_set_remote_info */

/******************************************************************
*                                                                 *
*  FUNCTION(dalib_array_rma) (array_info *array_id, int *kind)    *
*                                                                 *
*   - defines an array to be accessed via remote memory access    *
*   - will not exchange addresses (done with allocation)          *
*                                                                 *
******************************************************************/

void FUNCTION(dalib_array_rma) (array_id, kind)
 
array_info *array_id;   /* descriptor of the array (passed by reference) */
int        *kind;       /* requested kind of remote access               */
 
{ 

#if !(defined(RMA) || defined(SHM) || defined(DSM))

  /* library has been built without any support for remote access */

  dalib_internal_error (
      "RMA/DSM/SHM [remote|distributed] shared memory not available");
  dalib_stop ();

#else

  /* Runtime Support for remote memory access is enabled */

#ifdef RMA

  dalib_system_rma_init();

  /* with RMA there is no runtime support for DSM */

  if (*kind == kDISTRIBUTED_SHARED)

    { dalib_internal_error (
       "DSM/SHM [distributed] shared memory not available");
      dalib_stop ();
    }

#endif

  /* only the record is created, addresses are exchanged later */

  dalib_array_set_remote_info (*array_id, *kind);

#ifdef DEBUG
  printf ("%d: array pcb = %p has been defined to have remote access\n",
           pcb.i, *array_id);
#endif 

#endif

} /* dalib_array_rma */

/**************************************************************************
*                                                                         *
*  void dalib_array_remote_malloc (array_info array_id)                   *
*                                                                         *
*   - allocate memory for remote access                                   *
*   - in most cases it is the same as usual allocation of data            *
*                                                                         *
**************************************************************************/

void dalib_array_remote_malloc (array_id)

array_info array_id;   /* array that must already have a RemoteInfo */

{ Remote remote;
  Shared shm_handle;       /* filled by the SHM allocation only */

  remote = array_id->RemoteInfo;

  /* allocation for remote access needs the remote record */

  if (remote == NO_REMOTE)

     { dalib_internal_error ("array_remote_malloc: no RemoteInfo");
       dalib_stop ();
     }

#ifndef SHM

   /* same allocation as for any other array */

   dalib_array_malloc (array_id);

#else

   /* shared allocation, the handle is kept in the remote record */

   dalib_array_shm_remote_malloc (array_id, &shm_handle);
   remote->SharedInfo = shm_handle;

#endif

} /* dalib_array_remote_malloc */

/**************************************************************************
*                                                                         *
*  void dalib_array_remote_init (array_info array_id)                     *
*                                                                         *
*   - exchange remote addressing schemes for all processors               *
*                                                                         *
**************************************************************************/

void dalib_array_remote_init (array_id)

array_info array_id;

{ Remote remote;               /* remote record of array_id             */

  remote_access *schemes;      /* one addressing scheme per processor   */
  remote_access *entry;        /* scheme that is currently worked on    */

  int entry_bytes;             /* size of one addressing scheme         */
  int elem_bytes;              /* size of one array element             */
  int rank;                    /* rank of array                         */

  int nprocs;                  /* number of processors in the topology  */
  int first_pid;               /* first processor of the topology       */
  int pos;                     /* relative position within topology     */

  int top_id;
  array_info template_dsp;
  char *ignored;               /* output of addressing that is not used */

#ifdef DEBUG
   printf ("%d: array_remote_init, dsp = %p\n", pcb.i, array_id);
#endif 

   rank       = array_id->rank;
   elem_bytes = array_id->size;

   /* an array without remote record gets one of kind kIS_SHARED */

   if (array_id->RemoteInfo == NO_REMOTE)
      dalib_array_set_remote_info (array_id, kIS_SHARED);

   remote = array_id->RemoteInfo;

#ifdef DEBUG
   printf ("%d: array_remote_init, defined = %d\n", 
            pcb.i, remote->rma_defined);
#endif 

   /* nothing more to do if remote schemes are already available */

   if (remote->rma_defined) return;

   dalib_array_info (array_id, &template_dsp, &top_id);

   nprocs    = dalib_top_size (top_id);
   first_pid = dalib_top_first (top_id);

   entry_bytes = sizeof (remote_access);

   schemes = (remote_access *) 
                dalib_malloc (entry_bytes * nprocs, "array_remote_init");

   /* step 1: a processor that owns data fills its own entry */

   if (dalib_in_topology (top_id))

      { pos   = pcb.i - first_pid;
        entry = schemes + pos;

        dalib_array_addressing (array_id, pcb.i, &ignored,
                                &(entry->first), entry->total);

        entry->remoteAddr = array_id->f_data;

#ifdef DEBUG
       printf ("%d: have set my info (remoteAddr = %p) at relpos %d\n", 
               pcb.i, entry->remoteAddr, pos);
#endif

      }

   /* step 2: every entry is broadcast from the processor it belongs to */

   pos = 0;

   while (pos < nprocs)

    {  dalib_context_broadcast (schemes + pos, entry_bytes, first_pid + pos);
#ifdef DEBUG
       printf ("%d: broadcast from %d, address = %p\n",
                pcb.i, pos, schemes[pos].remoteAddr);
#endif
       pos++;
    }

   remote->rma_info    = schemes;
   remote->rma_NP      = nprocs;
   remote->rma_defined = 1;

   /* step 3: find out under which local address each part is reachable */

   for (pos = 0; pos < nprocs; pos++)

     { unsigned char *remoteAddr, *localAddr;
       int length;
       int remote_pid;
       int group_id;

       entry = schemes + pos;

       remoteAddr = entry->remoteAddr;

       group_id   = dalib_context_group ();
       remote_pid = dalib_group_element (group_id, pos+1);

       /* default: remote Address cannot be mapped to local Address */

       localAddr = (unsigned char *) NULL;

#ifdef DSM

       /* in case of remote mapping get localAddr for remoteAddr */

       length = entry->total[rank] * elem_bytes;

       localAddr = (unsigned char*) CJrmap (remote_pid-1, remoteAddr, length);

       if (localAddr == NULL)

          { printf ("%d: could not map remoteAddr %p from pid = %d\n",
                    pcb.i, remoteAddr, pos);

            dalib_internal_error ("mapping failed");
          }

#ifdef DEBUG
 printf ("%d: mapped remoteAddr %p (len = %d) from pid = %d to localAddr %p\n",
          pcb.i, remoteAddr, length, pos, localAddr);
#endif

#endif

#ifdef SHM

       /* with SHM the address of the owner is used as it is */

       localAddr = remoteAddr;
#endif

       entry->localAddr = localAddr;
     }

#ifdef DEBUG
   printf ("%d: array_remote_init ready\n", pcb.i);
#endif 

} /* dalib_array_remote_init */

/*******************************************************************
*                                                                  *
*  void dalib_remote_copy_info (array_info dummy_dsp, actual_dsp)  *
*                                                                  *
*    - if actual has remote access and dummy should have it,       *
*      then copy it from actual to dummy                           *
*                                                                  *
*******************************************************************/

void dalib_remote_copy_info (dummy_dsp, actual_dsp)

array_info actual_dsp, dummy_dsp;   /* source / destination descriptor */

{ Remote actual_info, dummy_info;

  remote_access *all_info;    /* will contain all addressing schemes */

  int size;
  int NP;

  /* Gives the dummy its own copy of the addressing schemes of the
     actual.  Nothing happens unless both descriptors have a remote
     record and the schemes of the actual have been defined.          */

#ifdef DEBUG
  /* descriptors are pointers, so they are printed with %p */
  printf ("%d: copy remote info from actual %p to dummy %p\n", 
           pcb.i, actual_dsp, dummy_dsp);
#endif

  dummy_info = dummy_dsp->RemoteInfo;
  actual_info = actual_dsp->RemoteInfo;

  if (dummy_info == NO_REMOTE) return;
  if (actual_info == NO_REMOTE) return;

#ifdef DEBUG
  printf ("%d: copy remote, dummy defined = %d, actual defined = %d\n",
           pcb.i, dummy_info->rma_defined, actual_info->rma_defined);
#endif

  if (actual_info->rma_defined == 0) return;

  size = sizeof (remote_access);
  NP   = actual_info->rma_NP;

  all_info = (remote_access *) dalib_malloc (size * NP, "array_remote_copy");

  dalib_memcopy (all_info, actual_info->rma_info, size*NP);

  /* NOTE(review): a table that the dummy already owns (rma_defined
     set) is replaced here without being freed - confirm that callers
     only pass dummies whose schemes are still undefined.             */

  dummy_info->rma_defined = 1;
  dummy_info->rma_NP      = NP;
  dummy_info->rma_info    = all_info;

} /* dalib_remote_copy_info */

/*******************************************************************
*                                                                  *
* void dalib_remote_free (Remote rma_pointer)                      *
*                                                                  *
*   - free descriptor structures for remote access                 *
*                                                                  *
*******************************************************************/

void dalib_remote_free (rma_pointer)

Remote rma_pointer;   /* record to release, is dereferenced unchecked */

{ 
  /* the table of addressing schemes exists only if it has been defined */

  if (rma_pointer->rma_defined != 0)

     dalib_free (rma_pointer->rma_info, 
                 (rma_pointer->rma_NP) * sizeof(remote_access));

  /* the record itself is released last */

  dalib_free (rma_pointer, sizeof(struct RemoteRecord));

} /* dalib_remote_free */

/*******************************************************************
*                                                                  *
* void dalib_array_remote_free (array_info array_id)               *
*                                                                  *
*    - free remote data (but do not free the descriptor)           *
*                                                                  *
*******************************************************************/

void dalib_array_remote_free (array_id)

array_info array_id;

{ 

#ifndef SHM

  /* data has been allocated in the usual way */

  dalib_free (array_id->c_data, array_id->n_data);

#else

  Shared shared_ptr = array_id->RemoteInfo->SharedInfo;

  /* nothing to release if there is no shared data */

  if (shared_ptr == NO_SHARED) return;

  dalib_shared_data_free (shared_ptr);

  /* the record stays, it only forgets the released data */

  array_id->RemoteInfo->SharedInfo = NO_SHARED;

#endif

} /* dalib_array_remote_free */

/**************************************************************************
*                                                                         *
*  dalib_array_remote_addressing (array_info array_id, int pid,           *
*                                 int *first, int total[])                *
*                                                                         *
*   - get the addressing scheme on processor pid (relative position)      *
*                                                                         *
**************************************************************************/

void dalib_array_remote_addressing (array_id, pid, first, total)

array_info array_id;   /* array with exchanged addressing schemes     */
int        *first;     /* OUT: 'first' of the scheme of processor pid */
int        total[];    /* OUT: total[0..rank] of that scheme          */
int        pid;        /* relative position, 0 <= pid < rma_NP        */

{ int i, rank;
  remote_access *ptr;

  Remote rma_pointer;

  rma_pointer = array_id->RemoteInfo;

  if (rma_pointer == (NO_REMOTE))

    { dalib_internal_error ("remote addressing without RMA info");
      dalib_stop ();
    }

  if (rma_pointer->rma_defined == 0)

    { dalib_internal_error ("remote addressing not initialized");
      dalib_stop ();
    }

  /* rma_info has exactly rma_NP entries, any other position would
     address memory outside of the table                            */

  if (pid < 0 || pid >= rma_pointer->rma_NP)

    { dalib_internal_error ("remote addressing: illegal processor position");
      dalib_stop ();
    }

  rank = array_id->rank;
  ptr = rma_pointer->rma_info + pid;

  /* total has rank+1 relevant entries */

  *first = ptr->first;
  for (i=0;i<=rank;i++) total[i] = ptr->total[i];

#ifdef DEBUG
  if (rank == 1)
    printf ("%d: remote %d is remoteAddr = %p, first = %d, total = %d %d\n",
            pcb.i, pid, ptr->remoteAddr, *first, total[0], total[1]);
  if (rank == 2)
    printf ("%d: remote %d is remoteAddr = %p first = %d, total = %d %d %d\n",
            pcb.i, pid, ptr->remoteAddr, *first, total[0], total[1], total[2]);
#endif

} /* dalib_array_remote_addressing */

/**************************************************************************
*                                                                         *
*  unsigned char *dalib_array_remote_data (array_info array_id, int pid)  *
*                                                                         *
*   - returns data pointer of remote processor pid                        *
*                                                                         *
**************************************************************************/

unsigned char *dalib_array_remote_data (array_id, pid)

array_info array_id;   /* array with exchanged addressing schemes */
int        pid;        /* relative position, 0 <= pid < rma_NP    */

{ remote_access *ptr;

  Remote rma_pointer;

  /* Returns the address under which the data of processor pid can be
     used: the locally mapped address with DSM or SHM, otherwise the
     address that is valid on processor pid itself.                    */

  rma_pointer = array_id->RemoteInfo;

  if (rma_pointer == (NO_REMOTE))

    { dalib_internal_error ("remote addressing without RMA info");
      dalib_stop ();
    }

  if (rma_pointer->rma_defined == 0)

    { dalib_internal_error ("remote addressing not initialized");
      dalib_stop ();
    }

  /* rma_info has exactly rma_NP entries, any other position would
     address memory outside of the table                            */

  if (pid < 0 || pid >= rma_pointer->rma_NP)

    { dalib_internal_error ("remote addressing: illegal processor position");
      dalib_stop ();
    }

  ptr = rma_pointer->rma_info + pid;

#ifdef DEBUG
  printf ("%d: remote address of array (dsp=%p) on pid = %d starts at %p\n",
           pcb.i, array_id, pid, ptr->remoteAddr);
#endif

#if defined(DSM) || defined(SHM)
  return (ptr->localAddr);
#else
  return (ptr->remoteAddr);
#endif

} /* dalib_array_remote_data */

/**************************************************************************
*                                                                         *
*  int dalib_remote_offset (array_info array_id, int pid,                 *
*                           int global_indices[]         )                *
*                                                                         *
*    - returns local offset on processor pid for global indexes           *
*                                                                         *
**************************************************************************/

int dalib_remote_offset (array_id, pid, global_indices)

array_info array_id;
int        pid;                /* relative position of the processor   */
int global_indices[];          /* one global index for every dimension */

{ int scheme_total[MAX_DIMENSIONS + 1];   /* multipliers of processor pid */
  int scheme_first;                       /* 'first' of processor pid     */
  int ndims;
  int dim;
  int result;

  ndims = array_id->rank;

  dalib_array_remote_addressing (array_id, pid, &scheme_first, scheme_total);

  /* offset = -first + sum over all dimensions of index * total */

  result = -scheme_first;

  dim = 0;

  while (dim < ndims)
    { result += global_indices[dim] * scheme_total[dim];
      dim++;
    }

  return (result);

} /* dalib_remote_offset */

/*********************************************************************
*                                                                    *
* FUNCTION(dalib_rma_read) (char *data, array_info *array_id,        *
*                           int *ind1, int *ind2, ..., int *ind7)    *
*                                                                    *
*********************************************************************/

void FUNCTION(dalib_rma_read) (data, array_id, ind1, ind2, ind3, ind4, 
                                               ind5 ,ind6, ind7)

char *data;              /* OUT: receives one array element           */
array_info *array_id;
int *ind1, *ind2, *ind3, *ind4, *ind5, *ind6, *ind7;

{ int global_indices [MAX_DIMENSIONS];

  int rank, size;

  unsigned char *rem_address;    /* remote address */

  int owner, offset;

  int rem_pid;

  array_info dummy;
  int top_id;
 
  rank = (*array_id)->rank;
  size = (*array_id)->size;
 
  /* only the first 'rank' index arguments are dereferenced; any other
     rank than 1 to 7 leaves global_indices untouched                 */

  if (rank >= 1 && rank <= 7)

     { if (rank >= 7) global_indices[6] = *ind7;
       if (rank >= 6) global_indices[5] = *ind6;
       if (rank >= 5) global_indices[4] = *ind5;
       if (rank >= 4) global_indices[3] = *ind4;
       if (rank >= 3) global_indices[2] = *ind3;
       if (rank >= 2) global_indices[1] = *ind2;
       global_indices[0] = *ind1;
     }

  dalib_array_info (*array_id, &dummy, &top_id);

  if (pcb.p != 1)
 
     { /* find owner of the element and its address on the owner */

       owner       = dalib_multidim_owner (*array_id, global_indices);
       offset      = dalib_remote_offset (*array_id, owner, global_indices);
       rem_address = dalib_array_remote_data (*array_id, owner);
       rem_address = rem_address + offset * size;
       rem_pid     = dalib_top_elem (top_id, owner);
     }
 
    else
 
     { /* single processor: the element is in the local data */

       rem_pid     = 1;
       offset      = dalib_local_offset (*array_id, global_indices);
       rem_address = (*array_id)->f_data + offset * size;
     }
 
#ifdef DEBUG
  switch (rank) {

  case 1 :

     printf ("%d: rma_read (index = %d), owner = %d, offset = %d\n", 
              pcb.i, *ind1, owner, offset);
     break;

  case 2 :

     printf ("%d: rma_read (index = %d, %d), owner = %d, offset = %d\n", 
              pcb.i, *ind1, *ind2, owner, offset);
     break;

  default :

     printf ("%d: rma_read (index = %d %d, %d), owner = %d, offset = %d\n", 
              pcb.i, *ind1, *ind2, *ind3, owner, offset);
     break;

  } /* switch */
#endif

  /* element on this processor: plain copy, nothing else to do */

  if (rem_pid == pcb.i)

     { dalib_memcopy (data, rem_address, size);
       return;
     }

  /* element belongs to another processor */

#if defined(DSM) || defined(SHM)

#ifdef DEBUG
  printf ("%d: get %d bytes from remotely mapped address %p (pid = %d)\n",
           pcb.i, size, rem_address, rem_pid);
#endif

  dalib_memcopy (data, rem_address, size);

#elif defined(RMA)

  dalib_system_rma_get (data, rem_pid, rem_address, size);

#else

  dalib_internal_error ("RMA access not available\n");
  dalib_stop ();

#endif
 
} /* dalib_rma_read */

/**********************************************************************
*                                                                     *
*  static void dalib_base_info (array_info array_dsp,                 *
*                               int *serial_size, int *rank)          *
*                                                                     *
*  IN  : array_info array_dsp                                         *
*                                                                     *
*  OUT : serial_size is the element size in bytes multiplied by the   *
*        extents of all dimensions except the last one                *
*        rank is the rank of the array                                *
*                                                                     *
**********************************************************************/

static void dalib_base_info (array_dsp, serial_size, rank)
                             
array_info array_dsp;
int *serial_size;       /* OUT: bytes of all but the last dimension */
int *rank;              /* OUT: rank of the array                   */

{ DimInfo *dims;
  int ndims;
  int bytes;
  int d;

  ndims = array_dsp->rank;
  dims  = array_dsp->dimensions;

  /* start with one element and multiply with the extent of every
     dimension but the last one                                    */

  bytes = array_dsp->size;

  for (d = 0; d < ndims - 1; d++)
     bytes *= (dims[d].global_size[1] - dims[d].global_size[0] + 1);

  *rank        = ndims;
  *serial_size = bytes;

} /* dalib_base_info */

/*********************************************************************
*                                                                    *
* FUNCTION(dalib_rma_read_fast) (char *data,                         *
*                                array_info *array_id,               *
*                                int *ind)                           *
*                                                                    *
* - special version of rma_read if array_id is only distributed      *
*   along the last dimension and read all first dimensions in        *
*                                                                    *
*********************************************************************/

void FUNCTION(dalib_rma_read_fast) (data, array_id, ind)

char *data;              /* OUT: receives the data that is read      */
array_info *array_id;
int *ind;                /* global index in the last dimension       */

{ int rank, size;

  unsigned char *rem_address;    /* remote address */

  int owner, offset;

  int rem_pid;

  int top_id, top_dim;
  int lb, ub, local_lb;
  int base, stride;
  int kind;

  int NP;

  DistDim mapping;

  /* size is the number of bytes of all dimensions but the last one */

  dalib_base_info (*array_id, &size, &rank);

  dalib_array_dim_mapping (*array_id, rank,
                           &base, &stride, &lb, &ub,
                           &top_id, &mapping);

  dalib_dim_mapping_info (mapping, &kind, &top_dim);

  if (kind == kBLOCK_DIM)

      { int bsize;

        /* this routine is disabled: an internal error is raised
           before anything else is done for block distributions   */

        dalib_internal_error ("not updated for new version");
        dalib_stop ();

        bsize = 0;

      NP       = dalib_top_size (top_id);
      owner    = dalib_block_owner (NP, bsize, lb, ub, *ind); 
      local_lb = lb + (owner-1) * bsize; 
      offset   = (*ind) - local_lb;

#ifdef DEBUG
 printf ("%d: ind = %d -> owner = %d, offset = %d\n",
         pcb.i, *ind, owner, offset);
#endif

    }

   else

    { dalib_internal_error ("rma_read_fast illegal");
      dalib_stop ();
    }

  /* owner is counted from 1 here, relative positions from 0 */

  rem_pid = dalib_top_elem (top_id, owner-1);
  rem_address =  dalib_array_remote_data (*array_id, owner-1);
  rem_address += offset * size;

#ifdef DEBUG
  /* the address is a pointer and has to be printed with %p */
  printf ("%d: rma_read_fast (ind=%d), pid = %d, address = %p, size = %d\n",
           pcb.i, *ind, rem_pid, (void *) rem_address, size);
#endif

#if defined(RMA)
  dalib_system_rma_get (data, rem_pid, rem_address, size);
#else
  dalib_internal_error ("RMA access not available\n");
  dalib_stop ();
#endif

} /* dalib_rma_read_fast */

/*********************************************************************
*                                                                    *
* FUNCTION(dalib_rma_write) (void *data,                             *
*                            array_info *array_id,                   *
*                            int *ind1, int *ind2, ..., int *ind7)   *
*                                                                    *
*********************************************************************/

void FUNCTION(dalib_rma_write) (data, array_id, ind1, ind2, ind3, ind4, 
                                                ind5 ,ind6, ind7)

void *data;              /* IN: value of one array element            */
array_info *array_id;
int *ind1, *ind2, *ind3, *ind4, *ind5, *ind6, *ind7;

{ int global_indices [MAX_DIMENSIONS];

  int rank, size;

  unsigned char *rem_address;    /* remote address */

  int owner, offset;

  int rem_pid;

  array_info dummy;
  int top_id;
 
  rank = (*array_id)->rank;
  size = (*array_id)->size;
 
  /* only the first 'rank' index arguments are dereferenced; any other
     rank than 1 to 7 leaves global_indices untouched                 */

  if (rank >= 1 && rank <= 7)

     { if (rank >= 7) global_indices[6] = *ind7;
       if (rank >= 6) global_indices[5] = *ind6;
       if (rank >= 5) global_indices[4] = *ind5;
       if (rank >= 4) global_indices[3] = *ind4;
       if (rank >= 3) global_indices[2] = *ind3;
       if (rank >= 2) global_indices[1] = *ind2;
       global_indices[0] = *ind1;
     }

  dalib_array_info (*array_id, &dummy, &top_id);

  if (pcb.p != 1)
 
     { /* find owner of the element and its address on the owner */

       owner       = dalib_multidim_owner (*array_id, global_indices);
       offset      = dalib_remote_offset (*array_id, owner, global_indices);
       rem_address = dalib_array_remote_data (*array_id, owner);
       rem_address = rem_address + offset * size;
       rem_pid     = dalib_top_elem (top_id, owner);
     }
 
    else
 
     { /* single processor: the element is in the local data */

       rem_pid     = 1;
       offset      = dalib_local_offset (*array_id, global_indices);
       rem_address = (*array_id)->f_data + offset * size;
     }
 
#ifdef DEBUG
  switch (rank) {

  case 1 :

     printf ("%d: rma_write (index = %d), owner = %d, offset = %d\n", 
              pcb.i, *ind1, owner, offset);
     break;

  case 2 :

     printf ("%d: rma_write (index = %d, %d), owner = %d, offset = %d\n", 
              pcb.i, *ind1, *ind2, owner, offset);
     break;

  default :

     printf ("%d: rma_write (index = %d %d, %d), owner = %d, offset = %d\n", 
              pcb.i, *ind1, *ind2, *ind3, owner, offset);
     break;

  } /* switch */
#endif

  /* rem_address is a local address, so update it locally */

  if (rem_pid == pcb.i)

     { dalib_memcopy (rem_address, data, size);
       return;
     }

  /* element belongs to another processor: only possible with RMA */

#if defined(RMA)
  dalib_system_rma_put (data, rem_pid, rem_address, size);
#else
  dalib_internal_error ("RMA access not available\n");
  dalib_stop ();
#endif
 
} /* dalib_rma_write */

/*********************************************************************
*                                                                    *
* FUNCTION(dalib_rma_update) (int *op, char *data,                   *
*                             array_info *array_id,                  *
*                             int *ind1, int *ind2, ..., int *ind7)  *
*                                                                    *
*********************************************************************/

void FUNCTION(dalib_rma_update) (op, data, array_id, ind1, ind2, ind3, ind4, 
                                                     ind5 ,ind6, ind7)

int  *op;                /* 0: overwrite element, otherwise reduction */
char *data;              /* IN: value used for the update             */
array_info *array_id;
int *ind1, *ind2, *ind3, *ind4, *ind5, *ind6, *ind7;

{ int global_indices [MAX_DIMENSIONS];

  int rank, size;

  unsigned char *rem_address;    /* remote address */

  dalib_routine *f_reduction;
  extern dalib_routine *dalib_get_reduction_fn();

  int owner, offset;

  int rem_pid;

  array_info dummy;
  int top_id;
 
  rank = (*array_id)->rank;
  size = (*array_id)->size;
 
  /* only the first 'rank' index arguments are dereferenced */

  switch (rank) {

    case 7: global_indices[6] = *ind7;   /* fall through */
    case 6: global_indices[5] = *ind6;   /* fall through */
    case 5: global_indices[4] = *ind5;   /* fall through */
    case 4: global_indices[3] = *ind4;   /* fall through */
    case 3: global_indices[2] = *ind3;   /* fall through */
    case 2: global_indices[1] = *ind2;   /* fall through */
    case 1: global_indices[0] = *ind1;

  } /* end switch */

  dalib_array_info (*array_id, &dummy, &top_id);

  if (pcb.p == 1)
 
     { rem_pid     = 1;
       offset      = dalib_local_offset (*array_id, global_indices);
       rem_address = (*array_id)->f_data + offset * size;
     }
 
    else
 
     { owner       = dalib_multidim_owner (*array_id, global_indices);
       offset      = dalib_remote_offset (*array_id, owner, global_indices);
       rem_address = dalib_array_remote_data (*array_id, owner);
       rem_address = rem_address + offset * size;
       rem_pid     = dalib_top_elem (top_id, owner);
     }
 
#ifdef DEBUG
  switch (rank) {

  case 1 :

     printf ("%d: rma_update (index = %d), owner = %d, offset = %d\n", 
              pcb.i, *ind1, owner, offset);
     break;

  case 2 :

     printf ("%d: rma_update (index = %d, %d), owner = %d, offset = %d\n", 
              pcb.i, *ind1, *ind2, owner, offset);
     break;

  default :

     printf ("%d: rma_update (index = %d %d, %d), owner = %d, offset = %d\n", 
              pcb.i, *ind1, *ind2, *ind3, owner, offset);
     break;

  } /* switch */
#endif

  /* the reduction routine is only looked up for op > 0; it stays NULL
     otherwise so that it can never be called without being set       */

  f_reduction = NULL;

  if (*op > 0) f_reduction = dalib_get_reduction_fn (*op);

  if (rem_pid == pcb.i)

     { /* rem_address is a local address, so update it locally */

       if (*op == 0)
          dalib_memcopy (rem_address, data, size);
       else if (f_reduction == NULL)
          { /* op < 0 (or no routine found): nothing that can be called */
            dalib_internal_error ("rma_update: illegal reduction operation");
            dalib_stop ();
          }
       else
          f_reduction (rem_address, data);
     }

   else

     { 

#if defined(RMA)
      if (*op == 0)
         dalib_system_rma_put (data, rem_pid, rem_address, size);
       else
         dalib_system_rma_update (*op, data, rem_pid, rem_address, size);
#else
     dalib_internal_error ("RMA access not available\n");
     dalib_stop ();
#endif

    }
 
} /* dalib_rma_update */

void FUNCTION(dalib_rma_update_fast) (op, data, array_id, ind)

int *op;                 /* operation passed on to the RMA update     */
char *data;              /* IN: data used for the update              */
array_info *array_id;
int *ind;                /* global index in the last dimension        */

{ int rank, size;

  unsigned char *rem_address;    /* remote address */

  int owner, offset;

  int rem_pid;

  int top_id, top_dim;
  int lb, ub, local_lb;
  int base, stride;
  int kind;

  int NP;

  DistDim mapping;

  /* size is the number of bytes of all dimensions but the last one */

  dalib_base_info (*array_id, &size, &rank);

  dalib_array_dim_mapping (*array_id, rank,
                           &base, &stride, &lb, &ub,
                           &top_id, &mapping);

  dalib_dim_mapping_info (mapping, &kind, &top_dim);

  if (kind == kBLOCK_DIM)

    { int bsize;

      /* this routine is disabled: an internal error is raised
         before anything else is done for block distributions   */

      dalib_internal_error ("not updated for new version");
      dalib_stop ();

      /* the block size is not available in this version; it is set
         to 0 as in dalib_rma_read_fast instead of being taken from
         a pointer that has never been set                          */

      bsize = 0;
 
      NP       = dalib_top_size (top_id);
      owner    = dalib_block_owner (NP, bsize, lb, ub, *ind); 
      local_lb = lb + (owner-1) * bsize;
      offset   = (*ind) - local_lb;

#ifdef DEBUG
 printf ("%d: ind = %d -> owner = %d, offset = %d\n",
         pcb.i, *ind, owner, offset);
#endif

    }

   else

    { dalib_internal_error ("rma_update_fast illegal");
      dalib_stop ();
    }

  /* owner is counted from 1 here, relative positions from 0 */

  rem_pid = dalib_top_elem (top_id, owner-1);
  rem_address =  dalib_array_remote_data (*array_id, owner-1);
  rem_address += offset * size;

#ifdef DEBUG
  printf ("%d: rma_update_fast (ind=%d), pid = %d, address = %p, size = %d\n",
           pcb.i, *ind, rem_pid, (void *) rem_address, size);
#endif

#if defined(RMA)
  dalib_system_rma_update (*op, data, rem_pid, rem_address, size);
#else
  dalib_internal_error ("RMA access not available\n");
  dalib_stop ();
#endif

} /* dalib_rma_update_fast */

 
/**************************************************************************
*                                                                         *
* void FUNCTION(dalib_array_dist_access) (array_id, ....)                 *
*                                                                         *
**************************************************************************/

/*
 * dalib_array_dist_access
 *
 *   For every entry NId of the array's remote access table (rma_NP
 *   entries) this routine delivers the data needed to address that
 *   entry's section relative to the static address a_ptr:
 *
 *     a_dimK[NId] = total[K]                     for K = 1, ..., rank
 *     a_zero[NId] = (localAddr - a_ptr) / size + 1 - first
 *
 *     array_id       : descriptor of the array
 *     a_ptr          : static address the local addresses are related to
 *     a_zero         : (out) one value per table entry
 *     a_dim1..a_dim7 : (out) one value per table entry, only the first
 *                      'rank' arrays are written
 *
 *   An internal error is raised if an entry has no local address or if
 *   its distance to a_ptr is not a multiple of the element size.
 *   Only available if compiled with DSM or SHM.
 */
void FUNCTION(dalib_array_dist_access)
                          (array_id, a_ptr, a_zero, a_dim1, a_dim2,
                           a_dim3, a_dim4, a_dim5, a_dim6, a_dim7)

array_info *array_id;
unsigned char *a_ptr;

int *a_zero;
int  *a_dim1, *a_dim2, *a_dim3, *a_dim4, *a_dim5, *a_dim6, *a_dim7;

{ int rank, size;
  array_info descriptor;
  remote_access *rma_info;
  int NId, NP;

  descriptor = *array_id;

  rank = descriptor->rank;
  size = descriptor->size;

#if defined(DSM) || defined(SHM)

  rma_info = descriptor->RemoteInfo->rma_info;
  NP = descriptor->RemoteInfo->rma_NP;

  for (NId=0; NId < NP; NId++)

    { int *total;
      long offset;            /* distance to a_ptr, bytes then elements */
      unsigned char *data;
      int first;

      total  = rma_info->total;
      data   = rma_info->localAddr;
      first  = rma_info->first;

      /* rank r sets the extents of dimensions r, r-1, ..., 1 */

      switch (rank) {

      case 7 : a_dim7[NId] = total[7];   /* fall through */
      case 6 : a_dim6[NId] = total[6];   /* fall through */
      case 5 : a_dim5[NId] = total[5];   /* fall through */
      case 4 : a_dim4[NId] = total[4];   /* fall through */
      case 3 : a_dim3[NId] = total[3];   /* fall through */
      case 2 : a_dim2[NId] = total[2];   /* fall through */
      case 1 : a_dim1[NId] = total[1];   /* fall through */
      case 0 : break;

      default : break;   /* other ranks: no extents are written */

      } /* switch */

      if (data == NULL)

         { dalib_internal_error ("no local pointer");
           dalib_stop ();
         }

      offset = data - a_ptr;

      /* 'offset' is a long, so it has to be printed with %ld */

#ifdef DEBUG
      printf ("%d: dist_addr of NId = %d, local = %p, stat = %p, offset = %ld\n",
               pcb.i, NId+1, (void *) data, (void *) a_ptr, offset);
#endif

      if (offset % size)

        { printf ("array_dist_access: static addr = %p, dyn addr = %p\n", 
                   (void *) a_ptr, (void *) data);
          printf ("              diff = %ld is not multiple of size = %d\n",
                  offset, size);
          dalib_internal_error (
             "alignment problem, use flag -f for compilation");
          dalib_stop ();
        }

      offset = offset / size;   /* now counted in elements */

      a_zero[NId] = (int) (offset + 1 - first);

#ifdef DEBUG
      printf ("%d: dist_addr (NId=%d), offset = %ld, first = %d, zero = %d\n",
               pcb.i, NId+1, offset, first, a_zero[NId]);
#endif

      rma_info ++;
    }
    
#else

   dalib_internal_error ("DSM (distributed shared memory) not available");
   dalib_stop ();

#endif

} /* dalib_array_dist_access */

/**************************************************************************
*                                                                         *
* void FUNCTION(dalib_array_rma_access) (array_id, ....)                  *
*                                                                         *
**************************************************************************/

/*
 * dalib_array_rma_access
 *
 *   Delivers for every entry of the array's remote access table
 *   (rma_NP entries) the extents and the negated first index:
 *
 *     a_dimK[p] = total[K]      for K = rank, ..., 1
 *     a_zero[p] = - first
 *
 *   Ranks outside 1..7 leave the a_dim arguments untouched.
 *   Only available if compiled with DSM, SHM or RMA.
 */
void FUNCTION(dalib_array_rma_access)
                          (array_id, a_zero, a_dim1, a_dim2,
                           a_dim3, a_dim4, a_dim5, a_dim6, a_dim7)

array_info *array_id;

int *a_zero;
int  *a_dim1, *a_dim2, *a_dim3, *a_dim4, *a_dim5, *a_dim6, *a_dim7;

{ array_info dsp;
  int no_dims;

  dsp     = *array_id;
  no_dims = dsp->rank;

#if defined(DSM) || defined(SHM) || defined(RMA)

  { remote_access *entry;     /* current entry of the access table  */
    int *dim_out[7];          /* dim_out[K-1] is argument a_dimK    */
    int top_dim;              /* highest dimension to be written    */
    int no_procs;
    int proc;

    dim_out[0] = a_dim1;
    dim_out[1] = a_dim2;
    dim_out[2] = a_dim3;
    dim_out[3] = a_dim4;
    dim_out[4] = a_dim5;
    dim_out[5] = a_dim6;
    dim_out[6] = a_dim7;

    if (no_dims >= 1 && no_dims <= 7)
       top_dim = no_dims;
     else
       top_dim = 0;

    entry    = dsp->RemoteInfo->rma_info;
    no_procs = dsp->RemoteInfo->rma_NP;

    for (proc = 0; proc < no_procs; proc++, entry++)

      { int *extent;
        int sec_first;
        int dim;

        extent    = entry->total;
        sec_first = entry->first;

        /* highest dimension first, down to dimension 1 */

        for (dim = top_dim; dim >= 1; dim--)
           dim_out[dim-1][proc] = extent[dim];

        a_zero[proc] = - sec_first;
      }
  }

#else

   dalib_internal_error ("RMA (remote shared memory) not available");
   dalib_stop ();

#endif

} /* dalib_array_rma_access */

/**************************************************************************
*                                                                         *
* void FUNCTION(dalib_rma_read1_block) (data, array_id, owner, ind)       *
*                                                                         *
**************************************************************************/

/*
 * dalib_rma_read1_block
 *
 *   Copies one element (size bytes) of the array out of the section
 *   described by entry (*owner - 1) of the remote access table.
 *
 *     data     : (out) receives the element
 *     array_id : descriptor of the array
 *     owner    : 1-based number of the table entry to read from
 *     ind      : index of the element; the element is located
 *                (*ind - first) elements behind the section start
 *
 *   DSM/SHM builds copy directly from the entry's local address, RMA
 *   builds fetch from its remote address.  Any other build raises an
 *   internal error and stops.
 */
void FUNCTION(dalib_rma_read1_block) (data, array_id, owner, ind)

char *data;
array_info *array_id;
int *ind;
int *owner;

{
#if defined(DSM) || defined(SHM) || defined(RMA)

  char *rem_address;
  int rem_pid;
  int size;

  remote_access *rma_info; /* will have rma_NP entries only       */

  rem_pid = *owner - 1;

  size = (*array_id)->size;
  rma_info = (*array_id)->RemoteInfo->rma_info;
  rma_info += rem_pid;

#if defined(DSM) || defined(SHM)
  rem_address =  rma_info->localAddr;
#else
  rem_address =  rma_info->remoteAddr;
#endif

  /* widen before multiplying: the byte offset may exceed an int */

  rem_address += (long) (*ind - rma_info->first) * size;

#if defined(DSM) || defined(SHM)
  dalib_memcopy (data, rem_address, size);
#else
  dalib_system_rma_get (data, rem_pid+1, rem_address, size);
#endif

#else

  /* no address is available here, so stop instead of going on
     with an undefined pointer                                  */

  dalib_internal_error ("no shared access");
  dalib_stop ();

#endif
}

/**************************************************************************
*                                                                         *
* void FUNCTION(dalib_remote_read) (data, array_id, owner, ind)           *
*                                                                         *
**************************************************************************/

/*
 * dalib_remote_read
 *
 *   Copies one element (size bytes) of the array out of the section
 *   described by entry (*owner - 1) of the remote access table.
 *
 *     data     : (out) receives the element
 *     array_id : descriptor of the array
 *     owner    : 1-based number of the table entry to read from
 *     ind      : element offset from the section start; unlike
 *                dalib_rma_read1_block no 'first' is subtracted
 *
 *   DSM/SHM builds copy directly from the entry's local address, RMA
 *   builds fetch from its remote address.  Any other build raises an
 *   internal error and stops.
 */
void FUNCTION(dalib_remote_read) (data, array_id, owner, ind)

char *data;
array_info *array_id;
int *ind;
int *owner;

{
#if defined(DSM) || defined(SHM) || defined(RMA)

  char *rem_address;
  int rem_pid;
  int size;

  remote_access *rma_info; /* will have rma_NP entries only       */

  rem_pid = *owner - 1;

  size = (*array_id)->size;
  rma_info = (*array_id)->RemoteInfo->rma_info;
  rma_info += rem_pid;

#if defined(DSM) || defined(SHM)
  rem_address =  rma_info->localAddr;
#else
  rem_address =  rma_info->remoteAddr;
#endif

  /* widen before multiplying: the byte offset may exceed an int */

  rem_address += (long) (*ind) * size;

#if defined(DSM) || defined(SHM)
  dalib_memcopy (data, rem_address, size);
#else
  dalib_system_rma_get (data, rem_pid+1, rem_address, size);
#endif

#else

  /* no address is available here, so stop instead of going on
     with an undefined pointer                                  */

  dalib_internal_error ("no shared access");
  dalib_stop ();

#endif
}
