/**************************************************************************
*                                                                         *
*  Author      : Dr. Thomas Brandes, GMD, SCAI.WR                         *
*  Copyright   : GMD St. Augustin, Germany                                *
*  Date        : Jan 95                                                   *
*  Last Update : Aug 97                                                   *
*                                                                         *
*  This Module is part of the DALIB                                       *
*                                                                         *
*  Module      : ind_shadow.m4                                            *
*                                                                         *
*  Function    : Use of shadow edges for unstructured communication       *
*                                                                         *
*  Export : FORTRAN Interface                                             *
*                                                                         *
*   void dalib_ind_shadow_define (schedule_ptr *s_id,                     *
*                                 char *array_data, char *index_data,     *
*                                 array_info *s_dsp,                      *
*                                 array_info *array_dsp, *index_dsp)      *
*                                                                         *
*   - update index to point to shadow area, computes schedule s_id        *
*                                                                         *
*   void dalib_ind_shadow_get (schedule_ptr *s_id, char *array_data,      *
*                              array_info *s_dsp, *array_dsp)             *
*                                                                         *
*   - update shadow area of array according to the schedule               *
*   - shadow area will contain afterwards the non-local values            *
*                                                                         *
*   void dalib_ind_shadow_put (schedule_ptr *s_id,                        *
*                              char *array_data, int *op,                 *
*                              array_info *s_dsp, *array_dsp, *op_dsp)    *
*                                                                         *
*   - write non-local computed values back to owning processors           *
*   - elements will be combined according to reduction operator op        *
*                                                                         *
*   void dalib_ind_shadow_set (schedule_ptr *s_id,                        *
*                              char *array_data, char *val_data,          *
*                              array_info *s_dsp, *array_dsp, *val_dsp)   *
*                                                                         *
*   - initializes shadow area with the value 'val'                        *
*   - needed for reductions on distributed arrays                         *
*                                                                         *
*   void dalib_ind_shadow_set1 (schedule_ptr *s_id,                       *
*                               char *array_data, char *val_data,         *
*                               array_info *s_dsp, *array_dsp, *val_dsp)  *
*                                                                         *
*     - initialize shadow area for out-of-range values                    *
*                                                                         *
*   void dalib_ind_shadow_free (schedule_ptr *s_id)                       *
*                                                                         *
*    - frees internal data structures used for the schedule               *
*                                                                         *
**************************************************************************/

#undef DEBUG

#undef REMOTE
#define CHECK

#include "dalib.h"
#include "inspector.h"

extern inspector_data *dalib_inspector_db_get ();

/************************************************************************
*                                                                       *
*  Data structure used for full shadow schedule when it is used         *
*                                                                       *
*  Note : different shadow schedules can share the same                 *
*         communication schedule (but do not share shadow)              *
*                                                                       *
************************************************************************/

/* Tag stored in shadow_schedule.ident; dalib_verify_shadow_schedule
   rejects every handle whose ident differs from this value.         */

#define SHADOW_IDENTIFICATION 2197

typedef struct shadow_schedule *shadow_ptr;

struct shadow_schedule {

   int ident;               /* Identification of shadow schedule:
                               SHADOW_IDENTIFICATION after creation,
                               reset to 0 when the schedule is freed */

   int db_pos;              /* position in inspector data base
                               for the communication schedule       */

   unsigned char *base_data;  /* data (f_data) of the base array the
                                 schedule has been built for; use will
                                 be restricted to this array if the
                                 shadow is on the heap                */

   char *shadow_area;       /* shadow area allocated on heap, null
                               pointer if nothing has been allocated */

   int shadow_area_size;    /* number of bytes allocated on heap,
                               0 if nothing has been allocated       */

   int shadow_entries;      /* size of shadow area (counted in
                               entries, not in bytes)                */

   int offset;              /* a[...,low+offset+1] points to shadow */
   int high;                /* a[...,high] points to shadow_data;
                               this value is added to the new index
                               values of the index change records    */

};  /* shadow schedule */

/* Note:

   shadow_data = dsp->f_data + (offset+left_ov+1) * serial * nbytes;

*/

/*******************************************************************
*                                                                  *
*   void dalib_verify_shadow_schedule (shadow_ptr *s_id)           *
*                                                                  *
*    - checks the identification tag of the schedule *s_id         *
*    - raises an internal error and stops if it is not             *
*      SHADOW_IDENTIFICATION                                       *
*                                                                  *
*******************************************************************/

void dalib_verify_shadow_schedule (s_id) 
shadow_ptr *s_id;

{ shadow_ptr schedule = *s_id;

  /* a correctly tagged schedule needs no further action */

  if (schedule->ident == SHADOW_IDENTIFICATION) return;

  dalib_internal_error ("invalid shadow schedule");
  dalib_stop ();

} /* dalib_verify_shadow_schedule */

/*******************************************************************
*                                                                  *
*   void dalib_check_shadow_size (shadow_ptr s_id,                 *
*                                 array_info base_dsp)             *
*                                                                  *
*    - verifies that schedule s_id may be used for base_dsp        *
*                                                                  *
*    - accepted at once if base_dsp->f_data is the data the        *
*      schedule has been computed for                              *
*    - internal error if the shadow of s_id is on the heap         *
*    - internal error if the right shadow of the last              *
*      dimension is smaller than shadow_entries + 1                *
*                                                                  *
*******************************************************************/

void dalib_check_shadow_size (s_id, base_dsp)

shadow_ptr s_id;
array_info base_dsp;

{ unsigned char *data;
  int rank;
  int right_ov;

  /* One buffer serves both error messages.  The fixed text plus the
     decimal numbers need less than 80 characters; the array name is
     cut after 48 characters by the precision of the %.48s conversion,
     so msg cannot overflow whatever the length of the name is.       */

  char msg [128];

  data     = base_dsp->f_data;

  /* if the shadow has been computed with this base array, we are done */

  if (s_id->base_data == data) return;

#ifdef DEBUG
  printf ("%d: schedule calculated for base %p, used for array %p",
          pcb.i, s_id->base_data, data);
#endif

  /* Verify that shadow schedule can be used also for this other array */

  /* a shadow on the heap belongs to the array it was computed for,
     and that array has already been ruled out above                  */

  if (s_id->shadow_area_size > 0)

     { sprintf (msg,
         "shadow (%d entries on heap allocated) not valid for array %.48s",
         s_id->shadow_entries+1, base_dsp->name);

       dalib_internal_error (msg);
       dalib_stop ();
     }

  /* the other array must provide at least shadow_entries + 1 shadow
     elements at the upper end of its last dimension                  */

  rank     = base_dsp->rank;
  right_ov = base_dsp->dimensions[rank-1].shadow[1];

  if (right_ov < s_id->shadow_entries + 1)

    { sprintf (msg,
               "shadow schedule requires size %d (array %.48s has only %d)",
               s_id->shadow_entries + 1, base_dsp->name, right_ov);

      dalib_internal_error (msg);
      dalib_stop ();
    }

} /* dalib_check_shadow_size */

/*******************************************************************
*                                                                  *
*   dalib_indirect_localize_addresses (base, schedule_id)          *
*                                                                  *
*    - gets the target address list of the schedule and            *
*      passes it to dalib_addr_global_to_local for 'base'          *
*    - source and destination list of that call are the            *
*      same array, so the list is rewritten in place               *
*                                                                  *
*******************************************************************/

void dalib_indirect_localize_addresses (base, schedule_id)

schedule_ptr schedule_id;
array_info base;

{ int *addr_list;     /* address list delivered for the schedule */
  int addr_count;     /* number of entries in addr_list          */

#ifdef DEBUG 
  int k;              /* only needed to print the lists          */

  printf ("%d: dalib_indirect_localize_addresses, schedule = %d\n", 
           pcb.i, schedule_id);
#endif 

  dalib_indirect_target_info (schedule_id, &addr_count, &addr_list);
 
#ifdef DEBUG 
  printf ("%d: target info : %d other values\n", pcb.i, addr_count);
  printf ("%d: global addresses", pcb.i);
  for (k=0;k<addr_count;k++) printf (" %d", addr_list[k]);
  printf ("\n");
#endif 

  /* same list as input and as output */

  dalib_addr_global_to_local (base, addr_count, addr_list, addr_list);

#ifdef DEBUG 
  printf ("%d: local addresses", pcb.i);
  for (k=0;k<addr_count;k++) printf (" %d", addr_list[k]);
  printf ("\n");
#endif 

} /* dalib_indirect_localize_addresses */

/**********************************************************************
*                                                                     *
*  void dalib_count_owners (owners, N, M, m, non_local, out_range)    *
*                                                                     *
*  int N, M, m, *non_local, *out_range;                               *
*  int owners[N];                                                     *
*                                                                     *
*  non_local : number of owners[i] with 0 <= owners[i] < M that       *
*              are different from m                                   *
*  out_range : number of owners[i] with owners[i] >= M                *
*                                                                     *
*  negative owners[i] (masked entries) are not counted at all         *
*                                                                     *
**********************************************************************/

static void dalib_count_owners (owners, N, M, m, non_local, out_range)   

int N, M, m, *non_local, *out_range;
int owners[];

{ int k;

  int n_outside = 0;   /* owners that are not smaller than M     */
  int n_remote  = 0;   /* owners in 0..M-1 that are not m itself */

  for (k = 0; k < N; k++)

    { int owner = owners[k];

      if (owner < 0) continue;       /* negative: not counted */

      if (owner >= M)
         n_outside++;
       else if (owner != m)
         n_remote++;
    }

  *out_range = n_outside;
  *non_local = n_remote;

#ifdef DEBUG
  printf ("%d: count_owners (N=%d), M=%d, m=%d, out = %d, not local = %d\n",
          pcb.i, N, M, m, n_outside, n_remote);
#endif

} /* dalib_count_owners */

/**********************************************************************
*                                                                     *
*  void dalib_get_owners (owners, N, M, m, non_local, out_range)      *
*                                                                     *
*  int N, M, m;                                                       *
*  int owners[N];                                                     *
*                                                                     *
*  int non_local[], out_range[];                                      *
*                                                                     *
*  - non_local gets the positions j with 0 <= owners[j] < M and       *
*    owners[j] != m, out_range the positions with owners[j] >= M      *
*  - owners[j] is set to -1 unless j was put into non_local           *
*  - sizes of non_local, out_range given by dalib_count_owners        *
*                                                                     *
**********************************************************************/

static void dalib_get_owners (owners, N, M, m, non_local, out_range)

int N, M, m, owners[], non_local[], out_range[];

{ int k;

  int n_outside = 0;   /* entries written to out_range          */
  int n_remote  = 0;   /* entries written to non_local          */
  int n_masked  = 0;   /* negative owners, only reported in DEBUG */

  for (k = 0; k < N; k++)

    { int owner = owners[k];

      if (owner >= M)

         out_range[n_outside++] = k;

       else if (owner < 0)

         n_masked++;

       else if (owner != m)

         { /* the only case in which the owner value is kept */

           non_local[n_remote++] = k;
           continue;
         }

      /* out of range, masked or local: owner is invalidated */

      owners[k] = -1;

    } /* for k */

#ifdef DEBUG
  printf ("%d: dalib_get_owners : %d out of range, %d non local, %d masked\n",
           pcb.i, n_outside, n_remote, n_masked);
#endif

} /* dalib_get_owners */

/*******************************************************************
*                                                                  *
*  void dalib_shadow_calc (schedule_ptr *s_id,                     *
*                          array_info array_dsp,                   *
*                          int indexes[], int no_indexes,          *
*                          int target_processors[],                *
*                          int global_addresses[],                 *
*                          int **change)                           *
*                                                                  *
*   - builds the communication schedule *s_id for the              *
*     non-local entries of the indirection array                   *
*   - *change receives a new int array of the form                 *
*     { n, (pos, old, new) * n } to patch 'indexes'                *
*   - 'indexes' itself is only read, not modified                  *
*   - target_processors is modified (see below)                    *
*                                                                  *
*******************************************************************/

void dalib_shadow_calc (s_id, array_dsp, indexes, no_indexes,
                        target_processors, global_addresses, change)

schedule_ptr *s_id;
array_info array_dsp;
int indexes[];
int no_indexes;
int target_processors[];
int global_addresses [];
int **change;

{ int pid, NP, NId;          /* pid is not used in this function */

  int *non_local_indexes;    /* positions of non-local entries; filled by
                                dalib_get_owners, afterwards only freed  */
  int *non_global_indexes;   /* positions of out-of-range entries        */
  int *source_indexes;       /* source index list of the new schedule    */
  int *index_change;         /* write cursor into the change records     */
  int non_local_nr, non_global_nr;
  int source_topology;

  array_info dummy;          /* output of dalib_array_info, not used     */

  int base_topology;

  int j, no_values;

  /* we have to get base topology and source topology */

  dalib_array_info (array_dsp, &dummy, &base_topology);

  /* note : array_dsp can also be a section */

  dalib_top_info (base_topology, 1, &NP, &NId);

  NId -= 1;    /* 0 <= NId < NP */

#ifdef DEBUG
   printf ("%d: dalib_shadow_calc, no=%d, top (id=%d, NP=%d, NId=%d)\n",
           pcb.i, no_indexes, base_topology, NP, NId);
#endif

  /* collect non-local indexes and non-global (out of range) indexes) */

  /* step 1 : count only, to get the sizes of the arrays allocated below */

  dalib_count_owners (target_processors, no_indexes, NP, NId, 
                      &non_local_nr, &non_global_nr);

#ifdef DEBUG
   printf ("%d: dalib_shadow_calc, non_local = %d, non_global = %d\n",
            pcb.i, non_local_nr, non_global_nr);
#endif
 
  non_local_indexes  = dalib_int_malloc (non_local_nr, "shadow_calc1");
  non_global_indexes = dalib_int_malloc (non_global_nr, "shadow_calc2");

  /* change records: one leading counter, then three integers
     (pos, old_val, new_val) for every non-local and non-global entry */

  index_change = dalib_int_malloc (3*(non_local_nr + non_global_nr) + 1,
                                   "shadow_calc3");

  /* the caller gets the start of the array; the local pointer is
     advanced while the records are written                        */

  *change = index_change;

  *(index_change++) = non_local_nr + non_global_nr;

  /* step 2 : compress arrays for non-local indexes */

  /* dalib_get_owners also sets every entry of target_processors that
     is local, masked (negative) or out of range to -1.  This happens
     before target_processors is passed to dalib_new_indirect below,
     so the order of the two calls must be kept.                      */
 
  dalib_get_owners (target_processors, no_indexes, NP, NId,
                    non_local_indexes, non_global_indexes);

  /* for the schedule all local indexes will not be considered  */

  source_topology = 1;  /*  !!!!  */

  dalib_new_indirect (s_id, source_topology, base_topology, no_indexes,
                            target_processors, global_addresses);

  /* global indexes in indexes become now indexes in overlap area */
 
  dalib_indirect_source_info (*s_id, &no_values, &source_indexes);

  /* all non global indexes will point to a default value */

  /* the new value -1 is relative: dalib_shadow_change_indexes adds
     its 'high' argument to it when the records are applied         */
 
#ifdef DEBUG
   printf ("%d: update %d non global indexes\n", pcb.i, non_global_nr);
#endif 

  for (j=0; j < non_global_nr; j++)

    {  /* index_change: pos, old_val, new_val */

       *(index_change++) = non_global_indexes[j];
       *(index_change++) = indexes[non_global_indexes[j]];
       *(index_change++) = -1;
    }
 
  /* all non local indexes will point to the overlap area */

  /* NOTE(review): the loop runs non_local_nr times over source_indexes,
     while the length delivered for that list is no_values (only printed
     in DEBUG mode) - presumably both are equal; confirm against
     dalib_new_indirect / dalib_indirect_source_info.                   */
 
#ifdef DEBUG
   printf ("%d: update %d non local indexes (= %d)\n", 
            pcb.i, non_local_nr, no_values             );
#endif 

  for (j=0; j < non_local_nr; j++)

     { 

#ifdef DEBUG
       printf ("%d: non-local %d translates index[%d] -> %d \n", pcb.i, j,
               indexes[source_indexes[j]], j+1); 
#endif

       /* index_change: pos, old_val, new_val */

       *(index_change++) = source_indexes[j];
       *(index_change++) = indexes[source_indexes[j]];
       *(index_change++) = j;

       /* the source list delivered for the schedule is overwritten:
          entry j now refers to position j                            */

       source_indexes[j] = j;
     }
 
#ifdef DEBUG
   printf ("%d: dalib_shadow_calc ready\n", pcb.i);
#endif

  /* only the two position lists are released here; the change
     records stay allocated and are handed out via *change       */
 
  dalib_int_free (non_global_indexes, non_global_nr);
  dalib_int_free (non_local_indexes, non_local_nr);

} /* dalib_shadow_calc */

/*******************************************************************
*                                                                  *
*  void dalib_shadow_reset_indexes (int *indexes,                  *
*                                   int *index_changes)            *
*                                                                  *
*    - index_changes = { n, (pos, old, new) * n }                  *
*    - stores 'old' at indexes[pos] for each of the n              *
*      records, i.e. restores the indirection array                *
*                                                                  *
*******************************************************************/

void dalib_shadow_reset_indexes (indexes, index_changes)

int indexes[];
int *index_changes;

{ int  count  = index_changes[0];     /* number of records          */
  int *record = index_changes + 1;    /* record = (pos, old, new)   */
  int  k;

  for (k = 0; k < count; k++, record += 3)

     indexes [record[0]] = record[1];

} /* dalib_shadow_reset_indexes */

/*******************************************************************
*                                                                  *
*  void dalib_shadow_change_indexes (int *indexes,                 *
*                                    int high,                     *
*                                    int *index_changes)           *
*                                                                  *
*    - index_changes = { n, (pos, old, new) * n }                  *
*    - stores new + high at indexes[pos] for each of the           *
*      n records                                                   *
*    - high is the offset into the shadow area                     *
*      so A(...,HIGH) is first shadow element                      *
*                                                                  *
*******************************************************************/

void dalib_shadow_change_indexes (indexes, high, index_changes)

int indexes[];
int high;
int *index_changes;

{ int  count  = index_changes[0];     /* number of records          */
  int *record = index_changes + 1;    /* record = (pos, old, new)   */
  int  k;

  for (k = 0; k < count; k++, record += 3)

     { 

#ifdef DEBUG
   /*    printf ("%d: change index at pos = %d to %d = %d + %d\n", 
                  pcb.i, record[0], record[2]+high, record[2], high); */
#endif

       indexes [record[0]] = record[2] + high;
     }

} /* dalib_shadow_change_indexes */

/*******************************************************************
*                                                                  *
*  void dalib_free_shadow_changes (inspector_data *I_Data)         *
*                                                                  *
*    - frees the index temporary (index_c_data) if its             *
*      recorded size is positive                                   *
*    - frees the index change records, if there are any            *
*                                                                  *
*******************************************************************/

void dalib_free_shadow_changes (I_Data)

inspector_data *I_Data;

{ int *changes;

  if (I_Data->index_temp_size > 0)

      dalib_free (I_Data->index_c_data, I_Data->index_temp_size);

  changes = I_Data->index_changes;

  /* changes[0] records, three integers each, plus the counter itself */

  if (changes != (int *) 0)

      dalib_int_free (changes, 3 * changes[0] + 1);

} /* dalib_free_shadow_changes */

/**********************************************************************
*                                                                     *
*  void dalib_ind_array_info (array_info array_dsp, int *obj_size,    *
*                             int *serial_size, int *dist_size)       *
*                                                                     *
*  IN  : array_dsp, descriptor of an array that must only be          *
*        distributed in the last dimension                            *
*                                                                     *
*  OUT : obj_size    = number of bytes of one element                 *
*        serial_size = product of the global extents of all           *
*                      dimensions but the last one                    *
*        dist_size   = local extent of the last dimension             *
*                                                                     *
**********************************************************************/

static void dalib_ind_array_info (array_dsp, obj_size, serial_size, dist_size)

array_info array_dsp;
int *obj_size, *serial_size, *dist_size;

{ DimInfo *dims = array_dsp->dimensions;
  int n_serial  = array_dsp->rank - 1;   /* number of serial dimensions */
  int d;

  *obj_size    = array_dsp->size;
  *serial_size = 1;

  for (d = 0; d < n_serial; d++)

     *serial_size *= (dims[d].global_size[1] - dims[d].global_size[0] + 1);

  /* d now selects the dimension following the serial ones
     (dimension 0 if there is no serial dimension)           */

  *dist_size = dims[d].local_size[1] - dims[d].local_size[0] + 1;

#ifdef DEBUG
   printf ("%d: ind_array_info, dsp = %p, size = %d, serial = %d, dist = %d\n",
            pcb.i, array_dsp, *obj_size, *serial_size, *dist_size);
#endif 

} /* dalib_ind_array_info */

/***************************************************************************
*                                                                          *
*  shadow_ptr dalib_build_shadow_schedule (int pos, array_info base_dsp)   *
*                                                                          *
*   - creates a new shadow schedule for inspector 'pos' in data base       *
*     and the base array identified by base_dsp                            *
*                                                                          *
*   - shadow edges will be created on the heap if base_dsp has not         *
*     enough shadow area available (restricts the use always to            *
*     this base array)                                                     *
*                                                                          *
*   - a dynamic base array without enough shadow is reallocated            *
*     with a sufficient shadow instead of using the heap                   *
*                                                                          *
*   - returns the new schedule, allocated with dalib_malloc                *
*                                                                          *
***************************************************************************/

shadow_ptr dalib_build_shadow_schedule (pos, base_dsp)

int        pos;
array_info base_dsp;

{ inspector_data *S;
  int *dummy;                 /* source index list, not needed here      */
  char *shadow_area;          /* heap block, null if none is allocated   */
  unsigned char *shadow_data; /* only printed in DEBUG mode              */
  int  shadow_entries;        /* number of values of the comm. schedule  */
  int  shadow_area_size;      /* bytes of the heap block, 0 if none      */
  int  shadow_bytes;          /* bytes of one entry of last dimension    */
  int  low, high, rank;
  int  left_ov, right_ov;     /* shadow sizes of the last dimension      */
  int  offset;

  shadow_ptr schedule;

  int nbytes, serial, N;      /* N (local extent) is not used afterwards */

  S = dalib_inspector_db_get (pos);

  dalib_indirect_source_info (S->schedule_id, &shadow_entries, &dummy);

  dalib_ind_array_info (base_dsp, &nbytes, &serial, &N);

  rank     = base_dsp->rank;

  /* Attention: in case of static arrays the data starts as
                specified via the global size                 */

  if (base_dsp->global_addressing)
     low      = base_dsp->dimensions[rank-1].global_size[0];
   else
     low      = base_dsp->dimensions[rank-1].local_size[0];

  high     = base_dsp->dimensions[rank-1].local_size[1];
  left_ov  = base_dsp->dimensions[rank-1].shadow[0];
  right_ov = base_dsp->dimensions[rank-1].shadow[1];

  /* one entry of the last dimension = all serial elements */

  shadow_bytes     = serial * nbytes;

  /* shadow_entries + 1 elements are required in every case: the index
     change records use the new value -1 for out-of-range indexes, so
     one element in front of the non-local values is addressed as well */

  if (right_ov >= shadow_entries + 1)

     { /* BASE array has enough SHADOW */

      if (pcb.redist_flag) printf (
          "%d: unstructured shadow area (is=%d,needed=%d) available for %s\n", 
          pcb.i, right_ov, shadow_entries+1, base_dsp->name);
     
       shadow_area      = (char *) 0;
       shadow_area_size = 0;

       offset           = (high - low + 1);

       /* schedule->high  = high + 2 */
     }

   else if (base_dsp->dynamic)

     { /* BASE array is dynamic, so redistribution with enough shadow */

       array_info help_dsp;   /* becomes a complete copy of base_dsp */
       int size;

       if (pcb.redist_flag)
         printf (
          "%d: unstructured shadow area (is=%d,needed=%d), realloc for %s\n", 
          pcb.i, right_ov, shadow_entries+1, base_dsp->name);
     
       size = dalib_array_dsp_size (rank);

       help_dsp = (array_info) dalib_malloc (size, "build_shadow");

       /* help_dsp keeps the old state (old data, old shadow size),
          base_dsp itself is turned into the enlarged array           */

       dalib_memcopy (help_dsp, base_dsp, size);

       /* define the entries of the new descriptor */

       base_dsp->dimensions[rank-1].shadow[1] = shadow_entries + 1;
       base_dsp->n_data = 0;
       base_dsp->dsp_status_flag = DSP_DEFINED;

       /* allocate new memory */

       dalib_array_full_allocate (base_dsp, base_dsp->f_data);

       /* copy old values to new allocated memory */

       FUNCTION(dalib_assign) (&base_dsp, &help_dsp);

       /* free the old base array (if not own) and the help descriptor
          ATTENTION: distribution/alignment descriptor must not be freed  */

       dalib_array_free_own_data (help_dsp);
       dalib_free (help_dsp, size);

       shadow_area      = (char *) 0;
       shadow_area_size = 0;

       /* low and high have been read before the reallocation;
          NOTE(review): presumably the local bounds are not changed
          by enlarging the shadow - confirm                            */

       offset           = (high - low + 1);

     }

   else

     { /* shadow area will be allocated on the heap */

       if (pcb.redist_flag) printf (
         "%d: new unstructured shadow area (is=%d,needed=%d) for %s on heap\n", 
         pcb.i, right_ov, shadow_entries+1, base_dsp->name);

       /* shadow_entries + 2 entries: shadow_entries + 1 are required,
          one more entry covers the rounding of the offset below       */
     
       shadow_area_size = (shadow_entries + 2) * shadow_bytes;

       shadow_area = (char *) dalib_malloc (shadow_area_size,
                                            "dalib_build_shadow_schedule");

       /* distance in bytes between the heap block and the array data.
          NOTE(review): the difference of two pointers is stored in an
          int; if the distance does not fit into an int (possible where
          pointers are wider than int) the value is truncated and the
          schedule addresses wrong memory - confirm for the targets.   */

       offset = (long) shadow_area - (long) base_dsp->f_data;

       /* make sure that we get a correct alignment */

       /* rounds the byte distance up to a multiple of shadow_bytes;
          NOTE(review): shadow_bytes == 0 (serial or nbytes being 0)
          would be a division by zero here                             */

       while (offset%shadow_bytes != 0) offset++;

       /* now division is without any reminder */

       offset = offset / shadow_bytes;

       /* offset is now counted in entries; left_ov is added again
          when shadow_data is computed below                          */

       offset = offset - left_ov;  

     }

  /* shadow_data is only used for the DEBUG output */

  shadow_data = base_dsp->f_data + (offset+left_ov+1) * shadow_bytes;

#ifdef DEBUG
  printf ("%d: dalib_build_schedule, entries = %d, bytes = %d\n",
           pcb.i, shadow_entries, shadow_bytes);
  printf ("%d: base data at %p, shadow area at %p, offset = %d\n",
           pcb.i, base_dsp->f_data, shadow_area, offset);
  printf ("%d: shadow data at %p, low = %d, l_ov = %d, %s(%d) maps to shadow\n",
           pcb.i, shadow_data, low, left_ov, base_dsp->name, 
           offset+low-left_ov+1);
#endif

  schedule = (shadow_ptr) dalib_malloc (sizeof(struct shadow_schedule), 
                                        "build_shadow");

  /* base_data is taken after a possible reallocation of the base
     array, so it is the f_data the descriptor holds at this point   */

  schedule->ident            = SHADOW_IDENTIFICATION;
  schedule->db_pos           = pos;
  schedule->base_data        = base_dsp->f_data;
  schedule->shadow_area      = shadow_area;
  schedule->offset           = offset;
  schedule->high             = offset + low + 1;
  schedule->shadow_area_size = shadow_area_size;
  schedule->shadow_entries   = shadow_entries;

  return schedule;

} /* dalib_build_shadow_schedule */

/****************************************************************************
*                                                                           *
*  void dalib_shadow_indexes (shadow_ptr schedule,                          *
*                             array_info *ind_dsp,                          *
*                             array_info *tmp_dsp,                          *
*                             inspector_data *Insp_Data)                    *
*                                                                           *
*   - tmp_dsp == ind_dsp : index values are changed in place                *
*   - otherwise tmp_dsp becomes a temporary for the changed indexes;        *
*     the temporary kept in Insp_Data is reused if it was built             *
*     for the same schedule offset, else a new one is allocated             *
*                                                                           *
****************************************************************************/

/* applies the index change records to the data of 'dsp'; the data
   must be directly accessible (no copy made by the section access) */

static void dalib_shadow_patch_indexes (schedule, dsp, index_changes)

shadow_ptr schedule;
array_info dsp;
int *index_changes;

{ int no, size, is_new;
  int *index_data;

  dalib_secarray_get_data (dsp, 1, &no, &size, &index_data, &is_new);

  if (is_new)

     { dalib_internal_error ("shadow rebuild for noncontigous indexes");
       dalib_stop ();
     }

  dalib_shadow_change_indexes (index_data, schedule->high, index_changes);

} /* dalib_shadow_patch_indexes */

void dalib_shadow_indexes (schedule, ind_dsp, tmp_dsp, Insp_Data)

shadow_ptr schedule;
array_info *tmp_dsp, *ind_dsp;
inspector_data *Insp_Data;

{ int *index_changes = Insp_Data->index_changes;

#ifdef DEBUG
  printf ("%d: dalib_shadow_indexes, changes = %d\n", 
           pcb.i, index_changes[0]);
#endif

  /* explicit shadow: temporary and indirection array are the same
     descriptor, the indirection array itself gets the new values   */

  if ((*tmp_dsp) == (*ind_dsp))

    { dalib_shadow_patch_indexes (schedule, *ind_dsp, index_changes);
      return;
    }

  /* automatic shadow: the temporary starts as a pointer to the
     integer array                                                 */

  dalib_ptr_set_array (*tmp_dsp, *ind_dsp);

  if (Insp_Data->index_temp_size > 0)

    { /* a temporary exists, it fits if it belongs to the same offset */

      if (Insp_Data->shadow_high == schedule->offset)

        { (*tmp_dsp)->dsp_status_flag = DSP_PTR_DATA;
          (*tmp_dsp)->n_data          = 0;
          (*tmp_dsp)->f_data          = Insp_Data->index_f_data;

          if (pcb.redist_flag)

             printf ("%d: temporary for indexes %s reused, high = %d\n",
                      pcb.i, (*ind_dsp)->name, Insp_Data->shadow_high);

          return;
        }

      if (pcb.redist_flag)

         printf ("%d: temporary for indexes %s not reusable (%d-%d)\n",
                      pcb.i, (*ind_dsp)->name, Insp_Data->shadow_high,
                      schedule->offset);
    }

   else if (pcb.redist_flag)

      printf ("%d: temporary for indexes %s new allocated (first use)\n",
                   pcb.i, (*ind_dsp)->name);

  /* no fitting temporary: allocate one, copy the indexes, patch them */

  (*tmp_dsp)->dsp_status_flag = DSP_DEFINED;

  FUNCTION(dalib_array_allocate) (tmp_dsp, (*tmp_dsp)->f_data);

  FUNCTION(dalib_assign) (tmp_dsp, ind_dsp);

#ifdef DEBUG
      printf ("%d: indexes copied to temporary\n", pcb.i);
#endif

  dalib_shadow_patch_indexes (schedule, *tmp_dsp, index_changes);

  /* a temporary already recorded in Insp_Data is left as it is */

  if (Insp_Data->index_temp_size != 0) return;

  /* first temporary: the inspector data takes over the memory ... */

  Insp_Data->shadow_high     = schedule->offset;
  Insp_Data->index_f_data    = (*tmp_dsp)->f_data;
  Insp_Data->index_c_data    = (*tmp_dsp)->c_data;
  Insp_Data->index_temp_size = (*tmp_dsp)->n_data;

  /* ... and the descriptor only points to data it does not own */

  (*tmp_dsp)->dsp_status_flag = DSP_PTR_DATA;
  (*tmp_dsp)->n_data          = 0;

} /* dalib_shadow_indexes */

/****************************************************************************
*                                                                           *
*  void dalib_shadow_indexes (..)                                           *
*                                                                           *
****************************************************************************/

void dalib_free_shadow_schedule (schedule)

shadow_ptr schedule;

{ 
#ifdef DEBUG
  printf ("%d: free shadow schedule, data size = %d\n",
          pcb.i, schedule->shadow_area_size);
#endif

  if (schedule->shadow_area_size > 0)
     dalib_free (schedule->shadow_area, schedule->shadow_area_size);

  /* delete the identification to avoid possible reuse */

  schedule->ident = 0;

  dalib_free (schedule, sizeof (struct shadow_schedule));

} /* dalib_free_shadow_schedule */

/**********************************************************************
*                                                                     *
*  dalib_insp_shadow_get  (int *s_id,                                 *
*                          char *base_vals,   array_info base_dsp,    *
*                          char *mask_vals,   array_info mask_dsp,    *
*                          char *ind_vals,    array_info ind_dsp,     *
*                          char *tmp_vals,    array_info tmp_dsp)     *
*                                                                     *
*   s_id        : identification of the computed schedule             *
*                                                                     *
*   base_array   : array that will be indirectly addressed            *
*                  (distributed along the last dimension)             *
*                                                                     *
*   ind_dsp      : integer array for distributed dimension            *
*                                                                     *
*   mask_array   : mask array used for indexes                        *
*                                                                     *
*  IMPORTANT: mask, ind must be aligned                               *
*                                                                     *
**********************************************************************/

void FUNCTION(dalib_insp_shadow_get)

         (s_id, base_vals,   base_dsp,
                mask_vals,   mask_dsp,
                ind_vals,    ind_dsp,
                tmp_vals,    tmp_dsp   )

shadow_ptr *s_id;   /* out: shadow schedule built by this routine */

array_info *base_dsp, *mask_dsp, *ind_dsp, *tmp_dsp;
char       *base_vals, *mask_vals, *ind_vals, *tmp_vals;

{ inspector_info I_Info;
  inspector_data I_Data;

  inspector_data *entry;   /* inspector data that belongs to db_id   */

  int db_id;               /* position in the inspector data base    */
  int build_fresh;         /* != 0 : a new inspector has to be built */

  /* build the corresponding Inspector information , shadow = 1 */

  dalib_insp_info_set_shadow (&I_Info, base_dsp);
  dalib_insp_info_set_index  (&I_Info, 1, ind_dsp, ind_vals);
  dalib_insp_info_set_mask   (&I_Info, mask_dsp, mask_vals);

  /* precompute inspector data for the given info */

  dalib_inspector_fill_data (&I_Data, &I_Info);

  /* look for inspector data that matches the given information */

  db_id = dalib_inspector_db_search (&I_Data);

#ifdef DEBUG
   printf ("%d: dalib_insp_shadow_get, position in data base = %d\n",
            pcb.i, db_id);
#endif

  /* a negative position means that no entry has been found */

  build_fresh = (db_id < 0);

  if (!build_fresh)

    { /* there is already an entry in the data base */

      entry = dalib_inspector_db_get (db_id);

      if (!dalib_inspector_is_valid (&I_Info, db_id))

        { if (dalib_inspector_db_attached (db_id))

            { /* invalid, but attached: the entry is left untouched
                 and a completely new inspector is built instead     */

              if (pcb.redist_flag)
                printf ("%d: schedule found, (db=%d), but invalid and attached\n",
                   pcb.i, db_id);

              build_fresh = 1;
            }

           else

            { /* invalid and not attached: rebuild the entry in place */

#ifdef DEBUG
              printf ("%d: inspector data not valid, needs rebuilding\n", 
                           pcb.i);
#endif

              dalib_inspector_rebuild (entry, &I_Info);

              dalib_inspector_set_valid (&I_Info, db_id);

              if (pcb.redist_flag)
                printf ("%d: schedule for indirect adressing (db=%d) new rebuilt\n",
                        pcb.i, db_id);
            }
        }

      if (!build_fresh)

        { /* entry is valid (possibly after rebuilding) and is reused */

          dalib_inspector_db_attach (db_id);

#ifdef DEBUG
          printf ("%d: inspector (db=%d) valid and reusable\n", pcb.i, db_id);
#endif

          if (pcb.redist_flag)

             printf (
              "%d: schedule (db=%d) reused for unstructured shadow (%s:%s)\n", 
               pcb.i, db_id, (*base_dsp)->name, (*ind_dsp)->name);
        }
    }

  if (build_fresh)

    { /* build a completely new inspector and insert it in the data base */

      entry = (inspector_data *) dalib_malloc (sizeof(inspector_data),
                                               "creating inspector data");

      *entry = I_Data;

      dalib_inspector_build (entry, &I_Info);

      db_id = dalib_inspector_db_insert (entry);

      dalib_inspector_set_valid (&I_Info, db_id);

      if (pcb.redist_flag)
         printf (
           "%d: schedule (db=%d) for unstructured shadow (%s:%s) new computed\n",
           pcb.i, db_id, (*base_dsp)->name, (*ind_dsp)->name);
    }

  /* now build the shadow schedule for the chosen data base entry */

  *s_id = dalib_build_shadow_schedule (db_id, *base_dsp);

  dalib_shadow_indexes (*s_id, ind_dsp, tmp_dsp, entry);

} /* dalib_insp_shadow_get */

/**************************************************************************
*                                                                         *
*   void dalib_exec_shadow_get (int *s_id, array_info *base_dsp)          *
*                                                                         *
*   - update shadow area of array corresponding to the schedule           *
*   - shadow area will contain afterwards the non-local values            *
*                                                                         *
**************************************************************************/

void FUNCTION(dalib_exec_shadow_get) (s_id, base_dsp)

shadow_ptr *s_id;       /* shadow schedule to be executed         */
array_info *base_dsp;   /* descriptor of the array to be updated  */

{ unsigned char *local_part;    /* f_data of the array                   */
  unsigned char *shadow_part;   /* address passed on for receiving       */
  int N;
  int elem_size, serial;
  int last_dim, left_ov;

  inspector_data *insp;         /* data base entry of the schedule       */

  dalib_verify_shadow_schedule (s_id);  /* check valid shadow pointer */

  insp = dalib_inspector_db_get ((*s_id)->db_pos);

  local_part = (*base_dsp)->f_data;
  last_dim   = (*base_dsp)->rank - 1;
  left_ov    = (*base_dsp)->dimensions[last_dim].shadow[0];

  dalib_check_shadow_size (*s_id, *base_dsp);

  dalib_ind_array_info (*base_dsp, &elem_size, &serial, &N);

  /* one entry covers all elements of the serial dimensions */

  elem_size = elem_size * serial;

#ifdef DEBUG
  printf ("%d: dalib_ind_shadow_get, schedule = %p, data = %p, size =%d\n",
           pcb.i, insp->schedule_id, local_part, elem_size);
#endif

  dalib_indirect_send_target (insp->schedule_id, local_part, elem_size);

#ifdef DEBUG
  printf ("%d: dalib_exec_shadow_get, have sent target\n", pcb.i);
#endif

  /* receiving starts (offset + left_ov + 1) entries behind f_data */

  shadow_part = local_part + ((*s_id)->offset + left_ov + 1) * elem_size;

  dalib_indirect_recv_source (insp->schedule_id, shadow_part, elem_size);
 
#ifdef DEBUG
  printf ("%d: dalib_exec_shadow_get, have received source\n", pcb.i);
#endif

} /* dalib_exec_shadow_get */

/**************************************************************************
*                                                                         *
*   void dalib_exec_shadow_put (int *s_id, array_info *base_dsp, int *op) *
*                                                                         *
*   - write non-local computed values back to owning processors           *
*   - elements will be combined according to the reduction operator op    *
*                                                                         *
**************************************************************************/

void FUNCTION(dalib_exec_shadow_put) (s_id, base_dsp, op)

shadow_ptr *s_id;       /* shadow schedule to be executed                */
array_info *base_dsp;   /* descriptor of the array                       */
int *op;                /* operator, handed on for receiving the target  */

{ unsigned char *local_part;    /* f_data of the array                 */
  unsigned char *shadow_part;   /* address passed on for sending       */
  int N;
  int elem_size, serial;
  int last_dim, left_ov;

  inspector_data *insp;         /* data base entry of the schedule     */

  dalib_verify_shadow_schedule (s_id);  /* check valid shadow pointer */

  insp = dalib_inspector_db_get ((*s_id)->db_pos);

  local_part = (*base_dsp)->f_data;

  dalib_check_shadow_size (*s_id, *base_dsp);

  dalib_ind_array_info (*base_dsp, &elem_size, &serial, &N);

  /* one entry covers all elements of the serial dimensions */

  elem_size = elem_size * serial;

  last_dim = (*base_dsp)->rank - 1;
  left_ov  = (*base_dsp)->dimensions[last_dim].shadow[0];

  /* sending starts (offset + left_ov + 1) entries behind f_data */

  shadow_part = local_part + ((*s_id)->offset + left_ov + 1) * elem_size;

#ifdef DEBUG
  printf ("%d: dalib_ind_shadow_put, schedule = %p, data = %p, size =%d\n",
           pcb.i, insp->schedule_id, shadow_part, elem_size);
#endif

  dalib_indirect_send_source (insp->schedule_id, shadow_part, elem_size);

#ifdef DEBUG
  printf ("%d: dalib_ind_shadow_put, have sent source\n", pcb.i);
#endif

  /* the values are received at the start of the array data again */

  local_part = (*base_dsp)->f_data;

  dalib_indirect_recv_target (insp->schedule_id, *op, local_part, elem_size);
 
#ifdef DEBUG
  printf ("%d: dalib_ind_shadow_put, have received source\n", pcb.i);
#endif

} /* dalib_exec_shadow_put */
 
/**************************************************************************
*                                                                         *
*   void dalib_exec_shadow_set (int *s_id, array_info *base_dsp, char *val) *
*                                                                         *
**************************************************************************/

void FUNCTION(dalib_exec_shadow_set) (s_id, base_dsp, val_data)

shadow_ptr *s_id;       /* shadow schedule                               */
array_info *base_dsp;   /* descriptor of the array                       */
char *val_data;         /* 'size' bytes that are copied into each entry  */

{ unsigned char *data;  /* running pointer into the shadow part          */
  char *src;
  int N;
  int size, serial;
  int ov_size;
  int remaining, j;
  int last_dim, left_ov;

  dalib_verify_shadow_schedule (s_id);  /* check valid shadow pointer */

  dalib_check_shadow_size (*s_id, *base_dsp);

  dalib_ind_array_info (*base_dsp, &size, &serial, &N);

  ov_size = (*s_id)->shadow_entries;

  last_dim = (*base_dsp)->rank - 1;
  left_ov  = (*base_dsp)->dimensions[last_dim].shadow[0];

  /* filling starts (offset + left_ov + 1) * serial elements behind f_data */

  data  = (*base_dsp)->f_data;
  data += ((*s_id)->offset + left_ov + 1) * size * serial;

  if ((*s_id)->shadow_area_size > 0)

     { /* shadow is allocated on the heap, only valid for original array */

       if ((*s_id)->base_data != data)

         { dalib_internal_error ("exec_shadow_set on other array");
           dalib_stop ();
         }
     }

#ifdef DEBUG
  printf ("%d: shadow_set, data = %p, size = %d, serial =%d, ov_size = %d\n",
          pcb.i, data, size, serial, ov_size);
#endif

  /* ov_size * serial elements get a copy of the value, byte by byte */

  remaining = ov_size * serial;

  while (remaining-- > 0)

     { src = val_data;

       for (j = size; j > 0; j--) *data++ = *src++;
     }

} /* dalib_exec_shadow_set */

/**************************************************************
*                                                             *
*  FUNCTION(dalib_insp_shadow_release) (shadow_ptr *s_id)     *
*                                                             *
*  - shadow structure is deleted, but not shadow comm. sched. *
*                                                             *
**************************************************************/

void FUNCTION(dalib_insp_shadow_release) (s_id, ind_vals, ind_dsp)

shadow_ptr *s_id;      /* internal identification of schedule */

array_info *ind_dsp;   /* not referenced in this routine */
int        *ind_vals;  /* not referenced in this routine */

{ int db_slot;   /* data base position, saved before the record is freed */

  dalib_verify_shadow_schedule (s_id);  /* check valid shadow pointer */

  /* db_pos has to be read now, the record is gone after the free */

  db_slot = (*s_id)->db_pos;

  dalib_free_shadow_schedule (*s_id);

  dalib_inspector_db_release (db_slot);

} /* dalib_insp_shadow_release */

/*****************************************************************************
*                                                                            *
*  void dalib_ind_shadow_define (schedule_ptr *s_id,                         *
*                                char *array_data, char *index_data,         *
*                                array_info *s_dsp,                          *
*                                array_info *array_dsp, *index_dsp)          *
*                                                                            *
*   - update index to point to shadow area, computes schedule s_id           *
*                                                                            *
*   in :  array_dsp, array_data   array distributed along last dimension     *
*   in :  index_dsp, index_data   index values for last dim of array         *
*                                                                            *
*   out :  non local values of index will point to shadow area of array      *
*          s_id will be schedule to operate on shadow area                   *
*                                                                            *
*   - array_dsp is an array with a shadow area                               *
*   - array_dsp can have more than one dimension, but only last              *
*     one can be distributed                                                 *
*                                                                            *
*        !HPF$ DISTRIBUTE array (*,*,ANY)                                    *
*                                                                            *
*  This routine has side effects for the array index_dsp                     *
*  as non-local indexes will point into the overlap area                     *
*                                                                            *
*****************************************************************************/

void FUNCTION(dalib_ind_shadow_define) (s_id, array_data, index_data,
                                        s_dsp, array_dsp, index_dsp)

shadow_ptr *s_id;   /* internal identification of schedule */
array_info *array_dsp, *index_dsp, *s_dsp;
char *array_data, *index_data;

{ int *no_arg;   /* value passed for the arguments that are not given */

  dalib_get_not_present (&no_arg);

  /* no mask is given, so no_arg is passed for both mask arguments;
     descriptor for new indexes is the same one as the old one      */

  FUNCTION(dalib_insp_shadow_get) (s_id, array_data,  array_dsp,
                                   (char *) no_arg, (array_info *) no_arg,
                                   index_data,  index_dsp,
                                   index_data,  index_dsp);

  /* this routine makes sure that the base array has really enough
     overlap                                                       */

  if ((*s_id)->shadow_area_size > 0)

     { /* shadow was allocated on the heap, we do not allow this here */

       char msg[256];

       /* the precision bounds the array name, so the whole message
          fits into msg whatever the length of the name is           */

       sprintf (msg, "ind_shadow_define: %.180s not enough overlap (%d needed)",
                (*array_dsp)->name, (*s_id)->shadow_entries+1);

       dalib_internal_error (msg);
       dalib_stop ();

     }

} /* dalib_ind_shadow_define */

/**************************************************************************
*                                                                         *
*   void dalib_ind_shadow_get (schedule_ptr *s_id, char *array_data,      *
*                              array_info *s_dsp, *array_dsp)             *
*                                                                         *
*   - update shadow area of array corresponding to the schedule           *
*   - shadow area will contain afterwards the non-local values            *
*                                                                         *
**************************************************************************/

void FUNCTION(dalib_ind_shadow_get) (s_id, array_data, s_dsp, array_dsp)

shadow_ptr *s_id;         /* internal identification of schedule */
char       *array_data;   /* not referenced in this routine      */
array_info *s_dsp;        /* not referenced in this routine      */
array_info *array_dsp;    /* descriptor of the array, passed on  */

{
  /* only the schedule and the array descriptor are passed on */

  FUNCTION(dalib_exec_shadow_get) (s_id, array_dsp);

} /* dalib_ind_shadow_get */

/**************************************************************************
*                                                                         *
*   void dalib_ind_shadow_put (schedule_ptr *s_id,                        *
*                              char *array_data, int *op,                 *
*                              array_info *s_dsp, *array_dsp, *op_dsp)    *
*                                                                         *
*   - write non-local computed values back to owning processors           *
*   - elements will be combined according to the reduction operator op    *
*                                                                         *
**************************************************************************/

void FUNCTION(dalib_ind_shadow_put) (s_id, array_data, op,
                                      s_dsp, array_dsp, op_dsp)

shadow_ptr *s_id;         /* internal identification of schedule */
char       *array_data;   /* not referenced in this routine      */
int        *op;           /* operator, passed on                 */
array_info *s_dsp;        /* not referenced in this routine      */
array_info *array_dsp;    /* descriptor of the array, passed on  */
array_info *op_dsp;       /* not referenced in this routine      */

{
  /* schedule, array descriptor and operator are passed on */

  FUNCTION(dalib_exec_shadow_put) (s_id, array_dsp, op);

} /* dalib_ind_shadow_put */

/**************************************************************************
*                                                                         *
*   void dalib_ind_shadow_set (schedule_ptr *s_id,                        *
*                              char *array_data, char *val_data,          *
*                              array_info *s_dsp, *array_dsp, *val_dsp)   *
*                                                                         *
*   - initializes shadow area with the value 'val'                        *
*   - needed for reductions on distributed arrays                         *
*                                                                         *
**************************************************************************/

void FUNCTION(dalib_ind_shadow_set) (s_id, array_data, val_data,
                                     s_dsp, array_dsp, val_dsp)

shadow_ptr *s_id;         /* internal identification of schedule */
char       *array_data;   /* not referenced in this routine      */
char       *val_data;     /* value, passed on                    */
array_info *s_dsp;        /* not referenced in this routine      */
array_info *array_dsp;    /* descriptor of the array, passed on  */
array_info *val_dsp;      /* not referenced in this routine      */

{
  /* schedule, array descriptor and value are passed on */

  FUNCTION(dalib_exec_shadow_set) (s_id, array_dsp, val_data);

} /* dalib_ind_shadow_set */

/**************************************************************************
*                                                                         *
*   void dalib_ind_shadow_set1 (schedule_ptr *s_id,                       *
*                               char *array_data, char *val_data,         *
*                               array_info *s_dsp, *array_dsp, *val_dsp)  *
*                                                                         *
*     - initialize shadow area for out-of-range values                    *
*                                                                         *
**************************************************************************/

void FUNCTION(dalib_ind_shadow_set1) (s_id, array_data, val_data,
                                      s_dsp, array_dsp, val_dsp)

schedule_ptr *s_id;       /* not referenced in this routine */
array_info *array_dsp, *s_dsp, *val_dsp;
char *array_data, *val_data;

{ unsigned char *target;   /* destination of the copy */
  int N;
  int size, serial;

  target = (*array_dsp)->f_data;

  dalib_ind_array_info (*array_dsp, &size, &serial, &N);

  /* skip N entries of size * serial bytes each;
     target points to the shadow area afterwards   */

  target += N * size * serial;

  /* exactly one entry (size * serial bytes) is copied from val_data */

  dalib_memcopy (target, val_data, size * serial);

} /* dalib_ind_shadow_set1 */

/**************************************************************************
*                                                                         *
*   void dalib_ind_shadow_free (schedule_ptr *s_id, char *index_data,     * 
*                               array_info *s_dsp, array_info *index_dsp) *
*                                                                         *
*    - resets changed index values of index_data                          *
*    - frees internal data structures used for the schedule               *
*                                                                         *
**************************************************************************/
 
void FUNCTION(dalib_ind_shadow_free) (s_id, index_data,
                                      s_dsp, index_dsp)

shadow_ptr *s_id;   /* internal identification of schedule */
char *index_data;   /* not referenced, data is taken from index_dsp */
array_info *s_dsp, *index_dsp;
 
{ int db_id;
  inspector_data *S;
  int is_new, size, no;
  int *indexes;

  /* check the schedule before it is dereferenced, in the same way
     as dalib_insp_shadow_release and the exec routines do it       */

  dalib_verify_shadow_schedule (s_id);  /* check valid shadow pointer */

  /* db_pos has to be read now, the record is freed further down */

  db_id = (*s_id)->db_pos;

  S = dalib_inspector_db_get (db_id);

  /* get correct data pointer, do not rely on index_data currently */

  dalib_secarray_get_data (*index_dsp, 1, &no, &size, &indexes, &is_new);

  /* is_new set is treated as an error here (noncontiguous indexes) */

  if (is_new)

     { dalib_internal_error ("shadow index reset for noncontiguous indexes");
       dalib_stop ();
     }

  /* reset the index values with the changes recorded in the data base */

  dalib_shadow_reset_indexes (indexes, S->index_changes);

  /* give all the needed memory free */

  dalib_free_shadow_schedule (*s_id);

  dalib_inspector_db_release (db_id);

} /* dalib_ind_shadow_free */

void FUNCTION(print_addr)(f)
int *f;   /* the address of this argument is printed */
{ /* prints the own processor id (pcb.i) and the address f;
     a pointer needs the %p conversion and a (void *) argument,
     %d is wrong where pointers are wider than int              */

  printf ("%d: address = %p\n", pcb.i, (void *) f);
}
