/**************************************************************************
*                                                                         *
*  Author      : Dr. Thomas Brandes, GMD, SCAI.LAB                        *
*                                                                         *
*  Copyright   : GMD St. Augustin, Germany                                *
*  Date        : Nov 94                                                   *
*  Last Update : Oct 97                                                   *
*                                                                         *
*  This Module is part of the DALIB                                       *
*                                                                         *
*  Module      : distribution                                             *
*                                                                         *
*  Function: Descriptor for the DISTRIBUTION of arrays/templates          *
*                                                                         *
*  Export :  FORTRAN Interface                                            *
*  ===========================                                            *
*                                                                         *
*    void FUNCTION(dalib_distribute) (array_id, topology,                 *
*                                     type1, info1, len1, map1, ....      *
*                                     type7, info7, len7, map7)           *
*                                                                         *
*     type<k> : BLOCK-1, CYCLIC-2, SERIAL-0, GEN_BLOCK-3                  *
*     info<k> : BLOCK (info), CYCLIC (info)                               *
*                                                                         *
*     GEN_BLOCK (size_array)                                              *
*     INDIRECT  (map_array)                                               *
*     ARBITRARY (size, len, map)   -> info = size                         *
*                                                                         *
*  Export :  DALIB Interface                                              *
*  =========================                                              *
*                                                                         *
*   void dalib_dist_local_sizes (array_info array_id)                     *
*                                                                         *
*    - computes local sizes of distributed arrays/templates               *
*    - DimInfo[k].map_flag = 0/1 for old/new addressing translation       *
*                                                                         *
*  RESTRICTIONS (for full HPF 2.0)                                        *
*  ===============================                                        *
*                                                                         *
*   - arrays for GEN_BLOCK, INDIRECT, ARBITRARY will not be copied        *
*     (so they should not be changed until the local sizes have           *
*      been computed)                                                     *
*                                                                         *
*  UPDATE:                                                                *
*  =======                                                                *
*                                                                         *
*     10/97  : cyclic(m) now supported                                    *
*     10/97  : indirect(map) now supported                                *
*     10/97  : arbitrary(size,len,map) now supported                      *
*                                                                         *
**************************************************************************/

#include <stdio.h>
#include "dalib.h"

#undef DEBUG
#define CHECK

     /*********************************************************
     *                                                        *
     *  Definition of the Data Structure for Template         *
     *                                                        *
     *  - note that every value is equal on each processor    *
     *                                                        *
     *   DISTRIBUTE (*)                                       *
     *   DISTRIBUTE (BLOCK (m))                               *
     *   DISTRIBUTE (CYCLIC (n))                              *
     *   DISTRIBUTE (GEN_BLOCK (size_array))                  *
     *   DISTRIBUTE (INDIRECT  (map_array))                   *
     *   DISTRIBUTE (ARBITRARY (n, len(n), map(n)))           *
     *                                                        *
     *********************************************************/

# define AXIS_SET       0
# define AXIS_INHERITED 1
# define AXIS_COMPUTED  2

typedef struct { short axis_type;  
               } ySERIAL_DIM;

/* Record for a BLOCK distributed dimension.
   user_bsize and sys_bsize both start as the user's info value;
   sys_bsize is overwritten with the size returned by
   dalib_calc_block_size when local sizes are computed. */
typedef struct { short axis_type;       /* kind tag of the dimension    */
                 int   axis_dim;        /* dimension of processor array */
                 int   user_bsize;      /* BLOCK (axis_info)            */
                 int   sys_bsize;       /* block size used by DALIB     */
               } yBLOCK_DIM;

/* Record for a CYCLIC / CYCLIC(m) distributed dimension.
   axis_offsets is only filled for axis_info > 1; it then points to
   NP+1 ints whose first entry holds NP.
   NOTE(review): axis_state is neither read nor written by the
   routines visible in this part of the file - confirm its use. */
typedef struct { short axis_type;       /* kind tag of the dimension    */
                 int   axis_dim;        /* dimension of processor array */
                 int   axis_state;      /* state for allocation         */
                 int   axis_info;       /* CYCLIC (axis_info)           */
                 int   *axis_offsets;   /* offsets for reordering       */
               } yCYCLIC_DIM;

/* Record for a GEN_BLOCK distributed dimension.
   axis_state is AXIS_SET while only the user's size array is known
   and becomes AXIS_COMPUTED once axis_offsets has been allocated.
   axis_sizes refers to the caller's array (it is not copied).
   axis_offsets points to NP+1 ints; entry 0 holds NP, the offsets
   proper start at entry 1.  Note that axis_state is a short here. */
typedef struct { short axis_type;       /* kind tag of the dimension    */
                 int   axis_dim;        /* dimension of processor array */
                 short axis_state;      /* state for the use            */
                 int  *axis_sizes;      /* GEN_BLOCK (axis_info_array)  */
                 int  *axis_offsets;    /* offsets for reordering       */
               } yGEN_BLOCK_DIM;

/* Record for an INDIRECT (map array) distributed dimension.
   While axis_state is AXIS_SET, axis_map is the caller's map array
   and axis_offsets is NULL.  After the local sizes have been
   computed (AXIS_COMPUTED) axis_map points to 2*N+1 ints allocated
   by DALIB (entry 0 = N, then a permutation area and an inverse
   area of N ints each) and axis_offsets to NP+1 ints (entry 0 = NP). */
typedef struct { short axis_type;       /* kind tag of the dimension    */
                 int   axis_dim;        /* dimension of processor array */
                 int   axis_state;      /* state for allocation         */
                 int  *axis_map;        /* MAP array, later permutation */
                 int  *axis_offsets;    /* considered as GEN_BLOCK (..) */
               } yINDIRECT_DIM;

/* Record for an ARBITRARY (size, len, map) distributed dimension.
   While axis_state is AXIS_SET, axis_offsets is the caller's len
   array and axis_map the caller's map array.  After the local sizes
   have been computed (AXIS_COMPUTED) axis_offsets points to NP+1
   ints (entry 0 = NP) and axis_map to 2*no_chunks+2 ints holding the
   chunk offsets followed, from index no_chunks+1 on, by the local
   chunk offsets.  Note that axis_state comes after the two pointers
   here, unlike in yINDIRECT_DIM. */
typedef struct { short axis_type;       /* kind tag of the dimension    */
                 int   axis_dim;        /* dimension of processor array */
                 int  *axis_map;        /* MAP array, later permutation */
                 int  *axis_offsets;    /* considered as GEN_BLOCK (..) */
                 int   axis_state;      /* state for allocation         */
                 int   no_chunks;       /* number of chunks (user info) */
               } yARBITRARY_DIM;

/* The two leading members (axis_type, axis_dim) that all distributed
   variants above declare first, in this order. */
typedef struct { short axis_type;       /* for BLOCK(), CYCLIC(),       */
                 int   axis_dim;        /* GEN_BLOCK(), ..., ALL        */
               } yGENERAL_DIM;

    /* description of a serial/distributed dimension */

/* One dimension of a distribution.  axis_type tells which of the
   variant records below is the valid one; every variant starts with
   its own axis_type member, so the tag can be read through the bare
   axis_type member as well. */

union DistDimRecord {

    short axis_type;                /* kind tag, selects the variant  */

    ySERIAL_DIM     SERIAL_DIM;     /* DISTRIBUTE (*)                 */
    yBLOCK_DIM      BLOCK_DIM;      /* DISTRIBUTE (BLOCK (m))         */
    yCYCLIC_DIM     CYCLIC_DIM;     /* DISTRIBUTE (CYCLIC (n))        */
    yGEN_BLOCK_DIM  GEN_BLOCK_DIM;  /* DISTRIBUTE (GEN_BLOCK (sizes)) */
    yINDIRECT_DIM   INDIRECT_DIM;   /* DISTRIBUTE (INDIRECT (map))    */
    yARBITRARY_DIM  ARBITRARY_DIM;  /* DISTRIBUTE (ARBITRARY (...))   */
    yGENERAL_DIM    GENERAL_DIM;    /* axis_type + axis_dim only      */

 };       /* info about the distribution of one dim */

    /* descriptor for a full distribution */

/* Complete distribution of an array/template: the target topology,
   the list of arrays aligned to it and one DistDimRecord for each
   dimension. */

struct DistributionRecord {

  int topology;        /* topology identifier, may be kANY_TOPOLOGY */
  int no_aligned;      /* NOTE(review): presumably number of used entries
                          in aligned_arrays - not used in this part      */
  int max_aligned;     /* allocated length of aligned_arrays             */
  int is_localized;    /* 0 : not localized, 1 : is localized
                          (set by dalib_dist_local_sizes)                */
  array_info *aligned_arrays;   /* is aligned_arrays [max_aligned] */
  union DistDimRecord dims[MAX_DIMENSIONS];  /* one entry per dimension */
 };

/**************************************************************************
*                                                                         *
*   void dalib_distribution_check (array_info array_id)                   *
*                                                                         *
*     - reports a serious error if array_id has no distribution           *
*                                                                         *
**************************************************************************/

/* Verifies that array_id can be used as a distributed object.
 *
 *   array_id : descriptor to be checked
 *
 * Two tests are made one after the other: array_id must be accepted
 * by dalib_is_array_info, and its DistributeInfo must differ from
 * NO_DISTRIBUTION.  Each failing test reports an internal error and
 * then calls dalib_stop ().
 */

void dalib_distribution_check (array_id)
array_info array_id;

{
  /* test 1 : is it an array descriptor at all ? */

  if (!dalib_is_array_info (array_id))
    {
      dalib_internal_error ("distribution check: no array info");
      dalib_stop ();
    }

  /* test 2 : does the descriptor own a distribution ? */

  if (NO_DISTRIBUTION == array_id->DistributeInfo)
    {
      dalib_internal_error ("distribution check: no distribution");
      dalib_stop ();
    }

} /* dalib_distribution_check */

/**************************************************************************
*                                                                         *
*   void dalib_distribution_print (Distribution dist, int rank)           *
*                                                                         *
**************************************************************************/

/* Prints the distribution in the form of an HPF directive on stdout.
 *
 *   dist : distribution to be printed
 *   rank : number of dimensions, i.e. used entries of dist->dims
 *
 * Output: "!HPF$ DISTRIBUTE (<dim>,...,<dim>) ONTO ..." where <dim> is
 * "*", "CYCLIC(n)", "BLOCK(n)", "GEN_BLOCK(offsets=...)" or, for all
 * other kinds, "kind=<axis_type>".
 *
 * Corrections against the former version:
 *
 *  - the dimension pointer advances together with the loop index;
 *    before, the first dimension was printed rank times
 *  - GEN_BLOCK: entry 0 of axis_offsets holds the saved number of
 *    processors (see dalib_compute_genblock_sizes), so the offsets
 *    are taken from entries 1..NP
 *  - GEN_BLOCK: axis_offsets is still NULL as long as the local sizes
 *    have not been computed; nothing is printed between the brackets
 *    in that case instead of dereferencing the NULL pointer
 */

void dalib_distribution_print (dist, rank)
Distribution dist;
int rank;

{ int i;
  DistDim dim;
  int NP, NId;
  int top_dim;
  int *offsets;

  printf ("!HPF$ DISTRIBUTE (");

  /* dim walks over dist->dims in step with i */

  for (i=0, dim=dist->dims; i<rank; i++, dim++)

    { switch (dim->axis_type) {

       case kSERIAL_DIM     : printf ("*"); break;
       case kCYCLIC_DIM     : printf ("CYCLIC(%d)", dim->CYCLIC_DIM.axis_info);
                              break;
       case kBLOCK_DIM      : printf ("BLOCK(%d)", dim->BLOCK_DIM.sys_bsize);
                              break;
       case kGEN_BLOCK_DIM  :

         printf ("GEN_BLOCK(offsets=");

         offsets = dim->GEN_BLOCK_DIM.axis_offsets;

         if (offsets != (int *) 0)   /* offsets have been computed */

            { top_dim = dim->GEN_BLOCK_DIM.axis_dim;
              dalib_top_info (dist->topology, top_dim, &NP, &NId);

              /* offsets[0] is the saved NP, data is in 1..NP */

              for (NId = 1; NId <= NP; NId++)
                  { printf ("%d", offsets[NId]);
                    if (NId < NP) printf (",");
                  }
            }

         printf (")");
         break;

       default : printf ("kind=%d", dim->axis_type);
                 break;

     } /* switch */

     if (i<rank-1) printf (",");

    } /* for */

  if (dist->topology == kANY_TOPOLOGY)

     printf (") ONTO *\n");

    else

     printf (") ONTO topid = %d (rank=%d)\n", 
             dist->topology, dalib_top_rank (dist->topology));

} /* dalib_distribution_print */

     /*********************************************************
     *                                                        *
     *  void dalib_axis_align (int offsets[], NP, lb, ub)     *
     *                                                        *
     *  - align offsets to global size lb:ub                  *
     *  - can now be called several times without error       *
     *                                                        *
     *********************************************************/

/* Moves the NP+1 entries of offsets so that offsets[0] becomes lb and
 * cuts every entry that would exceed ub+1 down to ub+1.
 *
 *   offsets : NP+1 values, changed in place
 *   NP      : number of blocks
 *   lb, ub  : global range of the dimension
 *
 * offsets[NP] - offsets[0] is taken as the sum of all block sizes; if
 * it is smaller than the extent of lb:ub an internal error is
 * reported and dalib_stop () is called.  As everything is computed
 * relative to offsets[0] the routine can be applied repeatedly.
 */

static void dalib_axis_align (offsets, NP, lb, ub)

int offsets[];
int NP, lb, ub;

{ int k;
  int extent;    /* number of elements in lb:ub               */
  int base;      /* first entry on entry, reference for shift */
  int shift;     /* value added to every entry                */
  int limit;     /* biggest value an entry may have           */

  base   = offsets[0];
  extent = (ub - lb + 1);

  if (offsets[NP]-base < extent)

     { dalib_internal_error ("block sizes do not sum up to total size");
       printf ("dim size is : %d (range is %d:%d)\n", extent, lb, ub);
       printf ("block sum   : %d\n", offsets[NP]-base);
       dalib_stop ();
     }

  shift = lb - base;
  limit = ub+1;

  for (k=0; k<=NP; k++)

     { int moved;

       moved = offsets[k] + shift;

       offsets[k] = (moved > limit) ? limit : moved;
     }

#ifdef DEBUG
  for (k=1; k<=NP; k++)
     printf ("processor %d has %d - %d\n", k, offsets[k-1], offsets[k]-1);
#endif

} /* dalib_axis_align */

/**************************************************************************
*                                                                         *
*   void dalib_offset_set_local (offsets, P, pid, global, local)          *
*                                                                         *
*   offsets            5    11    18     26     35     45     56          *
*                                                                         *
*   lb:ub = 1:50                                                          *
*   ranges            1:5   6:11 12:18  19:26  27:35  36:45  46:50        *
*                                                                         *
*   1 <= pid <= P                                                         *
*                                                                         *
**************************************************************************/

/* Derives the local range of processor pid from running block sums.
 *
 *   offsets : offsets[k-1] is the number of elements owned by the
 *             processors 1..k, so offsets[P-1] is the sum of all blocks
 *   P       : number of processors
 *   pid     : position of the processor, 1 <= pid <= P
 *   global  : global[0]:global[1] is the global range (only read)
 *   local   : receives lower bound, upper bound and stride 1
 *
 * If the sum of all blocks does not reach the upper global bound an
 * internal error is reported and dalib_stop () is called.  Bounds
 * above the global upper bound are cut off; a processor without any
 * element gets the empty range ub+1:ub.
 */

void dalib_offset_set_local (offsets, P, pid, global, local)

int offsets [];
int P, pid;
int global  [];
int local   [];

{ int lb, ub;
  int first, last;

  lb = global[0];
  ub = global[1];

  if (lb + offsets[P-1] <= ub)

     { dalib_internal_error ("sizes do not sum up to total size");
       printf ("dim size is     : %d:%d\n", lb, ub);
       printf ("sum of blocks   : %d\n", offsets[P-1]);
       dalib_stop ();
     }

  /* the first processor starts at lb, all others behind the
     elements of their predecessors                           */

  first = (pid == 1) ? lb : lb + offsets[pid-2];
  last  = lb + offsets[pid-1] - 1;

  if (first > ub) first = ub+1;   /* gives an empty range */
  if (last  > ub) last  = ub;

  local[0] = first;
  local[1] = last;
  local[2] = 1;

} /* dalib_offset_set_local */

/**************************************************************************
*                                                                         *
*  compute the local size of distributed dimensions                       *
*                                                                         *
*  - verify that overlap area is not too big                              *
*  - verify that overlap is only used for block distribution              *
*                                                                         *
**************************************************************************/

/* Local size of one BLOCK distributed dimension.
 *
 *   top_id        : topology the dimension is distributed onto
 *   dimensions    : dimension entry; global_size is read, local_size
 *                   and map_flag are set
 *   distributions : distribution entry, must be of kind kBLOCK_DIM;
 *                   BLOCK_DIM.sys_bsize is set
 *
 * The block size of the user is handed to dalib_calc_block_size,
 * the value coming back is stored as sys_bsize and then used by
 * dalib_block_size to fill the local range.
 */

static void dalib_compute_block_sizes (top_id, dimensions, distributions)

int     top_id;
DimInfo *dimensions;
DistDim distributions;    

{ int *glob_range = dimensions->global_size;
  int *loc_range  = dimensions->local_size;
  int proc_dim;            /* dimension of the processor array */
  int my_pos, nprocs;      /* own position, number of procs    */
  int bsize;

  proc_dim = distributions->BLOCK_DIM.axis_dim;

  dalib_top_info (top_id, proc_dim, &nprocs, &my_pos);

  /* start with the size of the user, bsize may be changed */

  bsize = distributions->BLOCK_DIM.user_bsize;
  dalib_calc_block_size (nprocs, glob_range[0], glob_range[1], &bsize);
  distributions->BLOCK_DIM.sys_bsize = bsize;

  dalib_block_size (nprocs, my_pos, bsize, glob_range, loc_range);

  loc_range[2] = 1;     /* stride of the local section */

#ifdef DEBUG
  printf ("%d: BLOCK to topdim %d (%d of %d) l: %d:%d -> %d:%d:%d)\n",
          pcb.i, proc_dim, my_pos, nprocs, glob_range[0], glob_range[1], 
          loc_range[0], loc_range[1], loc_range[2]);
#endif

  dimensions->map_flag = 0;     /* local is direct subrange */

} /* dalib_compute_block_sizes */

/**************************************************************************
*                                                                         *
*  void dalib_compute_genblock_sizes (int top_id,                         *
*                                     DimInfo *dimension,                 *
*                                     DistDim distribution)               *
*                                                                         *
*   GEN_BLOCK_DIM.axis_dim      :  dimension of topology (unchanged)      *
*   GEN_BLOCK_DIM.axis_offsets  :  block sizes (before)                   *
*                                  offsets (after)                        *
*                                                                         *
*   local range on NId  :  offsets[NId-1] .. offsets[NId]-1               *
*                                                                         *
*   sizes       5     6     7      8      9     10     11                 *
*   offsets     1     6    12     19     27     36     46    51           *
*                                                                         *
*   NOTE : offsets is now allocated by DALIB                              *
*                                                                         *
**************************************************************************/

/* Local size of one GEN_BLOCK distributed dimension.
 *
 *   top_id       : topology the dimension is distributed onto
 *   dimension    : dimension entry; global_size is read, local_size
 *                  and map_flag are set
 *   distribution : distribution entry of kind kGEN_BLOCK_DIM
 *
 * As long as axis_state is AXIS_SET an array of NP+1 ints is
 * allocated, NP is saved in its first entry and the remaining NP
 * entries are filled by dalib_gen_block_offsets from the block sizes
 * of the user; axis_state becomes AXIS_COMPUTED.  In any other state
 * the array found in axis_offsets is used again.
 */

static void dalib_compute_genblock_sizes (top_id, dimension, distribution)

int     top_id;
DimInfo *dimension;
DistDim distribution;    

{ int *glob_range = dimension->global_size;
  int *loc_range  = dimension->local_size;
  int extent      = glob_range[1] - glob_range[0] + 1;  /* no of elements */
  int proc_dim    = distribution->GEN_BLOCK_DIM.axis_dim;
  int my_pos, nprocs;
  int *sums;      /* points behind the entry with the saved NP */

  dalib_top_info (top_id, proc_dim, &nprocs, &my_pos);

  if (distribution->GEN_BLOCK_DIM.axis_state != AXIS_SET)

     /* already computed, valid for inherited distributions too */

     { sums = distribution->GEN_BLOCK_DIM.axis_offsets + 1;
     }

    else

     { int *sizes;
       int *store;

       sizes = distribution->GEN_BLOCK_DIM.axis_sizes;
       store = (int *) dalib_int_malloc (nprocs+1, "genblock_sizes");

       store[0] = nprocs;    /* first entry keeps the number of procs */

       distribution->GEN_BLOCK_DIM.axis_offsets = store;

       sums = store + 1;

       dalib_gen_block_offsets (nprocs, extent, sizes, sums);

       distribution->GEN_BLOCK_DIM.axis_state = AXIS_COMPUTED;
     }

  dalib_offset_set_local (sums, nprocs, my_pos, glob_range, loc_range);

  dimension->map_flag = 0;     /* local is direct subrange */

#ifdef DEBUG
  printf ("%d: GEN_BLOCK to topdim %d (%d of %d) l: %d:%d -> %d:%d:%d)\n",
          pcb.i, proc_dim, my_pos, nprocs, glob_range[0], glob_range[1],
          loc_range[0], loc_range[1], loc_range[2]);
#endif

} /* dalib_compute_genblock_sizes */

/**************************************************************************
*                                                                         *
*  void dalib_compute_map_sizes (int top_id,                              *
*                                DimInfo *dimension,                      *
*                                DistDim distribution)                    *
*                                                                         *
*   INDIRECT_DIM.axis_dim      :  dimension of topology (unchanged)       *
*   INDIRECT_DIM.axis_offsets  :  NULL (before)                           *
*                                 offsets (after)                         *
*                                                                         *
*   local range on NId  :  offsets[NId-1] .. offsets[NId]-1               *
*                                                                         *
*   sizes       5     6     7      8      9     10     11                 *
*   offsets     1     6    12     19     27     36     46    51           *
*                                                                         *
*   NOTE : offsets is now allocated by DALIB                              *
*                                                                         *
**************************************************************************/

/* Local size of one INDIRECT distributed dimension.
 *
 *   top_id       : topology the dimension is distributed onto
 *   dimension    : dimension entry; global_size is read, local_size
 *                  and map_flag are set
 *   distribution : distribution entry of kind kINDIRECT_DIM
 *
 * As long as axis_state is AXIS_SET two arrays are allocated:
 *
 *   NP+1  ints for the offsets (first entry gets NP at the end)
 *   2*N+1 ints, first entry N, then N ints for the permutation and
 *         N ints for its inverse
 *
 * dalib_mapping_offsets fills them from the map array of the user.
 * The new arrays replace axis_offsets and axis_map, so the pointer to
 * the map of the user is no longer kept in the descriptor afterwards.
 * In any other state the offsets found in axis_offsets are used again.
 */

static void dalib_compute_map_sizes (top_id, dimension, distribution)

int     top_id;
DimInfo *dimension;
DistDim distribution;    

{  int *glob_range = dimension->global_size;
   int *loc_range  = dimension->local_size;
   int *user_map;
   int *sums;
   int extent;
   int proc_dim;
   int my_pos, nprocs;

   proc_dim = distribution->INDIRECT_DIM.axis_dim;

   dalib_top_info (top_id, proc_dim, &nprocs, &my_pos);

   /* must be fetched here, axis_map is overwritten below */

   user_map = distribution->INDIRECT_DIM.axis_map;

   extent = glob_range[1] - glob_range[0] + 1;

   if (distribution->INDIRECT_DIM.axis_state != AXIS_SET)

     { sums = distribution->INDIRECT_DIM.axis_offsets + 1;
     }

    else

     { int *off_store;
       int *perm_store;
       int *perm, *inv;

       off_store  = dalib_int_malloc (nprocs+1,   "compute_map_sizes");
       perm_store = dalib_int_malloc (2*extent+1, "compute_map_sizes");

       perm_store[0] = extent;   /* allocated size for later use */

       distribution->INDIRECT_DIM.axis_offsets = off_store;
       distribution->INDIRECT_DIM.axis_map     = perm_store;

       perm = perm_store + 1;
       inv  = perm + extent;

       /* Attention: entry 0 of off_store is used by the call too,
                     so NP can only be saved afterwards            */

       dalib_mapping_offsets (nprocs, user_map, glob_range[0], glob_range[1],
                              off_store, perm, inv);

       off_store[0] = nprocs;

       sums = off_store + 1;

       distribution->INDIRECT_DIM.axis_state = AXIS_COMPUTED;
     }

   dalib_offset_set_local (sums, nprocs, my_pos, glob_range, loc_range);

#ifdef DEBUG
  printf ("%d: MAP to topdim %d (%d of %d) l: %d:%d -> %d:%d:%d)\n",
          pcb.i, proc_dim, my_pos, nprocs, glob_range[0], glob_range[1], 
          loc_range[0], loc_range[1], loc_range[2]);
#endif

   dimension->map_flag = 1;     /* local is indirect subrange */

} /* dalib_compute_map_sizes */

/**************************************************************************
*                                                                         *
*  void dalib_compute_arbitrary_sizes (int top_id,                        *
*                                      DimInfo *dimension,                *
*                                      DistDim distribution)              *
*                                                                         *
*   ARBITRARY_DIM.axis_dim     :  dimension of topology (unchanged)       *
*                                                                         *
*   ARBITRARY_DIM.no_chunks    :  number of blocks (unchanged)            *
*                                                                         *
*   ARBITRARY_DIM.axis_offsets :  len (before)                            *
*                                 P_offsets (after)                       *
*                                                                         *
*   ARBITRARY_DIM.axis_map     :  map (before)                            *
*                                 global_offsets/local_offsets (after)    *
*                                                                         *
*   len (map)    :   10 (1)  20 (2)  30 (2)  40 (1)                       *
*                                                                         *
*   P_sizes      :    50    50                                            *
*   P_offsets    :     0    50    100                                     *
*                                                                         *
*   global_offsets :    0      10      30      60     100                 *
*   local_offsets  :    0      50      70      10                         *
*                                                                         *
*   permutation    :    1       4       2       3                         *
*                                                                         *
*                      10      40      20      30                         *
*                       0      10      50      70                         *
*                                                                         *
**************************************************************************/

/* Local size of one ARBITRARY distributed dimension.
 *
 *   top_id       : topology the dimension is distributed onto
 *   dimension    : dimension entry; global_size is read, local_size
 *                  and map_flag are set
 *   distribution : distribution entry of kind kARBITRARY_DIM
 *
 * As long as axis_state is AXIS_SET two arrays are allocated:
 *
 *   2*no_chunks+2 ints : chunk offsets, from index no_chunks+1 on
 *                        the local chunk offsets
 *   NP+1 ints          : processor offsets (first entry gets NP
 *                        at the end)
 *
 * dalib_arbitrary_offsets fills them from the len and map arrays of
 * the user.  The new arrays replace axis_map and axis_offsets, so the
 * pointers to the arrays of the user are no longer kept in the
 * descriptor afterwards.  In any other state the offsets found in
 * axis_offsets are used again.
 */

static void dalib_compute_arbitrary_sizes (top_id, dimension, distribution)

int      top_id;
DimInfo  *dimension;
DistDim  distribution;    

{ int *glob_range = dimension->global_size;
  int *loc_range  = dimension->local_size;
  int proc_dim;
  int my_pos, nprocs;
  int n_chunks;
  int *user_len, *user_map;
  int *proc_sums;

  /* the arrays of the user must be fetched before the descriptor
     entries are overwritten                                        */

  proc_dim = distribution->ARBITRARY_DIM.axis_dim;
  n_chunks = distribution->ARBITRARY_DIM.no_chunks;
  user_map = distribution->ARBITRARY_DIM.axis_map;
  user_len = distribution->ARBITRARY_DIM.axis_offsets;

  dalib_top_info (top_id, proc_dim, &nprocs, &my_pos);

  if (distribution->ARBITRARY_DIM.axis_state != AXIS_SET)

    { proc_sums = distribution->ARBITRARY_DIM.axis_offsets + 1;
    }

   else

    { int *chunk_glob, *chunk_loc;
      int *proc_store;

      chunk_glob = dalib_int_malloc (2*n_chunks+2, "arbitrary_sizes");
      proc_store = dalib_int_malloc (nprocs+1, "arbitrary_sizes");

      distribution->ARBITRARY_DIM.axis_offsets = proc_store;
      distribution->ARBITRARY_DIM.axis_map     = chunk_glob;

      chunk_loc = chunk_glob + n_chunks + 1;

      dalib_arbitrary_offsets (nprocs, n_chunks, user_len, user_map, 
                               glob_range[0], glob_range[1],
                               proc_store, chunk_glob, chunk_loc);

      proc_store [0] = nprocs;     /* size of the allocated data */

      proc_sums = proc_store + 1;  /* here the running sums start */

      distribution->ARBITRARY_DIM.axis_state = AXIS_COMPUTED;
    }

  dalib_offset_set_local (proc_sums, nprocs, my_pos, glob_range, loc_range); 

  dimension->map_flag = 1;     /* local is indirect subrange */

} /* dalib_compute_arbitrary_sizes */

/**************************************************************************
*                                                                         *
*   void dalib_compute_cyclic_sizes (int top_id,                          *
*                                    DimInfo *dimension,                  *
*                                    DistDim distributions)               *
*                                                                         *
**************************************************************************/

/* Local size of one CYCLIC / CYCLIC(m) distributed dimension.
 *
 *   top_id       : topology the dimension is distributed onto
 *   dimension    : dimension entry; global_size is read, local_size
 *                  and map_flag are set
 *   distribution : distribution entry of kind kCYCLIC_DIM
 *
 * A block length <= 0 is taken as 1 and written back to axis_info.
 *
 *   CYCLIC(1) : local range comes from dalib_cyclic_size, map_flag = 0
 *   CYCLIC(m) : NP+1 ints are allocated (first entry NP), the other
 *               entries are filled by dalib_block_cyclic_offsets and
 *               the dimension is then handled via
 *               dalib_offset_set_local, map_flag = 1
 *
 * NOTE(review): for CYCLIC(m) a new offset array is allocated with
 * every call, axis_state is not looked at as in the other routines.
 */

static void dalib_compute_cyclic_sizes (top_id, dimension, distribution)

int     top_id;
DimInfo *dimension;
DistDim distribution;    

{  int *glob_range = dimension->global_size;
   int *loc_range  = dimension->local_size;
   int extent      = glob_range[1] - glob_range[0] + 1;
   int proc_dim    = distribution->CYCLIC_DIM.axis_dim;
   int blen        = distribution->CYCLIC_DIM.axis_info;
   int my_pos, nprocs;

   if (blen <= 0)

      { /* no useful block length, take pure cyclic */

        blen = 1;
        distribution->CYCLIC_DIM.axis_info = 1;
      }

   dalib_top_info (top_id, proc_dim, &nprocs, &my_pos);

   if (blen != 1)

      { int *store;
        int *sums;

        store = dalib_int_malloc (nprocs+1, "cyclic_offsets");

        distribution->CYCLIC_DIM.axis_offsets = store;

        store[0] = nprocs;   /* first entry keeps the number of procs */

        sums = store + 1;

        dalib_block_cyclic_offsets (nprocs, extent, blen, sums);

        /* local part on my_pos of nprocs for the global range */

        dalib_offset_set_local (sums, nprocs, my_pos, glob_range, loc_range);

        dimension->map_flag = 1;     /* local is indirect subrange */
      }

    else

      { dalib_cyclic_size (nprocs, my_pos, glob_range, loc_range);

        dimension->map_flag = 0;     /* old addressing translation */
      }

#ifdef DEBUG
 printf ("%d: CYCLIC (%d) to topdim %d (%d of %d) %d:%d -> %d:%d:%d)\n",
         pcb.i, blen, proc_dim, my_pos, nprocs, glob_range[0], glob_range[1], 
         loc_range[0], loc_range[1], loc_range[2]);
#endif

} /* dalib_compute_cyclic_sizes */

/**************************************************************************
*                                                                         *
*   void dalib_set_global_sizes (DimInfo *dimension)                     *
*                                                                         *
***************************************************************************/

/* Local size of a serial dimension: the local range is the global
 * range with stride 1.
 *
 *   dimension : dimension entry; global_size is read, local_size
 *               and map_flag are set
 *
 * The values are always set again, as the local size might differ
 * from the global one if the descriptor has been copied before.
 */

static void dalib_set_global_sizes (dimension)

DimInfo  *dimension;

{ int *src = dimension->global_size;
  int *dst = dimension->local_size;

  dst[0] = src[0];    /* lower bound */
  dst[1] = src[1];    /* upper bound */
  dst[2] = 1;         /* stride      */

  dimension->map_flag = 0;     /* local is direct subrange */

} /* dalib_set_global_sizes */

/**************************************************************************
*                                                                         *
*   void dalib_compute_local_sizes (int rank, int top_id,                 *
*                                   DimInfo *dimensions,                  *
*                                   DistDim distributions)                *
*                                                                         *
*   - compute local sizes for all dimensions                              *
*   - sets dimensions[i].local_size                                       *
*                                                                         *
**************************************************************************/

/* Computes the local sizes for all dimensions of a distribution.
 *
 *   rank          : number of dimensions
 *   top_id        : topology of the distribution
 *   dimensions    : rank dimension entries, local_size and map_flag
 *                   of each entry are set
 *   distributions : rank distribution entries, entry i describes
 *                   dimension i
 *
 * Every dimension is handed to the routine for its kind.  The
 * underspecified kinds (kANY_...) and unknown kinds report an
 * internal error and call dalib_stop ().
 *
 * Correction against the former version: the error branches are
 * closed with break.  Before, the underspecified kinds ran into the
 * default branch, so a returning dalib_stop () would have caused a
 * second, wrong error message ("unknown distributions").
 */

static void dalib_compute_local_sizes (rank, top_id,
                                       dimensions, distributions)

int rank;
int top_id;

DimInfo *dimensions;
DistDim distributions;    

{ int i;

#ifdef DEBUG
  printf ("%d: local size for distribute  (array_rank = %d, top = %d)\n",
          pcb.i, rank, top_id);
#endif

  /* both pointers advance together with the dimension index */

  for (i=0; i<rank; i++, distributions++, dimensions++)

    { switch (distributions->axis_type) {

      case kSERIAL_DIM : dalib_set_global_sizes (dimensions); 
                         break;

      case kBLOCK_DIM :  dalib_compute_block_sizes (top_id, dimensions, 
                                                    distributions);
                         break;

      case kGEN_BLOCK_DIM : dalib_compute_genblock_sizes
                              (top_id, dimensions, distributions);
                            break;

      case kINDIRECT_DIM : dalib_compute_map_sizes (top_id, dimensions, 
                                                    distributions);
                           break;

      case kCYCLIC_DIM : dalib_compute_cyclic_sizes (top_id, dimensions, 
                                                     distributions);
                         break;

      case kARBITRARY_DIM : dalib_compute_arbitrary_sizes (top_id, 
                                         dimensions, distributions);
                            break;

      case kANY_DISTRIBUTED_DIM :
      case kANY_BLOCK_DIM:
      case kANY_CYCLIC_DIM :
      case kANY_GEN_BLOCK_DIM :
      case kANY_INDIRECT_DIM :
      case kANY_ARBITRARY_DIM : 

         dalib_internal_error (
            "cannot compute local sizes for underspecified mapping");
         dalib_stop ();
         break;      /* do not run into the default branch */

      default         : dalib_internal_error ("unknown distributions");
                        dalib_stop ();
                        break;

      } /* switch axis_type */

    } /* for i */

} /* dalib_compute_local_sizes */

/***************************************************************************
*                                                                          *
*   void dalib_dist_local_sizes (array_info array_id)                      *
*                                                                          *
*    - computes local sizes of distributed arrays/templates                *
*                                                                          *
***************************************************************************/

/* Computes the local sizes of a distributed array/template.
 *
 *   array_id : descriptor with a distribution; the local sizes of
 *              its dimensions are set and the distribution is marked
 *              as localized
 *
 * Errors (internal error, print of the distribution, dalib_stop ()):
 *
 *   - topology of the distribution is kANY_TOPOLOGY
 *   - distribution has already been localized
 *
 * With CHECK defined the descriptor is verified before by
 * dalib_distribution_check.
 */

void dalib_dist_local_sizes (array_id)

array_info array_id;

{ Distribution dist;     /* distribution of array_id */
  int rank;
  int top_id;

#ifdef CHECK
  dalib_distribution_check (array_id);
#endif

  rank   = array_id->rank;
  dist   = array_id->DistributeInfo;
  top_id = dist->topology;

  if (top_id == kANY_TOPOLOGY)

     { /* no processors known for the mapping */

       dalib_internal_error (
        "cannot compute local sizes for underspecified mapping");

       dalib_distribution_print (array_id->DistributeInfo, rank);
       dalib_stop ();
     }

  if (dist->is_localized)

     { /* cannot compute local sizes twice if mapping info is lost */

       dalib_internal_error ("distribution already localized");
       printf ("cannot reallocate for this distribution\n");
       dalib_distribution_print (dist, rank);
       dalib_stop ();
     }

  dalib_compute_local_sizes (rank, top_id, array_id->dimensions, dist->dims);

  dist->is_localized = 1;

} /* dalib_dist_local_sizes */

/**************************************************************************
*                                                                         *
*  void dalib_set_dim_distribution (DistDimRecord *dim,               *
*                                   int type, int *info, int *len,        *
*                                   int *map, int topology, int *topdim)  *
*                                                                         *
*  - set distribution info for every dimension                            *
*  - make no calculations as topology can be kANY_TOPOLOGY                *
*                                                                         *
**************************************************************************/

/* Fill one per-dimension distribution entry from the user arguments.
   Every kind except kSERIAL_DIM takes the next topology dimension by
   incrementing *topdim; no sizes are computed here.                   */

static void dalib_set_dim_distribution (dim, type, info, len, map,
                                        topology, topdim)

DistDim dim;                   /* entry to be filled                    */
int type, *info, *len, *map;   /* kind and its kind-specific arguments  */
int topology;                  /* not used inside this routine          */
int *topdim;                   /* in/out: last topology dim handed out  */

{
  dim->axis_type = type;

#ifdef DEBUG
  printf ("%d: dalib_set_dim_distribution, type = %d, info = %d, dim = %d\n",
          pcb.i, type, *info, *topdim);
#endif

  switch (type) {

  case kSERIAL_DIM :

     /* serial dimensions take no topology dimension */
     break;

  case kBLOCK_DIM :

     { int bsize = *info;

       /* user and system block size start out identical */

       dim->BLOCK_DIM.user_bsize = bsize;
       dim->BLOCK_DIM.sys_bsize  = bsize;
       dim->BLOCK_DIM.axis_dim   = ++(*topdim);
       break;
     }

  case kCYCLIC_DIM :

     dim->CYCLIC_DIM.axis_info = *info;
     dim->CYCLIC_DIM.axis_dim  = ++(*topdim);
     break;

  case kGEN_BLOCK_DIM :

     /* info is kept as pointer to the size array, offsets come later */

     dim->GEN_BLOCK_DIM.axis_dim     = ++(*topdim);
     dim->GEN_BLOCK_DIM.axis_state   = AXIS_SET;
     dim->GEN_BLOCK_DIM.axis_sizes   = info;
     dim->GEN_BLOCK_DIM.axis_offsets = (int *) 0;
     break;

  case kINDIRECT_DIM :

     /* info is kept as pointer to the map array */

     dim->INDIRECT_DIM.axis_dim     = ++(*topdim);
     dim->INDIRECT_DIM.axis_state   = AXIS_SET;
     dim->INDIRECT_DIM.axis_offsets = (int *) NULL;
     dim->INDIRECT_DIM.axis_map     = info;
     break;

  case kARBITRARY_DIM :

     /* *info is the number of chunks, len and map are kept as pointers */

     dim->ARBITRARY_DIM.axis_dim     = ++(*topdim);
     dim->ARBITRARY_DIM.axis_state   = AXIS_SET;
     dim->ARBITRARY_DIM.axis_offsets = len;
     dim->ARBITRARY_DIM.axis_map     = map;
     dim->ARBITRARY_DIM.no_chunks    = *info;
     break;

  case kANY_BLOCK_DIM :
  case kANY_CYCLIC_DIM :
  case kANY_GEN_BLOCK_DIM :
  case kANY_INDIRECT_DIM :
  case kANY_DISTRIBUTED_DIM :
  case kANY_ARBITRARY_DIM :

     /* underspecified kinds only record their topology dimension */

     dim->GENERAL_DIM.axis_dim = ++(*topdim);
     break;

  default :

     dalib_internal_error ("unknown distribution");
     dalib_stop ();
     break;

  } /* end of switch */

} /* dalib_set_dim_distribution */

/**************************************************************************
*                                                                         *
*  int dalib_dist_dsp_size (int rank)                                     *
*                                                                         *
*  - number of bytes needed for a distribution descriptor                 *
*                                                                         *
**************************************************************************/

/* Size in bytes of a distribution descriptor for an object of the given
   rank: the full record size minus (MAX_DIMENSIONS - rank) dimension
   entries.                                                              */

static int dalib_dist_dsp_size (rank)
int rank;

{
  int unused_bytes;    /* bytes of the dimension entries not needed */

  unused_bytes = (MAX_DIMENSIONS - rank) * sizeof (union DistDimRecord);

  return sizeof (struct DistributionRecord) - unused_bytes;

} /* dalib_dist_dsp_size */

/**************************************************************************
*                                                                         *
*  Distribution dalib_make_distribution (array_rank, topology, ...)       *
*                                                                         *
*  - returns a new descriptor for the corresponding distribution          *
*                                                                         *
**************************************************************************/

/* Allocate and initialize a distribution descriptor for an object of
   rank array_rank. For a concrete topology the number of distributed
   dimensions must equal the rank of the topology.                     */

static Distribution dalib_make_distribution

       (array_rank, topology, type, info, len, map)

int array_rank;           /* rank of array or template            */
int topology;             /* id of topology or processor array    */
int type[], *info[];      /* kind and argument for each dimension */
int *len[], *map[];       /* further per-dimension arguments      */

{
  Distribution new_dist;
  int idim;
  int used_top_dims;      /* topology dimensions handed out so far */

#ifdef DEBUG
  printf ("%d: dalib_make_distribution (rank = %d, topid = %d)\n",
          pcb.i, array_rank, topology);
#endif

  new_dist = (Distribution) dalib_malloc (dalib_dist_dsp_size (array_rank),
                                          "distribution_dsp");

  /* global part of the descriptor: not localized, no alignees */

  new_dist->topology       = topology;
  new_dist->is_localized   = 0;
  new_dist->no_aligned     = 0;
  new_dist->max_aligned    = 0;
  new_dist->aligned_arrays = (array_info *) 0;

  /* one entry for every array dimension */

  used_top_dims = 0;

  for (idim = 0; idim < array_rank; idim++)
    {
      dalib_set_dim_distribution (new_dist->dims + idim,
                                  type[idim], info[idim],
                                  len[idim], map[idim],
                                  topology, &used_top_dims);
    }

  /* the rank check is only possible for a known topology */

  if ((topology != kANY_TOPOLOGY)
       && (used_top_dims != dalib_top_rank (topology)))
    {
      dalib_internal_error ("make distribution (dist dims != rank of top)");
      dalib_distribution_print (new_dist, array_rank);
      dalib_stop();
    }

  return (new_dist);

} /* dalib_make_distribution */

/**************************************************************************
*                                                                         *
*  Exported FORTRAN routines                                              *
*                                                                         *
*  void FUNCTION(dalib_distribute) (array_id, topology,                   *
*                                   type1, info1, len1, map1, ....        *
*                                   type7, info7, len7, map7)             *
*                                                                         *
**************************************************************************/

/* FORTRAN entry point: builds a distribution descriptor from the
   per-dimension arguments and attaches it to *array_id. Only the
   arguments of the first rank dimensions are accessed.             */

void FUNCTION(dalib_distribute) (array_id, topology,
                        type1, info1, len1, map1, type2, info2, len2, map2, 
                        type3, info3, len3, map3, type4, info4, len4, map4,
                        type5, info5, len5, map5, type6, info6, len6, map6,
                        type7, info7, len7, map7)

array_info *array_id;
int *topology;
int *type1, *info1, *type2, *info2, *type3, *info3, *type4, *info4;
int *type5, *info5, *type6, *info6, *type7, *info7;
int *len1, *map1, *len2, *map2, *len3, *map3, *len4, *map4;
int *len5, *map5, *len6, *map6, *len7, *map7;

{
  /* arguments collected by dimension (index 0 is dimension 1) */

  int  kinds [MAX_DIMENSIONS];
  int *infos [MAX_DIMENSIONS];
  int *lens  [MAX_DIMENSIONS];
  int *maps  [MAX_DIMENSIONS];

  array_info arr;
  int rank;
  int top_id;

  arr = *array_id;

  if (arr->DistributeInfo != NO_DISTRIBUTION)
     dalib_internal_error ("is already distributed");

  rank = arr->rank;

#ifdef DEBUG
printf ("%d: dalib_distribute (array = %d, rank = %d, topology = %d)\n",
          pcb.i, array_id, rank, *topology);
#endif

  /* enter at the rank and fall through down to dimension 1, so that
     arguments beyond the rank are never touched                      */

  switch (rank) {

     case 7 : kinds[6] = *type7;  infos[6] = info7;
              lens [6] = len7;    maps [6] = map7;
              /* fall through */
     case 6 : kinds[5] = *type6;  infos[5] = info6;
              lens [5] = len6;    maps [5] = map6;
              /* fall through */
     case 5 : kinds[4] = *type5;  infos[4] = info5;
              lens [4] = len5;    maps [4] = map5;
              /* fall through */
     case 4 : kinds[3] = *type4;  infos[3] = info4;
              lens [3] = len4;    maps [3] = map4;
              /* fall through */
     case 3 : kinds[2] = *type3;  infos[2] = info3;
              lens [2] = len3;    maps [2] = map3;
              /* fall through */
     case 2 : kinds[1] = *type2;  infos[1] = info2;
              lens [1] = len2;    maps [1] = map2;
              /* fall through */
     case 1 : kinds[0] = *type1;  infos[0] = info1;
              lens [0] = len1;    maps [0] = map1;

   } /* switch */

  /* values up to MAX_RANK stand for a default topology that is taken
     from the context                                                  */

  top_id = *topology;

  if (top_id <= MAX_RANK)
     top_id = dalib_context_default_top (top_id);

  /* local sizes are not computed here but with the array definition */

  arr->DistributeInfo = dalib_make_distribution (rank, top_id,
                                                 kinds, infos, lens, maps);

} /* dalib_distribute */

/**************************************************************************
*                                                                         *
*  DELETING Distribution Descriptions                                     *
*                                                                         *
*  - important : destroy varying block size offset arrays                 *
*                                                                         *
**************************************************************************/

/* Release the integer arrays that belong to one dimension entry.
   For every offset array element 0 is read as count NP and NP+1
   integers are released. Kinds not listed here own nothing.       */

void dalib_dist_dim_free (mapping)

DistDim mapping;

{
  int *proc_offsets;    /* offset array of the entry                  */
  int *aux_map;         /* map array of INDIRECT / ARBITRARY entries  */
  int  no_procs;        /* count taken from proc_offsets[0]           */
  int  no_items;        /* count that determines size of aux_map      */

  switch (mapping->axis_type) {

   case kCYCLIC_DIM :

     /* only block sizes > 1 are treated as having an offset array */

     if (mapping->CYCLIC_DIM.axis_info > 1)
       {
         proc_offsets = mapping->CYCLIC_DIM.axis_offsets;
         no_procs     = proc_offsets[0];
         dalib_int_free (proc_offsets, no_procs + 1);
       }

     break;

   case kGEN_BLOCK_DIM :

     if (mapping->GEN_BLOCK_DIM.axis_state != AXIS_COMPUTED) break;

     proc_offsets = mapping->GEN_BLOCK_DIM.axis_offsets;
     no_procs     = proc_offsets[0];
     dalib_int_free (proc_offsets, no_procs + 1);

     /* offsets are gone, so the entry is no longer 'computed' */

     mapping->GEN_BLOCK_DIM.axis_state = AXIS_SET;

     break;

   case kINDIRECT_DIM :

     if (mapping->INDIRECT_DIM.axis_state != AXIS_COMPUTED) break;

     aux_map      = mapping->INDIRECT_DIM.axis_map;
     proc_offsets = mapping->INDIRECT_DIM.axis_offsets;

     /* both counts are read before anything is released */

     no_items = aux_map[0];
     no_procs = proc_offsets[0];

     dalib_int_free (proc_offsets, no_procs + 1);
     dalib_int_free (aux_map, 2 * no_items + 1);

     break;

   case kARBITRARY_DIM :

     if (mapping->ARBITRARY_DIM.axis_state != AXIS_COMPUTED) break;

     proc_offsets = mapping->ARBITRARY_DIM.axis_offsets;
     aux_map      = mapping->ARBITRARY_DIM.axis_map;

     no_procs = proc_offsets[0];
     no_items = mapping->ARBITRARY_DIM.no_chunks;

     dalib_int_free (proc_offsets, no_procs + 1);
     dalib_int_free (aux_map, 2 * no_items + 2);

     break;

   default :

     break;   /* nothing else has to be freed */

  } /* switch */

} /* dalib_dist_dim_free */

/* Free a distribution descriptor of an object with the given rank:
   the list of aligned arrays, the per-dimension arrays (only if the
   distribution is localized) and finally the descriptor itself.      */

void dalib_dist_free (dist, rank)

Distribution dist;
int rank;

{
  DistDim dim_entry;
  int idim;

  /* the list of aligned arrays is released without looking at it */

  if (dist->aligned_arrays != (void *) 0)
     free (dist->aligned_arrays);

  /* per-dimension arrays are only released for a localized mapping */

  if (dist->is_localized)
    {
      for (idim = 0, dim_entry = dist->dims; idim < rank;
           idim++, dim_entry++)
        {
          /* BLOCK and serial dimensions own no arrays */

          if (dim_entry->axis_type == kBLOCK_DIM)  continue;
          if (dim_entry->axis_type == kSERIAL_DIM) continue;

          dalib_dist_dim_free (dim_entry);
        }
    }

  /* at last the memory of the descriptor itself */

  dalib_free (dist, dalib_dist_dsp_size (rank));

} /* dalib_dist_free */

/***************************************************************************
*                                                                          *
*   void dalib_distribution_reset (array_info array_id)                    *
*                                                                          *
*    - reset distribution record after deallocation of the array/template  *
*    - frees structures allocated for the actual sizes                     *
*                                                                          *
***************************************************************************/

/* Undo the localization of the distribution of array_id: releases the
   per-dimension arrays and clears the localized flag. Does nothing if
   the distribution is not localized.                                   */

void dalib_distribution_reset (array_id)

array_info array_id;

{
  Distribution dist;
  DistDim dim_entry;
  int idim;
  int rank;

#ifdef CHECK
  dalib_distribution_check (array_id);
#endif

  rank = array_id->rank;
  dist = array_id->DistributeInfo;

  if (!dist->is_localized) return;

  dim_entry = dist->dims;

  for (idim = 0; idim < rank; idim++)
    {
      /* serial and BLOCK dimensions have nothing to release */

      if (   (dim_entry->axis_type != kSERIAL_DIM)
          && (dim_entry->axis_type != kBLOCK_DIM))

         dalib_dist_dim_free (dim_entry);

      dim_entry++;
    }

  dist->is_localized = 0;

} /* dalib_distribution_reset */

/**************************************************************************
*                                                                         *
*                                                                         *
*  INTERNAL DALIB Functions                                               *
*                                                                         *
*  dalib_distribution_info (array_id => template_id, top_id)              *
*                                                                         *
**************************************************************************/
 
/* For a distributed array the array is its own template; returns it in
   *template_id together with the topology of its distribution.        */

void dalib_distribution_info (array_id, template_id, top_id)
array_info array_id;
array_info *template_id;
int        *top_id;
 
{
  int own_top;     /* topology found in the distribution descriptor */

  dalib_distribution_check (array_id);

#ifdef DEBUG
  printf ("%d: distribution info for array_id = %d\n", pcb.i, array_id);
#endif

  own_top = array_id->DistributeInfo->topology;

  *template_id = array_id;
  *top_id      = own_top;

#ifdef DEBUG
  printf ("%d: distribution info for array_id = %d, top = %d\n", 
           pcb.i, array_id, *top_id);
#endif
 
} /* dalib_distribution_info */
 
/**************************************************************************
*                                                                         *
*  dalib_distribution_dim_mapping (array_info array_id, int dim,          *
*                                  int *base, *stride,                    *
*                                  int *lb, int *ub,                      *
*                                  int *topology,                         *
*                                  DimMap *mapping)                       *
*                                                                         *
*  - get info how dimension dim of array array_id is mapped               *
*                                                                         *
**************************************************************************/

/* Describe how dimension dim (counted from 1) of array_id is mapped.
   base/stride are always 0/1 and lb:ub is the global range of the
   dimension.                                                         */

void dalib_distribution_dim_mapping (array_id, dim, base, stride,
                                    lb, ub, topology, mapping)

/* Input Arguments : */

array_info array_id;
int        dim;

/* Output Arguments : */

int     *topology;
int     *base, *stride, *lb, *ub;
DistDim *mapping;

{
  Distribution dist;
  int idx;              /* dim as index counted from 0 */

  idx = dim - 1;

  /* a distributed array has no alignment: identity on the full range */

  *base   = 0;
  *stride = 1;
  *lb     = array_id->dimensions[idx].global_size[0];
  *ub     = array_id->dimensions[idx].global_size[1];

  dist = array_id->DistributeInfo;

  /* NOTE(review): -1 presumably is the value of kANY_TOPOLOGY - confirm */

  if (dist->topology != -1)
    {
      *topology = dist->topology;
      *mapping  = dist->dims + idx;
    }
   else
    {
      *topology = 0;
      *mapping  = NO_DIST_DIM;
    }

} /* dalib_distribution_dim_mapping */

/**************************************************************************
*                                                                         *
*  dalib_distribution_top_mapping (array_info array_id, int *topology,    *
*                                  int *index_dim,                        *
*                                  int *base, *stride,                    *
*                                  int *lb, int *ub,                      *
*                                  DimMap *mapping)                       *
*                                                                         *
*  - get info how dimension dim of array array_id is mapped               *
*  - entries in arrays are sorted by topology dimensions                  *
*                                                                         *
**************************************************************************/

/* Describe the mapping of array_id sorted by topology dimensions: for
   every non-serial array dimension the entries at position
   (topology dimension - 1) of the output arrays are filled. Entries of
   topology dimensions without a mapped array dimension stay untouched. */

void dalib_distribution_top_mapping (array_id, topology, index_dim,
                                     base, stride, lb, ub, mapping)

/* Input Arguments : */

array_info array_id;

/* Output Arguments : */

int     *topology;
int     index_dim[];
int     base[], stride[], lb[], ub[];
DistDim mapping[];

{
  Distribution dist;
  DistDim dim_entry;
  int idim;
  int rank;
  int pos;             /* topology dimension as index counted from 0 */

  dist      = array_id->DistributeInfo;
  dim_entry = dist->dims;

  if (dist->topology == -1)
    {
      *topology = 0;   /* has rank 0, so arrays will be empty */
      return;
    }

  *topology = dist->topology;

  rank = array_id->rank;

  for (idim = 0; idim < rank; idim++, dim_entry++)
    {
      if (dim_entry->axis_type == kSERIAL_DIM) continue;

      /* array dimension (idim + 1) belongs to topology dimension (pos + 1);
         no alignment, so identity mapping of the full global range         */

      pos = dim_entry->GENERAL_DIM.axis_dim - 1;

      index_dim[pos] = idim + 1;
      base[pos]      = 0;
      stride[pos]    = 1;
      lb[pos]        = array_id->dimensions[idim].global_size[0];
      ub[pos]        = array_id->dimensions[idim].global_size[1];
      mapping[pos]   = dim_entry;
    }

} /* dalib_distribution_top_mapping */

/**************************************************************************
*                                                                         *
*  dalib_dim_mapping_info (DistDim mapping => int kind, int top_dim)      *
*                                                                         *
**************************************************************************/

/* Return kind and topology dimension of a dimension mapping. Serial
   mappings and NO_DIST_DIM are reported as kSERIAL_DIM with topology
   dimension 0.                                                        */

void dalib_dim_mapping_info (mapping, kind, top_dim)

/* Input Arguments : */

DistDim mapping;

/* Output Arguments : */

int     *kind;
int     *top_dim;

{
  if (mapping == NO_DIST_DIM)
    {
      *kind    = kSERIAL_DIM;
      *top_dim = 0;
    }
   else
    {
      *kind    = mapping->axis_type;
      *top_dim = (*kind == kSERIAL_DIM) ? 0 : mapping->GENERAL_DIM.axis_dim;
    }

} /* dalib_dim_mapping_info */

/* Return the value stored as axis_info of a CYCLIC mapping. The kind
   of the mapping is not verified here.                                */

void dalib_dist_cyclic_info (mapping, block_size)

/* Input Arguments : */

DistDim mapping;     /* must be CYCLIC */

/* Output Argument : */

int *block_size;

{
  int cyclic_info;

  cyclic_info = mapping->CYCLIC_DIM.axis_info;
  *block_size = cyclic_info;

} /* dalib_dist_cyclic_info */

/**************************************************************************
*                                                                         *
*  int dalib_same_dim_mapping (int top_id,                                *
*                              DistDim mapping1, DistDim mapping2)        *
*                                                                         *
*   - verify the same mapping of two mappings (topid1 == topid2)          *
*                                                                         *
**************************************************************************/

/* Compare two dimension mappings that refer to the same topology.
   Returns 1 if they are taken as the same mapping, 0 otherwise.

   - kind and topology dimension must be equal
   - BLOCK  : equal block sizes, or one block size still 0
   - CYCLIC : equal block sizes
   - GEN_BLOCK, INDIRECT, ARBITRARY : always taken as equal           */

int dalib_same_dim_mapping (top_id, mapping1, mapping2)

/* Input Arguments : */

int     top_id;                /* not inspected in this routine */
DistDim mapping1, mapping2;    /* NO_DIST_DIM counts as serial  */

{ int kind1, kind2;
  int top_dim1, top_dim2;

  dalib_dim_mapping_info (mapping1, &kind1, &top_dim1);
  dalib_dim_mapping_info (mapping2, &kind2, &top_dim2);
  
  if (kind1 != kind2)       return (0);
  if (top_dim1 != top_dim2) return (0);   /* ???, why not if NP are equal */

  if (kind1 == kSERIAL_DIM) return (1);   /* ready */

  /* we know that mapping1 and mapping2 are both DIST_DIM */

  switch (kind1) {

    case kBLOCK_DIM :

       if (mapping1->BLOCK_DIM.user_bsize == mapping2->BLOCK_DIM.user_bsize)
          return (1);

       /* BLOCK(m) possible, where m has not been computed yet */

       if (mapping1->BLOCK_DIM.user_bsize == 0) return (1);
       if (mapping2->BLOCK_DIM.user_bsize == 0) return (1);

       return (0);

    case kCYCLIC_DIM :

       /* CYCLIC(k) and CYCLIC(m) are different mappings for k != m;
          a mismatch must not reach the final return (1)             */

       if (mapping1->CYCLIC_DIM.axis_info == mapping2->CYCLIC_DIM.axis_info)
          return (1);

       return (0);

    case kGEN_BLOCK_DIM :
    case kINDIRECT_DIM :
    case kARBITRARY_DIM :

       /* NOTE(review): the size and map arrays of these kinds are not
          compared, the mappings are taken as equal                    */

       break;

    default : 

       break;

  } /* switch */

  return (1);

} /* dalib_same_dim_mapping */

/**************************************************************************
*                                                                         *
*  void  dalib_distribution_top_query                                     *
*           (array_info array_id, int top_dim,                            *
*            int *kind, int *index_dim, int *top_pos)                     *
*                                                                         *
*     - asks how array_id is related to topology dimension top_dim        *
*                                                                         *
*     - kind = SERIAL_DIM / BLOCK_DIM / GEN_BLOCK_DIM / CYCLIC_DIM        *
*     - index_dim : which index_dim is mapped (0 for no one)              *
*     - top_pos = 0                                                       *
*                                                                         *
**************************************************************************/

/* Find out which dimension of array_id is mapped to topology dimension
   top_dim. Only BLOCK, GEN_BLOCK and CYCLIC dimensions are examined; if
   several dimensions match, the last one is reported. Without a match
   the result is kSERIAL_DIM with index_dim 0. top_pos is always 0.     */

void dalib_distribution_top_query (array_id, top_dim,
                                   kind, index_dim, top_pos)
 
array_info array_id;
int        top_dim;
 
int *kind;
int *index_dim;
int *top_pos;
 
{
  DistDim dim_entry;
  int idim;
  int rank;

  rank      = array_id->rank;
  dim_entry = array_id->DistributeInfo->dims;

  /* default assumption is that topology dimension is replicated */

  *kind      = kSERIAL_DIM;
  *index_dim = 0;
  *top_pos   = 0;

  for (idim = 0; idim < rank; idim++, dim_entry++)
    {
      if (dim_entry->axis_type == kBLOCK_DIM)
        {
          if (dim_entry->BLOCK_DIM.axis_dim != top_dim) continue;
          *kind = kBLOCK_DIM;
        }
       else if (dim_entry->axis_type == kGEN_BLOCK_DIM)
        {
          if (dim_entry->GEN_BLOCK_DIM.axis_dim != top_dim) continue;
          *kind = kGEN_BLOCK_DIM;
        }
       else if (dim_entry->axis_type == kCYCLIC_DIM)
        {
          if (dim_entry->CYCLIC_DIM.axis_dim != top_dim) continue;
          *kind = kCYCLIC_DIM;
        }
       else
          continue;    /* serial and all other kinds are not examined */

      /* array dimensions are reported counted from 1 */

      *index_dim = idim + 1;
    }

#ifdef DEBUG
   printf ("%d: top_query gives index_dim = %d, kind = %d\n",
            pcb.i, *index_dim, *kind);
#endif

} /* dalib_distribution_top_query */

/**************************************************************************
*                                                                         *
*   int dalib_distribution_topology (array_info array_id)                 *
*                                                                         *
*    - returns topology to which array/template is mapped onto            *
*                                                                         *
**************************************************************************/

/* Return the topology stored in the distribution descriptor of array_id.
   An array without distribution is an internal error that stops the
   program.                                                              */

int dalib_distribution_topology (array_id)
array_info array_id;

{ if (array_id->DistributeInfo == NO_DISTRIBUTION)

    { dalib_internal_error ("no distribution is given");

      /* as at the other error sites of this file the report is followed
         by a stop; otherwise the missing descriptor would be
         dereferenced below                                             */

      dalib_stop ();
    }

  return (array_id->DistributeInfo->topology);

} /* dalib_distribution_topology */

/**************************************************************************
*                                                                         *
*  dalib_distribution_data (array_id, int *topology,                      *
*                           int type[], int *info[], int map[])           *
*                                                                         *
*  - returns all relevant informations of the distribution of arr/temp    *
*  - map returns size of block (own size for varying block sizes)         *
*                                                                         *
**************************************************************************/

/* Return the topology and, for every dimension of array_id, the kind
   (type), a kind-specific value (info) and the topology dimension (map).

   info[i] :  SERIAL                         - null pointer
              BLOCK, CYCLIC                  - the integer block size /
                                               cyclic info, cast to a pointer
              GEN_BLOCK, INDIRECT, ARBITRARY - address of the second element
                                               of the offset array

   Any other kind is an internal error.                                  */

void dalib_distribution_data (array_id, topology, type, info, map)

array_info array_id;
int *topology;
int type[], *info[], map[];

{
  Distribution dist;
  DistDim dim_entry;
  short dim_kind;
  int idim;
  int rank;

  dalib_distribution_check (array_id);
 
  rank = array_id->rank;
  dist = array_id->DistributeInfo;
 
  *topology = dist->topology;

  for (idim = 0, dim_entry = dist->dims; idim < rank; idim++, dim_entry++)
    {
      dim_kind   = dim_entry->axis_type;
      type[idim] = dim_kind;

      switch (dim_kind) {

       case kSERIAL_DIM : 

          map[idim]  = 0;            /* no processor dimension   */
          info[idim] = (int *) 0;    /* implementation dependent */
          break;

       case kBLOCK_DIM  : 

          /* integer value is passed in the pointer itself */

          map[idim]  = dim_entry->BLOCK_DIM.axis_dim;
          info[idim] = (int *) dim_entry->BLOCK_DIM.sys_bsize;
          break;

       case kGEN_BLOCK_DIM :

          map[idim]  = dim_entry->GEN_BLOCK_DIM.axis_dim;
          info[idim] = dim_entry->GEN_BLOCK_DIM.axis_offsets + 1;
          break;

       case kCYCLIC_DIM :

          /* integer value is passed in the pointer itself */

          map[idim]  = dim_entry->CYCLIC_DIM.axis_dim;
          info[idim] = (int *) dim_entry->CYCLIC_DIM.axis_info;
          break;

       case kINDIRECT_DIM :

          map[idim]  = dim_entry->INDIRECT_DIM.axis_dim;
          info[idim] = dim_entry->INDIRECT_DIM.axis_offsets+1;
          break;

       case kARBITRARY_DIM :

          map[idim]  = dim_entry->ARBITRARY_DIM.axis_dim;
          info[idim] = dim_entry->ARBITRARY_DIM.axis_offsets+1;
          break;

       default :

          dalib_internal_error ("distribution_info: illegal kind");
          dalib_stop ();
          break;

       } /* switch */

    } /* for */

} /* dalib_distribution_data */

/**************************************************************************
*                                                                         *
*   void dalib_set_aligned_to (array_info alignee, array_info template)   *
*                                                                         *
**************************************************************************/

/* Register array_id as alignee in the distribution descriptor of
   template_id. The list of alignees starts with 20 entries and is
   doubled whenever it is full; the registered entries are carried
   over into the enlarged list.                                     */

void dalib_set_aligned_to (array_id, template_id)

array_info array_id, template_id;

{ Distribution dist;
  int no, max, i;
  array_info *alignees;

  dalib_distribution_check (template_id);

  dist = template_id->DistributeInfo;
  no  = dist->no_aligned;
  max = dist->max_aligned;
  alignees = dist->aligned_arrays;

  if (no == max)

     { array_info *larger;     /* enlarged list of alignees */

       if (max == 0)  max = 20;
         else         max = 2 * max;

       larger = (array_info *) dalib_malloc (max * sizeof (array_info),
                                             "dalib_set_aligned_to");

       /* keep the alignees registered so far; the old list must not
          be released before its entries have been copied            */

       for (i = 0; i < no; i++)
          larger[i] = alignees[i];

       if (alignees != (array_info *) 0)
          free (alignees);

       alignees = larger;

       dist->max_aligned = max;
       dist->aligned_arrays  = alignees;
     }

  alignees [no] = array_id;
  dist->no_aligned += 1;

} /* dalib_set_aligned_to */

/**************************************************************************
*                                                                         *
*   void dalib_set_dealigned (array_info alignee, array_info template)    *
*                                                                         *
**************************************************************************/

/* Remove array_id from the list of alignees of template_id; the entries
   behind it move up by one position. It is an internal error if the
   array is not in the list.                                             */

void dalib_set_dealigned (array_id, template_id)

array_info array_id, template_id;

{
  Distribution dist;
  array_info *list;
  int count;
  int pos;
  int k;

  dalib_distribution_check (template_id);

  dist  = template_id->DistributeInfo;
  count = dist->no_aligned;
  list  = dist->aligned_arrays;

  /* linear search, pos == count stands for 'not found' */

  for (pos = 0; pos < count; pos++)
     if (list[pos] == array_id) break;

  if (pos >= count)
    {
      dalib_internal_error ("dalib_set_dealigned: array not found");
      dalib_stop ();
    }

  /* close the gap at position pos */

  for (k = pos + 1; k < count; k++)
     list[k-1] = list[k];

  dist->no_aligned -= 1;

} /* dalib_set_dealigned */

/**************************************************************************
*                                                                         *
*  void dalib_get_aligned_arrays (array_info template_id,                 *
*                                 int *n, array_info **alignees)          *
*                                                                         *
**************************************************************************/

void dalib_get_aligned_arrays (template_id, n, alignees)

array_info template_id;
int *n;
array_info **alignees;

{
  dalib_distribution_check (template_id);

  *n        = template_id->DistributeInfo->no_aligned;
  *alignees = template_id->DistributeInfo->aligned_arrays;

} /* dalib_aligned_arrays */

/**************************************************************************
*                                                                         *
*  PREDICATE dalib_underspecified_distribution (array_info array_id)      *
*                                                                         *
*   - returns true if distribution of array is underspecified             *
*   - cannot compute local sizes for underspecified mappings              *
*                                                                         *
**************************************************************************/

/* Returns 1 if the distribution of array_id has no concrete topology
   (kANY_TOPOLOGY) or at least one dimension of a kANY_... kind,
   0 otherwise.                                                        */

int dalib_underspecified_distribution (array_id)

array_info array_id;

{
  Distribution dist;
  DistDim dim_entry;
  int idim;
  int rank;

  dalib_distribution_check (array_id);

  dist = array_id->DistributeInfo;
  rank = array_id->rank;

  if (dist->topology == kANY_TOPOLOGY) return (1);

#ifdef DEBUG
  dalib_distribution_print (dist, rank);
#endif

  dim_entry = dist->dims;

  for (idim = 0; idim < rank; idim++, dim_entry++)
    {
      switch (dim_entry->axis_type) {

        case kANY_BLOCK_DIM       :
        case kANY_CYCLIC_DIM      :
        case kANY_GEN_BLOCK_DIM   :
        case kANY_INDIRECT_DIM    :
        case kANY_ARBITRARY_DIM   :
        case kANY_DISTRIBUTED_DIM :

           return (1);    /* one such dimension is sufficient */

        default :

           break;

      } /* switch */
    }

  return (0);  /* not underspecified */

} /* dalib_underspecified_distribution */

/**************************************************************************
*                                                                         *
*  void dalib_make_full_distribution (array_info array_id)                *
*                                                                         *
**************************************************************************/

void dalib_make_full_distribution (array_id)

array_info array_id;

{ Distribution  dist;
  DistDim dim;

  int i, rank, kind;

  int top_rank;

  dalib_distribution_check (array_id);

  dist = array_id->DistributeInfo;

  rank = array_id->rank;

  top_rank = 0;

  for (i=0, dim=dist->dims; i<rank; i++, dim++)

     { kind = dim->axis_type;

       switch (kind) {

         case kANY_BLOCK_DIM       : top_rank ++;
                                     dim->axis_type = kBLOCK_DIM;
                                     dim->BLOCK_DIM.user_bsize = 0;
                                     dim->BLOCK_DIM.sys_bsize = 0;
                                     dim->BLOCK_DIM.axis_dim  = top_rank;
                                     break;
                              
         case kANY_CYCLIC_DIM      : top_rank ++;
                                     dim->axis_type = kCYCLIC_DIM;
                                     dim->CYCLIC_DIM.axis_info = 1;
                                     dim->CYCLIC_DIM.axis_dim  = top_rank;
                                     break;
                              
         case kANY_GEN_BLOCK_DIM   : 
         case kANY_INDIRECT_DIM    : 
         case kANY_ARBITRARY_DIM   : 
         case kANY_DISTRIBUTED_DIM : top_rank ++;
                                     dim->axis_type = kBLOCK_DIM;
                                     dim->BLOCK_DIM.user_bsize = 0;
                                     dim->BLOCK_DIM.sys_bsize = 0;
                                     dim->BLOCK_DIM.axis_dim  = top_rank;
                                     break;
                              
         case kSERIAL_DIM          : break;

         default                   : top_rank ++;

       } /* switch */
     }

  if (dist->topology == kANY_TOPOLOGY) 

    { if (top_rank > MAX_RANK)

         { dalib_internal_error ("too many distributed dims");
           dalib_stop ();
         }

      dist->topology = top_rank;  /* get default topology for this rank */

    }

   else if (dalib_top_rank (dist->topology) != top_rank)

    { dalib_internal_error ("make full distribution, topology rank error");
      dalib_stop ();
    }

#ifdef DEBUG
   printf ("%d: result of making full distribution \n", pcb.i);
   dalib_distribution_print (dist, rank);
#endif

} /* dalib_make_full_distribution */

/**************************************************************************
*                                                                         *
*  void dalib_inherit_dist_dim (DistDim general_dim, DistDim special_dim) *
*                                                                         *
*  - copies the mapping of special_dim into general_dim                   *
*  - a copied axis state AXIS_COMPUTED becomes AXIS_INHERITED             *
*                                                                         *
**************************************************************************/

static void dalib_inherit_dist_dim (general_dim, special_dim)

DistDim special_dim, general_dim;

{ /* structure copy: general_dim now refers to the same internal data
     (offsets, maps) as special_dim                                   */

  *general_dim = *special_dim;

#ifdef DEBUG
  printf ("%d: inherit distributed dimension, kind = %d\n",
          pcb.i, general_dim->axis_type);
#endif

  /* change the state to avoid freeing of internal data structures;
     only a state AXIS_COMPUTED is changed, any other state is kept
     as it has been copied from special_dim                          */

  switch (general_dim->axis_type) {

    case kGEN_BLOCK_DIM : 

       if (special_dim->GEN_BLOCK_DIM.axis_state == AXIS_COMPUTED)
          general_dim->GEN_BLOCK_DIM.axis_state = AXIS_INHERITED;
       break;
 
    case kINDIRECT_DIM : 

       if (special_dim->INDIRECT_DIM.axis_state == AXIS_COMPUTED)
          general_dim->INDIRECT_DIM.axis_state = AXIS_INHERITED;
       break;
 
    case kARBITRARY_DIM : 

       if (special_dim->ARBITRARY_DIM.axis_state == AXIS_COMPUTED)
          general_dim->ARBITRARY_DIM.axis_state = AXIS_INHERITED;
       break;

    /* all other kinds have no axis state that must be changed */

    default : break;
 
  }

} /* dalib_inherit_dist_dim */

/**************************************************************************
*                                                                         *
*  void dalib_inherit_distribution (array_info general_id,                *
*                                   array_info special_id)                *
*                                                                         *
*  - general_id takes over the topology of special_id                     *
*  - all dimensions (rank of special_id) are inherited one by one         *
*                                                                         *
**************************************************************************/

void dalib_inherit_distribution (general_id, special_id)

array_info general_id, special_id;

{
  Distribution src;       /* distribution of special_id (is copied)     */
  Distribution dst;       /* distribution of general_id (is updated)    */
  int          k;
  int          no_dims;

  dalib_distribution_check (special_id);
  dalib_distribution_check (general_id);

  src = special_id->DistributeInfo;
  dst = general_id->DistributeInfo;

  dst->topology = src->topology;

  /* the number of inherited dimensions is given by special_id */

  no_dims = special_id->rank;

  for (k = 0; k < no_dims; k++)
    dalib_inherit_dist_dim (&dst->dims[k], &src->dims[k]);

} /* dalib_inherit_distribution */

/**************************************************************************
*                                                                         *
*  int dalib_is_dist_dim_specialization (DistDim special_mapping,         *
*                                        DistDim general_mapping)         *
*                                                                         *
*  - returns (1) if special_mapping is a specialization of                *
*    general_mapping, (0) otherwise                                       *
*                                                                         *
**************************************************************************/

int dalib_is_dist_dim_specialization (special_mapping, general_mapping)

DistDim special_mapping, general_mapping;

{
  int kind_s, kind_g;   /* kind of the special / general mapping        */
  int tdim_s, tdim_g;   /* second result of dalib_dim_mapping_info,     */
                        /* only needed for the debug output             */

  dalib_dim_mapping_info (special_mapping, &kind_s, &tdim_s);
  dalib_dim_mapping_info (general_mapping, &kind_g, &tdim_g);

#ifdef DEBUG
  printf ("%d: dist dim: special = (%d,%d), general = (%d,%d)\n",
           pcb.i, kind_s, tdim_s, kind_g, tdim_g);
#endif

  /* part 1 : both mappings are of the same kind */

  if (kind_g == kind_s)
    {
      switch (kind_g)
        {
          case kSERIAL_DIM    :
          case kGEN_BLOCK_DIM :
          case kINDIRECT_DIM  :
          case kARBITRARY_DIM :

            return (1);      /* same kind is sufficient */

          case kBLOCK_DIM :

            if (   special_mapping->BLOCK_DIM.sys_bsize
                == general_mapping->BLOCK_DIM.sys_bsize)
              return (1);

            /* different system block sizes are accepted as long as
               one side has a user block size of 0, i.e. BLOCK(m)
               where m has not been computed yet                    */

            return (   general_mapping->BLOCK_DIM.user_bsize == 0
                    || special_mapping->BLOCK_DIM.user_bsize == 0);

          case kCYCLIC_DIM :

            return (   special_mapping->CYCLIC_DIM.axis_info
                    == general_mapping->CYCLIC_DIM.axis_info);

          default :

            break;           /* equal kANY_xxx kinds: see part 2 */
        }
    }

  /* part 2 : general can be a really general distribution format

     NOTE(review): there is no case for kANY_ARBITRARY_DIM, so a general
     mapping of this kind never accepts any special mapping - confirm
     that this is intended                                             */

  switch (kind_g)
    {
      case kANY_BLOCK_DIM :

        return (kind_s == kBLOCK_DIM);

      case kANY_CYCLIC_DIM :

        return (kind_s == kBLOCK_DIM || kind_s == kCYCLIC_DIM);

      case kANY_GEN_BLOCK_DIM :

        return (kind_s == kGEN_BLOCK_DIM);

      case kANY_INDIRECT_DIM :

        return (kind_s == kINDIRECT_DIM);

      case kANY_DISTRIBUTED_DIM :

        return (1);

      default :

        break;
    }

  return (0);

} /* dalib_is_dist_dim_specialization */

/**************************************************************************
*                                                                         *
*  int dalib_dist_local_addr (int val, int lb, int ub, DistDim mapping)   *
*                                                                         *
*  - calculate local index value for a mapped dimension                   *
*  - function is independent of the processor where it is computed        *
*                                                                         *
**************************************************************************/

int dalib_dist_local_addr (val, lb, ub, mapping)

int     val, lb, ub;
DistDim mapping;

{
  if (mapping == NO_DIST_DIM) return val;

  if (mapping->axis_type == kCYCLIC_DIM)
    {
      int  block_len = mapping->CYCLIC_DIM.axis_info;
      int *proc_offsets;
      int  no_procs;
      int  local_val;

      /* CYCLIC(1) : index is not changed, offsets are not accessed */

      if (block_len == 1) return (val);

      /* first entry is the number of processors, the processor
         offsets follow behind it                                */

      proc_offsets = mapping->CYCLIC_DIM.axis_offsets;
      no_procs     = proc_offsets[0];

      dalib_block_cyclic_local (no_procs, block_len, lb, ub, val,
                                proc_offsets + 1, &local_val);

      return (local_val);
    }

  if (mapping->axis_type == kINDIRECT_DIM)
    {
      /* the table behind the first entry of axis_map is indexed
         with the value relative to lb                            */

      int *table = mapping->INDIRECT_DIM.axis_map + 1;

      return (table [val-lb] + lb);
    }

  if (mapping->axis_type == kARBITRARY_DIM)
    {
      int  chunks    = mapping->ARBITRARY_DIM.no_chunks;
      int *g_offsets = mapping->ARBITRARY_DIM.axis_map;
      int *l_offsets = g_offsets + chunks + 1;
      int  rel;       /* val in 0..ub-lb instead of lb..ub */
      int  chunk_id;
      int  in_chunk;

      /* step 1:  find for val the chunk and offset in chunk */

      rel      = val - lb;
      chunk_id = dalib_gen_block_owner (g_offsets+1, chunks, rel);
      in_chunk = rel - g_offsets [chunk_id];

#ifdef DEBUG
      printf ("%d: %d (of %d:%d) in chunk = %d, offset = %d, local = %d\n",
               pcb.i, val, lb, ub, chunk_id, in_chunk, 
               l_offsets[chunk_id]);
#endif

      /* step 2:  look for local offset and return result  */

      return (l_offsets[chunk_id] + in_chunk + lb);
    }

  /* all other dimensions have no mapping */

  return (val);

} /* dalib_dist_local_addr */

/**************************************************************************
*                                                                         *
*  int dalib_dist_global_addr (int val, int lb, int ub, DistDim mapping)  *
*                                                                         *
*  - calculate global index value for a mapped dimension                  *
*  - function is independent of the processor where it is computed        *
*                                                                         *
**************************************************************************/

int dalib_dist_global_addr (val, lb, ub, mapping)

int     val, lb, ub;
DistDim mapping;

{ if (mapping == NO_DIST_DIM) return val;

  switch (mapping->axis_type) {

    case kCYCLIC_DIM :

      { int info, NP;
        int *offsets;
        int g_index;

        info    = mapping->CYCLIC_DIM.axis_info;

        /* CYCLIC(1): dalib_dist_local_addr returns the index unchanged,
           so the inverse mapping must be the identity as well; return
           before the processor offsets are accessed                    */

        if (info == 1) return (val);

        offsets = mapping->CYCLIC_DIM.axis_offsets;

        NP = offsets[0];

        offsets++;     /* now points to the real processor offsets */

        dalib_block_cyclic_global (NP, info, lb, ub, val, offsets, &g_index);

        return (g_index);

      }

    case kINDIRECT_DIM :

      { int *perm;

        perm = mapping->INDIRECT_DIM.axis_map + 1;
        perm += (ub - lb + 1); /* inverse mapping */
        return (perm [val-lb] + lb);

      }

    case kARBITRARY_DIM : 

      { int *global_offsets, *local_offsets;
        int no_chunks;
        int chunk, offset;
        int val1;    /* coordinates in 0..ub-lb instead of lb..ub */

         /***********************************************************
         *                                                          *
         *   len (map)      :   10 (1)  20 (2)  30 (2)  40 (1)      *
         *                                                          *
         *   global_offsets :    0      10      30      60     100  *
         *   local_offsets  :    0      50      70      10          *
         *                                                          *
         ***********************************************************/

         no_chunks      = mapping->ARBITRARY_DIM.no_chunks;
         global_offsets = mapping->ARBITRARY_DIM.axis_map;
         local_offsets  = global_offsets + no_chunks + 1;

         /* step 1:  find for val the chunk and offset in chunk */

         val1   = val - lb;
         chunk  = dalib_offset_owner (local_offsets, no_chunks, val1);
         offset = val1 - local_offsets [chunk];

#ifdef DEBUG
       printf ("%d: %d (of %d:%d) in chunk = %d, offset = %d, global = %d\n",
                pcb.i, val, lb, ub, chunk, offset, 
                global_offsets[chunk]);
#endif

         /* step 2:  look for global offset and return result  */

         return (global_offsets[chunk] + offset + lb);
        
       }

    /* all other dimensions have no reordering */

    default : return (val);

  } /* switch */

} /* dalib_dist_global_addr */

/**************************************************************************
*                                                                         *
*   int dalib_mapping_owner (DistDim mapping, int NP,                     *
*                            int lb, int ub, int val)                     *
*                                                                         *
*   - lb:ub is mapped via 'mapping' onto NP processors                    *
*   - returns owner of val                                                *
*                                                                         *
**************************************************************************/

int dalib_mapping_owner (mapping, NP, lb, ub, val)

/* Input arguments : */

int NP;            /* number of processors for mapped dimension */
int val, lb, ub;   /* val is value in mapped range lb:ub        */
DistDim mapping;

/* Output arguments : function result is owner */

{ /* result is 0 if there is nothing distributed at all, otherwise
     it is the result of the owner routine of the mapping kind + 1  */

  if (NP == 0) return 0;

  if (mapping == NO_DIST_DIM) return 0;

  if (mapping->axis_type == kSERIAL_DIM) return 0;

  switch (mapping->axis_type) {

    case kBLOCK_DIM :

      { int block_size;

        block_size = mapping->BLOCK_DIM.sys_bsize;

        return dalib_block_owner (NP, block_size, lb, ub, val) + 1;

      }

    case kCYCLIC_DIM :

      { int Bsize;

        Bsize = mapping->CYCLIC_DIM.axis_info;

        if (Bsize != 1)

          return dalib_block_cyclic_owner (NP, Bsize, lb, ub, val) + 1;

         else

          return dalib_cyclic_owner (NP, lb, ub, val) + 1;
 
      }

    case kGEN_BLOCK_DIM :

      { int *offsets;

        /* make sure that state is not AXIS_SET */

        offsets = mapping->GEN_BLOCK_DIM.axis_offsets + 1;

        return dalib_gen_block_owner (offsets, NP, val-lb) + 1;

      }

    case kINDIRECT_DIM : 

      { int *perm, *offsets;

        perm    = mapping->INDIRECT_DIM.axis_map + 1;
        offsets = mapping->INDIRECT_DIM.axis_offsets + 1;

        /* perm maps index to 0 <= i < N, offsets[NP-1] == N */

        return dalib_gen_block_owner (offsets, NP, perm[val-lb]) + 1;
 
      }

    case kARBITRARY_DIM : 

      { int *global_offsets, *local_offsets;
        int *P_offsets;
        int no_chunks;
        int chunk, offset;
        int val1;

        no_chunks      = mapping->ARBITRARY_DIM.no_chunks;
        global_offsets = mapping->ARBITRARY_DIM.axis_map;
        local_offsets  = global_offsets + no_chunks + 1;

        /* step 1:  find for val the chunk and offset in chunk */

        val1   = val - lb;
        chunk  = dalib_gen_block_owner (global_offsets+1, no_chunks, val1);
        offset = val1 - global_offsets [chunk];

        /* step 2:  look for local offset and return result  */

        val1 = local_offsets[chunk] + offset;

        P_offsets = mapping->ARBITRARY_DIM.axis_offsets + 1;

        return dalib_gen_block_owner (P_offsets, NP, val1) + 1;

      }

    /* all other dimensions are illegal */

    default :

       dalib_internal_error ("illegal mapping for distribution_owner");
       dalib_stop ();

  } /* switch */

  /* only reached for an illegal mapping if dalib_stop should return;
     never leave a function with result without a defined value     */

  return 0;

} /* dalib_mapping_owner */

/**************************************************************************
*                                                                         *
*  void dalib_distribution_owner (int val, int lb, int ub,                *
*                                 int topology, DistDim mapping,          *
*                                 int *top_dim, *top_pos)                 *
*                                                                         *
*  - returns owner for a distributed dimension                            *
*  - top_dim and top_pos are both 0 for no or a serial mapping            *
*                                                                         *
**************************************************************************/

void dalib_distribution_owner (val, lb, ub, topology, mapping, 
                               top_dim, top_pos)

/* Input arguments : */

int     val, lb, ub;
int     topology;
DistDim mapping;

/* Output arguments : */

int *top_dim;
int *top_pos;

{
  int no_procs;    /* NP  as returned by dalib_top_info            */
  int my_pos;      /* NId as returned by dalib_top_info (not used) */

  /* no mapping or serial dimension : there is no owner to find */

  if (mapping == NO_DIST_DIM || mapping->axis_type == kSERIAL_DIM)
    {
      *top_dim = 0;
      *top_pos = 0;
      return;
    }

  /* topology dimension is part of the mapping, the position in this
     dimension is the owner of val for the mapping                   */

  *top_dim = mapping->GENERAL_DIM.axis_dim;

  dalib_top_info (topology, *top_dim, &no_procs, &my_pos);

  *top_pos = dalib_mapping_owner (mapping, no_procs, lb, ub, val);

#ifdef DEBUG
  printf ("%d: distribution_owner (kind=%d,NP=%d), val = %d, owner = %d\n",
          pcb.i, mapping->axis_type, no_procs, val, *top_pos);
#endif

} /* dalib_distribution_owner */

/**************************************************************************
*                                                                         *
*  void dalib_distribution_addresses (int NP, DistDim mapping,            *
*                                     int base, int stride,               *
*                                     int lb, int ub,                     *
*                                     int N, int **index,                 *
*                                     int *index_new, int owner[])        *
*                                                                         *
*  - returns many owners for a distributed dimension                      *
*  - index has global values, but we return local values                  *
*                                                                         *
**************************************************************************/

/* dalib_new_index (N, index, index_new)

   - replaces *index by a newly allocated copy of its first N values
     and sets *index_new to 1
   - nothing is done if *index_new is already set                     */

static void dalib_new_index (N, index, index_new)

int N;
int **index;
int *index_new;

{
  int *copy;
  int  k;

  if (*index_new != 0) return;  /* we have already new indexes */

  copy = (int *) dalib_int_malloc (N, "dalib_new_indexes");

  for (k = 0; k < N; k++)
    copy[k] = (*index)[k];

  *index     = copy;
  *index_new = 1;         /* will be free after building the inspector */

} /* dalib_new_index */

/* dalib_distribution_addresses

   - computes owner[i] for the N values of *index, mapping kind
     specific work is done by the dalib_xxx_addresses routines
   - for CYCLIC(K), K != 1, INDIRECT and ARBITRARY mappings *index is
     replaced by a copy before (see dalib_new_index), *index_new is set
   - only base == 0 and stride == 1 are supported                      */

void dalib_distribution_addresses (NP, mapping, 
                                   base, stride, lb, ub,
                                   N, index, index_new, owner)

/* Input arguments : */

int base, stride;
int lb, ub;
int NP;
DistDim mapping;

int N;

/* In-Output argument : */

int **index;
int *index_new;

int owner[];

{ if (NP == 1)

     { int i;
       int *index_vals;

       index_vals = *index;

       /* only values below lb are marked with -1, all other entries
          of owner remain unchanged                                  */

       for (i=0; i<N; i++)
         if (index_vals[i] < lb) owner[i] = -1;

       return;   /* nothing to do on a single processor */
     }

  if (mapping == NO_DIST_DIM) 

     { dalib_internal_error ("distribution_addresses, NO_DIST_DIM");
       dalib_stop ();
     }

  if (mapping->axis_type == kSERIAL_DIM)

     { dalib_internal_error ("distribution_addresses, SERIAL_DIM");
       dalib_stop ();
     }

  if (stride != 1)

     { dalib_internal_error ("distribution_addresses, stride != 1");
       dalib_stop ();
     }

  if (base != 0)

     { dalib_internal_error ("distribution_addresses, base != 0");
       dalib_stop ();
     }

#ifdef DEBUG
  printf ("%d: dalib_distribution_addresses (NP=%d, map=%d, [%d:%d], N=%d\n",
           pcb.i, NP, mapping->axis_type, lb, ub, N);
#endif

  switch (mapping->axis_type) {

    case kBLOCK_DIM :

      { int BSize;

        BSize = mapping->BLOCK_DIM.sys_bsize;

        dalib_block_addresses (NP, BSize, lb, ub, N, *index, owner);

        break;
      }


    case kCYCLIC_DIM :

      { int Bsize;
        int *offsets;

        Bsize   = mapping->CYCLIC_DIM.axis_info;
        offsets = mapping->CYCLIC_DIM.axis_offsets;

        offsets++;  /* increment to the real offsets */

        if (Bsize != 1)

          { dalib_new_index (N, index, index_new);

            dalib_block_cyclic_addresses (NP, Bsize, lb, ub, offsets,
                                          N, *index, owner);
          }

          else

            /* CYCLIC(1) : works on the original index values */

            dalib_cyclic_addresses (NP, lb, ub, N, *index, owner);

        break;
      }

    case kGEN_BLOCK_DIM :

      { int *offsets;

        offsets = mapping->GEN_BLOCK_DIM.axis_offsets;
        offsets ++;

        dalib_gen_block_addresses (NP, lb, ub, offsets, N, *index, owner);

        break;

      }

    case kINDIRECT_DIM : 

      { int *perm, *offsets;

        perm    = mapping->INDIRECT_DIM.axis_map + 1;
        offsets = mapping->INDIRECT_DIM.axis_offsets + 1;

        dalib_new_index (N, index, index_new);

        /* owner = perm[index-lb] + lb) */

        dalib_indirect_addresses (NP, lb, ub, offsets, perm,
                                  N, *index, owner);

        break;
      }

    case kARBITRARY_DIM : 

      { int *global_offsets;
        int *local_offsets;
        int *P_offsets;
        int no_chunks;

        no_chunks      = mapping->ARBITRARY_DIM.no_chunks;
        global_offsets = mapping->ARBITRARY_DIM.axis_map;
        local_offsets  = global_offsets + no_chunks + 1;
        P_offsets      = mapping->ARBITRARY_DIM.axis_offsets + 1;

        dalib_new_index (N, index, index_new);

        dalib_arbitrary_addresses (NP, lb, ub, no_chunks,
                                   global_offsets, local_offsets,
                                   P_offsets,
                                   N, *index, owner);

        break;
      }

    /* all other dimensions are illegal */

    default :

         dalib_internal_error ("illegal mapping for distribution_addresses");
         dalib_stop ();

  } /* switch */

} /* dalib_distribution_addresses */

/**************************************************************************
*                                                                         *
*  void dalib_distribution_range (DistDim mapping,                        *
*                                 int NId, int NP,                        *
*                                 int global_size[],                      *
*                                 int global_section[],                   *
*                                 int local_section[]);                   *
*                                                                         *
* - computes local part of global_section in local_section                *
* - available for BLOCK, CYCLIC(1) and GEN_BLOCK mappings only            *
*                                                                         *
**************************************************************************/

void dalib_distribution_range (mapping, NId, NP, global_size, 
                               global_section, local_section)

/* Input arguments : */

DistDim mapping;
int     NId, NP;
int     global_size [];  /* lb = global_size[0], ub = global_size [1] */
int     global_section[]; 

/* Output arguments : */

int     local_section[]; 

{ if (NP == 1)

     { /* single processor : local section is the global section */

       local_section[0] = global_section[0];
       local_section[1] = global_section[1];
       local_section[2] = global_section[2];
       return;
     }

  if (mapping == NO_DIST_DIM) 

     { dalib_internal_error ("distribution_range, no distribution");
       dalib_stop ();
     }

  if (mapping->axis_type == kSERIAL_DIM)

     { dalib_internal_error ("distribution_range, SERIAL_DIM");
       dalib_stop ();
     }

  switch (mapping->axis_type) {

    case kBLOCK_DIM :

      { int BSize;

        BSize = mapping->BLOCK_DIM.sys_bsize;

        dalib_block_range (NP, NId, BSize, global_size,
                           global_section, local_section);

        break;
      }


    case kCYCLIC_DIM :

      { int Bsize;

        Bsize   = mapping->CYCLIC_DIM.axis_info;

        if (Bsize != 1)

          { dalib_internal_error ("range not available for CYCLIC(K)");
            dalib_stop ();
          }

        dalib_cyclic_range (NP, NId, global_size, 
                            global_section, local_section);

        break;
      }

    case kGEN_BLOCK_DIM :

      { int *offsets;

        offsets = mapping->GEN_BLOCK_DIM.axis_offsets;
        offsets ++;

        dalib_gen_block_range (offsets, NP, NId, global_size, 
                               global_section, local_section);

        break;

      }

    /* all other dimensions are illegal; stop here like for all other
       internal errors as local_section has not been computed         */

    default :

         dalib_internal_error ("illegal mapping for distribution_range");
         dalib_stop ();

  } /* switch */

#ifdef DEBUG
printf ("%d: dist_range [kind=%d,NP=%d,NId=%d] on %d:%d, %d:%d:%d->%d:%d:%d\n",
         pcb.i, mapping->axis_type, NP, NId, global_size[0], global_size[1],
         global_section[0], global_section[1], global_section[2],
         local_section[0], local_section[1], local_section[2]);
#endif

} /* dalib_distribution_range */

/**************************************************************************
*                                                                         *
*  void dalib_distribution_size (DistDim mapping,                         *
*                                int NId, int NP,                         *
*                                int global_size[],                       *
*                                int local_size[])                        *
*                                                                         *
* - computes local part of global_size in local_size                      *
* - needed for computing of structured schedules                          *
* - available for BLOCK, CYCLIC(1) and GEN_BLOCK mappings only            *
*                                                                         *
**************************************************************************/

void dalib_distribution_size (mapping, NId, NP, global_size, local_size)

/* Input arguments : */

DistDim mapping;
int     NId, NP;
int     global_size [];  /* lb = global_size[0], ub = global_size [1] */

/* Output arguments : */

int     local_size[]; 

{ int dummy_size;

  if (NP == 1)

     { /* single processor : local part is the full range */

       local_size[0] = global_size[0];
       local_size[1] = global_size[1];
       local_size[2] = 1;
       return;
     }

  if (mapping == NO_DIST_DIM) 

     { dalib_internal_error ("distribution_size, no distribution");
       dalib_stop ();
     }

  if (mapping->axis_type == kSERIAL_DIM)

     { dalib_internal_error ("distribution_size, SERIAL_DIM");
       dalib_stop ();
     }

  switch (mapping->axis_type) {

    case kBLOCK_DIM :

      { int BSize;

        BSize = mapping->BLOCK_DIM.sys_bsize;

        dalib_block_size (NP, NId, BSize, global_size, local_size);

        local_size[2] = 1;

        break;
      }


    case kCYCLIC_DIM :

      { int Bsize;

        Bsize   = mapping->CYCLIC_DIM.axis_info;

        if (Bsize != 1)

          { dalib_internal_error ("size not available for CYCLIC(K)");
            dalib_stop ();
          }

        /* NOTE(review): local_size[2] is not set for CYCLIC, the third
           result of dalib_cyclic_size is dropped - confirm that no
           caller needs it                                              */

        dalib_cyclic_size (NP, NId, global_size[0], global_size[1],
                           local_size, local_size+1, &dummy_size);

        break;
      }

    case kGEN_BLOCK_DIM :

      { int *offsets;

        offsets = mapping->GEN_BLOCK_DIM.axis_offsets;
        offsets ++;

        /* local part starts behind the part of processor NId-1 and
           ends with offsets[NId-1] (relative to the lower bound)    */

        if (NId == 1)
          local_size[0] = global_size[0];
        else
          local_size[0] = global_size[0] + offsets[NId-2];

        local_size[1] = global_size[0] + offsets[NId-1] - 1;

        /* contiguous part like for BLOCK, so set the third entry too */

        local_size[2] = 1;

        break;

      }

    /* all other dimensions are illegal; stop here like for all other
       internal errors as local_size has not been computed            */

    default :

         dalib_internal_error ("illegal mapping for distribution_size");
         dalib_stop ();

  } /* switch */

#ifdef DEBUG
printf ("%d: dist_size [kind=%d,NP=%d,NId=%d] of %d:%d  is %d:%d:%d\n",
         pcb.i, mapping->axis_type, NP, NId, global_size[0], global_size[1],
         local_size[0], local_size[1], local_size[2]);
#endif

} /* dalib_distribution_size */

/**************************************************************************
*                                                                         *
*  void dalib_pack_distribution (char *buffer, int rank,                  *
*                                Distribution dist, int *length)          *
*                                                                         *
*  - copies the descriptor of dist and its topology into buffer           *
*  - length returns the number of bytes used in buffer                    *
*                                                                         *
**************************************************************************/

void dalib_pack_distribution (buffer, rank, dist, length)

char *buffer;
int  rank;
Distribution dist;
int  *length;

{
  int dsp_bytes;     /* bytes needed for the distribution descriptor */
  int top_bytes;     /* bytes needed for the topology information    */
  int k;
  int illegal;       /* set if a dimension cannot be packed          */

  /* GEN_BLOCK, INDIRECT and ARBITRARY dimensions are not allowed */

  illegal = 0;

  for (k = 0; k < rank && !illegal; k++)

    switch (dist->dims[k].axis_type)
      {
        case kGEN_BLOCK_DIM :
        case kINDIRECT_DIM  :
        case kARBITRARY_DIM : illegal = 1;
                              break;

        default             : break;
      }

  if (illegal)
    {
      dalib_internal_error ("pack_distribution: illegal descriptor");
      dalib_stop ();
    }

  /* the descriptor itself is the first part of the buffer */

  dsp_bytes = dalib_dist_dsp_size (rank);
  dalib_memcopy (buffer, dist, dsp_bytes);

  /* topology information follows, but only for a topology id > 0 */

  if (dist->topology > 0)
    dalib_pack_topology (buffer + dsp_bytes, dist->topology, &top_bytes);
  else
    top_bytes = 0;

  *length = dsp_bytes + top_bytes;

} /* dalib_pack_distribution */

     /*************************************************************
     *  dalib_unpack_distribution (buffer, rank => dist, length)  *
     *                                                            *
     *  - dist is a newly allocated copy of the packed descriptor *
     *  - length is the number of bytes taken from buffer         *
     *************************************************************/

void dalib_unpack_distribution (buffer, rank, dist, length)

char         buffer[];
int          rank;
Distribution *dist;
int          *length;

{
  Distribution unpacked;    /* new descriptor built from buffer   */
  int          dsp_bytes;   /* bytes used for the descriptor      */
  int          top_bytes;   /* bytes used for the topology        */
  int          topology;

  /* the descriptor itself is the first part of the buffer */

  dsp_bytes = dalib_dist_dsp_size (rank);
  unpacked  = (Distribution) dalib_malloc (dsp_bytes, "unpack_distribution");

  dalib_memcopy (unpacked, buffer, dsp_bytes);

  topology = unpacked->topology;

#ifdef DEBUG
  printf ("%d: unpack distribution, dist len = %d, top_id = %d\n",
           pcb.i, dsp_bytes, topology);
#endif

  /* topology information follows only for a topology id > 0,
     unpacking can return another topology id                  */

  if (topology <= 0)
    top_bytes = 0;
  else
    dalib_unpack_topology (buffer + dsp_bytes, &topology, &top_bytes);

  /* copied values of these entries are not taken over */

  unpacked->aligned_arrays = (array_info *) 0;
  unpacked->max_aligned    = 0;
  unpacked->no_aligned     = 0;
  unpacked->is_localized   = 0;
  unpacked->topology       = topology;

  *length = dsp_bytes + top_bytes;
  *dist   = unpacked;

} /* dalib_unpack_distribution */

