/**************************************************************************
*                                                                         *
*  Author      : Dr. Thomas Brandes, GMD, SCAI.LAB                        *
*  Copyright   : GMD St. Augustin, Germany                                *
*  Date        : Jun 94                                                   *
*  Last Update : Aug 94                                                   *
*                                                                         *
*  This Module is part of the DALIB                                       *
*                                                                         *
*  Module      : ddt.c                                                    *
*                                                                         *
*  Function: derived data types + packing/unpacking                       *
*                                                                         *
*  A ddt describes a sequence of data in the memory space; it will        *
*  be used for copying, sending and receiving                             *
*                                                                         *
*  a) definition of derived data types                                    *
*                                                                         *
*   void dalib_ddt_def_simple (dd_type *new_ddt, char *memory, int size)  *
*                                                                         *
*     - contiguous section in memory of rank 0                            *
*                                                                         *
*   void dalib_ddt_def_section (dd_type *new_ddt, char *memory, int size, *
*                               int rank, int inc1, n1, ..., inc7, n7)    *
*                                                                         *
*     - section memory [k1*inc1+...+k7*inc7], 0 <= ki < ni                *
*       (one element has size bytes)                                      *
*                                                                         *
*   void dalib_ddt_def_indexed (dd_type *new_ddt, int no_elems,           *
*                               int *indexes              )               *
*                                                                         *
*     - data are memory [indexes[i]], 0 <= i < no_elems                   *
*       (memory and size will be set later)                               *
*                                                                         *
*   void dalib_ddt_set_data (dd_type ddt, char *data, int size)           *
*                                                                         *
*   void dalib_ddt_def_combine (dd_type *ddt, dd_type ddt1, dd_type ddt2) *
*                                                                         *
*     - build the union of two derived data types                         *
*                                                                         *
*  b) using/freeing derived data types                                    *
*                                                                         *
*   void dalib_ddt_setref (dd_type ddt)                                   *
*                                                                         *
*     - set an additional reference to the ddt structure                  *
*                                                                         *
*   void dalib_ddt_free (dd_type ddt)                                     *
*                                                                         *
*     - free memory used for ddt if no other references                   *
*                                                                         *
*  c) asking for properties                                               *
*                                                                         *
*   void dalib_ddt_get_size (dd_type ddt, int *size)                      *
*                                                                         *
*     - returns size in bytes of the memory area defined by ddt           *
*                                                                         *
*   void dalib_ddt_is_contiguous (dd_type ddt, int *is, char **address)   *
*                                                                         *
*     - returns true for is if ddt specifies contiguous memory section    *
*       (address is pointer to beginning of this section)                 *
*                                                                         *
*  d) packing / unpacking with derived data types                         *
*                                                                         *
*   void dalib_ddt_pack (char *buffer, dd_type ddt)                       *
*                                                                         *
*     - contiguous buffer is filled with data from memory def. by ddt     *
*                                                                         *
*   void dalib_ddt_unpack (dd_type ddt, char *buffer, int op)             *
*                                                                         *
*     - memory (def by ddt) is filled by contiguous buffer                *
*       (op specifies the operation applied with old value)               *
*                                                                         *
*   void dalib_ddt_move (dd_type ddt_target, dd_type ddt_source)          *
*                                                                         *
**************************************************************************/

#undef DEBUG

#include "dalib.h"

/*******************************************************************
*                                                                  *
*  structure for derived data types                                *
*                                                                  *
*******************************************************************/

     /******************************************************
     *                                                     *
     *  enumeration type for different kind of ddt         *
     *                                                     *
     ******************************************************/

/* Discriminator stored in every ddt; it selects which member of
   union ddt_info is valid.  The numeric values are the ones the
   compiler assigned implicitly before (0, 1, 2, 3).              */

typedef enum
{
  DDT_SECTION  = 0,   /* strided section, see ddt_section            */
  DDT_COMBINED = 1,   /* pair of two ddts, see ddt_combined          */
  DDT_INDEXED  = 2,   /* elements chosen by an index array           */
  DDT_TENSOR   = 3    /* inner ddt repeated at indexed base offsets  */
} ddt_kind;

     /******************************************************
     *                                                     *
     *  head of ddt                                        *
     *                                                     *
     ******************************************************/

/* Common header; it is the first member of every ddt variant below,
   so kind, nbytes and ref_count can be read through the union
   without knowing the variant.                                      */

typedef struct 
 
     { ddt_kind kind;       /* variant tag, aliased by ddt_info.kind        */
       int      nbytes;     /* number of total bytes, maintained by
                               dalib_ddt_find_total                          */
       int      ref_count;  /* reference counter for free: number of extra
                               references set by dalib_ddt_setref;
                               dalib_ddt_free only decrements it while
                               it is > 0                                     */
     } ddt_head;

     /******************************************************
     *                                                     *
     *  repeating factor for one dimension of a section    *
     *                                                     *
     ******************************************************/

/* One dimension of a section: repeat elements, increment apart. */

typedef struct

     { int increment;   /* stride of this dimension, counted in elements
                           (dalib_ddt_unpack_section multiplies it with
                           the element size to get bytes)                */
       int repeat;      /* number of elements in this dimension; a value
                           of 1 is removed by the compress routines      */
     } ddt_dim;
     
     /******************************************************
     *                                                     *
     *  type for describing a section as a ddt             *
     *                                                     *
     *   address[k0*inc[0]+...+kn*inc[n]]  n = rank-1      *
     *     0 <= ki < repeat[i]                             *
     *                                                     *
     ******************************************************/

/* Variant for kind == DDT_SECTION: a strided, up to MAX_DIMENSIONS
   dimensional selection of elements starting at address.           */

typedef struct 

     { ddt_head head;                  /* common header, must stay first */
       char *address;                  /* address [...] will be section */
       int  size;                      /* size of one element in sec    */
       int  rank;                      /* rank of section               */
       ddt_dim dim [MAX_DIMENSIONS];   /* increment and repeat; only the
                                          first rank entries are used    */
     } ddt_section;

     /******************************************************
     *                                                     *
     *  type for describing a combination as a ddt         *
     *                                                     *
     ******************************************************/

/* Variant for kind == DDT_COMBINED: two ddts treated as one.
   dalib_ddt_free releases both components recursively.
   NOTE(review): dalib_ddt_def_combine in this file never builds this
   variant, it only passes one of its arguments through.              */

typedef struct 

     { ddt_head head;             /* common header, must stay first */
       dd_type  first, second;    /* the two component ddts         */
     } ddt_combined;

     /******************************************************
     *                                                     *
     *  type for describing a tensor as a ddt              *
     *                                                     *
     *  tensor_ddt is applied at each base address         *
     *  address + indexes[i]*increment*size,               *
     *  0 <= i < no_elems                                  *
     *                                                     *
     ******************************************************/

/* Variant for kind == DDT_TENSOR: the inner ddt tensor_ddt is applied
   once per index; dalib_ddt_unpack_tensor positions it at
   address + indexes[i] * increment * size.                            */

typedef struct

     { ddt_head head;          /* common header, must stay first */
       char *address;          /* base address, set by dalib_ddt_set_data */
       int size;               /* size of one element in bytes   */
       dd_type tensor_ddt;     /* inner ddt, released together with the
                                  tensor by dalib_ddt_free             */
       int increment;          /* every index is multiplied by increment */
       int no_elems;           /* number of indexes              */
       int *indexes;           /* integer indexes; pointer is stored as
                                  given, not copied and not freed here */
     } ddt_tensor;

     /******************************************************
     *                                                     *
     *  type for describing an indexing as a ddt           *
     *                                                     *
     *  address [indexes[i]], 0 <= i < no_elems            *
     *                                                     *
     ******************************************************/

/* Variant for kind == DDT_INDEXED: no_elems elements of size bytes
   selected through the index array.                                 */

typedef struct

     { ddt_head head;          /* common header, must stay first */
       char *address;          /* base address used for indexing */
       int size;               /* size of one element in bytes   */
       int no_elems;           /* number of indexes              */
       int *indexes;           /* integer indexes; pointer is stored as
                                  given, not copied and not freed here */
     } ddt_indexed;

     /******************************************************
     *                                                     *
     *  ddt is union of different types for the ddt        *
     *                                                     *
     ******************************************************/

/* A ddt object.  Every variant starts with a ddt_head whose first
   field is the kind, so kind and head are valid for all variants;
   which of the remaining members may be used is decided by kind.
   Objects are allocated with the size of the variant only (see the
   dalib_malloc calls in the def routines), not with sizeof the union.
   NOTE(review): dd_type is presumably a pointer to this union,
   declared in dalib.h - confirm.                                     */

union ddt_info

     { ddt_kind kind;           /* aliases head.kind                 */
       ddt_head head;           /* common header of all variants     */
       ddt_section  section;    /* valid if kind == DDT_SECTION      */
       ddt_indexed  indexed;    /* valid if kind == DDT_INDEXED      */
       ddt_combined combined;   /* valid if kind == DDT_COMBINED     */
       ddt_tensor   tensor;     /* valid if kind == DDT_TENSOR       */
     };

/*******************************************************************
*                                                                  *
*  printing of a derived data type                                 *
*                                                                  *
*******************************************************************/

/* Writes a human readable description of ddt to stdout, every line
   prefixed with pcb.i.  Sections list each dimension, indexed and
   tensor types list their index values; a combined ddt produces no
   output.                                                          */

void dalib_ddt_print (ddt)
dd_type ddt;

{ int k;

  /* an empty ddt has no structure behind it: report and leave */

  if (ddt == NO_DDT)
    { printf ("%d: derived data type is empty\n", pcb.i);
      return;
    }

  switch (ddt->kind) {

    case DDT_SECTION :

      printf ("%d: derived data type (section), rank = %d, size = %d, total = %d\n",
              pcb.i, ddt->section.rank, ddt->section.size, ddt->head.nbytes);

      /* one line per dimension, numbered from 1 */

      for (k = 0; k < ddt->section.rank; k++)
         printf ("%d: section (%d), inc = %d, repeat = %d\n",
                 pcb.i, k+1, ddt->section.dim[k].increment,
                 ddt->section.dim[k].repeat);
      break;

    case DDT_INDEXED :

      printf ("%d: ddt (indexed), size = %d, total = %d, = ",
               pcb.i, ddt->indexed.size, ddt->head.nbytes);

      for (k = 0; k < ddt->indexed.no_elems; k++)
         printf ("%d ", ddt->indexed.indexes[k]);

      printf ("\n");
      break;

    case DDT_TENSOR :

      printf ("%d: ddt (tensor), size = %d, inc = %d, no _elems = %d, ",
               pcb.i, ddt->tensor.size, ddt->tensor.increment,
               ddt->tensor.no_elems);

      for (k = 0; k < ddt->tensor.no_elems; k++)
         printf ("%d ", ddt->tensor.indexes[k]);

      printf ("\n");
      break;

    default :   /* other kinds (DDT_COMBINED) are not printed */

      break;

  } /* switch */

} /* dalib_ddt_print */

/*******************************************************************
*                                                                  *
*   - checking for a contiguous section                            *
*                                                                  *
*******************************************************************/

/* Returns 1 if the first rank entries of dim describe one dense run
   of elements, 0 otherwise.  Dense means: the first increment is 1
   and every further increment equals increment * repeat of the
   dimension before it.  A section of rank 0 is always dense.        */

static int dalib_ddt_dim_cont (dim, rank)
ddt_dim *dim;
int rank;

{ int k;

  if (rank == 0) return (1);

  if (dim[0].increment != 1) return (0);

  /* stop at the first dimension that leaves a gap */

  for (k = 1; k < rank; k++)
    { if (dim[k-1].increment * dim[k-1].repeat != dim[k].increment)
         return (0);
    }

  return (1);
}

/*******************************************************************
*                                                                  *
*  optimizing the representation of a section                      *
*                                                                  *
*   repeat[i] = 1  :   not a real dimension                        *
*                                                                  *
*   k[i-1]*inc[i-1] + k[i]*inc[i]   repeat[i-1]*inc[i-1] = inc[i]  *
*   k[i-1]*inc[i-1]  where 0 <= k[i-1] < repeat[i-1] * repeat[i]   *
*                                                                  *
*******************************************************************/

/* Removes every dimension with repeat == 1 from dim (such a dimension
   contributes exactly one position).  The remaining dimensions keep
   their order and are moved to the front in place; *rank is updated
   to the number of dimensions left.                                  */

static void dalib_ddt_compress_section1 (dim, rank)
ddt_dim *dim;
int *rank;

{ int pos, j, count;

  count = *rank;

  for (pos = 0; pos < count; )

    { if (dim[pos].repeat != 1)
        { pos++;             /* real dimension, keep it */
          continue;
        }

      /* close the gap: shift all following dimensions down by one,
         the entry now at pos is examined in the next iteration      */

      for (j = pos; j+1 < count; j++)  dim[j] = dim[j+1];

      count--;
    }

  *rank = count;

} /* dalib_ddt_compress_section1 */

/* Stronger compression of a section description, done in place:

     1. dimensions with repeat == 1 are removed
     2. neighbouring dimensions k-1, k with
        inc[k-1] * repeat[k-1] == inc[k] are merged into one dimension
     3. a leading dimension with increment 1 is folded into the
        element size (*size is multiplied with its repeat)

   dim   : dimensions of the section, modified in place
   rank  : in  - number of valid entries in dim
           out - number of entries left
   size  : in  - size of one element
           out - size of one (possibly enlarged) element

   Step 3 is only applied if a dimension is left; without that test
   dim[0] would be read although it is no longer (or never was) valid
   and the rank could drop to -1.

   NOTE(review): step 3 changes the element size but leaves the
   remaining increments untouched.  If these are counted in elements
   of the old size (as dalib_ddt_unpack_section treats the increments
   of a section) they would have to be rescaled - confirm with the
   callers before relying on this step.                               */

static void dalib_ddt_compress_section (dim, rank, size)
ddt_dim *dim;
int *rank, *size;

{ int i, k;
  int new_rank, new_size;

  new_rank = *rank;
  new_size = *size;

  /* step 1 : drop dimensions that have only one position */

  k = 0;
  while (k < new_rank)
   {  if (dim[k].repeat == 1)
        { for (i=k+1; i<new_rank; i++)  dim[i-1] = dim[i];
          new_rank --;
        }
      else k++;
   }

  /* step 2 : merge dimension k into k-1 if k-1 runs exactly up to
              the start of the next position of k                   */

  k = 1;
  while (k < new_rank)
   {  if (dim[k-1].increment * dim[k-1].repeat == dim[k].increment)
        { dim[k-1].repeat *= dim[k].repeat;
          for (i=k+1; i<new_rank; i++)  dim[i-1] = dim[i];
          new_rank --;
        }
      else k++;
   }

  /* step 3 : a dense leading dimension becomes part of the element */

  if (new_rank > 0 && dim[0].increment == 1)
   { new_size *= dim[0].repeat;
     for (i=1; i<new_rank; i++)  dim[i-1] = dim[i];
     new_rank --;
   }

  *size = new_size;
  *rank = new_rank;
}

/*******************************************************************
*                                                                  *
*   find total number of bytes for a derived data type (ddt)       *
*                                                                  *
*******************************************************************/

/* Computes the total number of bytes described by ddt and stores it
   in ddt->head.nbytes.  Nothing is done for NO_DDT.

     section  : size * product of all repeat values
     indexed  : size * no_elems
     combined : bytes of first + bytes of second (both recomputed)
     tensor   : bytes of the inner ddt (recomputed) * no_elems

   A component that is NO_DDT counts as 0 bytes instead of being
   dereferenced.  An unknown kind is reported as internal error; the
   byte count is 0 in that case instead of an undefined value.        */

static void dalib_ddt_find_total (ddt)
dd_type ddt;

{ int k, rank;
  ddt_dim *dim;
  dd_type sub;
  int bytes;

  if (ddt == NO_DDT) return;

  bytes = 0;

  switch (ddt->kind) { 

    case DDT_SECTION : { bytes = ddt->section.size;
                         rank  = ddt->section.rank;
                         dim   = ddt->section.dim;
                         for (k=0; k < rank; k++)
                            bytes *= dim[k].repeat;
                         break;
                       }
    case DDT_INDEXED : { bytes =  ddt->indexed.size;
                         bytes *= ddt->indexed.no_elems; 
                         break;
                       }
    case DDT_COMBINED: { sub = ddt->combined.first;
                         if (sub != NO_DDT)
                           { dalib_ddt_find_total (sub);
                             bytes += sub->head.nbytes;
                           }
                         sub = ddt->combined.second;
                         if (sub != NO_DDT)
                           { dalib_ddt_find_total (sub);
                             bytes += sub->head.nbytes;
                           }
                         break;
                       }

    case DDT_TENSOR:   { sub = ddt->tensor.tensor_ddt;
                         if (sub != NO_DDT)
                           { dalib_ddt_find_total (sub);
                             bytes  = sub->head.nbytes;
                             bytes *= ddt->tensor.no_elems;
                           }
                         break;
                       }

    default :          { dalib_internal_error ("ddt_find_total");
                         dalib_stop ();
                         break;
                       }

    } /* switch */

    ddt->head.nbytes = bytes;

} /* dalib_ddt_find_total */

/*******************************************************************
*                                                                  *
*   defining a contiguous memory area as a ddt                     *
*                                                                  *
*******************************************************************/

/* Defines a ddt for one contiguous piece of memory: a section of
   rank 0 with a single element of size bytes at memory.

   new_ddt : out - the new ddt (allocated here, release it with
                   dalib_ddt_free)
   memory  : start address of the data
   size    : number of bytes

   The result is stored in *new_ddt; without this assignment the
   caller would never see the new ddt and its memory would be lost.  */

void dalib_ddt_def_simple (new_ddt, memory, size)
dd_type *new_ddt;
char *memory;
int  size;

{  dd_type ddt;

   ddt = (dd_type) dalib_malloc (sizeof (ddt_section), "ddt_def_simple");

   ddt->kind = DDT_SECTION;
   ddt->head.ref_count   = 0;
   ddt->section.address = memory;
   ddt->section.size = size;
   ddt->section.rank = 0;       /* no dimension: exactly one element */
   ddt->head.nbytes  = size;

   *new_ddt = ddt;
   
} /* dalib_ddt_def_simple */

/*******************************************************************
*                                                                  *
*   defining of a section with rank = 0, 1, ... or 7               *
*                                                                  *
*******************************************************************/

void dalib_ddt_def_section (new_ddt, memory, size, rank, inc, n)

dd_type *new_ddt;
char *memory;
int size, rank;
int inc[], n[];

{  ddt_dim *dim;
   dd_type ddt;
   int save, empty;
   int new_rank;
   int i;

#ifdef DEBUG
   if (rank == 1)
     printf ("%d: def section = (inc/n) = [%d:%d]\n", pcb.i, inc[0], n[0]);
   if (rank == 2)
     printf ("%d: def section = (inc/n) = [%d:%d,%d:%d]\n",
             pcb.i, inc[0], n[0], inc[1], n[1]);
   if (rank >= 3)
     printf ("%d: def section = (inc/n) = [%d:%d,%d:%d,%d:%d,...]\n",
             pcb.i, inc[0], n[0], inc[1], n[1], inc[2], n[2]);
#endif

   empty = 0;
   if (size == 0) empty = 1;
   
   for (i=0; i<rank; i++) if (n[i] <= 0) empty = 1;

   if (empty)
      { *new_ddt = NO_DDT;
        return;
      }

   ddt = (dd_type) dalib_malloc (sizeof (ddt_section), "ddt_def_section");

   ddt->kind = DDT_SECTION;
   ddt->head.ref_count   = 0;
   ddt->section.address = memory;
   ddt->section.size = size;
   dim = ddt->section.dim;

   for (i=0; i<rank; i++)

     { dim[i].increment = inc[i]; dim[i].repeat = n[i];  }

   new_rank = rank;
   dalib_ddt_compress_section1 (dim, &new_rank); 
   ddt->section.rank = new_rank;

   dalib_ddt_find_total (ddt);

#ifdef DEBUG
   dalib_ddt_print (ddt);
#endif

   *new_ddt = ddt;
}

/*******************************************************************
*                                                                  *
*   defining an indexed memory area as a ddt                       *
*                                                                  *
*******************************************************************/

/* Defines a ddt for indexed data.

   new_ddt  : out - the new ddt, or NO_DDT if no_elems <= 0
   no_elems : number of indexes
   indexes  : the index values; only the pointer is stored, so the
              array must stay valid as long as the ddt is used

   Base address and element size are not known here, they are set
   later with dalib_ddt_set_data.  Until then the address is null and
   the element size is the neutral placeholder 1, so that the total
   size of the ddt equals the number of elements.

   NOTE(review): the placeholder was 2 before, which doubled the
   total size until dalib_ddt_set_data was called; no reason for that
   value is visible in this file - confirm that no caller relies
   on it.                                                             */

void dalib_ddt_def_indexed (new_ddt, no_elems, indexes)

dd_type *new_ddt;
int no_elems, *indexes;

{ dd_type ddt;

  if (no_elems <= 0)

   { *new_ddt = NO_DDT;
     return;
   }

  ddt = (dd_type) dalib_malloc (sizeof (ddt_indexed), "ddt_def_indexed");

  ddt->kind             = DDT_INDEXED;
  ddt->head.ref_count   = 0;
  ddt->indexed.size     = 1;            /* placeholder, see above */
  ddt->indexed.address  = (char *) 0;   /* set by dalib_ddt_set_data */
  ddt->indexed.no_elems = no_elems;
  ddt->indexed.indexes  = indexes;

  dalib_ddt_find_total (ddt);

#ifdef DEBUG
  dalib_ddt_print (ddt);
#endif

  *new_ddt = ddt;

} /* dalib_ddt_def_indexed */

/*******************************************************************
*                                                                  *
*   tensoring of a ddt                                             *
*                                                                  *
*******************************************************************/

/* Defines a tensor ddt: the inner ddt tensor_ddt is applied once for
   every index (see dalib_ddt_unpack_tensor for the addressing).

   new_ddt    : out - the new ddt, or NO_DDT if it describes no data
   tensor_ddt : inner ddt; it becomes part of the new ddt and is
                released together with it by dalib_ddt_free
   size       : size of one element in bytes
   inc        : factor applied to every index
   no_elems   : number of indexes
   indexes    : the index values; only the pointer is stored

   The result is empty (NO_DDT) if there are no indexes or if the
   inner ddt is empty; an empty inner ddt must not be stored, as
   computing the total size and unpacking access its header.        */

void dalib_ddt_def_tensor (new_ddt, tensor_ddt, size, inc, no_elems, indexes)

dd_type *new_ddt, tensor_ddt;
int     size, inc, no_elems;
int     *indexes;

{ dd_type ddt;

  if (no_elems <= 0 || tensor_ddt == NO_DDT)

   { *new_ddt = NO_DDT;
     return;
   }

  ddt = (dd_type) dalib_malloc (sizeof (ddt_tensor), "ddt_def_tensor");

  /* optimizations are possible for inc = 1, tensor_ddt is section */

  ddt->kind              = DDT_TENSOR;
  ddt->head.ref_count    = 0;
  ddt->tensor.tensor_ddt = tensor_ddt;
  ddt->tensor.size       = size;
  ddt->tensor.address    = (char *) 0;   /* set by dalib_ddt_set_data */
  ddt->tensor.increment  = inc;
  ddt->tensor.no_elems   = no_elems;
  ddt->tensor.indexes    = indexes;

  dalib_ddt_find_total (ddt);

#ifdef DEBUG
  dalib_ddt_print (ddt);
#endif

  *new_ddt = ddt;

} /* dalib_ddt_def_tensor */

/*******************************************************************
*                                                                  *
*   union of two ddts                                              *
*                                                                  *
*******************************************************************/

/* Union of two ddts.

   ddt        : out - the combined ddt
   ddt1, ddt2 : the two ddts to combine

   Only the trivial cases are supported: if one of the arguments is
   NO_DDT the other one is returned as it is (no new object is
   created).  Combining two non-empty ddts is reported as internal
   error; *ddt is set to NO_DDT then, so that the caller never works
   with an undefined value if the error routine returns.            */

void dalib_ddt_def_combine (ddt, ddt1, ddt2)
dd_type *ddt, ddt1, ddt2;

{ 
  if (ddt1 == NO_DDT)
   { *ddt = ddt2;
     return;
   }

  if (ddt2 == NO_DDT)
   { *ddt = ddt1;
     return;
   }

  dalib_internal_error ("ddt_combine not possible");

  *ddt = NO_DDT;
}

/*******************************************************************
*                                                                  *
*   updating of internal information of a derived data type        *
*                                                                  *
*******************************************************************/

void dalib_ddt_setref (ddt)
dd_type ddt;

{ /* ddt will be used internally for non-blocking receive, 
     so give it only free after second explicit call for free 
  */

  ddt->head.ref_count += 1;
}

void dalib_ddt_set_data (ddt, data, size)
dd_type ddt;
char *data;
int size;

{ if (ddt == NO_DDT)
     return;

  switch (ddt->kind) {

  case DDT_INDEXED  : { ddt->indexed.address = data;
                        ddt->indexed.size    = size;
                        break;
                      }
  case DDT_SECTION  : { ddt->section.address = data;
                        ddt->section.size    = size;
                        break;
                      }
  case DDT_TENSOR   : { ddt->tensor.address = data;
                        ddt->tensor.size    = size;
                        break;
                      }
  case DDT_COMBINED : { dalib_ddt_set_data (ddt->combined.first, data, size);
                        dalib_ddt_set_data (ddt->combined.second, data, size);
                        break;
                      }
  } /* switch */

  dalib_ddt_find_total (ddt);

} /* dalib_ddt_set_data */

/*******************************************************************
*                                                                  *
*   asking for properties (total bytes, is_contiguous)             *
*                                                                  *
*******************************************************************/

/* Returns in *size the number of bytes described by ddt as stored
   in its header; an empty ddt (NO_DDT) has 0 bytes.               */

void dalib_ddt_get_size (ddt, size)
dd_type ddt;
int *size;

{ *size = (ddt == NO_DDT) ? 0 : ddt->head.nbytes;

} /* dalib_ddt_get_size */

/*******************************************************************
*                                                                  *
*  void dalib_ddt_is_contiguous (dd_type ddt => int is,            *
*                                                                  *
*******************************************************************/

void dalib_ddt_is_contiguous (ddt, is, address)
dd_type ddt;
int *is;
char **address;

{ if (ddt == NO_DDT)
    { *is = 1;
      *address = (char *) 0;
      return;
    }

  switch (ddt->kind) {

  case DDT_SECTION  : { *is = dalib_ddt_dim_cont (ddt->section.dim,
                                                  ddt->section.rank);
                        *address = ddt->section.address;
                        break;
                      }
  case DDT_INDEXED  : { *is = 0;
                        *address = ddt->indexed.address;
                        break;
                      }
  case DDT_TENSOR   : { *is = 0;
                        *address = ddt->tensor.address;
                        break;
                      }
  default : { dalib_internal_error ("ddt_is_contiguous");
              dalib_stop ();
            }

  } /* switch */

}
     /******************************************************
     *                                                     *
     *  free derived data types                            *
     *                                                     *
     ******************************************************/

void dalib_ddt_free (ddt)
dd_type ddt;

{ if (ddt == NO_DDT) return;

  if (ddt->head.ref_count > 0)    /* free if no other pointers set */

     { ddt->head.ref_count --;
       return;
     }

  switch (ddt->kind) { 

    case DDT_SECTION : dalib_free (ddt, sizeof(ddt_section));
                       break;
    case DDT_INDEXED : dalib_free (ddt, sizeof(ddt_indexed));
                       break;
    case DDT_TENSOR  : dalib_ddt_free (ddt->tensor.tensor_ddt);
                       dalib_free (ddt, sizeof(ddt_tensor));
                       break;
    case DDT_COMBINED: dalib_ddt_free (ddt->combined.first);
                       dalib_ddt_free (ddt->combined.second);
                       dalib_free (ddt, sizeof(ddt_combined));
                       break;
    default          : dalib_internal_error ("illegal ddt in free");
                       dalib_stop ();
                   
   } /* switch */

} /* dalib_ddt_free */

/*******************************************************************
*                                                                  *
*   unpacking of data described with a derived data type           *
*                                                                  *
*   - note that buffer must have the corresponding size            *
*                                                                  *
*******************************************************************/

void dalib_ddt_unpack ();   /* forward declaration: dalib_ddt_unpack is
                               defined after dalib_ddt_unpack_tensor,
                               which calls it for the inner ddt
                               (recursive definition)                  */

     /******************************************************
     *                                                     *
     *  unpacking of a contiguous section with ddt         *
     *                                                     *
     ******************************************************/

/* Copies the contiguous buffer into the memory described by the
   section ddt.

   ddt    : must be of kind DDT_SECTION
   buffer : packed data, one element of ddt->section.size bytes for
            every position of the section

   The increments of the section are counted in elements, they are
   converted to bytes before the scatter routine for the rank of the
   section is called.

   Errors are reported with one message string, like everywhere else
   in this module.  After an error the routine returns; it must not
   read the section fields of a different ddt variant and must not
   copy more dimensions than the local array can hold.               */

static void dalib_ddt_unpack_section (ddt, buffer)
char    *buffer;
dd_type ddt;

{ int i, rank;
  char *target;
  int objsize;
  ddt_dim my_dim [MAX_DIMENSIONS];

  if (ddt->kind != DDT_SECTION)
     { dalib_internal_error ("ddt_unpack_section : not a section");
       return;
     }

  rank    = ddt->section.rank;
  objsize = ddt->section.size;
  target  = ddt->section.address;

  if (rank < 0 || rank > MAX_DIMENSIONS)
     { dalib_internal_error ("unpack buffer, illegal rank");
       return;
     }

  /* local copy of the dimensions with increments in bytes,
     the ddt itself is not changed                           */

  for (i=0; i<rank; i++)
     { my_dim[i] = ddt->section.dim[i];
       my_dim[i].increment *= objsize;
     }

#ifdef DEBUG
  printf ("%d: unpack section, rank = %d, objsize = %d\n",
          pcb.i, rank, objsize);
#endif

  switch (rank) {

  case 0 :  dalib_memcopy (target, buffer, objsize);
            break;

  case 1 :  dalib_scatter1d (target, buffer, objsize, 
                             my_dim[0].increment, my_dim[0].repeat);
            break;

  case 2 :  dalib_scatter2d (target, buffer, objsize, 
                             my_dim[0].increment, my_dim[0].repeat,
                             my_dim[1].increment, my_dim[1].repeat);
            break;

  case 3 :  dalib_scatter3d (target, buffer, objsize, 
                             my_dim[0].increment, my_dim[0].repeat,
                             my_dim[1].increment, my_dim[1].repeat,
                             my_dim[2].increment, my_dim[2].repeat);
            break;
  case 4 :  dalib_scatter4d (target, buffer, objsize, 
                             my_dim[0].increment, my_dim[0].repeat,
                             my_dim[1].increment, my_dim[1].repeat,
                             my_dim[2].increment, my_dim[2].repeat,
                             my_dim[3].increment, my_dim[3].repeat);
            break;
  case 5 :  dalib_scatter5d (target, buffer, objsize, 
                             my_dim[0].increment, my_dim[0].repeat,
                             my_dim[1].increment, my_dim[1].repeat,
                             my_dim[2].increment, my_dim[2].repeat,
                             my_dim[3].increment, my_dim[3].repeat,
                             my_dim[4].increment, my_dim[4].repeat);
            break;
  case 6 :  dalib_scatter6d (target, buffer, objsize, 
                             my_dim[0].increment, my_dim[0].repeat,
                             my_dim[1].increment, my_dim[1].repeat,
                             my_dim[2].increment, my_dim[2].repeat,
                             my_dim[3].increment, my_dim[3].repeat,
                             my_dim[4].increment, my_dim[4].repeat,
                             my_dim[5].increment, my_dim[5].repeat);
            break;
  case 7 :  dalib_scatter7d (target, buffer, objsize, 
                             my_dim[0].increment, my_dim[0].repeat,
                             my_dim[1].increment, my_dim[1].repeat,
                             my_dim[2].increment, my_dim[2].repeat,
                             my_dim[3].increment, my_dim[3].repeat,
                             my_dim[4].increment, my_dim[4].repeat,
                             my_dim[5].increment, my_dim[5].repeat,
                             my_dim[6].increment, my_dim[6].repeat);
            break;

  default : dalib_internal_error ("unpack buffer, illegal rank");
            break;

  } /* switch */

} /* dalib_ddt_unpack_section */

/* Moves the contiguous buffer into the memory described by the
   indexed ddt.

   ddt    : must be of kind DDT_INDEXED
   buffer : packed data, one element for every index
   op     : 0 - elements are stored (dalib_memset)
            otherwise - operation applied with the old values

   For op != 0 the size that belongs to op (dalib_get_reduction_size)
   must be the element size (dalib_set) or a divisor of it
   (dalib_setn with the number of parts per element); everything
   else is a size mismatch.

   op is declared explicitly (it was an implicit int before), a
   reduction size of 0 or less counts as mismatch instead of being
   used as divisor, and errors are reported with one message string
   like everywhere else in this module.                              */

static void dalib_ddt_unpack_indexed (ddt, buffer, op)
char    *buffer;
dd_type ddt;
int     op;

{ char *target;
  int objsize;
  int size1;
  int no_elems;
  int *indexes;
  int N;

  if (ddt->kind != DDT_INDEXED)
     { dalib_internal_error ("ddt_unpack_indexed : not indexed");
       return;
     }

  objsize  = ddt->indexed.size;
  target   = ddt->indexed.address;
  indexes  = ddt->indexed.indexes;
  no_elems = ddt->indexed.no_elems;

  if (op == 0)

     dalib_memset (target, indexes, buffer, no_elems, objsize);

   else if ((size1 = dalib_get_reduction_size (op)) == objsize)

     dalib_set (op, target, indexes, buffer, no_elems);

   else if (size1 > 0 && (N=objsize/size1) * size1 == objsize)

     /* every element consists of N parts of the reduction size */

     dalib_setn (op, target, indexes, buffer, no_elems, N);

  else

     { dalib_internal_error ("ddt_unpack_indexed : size mismatch");
       printf ("objsize = %d, size for op = %d\n", objsize, size1);
       dalib_stop ();
     }

} /* dalib_ddt_unpack_indexed */

void dalib_ddt_unpack_tensor (ddt, buffer, op)

char    *buffer;
dd_type ddt;
int     op;

{ int i, no_elems;

  int  objsize;
  int  inc;

  char *base_addr, *address;

  dd_type tensor_ddt;

  char *ptr;
  int  *indexes;
 
  base_addr  = ddt->tensor.address;
  objsize    = ddt->tensor.size;
  no_elems   = ddt->tensor.no_elems;
  inc        = ddt->tensor.increment;
  tensor_ddt = ddt->tensor.tensor_ddt;
  indexes    = ddt->tensor.indexes;


  ptr = buffer;

  for (i=0; i<no_elems; i++)

    { address = base_addr + indexes[i] * inc * objsize;

      dalib_ddt_set_data (tensor_ddt, address, objsize);

      dalib_ddt_unpack (tensor_ddt, ptr, op);

      ptr +=  tensor_ddt->head.nbytes;

    }

} /* dalib_ddt_unpack_tensor */

void dalib_ddt_unpack (ddt, buffer, op)
int     op;
char    *buffer;
dd_type ddt;

{ if (ddt == NO_DDT)
     return;

  switch (ddt->kind) {

  case DDT_SECTION  : { if (op == 0)
                           dalib_ddt_unpack_section (ddt, buffer);
                         else
                           dalib_internal_error ("unpack with op for section");
                        break;
                      }

  case DDT_INDEXED  : { dalib_ddt_unpack_indexed (ddt, buffer, op);
                        break;
                      }

  case DDT_TENSOR   : { dalib_ddt_unpack_tensor (ddt, buffer, op);
                        break;
                      }

  default          :  { dalib_internal_error ("ddt_unpack");
                        dalib_stop ();
                        break;
                      }
  } /* switch */

} /* dalib_ddt_unpack */

/*******************************************************************
*                                                                  *
*   packing of data described with a derived data type             *
*                                                                  *
*   - note that buffer must have the corresponding size            *
*                                                                  *
*******************************************************************/

/* forward declaration: dalib_ddt_pack_tensor calls dalib_ddt_pack
   before dalib_ddt_pack is defined further down in this file      */

void dalib_ddt_pack ();

     /******************************************************
     *                                                     *
     *  packing of a ddt into a contiguous memory section  *
     *                                                     *
     *  - buffer is already created and has enough space   *
     *                                                     *
     ******************************************************/

void dalib_ddt_pack_section (buffer, ddt)
char    *buffer;
dd_type ddt;

{ int i, rank;
  char *target;
  int objsize;

  ddt_dim my_dim[MAX_DIMENSIONS];

  if (ddt->kind != DDT_SECTION)
     dalib_internal_error ("ddt_unpack_section", "not a section");

  rank    = ddt->section.rank;
  objsize = ddt->section.size;
  target  = ddt->section.address;
  for (i=0; i<rank; i++) my_dim[i] = ddt->section.dim[i];

#ifdef DEBUG
  printf ("%d: pack section, rank = %d, objsize = %d\n",
          pcb.i, rank, objsize);
#endif

  for (i=0; i<rank; i++) my_dim[i].increment *= objsize;

  switch (rank) {

  case 0 :  dalib_memcopy (buffer, target, objsize);
            break;

  case 1 :  dalib_gather1d (buffer, target, objsize, 
                            my_dim[0].increment, my_dim[0].repeat);
            break;

  case 2 :  dalib_gather2d (buffer, target, objsize, 
                            my_dim[0].increment, my_dim[0].repeat,
                            my_dim[1].increment, my_dim[1].repeat);
            break;

  case 3 :  dalib_gather3d (buffer, target, objsize, 
                            my_dim[0].increment, my_dim[0].repeat,
                            my_dim[1].increment, my_dim[1].repeat,
                            my_dim[2].increment, my_dim[2].repeat);
            break;
  case 4 :  dalib_gather4d (buffer, target, objsize, 
                            my_dim[0].increment, my_dim[0].repeat,
                            my_dim[1].increment, my_dim[1].repeat,
                            my_dim[2].increment, my_dim[2].repeat,
                            my_dim[3].increment, my_dim[3].repeat);
            break;
  case 5 :  dalib_gather5d (buffer, target, objsize, 
                            my_dim[0].increment, my_dim[0].repeat,
                            my_dim[1].increment, my_dim[1].repeat,
                            my_dim[2].increment, my_dim[2].repeat,
                            my_dim[3].increment, my_dim[3].repeat,
                            my_dim[4].increment, my_dim[4].repeat);
            break;
  case 6 :  dalib_gather6d (buffer, target, objsize, 
                            my_dim[0].increment, my_dim[0].repeat,
                            my_dim[1].increment, my_dim[1].repeat,
                            my_dim[2].increment, my_dim[2].repeat,
                            my_dim[3].increment, my_dim[3].repeat,
                            my_dim[4].increment, my_dim[4].repeat,
                            my_dim[5].increment, my_dim[5].repeat);
            break;
  case 7 :  dalib_gather7d (buffer, target, objsize, 
                            my_dim[0].increment, my_dim[0].repeat,
                            my_dim[1].increment, my_dim[1].repeat,
                            my_dim[2].increment, my_dim[2].repeat,
                            my_dim[3].increment, my_dim[3].repeat,
                            my_dim[4].increment, my_dim[4].repeat,
                            my_dim[5].increment, my_dim[5].repeat,
                            my_dim[6].increment, my_dim[6].repeat);
            break;

  default : dalib_internal_error ("unpack buffer, illegal rank");
            break;

  } /* switch */

} /* dalib_ddt_pack_section */

void dalib_ddt_pack_indexed (buffer, ddt)

char    *buffer;
dd_type ddt;

{ char *target;
  int objsize;
  int no_elems;
  int *indexes;

  if (ddt->kind != DDT_INDEXED)
     dalib_internal_error ("ddt_pack_indexed", "not indexed");

  objsize  = ddt->indexed.size;
  target   = ddt->indexed.address;
  indexes  = ddt->indexed.indexes;
  no_elems = ddt->indexed.no_elems;

  dalib_memget (buffer, target, indexes, no_elems, objsize);

} /* dalib_ddt_pack_indexed */

/*******************************************************************
*                                                                  *
*   void dalib_ddt_pack_tensor (char *buffer, dd_type ddt)         *
*                                                                  *
*******************************************************************/

/* debugging aid: prints the first N int values of buffer on one
   line, preceded by the value of pcb.i                           */

void dalib_print_buffer (buffer, N)

int *buffer;
int N;

{ int *cursor = buffer;
  int remaining;

  printf ("%d: content of buffer = ", pcb.i);

  for (remaining = N; remaining > 0; remaining--)
    { printf ("%d ", *cursor);
      cursor++;
    }

  printf ("\n");

} 

void dalib_ddt_pack_tensor (buffer, ddt)

char    *buffer;
dd_type ddt;

{ int i, no_elems;

  int  objsize;
  int  inc;

  char *base_addr, *address;

  dd_type tensor_ddt;

  char *ptr;
  int  *indexes;
  
  base_addr  = ddt->tensor.address;
  objsize    = ddt->tensor.size;
  no_elems   = ddt->tensor.no_elems;
  inc        = ddt->tensor.increment;
  tensor_ddt = ddt->tensor.tensor_ddt;
  indexes    = ddt->tensor.indexes;

  ptr = buffer;

  for (i=0; i<no_elems; i++)

    { address = base_addr + indexes[i] * inc * objsize;

      dalib_ddt_set_data (tensor_ddt, address, objsize);

      dalib_ddt_pack (ptr, tensor_ddt);

      ptr += tensor_ddt->head.nbytes;

    }

#ifdef DEBUG
  dalib_print_buffer (buffer, no_elems);
#endif

} /* dalib_ddt_pack_tensor */

void dalib_ddt_pack (buffer, ddt)
char    *buffer;
dd_type ddt;

{ if (ddt == NO_DDT)
     return;

  switch (ddt->kind) {
  
  case DDT_SECTION  : { dalib_ddt_pack_section (buffer, ddt);
                        break;
                      }
  case DDT_INDEXED  : { dalib_ddt_pack_indexed (buffer, ddt);
                        break;
                      }
  case DDT_TENSOR   : { dalib_ddt_pack_tensor (buffer, ddt);
                        break;
                      }
  case DDT_COMBINED : { dalib_internal_error ("no pack for combined ddt");
                        break;
                      }

  } /* switch */

}

/*******************************************************************
*                                                                  *
*   moving data between two derived data types                     *
*                                                                  *
*******************************************************************/

void dalib_ddt_move (ddt_target, ddt_source)
dd_type ddt_source, ddt_target;

{ int size, size1;
  char *mem;

  dalib_ddt_get_size (ddt_target, &size);
  dalib_ddt_get_size (ddt_source, &size1);

  if (size != size1)
    { dalib_internal_error ("ddt_move: size mismatch");
      printf ("source has size : %d, target has size : %d\n", size1, size);
      dalib_stop ();
    }

  if (size == 0) return;

  /* here are more efficient solutions possible */

  mem = (char *) dalib_malloc (size, "dalib_ddt_move");
  dalib_ddt_pack (mem, ddt_source);
  dalib_ddt_unpack (ddt_target, mem, 0);
  dalib_free (mem, size);

} /* dalib_ddt_move */
