/***************************************************************************
 *                                                                         *
 *  Author      : James Cownie Meiko  / Thomas Brandes GMD                 *
 *  Copyright   : Public Domain                                            *
 *  Date        : April 93                                                 *
 *  Last Update : October 93                                               *
 *                                                                         *
 *  This Module is part of the DALIB                                       *
 *                                                                         *
 *  Module      : memgather.c                                              *
 *                                                                         *
 *  Function    : Gathering of strided arrays to contiguous memory section *
 *                                                                         *
 *  Export :    ONLY INTERNAL USE IN DALIB                                 *
 *                                                                         *
 *   void dalib_gather1d (target, source, size, stride1, n1)               *
 *   void dalib_gather2d (target, source, size, stride1, n1, stride2, n2)  *
 *                                                                         *
 *   ....                                                                  *
 *                                                                         *
 *   unsigned char *target, *source;                                       *
 *   int size, stride1, n1, ...., stridek, nk                              *
 *                                                                         *
 **************************************************************************/

#undef DEBUG

#include <stdio.h>
#include "dalib.h"

/**************************************************************************
*                                                                         *
*     GGGGG       A    TTTTTTT  H     H    EEEEEE   RRRR                  *
*    G           A A      T     H     H    E       R    R                 *
*    G           A A      T     H     H    E       R    R                 *
*    G   GGG    A   A     T     HHHHHHH    EEEEEE  RRRRR                  *
*    G     G    AAAAA     T     H     H    E       R  R                   *
*    G     G    A   A     T     H     H    E       R   R                  *
*     GGGGGG    A   A     T     H     H    EEEEEE  R    R                 *
*                                                                         *
**************************************************************************/

/**************************************************************************
*                                                                         *
*  dalib_gather1d (target, source, size, stride1, n1)                     *
*                                                                         *
*    -> dalib_gather1d8   addresses, size, strides all multiples of 8     *
*    -> dalib_gather1d4   addresses, size, strides all multiples of 4     *
*    -> dalib_gather1d1   otherwise                                       *
*                                                                         *
**************************************************************************/

/* This routine does no copying itself: it only selects the widest unit
   (8, 4 or 1) that both addresses, size and stride1 are multiples of and
   calls the matching worker, which is defined outside this file.
   size and stride1 are handed on divided by that unit; n1 is unchanged.
   NOTE(review): size and stride1 are presumably byte counts - confirm
   against the dalib_gather1d<k> workers.                                 */

void dalib_gather1d (target, source, size, stride1, n1)
unsigned char * target, * source;
int size, stride1, n1;

{   /* Only the low-order bits are inspected.  unsigned long is used for
       the pointer conversion because int is narrower than a pointer on
       LP64 systems.                                                      */

    unsigned long code = ((unsigned long) target) | ((unsigned long) source)
                         | (unsigned long) (size | stride1);

#ifdef DEBUG
  printf (
   "%d: dalib_gather1d, %p <- %p, size = %d, inc/n : %d/%d\n",
        pcb.i, (void *) target, (void *) source, size, stride1, n1);
#endif

    if ((code & 0x07) == 0)

    {  /* Optimise cases which are multiples of doubles, double aligned */

       dalib_gather1d8 (target, source, size >> 3, stride1 >> 3, n1);
    }

    else if ((code & 0x03) == 0)

    {  /* Optimise cases which are multiples of ints, int aligned */

       dalib_gather1d4 (target, source, size >> 2, stride1 >> 2, n1);
    }

    else

       /* no common alignment: fall back to the byte-wise worker */

       dalib_gather1d1 (target, source, size, stride1, n1);

} /* dalib_gather1d */

/**************************************************************************
*                                                                         *
*  dalib_gather2d (target, source, size, stride1, n1, stride2, n2)        *
*                                                                         *
*    -> dalib_gather1d    when two neighbouring dimensions merge           *
*    -> dalib_gather2d8   addresses, size, strides all multiples of 8     *
*    -> dalib_gather2d4   addresses, size, strides all multiples of 4     *
*    -> dalib_gather2d1   otherwise                                       *
*                                                                         *
**************************************************************************/

/* Dimensions 1 and 2 merge when stride1*n1 == stride2; the call is then
   forwarded to dalib_gather1d with n1*n2 elements.  Otherwise the widest
   unit (8, 4 or 1) dividing both addresses, size and both strides is
   selected and the matching worker (defined outside this file) is called
   with size and strides divided by that unit; the counts are unchanged.
   NOTE(review): size and strides are presumably byte counts - confirm.   */

void dalib_gather2d (target, source, size, stride1, n1, stride2, n2)
unsigned char *target, *source;
int size, stride1, n1, stride2, n2;

{   /* unsigned long: int is narrower than a pointer on LP64 systems */

    unsigned long code;

#ifdef DEBUG
  printf (
   "%d: dalib_gather2d, %p <- %p, size = %d, inc/n : %d/%d %d/%d\n",
        pcb.i, (void *) target, (void *) source, size,
        stride1, n1, stride2, n2);
#endif

    /* dimension 2 continues exactly where dimension 1 ends */

    if (stride1*n1 == stride2)
       { dalib_gather1d (target, source, size, stride1, n1*n2);
         return;
       }

    /* only the low-order bits are inspected below */

    code = ((unsigned long) target) | ((unsigned long) source)
           | (unsigned long) (size | stride1 | stride2);

    if ((code & 0x07) == 0)

    {  /* Optimise cases which are multiples of doubles, double aligned */

       dalib_gather2d8 (target, source, size >> 3, stride1 >> 3, n1,
                                                   stride2 >> 3, n2);
    }

    else if ((code & 0x03) == 0)

    {  /* Optimise cases which are multiples of ints, int aligned */

       dalib_gather2d4 (target, source, size >> 2, stride1 >> 2, n1,
                                                   stride2 >> 2, n2);
    }

    else

       /* no common alignment: fall back to the byte-wise worker */

       dalib_gather2d1 (target, source, size, stride1, n1, stride2, n2);

} /* dalib_gather2d */

/**************************************************************************
*                                                                         *
*  dalib_gather3d (target, source, size, stride1, n1, ..., stride3, n3)   *
*                                                                         *
*    -> dalib_gather2d    when two neighbouring dimensions merge           *
*    -> dalib_gather3d8   addresses, size, strides all multiples of 8     *
*    -> dalib_gather3d4   addresses, size, strides all multiples of 4     *
*    -> dalib_gather3d1   otherwise                                       *
*                                                                         *
**************************************************************************/

/* Dimensions k and k+1 merge when stride<k>*n<k> == stride<k+1>; the
   pairs are tested from the outermost (2,3) to the innermost (1,2) and
   the first match forwards the call to dalib_gather2d with the two
   counts multiplied.  Otherwise the widest unit (8, 4 or 1) dividing both
   addresses, size and all strides is selected and the matching worker
   (defined outside this file) is called with size and strides divided by
   that unit; the counts are unchanged.
   NOTE(review): size and strides are presumably byte counts - confirm.   */

void dalib_gather3d (target, source, size, stride1, n1, stride2, n2,
                                           stride3, n3              )
unsigned char *target, *source;
int size, stride1, n1, stride2, n2, stride3, n3;

{   /* unsigned long: int is narrower than a pointer on LP64 systems */

    unsigned long code;

#ifdef DEBUG
  printf (
   "%d: dalib_gather3d, %p <- %p, size = %d, inc/n : %d/%d %d/%d %d/%d\n",
        pcb.i, (void *) target, (void *) source, size, stride1, n1,
        stride2, n2, stride3, n3);
#endif

    /* merge dimensions 2 and 3 */

    if (stride2*n2 == stride3)
       { dalib_gather2d (target, source, size, stride1, n1, stride2, n2*n3);
         return;
       }

    /* merge dimensions 1 and 2 */

    if (stride1*n1 == stride2)
       { dalib_gather2d (target, source, size, stride1, n1*n2, stride3, n3);
         return;
       }

    /* only the low-order bits are inspected below */

    code = ((unsigned long) target) | ((unsigned long) source)
           | (unsigned long) (size | stride1 | stride2 | stride3);

    if ((code & 0x07) == 0)

    {  /* Optimise cases which are multiples of doubles, double aligned */

       dalib_gather3d8 (target, source, size >> 3, stride1 >> 3, n1,
                                 stride2 >> 3, n2, stride3 >> 3, n3);
    }

    else if ((code & 0x03) == 0)

    {  /* Optimise cases which are multiples of ints, int aligned */

       dalib_gather3d4 (target, source, size >> 2, stride1 >> 2, n1,
                                 stride2 >> 2, n2, stride3 >> 2, n3);
    }

    else

       /* no common alignment: fall back to the byte-wise worker */

       dalib_gather3d1 (target, source, size, stride1, n1, stride2, n2,
                        stride3, n3);

} /* dalib_gather3d */

/**************************************************************************
*                                                                         *
*  dalib_gather4d (target, source, size, stride1, n1, ..., stride4, n4)   *
*                                                                         *
*    -> dalib_gather3d    when two neighbouring dimensions merge           *
*    -> dalib_gather4d8   addresses, size, strides all multiples of 8     *
*    -> dalib_gather4d4   addresses, size, strides all multiples of 4     *
*    -> dalib_gather4d1   otherwise                                       *
*                                                                         *
**************************************************************************/

/* Dimensions k and k+1 merge when stride<k>*n<k> == stride<k+1>; the
   pairs are tested from the outermost (3,4) to the innermost (1,2) and
   the first match forwards the call to dalib_gather3d with the two
   counts multiplied.  Otherwise the widest unit (8, 4 or 1) dividing both
   addresses, size and all strides is selected and the matching worker
   (defined outside this file) is called with size and strides divided by
   that unit; the counts are unchanged.
   NOTE(review): size and strides are presumably byte counts - confirm.   */

void dalib_gather4d (target, source, size, stride1, n1, stride2, n2,
                                           stride3, n3, stride4, n4 )
unsigned char *target, *source;
int size, stride1, n1, stride2, n2, stride3, n3, stride4, n4;

{   /* unsigned long: int is narrower than a pointer on LP64 systems */

    unsigned long code;

#ifdef DEBUG
  printf (
   "%d: dalib_gather4d, %p <- %p, size = %d, inc/n : %d/%d %d/%d %d/%d %d/%d\n",
        pcb.i, (void *) target, (void *) source, size, stride1, n1,
        stride2, n2, stride3, n3, stride4, n4);
#endif

    /* merge dimensions 3 and 4 */

    if (stride3*n3 == stride4)
       { dalib_gather3d (target, source, size, stride1, n1, stride2, n2,
                          stride3, n3*n4);
         return;
       }

    /* merge dimensions 2 and 3 */

    if (stride2*n2 == stride3)
       { dalib_gather3d (target, source, size, stride1, n1, stride2, n2*n3,
                         stride4, n4);
         return;
       }

    /* merge dimensions 1 and 2 */

    if (stride1*n1 == stride2)
       { dalib_gather3d (target, source, size, stride1, n1*n2, stride3, n3,
                         stride4, n4);
         return;
       }

    /* only the low-order bits are inspected below */

    code = ((unsigned long) target) | ((unsigned long) source)
           | (unsigned long) (size | stride1 | stride2 | stride3 | stride4);

    if ((code & 0x07) == 0)

    {  /* Optimise cases which are multiples of doubles, double aligned */

       dalib_gather4d8 (target, source, size >> 3, stride1 >> 3, n1,
                        stride2 >> 3, n2, stride3 >> 3, n3, stride4 >> 3, n4);
    }

    else if ((code & 0x03) == 0)

    {  /* Optimise cases which are multiples of ints, int aligned */

       dalib_gather4d4 (target, source, size >> 2, stride1 >> 2, n1,
                        stride2 >> 2, n2, stride3 >> 2, n3, stride4 >> 2, n4);
    }

    else

       /* no common alignment: fall back to the byte-wise worker */

       dalib_gather4d1 (target, source, size, stride1, n1, stride2, n2,
                        stride3, n3, stride4, n4);

} /* dalib_gather4d */

/**************************************************************************
*                                                                         *
*  dalib_gather5d (target, source, size, stride1, n1, ..., stride5, n5)   *
*                                                                         *
*    -> dalib_gather4d    when two neighbouring dimensions merge           *
*    -> dalib_gather5d8   addresses, size, strides all multiples of 8     *
*    -> dalib_gather5d4   addresses, size, strides all multiples of 4     *
*    -> dalib_gather5d1   otherwise                                       *
*                                                                         *
**************************************************************************/

/* Dimensions k and k+1 merge when stride<k>*n<k> == stride<k+1>; the
   pairs are tested from the outermost (4,5) to the innermost (1,2) and
   the first match forwards the call to dalib_gather4d with the two
   counts multiplied.  Otherwise the widest unit (8, 4 or 1) dividing both
   addresses, size and all strides is selected and the matching worker
   (defined outside this file) is called with size and strides divided by
   that unit; the counts are unchanged.
   NOTE(review): size and strides are presumably byte counts - confirm.   */

void dalib_gather5d (target, source, size, stride1, n1, stride2, n2,
                              stride3, n3, stride4, n4, stride5, n5 )
unsigned char *target, *source;
int size, stride1, n1, stride2, n2, stride3, n3, stride4, n4, stride5, n5;

{   /* unsigned long: int is narrower than a pointer on LP64 systems */

    unsigned long code;

#ifdef DEBUG
  printf (
   "%d: dalib_gather5d, %p <- %p, size = %d, inc/n : %d/%d %d/%d %d/%d %d/%d %d/%d\n",
        pcb.i, (void *) target, (void *) source, size, stride1, n1,
        stride2, n2, stride3, n3, stride4, n4, stride5, n5);
#endif

    /* merge dimensions 4 and 5 */

    if (stride4*n4 == stride5)
       { dalib_gather4d (target, source, size, stride1, n1, stride2, n2,
                          stride3, n3, stride4, n4*n5);
         return;
       }

    /* merge dimensions 3 and 4 */

    if (stride3*n3 == stride4)
       { dalib_gather4d (target, source, size, stride1, n1, stride2, n2,
                          stride3, n3*n4, stride5, n5);
         return;
       }

    /* merge dimensions 2 and 3 */

    if (stride2*n2 == stride3)
       { dalib_gather4d (target, source, size, stride1, n1, stride2, n2*n3,
                         stride4, n4, stride5, n5);
         return;
       }

    /* merge dimensions 1 and 2 */

    if (stride1*n1 == stride2)
       { dalib_gather4d (target, source, size, stride1, n1*n2, stride3, n3,
                         stride4, n4, stride5, n5);
         return;
       }

    /* only the low-order bits are inspected below */

    code = ((unsigned long) target) | ((unsigned long) source)
           | (unsigned long) (size | stride1 | stride2 | stride3
                                   | stride4 | stride5);

    if ((code & 0x07) == 0)

    {  /* Optimise cases which are multiples of doubles, double aligned */

       dalib_gather5d8 (target, source, size >> 3, stride1 >> 3, n1,
                        stride2 >> 3, n2, stride3 >> 3, n3, stride4 >> 3, n4,
                        stride5 >> 3, n5);
    }

    else if ((code & 0x03) == 0)

    {  /* Optimise cases which are multiples of ints, int aligned */

       dalib_gather5d4 (target, source, size >> 2, stride1 >> 2, n1,
                        stride2 >> 2, n2, stride3 >> 2, n3, stride4 >> 2, n4,
                        stride5 >> 2, n5);
    }

    else

       /* no common alignment: fall back to the byte-wise worker */

       dalib_gather5d1 (target, source, size, stride1, n1, stride2, n2,
                        stride3, n3, stride4, n4, stride5, n5);

} /* dalib_gather5d */

/**************************************************************************
*                                                                         *
*  dalib_gather6d (target, source, size, stride1, n1, ..., stride6, n6)   *
*                                                                         *
*    -> dalib_gather5d    when two neighbouring dimensions merge           *
*    -> dalib_gather6d8   addresses, size, strides all multiples of 8     *
*    -> dalib_gather6d4   addresses, size, strides all multiples of 4     *
*    -> dalib_gather6d1   otherwise                                       *
*                                                                         *
**************************************************************************/

/* Dimensions k and k+1 merge when stride<k>*n<k> == stride<k+1>; the
   pairs are tested from the outermost (5,6) to the innermost (1,2) and
   the first match forwards the call to dalib_gather5d with the two
   counts multiplied.  Otherwise the widest unit (8, 4 or 1) dividing both
   addresses, size and all strides is selected and the matching worker
   (defined outside this file) is called with size and strides divided by
   that unit; the counts are unchanged.
   NOTE(review): size and strides are presumably byte counts - confirm.   */

void dalib_gather6d (target, source, size, stride1, n1, stride2, n2,
                              stride3, n3, stride4, n4, stride5, n5, 
                                                        stride6, n6 )

unsigned char *target, *source;
int size, stride1, n1, stride2, n2, stride3, n3, stride4, n4, 
          stride5, n5, stride6, n6;

{   /* unsigned long: int is narrower than a pointer on LP64 systems */

    unsigned long code;

#ifdef DEBUG
  printf (
   "%d: dalib_gather6d, %p <- %p, size = %d, inc/n : %d/%d %d/%d %d/%d %d/%d %d/%d %d/%d\n",
        pcb.i, (void *) target, (void *) source, size, stride1, n1,
        stride2, n2, stride3, n3, stride4, n4, stride5, n5, stride6, n6);
#endif

    /* merge dimensions 5 and 6 */

    if (stride5*n5 == stride6)
       { dalib_gather5d (target, source, size, stride1, n1, stride2, n2,
                          stride3, n3, stride4, n4, stride5, n5*n6);
         return;
       }

    /* merge dimensions 4 and 5 */

    if (stride4*n4 == stride5)
       { dalib_gather5d (target, source, size, stride1, n1, stride2, n2,
                          stride3, n3, stride4, n4*n5, stride6, n6);
         return;
       }

    /* merge dimensions 3 and 4 */

    if (stride3*n3 == stride4)
       { dalib_gather5d (target, source, size, stride1, n1, stride2, n2,
                          stride3, n3*n4, stride5, n5, stride6, n6);
         return;
       }

    /* merge dimensions 2 and 3 */

    if (stride2*n2 == stride3)
       { dalib_gather5d (target, source, size, stride1, n1, stride2, n2*n3,
                         stride4, n4, stride5, n5, stride6, n6);
         return;
       }

    /* merge dimensions 1 and 2 */

    if (stride1*n1 == stride2)
       { dalib_gather5d (target, source, size, stride1, n1*n2, stride3, n3,
                         stride4, n4, stride5, n5, stride6, n6);
         return;
       }

    /* only the low-order bits are inspected below */

    code = ((unsigned long) target) | ((unsigned long) source)
           | (unsigned long) (size | stride1 | stride2 | stride3
                                   | stride4 | stride5 | stride6);

    if ((code & 0x07) == 0)

    {  /* Optimise cases which are multiples of doubles, double aligned */

       dalib_gather6d8 (target, source, size >> 3, stride1 >> 3, n1,
                        stride2 >> 3, n2, stride3 >> 3, n3, stride4 >> 3, n4,
                        stride5 >> 3, n5, stride6 >> 3, n6);
    }

    else if ((code & 0x03) == 0)

    {  /* Optimise cases which are multiples of ints, int aligned */

       dalib_gather6d4 (target, source, size >> 2, stride1 >> 2, n1,
                        stride2 >> 2, n2, stride3 >> 2, n3, stride4 >> 2, n4,
                        stride5 >> 2, n5, stride6 >> 2, n6);
    }

    else

       /* no common alignment: fall back to the byte-wise worker */

       dalib_gather6d1 (target, source, size, stride1, n1, stride2, n2,
                        stride3, n3, stride4, n4, stride5, n5, stride6, n6);

} /* dalib_gather6d */

/**************************************************************************
*                                                                         *
*  dalib_gather7d (target, source, size, stride1, n1, ..., stride7, n7)   *
*                                                                         *
*    -> dalib_gather6d    when two neighbouring dimensions merge           *
*    -> dalib_gather7d8   addresses, size, strides all multiples of 8     *
*    -> dalib_gather7d4   addresses, size, strides all multiples of 4     *
*    -> dalib_gather7d1   otherwise                                       *
*                                                                         *
**************************************************************************/

/* Dimensions k and k+1 merge when stride<k>*n<k> == stride<k+1>; the
   pairs are tested from the outermost (6,7) to the innermost (1,2) and
   the first match forwards the call to dalib_gather6d with the two
   counts multiplied.  Otherwise the widest unit (8, 4 or 1) dividing both
   addresses, size and all strides is selected and the matching worker
   (defined outside this file) is called with size and strides divided by
   that unit; the counts are unchanged.
   NOTE(review): size and strides are presumably byte counts - confirm.   */

void dalib_gather7d (target, source, size, stride1, n1, stride2, n2,
                              stride3, n3, stride4, n4, stride5, n5, 
                                           stride6, n6, stride7, n7 )

unsigned char *target, *source;
int size, stride1, n1, stride2, n2, stride3, n3, stride4, n4, 
          stride5, n5, stride6, n6, stride7, n7;

{   /* unsigned long: int is narrower than a pointer on LP64 systems */

    unsigned long code;

#ifdef DEBUG
  printf (
   "%d: dalib_gather7d, %p <- %p, size = %d, inc/n : %d/%d %d/%d %d/%d %d/%d %d/%d %d/%d %d/%d\n",
        pcb.i, (void *) target, (void *) source, size,
                        stride1, n1, stride2, n2, stride3, n3,
                        stride4, n4, stride5, n5, stride6, n6, stride7, n7);
#endif

    /* merge dimensions 6 and 7 */

    if (stride6*n6 == stride7)
       { dalib_gather6d (target, source, size, stride1, n1, stride2, n2,
                  stride3, n3, stride4, n4, stride5, n5, stride6, n6*n7);
         return;
       }

    /* merge dimensions 5 and 6 */

    if (stride5*n5 == stride6)
       { dalib_gather6d (target, source, size, stride1, n1, stride2, n2,
                  stride3, n3, stride4, n4, stride5, n5*n6, stride7, n7);
         return;
       }

    /* merge dimensions 4 and 5 */

    if (stride4*n4 == stride5)
       { dalib_gather6d (target, source, size, stride1, n1, stride2, n2,
                  stride3, n3, stride4, n4*n5, stride6, n6, stride7, n7);
         return;
       }

    /* merge dimensions 3 and 4 */

    if (stride3*n3 == stride4)
       { dalib_gather6d (target, source, size, stride1, n1, stride2, n2,
                  stride3, n3*n4, stride5, n5, stride6, n6, stride7, n7);
         return;
       }

    /* merge dimensions 2 and 3 */

    if (stride2*n2 == stride3)
       { dalib_gather6d (target, source, size, stride1, n1, stride2, n2*n3,
                        stride4, n4, stride5, n5, stride6, n6, stride7, n7);
         return;
       }

    /* merge dimensions 1 and 2 */

    if (stride1*n1 == stride2)
       { dalib_gather6d (target, source, size, stride1, n1*n2, stride3, n3,
                        stride4, n4, stride5, n5, stride6, n6, stride7, n7);
         return;
       }

    /* only the low-order bits are inspected below */

    code = ((unsigned long) target) | ((unsigned long) source)
           | (unsigned long) (size | stride1 | stride2 | stride3 | stride4
                                   | stride5 | stride6 | stride7);

    if ((code & 0x07) == 0)

    {  /* Optimise cases which are multiples of doubles, double aligned */

       dalib_gather7d8 (target, source, size >> 3, stride1 >> 3, n1,
                        stride2 >> 3, n2, stride3 >> 3, n3, stride4 >> 3, n4,
                        stride5 >> 3, n5, stride6 >> 3, n6, stride7 >> 3, n7);
    }

    else if ((code & 0x03) == 0)

    {  /* Optimise cases which are multiples of ints, int aligned */

       dalib_gather7d4 (target, source, size >> 2, stride1 >> 2, n1,
                        stride2 >> 2, n2, stride3 >> 2, n3, stride4 >> 2, n4,
                        stride5 >> 2, n5, stride6 >> 2, n6, stride7 >> 2, n7);
    }

    else

       /* no common alignment: fall back to the byte-wise worker */

       dalib_gather7d1 (target, source, size, stride1, n1, stride2, n2,
                        stride3, n3, stride4, n4, stride5, n5, 
                        stride6, n6, stride7, n7);

} /* dalib_gather7d */
