/*  erewprefix.c   by C.W.Kessler 12/95
 *  general integer multiprefix-ADD implementation in Fork95
 *  for EREW PRAM (no mpadd() available, e.g. for floats)
 *  for n>=p, p|n, uses the Ladner/Fischer algorithm as a subroutine,
 *  takes O((n/p) log p) time on p-processor SB-PRAM without using mpadd()
 *
 *  This is not work-optimal but no faster EREW algorithm is known.
 */

#include <fork.h>
#include <syscall.h>
#include <io.h>

/* Prototypes of the synchronous routines defined further down in this file. */
extern sync void output_array( sh int *arr, sh int n );
extern sync int par_prefix_add( sh int *in, sh int *out, sh int initsum );
extern sync int parallel_prefix_add( sh int *in, sh int n, sh int *out, sh int initsum );

/* Shared state of the demo: a is the operand array, b the result array,
 * n the number of elements in each of them. */
sh int *a;
sh int *b;
sh int n = 100;


/*
 * Demo driver: allocates the shared arrays a and b (n elements each),
 * sets every element of a to 1, prints a, computes the prefix sums of a
 * into b with offset 0, prints b and finally calls printAccStat().
 */
void main( void ) {
 pr int idx;                    /* private index used while filling a */
 start {
   /* shared buffers for the operands and for the results */
   a = (int *) shalloc(n);
   b = (int *) shalloc(n);

   seq  prS("\nSource Array:\n");

   /* preset the input array: every processor starts at its own $ and
    * advances by __STARTED_PROCS__, storing 1 into each element it visits
    * (presumably $ ranges over 0..__STARTED_PROCS__-1 here, so that the
    * whole array is covered -- confirm against the Fork95 runtime) */
   farm {
     idx = $;
     while (idx < n) {
       a[idx] = 1;
       idx += __STARTED_PROCS__;
     }
   }

   output_array( a, n );                  /* show the operands */

   parallel_prefix_add( a, n, b, 0 );     /* b := prefix sums of a */

   seq  prS("\nParallel Prefix Array:\n");
   output_array( b, n );                  /* show the results */

   seq  printAccStat();
 }
}


/*
 * par_prefix_add -- prefix sums over one slice of p = groupsize() elements,
 * one element per processor (processor $ handles in[$] and out[$]).
 *
 * On return out[$] holds initsum + in[0] + ... + in[$-1], i.e. the prefix
 * that excludes the processor's own operand.  The return value is
 * initsum + in[0] + ... + in[p-1], ready to be used as the offset of the
 * next slice.
 *
 * The doubling loop depends on the group executing it in lock step (the
 * function is declared sync): in round d every processor with $ >= d adds
 * the partial sum held d positions to its left.
 * NOTE(review): assumes the processors of the calling group are ranked
 * $ = 0..p-1 -- confirm for callers other than the start region in main.
 *
 * Each processor keeps its operand in a private variable, so the final
 * correction does not re-read in[].  This makes the routine safe when in
 * and out refer to the same (or overlapping) storage; re-reading in[$]
 * after out[] had been written would subtract the freshly computed sum
 * instead of the operand and leave 0 in every out[$].
 */
sync int par_prefix_add(    /* Ladner/Fischer-Algorithm for p==n */
  sh int *in,     /* operand array, length p */
  sh int *out,    /* result array, length p */
  sh int initsum) /* global offset on parallel prefix computation */
{
  sh int p = groupsize();
  sh int d;
  sh int sum;
  pr int own;     /* this processor's operand, read once up front */

  own = in[$];
  out[$] = own;
  for (d=1; d<p; d=d<<1) 
    if ($>=d)
       out[$] += out[$-d];
  out[$] += initsum;
  sum = out[p-1];  /* total of the slice including initsum */
  out[$] -= own;   /* correction: prefix a[0..$-1], excluding a[$] */
  return sum;
}


/*
 * parallel_prefix_add -- prefix sums of in[0..n-1] into out[0..n-1].
 *
 * out[i] receives initsum + in[0] + ... + in[i-1]; the return value is
 * initsum + in[0] + ... + in[n-1].  For n <= 0 nothing is written and
 * initsum is returned unchanged.
 *
 * The array is processed in slices of p = groupsize() elements.  Every
 * complete slice is handed to par_prefix_add(), whose result becomes the
 * offset of the next slice.  par_prefix_add() always touches p elements,
 * so it is only called while a full slice is left; the remaining n mod p
 * elements (all of them if n < p) are finished by a single processor.
 * Iterating with k < n instead would read and write up to p-1 elements
 * past the end of both arrays whenever p does not divide n.
 *
 * NOTE(review): the value returned after the tail relies on the group
 * being synchronous again once the seq statement has completed -- confirm
 * against the Fork95 language definition.
 */
sync int parallel_prefix_add(
  sh int *in,     /* operand array, length n */
  sh int n,       /* problem size */
  sh int *out,    /* result array, length n */
  sh int initsum) /* global offset on parallel prefix computation */
{
  sh int p = groupsize();
  sh int k;
  pr int i;       /* index into the sequential tail */
  pr int t;       /* operand saved before out[i] is written (in may alias out) */

  for (k=0; k+p<=n; k+=p )   /*shared loop over the complete slices*/
     initsum = par_prefix_add( in + k, out + k, initsum );

  if (k<n)                   /*fewer than p elements left over*/
     seq {
       for (i=k; i<n; i++) {
          t = in[i];
          out[i] = initsum;
          initsum += t;
       }
     }
  return initsum;
}
 

/*
 * output_array -- print the n elements of arr on one line, each followed
 * by two blanks, and finish with two newlines.  The whole output is
 * produced inside a seq statement.
 */
sync void output_array (
  sh int *arr,    /* the array to print out */
  sh int n )      /* length of arr */
{
  pr int pos;     /* index of the element being printed */
  seq {
    pos = 0;
    while (pos < n) {
      prI( arr[pos], 0 );
      write(1,"  ",2);
      pos++;
    }
    write(1,"\n\n",2);
  }
}
