/*  erewprefix.c   by C.W.Kessler 12/95
 *
 *  general integer multiprefix-ADD implementation in Fork95
 *
 *  for EREW PRAM (no mpadd() available, e.g. for floats)
 *
 *  takes O((n/p) log n) time on p-processor SB-PRAM without using mpadd()
 *
 *  This is not optimal but no faster EREW algorithm is known.
 */

#include <fork.h>
#include <syscall.h>
#include <io.h>

/* Prototypes of the synchronous routines defined further down in this file:
 *   output_array(arr, n)                     - print the n elements of arr
 *   parallel_prefix_add(in, n, out, initsum) - prefix sums of in[] into out[]
 */
extern sync void output_array( sh int*, sh int );
extern sync void parallel_prefix_add( sh int*, sh int, sh int *, sh int );

/* Shared pointers to the operand array (a) and the result array (b);
 * both are allocated in main(). */
sh int *a, *b;
/* Problem size: number of elements in a and in b. */
sh int n = 100;


/* Demo driver: allocates the shared arrays a and b (n elements each),
 * sets every a[i] to 1, prints a, computes the prefix sums of a into b
 * with offset 0, and prints b.
 */
main() {
 pr int idx;   /* private element index used while filling a[] */
 start {
   /* storage for the operand array (a) and the result array (b) */
   a = (int *) shalloc(n);
   b = (int *) shalloc(n);

   seq { prS("\nSource Array:\n"); }

   /* preset the input array: each processor starts at its own id $ and
    * advances by the number of started processors */
   farm {
     for (idx = $; idx < n; idx += __STARTED_PROCS__) {
       a[idx] = 1;
     }
   }

   /* print the original array */
   output_array( a, n );

   /* b[] receives the prefix sums of a[], global offset 0 */
   parallel_prefix_add( a, n, b, 0 );

   seq { prS("\nParallel Prefix Array:\n"); }

   /* print the resulting array */
   output_array( b, n );
 }
}


/*
 * parallel_prefix_add: exclusive prefix sums of in[0..n-1], computed without
 * mpadd().
 *
 * On return, out[i] = initsum + in[0] + ... + in[i-1] for 0 <= i < n,
 * so out[0] == initsum.  in[] is only read.
 *
 * Method: out[] is first set to a copy of in[].  Then, for k = 1, 2, 4, ...
 * (k < n), every element i >= k adds the element k positions below it; after
 * the last level out[i] holds the inclusive sum in[0] + ... + in[i].  The
 * final pass subtracts in[i] again to make the sum exclusive and adds the
 * global offset initsum.
 *
 * Work is split over the p = groupsize() processors of the calling group;
 * each processor handles every p-th element.
 *
 * NOTE(review): assumes the processor ids $ of the calling group are
 * 0 .. p-1, and that in and out do not overlap (the final pass reads in[i]
 * after out[] has been overwritten) -- confirm for callers other than main().
 */
sync void parallel_prefix_add(
  sh int *in,     /* operand array, length n */
  sh int n,       /* problem size */
  sh int *out,    /* result array, length n */
  sh int initsum) /* global offset on parallel prefix computation */
{
  sh int p = groupsize();
  sh int k;
  pr int i;

  for (i=$; i<n; i+=p)           /*initialization, private loop*/
     out[i] = in[i];
  for (k=1; k<n; k=k*2)      /*shared loop over levels*/
     /* Walk from the top of the array downwards: the element out[i-k] read
      * here lies below i and must still hold its value from the previous
      * level, so lower slices must not be updated before higher ones. */
     for (i=n-1-$; i>=k; i-=p)   /*private loop inside level k*/
        out[i] = out[i] + out[i-k];   /*loop direction is critical! */
  for (i=$; i<n; i+=p)           /*step over n/p slices of entire array*/
     /* make the sum exclusive (drop in[i]) and apply the global offset;
      * the offset was previously ignored */
     out[i] = out[i] - in[i] + initsum;
}
 

/*
 * output_array: print arr[0..n-1] on one line, each value followed by two
 * blanks, then terminate the line and emit one empty line.
 * All printing happens inside a single seq block.
 */
sync void output_array (
  sh int *arr,    /* the array to print out */
  sh int n )      /* length of arr */
{
  pr int pos;     /* index of the element being printed */
  seq {
    pos = 0;
    while (pos < n) {
       prI( arr[pos], 0 );
       write(1,"  ",2);      /* two-blank separator */
       pos++;
    }
    write(1,"\n\n",2);
  }
}
