/*  pipe.c   by C.W.Kessler 05/95 updated 07/99
 *  
 *  pipelined integer global sum implementation in Fork95
 *
 *  n must be a power of 2 and at least p/2.
 *  p must be a power of 2 minus 1.
 *
 *  Condition: the group-relative processor IDs '$' must be consecutively
 *             numbered from 0 to groupsize - 1.
 */

#include <fork.h>
#include <syscall.h>
#include <stdlib.h>
#include <io.h>

/* Larger of a and b.  Each argument may be evaluated twice, so avoid
 * arguments with side effects.  Not referenced in the visible part of
 * this file. */
#define max(a,b) ((a)>=(b)?(a):(b))

sh int n = 20;   /* number of input elements that main() sums up */
sh int p = 7;    /* number of pipeline nodes; main() only runs processors with $ < p */

/* One node of the pipeline graph.  The graph is kept as an array of
 * nodes; links between nodes are array indices, with -1 meaning
 * "no predecessor". */
struct node {
  int data;    /* value most recently stored by this node */
  int left;    /* index of the first predecessor, or -1 */
  int right;   /* index of the second predecessor, or -1 */
  int stage;   /* pipeline stage of this node; stage 0 reads the input array */
};

typedef struct node  Node;
typedef struct node *Graph;   /* a graph is addressed through its node array */


/* Builds the pipeline graph for p nodes and returns its node array.
 *
 * Every calling processor initialises the node whose index equals its
 * own '$':
 *   - data is cleared to 0,
 *   - nodes with index below p/2 get the predecessors 2*$+1 and 2*$+2,
 *     all other nodes get -1 for both links,
 *   - stage is set to *depth - ilog2($+1).
 *
 * depth: out parameter; receives ilog2(p).
 * p:     number of nodes to allocate.
 */
sync Graph tree_pipeline( sh int *depth, sh int p )
{
  sh Graph nodes;
  int lchild, rchild;   /* this processor's predecessor links */

  *depth = ilog2( p );
  nodes = (Graph) shalloc( p * sizeof( Node ) );

  if ($ >= p/2) {
    /* no predecessors for this node */
    lchild = -1;
    rchild = -1;
  } else {
    lchild = 2*$ + 1;
    rchild = 2*$ + 2;
  }

  nodes[$].data  = 0;
  nodes[$].left  = lchild;
  nodes[$].right = rchild;
  nodes[$].stage = *depth - ilog2( $+1 );

  return nodes;
}


/* Performs one pipeline step for the node of the calling processor ($).
 *
 * pipeline: node array as produced by tree_pipeline().
 * A:        start of the input slice consumed in this step.
 * depth:    depth value used to locate the stage-0 nodes; they are taken
 *           to start at index (1<<depth)-1.
 *
 * A stage-0 node stores the sum of two adjacent elements of A; the pair
 * is selected by the node's offset from index (1<<depth)-1.  Every other
 * node stores the sum of the data fields of its left and right
 * predecessors.
 *
 * NOTE(review): an inner node reads its predecessors' data in the same
 * step in which those predecessors overwrite it, and nothing in this
 * function orders the reads against the writes -- confirm which value
 * the execution model guarantees before restructuring this code.
 */
sync void pipestep( sh Graph pipeline, sh int *A, sh int depth )
{
 if (pipeline[$].stage == 0)         // stage-0 node: add one pair of input elements
    pipeline[$].data = A[2*($-((1<<depth)-1))]
                     + A[2*($-((1<<depth)-1)) + 1];
  else                               // inner node: add the two predecessor values
    pipeline[$].data = pipeline[ pipeline[$].left ].data
                     + pipeline[ pipeline[$].right ].data;
}


/* Prints one line per node of the pipeline graph, in index order:
 * the node index followed by its stage and its two predecessor links.
 *
 * pipeline: node array to print.
 * p:        number of nodes in the array.
 */
void printpipe( Graph pipeline, int p )
{
  int k = 0;

  while (k < p) {
    int stage = pipeline[k].stage;
    int lpred = pipeline[k].left;
    int rpred = pipeline[k].right;

    pprintf(" node %d:  stage %d,  left %d,  right %d\n",
            k, stage, lpred, rpred );
    k++;
  }
}


/* Program entry.
 *
 * On the processors with $ < p this
 *   1. builds the pipeline graph with tree_pipeline(),
 *   2. allocates the input array, fills a[0..n-1] with a[i] = i and
 *      zeroes the rest,
 *   3. runs the pipeline: in step t every node whose stage is <= t
 *      executes pipestep() on the slice starting at a + t*slice, and the
 *      value of the root node (index 0) is added to sum,
 *   4. prints the accumulated sum.
 *
 * The array is sized to cover every element the stage-0 nodes can touch:
 * they are active in all `steps` steps and read one full slice per step,
 * i.e. indices 0 .. steps*slice-1.  Everything from index n on is zero,
 * so the padding contributes nothing to the sum.  (Allocating only n + p
 * items left a[n+p] uninitialised and out of bounds whenever n is a
 * multiple of slice, and let later steps read past the end of the array.)
 */
void main( void )
{
 start if ($<p) {
   sh int t;
   sh int sum = 0;   // accumulates the root node's value of every step
   sh int depth;     // set by tree_pipeline() below
   sh int slice = 1<<(ilog2(p)+1);   // distance between the inputs of two steps
   sh int steps;     // number of pipeline steps executed
   sh int padded;    // length of a[] including the zero tail
   sh int *a;
   sh Graph pipeline = tree_pipeline( &depth, p );
   int i;
   seq { printf("depth: %d\n", depth);
         printpipe( pipeline, p );
         printf("slice = %d\n", slice);
   }
   // n/slice + 1 slices cover the n input items; depth further steps
   // drain the values still travelling through the pipeline.
   steps  = n/slice + 1 + depth;
   padded = steps * slice;
   // preset the input array:
   a = (int *) shalloc( padded * sizeof( int ) );
   farm srand(17+2*$*$);
   farm
     forall(i,0,n,p) 
       a[i] = i; // rand() & 0x1fff;  //range 0..8191
   // zero tail: flushes the pipeline and keeps every read inside a[]
   farm
     for (i=n+$; i<padded; i+=p)
       a[i] = 0;
   seq {
     prS("\nSource Array:\n");
     for (i=0; i<n; i++) printf(" %d", a[i] );
     printf("\n");
   }
   // pipelined computation:
   for (t=0; t<steps; t++) 
     if (t >= pipeline[$].stage) {   // a node joins once step t reaches its stage
       pipestep( pipeline, a+t*slice, depth );
       seq printf("pipestep returns %d\n", pipeline[0].data );
       sum += pipeline[0].data;
     }
   seq printf("\nResult: %d\n", sum );
 }
}
