/* CRCW Quicksort using fork DC strategy.    C.W. Kessler 12/95
 * sorts N elements using p processors 
 * in expected time O((N/p)log N) and space 2*N + O(1).
 * synchronous version.
 */

#include <fork.h>
#include <assert.h>
#include <io.h>
#include <stdlib.h>
#include <math.h>

sh int N;   /* number of array elements to be sorted; read from stdin in main() */
sh int *a;  /* the array to be sorted; main() allocates it with shalloc( N ) */

/* sort2(a,b,t): put the two lvalues a and b into ascending order, using the
 * lvalue t as swap space.  Wrapped in do { } while (0) so that it behaves as
 * a single statement (safe in an unbraced if/else); all arguments are
 * parenthesized.  a and b are evaluated more than once, so pass
 * side-effect-free lvalues. */
#define sort2(a,b,t) do { if ((a)>(b)) { (t)=(a); (a)=(b); (b)=(t); /*swap*/ } } while (0)

/* renumber(t): assign $ the value returned by mpadd( &(t), 1 ).  Used to
 * number the processors of a group consecutively; presumably t must be 0
 * beforehand for the numbering to start at 0.  Expands to an expression,
 * the caller supplies the terminating semicolon. */
#define renumber( t ) ($ = mpadd( &(t), 1 ))

/* est_work(n): work estimate n*(ilog2(n)+1) for sorting n elements, used to
 * split processors between subproblems.  n is evaluated twice. */
#define est_work(n) ((n)*(ilog2(n)+1))

/* Print the first N elements of a[] on a single line, each preceded by a
 * blank, followed by a newline.  Plain sequential loop. */
async void print_array( int *a, int N )
{
 int idx = 0;

 while (idx < N) {
   printf(" %d", a[idx] );
   idx++;
 }
 printf("\n");
}


/* Three-way integer comparison passed to the sequential qsort() routine:
 * returns -1, 0 or 1 when *a is less than, equal to or greater than *b. */

async int cmp( void *a, void *b )
{
  int lhs = *(int *)a;
  int rhs = *(int *)b;

  /* each relational result is 0 or 1, so the difference is -1, 0 or 1 */
  return (lhs > rhs) - (lhs < rhs);
}
  

/* qs: quicksort array[0..n-1] with the processors of the calling group.
 *
 *   array     - the n elements to sort; the sorted result is left here
 *   n         - number of elements
 *   temparray - scratch space; elements 0..n-1 are overwritten
 *
 * One call partitions array[] around pivot = array[0] into the three
 * consecutive runs lower | equal | upper (built in temparray[], then copied
 * back), and recurses on lower and upper.  When both sides hold more than
 * one element the group is split in two with fork, giving each side a share
 * of the processors proportional to est_work() of its size.
 */

sync void qs( sh int *array, sh int n, sh int *temparray )
{
 sh int p, lowerprocs;           /* group size; #processors given to the lower part */
 sh int lowersize, uppersize; /* size of subarrays lower[], upper[] */
 sh int equalsize;            /* #elem. equal to pivot element */
 sh int *lower, *upper;       /* subarrays to be recursively sorted */
 sh int *equal, l, e, u;      /* equal[] run; next free slot in lower/equal/upper */
 sh int pivot = 0;            /* also serves as swap temp and renumber counter */
 pr int j, k, mygroup;        /* loop index, target slot, subgroup choice (0/1) */

 /* Processors whose $ is not below n leave immediately.
  * NOTE(review): this test runs before renumber() below; if $ still holds
  * the numbering of the parent group inside a forked subgroup, processors
  * that are needed could be dismissed here -- confirm how fork assigns $. */
 if ($>=n) return;    /* never need more than n processors */

 p = groupsize();
 seq pprintf(" qs(%d,%d)\n", n,p);   /* trace output: problem size, group size */

 if (n<=1)  return;  /* trivial */
 if (n==2)  { sort2( array[0], array[1], pivot ); return; }  /* simple */

 /* one processor left: hand over to qsort(); element size is passed as 1
  * (presumably sizes are counted in words here -- TODO confirm) */
 if (p==1)  { farm qsort( array, n, 1, cmp ); return; }  /* sequential */

 /* pivot is still 0 here and is used as the counter for mpadd */
 renumber( pivot );    /* set $ consecutively from 0 to p-1 */
 lowersize = uppersize = equalsize = 0;
 lowerprocs = 0;
 pivot = array[0];         /* select pivot element */

 /* in parallel determine sizes of subarrays lower[], equal[], upper[]: */
 /* each processor inspects elements $, $+p, $+2p, ... */
 farm
   for (j=$; j<n; j+=p)   /* parallel loop over array[] */
     if      (array[j]<pivot)  syncadd( &lowersize, 1 );
     else if (array[j]>pivot)  syncadd( &uppersize, 1 );
          else                 syncadd( &equalsize, 1 );

 /* allocate space for subarrays lower[], upper[] in temparray[]: */
 /* layout: lower[0..lowersize-1] | equal[0..equalsize-1] | upper[...] */
 lower = temparray;
 equal = lower + lowersize;
 upper = equal + equalsize;

 /* in parallel copy array elements to lower[]/equal[]/upper[] */
 /* mpadd hands every element its own slot index k within its run */
 l = e = u = 0;
 farm
   for (j=$; j<n; j+=p)   /* parallel loop over array[] */
     if (array[j]<pivot)  { k=mpadd( &l, 1 ); lower[k] = array[j];}
     else
     if (array[j]>pivot)  { k=mpadd( &u, 1 ); upper[k] = array[j];}
     else                 { k=mpadd( &e, 1 ); equal[k] = array[j];}

 farm
   for (j=$; j<n; j+=p)
      array[j] = temparray[j];    /* copy back */
 /* now temparray[] can be used as temporary space for recursive calls */

 if (lowersize>1 && uppersize>1) {   /* the general case */
    /* share of the p processors proportional to the estimated work of the
     * lower part, truncated to int */
    farm
      lowerprocs = (int)((float)(est_work(lowersize)*p)
                       / (float)(est_work(lowersize)+est_work(uppersize)));
    /* keep at least one processor on each side */
    if (lowerprocs<=0) lowerprocs = 1;    /*correction*/
    if (lowerprocs>=p) lowerprocs = p-1;  /*correction*/
    farm mygroup = ($<lowerprocs)? 0:1;   /* first lowerprocs processors take lower[] */
    /* split into two subgroups; group 0 sorts the lower run, group 1 the
     * upper run, each with the matching slice of temparray[] */
    fork ( 2; @=mygroup; ) {
      if (@==0) qs( array, lowersize, temparray );
      else      qs( array + lowersize + equalsize, uppersize,
                    temparray + lowersize + equalsize );
    }
 }
 else  
 /* at most one side needs sorting: the whole group works on it */
 if (lowersize>1)  qs( array, lowersize, temparray );
 else
 if (uppersize>1)  qs( array + lowersize + equalsize, uppersize, 
                       temparray + lowersize + equalsize );
 /* else do nothing; */
}


pr char c;  /* answer character read from stdin in main(); the array is displayed when it is 'y' */

/* Program entry point: asks for N and whether to display the array, fills
 * a[0..N-1] with random values in 0..1023, sorts it with qs() and reports
 * the elapsed time as the difference of two getct() readings.
 *
 * Input handling: an unreadable or negative N is treated as 0, in which
 * case nothing is allocated or sorted.  The y/n answer is read as a single
 * character (" %c" skips leading whitespace); reading it with "%s" would
 * store a string plus its terminating NUL into the one-char variable c.
 */
void main( void )
{
 pr int j;
 pr int starttime, stoptime;
 srand( 8*$*$*$ + 17 );    /* seed random generator */
 c = 'n';                  /* default when no answer can be read */
 start {
   sh int *t;              /* scratch array handed to qs() */
   seq {
     printf("Enter N = ");
     if (fscanf(stdin,"%d", &N) != 1) N = 0;   /* unreadable input */
     if (N < 0) N = 0;                         /* negative sizes make no sense */
     printf("\ngenerate %d random numbers.\nDisplay array (y/n)? ", N);
     fscanf(stdin," %c", &c);
   }
   if (N > 0) {            /* N is shared, so all processors take the same branch */
     t = (int *) shalloc( N );
     a = (int *) shalloc( N );
     farm
       for (j=$; j<N; j+=__STARTED_PROCS__)
         a[j] = abs(rand())%1024;           /*set array*/
     seq if (c=='y') print_array( a, N );
     farm  starttime = getct();

     qs( a, N, t ); 

     farm  stoptime = getct();
     seq {
       if (c=='y')   print_array( a, N );
       printf("\nTime: %d PRAM CPU Cycles\n", stoptime - starttime );
     }
   }
   seq {
     /* wait for a keystroke so the output stays visible */
     printf("<Ctrl-C>"); fscanf(stdin," %c", &c);
   }
 }
 barrier; exit(0);
}
