/** recdoub.c
 *
 *  recursive doubling example
 *  C. Kessler 990615
 */
#include <fork.h>   // always required in Fork programs
#include <io.h>     // printf() prototype 
#include <string.h> // memcpy() prototype
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <math.h>


/** divide_conquer
 *
 *  Generic parallel divide-and-conquer skeleton.  The problem-specific
 *  behaviour is supplied through five function pointers:
 *
 *    solveseq     - solves a problem of size n on its own; used when the
 *                   group has one processor or fewer processors than
 *                   subproblems
 *    istrivial    - returns nonzero if a problem of size n needs no
 *                   further division
 *    solvetrivial - solves such a trivial problem
 *    divide       - splits the problem; stores the number of subproblems
 *                   in *pk, the subproblem array in *psubprob and the
 *                   subproblem sizes in *psubsiz
 *    conquer      - combines the k subsolutions into *pres
 *
 *  presult  address where the result is to be stored
 *  data     operand array
 *  n        number of elements in the operand array
 *  elsize   size of one operand element
 */
sync void divide_conquer(
  sh void ***presult, // address of result array
  sh void **data,   // operand array
  sh int n,         // #elements in operand array
  sh int elsize,    // element size in operand array
  sh void (*solveseq)( void ***pres, void **dat, int nn, int elsiz ),
  sh sync int (*istrivial)( sh int nn ),
  sh void (*solvetrivial)( void ***pres, void **dat, int nn, int elsiz ),
  sh sync void (*divide)( sh int *pk, sh void ***psubprob, sh int **psubsiz, 
                          sh void **dat, sh int nn, sh int elsiz ),
  sh sync void (*conquer)( sh int kk, sh void ***pres, sh void ***subsol,
                           sh int *ss, sh int nn, sh int elsiz )
 )
{
  sh int p = groupsize();
  sh int k;
  sh int istr;
  sh int *subsize;
  sh void **subproblem, ***subsolution;
 
  // a single-processor group cannot parallelize any further:
  if (p==1) { 
     farm solveseq( presult, data, n, elsize );
     return;
  }

  // trivial problems are solved directly:
  istr = istrivial( n );
  if (istr) {
     seq solvetrivial( presult, data, n, elsize );
     return;
  }
  
  // divide into k subproblems of suitable size.
  // This is done exactly once: a second call to divide() would overwrite
  // subproblem/subsize and repeat the work for nothing.
  divide( &k, &subproblem, &subsize, data, n, elsize );
  farm assert( k > 1 );
  if (k > p) {   // not enough processors, solve sequentially:
     seq solveseq( presult, data, n, elsize );
     return;
  }

  // solve subproblems recursively in parallel,
  // store the result of subproblem @ in subsolution[@]:
  subsolution = (void ***)shalloc( k * sizeof(void **));
  fork( k; @=$%k; ) {
     // renumber $ inside the subgroup via the shared counter pp
     sh int pp = 0;
     $ = mpadd( &pp, 1 );
     divide_conquer( &(subsolution[@]),
                     subproblem[@], subsize[@], elsize,
                     solveseq, istrivial, solvetrivial, 
                     divide, conquer );
  }

  // now do the conquer operation to compute the result,
  // allocate *presult and store the result therein:
  conquer( k, presult, subsolution, subsize, n, elsize );

  shallfree();
}


// -------------- the problem-specific routines: ----------
 
/* Triviality test for the recurrence solver:
 * yields 1 when at most one element is left (N <= 1), 0 otherwise.
 */
sync int issmall( sh int N )
{
  return ( N <= 1 ) ? 1 : 0;
}


/* Degenerate "divide" routine: reports a single subproblem that is the
 * unmodified input.
 *
 *   pk           receives the number of subproblems (always 1)
 *   psubproblem  receives the subproblem array (set to data itself)
 *   psubsize     receives a freshly shalloc'ed one-element size array
 *   data, n      the operand array and its element count
 *   elsize       element size (not used here)
 */
sync void nosplit( sh int *pk, sh void ***psubproblem, sh int **psubsize,
                   sh void **data, sh int n, sh int elsize )
{
 *pk = 1;
 *psubproblem = data;
 *psubsize = (int *)shalloc( sizeof(int) );
 // the only subproblem is the whole input, so record its size; the
 // size array must not be handed back uninitialized because the
 // divide-and-conquer driver reads subsize[] for every subproblem
 (*psubsize)[0] = n;
}


/* Sequential solver for the first-order linear recurrence
 *
 *     x[0] = b[0]
 *     x[i] = b[i] + a[i] * x[i-1]     for 0 < i < N
 *
 * a and b are accessed as arrays of float; a[0] is never read.
 * elsize is accepted for interface uniformity but not used.
 *
 * Returns a newly shmalloc'ed array of N floats (cast to void **) that
 * the caller releases with shfree(), or NULL when N <= 0, since there is
 * no x[0] to write in that case.
 */
void **seqfolr( void **a, void **b, int N, int elsize )
{
  float *coef = (float *)a;
  float *rhs  = (float *)b;
  float *x;
  int i;

  if (N <= 0)
     return NULL;   // empty problem: writing x[0] would be out of bounds

  x = (float *)shmalloc( N*sizeof(float) );
  x[0] = rhs[0];
  for (i=1; i<N; i++)
     x[i] = rhs[i] + coef[i] * x[i-1];
  return (void **)x;
}

/* additive combiner of the recurrence: returns u + v */
float f(float u, float v)
{
  float sum = u;

  sum += v;
  return sum;
}

/* multiplicative combiner of the recurrence: returns u * v */
float g(float u, float v)
{
  float product = u;

  product *= v;
  return product;
}


/** doublingstep
 *
 *  Parallel solver for the first-order linear recurrence
 *
 *      x[0] = b[0]
 *      x[i] = f( b[i], g( a[i], x[i-1] ) )     for 0 < i < N
 *
 *  One step halves the problem: the even-indexed unknowns x[2k] satisfy
 *  a recurrence of the same form with coefficients
 *      a1[k] = g( a[2k], a[2k-1] )
 *      b1[k] = f( b[2k], g( a[2k], b[2k-1] ) )
 *  which is solved recursively; the odd-indexed unknowns are then filled
 *  in from their even-indexed predecessors.
 *
 *  a, b    coefficient arrays of N floats each (a[0] is never read)
 *  N       number of unknowns
 *  elsize  element size; only passed on to the recursive call
 *
 *  Returns a shmalloc'ed array holding x[0..N-1]; the caller owns it.
 */
sync float *doublingstep( float *a, float *b, int N, int elsize )
{
 sh float *a1;
 sh float *b1;
 sh float *x1, *x;
 sh int p = groupsize();
 sh int issm;
 int k;
 issm = issmall( N );
 // a one-processor group, or a trivially small problem, is solved
 // sequentially:
 if (p==1) {
    farm x = (float *)seqfolr( (void **)a, (void **)b, N, sizeof(float) );
    return x;
 }
 if (issm) {
    seq x = (float *)seqfolr( (void **)a, (void **)b, N, sizeof(float) );
    return x;
 }
 // coefficient arrays of the half-size problem, allocated once:
 seq {
    a1 = (float *)shmalloc(N/2 * sizeof(float));
    b1 = (float *)shmalloc(N/2 * sizeof(float));
    printf("doublingstep %d\n", N );
 }
 forall( k, 1, N/2, p ) {
   a1[k] = g( a[2*k], a[2*k-1] );
   b1[k] = f( b[2*k], g( a[2*k], b[2*k-1] ) );
 }
 b1[0] = b[0];      // a1[0] is left unset because a[0] is never read
 x1 = doublingstep( a1, b1, N/2, elsize );

 // allocate the result once; without seq every processor of the group
 // would allocate its own block and all but one would be lost
 seq x = (float *)shmalloc(N * sizeof(float));
 forall( k, 0, N/2, p )
    x[2*k] = x1[k];
 forall( k, 1, N/2+1, p )
    x[2*k-1] = f( b[2*k-1], g(a[2*k-1], x1[k-1]) );
 seq {
    // for odd N the last unknown has an even index that is not covered
    // by the half-size solution x1[0..N/2-1]; derive it from x[N-2]
    if (N % 2)
       x[N-1] = f( b[N-1], g( a[N-1], x[N-2] ) );
    // release the temporaries exactly once, not once per processor
    shfree( a1 );
    shfree( b1 );
    shfree( x1 );
 }
 return x; 
} 


// problem size: number of elements in the arrays a, b and x used by main()
sh int N = 16;

/* Test driver: fills the coefficient arrays with constants (a[i] = 0.5,
 * b[i] = 1.0), solves the recurrence with doublingstep() and prints the
 * solution, then recomputes it with seqfolr() and prints that as a
 * reference for comparison.
 */
void main (void)
{
  start {
    sh float *coef = (float *)shalloc(N * sizeof(float));
    sh float *rhs = (float *)shalloc(N * sizeof(float));
    sh float *sol;
    sh int nprocs = groupsize();
    int j;

    // initialize the operands in parallel
    forall( j, 0, N, nprocs ) {
       coef[j] = 0.5;
       rhs[j] = 1.0;
    }

    // parallel solution
    sol = doublingstep( coef, rhs, N, sizeof(float) ); 
    seq {
      j = 0;
      while (j < N) {
         printf(" %f\n", sol[j] );
         j++;
      }
      printf("\n");
    }

    // sequential reference solution ("Zur Kontrolle" = "for verification")
    seq {
      printf("Zur Kontrolle: seq.\n");
      sol = (float *)seqfolr( (void **)coef, (void **)rhs, N, sizeof(float) );
      j = 0;
      while (j < N) {
         printf(" %f\n", sol[j] );
         j++;
      }
      printf("\n");
    }
  }
}
    
