/* Gauss-Elimination in Fork95.   C.W.Kessler 02/96 */

#include <fork.h>
#include <io.h>
#include <math.h>

/* The forall() macro cannot be nested.
 * We apply explicit flattening to a self-scheduling parallel loop
 * to exploit 2D parallelism.
 */

typedef double ** syst;
typedef double *  eq;


sync syst build_system( sh int *pn ) 
{
  /* Build the hard-coded example system and hand it back to the caller.
   *
   * The tableau has `order` rows of `order`+1 doubles each: columns
   * 0..order-1 hold the coefficients of an equation, column `order`
   * holds its right-hand side.
   *
   * pn      : out-parameter, receives the number of equations.
   * returns : the freshly allocated tableau (array of row pointers).
   *
   * Neither shmalloc() result is checked before it is used.
   */
  sh syst tableau;
  sh int nprocs = groupsize();   /* stride for the row-allocation loop */
  pr int order;
  pr int row;

  order = 3;    /* example size */

  /* the row-pointer array is allocated inside a seq statement */
  seq tableau = (syst) shmalloc( order * sizeof( eq ));

  /* the rows are allocated in a farm region, distributed by forall() */
  farm
    forall (row, 0, order, nprocs)
      tableau[row] = (eq) shmalloc( (order+1) * sizeof( double )); 

  /* equation 0 */
  tableau[0][0] =  2.0;
  tableau[0][1] = -1.0;
  tableau[0][2] =  3.0;
  tableau[0][3] =  6.0;

  /* equation 1 */
  tableau[1][0] = -1.0;
  tableau[1][1] =  3.0;
  tableau[1][2] = -5.0;
  tableau[1][3] =  4.0;

  /* equation 2 */
  tableau[2][0] = -1.0;
  tableau[2][1] =  2.0;
  tableau[2][2] =  5.0;
  tableau[2][3] =  0.0;

  *pn = order;
  return tableau;
}


async void print_system( syst s, int n, int m )
{
  /* Dump the tableau s as text: one line per equation, listing the
   * m coefficients s[row][0..m-1] followed by the right-hand side
   * s[row][m].  n is the number of equations (rows).
   *
   * Nothing in here restricts the output to a single processor; the
   * callers in this file guard the call with ($==0).
   */
  pr int row;
  pr int col;

  printf("New %d x %d system:\n", n, m );
  printf("==================================================\n");

  row = 0;
  while (row < n) {
     printf("EQ%d: ", row);

     /* coefficient columns */
     col = 0;
     while (col < m) {
        printf(" %5.2lf x%.2d", s[row][col], col );
        col++;
     }

     /* right-hand side is stored in column m */
     printf(" == %5.2lf\n", s[row][m] );
     row++;
  }

  printf("==================================================\n");
}


sync void eliminate( sh syst a, sh int n )
{
  /* Applies Gaussian elimination to the system a[][] in place.
   *
   * a : tableau of n rows with n+1 columns each; column n is the
   *     right-hand side.
   * n : number of equations (and of variables).
   *
   * Assume matrix is regular and diagonally dominant: the pivot of
   * stage r is always a[r][r], no row exchange is done and a zero
   * pivot is not detected.
   *
   * In every stage the pivot row is normalized and then subtracted
   * from ALL other rows (above and below the pivot), so after the
   * last stage the solution is found in the right hand side column
   * of the modified system tableau.
   *
   * Parallelism: normalization and multiplier copy are distributed
   * cyclically over the p processors; the update step hands out the
   * n*(n+1) tableau elements one at a time through a shared counter
   * (the flattened 2D loop mentioned at the top of the file), so
   * more than n processors can take part in the update.
   *
   * The tableau is printed after every stage.
   */
 sh int r;              /* current elimination stage / pivot index        */
 pr int i, j, ij;       /* row, column and flattened element index        */
 sh int p = 0;          /* counts the processors during renumbering below */
 sh int nextij;         /* shared work counter of the update loop         */
 /* scratch array, one saved multiplier per row
  * NOTE(review): allocation unit and lifetime of shalloc() are not
  * visible here -- confirm against the Fork95 manual. */
 sh double *rowfactor = (double *)shalloc( n * sizeof(double));
 /* renumber: every processor adds 1 to p and keeps the value returned
  * by mpadd() as its rank $.  Afterwards p is used as the number of
  * processors (loop stride) and $ as the per-processor start offset.
  * Presumably mpadd() returns the pre-increment value, giving ranks
  * 0..p-1 -- confirm against the Fork95 manual. */
 $ = mpadd( &p, 1 );   // renumber
 
 for (r=0; r<n; r++) {  /* do n elimination steps */ 
    /* reciprocal of the pivot, read before row r is rescaled below */
    sh double factor = 1.0 / a[r][r];
    nextij = 0;         /* restart the element counter for this stage */

    /* progress trace of the stage */
    seq printf("Stage %d factor %lf p=%d\n", r, factor, p);
    /* my pivot element is a[r][r] because a is diag. dom. */

    /* normalize equation r: columns r..n of row r are scaled by
     * factor, column r+$, r+$+p, ... per processor.
     * NOTE(review): the following steps rely on every processor having
     * left one farm region before any processor starts the next --
     * confirm the end-of-farm synchronization in the Fork95 manual. */
    farm
      for ( j=r+$; j<n+1; j+=p ) 
        a[r][j] = a[r][j] * factor;

    /* Save column r of every row.  The update step below overwrites
     * a[i][r] (column j == r is not skipped), while other elements of
     * the same row still need that value as their multiplier. */
    farm
      for ( i=$; i<n; i+=p ) 
        rowfactor[i] = a[i][r];   // copying avoids data dependencies

    /* Self-scheduling update: each processor repeatedly fetches the
     * next flattened index ij from nextij and processes element
     * (ij / (n+1), ij % (n+1)) until all n*(n+1) indices are used up. */
    farm {
     for( ij = mpadd(&nextij,1); ij < n*(n+1); ij = mpadd(&nextij,1) )  {
      i = ij / (n+1);  // row index of element to be updated
      if (i!=r) {       /* the pivot row itself stays as normalized */
        /* update row i: */
        j = ij % (n+1);  // column index of element to be updated
        /* columns left of the pivot are skipped; a[r][j] is expected
         * to be zero there after the earlier stages */
        if (j<r) continue;
        a[i][j] = a[i][j] - a[r][j] * rowfactor[i]; 
      }
     }
    }
    /* the processor with rank 0 prints the tableau after this stage */
    farm if ($==0) print_system( a, n, n );
 }  /* for r */
 return;
}


sh syst s;     /* system tableau, allocated and filled by build_system() */
sh int n;      /* number of equations (= number of variables);
                * written by build_system() through its pointer argument */

/* Program entry: builds the example system, prints it, runs the
 * elimination and reports the elapsed time.
 */
void main( void )
{
  pr int starttime, stoptime;   /* getct() readings, one pair per processor */
  start {
    s = build_system( &n );
    /* the processor with $==0 prints the initial system */
    farm if ($==0) print_system( s, n, n );
    /* every processor records its own start reading */
    farm starttime = getct();
    eliminate( s, n );
  }
  /* The measured interval spans the whole eliminate() call, including
   * the per-stage printf/print_system output done inside it. */
  stoptime = getct();
  if ($==0) printf("\nTime: %d PRAM CPU cycles\n", stoptime - starttime);
  /* NOTE(review): presumably keeps processors together until the
   * report has been printed -- confirm the scope of `barrier` here. */
  barrier;
}
