/* Gauss-Elimination in Fork95.   C.W.Kessler 02/96 */

#include <fork.h>
#include <io.h>
#include <math.h>

#define REQUIRED_SPACE 4096    /* overall #words of shared memory */
/* NOTE(review): REQUIRED_SPACE, myheap and mycurr are not referenced
 * anywhere else in the visible source -- presumably leftovers of a
 * hand-written shared-heap allocator; confirm before removing. */
sh static int myheap;
sh static int mycurr;

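/* The forall()/gforall() macros cannot be nested.
 * Further levels of parallelism are to be denoted by sforall(),
 * a sequentialized variant of the gforall macro.
 * NOTE(review): none of these macros is defined or used in the
 * visible part of this file -- this comment may be stale.
 */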

/* System tableau: an array of row pointers, one per equation. */
typedef double ** syst;
/* One equation: the coefficient row; build_system() allocates n+1
 * doubles per row, the last one holding the right-hand side. */
typedef double *  eq;


async void print_system( syst s, int n, int m )
{
  /* Sequentially dump the tableau s on the calling processor:
   * one line per equation (n lines), each listing the m coefficients
   * s[row][0..m-1] followed by the right-hand side s[row][m].
   */
  pr int row;
  pr int col;

  printf("New %d x %d system:\n", n, m );
  printf("==================================================\n");

  for (row = 0; row < n; row++)
  {
     /* equation label first, then the coefficient/variable pairs */
     printf("EQ%d: ", row);
     for (col = 0; col < m; col++)
     {
        printf(" %5.2lf x%.2d", s[row][col], col );
     }
     /* column m holds the right-hand side of this equation */
     printf(" == %5.2lf\n", s[row][m] );
  }

  printf("==================================================\n");
}


sync syst build_system( sh int *pn ) 
{
   /* Build the initial example system: n = 3 equations in 3 variables.
    * Every row holds n+1 doubles; the right-hand side vector is stored
    * in column n of the system matrix.
    * The number of equations is written to *pn and the freshly
    * allocated tableau is returned.
    * NOTE(review): the results of shmalloc() are not checked.
    */
  pr int i;
  pr int n;
  sh syst s;
  sh int p = groupsize();   /* stride of the row-allocation loop below */
  n = 3;    /*example values*/
  /* the array of n row pointers is allocated once (seq) */
  seq s = (syst) shmalloc( n * sizeof( eq ));
  /* each processor allocates rows $, $+p, $+2p, ...
   * NOTE(review): assumes $ ranges over 0..p-1 here -- confirm against caller */
  farm
    for (i=$; i<n; i+=p)
      s[i] = (eq) shmalloc( (n+1) * sizeof( double )); 
  /* Hard-coded example coefficients; column 3 is the right-hand side.
   * NOTE(review): these stores (and the store to *pn) are not wrapped in
   * seq, so presumably every processor of the group writes the same
   * values -- confirm this is intended. */
  s[0][0] = 2.0;   s[0][1] =  -1.0;   s[0][2] = 3.0;  s[0][3]=6.0;
  s[1][0] =-1.0;   s[1][1] =   3.0;   s[1][2] =-5.0;  s[1][3]=4.0;
  s[2][0] =-1.0;   s[2][1] =   2.0;   s[2][2] = 5.0;  s[2][3]=0.0; 
  *pn = n;
  return s;
}



sync void eliminate( sh syst a, sh int n )
{
  /* Applies Gaussian elimination to the system a[][] in place.
   * a has n rows of n+1 entries each; column n is the right hand side.
   * In every stage r, row r is scaled so that a[r][r] becomes 1 and
   * column r is then eliminated from ALL other rows (above and below
   * the pivot row), so no separate back-substitution pass follows.
   * Assume matrix is regular and diagonally dominant: no pivot search
   * is done and a[r][r] is not checked against zero.
   * For a system of n equations in n variables,
   * up to n^2 processors could be used to update in parallel.
   * Up to n processors are used here.
   * The solution is returned in the right hand side column
   * of the modified system tableau.
   * The tableau is printed after every stage.
   */
 sh int r;      /* current elimination stage / pivot row */
 sh int p;      /* processor count, accumulated by the mpadd below */
 pr int i, j;
 /* Every processor adds 1 to the shared counter p and stores the
  * value returned by mpadd() in $.
  * NOTE(review): presumably mpadd returns the pre-add prefix value, so
  * $ becomes 0..p-1 and p the number of processors -- confirm. */
 p = 0;
 $ = mpadd(&p,1);
 
 for (r=0; r<n; r++) {  /* do n elimination steps */ 
    /* reciprocal of the pivot, shared by all processors */
    sh double factor = 1.0 / a[r][r];
    seq printf("Stage %d factor %f p=%d\n", r, factor, p);
    /* my pivot element is a[r][r] because a is diag. dom. */
    /* normalize equation r: columns r..n (including the RHS column n)
     * are distributed over the processors with stride p; columns
     * left of r are not touched. */
    farm
     for( j=r+$; j<=n; j+=p )
       a[r][j] = a[r][j] * factor;
   
    for( i=$; i<n; i+=p )   /* in parallel update all rows i */
     farm 
      if (i!=r) {
        /* Save a[i][r] before the inner loop: the j==r iteration
         * overwrites it.  Row r has just been normalized, so this
         * value is the scaling factor for row i. */
        pr double myfactor = a[i][r]; /*scaling factor: old a[i][r]/a[r][r]*/
        for( j=r; j<=n; j++ ) {  /* update column j: */
          a[i][j] = a[i][j] - a[r][j] * myfactor;
        }
      }
    /* show the tableau after this stage */
    seq print_system( a, n, n );
 }  /* for r */
 return;
}


sh syst s;     /* system tableau, created by build_system() in main() */
sh int n;      /* number of equations, set by build_system() via its pn
                * argument; main() also passes it as the number of
                * variables to print_system() */

async void main( void )
{
  pr unsigned int starttime, stoptime;
  start {
    s = build_system( &n );
    seq print_system( s, n, n );
    farm starttime = getct();
    eliminate( s, n );
  }
  stoptime = getct();
  if ($==0) printf("\nTime: %d PRAM CPU cycles\n", stoptime - starttime);
  if ($==0) printAccStat();
  barrier;
}
