/****************************************************************
*                                                               *
*  gjmpi.c                                                      *
*  Gauss-Jordan method for solving systems of linear equations  *
*  MPI-Version  (row-oriented)                                  *
*  Joerg Meyer       University of Nebraska at Omaha            *
*  07/15/94          Computer Science Department                *
*  07/02/95          Bug fix									*
*                                                               *
****************************************************************/

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <mpi.h>

/* number of equations (and unknowns) in the linear system */
#define N	50	/* size of linear system */
/* a pivot whose magnitude is below this makes the solver report "no solution" */
#define EPSILON 1e-20
/* status values returned by gauss_jordan_elimination */
#define FALSE	0
#define TRUE	1

/* one row of the augmented matrix: N coefficients in [0..N-1], */
/* right-hand side (later the solution component) in [N]        */
typedef double mrow_t[N+1];	/* used to reference dynamically created */
/* pointer to the first row, so a heap block can be indexed as a[i][j] */
typedef mrow_t far *p_mrow_t;	/* two-dimensional array		 */

/****************************************************************
 *
 *    initialize_system (a) - fills the augmented matrix with a
 *                            randomly generated system of
 *                            linear equations
 *    parameter: a - pointer to matrix (N rows of N+1 doubles)
 *
 *    Coefficients a[i][0..N-1] come from rand (), seeded with
 *    the current clock value.  The right-hand side a[i][N] is
 *    the sum over j of (j - 50) * a[i][j], so x[j] = j - 50
 *    satisfies every generated equation.
 *
 ****************************************************************/
void initialize_system (p_mrow_t a)
{
    Int i, j;

    /* time () expects a time_t pointer; passing the address of an Int
       is a type mismatch, so take the return value instead */
    srand ((unsigned int) time (NULL));

    for (i = 0; i < N; i++)
    {
        a[i][N] = 0.0;
        for (j = 0; j < N; j++)
        {
            a[i][j] = rand ();
            /* accumulate the right-hand side for the known solution */
            a[i][N] += (j - 50) * a[i][j];
        }
    }
}

/****************************************************************
 *
 *    print_results (a) - prints solution of system of
 *                        linear equations
 *    parameter: a - pointer to matrix; the value printed for
 *                   x[i] is read from a[i][N]
 *
 *    Output pauses after every 20 values and waits for the
 *    user to press <RETURN>.
 *
 ****************************************************************/
void print_results (p_mrow_t a)
{
    Int i;
    int c;

    for (i = 0; i < N; i++)
    {
        if (i > 0 && i % 20 == 0)
        {
            printf ("<RETURN> to continue");
            /* the prompt has no newline, so push it out before blocking */
            fflush (stdout);
            /* discard the rest of the input line; unlike gets () this
               cannot overflow a buffer, and it stops cleanly on EOF */
            do
                c = getchar ();
            while (c != EOF && c != '\n');
            printf ("\r");
        }
        /* cast so the argument matches %ld whatever Int is defined as */
        printf ("x[%ld] = %10.6lf\n", (long) i, a[i][N]);
    }
}

/****************************************************************
 *
 *    gauss_jordan_elimination (m) -
 *          row-oriented Gauss-Jordan algorithm implementation
 *          solves system of linear equations in parallel
 *          using MPI calls; all tasks have to call this
 *          function (it contains collective operations),
 *          only task 0 actually delivers a pointer to the matrix
 *    parameter: m - pointer to matrix (read on task 0 only)
 *    result   : TRUE  - solution vector stored in m[i][N] on
 *                       task 0 (column 0 of m is used as
 *                       scratch space and overwritten too)
 *               FALSE - no usable pivot was found for some
 *                       column, or a work buffer could not be
 *                       allocated on some task; every task
 *                       returns FALSE together
 *
 *    Rows are split into contiguous bands, one band per task.
 *    For each column the task holding the largest unused entry
 *    broadcasts that row, and every task eliminates the column
 *    from all of its other rows.
 *
 ****************************************************************/
Int gauss_jordan_elimination (p_mrow_t m)
{
    Int myrank, ranksize;
    MPI_Status status;
    Int startrow, lastrow, nrow, nalloc, sr, lr;
    p_mrow_t mp;
    double far *pivotrow;
    Int far *pivotp, far *marked;
    Int i, j, k, picked;
    Int result = TRUE;
    double tmp, factor;
    struct {
	double val;
	Int rank; } in, out;

/* assume MPI_Init has already been called */
    MPI_Comm_rank (MPI_COMM_WORLD, &myrank);
    MPI_Comm_size (MPI_COMM_WORLD, &ranksize);

/* rows of matrix I have to process: [startrow, lastrow) */
    startrow = (myrank * N) / ranksize;
    lastrow = ((myrank + 1) * N) / ranksize;
    nrow = lastrow - startrow;

/* dynamically allocate data structures */
/* request at least one element so that NULL always means failure, */
/* even on a task that owns no rows (more tasks than rows)         */
    nalloc = (nrow > 0) ? nrow : 1;
/* copy of my portion of matrix */
    mp = (p_mrow_t) malloc (nalloc * (N+1) * sizeof (double));
/* row to be used next for reduction of matrix */
    pivotrow = (double far *) malloc ((N+1) * sizeof (double));
/* pivotp[r]: column that was reduced using local row r */
/* (N entries because task 0 later gathers all tasks' entries here) */
    pivotp = (Int far *) malloc (N * sizeof (Int));
/* marked[r]: non-zero once local row r has served as pivot row */
    marked = (Int far *) malloc (nalloc * sizeof (Int));

/* agree collectively on whether every task got its buffers; a task */
/* that bailed out alone would leave the others blocked in MPI calls */
    in.val = (mp == NULL || pivotrow == NULL ||
	      pivotp == NULL || marked == NULL) ? 1.0 : 0.0;
    in.rank = myrank;
    MPI_Allreduce (&in, &out, 1, MPI_DOUBLE_INT,
		   MPI_MAXLOC, MPI_COMM_WORLD);
    if (out.val > 0.0)
    {
	if (myrank == 0)
	    printf ("Out of memory on task %ld\n", (long) out.rank);
	result = FALSE;
	goto cleanup;
    }

    if (myrank == 0)	/* process 0 */
    {
/* distribute data among processes */
	for (i = 1; i < ranksize; i++)
	{
	    sr = (i * N) / ranksize;
	    lr = ((i + 1) * N) / ranksize;
	    MPI_Send (m[sr], (lr-sr) * (N+1), MPI_DOUBLE,
		      i, 1234, MPI_COMM_WORLD);
	}
/* keep my own band (startrow is 0 on process 0) */
	for (i = 0; i < nrow; i++)
	    for (j = 0; j < N + 1; j++)
		mp[i][j] = m[startrow + i][j];
    }
    else
    {
/* receive portion of matrix */
	MPI_Recv (mp, nrow * (N+1), MPI_DOUBLE,
		  0, 1234, MPI_COMM_WORLD, &status);
    }
    for (i = 0; i < nrow; i++)
	marked[i] = 0;

    for (i = 0; i < N; i++)	/* for each column */
    {
/* WinMPI extension */
	MPI_Win_yield ();

/* find local maximum in column among rows not yet used as pivot */
	tmp = 0.0;
	picked = -1;
	for (j = 0; j < nrow; j++)
	{
	    if (!marked[j] && (fabs (mp[j][i]) > tmp))
	    {
		tmp = fabs (mp[j][i]);
		picked = j;
	    }
	}
/* find global maximum in column and which process stores it */
	in.val = tmp;
	in.rank = myrank;
	MPI_Allreduce (&in, &out, 1, MPI_DOUBLE_INT,
		       MPI_MAXLOC, MPI_COMM_WORLD);

/* out.val is the magnitude of the pivot element and is identical on */
/* every task, so all tasks leave together.  Testing it here, before */
/* picked is used, also covers the case where the winning task found */
/* no candidate row at all (picked would not be a valid index).      */
	if (out.val < EPSILON)	/* no solution */
	{
	    if (myrank == 0)
		printf ("Exits on iteration %d\n", (int) i);
	    result = FALSE;
	    goto cleanup;
	}

	if (out.rank == myrank)	/* I am process with the global maximum */
	{
	    marked[picked] = 1;
	    pivotp[picked] = i;
	    for (j = 0; j < N + 1; j++)
		pivotrow[j] = mp[picked][j];
	}
/* process with global maximum broadcasts entire row (pivot row) */
	MPI_Bcast (pivotrow, N + 1, MPI_DOUBLE,
		   out.rank, MPI_COMM_WORLD);

/* reduce all rows using broadcasted pivot row, except the pivot row */
/* itself (the marked row whose recorded column is the current one)  */
	for (j = 0; j < nrow; j++)
	    if (!(marked[j] && pivotp[j] == i))
	    {
		factor = mp[j][i] / pivotrow[i];
		for (k = i; k < N + 1; k++)
		    mp[j][k] -= pivotrow[k] * factor;
	    }
    }

    if (myrank == 0)	/* process 0 */
/* compute local part of solution and collect results of other processes */
    {
	for (i = 0; i < nrow; i++)
	    m[i][N] = mp[i][N] / mp[i][pivotp[i]];

	for (i = 1; i < ranksize; i++)
	{
	    sr = (i * N) / ranksize;
	    lr = ((i + 1) * N) / ranksize;
/* pivotrow is free now and serves as receive buffer */
	    MPI_Recv (pivotrow, lr - sr, MPI_DOUBLE,
		      i, 1235, MPI_COMM_WORLD, &status);
	    for (j = 0; j < lr - sr; j++)
		m[sr + j][N] = pivotrow[j];
	    MPI_Recv (pivotp + sr, lr - sr, MPI_INT,
		      i, 1236, MPI_COMM_WORLD, &status);
	}
/* sort result vector - solution stored in m[i][N]      */
/* row i solved unknown pivotp[i]; column 0 is scratch  */
	for (i = 0; i < N; i++)
	    m[pivotp[i]][0] = m[i][N];
	for (i = 0; i < N; i++)
	    m[i][N] = m[i][0];
    }
    else	/* process != 0 */
    {
/* compute and send local part of solution */
	for (i = 0; i < nrow; i++)
	    pivotrow[i] = mp[i][N] / mp[i][pivotp[i]];
	MPI_Send (pivotrow, nrow, MPI_DOUBLE, 0, 1235, MPI_COMM_WORLD);
	MPI_Send (pivotp, nrow, MPI_INT, 0, 1236, MPI_COMM_WORLD);
    }

cleanup:
/* release work buffers on every path; free (NULL) is harmless */
    free (marked);
    free (pivotp);
    free (pivotrow);
    free (mp);
    return (result);
}

/****************************************************************
 *
 *    MPI_main (argc, argv) - program entry point
 *          task 0 creates a random system, all tasks solve it
 *          together, task 0 prints the solution (or
 *          "No solution") and the elapsed wall-clock time
 *    parameters: argc, argv - passed on to MPI_Init
 *    result    : always 0
 *
 ****************************************************************/
int MPI_main (int argc, LPPSTR argv)
{
    p_mrow_t a;
    Int solution;
    Int myrank, ranksize;
    double t1 = 0.0, t2, elapsed;

    MPI_Init (&argc, &argv);
    MPI_Comm_rank (MPI_COMM_WORLD, &myrank);
    MPI_Comm_size (MPI_COMM_WORLD, &ranksize);
    if (myrank == 0)	/* process 0 */
    {
/* process 0 provides matrix to be solved */
/* NOTE(review): the result of p4_shmalloc is not checked and the */
/* matrix is never released before exit - confirm this is intended */
	a = (p_mrow_t) p4_shmalloc (N * sizeof(mrow_t));
	initialize_system (a);
    }
    else	/* process != 0 */
	a = NULL;
/* wait until all processes are running (measuring time) */
    MPI_Barrier (MPI_COMM_WORLD);
    if (myrank == 0)
	t1 = MPI_Wtime ();

    solution = gauss_jordan_elimination (a);

/* stop the clock only after every process has finished */
    MPI_Barrier (MPI_COMM_WORLD);
    if (myrank == 0)	/* process 0 */
    {
	t2 = MPI_Wtime ();

	if (solution == TRUE)
	    print_results (a);
	else
	    printf ("No solution\n");

/* named elapsed rather than time to avoid hiding the library function */
	elapsed = t2 - t1;
/* cast so the argument matches %ld whatever Int is defined as */
	printf ("%ld tasks used - Execution time: %.2lf sec\n",
		(long) ranksize, elapsed);
    }

    MPI_Finalize ();
    return (0);
}
