//----------------------------------*-C++-*----------------------------------//
// global_mpi.cc
// Maurice LeBrun
// Wed Feb  1 16:50:59 1995
//---------------------------------------------------------------------------//
// @> Global C4 functions for MPI
//
// $Id: global_mpi.cc,v 1.1 1995/02/07 20:43:04 mjl Exp $
//
// $Log: global_mpi.cc,v $
// Revision 1.1  1995/02/07  20:43:04  mjl
// Architecture-dependent implementations of C4 global functions.
//
//---------------------------------------------------------------------------//

#include "DynArray.h"
#include "Assert.h"

//---------------------------------------------------------------------------//
// Miscellaneous

// Start up the message-passing layer by initializing MPI.  The program's
// argument count and vector are handed to MPI_Init by address.
void C4_Init( int& argc, char **& argv )
{
    int   *argc_ptr = &argc;
    char ***argv_ptr = &argv;

    MPI_Init( argc_ptr, argv_ptr );
}

// Shut down the message-passing layer by finalizing MPI.
void C4_Finalize()
{
    MPI_Finalize();
}

// Return the rank of the calling process within MPI_COMM_WORLD.
int C4_node()
{
    int rank;
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
    return rank;
}

// Return the number of processes in MPI_COMM_WORLD.
int C4_nodes()
{
    int nprocs;
    MPI_Comm_size( MPI_COMM_WORLD, &nprocs );
    return nprocs;
}

// Return the group identifier.  This implementation always reports 0.
int C4_group()
{
    return 0;
}

// Global synchronization: wait at a barrier on MPI_COMM_WORLD.
void C4_gsync()
{
    MPI_Barrier( MPI_COMM_WORLD );
}

//---------------------------------------------------------------------------//
// MPI send/receive calls (basic set)
//
// Blocking:
//	MPI_Send(void* buf, int count, MPI_Datatype datatype,
//		 int dest, int tag, MPI_Comm comm);
//	MPI_Recv(void* buf, int count, MPI_Datatype datatype,
//		 int source, int tag, MPI_Comm comm, MPI_Status *status);
//
// Nonblocking:
//	MPI_Isend(void* buf, int count, MPI_Datatype datatype,
//		  int dest, int tag, MPI_Comm comm, MPI_Request *request);
//	MPI_Irecv(void* buf, int count, MPI_Datatype datatype,
//		  int source, int tag, MPI_Comm comm, MPI_Request *request);
//---------------------------------------------------------------------------//

//---------------------------------------------------------------------------//
// Perform a normal (blocking) send.
//---------------------------------------------------------------------------//

// Blocking send of `size` bytes starting at `buf` to process `dest` on
// MPI_COMM_WORLD, labelled with `tag`.  The `group` argument is not used
// here.  The status of MPI_Send is not examined; C4_SUCCESS is always
// returned.
int C4_Send( void *buf, int size, int dest, int tag, int group )
{
    MPI_Send( buf, size, MPI_BYTE,
              dest, tag, MPI_COMM_WORLD );

    return C4_SUCCESS;
}

//---------------------------------------------------------------------------//
// Perform a normal (blocking) receive.
//---------------------------------------------------------------------------//

int C4_Recv( void *buf, int size, int source, int tag, int group )
{
    MPI_Status status;
    int cnt;
    MPI_Recv( buf, size, MPI_BYTE, source, tag, MPI_COMM_WORLD, &status );
    MPI_Get_count( &status, MPI_BYTE, &cnt );
    return cnt;
}

//---------------------------------------------------------------------------//
// Perform a non blocking send.
//---------------------------------------------------------------------------//

// Start a nonblocking send of `size` bytes from `buf` to process `dest`
// on MPI_COMM_WORLD, labelled with `tag`.  The `group` argument is not
// used here.  The MPI request is stored in the returned C4_Req, on which
// set() is called before it is handed back.
C4_Req C4_SendAsync( void *buf, int size, int dest, int tag, int group )
{
    C4_Req request;

    MPI_Isend( buf, size, MPI_BYTE,
               dest, tag, MPI_COMM_WORLD, &request.r );
    request.set();

    return request;
}

//---------------------------------------------------------------------------//
// Perform a non blocking receive.
//---------------------------------------------------------------------------//

// Post a nonblocking receive of up to `size` bytes into `buf` from process
// `source` on MPI_COMM_WORLD, matching `tag`.  The `group` argument is not
// used here.  The MPI request is stored in the returned C4_Req, on which
// set() is called before it is handed back.
C4_Req C4_RecvAsync( void *buf, int size, int source, int tag, int group )
{
    C4_Req request;

    MPI_Irecv( buf, size, MPI_BYTE,
               source, tag, MPI_COMM_WORLD, &request.r );
    request.set();

    return request;
}

//---------------------------------------------------------------------------//
// Global reduction operations.
//
// The call most like that in NX is:
//
//	MPI_Allreduce(void* sendbuf, void* recvbuf, int count,
//		      MPI_Datatype datatype, MPI_Op op, MPI_Comm comm);
// 
// which returns the result to all processes in the group.  "op" determines
// the type of reduction performed.
//
// Available reduction operators, description, and allowed types: 
//
// MPI_MAX, MPI_MIN		max, min			I, F
// MPI_SUM, MPI_PROD		sum, product			I, F
// MPI_BAND, MPI_BOR, MPI_BXOR	bitwise and, or, xor		I, B
// MPI_LAND, MPI_LOR, MPI_LXOR	logical and, or, xor		I
// MPI_MAXLOC, MPI_MINLOC	max, min value and location
//
// where types are:
//
// I:	MPI_INT, MPI_LONG, MPI_SHORT, MPI_UNSIGNED_SHORT, 
//	MPI_UNSIGNED, MPI_UNSIGNED_LONG
// F:	MPI_FLOAT, MPI_DOUBLE, MPI_LONG_DOUBLE
// B:	MPI_BYTE
//---------------------------------------------------------------------------//

// File-scope staging buffers, one per element type, used by the array
// forms of C4_gsum below as the send buffer for MPI_Allreduce (the
// caller's array serves as the receive buffer).  Each is constructed with
// the argument 10 -- presumably the initial size; confirm against
// DynArray.h.
static DynArray<int>    ibuf(10);
static DynArray<long>   lbuf(10);
static DynArray<float>  fbuf(10);
static DynArray<double> dbuf(10);

//---------------------------------------------------------------------------//
// Sum, scalar

// Global sum of an int over MPI_COMM_WORLD; x is replaced by the result.
void C4_gsum( int& x )
{
    // Snapshot the local contribution so x can act as the receive buffer.
    int local = x;
    MPI_Allreduce( &local, &x, 1,
                   MPI_INT, MPI_SUM, MPI_COMM_WORLD );
}

// Global sum of a long over MPI_COMM_WORLD; x is replaced by the result.
void C4_gsum( long& x )
{
    // Snapshot the local contribution so x can act as the receive buffer.
    long local = x;
    MPI_Allreduce( &local, &x, 1,
                   MPI_LONG, MPI_SUM, MPI_COMM_WORLD );
}

// Global sum of a float over MPI_COMM_WORLD; x is replaced by the result.
void C4_gsum( float& x )
{
    // Snapshot the local contribution so x can act as the receive buffer.
    float local = x;
    MPI_Allreduce( &local, &x, 1,
                   MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD );
}

// Global sum of a double over MPI_COMM_WORLD; x is replaced by the result.
void C4_gsum( double& x )
{
    // Snapshot the local contribution so x can act as the receive buffer.
    double local = x;
    MPI_Allreduce( &local, &x, 1,
                   MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD );
}

//---------------------------------------------------------------------------//
// Sum, array

// Element-wise global sum of the n ints at px over MPI_COMM_WORLD.  The
// local values are staged through the file-scope ibuf so that px can be
// used as the receive buffer; on return px holds the reduced values.
// n must be non-negative.
void C4_gsum( int *px, int n )
{
    Assert( n >= 0 );

    // Stage only when there is data: with n == 0 the expansion touch
    // below would index element -1.
    if( n > 0 ) {
        ibuf[n-1] = 0;		// auto expand the buffer.
        for( int i=0; i < n; i++ )
            ibuf[i] = px[i];
    }

    // Issued even for n == 0 so this process still joins the collective.
    MPI_Allreduce( &ibuf[0], px, n, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
}

// Element-wise global sum of the n longs at px over MPI_COMM_WORLD.  The
// local values are staged through the file-scope lbuf so that px can be
// used as the receive buffer; on return px holds the reduced values.
// n must be non-negative.
void C4_gsum( long *px, int n )
{
    Assert( n >= 0 );

    // Stage only when there is data: with n == 0 the expansion touch
    // below would index element -1.
    if( n > 0 ) {
        lbuf[n-1] = 0;		// auto expand the buffer.
        for( int i=0; i < n; i++ )
            lbuf[i] = px[i];
    }

    // Issued even for n == 0 so this process still joins the collective.
    MPI_Allreduce( &lbuf[0], px, n, MPI_LONG, MPI_SUM, MPI_COMM_WORLD );
}

// Element-wise global sum of the n floats at px over MPI_COMM_WORLD.  The
// local values are staged through the file-scope fbuf so that px can be
// used as the receive buffer; on return px holds the reduced values.
// n must be non-negative.
void C4_gsum( float *px, int n )
{
    Assert( n >= 0 );

    // Stage only when there is data: with n == 0 the expansion touch
    // below would index element -1.
    if( n > 0 ) {
        fbuf[n-1] = 0;		// auto expand the buffer.
        for( int i=0; i < n; i++ )
            fbuf[i] = px[i];
    }

    // Issued even for n == 0 so this process still joins the collective.
    MPI_Allreduce( &fbuf[0], px, n, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD );
}

// Element-wise global sum of the n doubles at px over MPI_COMM_WORLD.  The
// local values are staged through the file-scope dbuf so that px can be
// used as the receive buffer; on return px holds the reduced values.
// n must be non-negative.
void C4_gsum( double *px, int n )
{
    Assert( n >= 0 );

    // Stage only when there is data: with n == 0 the expansion touch
    // below would index element -1.
    if( n > 0 ) {
        dbuf[n-1] = 0;		// auto expand the buffer.
        for( int i=0; i < n; i++ )
            dbuf[i] = px[i];
    }

    // Issued even for n == 0 so this process still joins the collective.
    MPI_Allreduce( &dbuf[0], px, n, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD );
}

//---------------------------------------------------------------------------//
// Min, scalar

// Global minimum of an int over MPI_COMM_WORLD; x is replaced by the result.
void C4_gmin( int& x )
{
    // Snapshot the local contribution so x can act as the receive buffer.
    int local = x;
    MPI_Allreduce( &local, &x, 1,
                   MPI_INT, MPI_MIN, MPI_COMM_WORLD );
}

// Global minimum of a long over MPI_COMM_WORLD; x is replaced by the result.
void C4_gmin( long& x )
{
    // Snapshot the local contribution so x can act as the receive buffer.
    long local = x;
    MPI_Allreduce( &local, &x, 1,
                   MPI_LONG, MPI_MIN, MPI_COMM_WORLD );
}

// Global minimum of a float over MPI_COMM_WORLD; x is replaced by the
// result.
void C4_gmin( float& x )
{
    // Snapshot the local contribution so x can act as the receive buffer.
    float local = x;
    MPI_Allreduce( &local, &x, 1,
                   MPI_FLOAT, MPI_MIN, MPI_COMM_WORLD );
}

// Global minimum of a double over MPI_COMM_WORLD; x is replaced by the
// result.
void C4_gmin( double& x )
{
    // Snapshot the local contribution so x can act as the receive buffer.
    double local = x;
    MPI_Allreduce( &local, &x, 1,
                   MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD );
}

//---------------------------------------------------------------------------//
// Max, scalar

// Global maximum of an int over MPI_COMM_WORLD; x is replaced by the result.
void C4_gmax( int& x )
{
    // Snapshot the local contribution so x can act as the receive buffer.
    int local = x;
    MPI_Allreduce( &local, &x, 1,
                   MPI_INT, MPI_MAX, MPI_COMM_WORLD );
}

// Global maximum of a long over MPI_COMM_WORLD; x is replaced by the result.
void C4_gmax( long& x )
{
    // Snapshot the local contribution so x can act as the receive buffer.
    long local = x;
    MPI_Allreduce( &local, &x, 1,
                   MPI_LONG, MPI_MAX, MPI_COMM_WORLD );
}

// Global maximum of a float over MPI_COMM_WORLD; x is replaced by the
// result.
void C4_gmax( float& x )
{
    // Snapshot the local contribution so x can act as the receive buffer.
    float local = x;
    MPI_Allreduce( &local, &x, 1,
                   MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD );
}

// Global maximum of a double over MPI_COMM_WORLD; x is replaced by the
// result.
void C4_gmax( double& x )
{
    // Snapshot the local contribution so x can act as the receive buffer.
    double local = x;
    MPI_Allreduce( &local, &x, 1,
                   MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD );
}

//---------------------------------------------------------------------------//
//                              end of global_mpi.cc
//---------------------------------------------------------------------------//

