



#ifndef lint
/* RCS identification string for this file; compiled only when "lint"
   is not defined. */
static char vcid[] = "$Id: support.c,v 1.1.1.1 1998/08/27 19:16:35 gropp Exp $";
#endif

/* #define DEBUG */
#include <stdio.h>
#include "blkcm/bcp.h"
#include "blkcm/bc.h"
#include <stdio.h>

/*
  Usage:
   The user builds a "program" consisting of an array of
   "BCentry" type.  
   In order to provide asynchronous communication, the following
   sequence of calls may be made:

   BCirecv( pgm )          initialize for receive
   BCisend( pgm )          send
   BCiwait( pgm )          wait for sends and receive to complete

   BCexec( pgm, sctx, dctx )           does all of the above
 
   (In these calls, sctx and dctx are contexts to be applied to the 
   source and destinations resp.; this allows the same program to be used
   on identically laid out arrays.)

   In addition, the routine

   BCcompile( pgm, options )

   must be called to initialize all of the internal data.

   Distributed memory algorithm:
   This version uses asynchronous send/receives and the message type
   to select between destinations.  

   Optimization for dense source and/or destination buffers:  Just
   make the buffer entry the source (p) and eliminate the
   copy (in or out).  Note that this isn't compatible with buffering
   multiple sends/receives to the same processor.

   BLOCK_COMM_SYNC_RECV_AND_SEND:
   This entry ensures that any receive ENCOUNTERED SO FAR is completed
   before continuing with the sends.  This method requires the user
   to properly sort the program entries.  To aid in this, there is
   a routine that will take a program defined with BCjoin_nbr
   and schedule it with these syncs.

   The "inplace" code is not correct.  Known bugs include:
   1. Does not handle off != 0.

   Another important optimization is to allow the user to replace the
   default copy routines with user-specified "transfer functions".
   These could do such things as interpolation, averaging, or scatter/gather.
   Allowing these to be placed in these routines can eliminate an additional
   pass over the data on one or both sides of the communication.

   These functions are given by in, out, copy, and are the default
   "slab" functions unless reset by the user.

   Another optimization is to allocate storage for only a few (1? 2?) phases
   at a time, and to reuse the storage for buffers between phases.  This
   may be needed for programs with large numbers of phases (such as those
   in a triangular solve with the phase being the "wavefront" index).

   For asynchronous operation, do a phase at a time?  Should all the
   routines work a phase at a time (posting of receives only a phase
   at a time)?

   This file contains routines for the ipsc series machines that 
   handle all of the various optimizations.

   There should be versions for phase-by-phase, asynchronous, buffer reuse,
   ...
 */

#ifdef DISTRIBUTED_MEMORY
/* Forward declaration: BCdo_recv is defined later in this file but is
   called by BCisend and BCiwait before that definition.  The arguments
   are, in order: the program, the end entry, the starting entry, the
   "in" transfer routine, and the phase limit. */
BCentry *BCdo_recv( BCPGM *, BCentry *, BCentry *, int (*)(), int );

/*
   BCirecv - Initialize receives for ALL receives (all phases).

   Every entry of the program is visited.  For each BLOCK_COMM_DEST entry
   whose type has BLOCK_COMM_BUFFER set, a nonblocking receive of
   pgm->act_len bytes into pgm->buffer is started and its request is
   stored in pgm->rc.  (When intelnx is defined and the entry also has
   BLOCK_COMM_SYNC_NBR set, isendrecv is used instead.)  The message tag
   is pgm->mtype plus the low bit of Program->ncalls.

   Input Parameter:
.  Program - the communication program whose receives are started

   Returns 0.
 */
int BCirecv( Program )
BCPGM *Program;
{
BCentry *pgm = Program->pgm;
int        n    = Program->n;
int        (*copy)() = Program->copy;  /* referenced only when FOO is defined */
int       ntag = (Program->ncalls & 0x1);

#ifdef DEBUG
printf( "Starting irecv\n" );
#endif
while (n--) {
    switch (GET_MAJOR_MODE(pgm)) {
	/* This is wrong.  The values may not have been set yet in the
	   case of overlapping multiple phases */
#ifdef FOO
        case BLOCK_COMM_LOCAL_SRC:
            (*copy)( &pgm->src, &((pgm + pgm->processor)->src) );
	    break;
#endif
        case BLOCK_COMM_DEST:
        if (pgm->type & BLOCK_COMM_BUFFER) {
#ifdef intelnx
	    if (pgm->type & BLOCK_COMM_SYNC_NBR) 
		pgm->rc = isendrecv( pgm->mtype+ntag, pgm->buffer, 0,
				     pgm->processor, 0, 
				     (pgm->mtype + ntag) | BASE_FORCE, 
				     pgm->buffer, pgm->act_len );
	    else 
#endif
		MPI_Irecv(pgm->buffer,pgm->act_len,MPI_BYTE,MPI_ANY_SOURCE,pgm->mtype+ntag,MPI_COMM_WORLD,&(pgm->rc));
#ifdef DEBUG
            printf( "IReceived type %d on %d (should be from %d)\n",
                   pgm->mtype+ntag, __MYPROCID, pgm->processor );
#endif
	    }
        break;
        default:
	/* Entries of any other kind need no receive to be started */
        break;
        }
    NEXTLINE(pgm);
    }
#ifdef DEBUG
printf( "Ending irecv\n" );
#endif

return 0;
}

/*
   BCisend - Start the sends for every entry of a program.

   Entries are visited in program order.  When an entry has
   BLOCK_COMM_SYNC_BLOCK set, BCdo_recv is first called to complete the
   earlier receives of lower phase; the position it returns is remembered
   in Program->pgm_last.  A local-source entry is handled with the
   program's copy routine.  Any other source entry is packed with the
   program's out routine (unless it is marked inplace) and, if its type
   has BLOCK_COMM_BUFFER set, sent with a nonblocking send whose request
   is stored in the entry's rc field.

   Input Parameter:
.  Program - the communication program whose sends are started

   Returns 0.
 */
int BCisend( Program )
BCPGM *Program;
{
    BCentry  *pgm      = Program->pgm;
    BCentry  *recv_pos = pgm;     /* first entry not yet handled by BCdo_recv */
    MY_FLOAT *sbuf;               /* data handed to the send call */
    int      (*copy)() = Program->copy;
    int      (*in)()   = Program->in;
    int      (*out)()  = Program->out;
    int      ntag      = (Program->ncalls & 0x1);
    int      n         = Program->n;

#ifdef DEBUG
    printf( "[%d] Starting isend\n", __MYPROCID );
#endif
    Program->pgm_last = pgm;

    while (n-- != 0) {
        /* Complete all receives of a lower phase that precede this entry */
        if (pgm->type & BLOCK_COMM_SYNC_BLOCK) {
            recv_pos = BCdo_recv( Program, pgm, recv_pos, in, pgm->phase );
            Program->pgm_last = recv_pos;
        }

        if (IS_LOCAL_SRC(pgm)) {
            /* Source and destination are both local: copy directly */
            (*copy)( &pgm->src, &((pgm + pgm->processor)->src) );
        }
        else if (IS_SRC(pgm)) {
            sbuf = pgm->buffer;
            if (!pgm->inplace)
                (*out)( &pgm->src, sbuf );
#ifdef FOO
            else
                /* if inplace, sbuf is the USER's data area, so this amounts
                   to doing a getaddress. */
                sbuf += pctx;
#endif
            if (pgm->type & BLOCK_COMM_BUFFER) {
#ifdef intelnx
                if (pgm->type & BLOCK_COMM_SYNC_NBR) {
                    int b;
                    /* Receive null message from partner */
#ifdef DEBUG
                    printf( "sync sending type %d on %d\n", pgm->mtype+ntag, 
                            __MYPROCID );
#endif
                    MPI_Recv(&b,sizeof(int),MPI_BYTE,MPI_ANY_SOURCE,pgm->mtype+ntag,MPI_COMM_WORLD,&_mpi_status);
                    MPI_Irsend(sbuf,pgm->act_len,MPI_BYTE,pgm->processor,pgm->mtype+ntag,MPI_COMM_WORLD,&(pgm->rc ));
                }
                else
#endif
                    MPI_Isend(sbuf,pgm->act_len,MPI_BYTE,pgm->processor,pgm->mtype+ntag,MPI_COMM_WORLD,&(pgm->rc ));
#ifdef DEBUG
                printf( "Sent type %d to %d from %d [size %d]\n", pgm->mtype+ntag,
                        pgm->processor, __MYPROCID, pgm->act_len );
#endif
            }
        }
        NEXTLINE(pgm);
    }
#ifdef DEBUG
    printf( "[%d] Ending isend\n", __MYPROCID );
#endif

    return 0;
}

/* 
   BCiwait - Wait for all sends to complete, as well as any receives that
   have not yet been processed.

   First, every source entry whose type has BLOCK_COMM_BUFFER set and
   whose request (pgm->rc) is not null is waited on.  Then BCdo_recv is
   called for the entries from Program->pgm_last to the end of the
   program, with a phase limit one greater than the phase of the final
   entry.

   Input Parameter:
.  Program - the communication program to complete

   Returns 0.
 */
int BCiwait( Program )
BCPGM *Program;
{
BCentry *pgm = Program->pgm;
int        n    = Program->n;
int       ntag = (Program->ncalls & 0x1);
int       end_phase = 0;

#ifdef DEBUG
printf( "[%d] Starting BCiwait\n", __MYPROCID );
#endif

LOGEVENT(400);

/* Wait on sends---really should be Program->pgm_last_send */
while (n--) {
    if (IS_SRC(pgm)) {
        if ((pgm->type & BLOCK_COMM_BUFFER) && ! PIRecvIdNull(pgm->rc)) {
#ifdef DEBUG
	    printf( "Waiting for type %d on %d (isend)\n", 
		    pgm->mtype+ntag, __MYPROCID );
#endif
	    MPI_Wait(&(pgm->rc ),&_mpi_status);
#ifdef DEBUG
	    printf( "Finished isend type %d on %d\n",
		    pgm->mtype+ntag, __MYPROCID );
#endif
	    }
        }
    NEXTLINE(pgm);
    }
LOGEVENT(401);

/* (pgm-1) is the final entry only when the program is non-empty; for an
   empty program there is no entry to read the phase from, and BCdo_recv
   has nothing to process, so any limit will do. */
if (Program->n > 0)
    end_phase = (pgm-1)->phase + 1;

BCdo_recv( Program, Program->pgm + Program->n, Program->pgm_last, 
	      Program->in, end_phase );

#ifdef DEBUG
printf( "[%d] Ending BCiwait\n", __MYPROCID );
#endif

return 0;
}


/*
   BCdo_recv - Complete the receives for the entries from pgm_save up to
   (but not including) pgm, stopping early at the first entry whose phase
   is not below the given phase.

   For each destination entry in that range: if the entry is buffered and
   its receive id is not null, the receive is waited for and the id is
   cleared; then, unless the entry is marked inplace, the "in" routine is
   called to move the data from the entry's buffer to its destination.

   Input Parameters:
.  Program  - the communication program (used for the message tag bit)
.  pgm      - end of the range (not processed)
.  pgm_save - first entry to process
.  in       - routine called as (*in)( buffer, &src )
.  phase    - processing stops at the first entry with phase >= this value

   Returns the entry at which processing stopped.
 */
BCentry *BCdo_recv( Program, pgm, pgm_save, in, phase )
BCPGM   *Program;
BCentry *pgm, *pgm_save;
int     (*in)(), phase;
{
    int ntag = (Program->ncalls & 0x1);

    LOGEVENT(200);
    while (pgm_save < pgm && pgm_save->phase < phase) {
        if (IS_DEST(pgm_save)) {
            if ((pgm_save->type & BLOCK_COMM_BUFFER) && 
                ! PIRecvIdNull(pgm_save->rc)) {
#ifdef DEBUG
                printf( "Waiting (recv) type %d from %d on %d in do_recv processing\n",
                        pgm_save->mtype+ntag, pgm_save->processor, __MYPROCID );
#endif
                RECVWAITNOMEM(pgm_save->mtype+ntag,pgm_save->buffer,
                              pgm_save->act_len,
                              MY_MPI_FLOAT,pgm_save->rc);
#ifdef DEBUG
                printf( "Received type %d from %d on %d (%d bytes)in do_recv processing\n",
                        pgm_save->mtype+ntag, pgm_save->processor, __MYPROCID, __MPILEN);
#endif
                PIRecvIdClear(pgm_save->rc);
            }
            if (!pgm_save->inplace) {
                /* Unpack the buffer into the entry's destination */
                LOGEVENT(202);
                (*in)( pgm_save->buffer, &pgm_save->src );
                LOGEVENT(203);
            }
        }
        NEXTLINE(pgm_save);
    }
    LOGEVENT(201);

    return pgm_save;
}

#ifdef HOST_IPSCSIM
/* Stand-in for isendrecv, which the version of the simulator being run
   does not include.  A nonblocking receive of message type rtype into rb
   (length rlen) is started, and then sb (length slen) is sent to proc
   with message type "type" using SENDSYNCNOMEM.  The pid argument is not
   used.  Returns the request value filled in by the receive call. */
int isendrecv( type, sb, slen, proc, pid, rtype, rb, rlen )
int type, slen, proc, pid, rtype, rlen;
int *sb, *rb;
{
    int recv_id;

#ifdef DEBUG
    printf( "irecving %d on %d\n", rtype, __MYPROCID );
#endif
    MPI_Irecv(rb,rlen,MPI_BYTE,MPI_ANY_SOURCE,rtype,MPI_COMM_WORLD,&(recv_id));

#ifdef DEBUG
    printf( "sending %d to %d on %d\n", type, proc, __MYPROCID );
#endif
    SENDSYNCNOMEM(type,sb,slen,proc,MY_MPI_FLOAT);

    return recv_id;
}
#endif

/*
   BCsr - Send a series of messages, then receive a series of messages
   and act on each of them.

   Routines are provided by the caller to form up the messages to send
   and to handle the messages received.  Each routine is passed a context
   that may be used by the programmer.

   Input Parameters:
.  nsend     - number of messages to send
.  sendact   - called before each send as
               (*sendact)( &scontext, &sbuf, &sbuflen, &sproc ); the values
               it leaves in sbuf, sbuflen and sproc are passed to the send
.  scontext  - user context for sendact
.  nrecv     - number of messages to receive
.  recvact   - called after each receive as (*recvact)( outbuf, &rcontext )
.  rcontext  - user context for recvact
.  outbuflen - length used to allocate the receive buffer and for each
               receive

   Returns 0.  No failure is currently detected by this routine, so a
   nonzero value is never returned.
 */
int BCsr( nsend, sendact, scontext,
	     nrecv, recvact, rcontext, outbuflen )
int  nsend, nrecv, outbuflen;
int  (*sendact)(), (*recvact)();
void *scontext, *rcontext;
{
int  sbuflen, sproc;
char *sbuf, *outbuf;

MSGALLOCRECV( outbuf, outbuflen, char );
/* Send the messages */
while (nsend--) {
    /* sendact fills in the buffer, its length, and the destination */
    (*sendact)( &scontext, &sbuf, &sbuflen, &sproc );
    SENDSYNCNOMEM( VALID_TYPE, sbuf, sbuflen, sproc, MY_MPI_FLOAT );
    }
/* Receive the messages */
while (nrecv--) {
    RECVSYNC( VALID_TYPE, outbuf, outbuflen, MY_MPI_FLOAT );
    (*recvact)( outbuf, &rcontext );
    }
MSGFREERECV( outbuf );
return 0;
}   

/*@
  BCUseASyncSend - Use the asynchronous send routines instead of the 
  synchronous send routines.

  Input Parameter:
.  pgm - program whose isend and iwait routines are set to BCisend and
         BCiwait
 @*/
void BCUseASyncSend( pgm )
BCPGM *pgm;
{
    pgm->iwait = BCiwait;
    pgm->isend = BCisend;
}

/* These are the per-phase routines.  Not yet tested. */

/*
   BCirecvPhase - Start the receives for the entries of a single phase.
   (Per-phase routine; not yet tested.)

   If phase is 0 or equals the phase of the first entry, processing starts
   at the first entry and Program->pgm_cur is set to it.  Otherwise the
   entries are scanned forward from Program->pgm_cur to the first one
   whose phase matches.  From there, consecutive entries of the requested
   phase are handled: local-source entries are copied with the program's
   copy routine, and buffered destination entries have a nonblocking
   receive started with the request stored in the entry's rc field.

   Input Parameters:
.  Program - the communication program
.  phase   - the phase whose receives are started

   Returns 0.
 */
int BCirecvPhase( Program, phase )
BCPGM *Program;
int   phase;
{
    BCentry *pgm      = Program->pgm;
    int     n         = Program->n;
    int     ntag      = (Program->ncalls & 0x1);
    int     (*copy)() = Program->copy;

#ifdef DEBUG
    printf( "Starting irecvPhase %d\n", phase );
#endif
    if (phase != 0 && phase != pgm->phase) {
        /* Skip forward to the first entry of the requested phase */
        pgm = Program->pgm_cur;
        n   -= (pgm - Program->pgm);
        while (n > 0 && pgm->phase != phase) {
            n--;
            NEXTLINE(pgm);
        }
    }
    else {
        Program->pgm_cur = pgm;
    }

    while (n-- && pgm->phase == phase) {
        switch (GET_MAJOR_MODE(pgm)) {
        case BLOCK_COMM_LOCAL_SRC:
            (*copy)( &pgm->src, &((pgm + pgm->processor)->src) );
            break;

        case BLOCK_COMM_DEST:
            if (pgm->type & BLOCK_COMM_BUFFER) {
#ifdef intelnx
                if (pgm->type & BLOCK_COMM_SYNC_NBR) 
                    pgm->rc = isendrecv( pgm->mtype+ntag, pgm->buffer, 0,
                                         pgm->processor, 0, 
                                         (pgm->mtype+ntag) | BASE_FORCE, 
                                         pgm->buffer, pgm->act_len );
                else 
#endif
                    MPI_Irecv(pgm->buffer,pgm->act_len,MPI_BYTE,MPI_ANY_SOURCE,pgm->mtype+ntag,MPI_COMM_WORLD,&(pgm->rc));
#ifdef DEBUG
                printf( "IReceived type %d on %d (should be from %d)\n",
                        pgm->mtype+ntag, __MYPROCID, pgm->processor );
#endif
            }
            break;
        }
        NEXTLINE(pgm);
    }
#ifdef DEBUG
    printf( "Ending irecvPhase\n" );
#endif

    return 0;
}

/* 
   BCisendPhase - Initialize sends, starting at the first entry of the
   given phase.  Obey phase partial ordering (this means completing
   all receives from the previous phase before sending).

   Input Parameters:
.  Program - the communication program
.  phase   - the phase at which sending starts

   Returns 0.

   NOTE(review): unlike BCirecvPhase, the main loop below does not stop
   when pgm->phase differs from phase; once the starting entry is found it
   continues through all remaining entries.  Confirm whether that is
   intended for a per-phase routine.
 */
int BCisendPhase( Program, phase )
BCPGM *Program;
int        phase;
{
int        n;
BCentry    *pgm_save, *pgm;
int        (*out)() = Program->out;
int        (*in)()  = Program->in;
int        (*copy)()  = Program->copy;
MY_FLOAT   *p;
int       ntag = (Program->ncalls & 0x1);

#ifdef DEBUG
printf( "[%d] Starting isendPhase %d\n", __MYPROCID, phase );
#endif
n   = Program->n;
/* pgm_save (the position handed to BCdo_recv) always restarts at the
   first entry of the program, regardless of phase */
pgm = pgm_save = Program->pgm;
Program->pgm_last = pgm;

/* Find the starting entry: the first entry itself if phase is 0 or
   matches it, otherwise scan forward from Program->pgm_cur */
if (phase == 0 || phase == pgm->phase) 
    Program->pgm_cur = pgm;
else {
    pgm = Program->pgm_cur;
    n   -= (pgm - Program->pgm);
    while (n > 0 && pgm->phase != phase) {
	n--;
	NEXTLINE(pgm);
	}
    }

while (n--) {
    /* sync all recvs before this send with a lower phase */
    if (pgm->type & BLOCK_COMM_SYNC_BLOCK) {
	pgm_save = BCdo_recv( Program, pgm, pgm_save, in, pgm->phase );
	Program->pgm_last = pgm_save;
	}
    /* May be local source */
    if (IS_LOCAL_SRC(pgm)) {
	(*copy)( &pgm->src, &((pgm + pgm->processor)->src) );
	}
    else if (IS_SRC(pgm)) {
	p  = pgm->buffer;
	/* Pack the source data into the buffer unless sending in place */
        if (!pgm->inplace)
            (*out)( &pgm->src, p );
#ifdef FOO
	else
	    /* if inplace, p is the USERs data area, so this amounts
	       to doing a getaddress. */
	    p += pctx;
#endif
        if (pgm->type & BLOCK_COMM_BUFFER) {
#ifdef intelnx
	    if (pgm->type & BLOCK_COMM_SYNC_NBR) {
		int b;
		/* Receive null message from partner */
#ifdef DEBUG
		printf( "sync sending type %d on %d\n", pgm->mtype+ntag, 
		        __MYPROCID );
#endif
		MPI_Recv(&b,sizeof(int),MPI_BYTE,MPI_ANY_SOURCE,pgm->mtype+ntag,MPI_COMM_WORLD,&_mpi_status);
		MPI_Irsend(p,pgm->act_len,MPI_BYTE,pgm->processor,pgm->mtype+ntag,MPI_COMM_WORLD,&(pgm->rc ));
		}
	    else 
#endif
		/* Nonblocking send; the request is kept in pgm->rc */
		MPI_Isend(p,pgm->act_len,MPI_BYTE,pgm->processor,pgm->mtype+ntag,MPI_COMM_WORLD,&(pgm->rc ));
#ifdef DEBUG
            printf( "Sent type %d to %d from %d [size %d]\n", pgm->mtype+ntag,
                    pgm->processor, __MYPROCID, pgm->act_len );
#endif
            }
        }
    NEXTLINE(pgm);
    }
#ifdef DEBUG
printf( "[%d] Ending isendPhase\n", __MYPROCID );
#endif

return 0;
}

/* 
   BCiwaitPhase - Wait for all sends to complete, as well as any receives
   that have not yet been processed.

   Every source entry whose type has BLOCK_COMM_BUFFER set and whose
   request (pgm->rc) is not null is waited on.  Then BCdo_recv is called
   for the entries from Program->pgm_last to the end of the program, with
   a phase limit one greater than the phase of the final entry.

   Input Parameters:
.  Program - the communication program to complete
.  phase   - currently not used; all entries are processed

   Returns 0.
 */
int BCiwaitPhase( Program, phase )
BCPGM *Program;
int   phase;
{
BCentry *pgm = Program->pgm;
int        n    = Program->n;
int       ntag = (Program->ncalls & 0x1);
int       end_phase = 0;

#ifdef DEBUG
printf( "[%d] Starting BCiwaitPhase\n", __MYPROCID );
#endif

LOGEVENT(400);

/* Wait on sends---really should be Program->pgm_last_send */
while (n--) {
    if (IS_SRC(pgm)) {
        if ((pgm->type & BLOCK_COMM_BUFFER) && ! PIRecvIdNull(pgm->rc)) {
#ifdef DEBUG
	    printf( "Waiting for type %d on %d (isend)\n", 
		    pgm->mtype+ntag, __MYPROCID );
#endif
	    MPI_Wait(&(pgm->rc ),&_mpi_status);
#ifdef DEBUG
	    printf( "Finished isend type %d on %d\n",
		    pgm->mtype+ntag, __MYPROCID );
#endif
	    }
        }
    NEXTLINE(pgm);
    }
LOGEVENT(401);

/* (pgm-1) is the final entry only when the program is non-empty; for an
   empty program there is no entry to read the phase from, and BCdo_recv
   has nothing to process, so any limit will do. */
if (Program->n > 0)
    end_phase = (pgm-1)->phase + 1;

BCdo_recv( Program, Program->pgm + Program->n, Program->pgm_last, 
	      Program->in, end_phase );

#ifdef DEBUG
printf( "[%d] Ending BCiwaitPhase\n", __MYPROCID );
#endif

return 0;
}

#else
/*
   This file contains routines for the sun (and other uni-processors) that 
   handle all of the various optimizations.
 */

int BCirecv( Program )
BCPGM *Program;
{
BCentry *pgm = Program->pgm;
int        n    = Program->n;
int        (*copy)() = Program->copy;

while (n--) {
    switch (GET_MAJOR_MODE(pgm)) {
        case BLOCK_COMM_LOCAL_SRC:
            (*copy)( &pgm->src, &((pgm + pgm->processor)->src) );
	    break;
	case BLOCK_COMM_LOCAL_DEST: 
	    break;
        default:
	    { char buf[256];
	    sprintf( buf, "Unknown Program Command %d\n", 
		     GET_MAJOR_MODE(pgm) );
	    (*Program->Error)( buf );
	     }
        }
    NEXTLINE(pgm);
    }
return 0;
}

#endif



