



#ifndef lint
static char vcid[] = "$Id: buffer.c,v 1.1.1.1 1998/08/27 19:16:26 gropp Exp $";
#endif

/*
   This file contains routines that provide automatic buffering
   of messages between processor pairs.

   Algorithm:
   Within each phase, find all of the communications to the same processor.
   Order these so that the SEND happens last (after the buffer is filled)
   and the RECV happens first (so that the buffer is posted first).
   This grouping is managed by comm_group_block.

   In order to use the same code for buffered and unbuffered operations,
   it is necessary to set the "BUFFER" bit on all communication (only 
   operations with the BUFFER bit set are actually sent).

   Question: Should that bit be set by the setup routine, and only CLEARED
   by the buffer routine?  No, note that the buffer routines ALSO allocate
   the buffer space.
 */
#include <stdio.h>
#include <stdlib.h>
#include "blkcm/bcp.h"
#include "blkcm/bc.h"

#ifdef DISTRIBUTED_MEMORY
/* Forward declaration: BCgroup_block is defined further down in this file
   but is first called from BCset_buffered. */
static void BCgroup_block();

/*
   BCset_buffered - Convert the sends and receives of a program to the
   buffered form, with one combined message per (processor, phase) group.

   Input Parameter:
.  Program - communication program; its Program->n entries starting at
             Program->pgm are rewritten in place

   Returns:
   Always 0.

   Notes:
   The entries are expected to be SORTED before this routine is called, so
   that all sends/receives to the same processor in the same phase are
   adjacent; nothing is sorted here.

   Each time the scan reaches a BLOCK_COMM_SRC or BLOCK_COMM_DEST entry,
   the run that starts there is handed to BCgroup_block, which
        computes the buffer size,
	allocates the buffer and propagates it through the sends/receives,
	sets the buffer bit at the beginning (RECV) or end (SEND).
   The intended result is:
        SEND_BUFFER is the LAST SEND to a particular processor.
        RECV_BUFFER is the FIRST RECV from a particular processor.
   The buffer location is laid out so that the entry carrying the buffer
   bit holds the first address in the buffer.
 */
int BCset_buffered( Program )
BCPGM *Program;
{
BCentry *entry     = Program->pgm;
int     remaining  = Program->n;
int     mode;

for (; remaining; remaining--) {
    mode = GET_MAJOR_MODE(entry);
    if (mode == BLOCK_COMM_SRC || mode == BLOCK_COMM_DEST) {
	/* BCgroup_block leaves entry on the LAST op of the group and
	   remaining counting that op, so the common advance below moves
	   on to the first entry after the group. */
	BCgroup_block( Program, &entry, &remaining, mode );
	}
    /* Anything else (local src/dest) is skipped */
    NEXTLINE(entry);
    }
return 0;
}

/*
   BCis_contig - Decide whether a slab describes contiguous storage.

   Input Parameter:
.  s - slab to examine (only read)

   Returns:
   1 if the slab is contiguous, 0 otherwise.

   Notes:
   The slab is contiguous when s1 (the stride, going by the original
   comment) is 1 and, for every higher dimension k = 2..5 that is actually
   used (n_k != 1), the count of the dimension below equals its increment
   (n_{k-1} == inc_{k-1}).
   NOTE(review): n_k / inc_k are presumably the extent and the step between
   blocks of dimension k -- confirm against the slab definition.
 */
static int BCis_contig( s )
slab *s;
{
int contiguous;

contiguous = (s->s1 == 1)
	  && (s->n2 == 1 || s->n1 == s->inc1)
	  && (s->n3 == 1 || s->n2 == s->inc2)
	  && (s->n4 == 1 || s->n3 == s->inc3)
	  && (s->n5 == 1 || s->n4 == s->inc4);

return contiguous;
}

/* 
   BCset_unbuffered - Turn on the buffer bit in all sends/receives and
   give every one of them its own buffer.

   Input Parameter:
.  Program - communication program; its Program->n entries starting at
             Program->pgm are updated in place

   Returns:
   0 on success, 1 if a buffer could not be allocated (after reporting
   the problem through Program->Error).

   Notes:
   For every BLOCK_COMM_SRC / BLOCK_COMM_DEST entry:
	the BLOCK_COMM_BUFFER bit is set, so each entry is a message of
	its own;
	act_len is set to the value returned by BCmsg_len for the slab;
	if the entry is marked inplace AND its slab is contiguous, the
	slab storage itself (s->p) is used as the buffer;
	otherwise a buffer is allocated and inplace is cleared;
	mtype is set to the entry id.
   Entries with any other major mode are left untouched.
   Entries processed before an allocation failure keep their buffers; 
   nothing is released on the error path.
 */
int BCset_unbuffered( Program )
BCPGM *Program;
{
BCentry    *pgm = Program->pgm;
int           n    = Program->n, buflen;
slab          *s;

TRPUSH(BCTRID+1);
while (n) {
    switch (GET_MAJOR_MODE(pgm)) {
        case BLOCK_COMM_SRC:
	case BLOCK_COMM_DEST:
	     s = &pgm->src;
	     pgm->type  |= BLOCK_COMM_BUFFER;
             buflen     = BCmsg_len( Program, s );
	     pgm->act_len= buflen;
	     if (pgm->inplace && BCis_contig( s )) {
		 /* don't do this here; do this in the set address */
		 pgm->buffer = s->p;
		 }
	     else {
	     	/* Use the proper routine for allocating buffers */
#ifdef ALLOC_BUFFERS
	     	if (GET_MAJOR_MODE(pgm) == BLOCK_COMM_DEST)
	     	    MSGALLOCRECV(pgm->buffer,buflen,double);
	     	else
	     	    MSGALLOCSEND(pgm->buffer,buflen,double);
#else
		 pgm->buffer   = (double *)malloc((unsigned)(buflen ));
#endif
		 if (!pgm->buffer) {
		     (*Program->Error)( "Out of space allocating buffers\n" );
		     /* Balance the TRPUSH above before leaving early */
		     TRPOP;
		     return 1;
		     }
		 /* Data now goes through a separate buffer */
		 pgm->inplace  = 0;
		 }
	     pgm->mtype = pgm->id;
             break;
	default:
	     /* Not a send/receive: nothing to set up */
	     break;
	     }
    NEXTLINE(pgm); n--;
    }
TRPOP;

return 0;
}

/* 
   BCgroup_block - Group and block an operation: combine a run of sends
   (or receives) to the same processor in the same phase into a single
   buffered message.

   Input Parameters:
.  Program - communication program (passed on to BCmsg_len)
.  op      - BLOCK_COMM_SRC for sends, BLOCK_COMM_DEST for receives

   Input/Output Parameters:
.  ppgm    - on entry, the first op of the run; on exit, the LAST op of
             the run (the caller is expected to step past it)
.  pn      - on entry, the number of entries left starting at *ppgm; on
             exit, the number left starting at the returned *ppgm

   Notes:
   The run consists of the first op plus every following entry whose type
   has the op bit set and whose processor and phase match the first op.
   act_len of each entry is set from BCmsg_len and the lengths are summed.

   If the run is a single op that is marked inplace and is contiguous, its
   own storage is used as the buffer.  Otherwise one buffer of the summed
   size is allocated and carved up among the entries in order; the first
   entry gets the BLOCK_COMM_BUFFER bit and act_len = total size, and
   inplace is cleared on every entry since the data now lives in a
   separate buffer.  For sends, the first and last entries are then
   exchanged so that the entry carrying the buffer bit comes last.

   The process exits with status 1 if the buffer cannot be allocated.
   A negative total size is reported on stderr and processing continues
   with a null buffer, as does a total size of zero (silently).

   One last problem.  The message type is set based on the id of the send
   or receive.  Since we use the type at each end, they won't match up.
   In order to fix this, we set the base types here; they'll be modified 
   by set_type.
 */
static void BCgroup_block( Program, ppgm, pn, op )
BCPGM   *Program;
BCentry **ppgm;
int     *pn, op;
{
BCentry *pgm = *ppgm, *plast;
BCentry *pgm_first = *ppgm;
int        size, nop, issend = (op == BLOCK_COMM_SRC), n = *pn;
char       *p;

TRPUSH(BCTRID+2);
pgm->act_len = BCmsg_len( Program, &pgm->src );
size         = pgm->act_len;
nop          = 1;
NEXTLINE(pgm); n--;
/* Find the end of the ops of this type; get bufsize of each */
while (n > 0) {
    if (!(pgm->type & op)) break;
    if (pgm_first->processor != pgm->processor ||
	pgm_first->phase     != pgm->phase) break;
    nop++;
    pgm->act_len = BCmsg_len( Program, &pgm->src );
    size         += pgm->act_len;
    NEXTLINE(pgm); n--;
    }
/* The scan stopped one entry past the run; step back onto it */
pgm--; n++;
/* pgm is the LAST op in the block */
plast = pgm;
if (nop == 1 && pgm->inplace && BCis_contig( &pgm->src )) {
    /* Single contiguous in-place op: use its own storage directly */
    pgm->buffer = pgm->src.p;
    pgm->type  |= BLOCK_COMM_BUFFER;
    pgm->mtype  = pgm->id;
    }
else {
    if (size <= 0) {
	if (size < 0) {
	    fprintf( stderr, "Error in BCgroup_block: buffer size (%d) < 0\n",
		     size );
	    p = 0;
	    /* return ??? */
	    }
	else {
	    /* Unexpected? */
	    p = 0;
	    }
	}
    else {
	/* Positive sized buffers --- allocate and set */
#ifdef ALLOC_BUFFERS
	if (GET_MAJOR_MODE(pgm) == BLOCK_COMM_DEST) {
	    MSGALLOCRECV(p,size,char);
	    }
	else {
	    MSGALLOCSEND(p,size,char);
	    }
#else
	/* fprintf( stderr, "Allocating %d bytes for op = %d\n", size, op ); */
	p            = (char *)malloc((unsigned)(size ));
#endif
	if (!p) {
	    /* This routine has no way to return an error, so say why we
	       are stopping before giving up */
	    fprintf( stderr, 
	      "Error in BCgroup_block: out of space allocating %d byte buffer\n",
		     size );
	    exit(1);
	    }
	}
    /* set the buffers.  Note that they must match in order.
       We do this by first allocating them.  Then, the send operation
       is EXCHANGED with the last send to put it in the correct
       position */
    pgm          = pgm_first;
    pgm->type    |= BLOCK_COMM_BUFFER;
    pgm->buffer  = (double *)p;
    p            += pgm->act_len;
    /* The entry with the buffer bit carries the length of the whole block */
    pgm->act_len = size;
    pgm->mtype   = pgm->id;
    /* The first entry now uses a separate buffer too, so it is no longer
       in place (same rule as the remaining entries below and as
       BCset_unbuffered) */
    pgm->inplace = 0;
    NEXTLINE(pgm);
    while (pgm <= plast) {
	pgm->buffer  = (double *)p;
	p            += pgm->act_len;
	pgm->inplace = 0;
	NEXTLINE(pgm);
	}
    if (issend && pgm_first != plast) {
	/* Swap first, last */
	BCentry temp;
	temp       = *pgm_first;
	*pgm_first = *plast;
	*plast     = temp;
#ifdef FOO
	/* buffer should be called before pairsync */
	/* Finally, if this was a SYNC_BLOCK send, we need to move
	   the SYNC. */
	if ((plast->type & BLOCK_COMM_SYNC_BLOCK) ||
	    (pgm_first->type & BLOCK_COMM_SYNC_BLOCK)) {
	    plast->type     ^= BLOCK_COMM_SYNC_BLOCK;
	    pgm_first->type ^= BLOCK_COMM_SYNC_BLOCK;
	    }
#endif	
	}
    }
/* Return the LAST command of the block and the count from there; the
   caller advances to the next command */
*ppgm = plast;
*pn   = n;
TRPOP;
}

/*
   BCdobuffer - Set up message buffers for a program, either grouped
   (buffered) or one message per operation (unbuffered).

   Input Parameters:
.  Program - communication program to set up
.  option  - option bits; if BCOPTION_BUFFER is set the buffered setup
             (BCset_buffered) is used, otherwise BCset_unbuffered

   Returns:
   The status of the setup routine that was run: 0 on success, nonzero
   if it reported a failure (BCset_unbuffered returns 1 when it runs out
   of space).
 */
int BCdobuffer( Program, option )
BCPGM *Program;
int   option;
{
/* Pass the status back instead of dropping it, so that an allocation
   failure is visible to the caller */
if (option & BCOPTION_BUFFER)
    return BCset_buffered( Program );

return BCset_unbuffered( Program );
}
#endif

