/*
This program is an implementation of block matrix multiplication
using the Pipe-Multiply-Roll Algorithm.
This program is designed to run on the PVM system and assumes a 
mesh-connected topology.
*/

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <pvm3.h>
/*
 *PGPVM: USE_PGTRACE is #defined in the Makefile.  If you would rather
 *define it in the program, place
 *#define USE_PGTRACE here
 *
 *To turn off tracing, remove this macro, either from here or from the
 *Makefile, wherever you placed it, and recompile.
 */
#include <pgpvm.h>

#define ROOT 0                /* Value of `me` that identifies the root process */
#define SUN4 0                /* Architecture we'll use - set if needed (not referenced in the code visible here) */
#define PROGNAME "matmul"     /* Component name - change if needed; main stores it in `name` */
#define BROADCAST (int) 99    /* message type for the startup broadcast and the PrintMatrix token */
#define DATA1 (int) 999       /* message type for sending A matrices (used by Pipe) */
#define DATA2 (int) 9999      /* message type for sending B matrices (used by Roll) */

/********************** GLOBAL VARIABLES ***********************/
/* All of these except `name` and the matrix pointers are filled in
   by Startup; the matrix pointers are allocated by MakeMatrices. */
int numprocs;           /* Number of processes: dimension*dimension */
int dimension;          /* dimension of the mesh (it is dimension x dimension) */
int size;               /* The matrices to be multiplied are size x size */
int mysize;             /* side length of the local submatrix: size/dimension */
int me;                 /* My index into tids (0 for the root) */
int mytid;              /* My tid, as returned by pvm_mytid */
int *tids;              /* Array of numprocs tids; tids[0] is the root */
int myrow, mycol;       /* My row (me/dimension) and column (me%dimension) in the mesh */
char *name;             /* program name: handed to pvm_spawn and shown in the usage message */
int *C,*A,*B,*tempA;    /* local mysize x mysize blocks, stored row-major in flat arrays;
                           tempA receives the A block delivered by Pipe */

/************************ FUNCTION DECLARATIONS ******************/
/* These are old-style declarations without parameter lists, so the
   compiler does not check the arguments passed at the call sites.
   The expected arguments are listed next to each one. */
void Startup();        /* (argc, argv) - set up PVM and the run parameters */
void MakeMatrices();   /* (N) - allocate and fill the N x N local blocks */
void Pipe();           /* (matrix, temp, size, who) - row broadcast of an A block */
void Multiply();       /* (A, B, C, size) - C += A * B on size x size blocks */
void Roll();           /* (B, size) - pass the B block up one mesh row */
void PrintMatrix();    /* (A, B, C, size) - print the local blocks, one process at a time */

/************************ BEGIN MAIN ****************************/
/*
 * Program entry point.  Every process (the root and each spawned copy)
 * runs this same code: set up the run parameters, build the local
 * blocks, then perform `dimension` rounds of Pipe / Multiply / Roll.
 * The root additionally reports the processor time it consumed.
 *
 * Returns 0 on normal completion.
 */
int main (argc, argv)
    int argc;
    char **argv;
{
    register int i;       /* loop counter */
    clock_t usage = 0;    /* processor time used by the root, in clock ticks */

    name = PROGNAME;
    Startup(argc,argv);    /* Get data about the matrices and processes */
    MakeMatrices(mysize);  /* Declare storage for and initialize matrices */

    if (me == ROOT) usage=clock();

    /* Execute the Pipe-Multiply-Roll algorithm */
    for (i=0;i<dimension;i++) {
        Pipe(A,tempA,mysize,i);
        Multiply(tempA,B,C,mysize);
        Roll(B,mysize);
    }

    /* output the results */
    /*
    PrintMatrix(A,B,C,mysize);
    */

    /* There is no barrier here: the root reports as soon as its own
       loop is done.  clock() ticks are converted with CLOCKS_PER_SEC
       so the figure is right whatever the tick rate is. */
    if (me == ROOT) {
        usage = clock() - usage;
        printf("\n\nTime to multiply two %dx%d matrices using a %dx%d mesh: %lf seconds\n",size,size,dimension,dimension,(double) usage / CLOCKS_PER_SEC);
    }

    pvm_exit();  /* Leave PVM */
    return 0;
}


/*==================== Startup ================================
 * Joins PVM and establishes the run parameters on every process.
 *
 * The process that has no PVM parent acts as the root: it reads the
 * matrix size and the mesh dimension from the command line, checks
 * them, spawns the other dimension*dimension-1 copies of the program
 * and multicasts size, dimension, numprocs and the tid table to them.
 * Every other process waits for that message and then looks up its
 * own position in the tid table.
 *
 * On return the globals mytid, tids, me, size, dimension, numprocs,
 * mysize, myrow and mycol are set.  On any failure a message is
 * printed, the process leaves PVM and exits with status 1.
 */
void Startup(argc, argv)
    int argc;
    char **argv;
{
    register int j;       /* loop counter */
    int started;          /* number of tasks pvm_spawn really started */

    mytid = pvm_mytid();
    if (pvm_parent() < 0) {     /* Root sends out initial values */
        if (argc != 3) {    /* Check program usage */
            fprintf(stderr,"Usage: %s <matrix size> <mesh dim>\n",
                name);
            pvm_exit();
            exit(1);
        }

        /* Get matrix size and mesh dimension */
        size=atoi(argv[1]);
        dimension=atoi(argv[2]);

        /* atoi gives 0 for non-numeric text, so this rejects garbage
           as well and keeps the modulo below from dividing by zero */
        if (size <= 0 || dimension <= 0) {
            fprintf(stderr,"Matrix size and mesh dim must be positive.\n");
            pvm_exit();
            exit(1);
        }
        numprocs=dimension*dimension;

        if (size%dimension) {
            fprintf(stderr,"Mesh dim must divide matrix size.\n");
            pvm_exit();
            exit(1);
        }

        /* Build the tid table; slot 0 is the root itself */
        tids = (int *) malloc(numprocs * sizeof(int));
        if (tids == NULL) {
            fprintf(stderr,"\nOut of memory. Halting.\n\n");
            pvm_exit();
            exit(1);
        }
        tids[0] = mytid;
        me = 0;

        /* A 1x1 mesh has no other process to start or to inform */
        if (numprocs > 1) {
            /* Start up other node programs.  pvm_spawn reports how
               many tasks it started; anything short of the full
               count is a failure. */
            started = pvm_spawn(name,(char **)0,0,0,numprocs-1, &tids[1]);
            if (started != numprocs-1) {
                fprintf(stderr,"\nError spawning Halting.\n\n");
                /* only the first `started` slots hold real tids */
                for (j=1;j<=started;j++) pvm_kill(tids[j]);
                pvm_exit();
                exit(1);
            }

            /* Broadcast data to all nodes */
            pvm_initsend(0);
            pvm_pkint(&size,1,1);
            pvm_pkint(&dimension,1,1);
            pvm_pkint(&numprocs,1,1);
            pvm_pkint(tids,numprocs,1);
            pvm_mcast(&tids[1],numprocs-1,BROADCAST);
        }

        /* Great place to call pg_tids, right after the multicast of
           tids.  Note that the mcast above will not produce trace
           events and will not be visualized because pg_tids has yet
           to be called.  That is desired here: we are not interested
           in visualizing the initial sending of data, only the rest
           of the application. */
        pg_tids(&tids[0],numprocs);
    }

    /* Otherwise, receive data from node 0 */
    else {
        pvm_recv(-1,BROADCAST);
        pvm_upkint(&size,1,1);
        pvm_upkint(&dimension,1,1);
        pvm_upkint(&numprocs,1,1);
        tids = (int *) malloc(numprocs * sizeof(int));
        if (tids == NULL) {
            fprintf(stderr,"\nOut of memory. Halting.\n\n");
            pvm_exit();
            exit(1);
        }
        pvm_upkint(tids,numprocs,1);

        /* great place to call pg_tids, right after receiving tids */
        pg_tids(&tids[0],numprocs);

        /* My index is the position of my own tid in the table */
        for (j=0;j<numprocs;j++)
            if (tids[j] == mytid) {
                me = j;
                break;
            }
    }

    /* compute my local info */
    mysize=size/dimension;
    numprocs=dimension*dimension;
    myrow=me/dimension;
    mycol=me%dimension;
    printf("I am %d %d; mysz=%d, P=%d, myr=%d, myc=%d\n",me,tids[me],
        mysize,numprocs,myrow,mycol);
    fflush(stdout);
}

/*================== MakeMatrices ====================
 * Allocates the four N x N blocks used by this process (C, B, A and
 * tempA, each a flat array of N*N ints) and fills three of them:
 * C is zeroed, A[i] is set to i plus a per-process offset and B[i]
 * to i minus that same offset, where the offset is me*(myrow+1).
 * tempA is left uninitialized; Pipe overwrites it before it is read.
 */
void MakeMatrices(N)
    int N;
{
    register int idx;  /* element index */
    int cells;         /* number of ints in one block */
    int bytes;         /* storage needed for one block */
    int offset;        /* per-process bias applied to A and B */

    cells = N*N;
    bytes = cells*sizeof(int);

    /* Each block lives in a one-dimensional array */
    C = (int *) malloc(bytes);
    B = (int *) malloc(bytes);
    A = (int *) malloc(bytes);
    tempA = (int *) malloc(bytes);

    /* Fill in the starting values */
    offset = me*(myrow+1);
    for (idx = 0; idx < cells; idx++) {
        C[idx] = 0;
        A[idx] = idx+offset;
        B[idx] = idx-offset;
    }
}

/*===================== Pipe =============================
 * The function implements the Pipe part of the algorithm.
 * One node on each row of the mesh is designated to send
 * its A matrix to the other nodes on the row.
 *
 *   matrix - this process's size x size A block (the data sent when
 *            this process is the designated sender)
 *   temp   - size x size buffer that receives the broadcast block
 *   size   - side length of the blocks
 *   who    - step number; the sender on row r is the node in
 *            column (r+who) % dimension
 */
void Pipe(matrix,temp,size,who)
    int *matrix,*temp;
    int size,who;
{
    register int i;    /* loop counter */
    int len;           /* number of ints to send in a message */
    int rowbase;       /* index in tids of the first node on my row */
    int srccol;        /* column of my row's sender for this step */

    len = size*size;
    rowbase = myrow*dimension;
    srccol = (myrow+who)%dimension;

    /* If I am the broadcaster, send to all my neighbors.  Note
    also, that I send to myself.  The reason for this is that
    the temp matrix is used in the multiplication, and it would
    probably be faster to send the matrix as a message rather than
    copy it one element at a time. */
    if (mycol == srccol) {
        pvm_initsend(0);
        pvm_pkint(matrix,len,1);
        for (i=0;i<dimension;i++)
            pvm_send(tids[rowbase+i],DATA1);
    }

    /* Now receive the temp matrix.  The receive names this step's
    sender explicitly: the sender for the next step sits on the same
    row and may already have sent its block while this step's sender
    is still working through its send loop, so a wildcard receive
    could pick up the wrong step's data. */
    pvm_recv(tids[rowbase+srccol],DATA1);
    pvm_upkint(temp,len,1);
}

/*======================= Multiply ============================
 * Accumulates the product of two square blocks into a third:
 * C = C + A * B.  All three are size x size matrices stored
 * row-major in flat int arrays.  C is added to, not overwritten,
 * so the caller must have initialized it.
 */
void Multiply(A,B,C,size)
    int *A,*B,*C,size;
{
    register int row,col,inner;    /* loop counters */

    for (row = 0; row < size; row++) {
        int *a_row = A + row*size;   /* current row of A */
        int *c_row = C + row*size;   /* matching row of C */

        for (col = 0; col < size; col++) {
            register int dot = 0;    /* row of A times column of B */

            for (inner = 0; inner < size; inner++)
                dot += a_row[inner] * B[inner*size+col];
            c_row[col] += dot;
        }
    }
}

/*=================== Roll ===========================
 * Rotates the B blocks upward within each mesh column: this
 * process sends its size x size block B to the process one row
 * up in the same column (row 0 wraps around to the last row),
 * then replaces B with the next DATA2 message it receives.
 * A line naming the sender and the destination is printed
 * before each send.
 */
void Roll(B,size)
    int *B,size;
{
    int count;   /* number of ints in the message */
    int dest;    /* index in tids of the process to send to */

    count = size*size;

    /* Destination is the row above me, wrapping at the top */
    if (myrow)
        dest = (myrow-1)*dimension+mycol;
    else
        dest = (dimension-1)*dimension+mycol;

    pvm_initsend(0);
    pvm_pkint(B,count,1);
    printf("I am %d %d, sending to %d %d\n", me, tids[me], dest, tids[dest]);
    fflush(stdout);
    pvm_send(tids[dest],DATA2);

    /* Take the incoming block in place of the one just sent */
    pvm_recv(-1,DATA2);
    pvm_upkint(B,count,1);
}

/*==================== PrintMatrix =====================
 * Print the three matrices A, B, and C in order of
 * processor number.
 *
 * The ordering is enforced by passing a token: every process except
 * the root waits for a BROADCAST message from the process whose
 * index is one lower, prints its own size x size blocks, and then
 * sends the token on to the process whose index is one higher (the
 * last process has nobody to send to).
 */
void PrintMatrix(A,B,C,size)
    int *A,*B,*C,size;
{
    register int j,k;    /* loop counters */
    int synch=0;         /* synchronization variable (token payload) */

    /* Wait for my turn.  pvm_recv takes a source tid and a message
       tag; the token always comes from my predecessor. */
    if (me != ROOT) {
        pvm_recv(tids[me-1],BROADCAST);
        pvm_upkint(&synch,1,1);
    }

    printf("\n\nP(%d,%d):\n\n",myrow,mycol);
    for (j=0;j<size;j++)
        for (k=0;k<size;k++) printf("A[%d,%d] = %d\n",j,k,A[j*size+k]);
    for (j=0;j<size;j++)
        for (k=0;k<size;k++) printf("B[%d,%d] = %d\n",j,k,B[j*size+k]);
    for (j=0;j<size;j++)
        for (k=0;k<size;k++) printf("C[%d,%d] = %d\n",j,k,C[j*size+k]);

    /* Push my output out before the next process is released */
    fflush(stdout);

    if (me != numprocs-1) {
        pvm_initsend(0);
        pvm_pkint(&synch,1,1);
        pvm_send(tids[me+1],BROADCAST);
    }
}

