// Jacobi: parallel Jacobi relaxation on a 2-D grid using the ADSM library
// Author: William W. Y. Liang
// Usage: jacobi <row number> <column number> <loop number> <init value>
// Date: 96/07/16

#include <iostream.h>
#include <stdlib.h>
#include <stdio.h>
#include "adsm.h"
#include "adsmutil.h"
#include "adsmtime.h"

// Default initial grid value.
// NOTE(review): not referenced in the visible code -- the value actually used
// is `initval`, taken from the command line. Confirm before removing.
#define INITVAL 100

// Entry points for the two roles; main() picks one based on seqno.
void master();
void slave();

// Program name; see main().
char *prog;
// nhost: value returned by get_nhost(); used as the number of slave processes
//        to spawn and as the base of the barrier participant counts.
// seqno: value returned by get_seqno(); 0 selects the master role in main(),
//        any other value selects the slave role.
int nhost,seqno;
// B0 is always entered with nhost+1 participants (master and slaves);
// B1 is entered with nhost participants (slaves only, inside the iteration).
AdsmBarrier B0("B0");
AdsmBarrier B1("B1");

// Run parameters. The master parses them from argv; slaves copy them out of
// the shared block named "param".
int rownum;   // number of grid rows
int colnum;   // number of grid columns (rows are stored with colnum+2 floats)
int loopnum;  // number of Jacobi iterations
int initval;  // initial value of interior cells

main(int argc,char *argv[]) {
  char *prog=execname(argv[0]);
  seqno=get_seqno();
  nhost=get_nhost(); 

  if (seqno==0) {
    if (argc<5) {
      cerr<<"Usage: "<<prog
          <<" <row number> <column number> <loop number> <init value>"<<endl;
      exit(0);
    }

    int param[4];
    cout<<nhost<<" hosts detected"<<endl
        <<"row number = "   <<(rownum= param[0]=atoi(argv[1]))<<endl
        <<"column number = "<<(colnum= param[1]=atoi(argv[2]))<<endl
        <<"loop number = "  <<(loopnum=param[2]=atoi(argv[3]))<<endl
        <<"initial value = "<<(initval=param[3]=atoi(argv[4]))<<endl;

    if (rownum<nhost) {
      cerr<<"Error: row number must >= host number"<<endl;
      return -1;
    }

    adsm_malloc("param",sizeof(int)*4,param);

    adsm_spawn(prog,nhost);

    master();
  } else {
    int *param=(int*)adsm_malloc("param",sizeof(int)*4);
    adsm_refresh(param);

    rownum=param[0];
    colnum=param[1];
    loopnum=param[2];
    initval=param[3];

    slave();
  }
}

// Master role: times the slaves' work and, for small grids, prints the result.
//
// The master meets the slaves at barrier B0 three times, each time with
// nhost+1 participants. The timer is started after the first meeting and
// stopped after the second, so it covers whatever the slaves do between
// those two barriers (the iteration loop in slave()). The third meeting
// follows the slaves' final flush of their rows.
//
// Grids larger than 10 rows or 10 columns are not printed.
void master() {
  int row,col;

  Timing timing("T");
  B0.barrier(nhost+1);
  timing.start();
  B0.barrier(nhost+1);
  timing.stop();
  B0.barrier(nhost+1);

  // Printing is limited to grids of at most 10 x 10.
  if (!(rownum<=10 && colnum<=10)) return;

  // One pointer per grid row; each row holds colnum+2 floats.
  // NOTE(review): presumably adsm_malloc_array binds grid[0..rownum-1] to the
  // shared rows the slaves created as "grid[i]" -- confirm against ADSM docs.
  float *grid[rownum];
  adsm_malloc_array("grid",sizeof(float)*(colnum+2),0,rownum,grid);

  // Refresh every row inside a single bulk section.
  adsm_refresh(AdsmBulkBegin);
  row=0;
  while (row<rownum) {
    adsm_refresh(grid[row]);
    row++;
  }
  adsm_refresh(AdsmBulkEnd);

  // Columns 0 and colnum+1 are the boundary cells and are not printed.
  cout<<"Result:"<<endl;
  for (row=0; row<rownum; row++) {
    float *line=grid[row];
    for (col=1; col<=colnum; col++) {
      cout<<line[col]<<' ';
    }
    cout<<endl;
  }
}

// Splits `rows` grid rows into `hosts` contiguous blocks and reports the block
// belonging to the slave with 1-based sequence number `seq`.
//
// The first rows%hosts slaves get one extra row, so whenever rows >= hosts
// every slave owns at least one row. A ceiling-based split (ceil(rows/hosts)
// rows each) is deliberately avoided: with e.g. 5 rows on 4 hosts it leaves
// the last slave with a negative row count, and the code below would then
// index outside its row table.
//
// *begin  receives the global index of the first owned row.
// *length receives the number of owned rows.
static void jacobi_partition(int rows,int hosts,int seq,int *begin,int *length) {
  int idx=seq-1;
  int base=rows/hosts;
  int extra=rows%hosts;
  *length=base+(idx<extra ? 1 : 0);
  *begin=idx*base+(idx<extra ? idx : extra);
}

// Slave role: owns a contiguous block of grid rows and performs loopnum
// Jacobi sweeps over it, exchanging the block's edge rows with the
// neighbouring slaves after each sweep.
//
// Barrier protocol:
//   B0 (nhost+1 participants, includes the master): once before the sweeps,
//      once after them, and once after the final flush of all owned rows.
//   B1 (nhost participants, slaves only): twice per sweep -- after computing
//      into the scratch buffer and after writing back and flushing.
void slave() {
  int i,j,k;

  // task partition
  int begin,length;
  jacobi_partition(rownum,nhost,seqno,&begin,&length);

  // initialization vectors

  // initrow1: interior row -- initval in columns 1..colnum, 0 in the two
  // boundary columns. initrow2: all-zero boundary row.
  float initrow1[colnum+2],initrow2[colnum+2];
  for (i=1; i<=colnum; i++) initrow1[i]=initval;
  initrow1[0]=initrow1[colnum+1]=0;
  for (i=0; i<colnum+2; i++) initrow2[i]=0;

  // data partition

  // Each processor maps its block of rows plus one row from each neighbouring
  // block into grid[length+2]. Row names in DSM run from grid[-1] to
  // grid[rownum]; rows -1 and rownum are the boundary rows. Locally, grid[0]
  // is the row just before the block and grid[length+1] the row just after.

  float *grid[length+2];
  char name[20]; // "grid[" + at most 11 characters of int + "]" + NUL fits
  adsm_malloc(AdsmBulkBegin);
  for (i=begin-1,j=0; j<length+2; j++,i++) {
    sprintf(name,"grid[%d]",i);
    grid[j]=(float*)adsm_malloc(name,sizeof(float)*(colnum+2),
                                (i==-1||i==rownum)?initrow2:initrow1,
                                AdsmDataLocal|AdsmDataCache|AdsmDataUpdate);
  }
  adsm_malloc(AdsmBulkEnd);

  // Scratch buffer for the new values of one sweep; only rows 1..length and
  // columns 1..colnum are used.
  float tmpgrid[length+2][colnum+2];

  B0.barrier(nhost+1);

  // computation

  for (k=0; k<loopnum; k++) {
    // Fetch the neighbours' edge rows before reading them.
    adsm_refresh(AdsmBulkBegin);
    adsm_refresh(grid[0]); // last row of the predecessor block
    adsm_refresh(grid[length+1]); // first row of the successor block
    adsm_refresh(AdsmBulkEnd);

    // New value of each interior cell is the mean of its four neighbours.
    for (i=1; i<=length; i++)
      for (j=1; j<=colnum; j++)
        tmpgrid[i][j]=(grid[i-1][j]+grid[i+1][j]+grid[i][j-1]+grid[i][j+1])/4;

    // All slaves must finish reading the old values before any row changes.
    B1.barrier(nhost);

    for (i=1; i<=length; i++)
      for (j=1; j<=colnum; j++)
        grid[i][j]=tmpgrid[i][j];

    adsm_flush(grid[1]); // first owned row, read by the predecessor block
    adsm_flush(grid[length]); // last owned row, read by the successor block

    // Edge rows are flushed everywhere before the next sweep refreshes them.
    B1.barrier(nhost);
  }

  B0.barrier(nhost+1);

  // Flush every owned row before the last barrier; the master reads the rows
  // only after that barrier.
  for (i=1; i<=length; i++)
    adsm_flush(grid[i]);

  B0.barrier(nhost+1);
}
