#include "blkcm/bc.h"
#include "blkcm/mesh.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#define MALLOC malloc
#define NUMNODES __NUMNODES
#define MYPROCID __MYPROCID

/*
 * Program entry point.
 *
 * Passes main2 together with the command-line arguments to PICall and
 * then returns 0 to the host environment.  The real work of the test is
 * done in main2.
 */
int main(int argc, char **argv)
{
    int main2(int argc, char **argv);

    PICall(main2,argc,argv);
    return 0;
}

/*
 * Return a 1-based label in [1, n] for position (start - 1 + offset),
 * wrapped periodically modulo n.  Adding n before the modulo keeps the
 * operand non-negative when start - 1 + offset is -1.
 */
static int wrapped_coord(int start, int offset, int n)
{
    return ((start - 1 + offset + n) % n) + 1;
}

/*
 * Label expected at local site (i, j) of a partition whose boundaries
 * start at (nx1, ny1) in an nx-by-ny periodic domain.  Labels run row by
 * row from 1 to nx*ny.
 */
static double site_label(int nx1, int ny1, int i, int j, int nx, int ny)
{
    int x_eff = wrapped_coord(nx1, i, nx);
    int y_eff = wrapped_coord(ny1, j, ny);

    return (double) ((y_eff - 1) * nx + x_eff);
}

/*
 * Print the lnx-by-lny local array, one row per line followed by a blank
 * line.  Only processor 0 produces output; other processors do nothing.
 */
static void print_on_root(const double *x, int lnx, int lny)
{
    int i, j;

    if (MYPROCID != 0)
        return;

    for (j = 0; j < lny; j++)
        for (i = 0; i < lnx; i++) {
            printf(" %6.2f ", x[j*lnx + i]);
            if (i == (lnx-1))
                printf("\n");
        }
    printf("\n");
}

/*
 * BlockComm ghost-site communication test on a 4x4, two-dimensional,
 * periodic index space.
 *
 * Every interior (non-ghost) site of the local array is given a unique
 * global label and the border sites are zeroed.  BCexec is then run to
 * fill the border, and every site, border included, is compared with the
 * label it should carry.
 *
 * argc and argv are accepted for PICall but are not used.
 *
 * Returns 0 when the test passes.  On a detected mismatch, or if the
 * local array cannot be allocated, the process exits with EXIT_FAILURE.
 */
int main2(int argc, char **argv)
{
    int dimen = 2;                        /* Dimension of the domain */
    int nx=4, ny=4, lnx, lny;             /* Global and local sizes */
    int nx1, nx2, ny1, ny2;               /* Local partition boundaries */
    int i, j, ind, err=0;                 /* Integer utility variables */
    double *x;                            /* Variable array */
    double tmp;                           /* Double prec. utility variable */
    BCArrayPart sz[2];
    BCPGM *pgm;

    (void) argc;
    (void) argv;

    /* Set up sz array for BlockComm. */
    sz[0].mdim        = nx;
    sz[0].is_parallel = 1;
    sz[0].ndim        = -1;
    sz[1].mdim        = ny;
    sz[1].is_parallel = 1;
    sz[1].ndim        = -1;
    BCFindGhostFromStencil(dimen,sz,0,0,11);
    BCGlobalToLocalArray(dimen,sz,NUMNODES,MYPROCID);

    /* Build program */
    pgm = BCBuildArrayPGM(dimen,sz,NUMNODES,MYPROCID,sizeof(double));
    BCArrayCompile(pgm,0);

    /* Get partition boundaries in x and y directions. */
    nx1 = sz[0].start;
    nx2 = sz[0].end;
    ny1 = sz[1].start;
    ny2 = sz[1].end;
    printf("Proc %0d: nx1=%0d, nx2=%0d, ny1=%0d, ny2=%0d\n",MYPROCID,
           nx1,nx2,ny1,ny2);
    PIgsync(PIAllProcs);

    /*
     * Allocate memory for variable array: the local partition plus one
     * extra layer of sites on each side in both directions.
     * NOTE(review): this assumes the ghost width selected by
     * BCFindGhostFromStencil above is exactly one -- confirm.
     */
    lnx = (nx2-nx1+3);
    lny = (ny2-ny1+3);
    /* Widen before multiplying so the size is not computed in int. */
    x = MALLOC((size_t) lnx * (size_t) lny * sizeof *x);
    if (x == NULL) {
        fprintf(stderr,"Proc %0d: out of memory for %0d x %0d array\n",
                MYPROCID,lnx,lny);
        exit(EXIT_FAILURE);
    }

    /*** Begin communication test. ***/
    if (MYPROCID == 0)
        printf("\nTesting communication...\n");
    PIgsync(PIAllProcs);

    /*
     * First we assign all non-ghost sites a unique global index
     * (the ghost sites are set to zero).
     */
    for (j = 0; j < lny; j++)
        for (i = 0; i < lnx; i++) {
            ind = j*lnx + i;
            if ((i==0) || (j==0) || (i==lnx-1) || (j==lny-1))
                x[ind] = 0.0;
            else
                x[ind] = site_label(nx1,ny1,i,j,nx,ny);
        }
    print_on_root(x,lnx,lny);
    PIgsync(PIAllProcs);

    /* Next we set the ghost sites... */
    BCexec(pgm,x,x);

    PIgsync(PIAllProcs);
    print_on_root(x,lnx,lny);
    PIgsync(PIAllProcs);

    /*
     * Now we generate an effective global index for all sites, including
     * the ghost sites, and check for errors.
     */
    for (j = 0; j < lny; j++)
        for (i = 0; i < lnx; i++) {
            ind = j*lnx + i;
            tmp = site_label(nx1,ny1,i,j,nx,ny);
            if (fabs(x[ind] - tmp) > 0.000001) {
                printf("Error on processor %0d: ",MYPROCID);
                printf("(x_global=%0d, y_global=%0d)  ",nx1+i,ny1+j);
                printf("(%0.4f != %0.4f)\n",x[ind],tmp);
                err = 1;
            }
        }

    if (err) {
        /*
         * err is local to this processor: only a failing processor takes
         * this branch, and only processor 0 prints the summary lines.
         */
        if (MYPROCID == 0) {
            printf("Communication test failed.\n");
            printf("Exiting...\n");
        }
        /* Report the failure through the exit status as well. */
        exit(EXIT_FAILURE);
    }
    if (MYPROCID == 0)
        printf("Communication OK...\n\n");

    PIgsync(PIAllProcs);
    printf("Proc %0d: Normal exit from main2...\n",MYPROCID);

    free(x);
    return 0;
}

