c
c   This file contains a very simple program for testing various combinations
c   of the 3-d grid operations.  The idea is to initialize each segment with
c   the values of the global indices, with ghost points being negative.
c   After a copy, the values are tested.  Note that because the ghost-points
c   exist at the periodic physical boundaries and not at the non-periodic
c   physical boundaries, we can test both cases.
c
c     int mdim,          /* size of array in this dimension */
c     int is_parallel,   /* true if the array is parallelized in
c                           this dimension */
c     int start,         /* starting index for local piece */
c     int end,           /* ending index for local piece */
c     int loc            /* location in this dimension of the 
c                           processor */
c     int ndim           /* number of processors in this dimension,
c 	                   -1 if unspecified */
c     int sg             /* start and end ghost limits */
c     int eg             /* start and end ghost limits */
c     int nsg            /* start and end ghost limits for neighbor */
c     int neg
c
       integer function worker()
       integer iii, k, mx, my, mz, nd
       parameter(nd=3, NBYTES=8)
       integer maxnx, maxny
       parameter(maxnx = 258, maxny = 258, maxnz=16 )
c      Fields in the "SZ" array 
       include '../meshf.h'
       include 'mpif.h'
       integer pgm, szar(0:9,0:nd-1), iper(0:nd-1)
       integer nx, ny, nz
       integer sx,sxgp,ex,exgp,sy,sygp,ey,eygp,sz,szgp,ez,ezgp
       double precision a(maxnx*maxny*maxnz)
       integer myid, nproc, ierr
       integer err
       logical printar, printfinal
c       integer argc, argv
c
c Define the domain as a 2-d mesh of size mx x my, to be subdivided 
c in both dimensions
c
        mx    = 128
        my    = 128
        mz    = 16
c       printar = .false.
c
c       mx = 20
c       my = 14
c       mz = 16
c       printar = .true.
        printar = .false.
        printfinal = .false. 
c       call syargload( argc, argv )
c      if (syarghasname( argc, argv, 1,"-periodic" )) print *, 'found'
c
c
c      ? a loop over boundary cases
       do 1000 iii=0, 7
c      
       szar(szmdim,0)       = mx
       szar(szisparallel,0) =  1
       szar(szndim,0)       = -1
       szar(szmdim,1)       = my
       szar(szisparallel,1) =  1
       szar(szndim,1)       = -1
       szar(szmdim,2)       = mz
       szar(szisparallel,2) =  1
       szar(szndim,2)       = -1
       iper(0)            = mod( iii, 2 )
       iper(1)            = mod( iii/2, 2 )
       iper(2)            = mod( iii/4, 2 )
c       call syarggetintvec( argc, argv, 1, "-iper", 3, iper )
       call MPI_COMM_RANK(MPI_COMM_WORLD,myid,ierr)
       call MPI_COMM_SIZE(MPI_COMM_WORLD,nproc,ierr)
       if (myid .eq. 0) print *, 'Iper = ', iper
c
c      Build the communications pattern by:
c 
c      1. Compute the size of the ghost-points from the computational 
c         stencil
c      2. Compute the local part of the array that this processor 
c         is responsible for
c      3. Build the communication pattern and "compile" it
c
c      Setup the ghost points from the stencil 
c      Note that this assumes doublely periodic boundary conditions.
       call BCFindGhostFromStencil( nd, szar, 0, 0, boxstencil )

       call BCGlobalToLocalArray( nd, szar, nproc, myid )
       call BCSetGhostWidths( nd, szar, iper )
c
c       do 10 i=0, nproc
c       if (pigtoken(0,i).ne.0) then
c          print *, 'Processor ', i
c       call BCPrintArrayPartVerbose( 0, nd, szar )
c       endif
c 10    continue
       pgm = BCBuildArrayPGM( nd, szar, nproc, myid, NBYTES )
       call BCUseOrderedSend( pgm )
       call BCArrayCompile( pgm, 0 )
c       call bcprint_pgms( pgm, 0 )
c
c      Compute the parameters of our part of the domain
c
       sx   = szar(szstart,0) + 1
       ex   = szar(szend,0) + 1
       sxgp = szar(szsg,0)
       exgp = szar(szeg,0)
       nx   = ex - sx + 1 + sxgp + exgp
       sy   = szar(szstart,1) + 1
       ey   = szar(szend,1) + 1
       sygp = szar(szsg,1)
       eygp = szar(szeg,1)
       ny   = ey - sy + 1 + sygp + eygp
       sz   = szar(szstart,2) + 1
       ez   = szar(szend,2) + 1
       szgp = szar(szsg,2)
       ezgp = szar(szeg,2)
       nz   = ez - sz + 1 + szgp + ezgp
c
c      Sanity check on subproblem size
       if (ny * nx * nz .gt. maxnx * maxny * maxnz) then
	   print *, 'Selected domain too large '
	   worker = 1
	   return
       endif
c
c      initialize a 
       call InitDomain( a, mx,my,mz,sx,sxgp,ex,exgp,sy,sygp,ey,eygp,
     *                 sz,szgp,ez,ezgp)
       if (printar) then
       if (myid .eq. 0) print *, 'Initial arrays:'
           do k=1,nz
           print *, 'plane ', k
          call dispall( a(1+(k-1)*(nx*ny)), 
     *                  sx, sxgp, ex, exgp, sy, sygp, ey, eygp, 
     *                  mx, my, 16 )
          enddo
       endif
cc
       call BCexec( pgm, a, a )
c
c      Check results
       call CheckValues( a, mx,my,mz,sx,sxgp,ex,exgp,sy,sygp,ey,eygp, 
     *                   sz,szgp,ez,ezgp,iper, err )
       call MPI_ALLREDUCE(err,i,1,MPI_INTEGER,MPI_SUM,
     &                    MPI_COMM_WORLD,ierr)
       err = i
       if (printfinal .or. (err .gt. 0 .and. printar)) then 
          do k=1,nz
           print *, 'plane ', k
          call dispall( a(1+(k-1)*(nx*ny)), 
     *                  sx, sxgp, ex, exgp, sy, sygp, ey, eygp, 
     *                   mx, my, 16 )
          enddo
       endif
c
       call BCfree( pgm )
c
 1000  continue
       worker = 0
       return
       end
c
       subroutine InitDomain( a, mx,my,mz,
     +                           sx,sxgp,ex,exgp,sy,sygp,ey,eygp,
     +                           sz,szgp,ez,ezgp)
       integer      mx,my,sx,sxgp,ex,exgp,sy,sygp,ey,eygp
       integer      mz,sz,szgp,ez,ezgp
       double precision a(sx-sxgp:ex+exgp,sy-sygp:ey+eygp,
     +                    sz-szgp:ez+ezgp)
       integer i, j, k
c
c     Set everything to - global index
c
      do k=sz-szgp,ez+ezgp
        do j = sy-sygp,ey+eygp
           do i = sx-sxgp,ex+exgp
             a(i,j,k) = -( i + mx * ((j-1) + my * (k-1)))
          enddo
        enddo
       enddo
c
c ---  initialize the interior as the local indices
c
       do k = sz, ez
         do j = sy, ey
           do i = sx, ex
             a(i,j,k) = i + mx * ((j-1) + my * (k-1))
          enddo
        enddo
       enddo
       return
       end
c
       subroutine CheckValues( a, mx,my,mz,
     +                      sx,sxgp,ex,exgp,sy,sygp,ey,eygp,
     +                      sz,szgp,ez,ezgp,iper, err)
       integer      mx,my,sx,sxgp,ex,exgp,sy,sygp,ey,eygp,iper(0:2)
       integer      mz,sz,szgp,ez,ezgp
       double precision a(sx-sxgp:ex+exgp,sy-sygp:ey+eygp,
     +                    sz-szgp:ez+ezgp)
       integer err, tval, i, j, k, cnt
       integer myid, ierr
       include 'mpif.h'
c
c     Set everything should now be global index
c
       call MPI_COMM_RANK(MPI_COMM_WORLD,myid,ierr)
       err = 0
       cnt = 0
       do k = sz-szgp, ez+ezgp
         do j = sy-sygp,ey+eygp
            do i = sx-sxgp,ex+exgp
               tval = i + mx * ((j-1) + my * (k-1))
c This version should be correct for periodic boundaries
             tval = 1+mod(mx+i-1,mx) + mx * ( mod(my+j-1,my) + 
     *              my * (mod(mz+k-1,mz)))
             if (abs(a(i,j,k) - tval) .gt. 1.0e-10) then
                 err = err + 1
                 if (err .gt. 10) return 
                 write (*,1) myid, i, j, k, a(i,j,k), tval
 1               format( ' [', i2, '] (', i3, ',', i5 ,',', i5, 
     *                   ') = ', f6.0, ', should be ', i4 )
             endif
             cnt = cnt + 1
          enddo
         enddo
       enddo
       call MPI_ALLREDUCE(cnt,k,1,MPI_INTEGER,MPI_SUM,
     &                    MPI_COMM_WORLD,ierr)
       if (myid .eq. 0) then
          print *, 'Tested ', k, ' values without error'
       endif
       return 
       end
