*#################################################################
*
*      ARRAY TOOLS LIBRARY
*
*  This library is used in conjunction with some BLockComm
*  routines to manipulate array information, and to define
*  an appropriate decomposition.
*
*  The idea is to store all the information in two structures
*  "sz" and "proc" (see meshf.h for a description) that can
*  be passed to the rest of the code in a transparent way.
*  Information is stored or retrieved from the structures
*  by calling routines, but one can always manipulate the
*  structures directly.
*
*  Written by: Andrea Malagoli (malagoli@liturchi.uchicago.edu)
*  Date: 22 May 1995.
*
*#################################################################
*     Decomposition utility routines
*     To be placed in a separate library.
*.................................................................

*-----------------------------------------------------------

      subroutine AL_set_id(proc)
         include 'meshf.h'
         integer proc(p_0:p_1)
         integer PImytid, PInumtids, ierr
c        proc(p_id) = PImytid()
         call MPI_COMM_RANK(MPI_COMM_WORLD,proc(p_id),ierr)
      end

      subroutine AL_set_nproc(proc)
         include 'meshf.h'
         integer proc(p_0:p_1)
         integer PImytid, PInumtids
c        proc(p_np) = PInumtids()
         call MPI_COMM_SIZE(MPI_COMM_WORLD,proc(p_np),ierr)
      end

*/*F
*-----------------------------------------------------------
*
*.... GET_PGM
*.... Get a unique pointer to a given data decomposition ....	
*.... Eventually, we will want to pass an option variable
*.... to specify the communications options.
*.... Must call:
*..       call  set_id(proc)
*..       call  set_nproc(proc)
*F*/

      subroutine AL_get_pgm(pgm,sz,ndim,proc,WWORD)
         include 'meshf.h'
	 integer pgm,proc(p_0:p_1),WWORD
	 integer sz(*)
         myid  = proc(p_id)
         nproc = proc(p_np)
         pgm = BCBuildArrayPGM(ndim,sz,nproc,myid,WWORD)
         call  BCUseOrderedSend(pgm)
         call  BCArrayCompile(pgm,0)
	 return
      end

*/*F
*-----------------------------------------------------------
*... FREE_PGM
*F*/

      subroutine AL_free_pgm(pgm)
	 integer pgm
         call BCfree(pgm)
         return
      end

*-----------------------------------------------------------

*... SET_GLOBAL_DIM
      subroutine AL_set_global_dim(gdim,sz,ndim)
         include 'meshf.h'
         integer ndim
         integer sz(sz_0:sz_1,ndim), gdim(ndim)
	   Do i = 1, ndim
            sz(sz_mdim,i) = gdim(i)
	   End Do
         return
      end

*... GET_GLOBAL_DIM
      subroutine AL_get_global_dim(gdim,sz,ndim)
         include 'meshf.h'
         integer ndim
         integer sz(sz_0:sz_1,ndim), gdim(ndim)
	   Do i = 1, ndim
            gdim(i) = sz(sz_mdim,i)
	   End Do
         return
      end

*-----------------------------------------------------------

*... GET_ARRAY_BOUNDS ....
      subroutine AL_get_array_bounds(bx,ex,bxgp,exgp,dim,sz)
         include "meshf.h"
         integer idim
         integer sz(sz_0:sz_1,1)
	 character*1 dim

         if( dim .eq. 'x' ) idim = 1
         if( dim .eq. 'y' ) idim = 2
         if( dim .eq. 'z' ) idim = 3

         bx   = sz(sz_start,idim)+1
         ex   = sz(sz_end  ,idim)+1
         bxgp = sz(sz_sg   ,idim)
         exgp = sz(sz_eg   ,idim)

         return
       end

*---------------------------------------------------
*/*F
*... GET_LOCAL_DIM
*F*/

       subroutine AL_get_local_dim(ldim,sz,ndim)
       include 'meshf.h'
       integer ldim(ndim), sz(sz_0:sz_1,ndim)
	Do i = 1, ndim
          ldim(i) = sz(sz_end,i)-sz(sz_start,i)+1
	End Do
       end

*/*F
*---------------------------------------------------
*... GET_ARRAY_TSIZE
*
*  Get the size of the array on the node
*  INCLUDING the ghost points.
*
*F*/

       subroutine AL_get_array_tsize(size,sz,ndim)
       include 'meshf.h'
       integer size, ndim
       integer sz(sz_0:sz_1,ndim)

       size = 1

       Do i = 1, ndim
	bx = sz(sz_start,i)
	ex = sz(sz_end,i)
	bxgp = sz(sz_sg,i)
	exgp = sz(sz_eg,i)
	size = size*(ex+exgp-bx+bxgp+1)
       End Do

       end

*/*F
*---------------------------------------------------
*... GET_ARRAY_ISIZE
*
*  Get the size of the array on the node
*  EXCLUDING the ghost points.
*
*F*/

       subroutine AL_get_array_isize(size,sz,ndim)
       include 'meshf.h'
       integer size, ndim
       integer sz(sz_0:sz_1,ndim)

       size = 1

       Do i = 1, ndim
	bx = 0
	ex = sz(sz_mdim,i)
	bxgp = sz(sz_sg,i)
	exgp = sz(sz_eg,i)
	size = size*(ex+exgp+bxgp)
       End Do

       end

*/*F
*-----------------------------------------------------------
*..... SUBROUTINE AL_COPY_ARRAY
*..... Copy the contents of an array onto another
*F*/

      subroutine AL_copy_array(sz,ndim,from,to)
      include 'meshf.h'
      integer sz(sz_0:sz_1,ndim)
      dimension from(1), to(1)
      integer off(3), beg(3), end(3)
      static integer ind

      if( ndim .eq. 3 ) then
        call AL_get_array_bounds(bx,ex,bxgp,exgp,"x",sz)
        call AL_get_array_bounds(by,ey,bygp,eygp,"y",sz)
        call AL_get_array_bounds(bz,ez,bzgp,ezgp,"z",sz)
        call AL_cp_arr_3d(to,from,
     &       bx,ex,bxgp,exgp,by,ey,bygp,eygp,bz,ez,bzgp,ezgp)
      endif

      return
      end
*-----------------------------------------------------------
*>>>>>>>>>>>>>>>>>> COPY ARRAY ROUTINES >>>>>>>>>>>>>>>>>>>>>>>*
*-----------------------------------------------------------

      subroutine AL_cp_arr_3d(to,from,
     &       bx,ex,bxgp,exgp,by,ey,bygp,eygp,bz,ez,bzgp,ezgp)

      include 'meshf.h'
      dimension to(bx-bxgp:ex+exgp,by-bygp:ey+eygp,bz-bzgp:ez+ezgp)
      dimension from(bx-bxgp:ex+exgp,by-bygp:ey+eygp,bz-bzgp:ez+ezgp)

          Do k = bz, ez
          Do j = by, ey
          Do i = bx, ex
               to(i,j,k) = from(i,j,k)
          End Do
          End Do
          End Do
      return
      end

      subroutine AL_from_array_to_rast3d(rast,arr,arrmax,arrmin,
     &        bx,ex,bxgp,exgp, by,ey,bygp,eygp, bz,ez,bzgp,ezgp ) 
      include 'meshf.h'
      parameter( BYTE = 255 )
      dimension arr(bx-bxgp:ex+exgp,by-bygp:ey+eygp,bz-bzgp:ez+ezgp)
      character*1 rast(bx:ex,by:ey,bz:ez), c

      scale = BYTE/(arrmax-arrmin)

      Do k = bz, ez
      Do j = by, ey
      Do i = bx, ex
          aval = arr(i,j,k)
          aval = min(aval,arrmax)
          aval = max(aval,arrmin)
          rast(i,j,k) = char(int((aval-arrmin)*scale))
      End Do
      End Do
      End Do

      return
      end

*/*F
*-----------------------------------------------------------
*..... SUBROUTINE AL_ZERO_ARRAY
*..... Zero the contents of an array
*F*/

      subroutine AL_zero_array(a,n0,n1)
      include 'meshf.h'
      dimension a(1)
      integer n0, n1

      Do n = n0, n1
        a(n) = 0.d0
      End Do

      return
      end
*-----------------------------------------------------------

*-----------------------------------------------------------
*>>>>>>>>>>>>>>>>>> MAX MIN ROUTINES >>>>>>>>>>>>>>>>>>>>>>>*
*-----------------------------------------------------------
*/*F
*... function AL_GET_MAXMIN
*... Get MAX and MIN of an array
*F*/

      subroutine AL_get_maxmin(xmax,xmin,x,sz,ndim,procset)
      include 'meshf.h'
      integer sz(sz_0:sz_1,ndim),procset,ierr
      dimension x(*)
      real*8 mx(1), w(1)

      if( ndim .eq. 3 ) then
        call AL_get_array_bounds(bx,ex,bxgp,exgp,"x",sz)
        call AL_get_array_bounds(by,ey,bygp,eygp,"y",sz)
        call AL_get_array_bounds(bz,ez,bzgp,ezgp,"z",sz)

        call AL_get_local_maxmin_3d(xmax,xmin,x,
     &   bx,ex,bxgp,exgp,by,ey,bygp,eygp,bz,ez,bzgp,ezgp)

      endif

cccc.... The 2D case must be added here ....

      mx(1) = xmin
c     call PIgdmin(mx(1),1,w(1),procset)
      call MPI_ALLREDUCE(mx(1),w(1),1,MESH_PRECISION,MPI_MIN,
     & MPI_COMM_WORLD,ierr)
      xmin = w(1)

      mx(1) = xmax
c     call PIgdmax(mx(1),1,w(1),procset)
      call MPI_ALLREDUCE(mx(1),w(1),1,MESH_PRECISION,MPI_MAX,
     & MPI_COMM_WORLD,ierr)
      xmax = w(1)

      return
      end

*.... Get LOCAL MAXMIN of an array 
      subroutine AL_get_local_maxmin_3d(xmax,xmin,x,
     &    bx,ex,bxgp,exgp, by,ey,bygp,eygp,bz,ez,bzgp,ezgp)
      include 'meshf.h'      
      dimension x(bx-bxgp:ex+exgp,by-bygp:ey+eygp,bz-bzgp:ez+ezgp)

      xmax = x(bx,by,bz)
      xmin = x(bx,by,bz)

      Do k = bz, ez
      Do j = by, ey
      Do i = bx, ex
          xmax = max(xmax,x(i,j,k))
          xmin = min(xmin,x(i,j,k)) 
      End Do
      End Do
      End Do  
 
      return
      end 
*-----------------------------------------------------------
*-----------------------------------------------------------
*... function AL_GET_MAX
*... Get MAX of an array
      function AL_get_max(x,sz,ndim,procset)
      include 'meshf.h'
      integer sz(sz_0:sz_1,ndim), ierr
      dimension x(*)
      real*8 mx(1), w(1), xmax

      if( ndim .eq. 3 ) then
        call AL_get_array_bounds(bx,ex,bxgp,exgp,"x",sz)
        call AL_get_array_bounds(by,ey,bygp,eygp,"y",sz)
        call AL_get_array_bounds(bz,ez,bzgp,ezgp,"z",sz)

        xmax =  AL_get_local_max_3d(x,
     &   bx,ex,bxgp,exgp,by,ey,bygp,eygp,bz,ez,bzgp,ezgp)
      endif

cccc.... The 2D case must be added here ....

      mx(1) = xmax
c     call PIgdmax(mx(1),1,w(1),procset)
      call MPI_ALLREDUCE(mx(1),w(1),1,MESH_PRECISION,MPI_MAX,
     & MPI_COMM_WORLD,ierr)
      xmax = w(1)
      AL_get_max = xmax

      return
      end

*.... Get LOCAL MAX of an array 
      function AL_get_local_max_3d(x,
     &  bx,ex,bxgp,exgp,by,ey,bygp,eygp,bz,ez,bzgp,ezgp)
      include 'meshf.h'      
      dimension x(bx-bxgp:ex+exgp,by-bygp:ey+eygp,bz-bzgp:ez+ezgp)

      xmax = x(bx,by,bz)

      Do k = bz, ez
      Do j = by, ey
      Do i = bx, ex
          xmax = max(xmax,x(i,j,k)) 
      End Do
      End Do
      End Do  
 
      AL_get_local_max_3d = xmax

      return
      end 
*-----------------------------------------------------------
*-----------------------------------------------------------
*... function AL_GET_MIN
*... Get MIN of an array
      function AL_get_min(x,sz,ndim,procset)
      include 'meshf.h'
      integer sz(sz_0:sz_1,ndim), procset, ierr
      dimension x(*)
      real*8 mx(1), w(1), xmin

      if( ndim .eq. 3 ) then
        call AL_get_array_bounds(bx,ex,bxgp,exgp,"x",sz)
        call AL_get_array_bounds(by,ey,bygp,eygp,"y",sz)
        call AL_get_array_bounds(bz,ez,bzgp,ezgp,"z",sz)

        xmin =  AL_get_local_min_3d(x,bx,ex,bxgp,exgp,
     &                by,ey,bygp,eygp,bz,ez,bzgp,ezgp)
      endif

cccc.... The 2D case must be added here ....

      mx(1) = xmin
c     call PIgdmin(mx(1),1,w(1),ALLPROC)
      call MPI_ALLREDUCE(mx(1),w(1),1,MESH_PRECISION,MPI_MIN,
     & MPI_COMM_WORLD,ierr)
      xmin = w(1)
      AL_get_min = xmin

      return
      end

*.... Get LOCAL min of an array 
      function AL_get_local_min_3d(x,bx,ex,bxgp,exgp,
     &                by,ey,bygp,eygp,bz,ez,bzgp,ezgp)
      include 'meshf.h'
      dimension x(bx-bxgp:ex+exgp,by-bygp:ey+eygp,bz-bzgp:ez+ezgp)

      xmin = x(bx,by,bz)

      Do k = bz, ez
      Do j = by, ey
      Do i = bx, ex
          xmin = min(xmin,x(i,j,k)) 
      End Do
      End Do
      End Do   
      AL_get_local_min_3d = xmin
      return
      end 

*-----------------------------------------------------------
*>>>>>>>>>>>>>>>>>>>> ARRAY AVERAGES >>>>>>>>>>>>>>>>>>>>>>*
*-----------------------------------------------------------
*... Get AVERAGE of an array
      function AL_get_average(x,sz,ndim,proc,procset)
      include 'meshf.h'
      integer sz(sz_0:sz_1,ndim),proc(p_0:p_1),procset
      dimension x(1)
      real*8 mx(1), w(1), xav

      nproc = proc(p_np)

      if( ndim .eq. 3 ) then
        call AL_get_array_bounds(bx,ex,bxgp,exgp,"x",sz)
        call AL_get_array_bounds(by,ey,bygp,eygp,"y",sz)
        call AL_get_array_bounds(bz,ez,bzgp,ezgp,"z",sz)

        xav =  AL_get_local_average_3d(x,bx,ex,bxgp,exgp,
     &                      by,ey,bygp,eygp,bz,ez,bzgp,ezgp)
      endif

      mx(1) = xav
c     call PIgdsum(mx(1),1,w(1),0)
      call MPI_ALLREDUCE(mx(1),w(1),1,MESH_PRECISION,MPI_SUM,
     & MPI_COMM_WORLD,ierr)
      xav = w(1)
      AL_get_average = xav/nproc

      return
      end

*.... Get LOCAL AVERAGE of an array 
      function AL_get_local_average_3d(x,bx,ex,bxgp,exgp,
     &                      by,ey,bygp,eygp,bz,ez,bzgp,ezgp)
      include 'meshf.h'
      
      dimension x(bx-bxgp:ex+exgp,by-bygp:ey+eygp,bz-bzgp:ez+ezgp)
      real*8 n_elements, xav

      n_elements = (ex-bx+1)*(ey-by+1)*(ez-bz+1)

      xav = 0.d0

      Do k = bz, ez
      Do j = by, ey
      Do i = bx, ex
          xav = xav + x(i,j,k) 
      End Do
      End Do
      End Do   
      AL_get_local_average_3d = xav/n_elements
      return
      end 

*-----------------------------------------------------------
*>>>>>>>>>>>>>>>>>>>>>>>> ARRAY DECOMPOSITION >>>>>>>>>>>>>>*
*-----------------------------------------------------------
*... DECOMP_ARRAY_ ....
*.... Find the array decomposition and assign local array
*.... dimensions.
*.... NOTE: must call set_global_size() first.
*
*...  opt = 0   Use our own decomposition (DEAFULT).
*...  opt = 1   Use BlockComm decomposition.
*...  opt = 2   Impose user-defined decomposition.

       subroutine AL_decomp_array(sz,ndim,proc,stencil,opt)
         include "meshf.h"
         integer nproc, stencil, opt
         integer sz(sz_0:sz_1,ndim), proc(p_0:p_1)

	 nproc = proc(p_np)
	 myid  = proc(p_id)

         Do i = 1, ndim
            sz(sz_isparallel,i) = 0
            sz(sz_isparallel,i) = 0
            sz(sz_isparallel,i) = 0
         End Do

         if( opt.ne.1 .and. opt.ne.2) then
            call AL_find_decomp(sz,proc,ndim)
 	 endif

         Do i = 1, ndim         
            if( proc(p_npx+i-1) .ne. 1) sz(sz_isparallel,i) =  1
            if( opt .eq. 1 ) then
                    sz(sz_ndim,i) = -1
            else
                    sz(sz_ndim,i) = proc(p_npx+i-1)
            endif
         End Do

         call BCFindGhostFromStencil( ndim, sz, 0, 0, stencil )
         call BCGlobalToLocalArray  ( ndim, sz, nproc, myid )

         if( opt.ne.1 .and. opt.ne.2) then
         Do i = 1, ndim
           proc(p_npx+i-1) = sz(sz_ndim,i)
         End Do
	 endif

         return
       end


*---------------------------------------------------

*... FIND_DECOMP

	subroutine AL_find_decomp(sz,proc,ndim)
           include "meshf.h"
           integer sz(sz_0:sz_1,ndim),proc(p_0:p_1)

	   if( ndim .eq. 3 ) then
	       call AL_decomp_3d(sz,proc)
	   endif

	   if( ndim .eq. 2 ) then
              nx = sz(sz_mdim,1)
              ny = sz(sz_mdim,2)
*......... We need a fix for odd sized arrays, where the 
*......... array size is expressed as  NX = 2^(pow) + 1
c             nx = nx - mod(nx,2)
c             ny = ny - mod(ny,2)

              nproc = proc(p_np)
	      call AL_decomp_2d(nproc,nx,ny,nprocx,nprocy)
              proc(p_npx) = nprocx
              proc(p_npy) = nprocy
	      sz(sz_ndim,1) = nprocx
	      sz(sz_ndim,2) = nprocy

*.... Location of this processor along the direction ...
              myid = proc(p_id)
	      sz(sz_loc,1) = mod(myid,nprocx)
 	      sz(sz_loc,2) = myid/nprocx+1
	      proc(p_xloc) = sz(sz_loc,1)
	      proc(p_yloc) = sz(sz_loc,2)
* ............................................
	   endif

           return

	end

*------------------------------------------------------------------
*  DECOMP_3D -- Given NX NY NZ and NPROC determine the 
*               maximally cubic distribution.
*               It needs DECOMP_2D.
*------------------------------------------------------------------

	subroutine AL_decomp_3d(sz,proc)
           include "meshf.h"
           integer sz(sz_0:sz_1,3),proc(p_0:p_1)
	   integer pow3

           nx = sz(sz_mdim,1)
           ny = sz(sz_mdim,2)
	   nz = sz(sz_mdim,3)

*......... We need a fix for odd sized arrays, where the 
*......... array size is expressed as  NX = 2^(pow) + 1
c           nx = nx - mod(nx,2)
c           ny = ny - mod(ny,2)
c           nz = nz - mod(nz,2)

           nproc = proc(p_np)

	   pow3 = alog(float(nproc))/alog(2.)

	   npw3 = 2.**float(pow3)

	   if( npw3 .ne. nproc ) then
	      print *,'nproc is not a power of two'
	      stop
	   endif

           nnz = 2.**(alog(float(nz))/alog(2.))

	   maxxy = max(nx,ny)
	   minxy = min(nx,ny)

	   Do ipz = 0, pow3

	      nprocz = 2**ipz
	      nproc2 = 2**(pow3-ipz)

	      call AL_decomp_2d(nproc2,nx,ny,nprocx,nprocy)

	      maxp2 = max(nprocx,nprocy)
	      minp2 = min(nprocx,nprocy)

	      if( nz .ge. maxxy ) then
		 if( nprocz .ge. maxp2 ) goto 100
	      endif

	      if( nz .lt. maxxy .and. nz .gt. minxy ) then
	       if( nprocz .lt. maxp2 .and. nprocz .ge. minp2 ) goto 100
	      endif

	      if( nz .le. minxy ) then
		 if( nprocz.le.minp2  ) then
			   nprocx_old = nprocx
			   nprocy_old = nprocy
			   nprocz_old = nprocz
		 endif
		 if( nprocz.gt.minp2 ) then
			   nprocx = nprocx_old
			   nprocy = nprocy_old
			   nprocz = nprocz_old
			   goto 100
		 endif
	      endif

	   End Do

*...  Ready to return .....
 100    continue
        
        proc(p_npx) = nprocx
        proc(p_npy) = nprocy
        proc(p_npz) = nprocz
	sz(sz_ndim,1) = nprocx
	sz(sz_ndim,2) = nprocy
	sz(sz_ndim,3) = nprocz

*... set the location of the processor along this dimension ...
 	myid = proc(p_id)

	nzl = myid/nprocx/nprocy + 1
	n1  = mod(myid,nprocx*nprocy)
	nyl = n1/nprocx + 1
	nxl = mod(n1,nprocx)

 	sz(sz_loc,1) =  nxl
 	sz(sz_loc,2) =  nyl
 	sz(sz_loc,3) =  nzl
	proc(p_xloc) =  nxl
	proc(p_yloc) =  nyl
	proc(p_zloc) =  nzl

        return

        end

*------------------------------------------------------------------
*------------------------------------------------------------------
*  DECOMP_2D -- Given NX NY and NPROC determine the 
*               maximally square distribution.
*------------------------------------------------------------------
	subroutine AL_decomp_2d(nproc,nx,ny,nprocx,nprocy)

	integer pow2

        if( nprocx .eq. -1) return
	if( nprocy .eq. -1) return

	if( nproc .eq. 1 ) then
		nprocx = 1
		nprocy = 1
		return
	endif

        nnx = 2.**(alog(float(nx))/alog(2.))

*...... We assume that we have even numbers of points........

        if( nx .lt. ny ) then
	      n1 = ny
	      n2 = nx
	else 
	      n1 = nx
	      n2 = ny
        endif	

        pow2 = alog(float(nproc))/alog(2.)

	npw2 = 2.**(alog(float(nproc))/alog(2.))

	if( npw2 .ne. nproc ) then
	      print *,'nproc is not a power of two'
	      stop
	endif

        nnx = 2.**(alog(float(nx))/alog(2.))

	if( nnx .ne. nx ) then
	      print *,'nx is not a power of two',nx,nnx
	      stop
	endif

	nny = 2.**(alog(float(ny))/alog(2.))

	if( nny .ne. ny ) then
              print *,'ny is not a power of two',ny,nny
	      stop
	endif

        nproc1 = 1
	nproc2 = 1

	Do ip = 0, pow2
	     nproc1 = 2**ip
	     nproc2 = 2**(pow2-ip)
             np1    =  n1/nproc1
	     np2    =  n2/nproc2
	     if( np1 .lt. np2 ) goto 10
	     nproc1_old = nproc1
	     nproc2_old = nproc2
	     np1_old = np1
	     np2_old = np2
	End Do

10	continue

        if( nx .lt. ny ) then
	    nprocx = nproc2_old
	    nprocy = nproc1_old
	    np1 = np2_old
            np2 = np1_old
	else
	    nprocx = nproc1_old
	    nprocy = nproc2_old
	    np1 = np1_old
            np2 = np2_old
	endif

	return 

	end

********************************************************************
*       TEST PROGRAM FOR DECOMP_3D
*-------------------------------------------------------------------
c	program try
c
c20         continue
c	   print*,'Enter nx, ny, nz'
c	   read*,nx,ny,nz
c
c10         continue
c           print*,'Enter nproc (0 to change problem size)'
c	   read*,nproc
c	   if(nproc .eq. 0) goto 20
c
c	   call decomp_3d(nproc,nx,ny,nz,nprocx,nprocy,nprocz)
c
c	   print*,'The decompositions is ',nprocx,nprocy,nprocz
c	   print*,'The local size is ',nx/nprocx,ny/nprocy,nz/nprocz
c
c	   goto 10
c	
c	end
c
