c*******************  BUK_DSK.F *************************
c
c      This program generates a disordered sequence of N
c      integers using the random number generator given
c      by randlc (written by D. Bailey) and then uses a 
c      bucket sort to sort it.
c
c      Compile with: %f77 -extend_source -o buk_dsk buk_dsk.f
c
c      NOTE: The 'rank' array is out of core
c
c      Prog: Leo Dagum
c            NASA Ames
c      Date: Aug 1990.
c      Last update: Oct 1993
c	            Made 'rank' array out of core, added 
c		    dynamic memory allocation and Class B
c		    verification table.  NOTE: memory 
c		    allocation uses f77 malloc syntax
c		    that works for Sun and SGI f77 compilers.
c		    It may not work on your workstation.
c
c********************************************************
      implicit		    none
      integer               N
      integer               MAXKEY      
      integer               m
      integer               numreps
      double precision      A
      double precision      S

      parameter            (N = 8388608*4)
      parameter            (MAXKEY = 524288*4)

c      parameter            (N = 65536)
c      parameter            (MAXKEY = 2048)

      parameter            (m = MAXKEY/4)
      parameter            (numreps = 10)
      parameter            (A = 1220703125.D0, S = 314159265.D0)


      pointer(ikey, key)
      dimension key(1)
      pointer(irank, rank)
      dimension rank(1)
      pointer(ikey2, key2)
      dimension key2(1)
      integer	key,key2,rank



      integer               keyden(0:MAXKEY-1)
      integer               i,j,k,fp,fp2

      double precision      x
      double precision      seed
      double precision      randlc
      real                  etime
      real                  t1, t2, tarray(2)
      real                  second
      real                  time1, time2
      character*80	    filename, filename2
      
*
* Initialize seed for random number generator
*
      seed = S
c
c allocate arrays
c
	ikey = malloc(N*4)
	irank = malloc(N*2)
	do i=1,N/2
	   rank(i) = 0
	enddo
*
* Initialize keys with a Gaussian distribution in key densities
*
      do  2 i=0, N-1
            x = randlc(seed, A)      
            x = x + randlc(seed, A)
            x = x + randlc(seed, A)
            x = x + randlc(seed, A)
            key(i+1) = m*x
 2      continue
*
* Timing should begin here.  The keys will be ranked
* numreps times, this is only to make sure that the
* elapsed time is large enough to measure with a stop
* watch (in case the system provided timer is significantly 
* off).
*
*      time1 = second()
       t1 = etime(tarray)

c
c open file to store ranks, delete any existing file by the same name
c
      fp = 1
      filename = 'RANK'
      open(fp, file=filename, status='unknown', form='unformatted')
      close(unit=fp, status='delete')

      do  5 i=1, numreps
            print *, 'Begin iteration ',i
            key(i+1) =  i
	    key(i+numreps+1) = MAXKEY - i
            call bucksort(key, rank, keyden, N, MAXKEY, i,  fp, filename)
            if (N .eq. 1048576*32 .and. MAXKEY .eq. 32768*64) then
              call part_B(N, rank, i, fp, filename)
	    endif
            if (N .eq. 1048576*8 .and. MAXKEY .eq. 32768*16) then
              call part_A(N, rank, i, fp, filename)
	    endif
	    if (N .eq. 65536 .and. MAXKEY .eq. 2048) then
               call parttest(N, rank, i, fp, filename)
	    endif 
5      continue

*
* Timing should end here.
*
*      time2 = second()
       t2 = etime(tarray)
      print *,'Performed ',numreps,' rankings in time: ', t2-t1
*      print *,'Performed ',numreps,' rankings in time: ', time2-time1

c
c open file to store keys, delete any existing file by the same name
c
      print *, 'writing out key sequence'
      fp2 = 2
      filename2 = 'KEYS'
      open(fp2, file=filename2, status='unknown', form='unformatted')
      close(unit=fp2, status='delete')
      open(fp2, file=filename2, status='unknown', form='unformatted')
      write(fp2) (key(i+1), i=0,N/4-1)
      write(fp2) (key(i+1), i=N/4,2*N/4-1)
      write(fp2) (key(i+1), i=2*N/4,3*N/4-1)
      write(fp2) (key(i+1), i=3*N/4,N-1)
      close(fp2)
*
* Move keys to their ranked positions in 4 steps, 
* key will store sorted sequence, key2 will store original sequence in quarters
*
c
c read rank array and key array, allocate a key2 array of size N/4
c
	ikey2 = malloc(N)
	do i=1,N/4
	   key2(i) = -1
	enddo

        open(fp, file=filename, status='old', form='unformatted')
	print *, 'reading in 1st half of rank array'
	read(fp) (rank(i+1), i=0,N/2-1)

	print *, 'reading in 1st quarter of key array'
        open(fp2, file=filename2, status='old', form='unformatted')
	read(fp2) (key2(i+1), i=0,N/4-1)


      	do i=0, N/4-1
            k       = rank(i+1)
            key(k+1) = key2(i+1)
	enddo

	print *, 'reading in 2nd quarter of key array'
	read(fp2) (key2(i+1), i=0,N/4-1)
      	do i=0, N/4-1
            k       = rank(i+N/4+1)
            key(k+1) = key2(i+1)
	enddo

	print *, 'reading in 2nd half of rank array'
	read(fp) (rank(i+1), i=0,N/2-1)
	print *, 'reading in 3rd quarter of key array'
	read(fp2) (key2(i+1), i=0,N/4-1)
      	do i=0, N/4-1
            k       = rank(i+1)
            key(k+1) = key2(i+1)
	enddo

	print *, 'reading in 4th quarter of key array'
	read(fp2) (key2(i+1), i=0,N/4-1)
      	do i=0, N/4-1
            k       = rank(i+N/4+1)
            key(k+1) = key2(i+1)
	enddo

c
c close and delete the RANK and KEYS files
c
        close(unit=fp, status='delete')
        close(unit=fp2, status='delete')
*
*  Check that things got sorted ok (sorted here means non-descending order)
*
  	j = 0
	do i=1, N
	   if (key(i) .eq. -1) j = j + 1
	enddo
	print *, j, ' keys with val -1'
	print *, '  '

      j = 0
      do 30 i=1, N-1
            if (key(i+1) .lt.  key(i)) then
                  j = j + 1
            endif
 30      continue

      if (j .eq. 0) then
            print *, 'PASSED:   0 out of place.'
      else
            print *, 'FAILED:  ', j, ' out of place.'
      endif
      
      stop
      end
*******************************************************************
*
*       BUCKET_SORT
*      This subroutine actually only ranks the keys (i.e. it does
*      not move the keys to their sorted locations).  Ranking is
*      done using the bucket sorting algorithm.  
*
*******************************************************************
      subroutine bucksort(key, rank, keyden, N, MAXKEY, iter, fp, filename)


      integer            N
      integer            MAXKEY      

      integer            key(0:N-1)
      integer            rank(0:N/2-1)
      integer            keyden(0:MAXKEY-1)
      integer		 iter
      integer            i, j, k, fp
      character*80	 filename

*
*  Zero the keyden array
*
      do 40 i=0, MAXKEY-1
            keyden(i) = 0
 40      continue
*
*  Count occurrences of each key (the 'key density')
*

      do 60 i=0, N-1
            k = key(i)
            keyden(k) = keyden(k) + 1
 60      continue
*
*  Create running sum (i.e. starting index) of keyden array
*
      do 80 i=1, MAXKEY-1
            keyden(i) = keyden(i) + keyden(i-1)
 80      continue
*
*  Compute rank for each key
*

      do  110 i=0, N/2-1
            k = key(i)
            keyden(k) = keyden(k) - 1
            rank(i)    = keyden(k) 
 110      continue
c
c open file and write out 1st half of rank array
c
      open(fp, file=filename, status='unknown', form='unformatted')
      close(unit=fp, status='delete')
          open(fp, file=filename, status='unknown', form='unformatted')
	  write(fp) (rank(i), i=0,N/2-1)
c
c loop on 2nd half of keys
c
	do i=N/2, N-1
	    k = key(i)
	    keyden(k) = keyden(k) - 1
	    rank(i-N/2) = keyden(k)
	enddo
c
c write out 2nd half of rank array
c
	write(fp) (rank(i), i=0,N/2-1)
	close(fp)


      end
*******************************************************************
*       PARTTEST
*      This routine performs the partial verification test on the
*      ranking.  The numbers are correct for the scaled down benchmark
*      given here.  For the full scale benchmark apply the numbers
*      provided in the benchmark description.
*******************************************************************
      subroutine parttest(N, rank, iter, fp, filename)

      integer            N
      integer            rank(0:N/2-1)
      integer            iter
      integer            i, j, errs, fp
      character*80	 filename


        i = iter - 1
        errs = 0 

        open(fp, file=filename, status='old', form='unformatted')
	read(fp) (rank(j), j=0,N/2-1)


	print *, 'rank(17148)     = ', rank(17148)
	print *, 'rank(23627)     = ', rank(23627)
	print *, 'rank(4431)      = ', rank(4431)

      if (rank(17148) .ne. (19+i)) then
           print *, 'FAILED partial verification test 1'
	   errs = errs + 1
      endif
      if (rank(23627) .ne. (347+i)) then
           print *, 'FAILED partial verification test 2'
	   errs = errs + 1
      endif
      if (rank(4431) .ne. (65462-i)) then
           print *, 'FAILED partial verification test 3'
	   errs = errs + 1
      endif


	read(fp) (rank(j), j=0,N/2-1)
	close(fp)

	print *, 'rank(48427-N/2) = ', rank(48427-N/2)
	print *, 'rank(62548-N/2) = ', rank(62548-N/2)

      if (rank(48427-N/2) .ne. (1+i)) then
           print *, 'FAILED partial verification test 4'
	   errs = errs + 1
      endif

      if (rank(62548-N/2) .ne. (64916-i)) then
           print *, 'FAILED partial verification test 5'
	   errs = errs + 1
      endif
      if (errs .eq. 0) then
           print *, 'PASSED partial verification test'
      endif
      return
      end
*******************************************************************
*      PART_A
*      This routine performs the partial verification test on the
*      ranking.  The numbers are correct for the Class A size problem.
*******************************************************************
        subroutine part_A(N, rank, iter, fp, filename)

        integer            N
        integer            rank(0:N/2-1)
        integer            iter
        integer            i, j, errs, fp
        character*80	 filename

        i = iter - 1
	errs = 0

        open(fp, file=filename, status='old', form='unformatted')
	read(fp) (rank(j), j=0,N/2-1)

        if (rank(662041) .ne. (17523+i)) then
           print *, 'FAILED partial verification test 1'
	   errs = errs + 1
        endif
        if (rank(2112377) .ne. (104+i)) then
           print *, 'FAILED partial verification test 2'
	   errs = errs + 1
        endif
        if (rank(3642833) .ne. (8288932-i)) then
           print *, 'FAILED partial verification test 3'
	   errs = errs + 1
        endif

	read(fp) (rank(j), j=0,N/2-1)
	close(fp)

        if (rank(4250760-N/2) .ne. (8388264-i)) then
           print *, 'FAILED partial verification test 4'
	   errs = errs + 1
        endif
        if (rank(5336171-N/2) .ne. (123928+i)) then
           print *, 'FAILED partial verification test 5'
	   errs = errs + 1
        endif
        if (errs .eq. 0) then
           print *, 'PASSED partial verification test'
        endif


        return
        end
*******************************************************************
*      PART_B
*      This routine performs the partial verification test on the
*      ranking.  The numbers are correct for the Class B size problem.
*******************************************************************
        subroutine part_B(N, rank, iter, fp, filename)

        integer            N
        integer            rank(0:N/2-1)
        integer            iter
        integer            i, j, errs, fp
        character*80	 filename

        i = iter 
	errs = 0

        open(fp, file=filename, status='old', form='unformatted')
	read(fp) (rank(j), j=0,N/2-1)

        if (rank(41869) .ne. (33422937-i)) then
           print *, 'FAILED partial verification test 1'
	   errs = errs + 1
        endif
        if (rank(812306) .ne. (10244+i)) then
           print *, 'FAILED partial verification test 2'
	   errs = errs + 1
        endif
        if (rank(5102857) .ne. (59149+i)) then
           print *, 'FAILED partial verification test 3'
	   errs = errs + 1
        endif

	read(fp) (rank(j), j=0,N/2-1)
	close(fp)

        if (rank(18232239-N/2) .ne. (33135281-i)) then
           print *, 'FAILED partial verification test 4'
	   errs = errs + 1
        endif
        if (rank(26860214-N/2) .ne. (99+i)) then
           print *, 'FAILED partial verification test 5'
	   errs = errs + 1
        endif
        if (errs .eq. 0) then
           print *, 'PASSED partial verification test'
        endif


        return
        end
******************************************************************************
C
      FUNCTION RANDLC (X, A)
C
C   This routine returns a uniform pseudorandom double precision number in the
C   range (0, 1) by using the linear congruential generator
C
C   x_{k+1} = a x_k  (mod 2^46)
C
C   where 0 < x_k < 2^46 and 0 < a < 2^46.  This scheme generates 2^44 numbers
C   before repeating.  The argument A is the same as 'a' in the above formula,
C   and X is the same as x_0.  A and X must be odd double precision integers
C   in the range (1, 2^46).  The returned value RANDLC is normalized to be
C   between 0 and 1, i.e. RANDLC = 2^(-46) * x_1.  X is updated to contain
C   the new seed x_1, so that subsequent calls to RANDLC using the same
C   arguments will generate a continuous sequence.
C
C   This routine should produce the same results on any computer with at least
C   48 mantissa bits in double precision floating point data.  On Cray systems,
C   double precision should be disabled.
C
C   David H. Bailey     October 26, 1990
C
      IMPLICIT DOUBLE PRECISION (A-H, O-Z)
      SAVE KS, R23, R46, T23, T46
      DATA KS/0/
C
C   If this is the first call to RANDLC, compute R23 = 2 ^ -23, R46 = 2 ^ -46,
C   T23 = 2 ^ 23, and T46 = 2 ^ 46.  These are computed in loops, rather than
C   by merely using the ** operator, in order to insure that the results are
C   exact on all systems.  This code assumes that 0.5D0 is represented exactly.
C
      IF (KS .EQ. 0) THEN
        R23 = 1.D0
        R46 = 1.D0
        T23 = 1.D0
        T46 = 1.D0
C
        DO 100 I = 1, 23
          R23 = 0.5D0 * R23
          T23 = 2.D0 * T23
 100    CONTINUE
C
        DO 110 I = 1, 46
          R46 = 0.5D0 * R46
          T46 = 2.D0 * T46
 110    CONTINUE
C
        KS = 1
      ENDIF
C
C   Break A into two parts such that A = 2^23 * A1 + A2 and set X = N.
C
      T1 = R23 * A
      A1 = AINT (T1)
      A2 = A - T23 * A1
C
C   Break X into two parts such that X = 2^23 * X1 + X2, compute
C   Z = A1 * X2 + A2 * X1  (mod 2^23), and then
C   X = 2^23 * Z + A2 * X2  (mod 2^46).
C
      T1 = R23 * X
      X1 = AINT (T1)
      X2 = X - T23 * X1
      T1 = A1 * X2 + A2 * X1
      T2 = AINT (R23 * T1)
      Z = T1 - T23 * T2
      T3 = T23 * Z + A2 * X2
      T4 = AINT (R46 * T3)
      X = T3 - T46 * T4
      RANDLC = R46 * X
C
      RETURN
      END


