/* The first part of this file  "async.c" is based on "sbp_multiproc.h"
 * written by Jochen Roehrig,  (C) 1994 by bird@cs.uni-sb.de 
 * at Saarbruecken University, LS Prof. W.J. Paul, SB-PRAM project,
 * within the framework of the p4gcc compiler libraries.
 * Documentation will be contained in Jochen's Master thesis
 * which will appear 1995 at Saarbruecken University, Germany.
 * Jochen's contribution is greatly appreciated.
 * ---- Christoph W. Kessler, in March 1995.
 *
 * rwd_lock is a new lock type added in March 1999.
 */

/* 
 * Implementation of a locking mechanism
 * simple, fair and safe locks: -> forklib2.asm
 * reader-writer-locks and reader-writer-deleter-locks: here
 *
 * Implementation of counting barrier synchronization ->forklib2.asm
 *
 * Implementation of a parallel loop mechanism
 * Calculation of a unique id in a process group
 * Implementation of a parallel copy routine
 * Implementation of a parallel fifo queue
 * Implementation of a parallel broadcast queue
 */

#include <fork.h>
#include <io.h>
#include <math.h>

/* Debug switch: when set to 1, rwd_lockup() and rwd_unlock() report every
 * call (and every successful acquisition) through pprintf(). */
#define REPORT_LOCKUP_UNLOCK 0    // if 1, print debug info to stdout

/* Private (per-processor) dummy variable.  It is not referenced anywhere in
 * the visible part of this file; it exists only as a workaround. */
pr int __mp_dummyvar;   /* bugfix to make multiprefix work properly */

/*
 * new_SimpleLock - allocate a simple_lock with shmalloc() and clear it to 0.
 * Returns the pointer obtained from shmalloc(); the result of shmalloc() is
 * not checked.  The lock/unlock operations themselves live in forklib2.asm.
 */
simple_lock *new_SimpleLock( void )
{
  simple_lock *lock;

  lock = (simple_lock *)shmalloc( sizeof( simple_lock ));
  *lock = 0;                 /* start with a cleared lock word */
  return lock;
}

/*
 * new_FairLock - allocate a fair_lock with shmalloc() and reset both of its
 * counters (actnum and nextnum) to 0.  Returns the new lock; the result of
 * shmalloc() is not checked.
 */
fair_lock *new_FairLock( void )
{
  fair_lock *lock;

  lock = (fair_lock *)shmalloc( sizeof( fair_lock ));
  lock->actnum = 0;
  lock->nextnum = 0;
  return lock;
}


/*
 * fair_lock_init - reset an existing fair_lock in place: both counters
 * (actnum and nextnum) are set to 0.
 */
void fair_lock_init( fair_lock *pfl )
{
  pfl->actnum = 0;
  pfl->nextnum = 0;
}


/*
 * fair_lockup_delflag - abortable variant of fair_lockup().
 *
 * plock:   fair lock to acquire.
 * delflag: address of a flag that is polled while waiting.
 *
 * Draws a ticket by atomically adding 1 to plock->nextnum (mpadd returns the
 * previous value) and then spins while plock->actnum is still below that
 * ticket.  On every spin iteration *delflag is read; as soon as it is
 * non-zero the function gives up.
 *
 * Returns 1 when the lock was obtained, 0 when the wait was aborted.
 *
 * Note: on abort the ticket that was drawn is not handed back, and the flag
 * is only examined while the caller is still waiting (the loop condition is
 * tested first).
 */
int fair_lockup_delflag( fair_lock *plock, int *delflag )
{
  // same as fair_lockup( plock ) (see forklib2.asm)
  // but aborts and returns 0 when delflag becomes 1 while waiting
  unsigned int myticket = mpadd( &(plock->nextnum), 1 );
  while (plock->actnum < myticket)     /* wait and check */
     if ( *delflag != 0)  return 0;    /* abort: flag was raised */
  return 1;                            /* OK, obtained the lock */
}


/* ############### definition of reader/writer-lock  ############### */

/* We use "mpadd" and "syncor" to access the memory cell holding the number
 * of readers owning the lock and the writer-access flag.
 *
 * To be sure that no error occurs in a sorting node, in a network node
 * or in a memory unit during access to the memory cell holding the
 * reader counter, we do "syncor"/"mpadd" only if MODULO == 0/1.
 */

#define __RW_FLAG__ 0x40000000  /* 2^30: writer flag inside readercounter */
/* We can't use 2^31 since -(__RW_FLAG__) has to be calculated correctly
 * (the writer unlock adds -__RW_FLAG__ to the counter).
 * Layout of the lock, for reference:
 * typedef struct {
 *  unsigned int readercounter;  * Bits 0-29 are used as reader counter
 *                               * Bit 30 is used as writer-flag
 *  fair_lock writerlock;
 * } rw_lock;
 */

/*
 * rw_lock_init - reset an existing reader/writer lock in place: no readers,
 * no writer flag, and an embedded fair lock whose counters are both 0.
 */
void rw_lock_init( rw_lock *lock)
{
   /* embedded fair lock that serialises the writers */
   lock->writerlock.actnum = 0;
   lock->writerlock.nextnum = 0;
   /* reader count and writer flag share this word */
   lock->readercounter = 0;
}

rw_lock *new_RWLock( void )
{
  rw_lock *ret = (rw_lock *)shmalloc( sizeof( rw_lock ));
  ret->readercounter = 0;
  ret->writerlock.nextnum = ret->writerlock.actnum = 0;
  return ret;
}



/*
 * rw_lockup - acquire a reader/writer lock; blocks (busy-waits) until done.
 *
 * lock:  the lock to acquire.
 * ptype: RW_READ requests shared (reader) access; any other value is
 *        handled as a writer request.
 *
 * Reader: spins while the writer flag (__RW_FLAG__) is set in
 * readercounter, then increments the counter.  If the value seen by that
 * increment already carries the writer flag, the increment is undone and
 * the whole procedure is repeated.
 *
 * Writer: takes the embedded fair lock, sets the writer flag with syncor()
 * and then spins until the reader bits (everything below __RW_FLAG__) are 0.
 *
 * With PROFILING defined, trace events are emitted on entry and on exit.
 */
void rw_lockup( rw_lock *lock, int ptype)
{
 int wait;
#ifdef PROFILING
 traceEntry( TRACE_LOCKUP_WAIT );
#endif
 if(ptype == RW_READ) {         /* reader-lock */
    wait = 1; /* wait = TRUE */
    while(wait) {
       wait = 0; /* wait = FALSE */
       /* wait for writer to leave the lock */
       while( mpadd(&(lock->readercounter), 0) & __RW_FLAG__ );
       /* writer has finished - now try to catch lock */
       if( mpadd(&(lock->readercounter), 1) & __RW_FLAG__ ) {
          /* another writer was faster than me */
          mpadd( &(lock->readercounter), -1); 
          /* undo lock */
          wait = 1; /* wait = TRUE */
       }
    }
 }
 else {      /* writer-lock */
    /* wait for other writers to leave the lock */
#if 0
    unsigned int rc = lock->readercounter;
#endif
    fair_lockup(&(lock->writerlock));
    /*  set flag to indicate to the readers that I want to have the lock */
#if 0
    asm("bmc    0\n\
         gethi  0x40000000,r31 /*__RW_FLAG__*/\n\
         syncor r31,%rc\n\
         nop    /*delay*/");
         /*\n\ nop    \n\ stg    r31,r30,0 */
#endif
    syncor( (int *)&(lock->readercounter), __RW_FLAG__ );
    /* wait for readers to leave the lock */
    while( mpadd( &(lock->readercounter), 0) & (__RW_FLAG__-1));
    /*     ^^^^^ assigning the result to a dummy could cause problems. -> asm */
    /*     possibly use the syncor() routine instead */
 }
#ifdef PROFILING
 traceEntry( TRACE_LOCKUP_DONE );
#endif
} /* rw_lockup() */


/*
 * rw_unlock - release a reader/writer lock taken with rw_lockup().
 *
 * lock:  the lock to release.
 * ptype: RW_READ releases a reader share; any other value releases the
 *        writer side.
 * wait:  number of iterations of an empty delay loop that a writer runs
 *        after reopening the lock for readers and before admitting the
 *        next writer.  Ignored for readers.
 */
void rw_unlock( rw_lock *lock, int ptype, int wait)
{
 int spin;

 if (ptype == RW_READ) {
    /* reader: just drop my share */
    mpadd( &(lock->readercounter), -1 );
    return;
 }

 /* writer: clear the writer flag so that readers may enter again */
 mpadd( &(lock->readercounter), -__RW_FLAG__ );

 /* delay before the next writer is let in */
 for (spin = 0; spin < wait; spin++)
    ;

 /* hand the fair lock on to the next writer */
 fair_unlock( &(lock->writerlock) );
} /* rw_unlock() */



// RWD - Lock: a generalization of the RW-Lock  C. Kessler 9903

/*
 * rwd_lock_init - reset an existing reader/writer/deleter lock in place:
 * no readers, no writer flag, fair-lock counters at 0, delete flag cleared.
 */
void rwd_lock_init( rwd_lock *lock)
{
   lock->deleteflag = 0;
   /* embedded fair lock that serialises writers and deleters */
   lock->writerlock.actnum = 0;
   lock->writerlock.nextnum = 0;
   /* reader count and writer flag share this word */
   lock->readercounter = 0;
}

/*
 * new_RWDLock - allocate a reader/writer/deleter lock with shmalloc() and
 * put it into the same state rwd_lock_init() produces (all fields 0).
 * Returns the new lock; the result of shmalloc() is not checked.
 */
rwd_lock *new_RWDLock( void )
{
  rwd_lock *lock;

  lock = (rwd_lock *)shmalloc( sizeof( rwd_lock ));
  lock->readercounter = 0;
  lock->writerlock.actnum = 0;
  lock->writerlock.nextnum = 0;
  lock->deleteflag = 0;
  return lock;
}



/*
 * rwd_lockup - acquire a reader/writer/deleter lock, giving up once the
 * lock has been marked as deleted.
 *
 * lock:  the lock to acquire.
 * ptype: RW_READ requests shared access; any other value takes the
 *        exclusive (writer) path.  If ptype is RW_DELETE, lock->deleteflag
 *        is additionally set to 1 once exclusive access has been obtained.
 *
 * Returns 1 on success, 0 if lock->deleteflag was seen set while waiting.
 *
 * The protocol is the one used by rw_lockup(), with lock->deleteflag polled
 * inside every wait loop.  On the exclusive path the fair lock is taken with
 * fair_lockup_delflag(), which itself aborts on the flag.
 *
 * With PROFILING defined, a TRACE_LOCKUP_DONE event is emitted on every
 * return path, including the failing ones.
 *
 * NOTE(review): when the exclusive path aborts while waiting for readers,
 * it returns without clearing __RW_FLAG__ and without releasing the fair
 * lock it already holds.  Presumably acceptable because the lock is dead at
 * that point - confirm against the intended usage.
 */
int rwd_lockup( rwd_lock *lock, int ptype)
{
 int wait;
#if REPORT_LOCKUP_UNLOCK
 pprintf("call rwd_lockup( %p, %s )\n", lock, (ptype==RW_READ)?"RW_READ":
          (ptype==RW_WRITE)?"RW_WRITE":"RW_DELETE");
#endif
#ifdef PROFILING
 traceEntry( TRACE_LOCKUP_WAIT );
#endif
 if(ptype == RW_READ) {         /* reader-lock */
    wait = 1; /* wait = TRUE */
    while(wait) {
       wait = 0; /* wait = FALSE */
       /* wait for writer to leave the lock */
       while( mpadd(&(lock->readercounter), 0) & __RW_FLAG__ )
          if (lock->deleteflag) {
#ifdef PROFILING
             traceEntry( TRACE_LOCKUP_DONE );
#endif
             return 0;  /* fail */
          }
       /* writer has finished - now try to catch lock */
       if( mpadd(&(lock->readercounter), 1) & __RW_FLAG__ ) {
          /* another writer was faster than me */
          mpadd( &(lock->readercounter), -1); 
          /* undo lock */
          if (lock->deleteflag) {
#ifdef PROFILING
             traceEntry( TRACE_LOCKUP_DONE );
#endif
             return 0;  /* fail */
          }
          wait = 1; /* wait = TRUE */
       }
    }
#if REPORT_LOCKUP_UNLOCK
    pprintf("rwd_lockup( %p, RW_READ ) succeeded\n", lock );
#endif
 }
 else {      /* writer-lock (also used for RW_DELETE) */
    /* wait for other writers to leave the lock; abort if deleted meanwhile */
    if (!fair_lockup_delflag(&(lock->writerlock), &(lock->deleteflag))) {
#ifdef PROFILING
        traceEntry( TRACE_LOCKUP_DONE );
#endif
        return 0;   /* fail */
    }
    /*  set flag to indicate to the readers that I want to have the lock */
    syncor( (int *)&(lock->readercounter), __RW_FLAG__ );
    /* wait for readers to leave the lock */
    while( mpadd( &(lock->readercounter), 0) & (__RW_FLAG__-1))
       if (lock->deleteflag) {
#ifdef PROFILING
           traceEntry( TRACE_LOCKUP_DONE );
#endif
           return 0;  /* fail; writer flag and fair lock are left as they are */
       }
    if (ptype == RW_DELETE) {
       /* mark the lock as deleted so that all other waiters give up */
       lock->deleteflag = 1;   /*stg at mo=1, all other accesses ldg at mo=0*/
    }
#if REPORT_LOCKUP_UNLOCK
    pprintf("rwd_lockup( %p, %s ) succeeded\n", lock, (ptype==RW_WRITE)?"RW_WRITE":"RW_DELETE");
#endif
 }
#ifdef PROFILING
 traceEntry( TRACE_LOCKUP_DONE );
#endif
 return 1;
} /* rwd_lockup() */


/*
 * rwd_unlock - release a reader/writer/deleter lock taken with rwd_lockup().
 *
 * lock:  the lock to release.
 * ptype: RW_READ releases a reader share; any other value (RW_WRITE,
 *        RW_DELETE) releases the exclusive side.
 * wait:  number of iterations of an empty delay loop run between reopening
 *        the lock for readers and admitting the next writer.  Ignored for
 *        readers.
 *
 * lock->deleteflag is not touched here.
 */
void rwd_unlock( rwd_lock *lock, int ptype, int wait)
{
 int spin;
#if REPORT_LOCKUP_UNLOCK
 pprintf("rwd_unlock( %p, %s )\n", lock,
         (ptype==RW_READ) ? "RW_READ" :
         (ptype==RW_WRITE) ? "RW_WRITE" : "RW_DELETE");
#endif

 if (ptype == RW_READ) {
    /* reader: just drop my share */
    mpadd( &(lock->readercounter), -1 );
    return;
 }

 /* exclusive holder: clear the writer flag so that readers may enter */
 mpadd( &(lock->readercounter), -__RW_FLAG__ );

 /* delay before the next writer is let in */
 for (spin = 0; spin < wait; spin++)
    ;

 /* hand the fair lock on to the next writer */
 fair_unlock( &(lock->writerlock) );
} /* rwd_unlock() */



#if 0

/* ############### parallel loop ############### */

/*
 * sbp_parloop_t
 */

/* State of one parallel loop (this section is disabled by the enclosing #if 0). */
typedef struct {
   int index;            /* next loop index to hand out; advanced in sbp_parloop() */
   sbp_barrier_t sync;   /* barrier passed to sbp_barrier() when the loop is exhausted */
} sbp_parloop_t;

/*
 * sbp_parloop_init()
 */

/* Prepare a parallel-loop descriptor: initialise its barrier and start the
 * index at 0. */
void sbp_parloop_init(sbp_parloop_t *pl)
{
   sbp_barrier_init(&(pl->sync));
   pl->index = 0;
} /* sbp_parloop_init */
 
/*
 * sbp_parloop()
 */

/* Callback handed to sbp_barrier(); defined elsewhere.  The callers in this
 * file pass it a two-element array: [0] = address of a counter,
 * [1] = value to store there (always 0 here). */
void sbp_counter_set(void *counter_and_value);
	/* this function will be called during synchronization to reset the
	   loop index */

/*
 * Fetch the next chunk start of a parallel loop.
 *
 * pl:     loop descriptor.
 * max:    largest valid index.
 * nprocs: number of processes passed on to the end-of-loop barrier.
 * stride: amount added to the shared index per call.
 *
 * Returns the index value obtained from sbp_mpadd_m1() if it does not exceed
 * max.  Otherwise the caller enters the barrier (with sbp_counter_set and a
 * request to reset pl->index to 0) and -1 is returned.
 */
int sbp_parloop(sbp_parloop_t *pl, int max, int nprocs, int stride)
{
   unsigned int reset_args[2]; /* {address of counter, new value} */
   int claimed;

   claimed = sbp_mpadd_m1(&(pl->index), stride);
   if (claimed <= max)
      return claimed;

   /* loop exhausted: synchronise and have the index reset to 0 */
   reset_args[0] = (unsigned int)(&(pl->index));
   reset_args[1] = (unsigned int)0;
   sbp_barrier(&(pl->sync), nprocs, sbp_counter_set, (void *) reset_args);

   return -1;
} /* sbp_parloop */

/* ############# calculation of a unique id in a process group  ############ */

/*
 * Hand out an id to the calling process.
 *
 * procs_in_group: number of processes passed to both barriers.
 * counter:        shared counter used to draw the ids.
 * barrier:        barrier object used for both synchronisations.
 *
 * Sequence: a first barrier with sbp_counter_set asks for *counter to be
 * reset to 0, then the id is drawn with sbp_mpadd(counter, 1), then a second
 * barrier (without callback) is passed before the id is returned.
 */
unsigned int sbp_get_id_in_group(unsigned int procs_in_group, unsigned int *counter,
				 sbp_barrier_t *barrier)
{
	unsigned int reset_args[2]; /* {address of counter, new value} */
	unsigned int my_id;

	/* reset counter - just to be sure */
	reset_args[0] = (unsigned int)counter;
	reset_args[1] = (unsigned int)0;
	sbp_barrier(barrier, procs_in_group, sbp_counter_set, (void *)reset_args);

	/* draw my id */
	my_id = sbp_mpadd(counter, 1);

	/* wait until everybody has drawn one */
	sbp_barrier(barrier, procs_in_group, NULL, NULL);

	return my_id;
} /* sbp_get_id_in_group() */


/* ############### parallel queue/broadcast queue ############### */

/* #define log2_floor(value)  in async.h*/

/*
 * maximum queue size
 * #define SBP_MAX_PQ_LOG 13 * there are 2^SBP_MAX_PQ_LOG lists *
 * #define SBP_MAX_BQ_LOG 13 * there are 2^SBP_MAX_BQ_LOG lists * 
 */
/*
 * dummy queue element
 */
/* the definition of "sbp_pq_ele" and "sbp_bq_ele" must be identical
 * (modulo different names for the struct items 

 * struct __pq_listhd__; * defined below *
 * struct __bq_listhd__; * defined below *

 * typedef struct __sbp_pq_ele__ {
  * * the first four components must correspond to the first four components
    * of "sbp_pq_listhd" *
  * struct __pq_listhd__  *head;    * pointer to head of list stg/ldg on MODULO 0/1 *
  * struct __sbp_pq_ele__ *next;    * to build a doubly linked list *
  * struct __sbp_pq_ele__ *prev;    * to build a doubly linked list *
  * unsigned int missing; * number of removed items at the right of the element *
  * unsigned int dummy1, dummy2; * only used in "sbp_bq_ele" *
 * } sbp_pq_ele;
 * 
 * typedef struct __sbp_bq_ele__ {
  * * the first three components must correspond to the first three components
    * of "sbp_bq_listhd"
  * struct __bq_listhd__  *head;       * pointer to head of list stg/ldg on MODULO 0/1
  * struct __sbp_bq_ele__ *next;       * to build a doubly linked list
  * struct __sbp_bq_ele__ *prev;       * to build a doubly linked list
  * unsigned int id;    * identificator of stored element
  * unsigned int read_start; * decremented when a process starts to read the element access only by mpadd
  * unsigned int read_end;   * decremented when the process has finished to copy the element syncadd_m0/ldg_m1 
 * } sbp_bq_ele;
 * 
 * 
 *
 * SBP_PQ_TYPEDEF/SBP_BQ_TYPEDEF
 *
 * - the first six items correspond to the six items
 *   of "sbp_pq_ele"/"sbp_bq_ele"
 *
 * - both macros are interchangeable
 * 
 * #define SBP_PQ_TYPEDEF(typename, items...)
 *
 * head of a list of queued elements
 */

/*typedef struct __pq_listhd__ { ... } sbp_pq_listhd; */


/*typedef struct __bq_listhd__ { ... } sbp_bq_listhd;*/

/*
 * parallel queue/broadcast queue
 */

/*typedef struct __sbp_parallel_queue__ {
 *  unsigned int q_size;       * only read access *
 *  int min_num_ele;           * tdr_m1 & mpadd_m1/syncadd_m0 *
 *  unsigned int read_count;   * only access by mpadd *
 *  unsigned int write_count;  * only access by mpadd *
 *  sbp_pq_listhd *listarray;  * only read access *
 * } sbp_pq;
 */

/*typedef struct __sbp_broadcast_queue__ {
 *  unsigned int q_size;        * only read access *
 *  unsigned int num_procs;     * number of processors that use this queue; only read access
 *  int min_num_ele;            * syncadd_m0/ldg_m1
 *  unsigned int write_count;   * only access by mpadd 
 *  unsigned int *read_count;   * holds the id of the next element 
                                  the process wants to read;
                                  there's one counter for each processor
 *  sbp_bq_listhd *list_array;  * array of lists of queued elements only read access
 *  } sbp_bq;
 */

/* Return the current value of pq->min_num_ele, read by adding 0 with
 * sbp_mpadd_m1(). */
int sbp_pq_noe(sbp_pq *pq)
{
	int count;

	count = sbp_mpadd_m1(&(pq->min_num_ele), 0);
	return count;
} /* sbp_pq_noe() */


/* Return bq->min_num_ele (read by adding 0 with sbp_mpadd_m1()) minus the
 * read counter stored for id_in_group. */
int sbp_bq_noe(sbp_bq *bq, unsigned int id_in_group)
{
	int queued;

	queued = sbp_mpadd_m1(&(bq->min_num_ele), 0);
	return (queued - bq->read_count[id_in_group]);
} /* sbp_bq_noe() */

#endif






/* ======================================================= */
/* TRACING-SUPPORT    990722 C. Kessler */

/*
 * initTracing - allocate the trace buffer.
 *
 * lengthInWords: requested buffer size; it is passed unchanged to shmalloc()
 *                and stored in _tracebufferlength.  Each trace event
 *                written by traceEntry() occupies three entries.
 *
 * Executed by one processor only (seq).  Also records
 * _traceshsize = shavail() + <current value of the gps register>.
 *
 * A non-positive length is reported with pprintf() and then ignored: no
 * buffer is allocated and _tracebuffer/_tracebufferlength keep their previous
 * values, so a later startTracing() can still fall back to its default
 * buffer.  (Previously the bad value was passed on to shmalloc() and stored
 * as the buffer length.)
 */
sync void initTracing ( sh int lengthInWords )
{
  seq {
    int thegps;
    if (lengthInWords <= 0) 
       pprintf( "initTracing: bad par\n");
    asm("stg gps,%thegps\n");
    if (lengthInWords > 0) {
       _tracebuffer = (int *)shmalloc( lengthInWords );
       _tracebufferlength = lengthInWords;
    }
    _traceshsize = shavail() + thegps;
  }
}

void traceEntry( int eventtype )
{
  int thegps, thetime;
  int *pos;
  if (! _tracingenabled) return;
  thetime = getct(); 
  asm("stg gps,%thegps\n");
  pos = (int *)mpadd( &_postracebuf, 3 );
  *pos = (eventtype << 16) + __PROC_NR__;
  pos[1] = thetime;
  pos[2] = thegps;
}

/*
 * startTracing - (re)start event tracing.
 *
 * Executed by one processor only (seq).  Records the current gps register
 * value in _tracefirstgps, allocates a default buffer of 8192 entries if no
 * buffer exists yet, rewinds the write position to the start of the buffer,
 * stores the start time and finally enables tracing.
 */
sync void startTracing ( void )
{
  seq {
     int thegps;
     asm("stg gps,%thegps\n");      /* thegps = current gps register */
     _tracefirstgps = thegps;
     if (!_tracebuffer) {   // use default size:
        _tracebuffer = (int *)shmalloc( 8192 );
        _tracebufferlength = 8192;
     } 
     _postracebuf = _tracebuffer;   /* discard anything recorded earlier */
     _tracestarttime = getct();
     _tracingenabled = 1;           /* last: traceEntry() becomes active */
  }
}

sync void stopTracing ( void )
{
  _tracingenabled = 0;
  farm _tracestoptime = getct();
} 


/* Defined in io.c.  writeTraceFile() uses it to format the words of a trace
 * record as hex fields (called as itohexs(value, destination, 6 or 8)).
 * NOTE(review): exact padding/termination behaviour is not visible here -
 * confirm against io.c. */
extern void itohexs( int n, char *buf, int width ); // io.c


/*
 * writeTraceFile - dump the recorded trace to a text file.
 *
 * filename: name of the file to write.  If a file of that name can already
 *           be opened for reading, the user is asked whether to overwrite it.
 * title:    free text appended to the header line; NULL is written as
 *           "untitled".
 *
 * File layout, in this order:
 *   1. one header line (processor count, start/stop time, _traceshsize,
 *      first gps value, six global access counters, title);
 *   2. one line per started processor with its six private counters;
 *   3. one 25-character line per trace record: the three words of the
 *      record formatted by itohexs() with widths 6, 8 and 8, separated by
 *      blanks and ended by a newline;
 *   4. the end marker "0 0 0".
 *
 * Tracing is stopped implicitly if it is still enabled.  The record lines
 * are formatted in parallel: the participating processors are numbered
 * 0..p-1 through mpadd(), the records are processed in windows of p*16, and
 * each processor formats every p-th record of a window into a shared output
 * buffer which one processor then writes out with write().
 *
 * Fatal conditions (missing filename, user declines to overwrite, file
 * cannot be opened for writing) end the program with exit(1).
 *
 * Changes with respect to the previous version:
 *   - the probe stream is closed only if the probe fopen() succeeded
 *     (fclose() used to be called on a NULL stream as well);
 *   - the y/n answer is read into a real buffer ("%s" used to store into a
 *     single char);
 *   - a NULL title is replaced by "untitled" in the progress message too;
 *   - a NULL filename or a failed fopen(.., "w") now stops the program
 *     instead of continuing with a NULL FILE pointer.
 *
 * NOTE(review): the header is written with fprintf() while the records go
 * through write() on tf->fd.  If the stdio layer buffers its output, the
 * two may end up out of order - confirm against the Fork I/O library.
 */
sync void writeTraceFile( sh char *filename, sh char *title )
{
 sh FILE *tf;
 sh int **tabl;
 sh int p=0;
 sh int outbufsize, datawindowsize;
 sh char *outbuffer;
 sh int *dataptr;
 int *ptr, i;
 $ = mpadd( &p, 1);     /* my rank among the callers; p ends up as their number */
 seq {
  if (!filename) {
    pprintf("writeTraceFile: bad filename\n");
    exit(1);
  }
  printf("\n%x Writing tracefile %s entitled %s\n with %d entries... ",
          _traceshsize,
          filename, title? title: "untitled",
          (_postracebuf - _tracebuffer)/3 );
  if (_tracingenabled) {   // implicitly do a stopTracing():
      _tracingenabled = 0;
      _tracestoptime = getct();
  }
  if (!_tracebuffer) pprintf("writeTraceFile: no initTracing\n");
  if ((tf = fopen( filename, "r" ))) {
    char answer[80];
    answer[0] = '\0';      /* defined value even if scanf() reads nothing */
    printf("\nwriteTraceFile: file %s exists. Overwrite (y/n)?", filename);
    scanf("%s", answer );
    fclose(tf);            /* the probe stream is no longer needed */
    if (answer[0]!='y') exit(1);
    printf("overwriting...");
  }
  if (! (tf = fopen( filename, "w" ))) {
    printf("writeTraceFile: could not open file %s\n", filename );
    exit(1);
  }
  fprintf(tf, "%d %x %x %x %x %d %d %d %d %d %d %s\n",
          __STARTED_PROCS__, _tracestarttime, _tracestoptime,
          _traceshsize, _tracefirstgps, _shldgcnt, _shstgcnt,
          _mpaddcnt, _mpmaxcnt, _mpandcnt, _mporcnt,
          title? title: "untitled");
   tabl = (int **)shmalloc(__STARTED_PROCS__ * sizeof(int*));
 }
  
 // "parallel" intermezzo (need to access private counters):
 farm {
   tabl[__PROC_NR__] = (int *)shmalloc( 6 * sizeof(int));
   tabl[__PROC_NR__][0] = _myshldgcnt;
   tabl[__PROC_NR__][1] = _myshstgcnt;
   tabl[__PROC_NR__][2] = _mympaddcnt;
   tabl[__PROC_NR__][3] = _mympmaxcnt;
   tabl[__PROC_NR__][4] = _mympandcnt;
   tabl[__PROC_NR__][5] = _mymporcnt;
 }
 seq {
  for (i=0; i<__STARTED_PROCS__; i++)
    fprintf( tf, "%x %x %x %x %x %x\n", 
      tabl[i][0], tabl[i][1], tabl[i][2], tabl[i][3], tabl[i][4], tabl[i][5]);
 }
 datawindowsize = p * 16;  // max #events per flush
 outbufsize = 25 * datawindowsize;   // 25 characters per event line
 outbuffer = shalloc( outbufsize );
 for (dataptr = _tracebuffer; dataptr < _postracebuf; dataptr+=3*datawindowsize) {
   char *mybuf = outbuffer + $*25;   // my first output line in this window
   int outsize;
   farm {
    int outevents = min( datawindowsize, (_postracebuf - dataptr ) / 3);
    outsize = 25*outevents;
    //pprintf("outsize %d  outevents %d\n", outsize, outevents );
    // processor $ formats events $, $+p, $+2p, ... of this window
    for ( ptr = dataptr+3*$; ptr < dataptr+3*outevents; ptr+=(3*p)) {
      //One line per ptr. Format: "6x 8x 8x\n"
            //      evnttype + thepid, thetime, thegps 
      //pprintf("event %x: %x %x %x\n", ptr, ptr[0], ptr[1], ptr[2] );
      itohexs( ptr[0], mybuf, 6 );  
      mybuf[6] = ' ';
      itohexs( ptr[1], mybuf+7, 8 );  
      mybuf[15] = ' ';
      itohexs( ptr[2], mybuf+16, 8 );  
      mybuf[24] = '\n';
      mybuf += 25*p;
    }
   }
   //seq printf("flushing mybuf %x: %d\n", mybuf, outsize);
   seq write( tf->fd, outbuffer, outsize );
 }
 seq {
    fprintf(tf, "0 0 0\n");    // end marker
    fclose( tf );
    printf("done\n");
 }
 farm shfree( tabl[__PROC_NR__] );
 seq shfree( tabl );
 shallfree();
}
