/* @TITLE "adapt: OPRA - Offline PRefetch Analyzer" */
/* 
 * adapt - Adaptable portion recognizer
 * 
 * David Kotz 1989
 *
 * This algorithm is like PORT, in that it recognizes sequential portions,
 * but tries to maintain the PDF of the run lengths it has seen so far,
 * and use it to predict run lengths. It makes no attempt to look for
 * regular skiplength patterns, so it won't jump over skips.
 *
 * See notebook for 10/31/89 for derivation.
 * We base this on the idea that the expected value of the run length,
 * given the current run length c, is
 *
 *                  1                
 * E[R|c] =  ---------------  *  SUM (x*p(x))
 *           1 - F(c) + p(c)    x>=c
 * 
 * Where F(c) is the CDF of the run length = SUM(p(x)) for all x<=c
 *       p(c) is the pmf of the run length = cnt(c)/total.
 * where cnt(c) is the number of occurrences of runlen c,
 *       total is the total number of runs = SUM(cnt(x)) for all x.
 * 
 * First we factor out the 'total' in the denominator of all the p(x)
 * (see research notes for 11/30/89):
 *
 *                      1
 * E[R|c] =  -----------------------  *  SUM (x*cnt(x))
 *           total - cum(c) + cnt(c)    x>=c
 *
 * where cum(c) = SUM(cnt(x)) for all x<=c (i.e. total * F(c)).
 * 
 * We compute the two factors of the formula separately, as coeff and sum.
 * Actually 'sum' and 'cum' are computed incrementally, as c changes,
 * and 'coeff' is only calculated when we make the prediction.
 * Actually, adapt.sum is SUM (x*cnt(x)) for
 * x>c, and we add in c*cnt(c) when we compute E[R|c], since cnt(c) is often
 * zero. This way 'sum' only changes whenever we move from one oldrun record
 * to another. Similarly, cum(c) is adjusted when  we move from one 
 * oldrun record to another.
 *
 * Immediate rereference counts as a correct prediction.
 * 
 */

static char rcsid[] = "$Id: adapt.c,v 1.4 89/12/01 14:21:05 dfk Exp $";

/* @SUBTITLE "Definitions" */
#include <stdio.h>
#include "dfk.h"
#include "opra.h"
/*
 * Library routines used without a system header.
 * atof() returns double; declaring it as float is incompatible with the
 * C library's definition.
 */
extern double atof();

extern double rint();		/* needs -lm load option */

/* round(x): x rounded to an integral value, delegated to rint(). */
#define round(x) rint(x)

/* #define DEBUG */

/*
 * MaxDist: tuning parameter, default 5, overwritten by ParmsADAPT() from
 * the command line (must be > 0).  distance() uses it as the cap on how
 * far beyond the current run length a tentative prediction may extend.
 */
static int MaxDist = 5;		/* number of blocks to predict for record runs */

/*
 * RUN: one entry in the singly linked list of run lengths seen so far.
 * Each entry records a distinct run length and how many completed runs
 * had exactly that length.  finished_run() inserts new entries directly
 * after adapt.cur.
 */
typedef struct runstruct RUN;
struct runstruct {
    int runlen;			/* length of the run, in blocks */
    int count;				/* how many completed runs had this length */
    RUN *next;				/* next entry in list, NULL at the end */
};

/*
 * adapt: all state of the predictor, kept in one file-scope object.
 * In the field comments below, c is the current run length
 * (adapt.runlen) and cnt(x) is the number of recorded runs of length x.
 */
static struct {
    /* current run */
    int first;				/* block at which the current run started */
    int last;				/* last block read; negative means no reference yet */
    int runlen;			/* current runlen = last-first+1 */
    int distance;			/* predicted total length of the current run */
    RUN head;				/* dummy head of runlength list (zeroed by ResetADAPT) */
    RUN *cur;				/* current run's position in list: advanced */
					/* whenever runlen equals the next entry's runlen */
    int total;				/* total number of runs recorded */
    long exp;				/* SUM(x*cnt(x)) for all x */
    long sum;				/* SUM(x*cnt(x)) for all x>c; predict() adds c*cnt(c) */
    long cum;				/* SUM(cnt(x)) for all x<=c */
    boolean required;		/* TRUE if the last prediction was only a tentative */
					/* guess, so update_run() must call predict() again */
} adapt;					/* data for this algorithm */

/*
 * Forward declarations of the file-local helpers defined further down.
 * distance() is listed as well: predict() calls it before its definition,
 * and without this declaration the call would implicitly declare it as an
 * external int function, conflicting with the later 'static int' definition.
 */
static void new_run();
static void update_run();
static void finished_run();
static void predict();
static int distance();

/* @SUBTITLE "ParmsADAPT: command-line parameters" */
/*
 * ParmsADAPT: take the algorithm's command-line parameters.
 *
 *   argc - number of parameters supplied for this algorithm (0 or 1)
 *   argv - the parameters; argv[0], if present, is MaxDist
 *
 * With no parameter the current MaxDist (initially the built-in default)
 * is kept.  More than one parameter, or a MaxDist that is not positive,
 * prints a message on stderr and exits.
 */
void
ParmsADAPT(argc, argv)
	int argc;
	char **argv;
{
    if (argc > 1) {
	   fprintf(stderr, "%s: ADAPT only uses 1 parameter\n", program);
	   exit(EXIT_USAGE);
    }

    /* only look at argv[0] when a parameter was actually supplied */
    if (argc > 0 && argv[0] != NULL)
	   MaxDist = atoi(argv[0]);

    if (MaxDist <= 0) {
	   fprintf(stderr, "%s: ADAPT must have MaxDist > 0\n", program);
	   exit(EXIT_PARM);
    }
#ifdef DEBUG
    printf("MaxDist = %d\n", MaxDist);
#endif
}

/* @SUBTITLE "ResetADAPT: Reset data structures" */
/*
 * ResetADAPT: return the predictor to its initial, empty state.
 * Releases every recorded run-length entry, clears the list head and
 * all running sums, and marks that no block has been referenced yet
 * (adapt.last negative).
 */
void
ResetADAPT()
{
    RUN *node = adapt.head.next;

    /* release the run-length list, saving each link before freeing */
    while (node != NULL) {
	   RUN *following = node->next;

	   free(node);
	   node = following;
    }

    /* empty list: the head is a zeroed dummy entry */
    adapt.head.runlen = 0;
    adapt.head.count = 0;
    adapt.head.next = NULL;
    adapt.cur = &adapt.head;

    /* no statistics gathered yet */
    adapt.total = 0;
    adapt.exp = 0;
    adapt.sum = 0;
    adapt.cum = 0;

    /* no current run, no outstanding prediction */
    adapt.first = -100;
    adapt.last = -100;
    adapt.runlen = 0;
    adapt.distance = 0;
    adapt.required = FALSE;
}

/* @SUBTITLE "RefADAPT: Process block Reference" */
/*
 * RefADAPT: process one block reference.
 *
 *   block   - (input) block number of the reference
 *   mistake - (output) number of blocks that were predicted but will
 *             not be used because the run ended early; 0 otherwise
 *
 * Returns TRUE if this reference had been predicted.  An immediate
 * rereference of the previous block counts as predicted.
 */
boolean
RefADAPT(block, mistake)
	int block;
	int *mistake;
{
    boolean hit = FALSE;		/* was this reference predicted? */
    int wasted = 0;			/* predicted blocks never referenced */

    if (adapt.last < 0) {
	   /* very first reference of the sequence: just start a run */
	   new_run(block);
    } else if (block == adapt.last) {
	   /* immediate rereference: run length and prediction unchanged */
	   hit = TRUE;
    } else if (block == adapt.last + 1) {
	   /* sequential: predicted if the forecast reaches past the run so far */
	   hit = (adapt.distance > adapt.runlen);
	   update_run(block);
    } else {
	   /* run broken: whatever was predicted beyond it is a mistake */
	   wasted = adapt.distance - adapt.runlen;
	   if (wasted < 0)
		  wasted = 0;

	   finished_run();		/* record the run that just ended */
	   new_run(block);		/* and begin the next one here */
    }

    *mistake = wasted;
    return(hit);
}

/* @SUBTITLE "new_run: Start recording a new run" */
static void
new_run(block)
	int block;
{
    adapt.first = adapt.last = block;
    adapt.runlen = 1;

    /* reset 'current' pointer */
    adapt.cur = &adapt.head;
    adapt.sum = adapt.exp;
    adapt.cum = 0;

    /* move along list only when we are equal to the next runlen */
    if (adapt.cur->next != NULL && adapt.runlen == adapt.cur->next->runlen) {
	   /* move along list */
	   adapt.cur = adapt.cur->next;
	   adapt.sum -= adapt.cur->runlen * adapt.cur->count;
	   adapt.cum += adapt.cur->count;
    }

    /* make new prediction */
    predict();
}

/* @SUBTITLE "update_run: update a run" */
static void
update_run(block)
	int block;
{
    /* will call predict if we must called predict, or */
    /* if was equal to runlen, update changes prediction */
    boolean newpredict = adapt.required || (adapt.runlen == adapt.cur->runlen);

    adapt.runlen++;
    adapt.last = block;

    /* move along list only when we are equal to the next runlen */
    if (adapt.cur->next != NULL && adapt.runlen == adapt.cur->next->runlen) {
	   /* move along list */
	   adapt.cur = adapt.cur->next;
	   adapt.sum -= adapt.cur->runlen * adapt.cur->count;
	   adapt.cum += adapt.cur->count;
	   newpredict = TRUE;	/* prediction also changes when we move */
    }

    if (newpredict)
	 predict();
}

/* @SUBTITLE "finished_run: Finish recording a run" */
/*
 * finished_run: record the run that has just ended (length adapt.runlen)
 * in the run-length list: either bump the count of the entry adapt.cur
 * points at, or insert a new entry right after it.  Exits the program
 * if memory for a new entry cannot be obtained.
 */
static void
finished_run()
{
    RUN *new;				/* new run block */

    adapt.total++;			/* increment count of runs */

    /* either bump count of the current run, or add a new entry */
    if (adapt.cur->runlen == adapt.runlen) {
	   adapt.cur->count++;
#ifdef DEBUG
	   fprintf(stderr, "Another (%d) run length %d\n", 
			 adapt.cur->count, adapt.runlen);
#endif /* DEBUG */
    } else {
	   /* then adapt.runlen > adapt.cur->runlen */
	   /* make a new block and insert after adapt.cur */
	   /* NOTE(review): no declaration of malloc() is visible in this
	    * file; presumably dfk.h or opra.h supplies one -- confirm.
	    */
	   new = (RUN *)malloc(sizeof(RUN));
	   if (new == NULL) {
		  fprintf(stderr, "out of memory\n");
		  exit(EXIT_NOMEM);
	   }
	   new->runlen = adapt.runlen;
	   new->count = 1;
	   new->next = adapt.cur->next;
	   adapt.cur->next = new;
#ifdef DEBUG
	   fprintf(stderr, "New run length %d\n", adapt.runlen);
#endif /* DEBUG */
    }

    /* note that the total sum changes by exactly runlen: either one 
     * more of a previous runlen, or 1 of a new runlen. No need to
     * recompute all the pmf's and pdf's now.
     */
    adapt.exp += adapt.runlen;

    /* coeff determined in predict() */
}

/* @SUBTITLE "predict: predict the expected run length" */
/*
 * predict: set adapt.distance, the predicted total length of the
 * current run, from the run lengths recorded so far.
 *
 * With c = adapt.runlen the estimate is
 *     (c*cnt(c) + adapt.sum) / (adapt.total - adapt.cum + cnt(c))
 * rounded to an integer.  When no recorded run is at least as long as
 * the current one, a tentative guess from distance() is used instead
 * and adapt.required is set so that update_run() asks again.
 */
static void
predict()
{
    long count;			/* cnt(c) */

    count = (adapt.cur->runlen == adapt.runlen ? adapt.cur->count : 0);
    if (count == 0 && adapt.cum == adapt.total) {
	   /* We land here if:
	    *  - we're in first run (cum=total=0)
	    *  - current is longer than longest runlen 
	    * We use a tentative guess.
	    */
	   adapt.distance = distance(adapt.runlen);
	   adapt.required = TRUE; /* must call back for a new prediction */
    } else {
	   /* The divisor is nonzero here: either count > 0 or cum < total.
	    * Divide in double so large long sums keep full precision.
	    */
	   adapt.distance = 
		round((double)(count * adapt.cur->runlen + adapt.sum)
			 / (double)(adapt.total - adapt.cum + count)); /* coeff */
	   adapt.required = FALSE; /* don't call us until something changes */
    }
    
#ifdef DEBUG
    fprintf(stderr,
		  "runlen %d Predict length %d, total %d  %s\n",
		  adapt.runlen, adapt.distance, adapt.total,
		  adapt.required ? "callback required" : "");
#endif /* DEBUG */
}

/* @SUBTITLE "distance: how long might this run get?" */
/*
 * distance: tentative guess at the final length of a run that is
 * currently 'runlen' blocks long.  The guess doubles the run, except
 * that it never looks more than MaxDist blocks beyond the current
 * length.
 */
static int 
distance(runlen)
	int runlen;
{
    /* blocks to look ahead: the run length itself, capped at MaxDist */
    int ahead = (runlen > MaxDist) ? MaxDist : runlen;

    return (runlen + ahead);
}

/* @SUBTITLE "LastADAPT: What happens at end of string" */
/*
 * LastADAPT: account for the end of the reference string.
 *
 *   mistake - (output) number of blocks predicted beyond the final
 *             run's current length, or 0 if the prediction does not
 *             extend past it
 */
void
LastADAPT(mistake)
	int *mistake;
{
    int unused = adapt.distance - adapt.runlen;

    if (unused < 0)
	   unused = 0;
    *mistake = unused;
}

