/* @TITLE "predict-ADAPT.c - ADAPT predict algorithm" */
/* predict-ADAPT.c: Adaptable portion recognizer
 * 
 * David Kotz 1989
 *
 * This algorithm is like PORT, in that it recognizes sequential portions,
 * but tries to maintain the PDF of the run lengths it has seen so far,
 * and use it to predict run lengths. It makes no attempt to look for
 * regular skiplength patterns, so it won't jump over skips.
 *
 * See notebook for 10/31/89 for derivation.
 * We base this on the idea that the expected value of the run length,
 * given the current run length c, is
 *
 *                  1                
 * E[R|c] =  ---------------  *  SUM (x*p(x))
 *           1 - F(c) + p(c)    x>=c
 * 
 * Where F(c) is the CDF of the run length = SUM(p(x)) for all x<=c
 *       p(c) is the pmf of the run length = cnt(c)/total.
 * where cnt(c) is the number of occurrences of runlen c,
 *       total is the total number of runs = SUM(cnt(x)) for all x.
 * 
 * First we factor out the 'total' in the denominator of all the p(x)
 * (see research notes for 11/30/89):
 *
 *                      1
 * E[R|c] =  -----------------------  *  SUM (x*cnt(x))
 *           total - cum(c) + cnt(c)    x>=c
 *
 * where cum(c) = SUM(cnt(x)) for all x<=c.
 * 
 * We compute the two factors of the formula separately, as coeff and sum.
 * Actually 'sum' and 'cum' are computed incrementally, as c changes,
 * and 'coeff' is only calculated when we make the prediction.
 * Actually, adapt.sum is SUM (x*cnt(x)) for
 * x>c, and we add in c*cnt(c) when we compute E[R|c], since cnt(c) is often
 * zero. This way 'sum' only changes whenever we move from one oldrun record
 * to another. Similarly, cum(c) is adjusted when  we move from one 
 * oldrun record to another.
 *
 * Immediate rereference counts as a correct prediction.
 *
 * FUNCTIONS:
 *      InitPredict_ADAPT
 *
 * 'Local' entry points (called with function pointers):
 *      LateNotify
 *      Notify
 *      CheckAvailable
 *      GetWork
 *      GiveBackWork
 *      Done
 *      Dump
 *
 * David Kotz  November/December 1989
 */

/* RCS identification string; it is not referenced by any code in this file. */
static char rcsid[] = "$Id: predict-adapt.c,v 7.1 91/05/09 19:31:27 dfk Tape2 $"; 

#include <stdio.h>
#include <usdfk.h>
#include "internal.h"
#include "prefetch.h"
#include "stats.h"
#include "rapidelog.h"
#include "predict-debug.h"

/* some math functions */
/* rint() is declared by hand here (K&R style, no prototype) instead of
 * through a header. round() is a local macro alias for it; predict() uses
 * it to turn the expected run length into an integer distance.
 */
extern double rint();		/* needs -lm load option */
#define round(x) rint(x)

/*
 * Forward declarations (K&R style, without parameter lists) for every
 * static function in this file, so that each can be used before its
 * definition.
 */

/* entry points installed into rpd->prefetch by InitPredict_ADAPT */
static void Done();
static void Dump();
static boolean Notify();
static void LateNotify();
static boolean CheckAvailable();
static boolean GetWork();
static void GiveBackWork();

/* run bookkeeping */
static void new_run();
static void update_run();
static void finished_run();

/* prediction */
static void predict();
/* distance() is called from predict() ahead of its definition; without
 * this declaration the call implicitly declares an external function,
 * which then conflicts with the later static definition.
 */
static int distance();
#define NOPREDICT (-100)		/* value used for no prediction */
static void mistakes();

/* @SUBTITLE "Local Data" */

/* Look-ahead limit used by distance() when it makes a tentative guess:
 * runs longer than MaxDist are predicted to continue for MaxDist more
 * blocks. InitPredict_ADAPT() overwrites it when called with parm > 0;
 * being a single file-scope variable, the latest such call wins.
 */
static int MaxDist = 5;		/* number of blocks to predict for record runs */

/* One entry in the list of run lengths seen so far: how many finished
 * runs had exactly this length. Entries hang off adaptdata.head;
 * finished_run() allocates them and Done() frees them.
 */
typedef struct runstruct RUN;
struct runstruct {
    int runlen;			/* length of the run */
    int count;				/* how many occurrences */
    RUN *next;				/* link in list */
};

/* Per-file predictor state, stored in rpd->prefetch.private.
 * In the comments below, c is the current run length (runlen) and
 * cnt(x) is the number of finished runs of length x.
 */
struct adaptdata {
    /* current run */
    int first;				/* start of current run */
    int last;				/* last block read (NOPREDICT before any) */
    int runlen;			/* current runlen = last-first+1 */
    int distance;			/* predicted runlength */
    int usedcount;			/* number of blocks ref'd or pref'd */
    /* run length history */
    RUN head;				/* dummy head of runlength list (runlen 0) */
    RUN *cur;				/* current run's pointer into list */
    int total;				/* total number of runs recorded */
    long exp;				/* SUM(x*cnt(x)) for all x */
    long sum;				/* SUM(x*cnt(x)) for all x>c; the c*cnt(c) */
						/* term is added back in by predict() */
    long cum;				/* SUM(cnt(x)) for all x<=c */
    boolean required;		/* TRUE if required to call predict */
						/* on the next update_run() */
};

/* @SUBTITLE "InitPredict_ADAPT: Initialize for ADAPT algorithm" */
void
InitPredict_ADAPT(rpd, parm)
	RAPIDFILE *rpd;
	int parm;
{
    struct adaptdata *adapt;

    adapt = (struct adaptdata *)AllocLocal(sizeof(struct adaptdata));

    adapt->first = adapt->last = NOPREDICT;
    adapt->distance = 0;
    adapt->usedcount = 0;
    adapt->runlen = 0;

    bzero(&adapt->head, sizeof(RUN));
    adapt->cur = &adapt->head;
    adapt->total = 0;
    adapt->sum = 0;
    adapt->exp = 0;
    adapt->cum = 0;
    adapt->required = FALSE;

    rpd->prefetch.available = FALSE; /* no prediction yet */
    rpd->prefetch.private = (ANYPTR) adapt;

    rpd->prefetch.donerpd = Done;
    rpd->prefetch.notify = Notify;
    rpd->prefetch.latenotify = LateNotify;
    rpd->prefetch.checkavail = CheckAvailable;
    rpd->prefetch.getwork = GetWork;
    rpd->prefetch.giveback = GiveBackWork;
    rpd->prefetch.dump = Dump;

    if (parm > 0)
	 MaxDist = parm;
}

/* @SUBTITLE "LateNotify: Multiple notifications" */
/*
 * LateNotify: rebuild the current-run state from a summary of accesses.
 *
 * The summary is a run of 'length' consecutive blocks ending at 'last'
 * (none if length <= 0), optionally followed by one more block 'sector'
 * (none if sector < 0). 'node' is only used in the TRACE output.
 * On return rpd->prefetch.available says whether blocks remain to be
 * prefetched.
 */
static void
LateNotify(node, rpd, private, last, length, sector)
	int node;				/* processor number */
	RAPIDFILE *rpd;		/* can be trusted: this is local predictor */
	ANYPTR private;		/* private data */
	int last;				/* last block accessed in run */
	int length;			/* length of consecutive run */
	int sector;			/* most recent block accessed (-1 if none) */
{
    struct adaptdata *adapt = (struct adaptdata *)private;
    int have_run = (length > 0);
    int have_sector = (sector >= 0);

#ifdef TRACE
    printf("Proc %d (%d) in ADAPT LateNotify: last=%d, length=%d, sector=%d\n",
		 Vnode, node, last, length, sector);
    fflush(stdout);
#endif

    if (have_run && have_sector) {
	   if (sector == last + 1) {
		  /* the sector simply extends the run by one block */
		  new_run(adapt, sector, length + 1);
	   } else {
		  /* two portions: record the old run, then start at sector */
		  new_run(adapt, last, length);
		  finished_run(adapt);
		  new_run(adapt, sector, 1);
	   }
    } else if (have_sector) {
	   /* a single portion holding just this sector */
	   new_run(adapt, sector, 1);
    } else if (have_run) {
	   /* a single portion, no trailing sector */
	   new_run(adapt, last, length);
    }
    /* otherwise there is nothing to record */

    rpd->prefetch.available = (adapt->usedcount < adapt->distance);
}

/* @SUBTITLE "Notify: notification that a sector is accessed" */
/*
 * Notify: record that 'block' was just accessed.
 *
 * Classifies the access as the first one, an immediate rereference, the
 * next block of the current run, or the start of a new run, and updates
 * the run state accordingly. A correctly predicted access bumps
 * my_stats->correct.
 *
 * Returns TRUE when the block extends the run and had already been
 * counted in usedcount (handed out by GetWork), FALSE otherwise.
 */
static boolean
Notify(rpd, block)
	RAPIDFILE *rpd;
	int block;
{
    struct adaptdata *adapt = (struct adaptdata *)(rpd->prefetch.private);
    boolean predicted = FALSE;
    boolean wanted = FALSE;

    if (adapt->last < 0) {
	   /* very first reference; new_run() fills in first and last */
	   new_run(adapt, block, 1);
    } else if (block == adapt->last) {
	   /* immediate rereference: run state is untouched, counts as a hit */
	   predicted = TRUE;
    } else if (block == adapt->last + 1) {
	   /* in-order reference: it was predicted if the predicted length */
	   /* reaches beyond the run as it stood before this block */
	   predicted = (adapt->distance > adapt->runlen);
	   update_run(adapt, block);
	   wanted = (adapt->usedcount >= adapt->runlen);
    } else {
	   /* out-of-order reference: report unused prefetches, record the */
	   /* run that just ended, and begin a new one at this block */
	   mistakes(rpd, adapt);
	   finished_run(adapt);
	   new_run(adapt, block, 1);
    }

    if (predicted)
	 my_stats->correct++;

    /* keep usedcount >= runlen */
    if (adapt->runlen > adapt->usedcount)
	 adapt->usedcount = adapt->runlen;

    rpd->prefetch.available = (adapt->usedcount < adapt->distance);

    return(wanted);
}

/* @SUBTITLE "new_run: Start recording a new run" */
static void
new_run(adapt, last, length)
	struct adaptdata *adapt;
	int last;				/* last block in the new run */
	int length;			/* length of the new run */
{
    adapt->first = last - length + 1;
    adapt->last = last;
    adapt->runlen = length;

    /* reset 'current' pointer */
    adapt->cur = &adapt->head;
    adapt->sum = adapt->exp;
    adapt->cum = 0;

    /* move along list only when we are equal to the next runlen */
    if (adapt->cur->next != NULL && adapt->runlen == adapt->cur->next->runlen) {
	   /* move along list */
	   adapt->cur = adapt->cur->next;
	   adapt->sum -= adapt->cur->runlen * adapt->cur->count;
	   adapt->cum += adapt->cur->count;
    }

    /* reinitialize usedcount and distance, and make new prediction */
    adapt->usedcount = adapt->runlen;
    adapt->distance = adapt->runlen;

    predict(adapt);
}

/* @SUBTITLE "update_run: update a run" */
/*
 * update_run: extend the current run by one block, 'block'.
 *
 * Advances 'cur' when the new run length reaches the next recorded
 * length, adjusting sum and cum to match, and calls predict() only when
 * the prediction can have changed:
 *   - the last prediction was tentative (adapt->required), or
 *   - the old length equalled cur->runlen, so its count no longer
 *     applies, or
 *   - 'cur' moved to the next entry.
 */
static void
update_run(adapt, block)
	struct adaptdata *adapt;
	int block;
{
    /* decided on the state before the run grows */
    boolean newpredict = adapt->required 
	 || (adapt->runlen == adapt->cur->runlen);

    adapt->runlen++;
    adapt->last = block;

    /* move along list only when we are equal to the next runlen */
    if (adapt->cur->next != NULL && adapt->runlen == adapt->cur->next->runlen){
	   /* move along list */
	   adapt->cur = adapt->cur->next;
	   /* widen before multiplying so the product cannot overflow int */
	   adapt->sum -= (long)adapt->cur->runlen * adapt->cur->count;
	   adapt->cum += adapt->cur->count;
	   newpredict = TRUE;	/* prediction also changes when we move */
    }

    if (newpredict)
	 predict(adapt);
}

/* @SUBTITLE "finished_run: Finish recording a run" */
/*
 * finished_run: record the run that just ended in the run-length list.
 *
 * If 'cur' already describes this length its count is bumped; otherwise
 * a new entry is inserted right after 'cur'. The totals (total, exp) are
 * only updated once the run has actually been recorded, so a failed
 * allocation drops the run but leaves the statistics consistent with
 * the list.
 */
static void
finished_run(adapt)
	struct adaptdata *adapt;
{
    RUN *new;				/* new run block */

    /* either bump count of the current run, or add a new entry */
    if (adapt->cur->runlen == adapt->runlen) {
	   adapt->cur->count++;
#ifdef DEBUG
	   fprintf(stderr, "Another (%d) run length %d\n", 
			 adapt->cur->count, adapt->runlen);
#endif /* DEBUG */
    } else {
	   /* then adapt->runlen > adapt->cur->runlen */
	   /* make a new block and insert after adapt->cur */
	   new = (RUN *)malloc(sizeof(RUN));
	   if (new == NULL) {
		  fprintf(stderr, "out of memory in ADAPT finished_run\n");
		  return;			/* give up; nothing was recorded */
	   }
	   new->runlen = adapt->runlen;
	   new->count = 1;
	   new->next = adapt->cur->next;
	   adapt->cur->next = new;
#ifdef DEBUG
	   fprintf(stderr, "New run length %d\n", adapt->runlen);
#endif /* DEBUG */
    }

    adapt->total++;			/* increment count of runs */

    /* note that the total sum changes by exactly runlen: either one 
     * more of a previous runlen, or 1 of a new runlen. No need to
     * recompute all the pmf's and pdf's now.
     */
    adapt->exp += adapt->runlen;

    /* the coefficient of the formula is determined in predict() */
}


/* @SUBTITLE "predict: predict next block for prefetch" */
/*
 * predict: set adapt->distance, the predicted length of the current run.
 *
 * With c = adapt->runlen and cnt(c) the number of recorded runs of
 * exactly that length, the prediction is
 *
 *     (c*cnt(c) + sum) / (total - cum + cnt(c))
 *
 * i.e. the mean length of the recorded runs that were at least c long.
 * When no recorded run is that long (including before any run has been
 * recorded) a tentative guess from distance() is used and
 * adapt->required is set so update_run() asks again on the next block.
 */
static void
predict(adapt)
	struct adaptdata *adapt;
{
    long count;			/* cnt(c) */
    long remaining;		/* number of recorded runs of length >= c */

    count = (adapt->cur->runlen == adapt->runlen ? adapt->cur->count : 0);
    if (count == 0 && adapt->cum == adapt->total) {
	   /* We land here if:
	    *  - we're in first run (cum=total=0)
	    *  - current is longer than longest runlen 
	    * We use a tentative guess.
	    */
	   adapt->distance = distance(adapt->runlen);
	   adapt->required = TRUE; /* must call back for a new prediction */
    } else {
	   /* remaining > 0 here: either count > 0 or cum < total */
	   remaining = adapt->total - adapt->cum + count;
	   /* divide in double: float cannot represent large sums exactly */
	   adapt->distance = (int)
		round((double)(count * adapt->cur->runlen + adapt->sum)
			 / (double)remaining);
	   adapt->required = FALSE; /* don't call us until something changes */
    }
    
#ifdef DEBUG
    fprintf(stderr,
		  "runlen %d Predict length %d, total %d  %s\n",
		  adapt->runlen, adapt->distance, adapt->total,
		  adapt->required ? "callback required" : "");
#endif /* DEBUG */
}

/* @SUBTITLE "distance: how long might this run get?" */
/*
 * distance: tentative guess at the final length of a run that is
 * currently 'runlen' blocks long.
 *
 * The run is guessed to grow by its own length again, but by no more
 * than MaxDist blocks.
 */
static int 
distance(runlen)
	int runlen;
{
    int growth;			/* how many more blocks we expect */

    growth = (runlen > MaxDist) ? MaxDist : runlen;

    return(runlen + growth);
}

/* @SUBTITLE "mistakes: process mistakes" */
/*
 * mistakes: report every block that was counted in usedcount but lies
 * beyond the last block actually read, by calling RT_PrefetchMistake()
 * on each block in (last, first + usedcount).
 */
static void
mistakes(rpd, adapt)
	RAPIDFILE *rpd;		/* RPD to pass on */
	struct adaptdata *adapt;	/* adapt data */
{
    int blk = adapt->last + 1;	/* first block past the run */
    int limit = adapt->first + adapt->usedcount; /* one past last used */

    while (blk < limit) {
	   RT_PrefetchMistake(rpd, blk);
	   blk++;
    }
}

/* @SUBTITLE "CheckAvailable: is there work available?" */
/* Are there any blocks to be prefetched? */

/*
 * CheckAvailable: refresh rpd->prefetch.available and return it.
 * Work is available while fewer blocks have been used than the
 * predicted run length.
 */
static boolean				/* TRUE if there's work */
CheckAvailable(rpd)
	RAPIDFILE *rpd;
{
    struct adaptdata *state = (struct adaptdata *)(rpd->prefetch.private);

    /* FALSE keeps us from being called until next Notify() */
    rpd->prefetch.available = (state->distance > state->usedcount);

    return(rpd->prefetch.available);
}

/* @SUBTITLE "GetWork: get a block to prefetch" */
/*
 * GetWork: hand out the next block to prefetch.
 *
 * On success stores the block number in *block, counts it in usedcount,
 * and returns TRUE; returns FALSE when the prediction is used up.
 * rpd->prefetch.available is updated either way.
 */
static boolean				/* TRUE if block was found */
GetWork(rpd, block)
	RAPIDFILE *rpd;
	int *block;	/* returned */
{
    struct adaptdata *state = (struct adaptdata *)(rpd->prefetch.private);

    /* nothing left within the predicted run */
    if (state->usedcount >= state->distance) {
	   rpd->prefetch.available = FALSE;
	   return(FALSE);
    }

    /* next unused block of the run */
    *block = state->first + state->usedcount;
    state->usedcount++;
    rpd->prefetch.available = (state->usedcount < state->distance);

    return(TRUE);
}

/* @SUBTITLE "GiveBackWork: Sector could not be prefetched" */

/*
 * GiveBackWork: undo one GetWork(). The used-block count is lowered by
 * one and work is marked available again; 'sector' itself is not
 * examined.
 */
static void
GiveBackWork(rpd, sector)
	RAPIDFILE *rpd;
	int sector;
{
    struct adaptdata *state = (struct adaptdata *)(rpd->prefetch.private);

    state->usedcount -= 1;
    rpd->prefetch.available = TRUE;
}

/* @SUBTITLE "Done: What happens at end of string" */
/*
 * Done: tear down the predictor for this file.
 * Reports outstanding mispredictions, frees every run-length entry and
 * the private state, and clears rpd->prefetch.private.
 */
static void
Done(rpd)
	RAPIDFILE *rpd;
{
    struct adaptdata *state = (struct adaptdata *)(rpd->prefetch.private);
    RUN *node;				/* entry being freed */
    RUN *following;			/* saved link, read before the free */

    /* mark all mistakes */
    mistakes(rpd, state);

    /* free run list; the head is embedded in 'state', so start after it */
    node = state->head.next;
    while (node != NULL) {
	   following = node->next;
	   free(node);
	   node = following;
    }

    UsFree(state);
    rpd->prefetch.private = NULL;
}

/* @SUBTITLE "Dump: debugging dump" */
/*
 * Dump: print the predictor state for debugging.
 * Prints MaxDist, then every field of the private state, or a note when
 * rpd->prefetch.private is NULL.
 */
static void
Dump(rpd)
	RAPIDFILE *rpd;
{
    struct adaptdata *adapt = (struct adaptdata *)(rpd->prefetch.private);

    printf("  ADAPT Predictor (MaxDist = %d):\n", MaxDist);
    if (adapt != NULL) {
	   printf("   first = %d\n", adapt->first);
	   printf("   last = %d\n", adapt->last);
	   printf("   runlen = %d\n", adapt->runlen);
	   printf("   distance = %d\n", adapt->distance);
	   printf("   usedcount = %d\n", adapt->usedcount);
	   /* pointers are converted to unsigned long and printed with %lx; */
	   /* passing them to %x mismatches the argument where they are */
	   /* wider than int */
	   printf("   head.next = 0x%lx\n", (unsigned long)adapt->head.next);
	   printf("   cur = 0x%lx\n", (unsigned long)adapt->cur);
	   printf("   cur->runlen = %d\n", adapt->cur ? adapt->cur->runlen : 0);
	   printf("   total = %d\n", adapt->total);
	   printf("   exp = %ld\n", adapt->exp);
	   printf("   sum = %ld\n", adapt->sum);
	   printf("   cum = %ld\n", adapt->cum);
	   printf("   required = %s\n", adapt->required ? "TRUE" : "FALSE");
    } else
	 printf("   NULL adaptdata\n");	/* terminate the line like the rest */
}
