/* @TITLE "exchunk - expand each chunk of a pattern" */
/*
 * exchunk.c - Copies a pattern file; each chunk in the pattern is 
 * expanded into multiple chunks in the output file, with a different
 * chunksize. Useful for creating gpr and lpr patterns that are 
 * equivalent except for the chunksize. 
 *
 * An alternative is to specify the new chunksize, and each *portion* in 
 * the existing pattern is rewritten as a series of chunks of that size,
 * with possibly one smaller chunk at the end of the portion. This 
 * will ensure the same block pattern. The above expansion method is
 * just a remapped version of this method, where the new chunksize becomes
 * the old (maximum) chunksize divided by the expansion factor.
 * In any case, ExpandFactor = OldChunksize / NewChunksize.
 *
 * Thus, the name "expand" is not really right for this program any more; 
 * it *resizes* the chunks to be the correct size. It is possible to 
 * specify *bigger* chunk sizes to grow larger chunks from smaller chunks.
 *
 * For local patterns, we use any parallelism that is available.
 *
 * It does not assume that the chunksize is constant across all chunks.
 * 
 * The comptime is reapportioned between chunks in a correct way.
 *
 * usage:
 *   exchunk [-#] args
 * where args are described below. The optional -# tells exchunk to start up
 * with # processes, regardless of the current cluster size (# should be
 * less than or equal to the cluster size). This is useful for global 
 * patterns, where -1 is the correct form.
 * 
 * David Kotz 6/90, updated 8/90
 */

/* RCS "$Id$" revision stamp for this file, kept as a string in the object
 * code. Nothing in the code shown here reads it. */
static char rcsid[] = "$Id: exchunk.c,v 7.1 91/05/09 19:33:24 dfk Tape2 $"; 

#include <stdio.h>
#include <usdfk.h>
#include "format.h"
#include "refs.h"
#include "pat-intern.h"

/* @SUBTITLE "Declarations" */
/* IMPORTED Data */
/* The pattern string currently being worked on. ExpandString and EndPortion
 * read its chunks[], nchunks, portions[] and nportions fields, and
 * ExpandString replaces chunks[]. Presumably selected by UsePattern() --
 * confirm against the pattern library. */
extern REFS *my_refs;

/* GLOBAL DATA */
/* Both names are filled in by ReadInfo; one extra byte beyond
 * P_FILENAMELEN is reserved for the terminating NUL. */
char filename[P_FILENAMELEN+1];	/* first filename */
char newfilename[P_FILENAMELEN+1]; /* new pattern filename */

/* Remember the header from the pattern file */
/* Loaded by GetOldFile, edited by ReadInfo (name, comment) and main
 * (chunksize, computation), then handed to CreateFile for the new file. */
static P_HEAD head;			/* header for pattern file */

/* And our shared variables */
/* Both are set once in ReadInfo and passed to Share() there.
 * ExpandFactor = old chunksize / new chunksize, so it is fractional
 * (less than 1) when the new chunks are bigger than the old ones. */
static float ExpandFactor;	/* expand each chunk to this many chunks */
static int NewChunkSize;		/* new chunk size in bytes */

/* see pat-intern.h */
/* Printable names indexed by style number (see StyleName). NUM_STYLES must
 * equal the number of initializers; each entry holds at most 4 characters
 * plus the NUL. */
#define NUM_STYLES 11
char style_names[][5] = {
    "RND", "LPR", "LPS", "LW", "GPR", "GPS", "GW", "SEG", "LW1", "MIX", "GRND"
};

/* LOCAL FUNCTIONS */
/* Old-style (unprototyped) forward declarations; argument lists are given
 * only at the definitions below. */
static void ReadInfo();
static void GetOldFile();
static void ConvertString();
static void ExpandString();
static char *StyleName();
static boolean EndPortion();

/* @SUBTITLE "main program" */
/*
 * main - resize the chunks of an existing pattern and write a new pattern.
 *
 * Steps: scan off the optional -# process limit, start the parallel
 * runtime, gather all parameters and load the old pattern (ReadInfo),
 * convert every string of the pattern (one ConvertString call per string,
 * head.nprocs of them), patch the header, and write the new file.
 *
 * Exits with status 0 on success, 1 if the -# argument is not a positive
 * number; ReadInfo and GetOldFile exit on their own errors.
 */
int
main(argc, argv)
	int argc;
	char **argv;
{
    if (argc >= 2 && *argv[1] == '-') {
	   /* the -# argument: limit the number of procs */
	   int nprocs = atoi(argv[1]+1);

	   /* atoi yields 0 for "-" or "-x"; a limit below one process is
	    * meaningless, so refuse it rather than configure the runtime
	    * with it. */
	   if (nprocs < 1) {
		  fprintf(stderr,
				"Process limit '%s' must be of the form -# with # >= 1\n",
				argv[1]);
		  exit(1);
	   }
	   printf("Limiting to %d procs\n", nprocs);
	   SetUsConfig(configProcs, nprocs);
	   argc--; argv++;		/* "scan" off this argument */
    }

    InitializeUs();			/* so we can use parallelism, if there */

    ReadInfo(argc, argv);	/* read filenames, AND load old pattern */

    /* convert all strings (nprocs==1 for global) */
    GenOnI(ConvertString, head.nprocs);

    /* The header is patched only after the conversion has run. */
    head.chunksize = NewChunkSize;	/* the new maximum */
    head.computation /= ExpandFactor; /* a rough new average */

    /* open new file */
    CreateFile(newfilename, &head); /* make new file */

    WritePattern();

    /* Finish up */
    ClosePattern();

    printf("File %s complete\n", newfilename);

    exit(0);
}

/* @SUBTITLE "ReadInfo: Read in the various pattern parameters" */
#define LINESIZE 11
/*
 * ReadInfo - collect every parameter of the run and load the old pattern.
 *
 * Arguments are consumed positionally; anything not supplied on the
 * command line is prompted for:
 *   1. filename of the existing pattern (loaded at once via GetOldFile)
 *   2. filename of the new pattern (asks before overwriting an existing file)
 *   3. "b<bytes>" for a new chunksize, or a plain integer expansion factor
 *   4. pattern name
 *   5. comment
 *
 * On return filename, newfilename, head.name, head.comment, ExpandFactor
 * and NewChunkSize are set, and the last two have been passed to Share().
 * Exits with status 1 if a filename is empty, and with status 0 if the
 * user declines to overwrite an existing file.
 */
static void
ReadInfo(argc, argv)
	int argc;				/* pointing just before the args we scan */
	char **argv;
{
    /* One byte more than the length handed to ReadString, matching how
     * the filename buffers are sized for the same call (presumably the
     * length excludes the terminating NUL -- confirm in ReadString). */
    char line[LINESIZE+1];
    boolean ok;
    boolean ask;			/* TRUE when the value must be prompted for */
    boolean Expand;			/* TRUE: expansion factor; FALSE: chunksize */
    int factor;			/* early value for ExpandFactor */

    /* ---- existing pattern file ---- */
    if (--argc > 0) {
	   strncpy(filename, *++argv, P_FILENAMELEN);
	   filename[P_FILENAMELEN] = '\0';	/* strncpy may not terminate */
	   printf("Filename of EXISTING pattern is '%s'\n", filename);
    } else {
	   printf("Name of EXISTING pattern FILE (%d chars): ", P_FILENAMELEN);
	   ReadString(filename, "Filename", P_FILENAMELEN);
    }
    if (*filename == '\0') {
	   printf("Must specify filename\n");
	   exit(1);
    }

    /* Load it now: head.chunksize is needed to validate the factor below */
    GetOldFile();

    /* ---- new pattern file ---- */
    if (--argc > 0) {
	   strncpy(newfilename, *++argv, P_FILENAMELEN);
	   newfilename[P_FILENAMELEN] = '\0';	/* strncpy may not terminate */
	   printf("Filename of NEW pattern is '%s'\n", newfilename);
    } else {
	   printf("Name of NEW pattern FILE (%d chars): ", P_FILENAMELEN);
	   ReadString(newfilename, "Filename", P_FILENAMELEN);
    }
    if (*newfilename == '\0') {
	   printf("Must specify filename\n");
	   exit(1);
    }

    /* Confirm before clobbering; an empty answer counts as yes */
    if (access(newfilename, 0) == 0) {
	   printf("File '%s' exists. Overwrite [y]? ", newfilename);
	   ReadString(line, "yes or no", LINESIZE);
	   switch (*line) {
		  case 'y':
		  case 'Y':
		  case '\0': {
			 break;
		  }
		  case 'n':
		  case 'N':
		  default: {
			 printf("File not touched.\n");
			 exit(0);
		  }
	   }
    }

    /* ---- expansion factor or new chunksize ---- */
    if (--argc > 0 ) {
	   /* if this arg is a number starting with a 'b', then this is
	    * a number of bytes that is to be a chunksize.
	    * Otherwise it is an expansion factor.
	    */
	   if (**++argv == 'b') {
		  NewChunkSize = atoi(++*argv);	/* skip the 'b' */
		  Expand = FALSE;
	   } else {
		  factor = atoi(*argv);
		  Expand = TRUE;
	   }
	   ask = FALSE;
    } else
	 ask = TRUE;

    if (ask) {
	   /* Keep asking until the answer starts with 'e' or 'c' */
	   while (1) {
		  printf("Will you specify an expansion factor (e) or a chunksize (c)? ");
		  ReadString(line, "Expansion/Chunksize", LINESIZE);
		  if (line[0] == 'e') {
			 Expand = TRUE;
			 break;
		  } else if (line[0] == 'c') {
			 Expand = FALSE;
			 break;
		  }
	   }
    }

    /* Validate the value; a bad command-line value falls back to
     * prompting, because ask is forced TRUE after the first pass. */
    do {
	   if (ask) {
		  if (Expand) {
			 printf("Expand each chunk to how many chunks: ");
			 ReadNumber("Expansion factor", &factor);
		  } else {
			 printf("New Chunksize, in bytes: ");
			 ReadNumber("New Chunksize", &NewChunkSize);
		  }
	   }
	   ok = FALSE;
	   ask = TRUE;
	   if (Expand) {
		  if (factor < 1)
		    printf("Expansion (%d) must be at least 1\n", factor);
		  else if (head.chunksize % factor != 0)
		    printf("Expansion (%d) does not divide chunksize (%d) evenly\n",
				 factor, head.chunksize);
		  else
		    ok = TRUE;
	   } else {
		  if (NewChunkSize < 1)
		    printf("Chunksize (%d) must be at least 1\n", NewChunkSize);
		  else
		    ok = TRUE;
	   }
    } while (!ok);

    /* Derive whichever of the two values was not given directly */
    if (Expand) {
	   ExpandFactor = factor;
	   NewChunkSize = head.chunksize / ExpandFactor;
    } else
	 ExpandFactor = (float)(head.chunksize) / NewChunkSize;

    printf("New Chunksize=%d bytes, ExpandFactor=%g\n",
		 NewChunkSize, ExpandFactor);
    Share(&ExpandFactor);
    Share(&NewChunkSize);

    /* ---- pattern name and comment (overwrite the loaded header) ---- */
    /* NOTE(review): strncpy leaves the field unterminated when the argument
     * has P_NAMELEN (or P_COMLEN) characters or more; whether that is safe
     * depends on how P_HEAD sizes these fields -- confirm in the header. */
    if (--argc > 0) {
	   strncpy(head.name, *++argv, P_NAMELEN);
	   printf("Pattern name is '%s'\n", head.name);
    } else {
	   printf("Name of pattern (%d chars): ", P_NAMELEN);
	   ReadString(head.name, "Name", P_NAMELEN);
    }

    if (--argc > 0) {
	   strncpy(head.comment, *++argv, P_COMLEN);
	   printf("Comment is '%s'\n", head.comment);
    } else {
	   printf("Comment (%d chars): ", P_COMLEN);
	   ReadString(head.comment, "Comment", P_COMLEN);
    }

}

/* @SUBTITLE "GetOldFile: get info from existing file" */
/*
 * GetOldFile - load the pattern named by the global `filename` into the
 * global `head` (via LoadPatternShared) and print a summary of its header.
 * Exits with status 1 if the pattern cannot be loaded.
 */
static void
GetOldFile()
{
    /* read whole pattern, shared among all procs */
    if (!LoadPatternShared(filename, &head)) {
	   fprintf(stderr, "Cannot load pattern '%s'\n", filename);
	   exit(1);
    }

    printf("Comment: \n%s\n", head.comment);
    printf("File size: %u bytes\n", head.filesize);
    printf("Max Chunk size: %u bytes\n", head.chunksize);
    printf("References are %s\n", head.writes ? "Writes" : "Reads");
    printf("%s pattern (%s)\n",
		 head.global ? "Global" : "Local", StyleName(head.style));

    /* the process count is reported only for local patterns */
    if (!head.global)
	 printf("Number of processes: %d\n", head.nprocs);

    /* no computation figure is reported for the MIX style */
    if (head.style != PAT_MIX)
	 printf("Computation: %u msec, %s\n", head.computation,
		   head.computation_fixed ? "Fixed" : "Average");
    else
	 printf("Computation: unknown\n");
    printf("\n");
}

/* @SUBTITLE "StyleName: return the name of the pattern style" */
/*
 * StyleName - return a printable name for a pattern style number.
 *
 * For 0 <= style < NUM_STYLES the result points into style_names[].
 * Any other value yields the text "(number N)" in a static buffer, which
 * is overwritten by the next such call (so the function is not reentrant
 * for out-of-range styles). The caller must not free the result.
 */
static char *
StyleName(style)
	int style;
{
    /* Must be static: the pointer is handed back to the caller, and an
     * automatic array would no longer exist once this function returns. */
    static char name[100];

    if (style >= 0 && style < NUM_STYLES)
	 return(style_names[style]);

    sprintf(name, "(number %d)", style);
    return(name);
}

/* @SUBTITLE "ConvertString: Convert one string" */
/*
 * ConvertString - worker handed to GenOnI by main: select string number
 * `index` with UsePattern, then resize its chunks with ExpandString.
 * The first argument is not used.
 */
static void
ConvertString(dummy, index)
	int dummy;			/* ignored */
	int index;			/* number of the string to convert */
{
    (void) dummy;

    UsePattern(index);		/* choose the string to operate on */
    ExpandString();			/* rewrite that string's chunks */
}

/* @SUBTITLE "ExpandString: Expand each chunk of one string" */
/* The string is in my_refs */
static void
ExpandString()
{
    ONEREF *newchunks;		/* the new set of chunks */
    ONEREF *oldchunks;		/* the old set of chunks */
    unsigned int newchunksize; /* the new chunk size */
    int nchunks;			/* old number of chunks */
    int old, new;			/* chunk number in old and new sets */
    ONEREF *newchunk, *oldchunk; /* pointer to one old and one new chunk */
    int p;				/* portion number */
    boolean write;			/* write or read */
    unsigned int offset;		/* offset in file */
    unsigned int comptime;	/* current aggregate comp time */
    unsigned int size;		/* current sequential piece, in bytes */
    unsigned int lastsize, lastcomp; /* size&comp for most recent chunk */
    int alloced;

    /* some useful values */
    nchunks = my_refs->nchunks;
    oldchunks = my_refs->chunks;
    /* allocate new chunks array */
    newchunks = (ONEREF *)
	 AllocGlobal((int) (nchunks * ExpandFactor * 2 + 1) * sizeof(ONEREF));
    alloced = ((int) (nchunks * ExpandFactor * 2 + 1));

    /* start at beginning of patterns */
    new = 0;				/* new pattern chunk number */
    old = 0;				/* old pattern chunk number */
    p = 0;				/* old/new pattern portion number */
    size = 0;				/* initially, nothing to work with */

    while (old < nchunks) {
	   if (size == 0) {
		  /* start fresh on each "portion" */
		  oldchunk = &(oldchunks[old]);
		  write = oldchunk->write;
		  offset = oldchunk->offset;
		  size = oldchunk->length;
		  comptime = oldchunk->comptime;
	   }
	   /* Loop until we have enough bytes, or end of portion */
	   while (size < NewChunkSize && !EndPortion(old,p)) {
		  oldchunk = &(oldchunks[++old]);
		  size += oldchunk->length;
		  comptime += oldchunk->comptime;
	   }
	   /* Parcel the piece out into multiple new chunks */
	   lastcomp = oldchunk->comptime;
	   lastsize = oldchunk->length;
	   while (size >= NewChunkSize) {
		  /* make a new chunk of size "NewChunkSize" */
		  newchunk = &(newchunks[new++]);
		  newchunk->write = write;
		  newchunk->offset = offset;
		  newchunk->length = NewChunkSize;
		  newchunk->comptime = comptime - 
		    lastcomp * (float)(size-NewChunkSize) / lastsize;
		  comptime -= newchunk->comptime;
		  offset += NewChunkSize;
		  size -= NewChunkSize;
	   }
	   /* Use up the rest only if end of portion */
	   if (size > 0 && EndPortion(old,p)) {
		  /* make a new chunk of size "size", to finish portion */
		  newchunk = &(newchunks[new++]);
		  newchunk->write = write;
		  newchunk->offset = offset;
		  newchunk->length = size; /* definitely < NewChunkSize */
		  newchunk->comptime = comptime; /* left over from above */
		  offset += size;
		  size = 0;
	   }

	   /* Need to update portion marker? */
	   /* Note number of portions does not change */
	   if (p < my_refs->nportions && my_refs->portions[p] == old)
		my_refs->portions[p++] = new-1;

	   /* If this chunk is used up, move to the next one */
	   if (size == 0)
		old++;
    }

    /* Now toss the old chunks and replace with new chunks */
    my_refs->chunks = newchunks;
    UsFree(oldchunks);

    if (new > alloced)
	 printf("overran allocation in new chunk list %d > %d\n", new, alloced);

    /* fiddle some key values */
    my_refs->nchunks = new;
    my_refs->chunksize = NewChunkSize; /* remember, only a maximum */
}

/* @SUBTITLE "EndPortion: are we at the end of a portion?" */
/*
 * EndPortion - is chunk number c of the current string (my_refs) the last
 * chunk of its portion?
 *
 * Returns TRUE when any of these holds:
 *   - c is the last chunk of the pattern, or beyond it
 *   - portion marker p exists and names chunk c
 *   - the next chunk differs in its read/write flag
 *   - the next chunk does not start where chunk c ends
 * and FALSE otherwise. Nothing is modified.
 */
static boolean
EndPortion(c, p)
	int c;				/* current chunk number */
	int p;				/* current portion number */
	/* global my_refs */
{
    ONEREF *chunk, *nextchunk;

    /* The last chunk has no successor to compare against, so it must be
     * caught here: looking at chunks[c+1] would read past the array. */
    if (c + 1 >= my_refs->nchunks)
	 return(TRUE);			/* end of pattern */
    if (p < my_refs->nportions && my_refs->portions[p] == c)
	 return(TRUE);			/* end of portion, by marker */

    chunk = &(my_refs->chunks[c]);
    nextchunk = &(my_refs->chunks[c+1]);

    if (chunk->write != nextchunk->write)
	 return(TRUE);			/* change in read/write flag */
    if (chunk->offset + chunk->length != nextchunk->offset)
	 return(TRUE);			/*  discontinuity in byte sequence */

    return(FALSE);
}
