/* TAPE/PVM %W% %G% */

/* TAPE/PVM control task */

#include <sys/stat.h>
#include <sys/time.h>
#include <sys/param.h>
#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <unistd.h>
#include "pvm3.h"

#include "tape.h"
#include "tapetask_globals.h"
#include "tape_clockstats.h"
#include "tape_tags.h"

/* Size (terminating NUL included) of the host-name buffer in struct liste_host. */
#define HOSTNAMELENGTH 40

/* Presumably the name of the clock synchronisation task -- TODO confirm. */
#define CS_TASK "clock_synchro"

/* Estimated duration printed in the verbose SBA messages.
 * The expansion reads temps_attente, nb_pingpong and a variable named
 * `wsize`, all of which must be in scope where the macro is used.
 * It is fully parenthesized so that it can be embedded safely in a
 * larger expression.
 */
#define EST_TIME ((temps_attente)*((nb_pingpong)+(wsize)-2))

/* Name of the final trace file written by the control task. */
#define TRACEFILE "events"

/* Directory holding the per-run trace directories; may be overridden
 * at compile time.
 */
#ifndef TAPE_TRACESDIR
#ifdef MSDOS
#define TAPE_TRACESDIR	"tape.trc"
#else
#define TAPE_TRACESDIR	"tape.traces"
#endif
#endif

/* Struct tag of the PVM host-info record: `hostinfo` when pvm3_2_6 is
 * defined, `pvmhostinfo` otherwise.
 */
#ifdef pvm3_2_6
#define PVMHOSTINFO hostinfo
#else
#define PVMHOSTINFO pvmhostinfo
#endif

/* Global state of the control task.  These are external definitions;
 * presumably other modules reach them through tapetask_globals.h --
 * TODO confirm.
 */

int htid;  /* tid of tape_control's host; set in main() via pvm_tidtohost()
            * only when clock synchronisation is enabled */
int dtid;  /* NOTE(review): not referenced in this file */
char *file_name;  /* NOTE(review): not referenced in this file */
int taskid;  /* own PVM task id, set from pvm_mytid() in main() */

int buffer_size;  /* size in bytes of the trace receive buffer (from master) */
int nb_pingpong;  /* received from master; 0 disables clock synchronisation */
int temps_attente;  /* received from master; passed on to the clock-sync
                     * routines (presumably a waiting time -- TODO confirm) */
int base_s; /* time basis in seconds */

/* Table of per-host clock statistics results; main() allocates one entry
 * per host (initial plus dynamically added) and frees it after collection.
 */
struct coef_corel *table_coef_corel;

/* Node of a singly linked list of task ids. */
struct l_tache {
  int num_tache;            /* task id stored in this node */
  struct l_tache *suivant;  /* next node, NULL at the end of the list */
};

/* Node of a singly linked list of host names. */
struct liste_host {
  char nom_host[HOSTNAMELENGTH];  /* NUL-terminated host name */
  struct liste_host *suivant;     /* next node, NULL at the end of the list */
};

/* Debug helper: print the task id of every node of list `l` in
 * hexadecimal, one per line, followed by a separator line, then flush
 * stdout.  An empty list (NULL) prints only the separator.
 */
void printList(struct l_tache *l)
{
  struct l_tache *node;

  for (node = l; node != NULL; node = node->suivant)
    printf("== %x\n", node->num_tache);

  printf("-----\n");
  fflush(stdout);
}

/* look for daemon tid of machine "name" in
 * config table "hostp" of size "nhost"
 */

int lookfordtid(name,hostp,nhost)
     char *name;
     struct pvmhostinfo *hostp;
     int nhost;
{
  int i;

  for(i=0;i<nhost;i++)
    if(!strcmp(name,hostp[i].hi_name))
      return hostp[i].hi_tid;

  return ADDED_FAILED;
}

/*******************************************************************
 *
 *  Main code of the tape_control task. 
 *    a) do first SBA phase if necessary
 *    b) launch user application and maintain the state
 *       of the virtual machine (number of hosts, tasks)
 *    c) once the user application has finished, do second phase 
 *       of SBA taking into account the dynamically added hosts
 *    d) compute clock statistics (write into file)
 *    e) collect and globalize events, write final trace file
 *
 *  Degraded modes:
 *    - if the initial pvm_config() fails, clock synchronisation is
 *      switched off (nb_pingpong forced to 0) and the run continues;
 *    - if the final pvm_config() fails, or DynamicHosts is off, the
 *      dynamically added hosts are left out of the clock statistics;
 *    - if a control message cannot be received, the task stops
 *      waiting and goes on with the collection phase.
 *
 *  Returns 0.
 *
 *******************************************************************/

int main(void)

{

int info;               /* per-host status written by pvm_addhosts */

int verbose;                      /* verbose flag for tape_control */
int nb_tache_active;                     /* number of active tasks */
int wsize;               /* window size for clock sample smoothing */
char pathname[MAXPATHLEN];                    /* wd of master task */

struct res_somme                    /* cumulative clock statistics */
  **table_res = NULL,           /* ... for hosts in initial config */
  **table_res_ajout = NULL,     /* ... for dynamically added hosts */
  **table_res_merge;           /* merge of the previous two tables */

struct l_tache              /* list of tasks that reached pvm_exit */
  *lt_fini,                                                /* head */
  *ptdr_lt_fini;                                           /* tail */

struct liste_host               /* list of dynamically added hosts */ 
  *l_host_ajoute,                                          /* head */
  *ptdr_lm;                                       /* tail / cursor */

struct PVMHOSTINFO                      /* hostinfo config. tables */
  *table_host,                      /* initial config, before SBA1 */
  *table_host_fin;  /* final config, after end of user application */

int nb_host = 0,              /* number of hosts in initial config */
    nb_host1 = 0,               /* number of hosts in final config */
    nb_host2;                 /* number of dynamically added hosts */

int DynamicHosts = 1;           /* true if hosts can be added dyn. */
char tape_compact;                 /* <=> on-line event compacting */
int nb_arch;

struct timeval tv;                            /* used to get dates */
int end_sba1;                                 /* end date of SBA 1 */

int tid,buf,maitre;
int lg,tag;
int nbuf,i,j;
char nom[80];

 /* The TAPE control task ignores SIGTERM. It cannot be
  * killed by a reset from the PVM console. 
  */

signal(SIGTERM,SIG_IGN);

 /* get operating parameters from user's master task */

tid=taskid=pvm_mytid();

maitre=pvm_parent();
pvm_recv(maitre,msgtag_taille);
pvm_upkint(&buffer_size,1,1);
pvm_upkint(&nb_pingpong,1,1);
pvm_upkint(&temps_attente,1,1);
pvm_upkint(&wsize,1,1);
pvm_upkint(&verbose,1,1);
pvm_upkint(&DynamicHosts,1,1);
pvm_upkbyte(&tape_compact,1,1);
/* NOTE(review): pvm_upkstr has no length limit; this relies on the
 * master sending a path shorter than MAXPATHLEN. */
pvm_upkstr(pathname);

if(verbose) 
  {
    printf("[tape_control] Starting (%s).\n",TAPE_VERSION);
    fflush(stdout);
  }

 /* change to the master's working directory */

/* umask(022); */

if(chdir(pathname))
  fprintf(stderr,
          "[tape_control] warning, cannot change to %s, using home directory instead.\n",
          pathname);

/* Create trace directory if needed -- YA */

if (chdir(TAPE_TRACESDIR)) {
  if (!mkdir(TAPE_TRACESDIR, 0777)) {
    if (chdir(TAPE_TRACESDIR)) {
      fprintf(stderr,
              "[tape_control] warning, cannot cd to trace pool %s\n", TAPE_TRACESDIR);
    }
  }
}

 /* per-run directory, named after our own task id */

sprintf(nom, "%x.tape", taskid);

if(mkdir(nom,0777))
  fprintf(stderr,"[tape_control] warning, cannot create directory %s\n",
          nom);
else 
  if(chdir(nom))
    fprintf(stderr,
            "[tape_control] warning, cannot change to newly created directory %s\n",
            nom);


 /* get the initial configuration; without it no clock synchronisation
  * is possible, so we fall back to the "no synchro" mode instead of
  * going on with an undefined host table
  */

if( nb_pingpong != 0 ) {

  htid=pvm_tidtohost(tid);
  if(pvm_config(&nb_host,&nb_arch,&table_host)<0) {
    fprintf(stderr,"[tape_control] fatal error, cannot get pvm config.\n");
    fprintf(stderr,"[tape_control] warning, clock synchro disabled.\n");
    fflush(stderr);
    nb_host=0;
    nb_pingpong=0;
  }

}

if( nb_pingpong != 0 ) {

  /* SBA1 */

  /* allocate table of cumulative clock stats */

  if(!(table_res=(struct res_somme **) calloc(nb_host,sizeof(struct res_somme *))))
    fatalMallocFailure(__FILE__,__LINE__);

  /* initialize table for clock_synchro stats */

  for (i=0;i<nb_host;i++)
    {
      if(!(table_res[i]=(struct res_somme *) malloc(sizeof(struct res_somme))))
        fatalMallocFailure(__FILE__,__LINE__);
      if(!(table_res[i]->win=(struct point *) calloc(wsize,sizeof(struct point))))
        fatalMallocFailure(__FILE__,__LINE__);
      if(!(table_res[i]->rnk=(int *) calloc(wsize,sizeof(int))))
        fatalMallocFailure(__FILE__,__LINE__);
      table_res[i]->SX=0.0;
      table_res[i]->SY=0.0;
      table_res[i]->SXY=0.0;
      table_res[i]->SXX=0.0;
      table_res[i]->SYY=0.0;
      table_res[i]->dtid=table_host[i].hi_tid;
      /* keep a private copy of the name, the config table is not ours */
      if(!(table_res[i]->name=(char *)malloc(strlen(table_host[i].hi_name)+1)))
        fatalMallocFailure(__FILE__,__LINE__);
      strcpy(table_res[i]->name,table_host[i].hi_name);
      table_res[i]->util=0;
    }

  /* NOTE(review): table_host is deliberately not freed here.  In
   * PVM 3.3 and later libpvm appears to keep ownership of the array
   * returned by pvm_config and to release it on the next call, so
   * freeing it here would lead to a double free -- confirm against
   * the PVM version in use.  Not freeing is harmless in either case.
   */
  
  /* get base time */
  /* this date is global because tape_control runs on the reference proc */
  tape_clock(&tv);
  base_s=tv.tv_sec;
  
  if( nb_host>1 ) 
    {
      if(verbose)
        {
          printf("[tape_control] SBA phase1 (%d hosts, estimated time %d s)\n", 
                 nb_host, EST_TIME );
          fflush(stdout);
        }
      horloge_reference(table_res,nb_host,nb_pingpong,temps_attente,wsize);
    }

  /* get date of end of SBA 1 */
  /* this date is global because tape_control runs on the reference proc */
  tape_clock(&tv);
  end_sba1=tv.tv_sec;

} /* SBA1 */

else {

  /* skip SBA1 */

  tape_clock(&tv);
  base_s=tv.tv_sec;
  end_sba1=base_s;

  if( verbose ) {
    printf("[tape_control] Note, no clock synchro.\n");
    fflush(stdout);
  }

}

  /* leave control to the user's master task */

if(verbose)
  {
    printf("[tape_control] Running instrumented application...\n");
    fflush(stdout);
  }

pvm_initsend(PvmDataDefault);
pvm_pkint(&base_s,1,1);
pvm_send(maitre,msgtag_init);

/***********************************************************************
 *
 *      centralize information on user's application 
 *         - count number of spawned tasks
 *         - maintain list of finished tasks (pvm_exit)
 *         - maintain list of machines added to initial configuration
 *         - detect killed tasks
 *
 ***********************************************************************/

 /* init list of added hosts */
ptdr_lm = l_host_ajoute = NULL; 

 /* init list of finished tasks */
ptdr_lt_fini = lt_fini = NULL;

nb_tache_active=1;        /* # of active tasks */
nb_host2=0;               /* # of added hosts */
while(nb_tache_active>0)
   {
   buf=pvm_recv(-1,-1);

   /* without a valid buffer, tag/tid/lg would be undefined below */
   if(buf<0 || pvm_bufinfo(buf,&lg,&tag,&tid)<0)
     {
       fprintf(stderr,"[tape_control] error, cannot receive control message, stop waiting for tasks.\n");
       fflush(stderr);
       break;
     }

   switch(tag)
      {
        
        /* new tasks have been successfully spawned */
        /* we receive the number of tasks spawned */
      case msgtag_spawn:
      pvm_upkint(&i,1,1);
      nb_tache_active+=i;
      break;
      
        /* a task reached its pvm_exit */
      case msgtag_exit:
      {

      struct l_tache *tache_cour;
      nb_tache_active--;

      /* append the sender to the list of finished tasks */
      if(!(tache_cour=(struct l_tache *) malloc(sizeof(struct l_tache))))
        fatalMallocFailure(__FILE__,__LINE__);
      tache_cour->num_tache = tid;
      tache_cour->suivant = NULL;

      if(lt_fini==NULL)
        ptdr_lt_fini = lt_fini = tache_cour;
      else
        {
          ptdr_lt_fini->suivant = tache_cour;
          ptdr_lt_fini = ptdr_lt_fini->suivant;
        }

      if(verbose)
        {
          printf("[tape_control] %x left us.\n",tid);
          fflush(stdout);
        }

      }
      break;
      
        /* a task has been killed */
      case msgtag_kill:
      nb_tache_active--;
      break;

        /* a list of hosts have been added successfully to the configuration */
      case msgtag_addhost:
      {
      int nb,i;
      struct liste_host *machine;
      char *hostname;

      /* pvm_upkstr has no length limit and nom_host is a fixed-size
       * array, so each name is first unpacked into a scratch buffer.
       * A string packed in the message cannot be longer than the
       * message itself (lg bytes).
       */
      if(!(hostname=(char *)malloc((size_t)lg+1)))
        fatalMallocFailure(__FILE__,__LINE__);

      pvm_upkint(&nb,1,1);
      for(i=0;i<nb;i++)
       {
         if(pvm_upkstr(hostname)<0)
           {
             fprintf(stderr,"[tape_control] warning, truncated host list from task %x.\n",tid);
             fflush(stderr);
             break;
           }
         if(strlen(hostname)>=HOSTNAMELENGTH)
           {
             fprintf(stderr,"[tape_control] warning, host name %s too long, host ignored.\n",hostname);
             fflush(stderr);
             continue;
           }

         nb_host2++;
         if(!(machine=(struct liste_host *)malloc(sizeof(struct liste_host))))
           fatalMallocFailure(__FILE__,__LINE__);
         strcpy(machine->nom_host,hostname);
         machine->suivant=NULL;

         if(l_host_ajoute==NULL)
           ptdr_lm = l_host_ajoute = machine;
         else
           {
             ptdr_lm->suivant=machine;
             ptdr_lm=ptdr_lm->suivant;
           }
       }

      free(hostname);
      break;
      }

        /* something 's gone wrong */
      default:
      fprintf(stderr,"[tape_control] ignored undefined control message from task %x.\n",tid);
      fflush(stderr);
      }
   }

/* now, all the tasks reached pvm_exit */

if(verbose)
  {
    printf("[tape_control] Done.\n");
    fflush(stdout);
  }


/*****************************************************************
 *
 * Now we can do the second SBA phase. In case the user's
 * application added hosts dynamically, we repeat the first SBA
 * phase for these hosts.
 *
 *****************************************************************/

if( nb_pingpong == 0 )
  goto COLLECT;

if( ! DynamicHosts ) 
 {
   if(verbose)
     {
       printf("[tape_control] Note, no check for added hosts.\n");
       fflush(stdout);
     }
   /* no statistics table is built for added hosts on this path,
    * so they must not be counted in the merge below */
   nb_host2=0;
   goto SBA2;
 }

if(pvm_config(&nb_host1,&nb_arch,&table_host_fin)<0) {
  fprintf(stderr,"[tape_control] fatal error, cannot get pvm config.\n");
  fprintf(stderr,"[tape_control] warning, added hosts ignored.\n");
  fflush(stderr);
  /* without the final config the added hosts cannot be handled */
  nb_host2=0;
  goto SBA2; }

/* printList(lt_fini); */


/* Check if all the hosts of the initial configuration are still present.
 * If some are missing, we try to add them to the conf. again.  
 */

for (i=0;i<nb_host;i++)
   {
   j=0;
   while(j<nb_host1&&strcmp(table_host_fin[j].hi_name,table_res[i]->name))
      j++;
   if (j==nb_host1) {
      fprintf(stderr,
        "[tape_control] %s no longer in configuration, trying to add it...\n",
        table_res[i]->name);
      /* a result below 1 means that the (single) host was not added */
      if(pvm_addhosts(&table_res[i]->name,1,&info)<1) {
        fprintf(stderr,"[tape_control] failed to add %s.\n",table_res[i]->name);
        table_res[i]->dtid=ADDED_FAILED; }
      else {
        fprintf(stderr,"[tape_control] ok, %s added.\n",table_res[i]->name);
        table_res[i]->dtid=ADDED_OK;
        /*** SHOULD BE SET TO THE DTID OF THE NEW DAEMON ***/
      }
      fflush(stderr); 
    }
 }

/* check if all the dynamically added hosts are still in the 
   configuration - if some are missing (normally they should 
   not) we attempt to add them again.
*/

ptdr_lm=l_host_ajoute;     /* walk the whole list, not just its tail */
while (ptdr_lm)
   {
   i=0;
   while(i<nb_host1&&strcmp(table_host_fin[i].hi_name,ptdr_lm->nom_host))
      i++;
   if (i==nb_host1) {
      /* this host is missing from the current configuration */
      char *tmp=ptdr_lm->nom_host;

      fprintf(stderr,
        "[tape_control] %s no longer in configuration, trying to add it...\n",
        tmp);
      if(pvm_addhosts(&tmp,1,&info)<1)
        fprintf(stderr,"[tape_control] failed to add %s.\n",tmp);
      else
        fprintf(stderr,"[tape_control] ok, %s added.\n",tmp);
        /*** KEEP DTID OF NEW DAEMON ***/
      fflush(stderr); 
    }
   ptdr_lm=ptdr_lm->suivant;
   }

  /* do the first phase of clock synchronization for the dynamically added hosts */
  /* initialize table for cumulative statistics for these hosts */

if(nb_host2&&!(table_res_ajout=(struct res_somme **) calloc(nb_host2,sizeof(struct res_somme *))))
  fatalMallocFailure(__FILE__,__LINE__);

ptdr_lm=l_host_ajoute;
i=0;
while(ptdr_lm)
  {
  if(!(table_res_ajout[i]=(struct res_somme *)malloc(sizeof(struct res_somme))))
    fatalMallocFailure(__FILE__,__LINE__);
  if(!(table_res_ajout[i]->win=(struct point *) calloc(wsize,sizeof(struct point))))
    fatalMallocFailure(__FILE__,__LINE__);
  if(!(table_res_ajout[i]->rnk=(int *) calloc(wsize,sizeof(int))))
    fatalMallocFailure(__FILE__,__LINE__);
  table_res_ajout[i]->SX=0.0;
  table_res_ajout[i]->SY=0.0;
  table_res_ajout[i]->SXY=0.0;
  table_res_ajout[i]->SXX=0.0;
  table_res_ajout[i]->SYY=0.0;
  /* table_host_fin was read before the re-add attempts above, so a
   * host that had to be re-added is not found here and gets
   * ADDED_FAILED */
  table_res_ajout[i]->dtid=lookfordtid(ptdr_lm->nom_host,
                                       table_host_fin,
                                       nb_host1);
  if(!(table_res_ajout[i]->name=(char *)malloc(strlen(ptdr_lm->nom_host)+1)))
    fatalMallocFailure(__FILE__,__LINE__);
  strcpy(table_res_ajout[i]->name,ptdr_lm->nom_host);
  table_res_ajout[i]->util=0;
  i++;
  ptdr_lm=ptdr_lm->suivant;
  }

/* NOTE(review): table_host_fin is deliberately not freed, for the same
 * reason as table_host above (ownership presumably stays with libpvm
 * -- confirm against the PVM version in use).
 */

if( nb_host2>0 ) 
  {
    if(verbose)
      {
        printf("[tape_control] %d hosts have been added dynamically by the user's\n",nb_host2);
        printf("[tape_control] application. Repeat'g SBA1 for these hosts (about %d s).\n", 
               EST_TIME );
        fflush(stdout);
      }
    horloge_reference(table_res_ajout,nb_host2,nb_pingpong,temps_attente,wsize);
  }

/* Now all the hosts that have been in the conf. are in again, and
 * their first SBA phase has been completed. The cumulative statistics
 * relative to this first phase are stored in
 *
 *       table_res[0..nb_host-1]  for the hosts in the initial config,
 * table_res_ajout[0..nb_host2-1] for the dynamically added hosts.
 * 
 * If some cumulative statistics could not be taken (because of host failure...)
 * the 'dtid' field of the corresponding structure is ADDED_FAILED.
 */

/* we merge the 2 tables 'table_res' and 'table_res_ajout' in one
 * single table, 'table_res_merge'.
 */

 SBA2:

if(!(table_res_merge=(struct res_somme **)calloc(nb_host+nb_host2,sizeof(struct res_somme*))))
  fatalMallocFailure(__FILE__,__LINE__);
for(i=0;i<nb_host;i++)
  table_res_merge[i]=table_res[i];
for(i=0;i<nb_host2;i++)
  table_res_merge[i+nb_host]=table_res_ajout[i];
/* only the pointer arrays are released, the entries now belong to
 * table_res_merge */
free(table_res);
free(table_res_ajout);

if(verbose)
  {
    printf("[tape_control] SBA phase2 (%d hosts, estimated time %d s)\n", 
           nb_host+nb_host2,EST_TIME );
    fflush(stdout);
  }

if(nb_host+nb_host2>1)
  horloge_reference(table_res_merge,nb_host+nb_host2,nb_pingpong,temps_attente,wsize);

  /* allocate table for the results of the per-host clock statistics */

if(!(table_coef_corel=(struct coef_corel *)calloc(nb_host+nb_host2,sizeof(struct coef_corel))))
  fatalMallocFailure(__FILE__,__LINE__);

  /* compute the results of the per-host clock statistics in table_coef_corel,
   * write results to file 
   */

calcul_coef_lin(table_res_merge,2*nb_pingpong,nb_host+nb_host2,taskid,nb_pingpong,
                temps_attente,wsize);

for(i=0;i<nb_host+nb_host2;i++)
  {
    free(table_res_merge[i]->win);
    free(table_res_merge[i]->rnk);
    free(table_res_merge[i]->name);
    free(table_res_merge[i]);
  }
free(table_res_merge);


/*************************************************************
 *
 * Create trace file and collect events   
 *
 *************************************************************/

 COLLECT:

{
FILE *F;
int i;
char *buffer;

if(!(F=fopen(TRACEFILE,"w"))) { 
  fprintf(stderr,"[tape_control] warning, cannot open %s, using stdout instead\n",
          TRACEFILE);
  F=stdout;
}

/* Write event file header */

fprintf(F,"# TAPE/PVM event file - %s\n", TAPE_VERSION);

/* receive buffer for one trace buffer of a task; its size is the
 * buffer_size parameter received from the master task */
if (!(buffer = (char *)malloc((unsigned)buffer_size)))
  fatalMallocFailure(__FILE__,__LINE__);

/* loop over all the tasks in our task list and collect their traces,
   on arrival, the dates of the events are corrected (globalized) */

while(lt_fini)
   {

   double a=1.0, /* drift */
          b=0.0; /* offset */

   if(verbose)
     {
       printf("[tape_control] collecting traces from task %x.\n",lt_fini->num_tache);
       fflush(stdout);
     }

   if(lt_fini->num_tache)

     {

       char precevent[1000]; /* previously decoded event (cf correction.c) */

       /* ask the task for its traces (empty control message) */
       pvm_initsend(0);   
       pvm_send(lt_fini->num_tache,msgtag_control);

       /* get the drift a and offset b from table_coef_corel */
       if(nb_pingpong)
         coef(pvm_tidtohost(lt_fini->num_tache),&a,&b,nb_host+nb_host2);

   
       /* receive the number of buffers to expect */
       pvm_recv(lt_fini->num_tache,msgtag_nbuf);
       pvm_upkint(&nbuf,1,1);

        /* receive the nbuf buffers, buffer i carries tag msgtag_buf+i */

       precevent[0]='\0';
       for(i=1;i<=nbuf;i++)
         {
           pvm_recv(lt_fini->num_tache,msgtag_buf+i);
           pvm_upkstr(buffer);

           /* decode the buffer and construct global time */
           if(tape_compact)
             correction(precevent,buffer,strlen(buffer),a,b,end_sba1,F);
           else
             nocompcorrection(precevent,buffer,strlen(buffer),a,b,end_sba1,F);

         }

       /* print last event - because no intrusion delay has been measured for
        * this event, the delay is considered null
        */

       fprintf(F,"0 %s\n", precevent);
       
     }

   else

     fprintf(stderr,"[tape_control] error, task list corrupt.\n");

   lt_fini=lt_fini->suivant;
   
 }

if(F!=stdout) fclose(F);
free(buffer);
free(table_coef_corel);

}

if(verbose)
{
  printf("[tape_control] Finished.\n");
  fflush(stdout);
}

pvm_exit();
return 0;
}
