/*****************************************************************************\
*                                                                             *
*  PVMTasker.c - tasker-like and remote command spawner for Wamm	          *
*                                                                             *
\*****************************************************************************/

/*
 * 
 *
 *               WAMM: Wide Area Metacomputer Manager
 *     CNUCE - Institute of the Italian National Research Council
 *      Authors:  R. Baraglia, M. Cosso, G. Faieta, M. Formica, 
 *                      D. Laforenza, M. Nicosia 
 *                   (C) 1997 All Rights Reserved
 *
 *                              NOTICE
 *
 *
 * Permission is hereby granted, without written agreement and without license
 * or royalty fees, to use, copy, modify, and distribute this software and
 * its documentation for educational and research purpose only, provided that
 * the above copyright notice and the following two paragraphs appear in all
 * copies of this software and in the supporting documentation. No charge,
 * other than an "at-cost" distribution fee, may be charged for copies,
 * derivations, or distributions of this material without the express written
 * consent of the copyright holder.
 * 
 * IN NO EVENT SHALL THE INSTITUTION (CNUCE-CNR) AND THE AUTHORS BE LIABLE TO
 * ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL
 * DAMAGES ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THEINSTITUTION OR THE AUTHORS HAS BEEN ADVISED OF THE POSSIBILITY OF 
 * SUCH DAMAGE.
 *
 * THE INSTITUTION (CNUCE-CNR) AND THE AUTHORS SPECIFICALLY DISCLAIMS ANY 
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE AUTHORS HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 *
 * We want to thank Brad Topol of the Georgia Institute of Technology, Atlanta.
 */


#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <pvm3.h>
#include <sys/wait.h>
#include <signal.h>

#include <fcntl.h>
#include <errno.h>
#include <sys/time.h>
#include <sys/times.h>
#include <sys/types.h>
#include <sys/resource.h>


#include "tags.h"
#include "version.h"
#include "util.h"


/**************/
/* Structures */
/**************/

/*
 * Node of a doubly linked list describing one child followed by the tasker.
 * The same node type is used by both TaskList (shell commands started by
 * StartCmd) and PVMTaskList (PVM tasks discovered by CheckTasks); each list
 * only uses the fields marked for it.
 */
struct Task {
	struct Task * prev, * next;		/* previous, next node */
	int pid;						/* process pid */
	int tid;						/* tid (PVM tasks only) */
	int checking;			     /* 1 = not yet seen in the current scan (PVM tasks only) */
	int no;							/* NetworkObj (normal tasks only) */
	char * out;						/* name of the file capturing stdout (normal tasks only) */
	char * err;						/* name of the file capturing stderr (normal tasks only) */
};

/********************/
/* global variables */
/********************/

struct Task * TaskList;		/* commands started by StartCmd */
struct Task * PVMTaskList;	/* PVM tasks tracked by CheckTasks */

int MyTid;		/* tasker tid */
int ParentTid;	/* interface tid (result of pvm_parent) */
int DTid;		/* local daemon tid */
int toslpfd[2];	/* pipe towards the slave */
int fromslpfd[2];	/* pipe from the slave */
/****************************/
/* private global variables */
/****************************/


static int olslave = 0;	/* != 0 once the slave has been started */
static int have_reserved=0; /* 1 after reserve_status wrote a request; cleared by get_status once data is read */
static char slave_sig = 'U'; /* byte written to the slave: 'U' by default, 'K' in kill_slave */
static char name_host[64]; /* local host name, filled by gethostname in main */

/***************/
/* definitions */
/***************/

#define DELAY		500000		/* delay (us) between one check and the next */
#define CHECK		10000000	/* us between one check on the master and the next */

/* Message tags; the first and last are sent to the parent, the others are handled in main's switch. */
#define OL_PROCINFO 6		/* slave status forwarded by get_status */
#define STARTSLAVE 500		/* start the slave */
#define KILLSLAVE 600		/* stop the slave, no reply */
#define KILLSLAVE_ACK 650	/* stop the slave and reply with SLAVEKILLED */
#define SAMPLEDELAY 700		/* new sample delay follows as one packed int */
#define SLAVEKILLED 800		/* reply sent after KILLSLAVE_ACK */
/*************/
/* functions */
/*************/

/* Not declared in this file's headers; called in the forked child in StartCmd. */
extern void pvmendtask (void);

struct Task *		AddTask (struct Task **);
void				DelTask (struct Task **, struct Task *);
void				CheckTasks (int);
struct Task *		FindPVMTask (int);

void 				StartCmd (void);

/****************************/
/* private global functions */
/****************************/

static int start_olslave(void);
static void reserve_status(void);
static void kill_slave(void);
static void get_status(void);

/********/
/* main */
/********/

/*
 * Tasker entry point.
 *
 * argv[1] is compared against VER_TSKR and the outcome (1 = match) is
 * acknowledged to the parent with a T_TSKR_ACK message; a missing argument
 * counts as a mismatch.  On a mismatch the tasker leaves PVM and exits
 * with status 1.
 *
 * Otherwise it enters the service loop.  Each iteration waits up to DELAY
 * microseconds for a message and then, in this order:
 *   - collects pending slave output if a request is outstanding;
 *   - every CHECK microseconds verifies that the parent still exists,
 *     stopping the slave and exiting with status 1 if it does not;
 *   - asks the slave for a new sample when the sampling period elapsed;
 *   - checks the tracked tasks;
 *   - dispatches the received message, if any.
 *
 * Elapsed time is approximated by adding DELAY per iteration, even when a
 * message arrived before the timeout.  The loop ends, and the program exits
 * with status 0, when pvm_trecv() reports an error.
 */
int main (int argc, char ** argv)
{	
	int ver;					/* 1 = version ok */
	struct timeval tv;			/* timeout for pvm_trecv */
	int bufid, tag, tid;		/* information about the received message */
	int check;					/* us accumulated since the last master check */
	int sample_delay = 5;		/* seconds between two slave samples */
	long int sampling;			/* sample_delay expressed in us */
	long int sampling_frequency = 0;	/* us accumulated since the last request */

	/************************/
	/* Register the program */
	/************************/
	
	MyTid = pvm_mytid();
	ParentTid = pvm_parent();
	DTid = pvm_tidtohost (MyTid);

	/* gethostname() may leave a truncated name unterminated */
	gethostname (name_host, sizeof name_host);
	name_host[sizeof name_host - 1] = '\0';

	pvm_setopt (PvmRoute, PvmDontRoute);
	
	signal (SIGTERM, SIG_IGN);
	signal (SIGINT, SIG_IGN);
	
	/*************************/
	/* Version check and ack */
	/*************************/
		
	/* without an argument there is nothing to compare: report a mismatch */
	ver = (argc > 1 && !strcmp (VER_TSKR, argv[1]));
	
	pvm_packf ("%+ %d", PvmDataDefault, ver);
	pvm_send (ParentTid, T_TSKR_ACK);
	
	if (!ver) {
		pvm_exit();
		exit(1);
	}
		
	/*************/
	/* Main Loop */
	/*************/
	
	/* widen before multiplying so large delays do not overflow int */
	sampling = (long) sample_delay * 1000000L;
	tv.tv_sec = 0;
	tv.tv_usec = DELAY;
	
	check = 0;
	
	for (;;) {
	
		/**** wait for a message or for the timeout ****/
	
		if ((bufid = pvm_trecv (-1, -1, &tv)) < 0) break;
		if (have_reserved) {
			get_status();
		}

		/**** master check ****/
		
		check += DELAY;
		if (check >= CHECK) {

			if (pvm_pstat (ParentTid) < 0 ) {
			
				/**** the interface no longer exists: exit. ****/
				
				kill_slave();
				pvm_exit ();
				exit (1);
			}
			
			check = 0;
		}

		/**** periodic sample request to the slave ****/

		if (olslave){
			sampling_frequency += DELAY;
			if (sampling_frequency >= sampling) {
				reserve_status();
				sampling_frequency = 0;
			}
		}

		/**** check the tasks ****/
				
		CheckTasks (DTid);		
		
		/**** messages ****/
		
		if (bufid<=0) continue;		/* timeout: nothing to dispatch */
		pvm_bufinfo (bufid, NULL, &tag, &tid);
	
		switch (tag) {
			
			case T_TSKR_CMD:	StartCmd ();
								break;
								
			case STARTSLAVE:	slave_sig = 'U';
								olslave = start_olslave();
								break;
								
			case KILLSLAVE:		kill_slave();			
								break;
								
			case KILLSLAVE_ACK:	kill_slave();
								pvm_initsend(PvmDataDefault);
								pvm_send(ParentTid, SLAVEKILLED);
								break;				
								
			case SAMPLEDELAY:	/* drain a pending sample before changing the period */
								if (have_reserved) {
									get_status();
								}
								pvm_upkint(&sample_delay,1,1);
								sampling = (long) sample_delay * 1000000L;
								break;					
								
			default : 			break;
		}				
	}
	
	/**** the pvmd is dead: better to exit... ****/
	
	/* pvm_exit() is deliberately not called here: the original author notes it is useless at this point */
	exit (0);	
}

/***************************************************/
/* StartCmd () - runs a command (T_TSKR_CMD no cmd) */
/***************************************************/

/*
 * Unpacks a NetworkObj id and a command line from the current PVM receive
 * buffer, appends redirections of stdout and stderr to two temporary files,
 * and runs the result through "sh -c" in a forked child.  On success the
 * child is recorded in TaskList together with the id and the two file
 * names, which the list node then owns.
 *
 * Nothing is started, and nothing is reported to the interface, when the
 * temporary names cannot be obtained, when the command plus redirections
 * does not fit in the local buffer, or when fork() fails.
 *
 * NOTE(review): pvm_unpackf("%s") has no length limit, so a command longer
 * than the local buffer still overflows it before any check can run.
 * NOTE(review): tmpnam() names are open to races; creating the files with
 * mkstemp() would be safer.
 */
void StartCmd (void)
{
	int no;					/* NetworkObj (ignored by the tasker) */
	char buffer[1024];		/* command to run */

	struct Task * task;		/* child process */
	int pid;

	char * name;			/* static name returned by tmpnam */
	char * out;				/* stdout file name */
	char * err;				/* stderr file name */

	size_t used;			/* length of the unpacked command */
	int n;					/* snprintf result */

	/**** fetch the arguments ****/

	pvm_unpackf ("%d %s", &no, buffer);
	
	/**** prepare the command ****/
	
	/* tmpnam() reuses one static buffer, so copy each name before the next call */
	name = tmpnam (NULL);
	out = name ? strdup (name) : NULL;
	name = tmpnam (NULL);
	err = name ? strdup (name) : NULL;

	if (!out || !err) {
		free (out);
		free (err);
		return;
	}

	used = strlen (buffer);
	n = snprintf (buffer + used, sizeof buffer - used, " 1> %s 2> %s", out, err);
	if (n < 0 || (size_t) n >= sizeof buffer - used) {
		/* a truncated redirection would run a different command: give up */
		free (out);
		free (err);
		return;
	}
	
	/**** Fork ****/
	
	pid = fork();
	if (!pid) {
	
		/**** Child ****/
		
		/* closes the socket with the pvmd and resets the internal mytid */
		
		pvmendtask();
		
		/* the child must have standard signal handling! */
		
		signal (SIGTERM, SIG_DFL);
		signal (SIGINT, SIG_DFL);
		
		execlp ("sh", "sh", "-c", buffer, (char *) NULL);
		
		/**** shell not found (!) ****/

		_exit(1);
	}
	
	/**** Parent ****/
	
	if (pid == -1) {
		/* XXX send an error message to the interface. */
		free (out);
		free (err);
		return;
	}
	
	task = AddTask(&TaskList);
	if (!task) {
		/* the child keeps running but can no longer be tracked */
		free (out);
		free (err);
		return;
	}

	task->pid = pid;
	task->no = no;
	task->out = out;
	task->err = err;
}

/**************/
/* CheckTasks */
/**************/

/*
 * Polls every tracked child and reports changes to the interface.
 *
 * Commands (TaskList): each entry is polled with waitpid(WNOHANG).  When
 * the call returns non-zero the captured stdout/stderr files are read,
 * sent to the parent in a T_TSKR_CMD message together with the NetworkObj
 * id and the wait status, then removed, and the entry is dropped.  If
 * waitpid() itself fails the status is reported as -1.
 *
 * PVM tasks (PVMTaskList): the task table of the daemon `dtid` is fetched
 * with pvm_tasks().  Tasks not yet known are added and announced with
 * T_TSKR_NEW; known tasks that no longer appear are announced with
 * T_TSKR_END and dropped.  Nothing is done if pvm_tasks() fails.
 *
 * NOTE(review): ReadFile is not defined in this file; it is assumed to
 * return the length read and to store a malloc'ed, NUL-terminated buffer
 * through its second argument - confirm against its definition.
 */
void CheckTasks (int dtid)
{
	struct Task * task, * tmp;
	int n;
	
	int ntask;					/* pvm_tasks results */
	struct pvmtaskinfo * ti;

	int res;					/* process wait status */
	char * outbuf;				/* buffer for stdout */
	char * errbuf;				/* buffer for stderr */
	int outlen, errlen;			/* buffer lengths */
	
	/***************************/
	/* Check the non-PVM tasks */
	/***************************/
	
	task = TaskList;
	while (task) {
	
		tmp = task->next;		/* saved now: DelTask frees the node */

		/* stays -1 if waitpid fails and never writes a status */
		res = -1;
	
		if (waitpid (task->pid, &res, WNOHANG)) {
	
			/**************************/
			/* Send stdout and stderr */
			/**************************/
			
			/* defined values in case ReadFile leaves them untouched */
			outbuf = NULL;
			errbuf = NULL;

			outlen = ReadFile (task->out, &outbuf);
			errlen = ReadFile (task->err, &errbuf);
			
			pvm_packf ("%+ %d %d", PvmDataDefault, task->no, res);
			pvm_packf ("%d %s", outlen, outbuf ? outbuf : "");
			pvm_packf ("%d %s", errlen, errbuf ? errbuf : "");
			pvm_send (ParentTid, T_TSKR_CMD);
			
			unlink (task->out);
			unlink (task->err);
			free (outbuf);
			free (errbuf);
			
			DelTask (&TaskList, task);
		}
		
		task = tmp;
	}
	
	/***********************/
	/* Check the PVM tasks */
	/***********************/
	
	if ((pvm_tasks (dtid, &ntask, &ti)) < 0) return;
	
	/**** Mark every task as "checking" ****/
	
	task = PVMTaskList;
	while (task) {
		task->checking = 1;
		task = task->next;
	}
	
	/**** Examine the tasks ****/
	
	for (n=0; n<ntask; n++) {
		
		task = FindPVMTask (ti[n].ti_tid);
		if (task) task->checking = 0;			/* it was already known */
		
		else {
			
			/************/
			/* New task */
			/************/
			printf(" Nuovo task\n");
			task = AddTask (&PVMTaskList);

			/* not tracked: stay silent so the next scan announces it */
			if (!task) continue;

			task -> tid = ti[n].ti_tid;
			
			/**** inform the interface ****/
								
			pvm_packf ("%+ %d", PvmDataDefault, ti[n].ti_tid);
			pvm_send (ParentTid, T_TSKR_NEW);
		}	
	}
	
	/**** Every task still marked "checking" has terminated ****/
	
	task = PVMTaskList;
	while (task) {
	
		tmp = task->next;		/* saved now: DelTask frees the node */
		
		if (task->checking) {
		
			/**** inform the interface ****/
			
			pvm_packf ("%+ %d", PvmDataDefault, task->tid);
			pvm_send (ParentTid, T_TSKR_END);
			
			DelTask (&PVMTaskList, task);
		}
		
		task = tmp;
	}
}

/******************************************/
/* AddTask (&list) - adds a task to a list */
/******************************************/

/*
 * Allocates a zero-filled node and links it at the head of *list.
 *
 * Returns the new node, or NULL (leaving the list untouched) when the
 * allocation fails.  The node is released by DelTask.
 */
struct Task * AddTask (struct Task ** list)
{
	struct Task * task;
	
	task = calloc (1, sizeof *task);
	if (!task) return (NULL);
	
	/**** insert the task at the head ****/
	
	task->prev = NULL;
	task->next = *list;
	
	if (*list) (*list)->prev = task;
	*list = task;
	
	return (task);
}

/******************************************************/
/* DelTask (&list, task) - removes a task from a list */
/******************************************************/

/*
 * Unlinks `task` from the doubly linked list whose head is *list, moving
 * the head forward when `task` was the first node, then frees the node
 * together with the two strings it owns.
 */
void DelTask (struct Task ** list, struct Task * task)
{
	struct Task * before = task->prev;
	struct Task * after = task->next;

	/**** detach the node from its neighbours and from the head ****/

	if (*list == task) {
		*list = after;
	}
	if (before != NULL) {
		before->next = after;
	}
	if (after != NULL) {
		after->prev = before;
	}

	/**** release the node; free() accepts NULL for unused strings ****/

	free (task->out);
	free (task->err);
	free (task);
}

/**********************************************/
/* FindPVMTask (tid) - looks for a PVM task    */
/**********************************************/

/*
 * Walks PVMTaskList from its head and returns the first node whose tid
 * equals `tid`, or NULL when no node matches.
 */
struct Task * FindPVMTask (int tid)
{
	struct Task * cur;

	for (cur = PVMTaskList; cur != NULL; cur = cur->next) {
		if (cur->tid == tid) {
			return (cur);
		}
	}

	return (NULL);
}


/**********************************************************/
/* start_olslave returns 0 if there is an error, 1 otherwise */
/**********************************************************/

/*
 * Creates the two pipes toslpfd / fromslpfd and forks a child that runs
 * <root>/bin/<PVM_ARCH>/olslave with its stdin connected to toslpfd and
 * its stdout connected to fromslpfd.  <root> is $PVM_ROOT, or $HOME when
 * PVM_ROOT is not set.
 *
 * In the parent the read end of fromslpfd is switched to non-blocking
 * mode (O_NDELAY).  Returns 1 on success; returns 0, with every pipe
 * descriptor closed again, if a pipe, the fork or an fcntl call fails.
 *
 * The child never returns from this function: on any failure it exits
 * with status 1, so a failed start cannot leave a second copy of the
 * tasker running the caller's loop.  Child diagnostics go to stderr
 * because its stdout is the data pipe read by the parent.
 */
static int start_olslave(void)
{
    int flags;
    int child;
    int n;
    char execname[256];
    const char *root;
    const char *arch;

    if (pipe(toslpfd) < 0) {
        printf("ERROR pipe\n");
        return(0);
    }
    if (pipe(fromslpfd) < 0) {
        printf("ERROR pipe\n");
        close(toslpfd[0]);
        close(toslpfd[1]);
        return(0);
    }

    child = fork();
    if (child == 0) {

        /**** child: wire the pipes to stdin/stdout and exec the slave ****/

        close(fromslpfd[0]);
        close(toslpfd[1]);
        if (dup2(fromslpfd[1], 1) < 0) {
            fprintf(stderr, "error dup2\n");
            _exit(1);
        }
        if (dup2(toslpfd[0], 0) < 0) {
            fprintf(stderr, "dup2\n");
            _exit(1);
        }

        /* the duplicates are in place; drop the original descriptors */
        if (fromslpfd[1] != 1) close(fromslpfd[1]);
        if (toslpfd[0] != 0) close(toslpfd[0]);

        root = getenv("PVM_ROOT");
        arch = getenv("PVM_ARCH");
        if (root == NULL) {
            fprintf(stderr, "could not get PVM_ROOT trying default\n");
        }
        if (arch == NULL) {
            fprintf(stderr, "ERROR could not get PVM_ARCH\n");
            _exit(1);
        }
        if (root == NULL) {
            /* try default */
            root = getenv("HOME");
            if (root == NULL) {
                fprintf(stderr, "ERROR could not get HOME\n");
                _exit(1);
            }
        }

        n = snprintf(execname, sizeof execname, "%s/bin/%s/olslave", root, arch);
        if (n < 0 || (size_t) n >= sizeof execname) {
            fprintf(stderr, "ERROR olslave path too long\n");
            _exit(1);
        }

        execl(execname, execname, (char *) NULL);
        fprintf(stderr, "ERROR Should not continue after exec\n");
        _exit(1);
    }

    if (child == -1) {
        printf("ERROR fork\n");
        close(toslpfd[0]);
        close(toslpfd[1]);
        close(fromslpfd[0]);
        close(fromslpfd[1]);
        return(0);
    }

    /**** parent: keep only its own ends of the pipes ****/

    close(fromslpfd[1]);
    close(toslpfd[0]);

    if ((flags = fcntl(fromslpfd[0], F_GETFL, 0)) == -1) {
        printf("Error-- fcntl1\n");
        goto fail;
    }
    flags = flags | O_NDELAY;
    if ((fcntl(fromslpfd[0], F_SETFL, flags)) == -1) {
        printf("Error-- fcntl2\n");
        goto fail;
    }
    return(1);

fail:
    /*
     * The slave is unusable without a non-blocking read end.  Closing
     * both ends makes it see end-of-file on its stdin; it is not reaped
     * here.
     */
    close(toslpfd[1]);
    close(fromslpfd[0]);
    return(0);
}

/******************/
/* reserve_status */
/******************/

/*
 * Sends the current slave_sig byte to the slave through toslpfd.
 * have_reserved becomes 1 when the byte was written, and 0 (with a
 * message on stdout) when write() fails.  Does nothing if the slave is
 * not running.
 */
void reserve_status()
{
    int failed;

    if (!olslave) {
        return;
    }

    failed = (write(toslpfd[1], &slave_sig, 1) == -1);
    if (!failed) {
        have_reserved = 1;
        return;
    }

    have_reserved = 0;
    printf("Error writing to olslave");
}

/**************/
/* kill_slave */
/**************/

void kill_slave()  
{
	int * status;
	
    if(olslave) {
	slave_sig='K';
	write(toslpfd[1],&slave_sig,1);
	wait(status);
	olslave = 0;
    }
}

/**************/
/* get_status */
/**************/

void get_status()
{
    int moredata=1;
    int rdlen;
    int oldbuf, sbuf;
    char infobuf[10240];
    int bcount;
    if (olslave) {
	if (have_reserved == 0) return; /*safety valve*/
	bcount=0;
	while(moredata) {
	    rdlen=read(fromslpfd[0],&infobuf[bcount],4096);
	   
	     
	    if (((rdlen==0) && (have_reserved==0)) || 
		((rdlen == -1) && (have_reserved==0)) || 
		((bcount+rdlen) >= 6144)) {break;};
	    if (rdlen > 0) {
		bcount+=rdlen;
		have_reserved=0;
		
	    }
	}
	if (bcount) {
		if ((sbuf = pvm_mkbuf(PvmDataDefault)) < 0) 
		    pvm_perror("ol getstatus mkbuf");
		if ((oldbuf = pvm_setsbuf(sbuf)) < 0)
		    pvm_perror("olgetstatus setsbuf");
		pvm_pkint(&bcount, 1, 1);
		pvm_pkbyte(infobuf, bcount, 1);
		pvm_pkstr(name_host);
		pvm_send(ParentTid, OL_PROCINFO);
		pvm_setsbuf(oldbuf);
		pvm_freebuf(sbuf);
	}
    }   
}

