/******************************************************************************
*
*  Copyright (C) 1995 A. Bode, J. Pruyne and G. Stellner
*
*  This file is part of CoCheck
*
*  CoCheck is free software; you can redistribute it and/or
*  modify it under the terms of the GNU Library General Public
*  License as published by the Free Software Foundation; either
*  version 2 of the License, or (at your option) any later version.
*
*  CoCheck is distributed in the hope that it will be useful,
*  but WITHOUT ANY WARRANTY; without even the implied warranty of
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
*  Library General Public License for more details.
*
*  You should have received a copy of the GNU Library General Public
*  License along with this library; if not, write to the Free
*  Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
*  Contact to the authors:
*
*  electronic mail: {bode,stellner}@informatik.tu-muenchen.de
*
*  paper mail:      Prof. Dr. A. Bode
*                   Lehrstuhl f"ur Rechnertechnik und Rechnerorganisation
*                   Institut f"ur Informatik
*                   Technische Universit"at M"unchen
*                   80290 M"unchen
*                   Germany
*
******************************************************************************/
/******************************************************************************

  pvm_rm_handlers.c,v
  1995/11/07 13:51:08
  1.6
  Exp
  stellner

  Authors: J. Pruyne, G. Stellner

  Description: Server routines of the vanilla RM of CoCheck

******************************************************************************/
/* RCS identification string for this file: name, revision, timestamp,
 * author and state. */
static char rcs_id[] = "pvm_rm_handlers.c,v 1.6 1995/11/07 13:51:08 stellner Exp";

#include <stdio.h>
#include <types.h>
#include <signal.h>
#include <netinet/in.h>
#include <sys/time.h>
#include "pvm3.h"
#include "pvmsdpro.h"
#include "../src/global.h"
#include "task.h"

#if defined(CO_CHECK)
#include <string.h>
#include "CoCheckExport.h"

#if defined(PROFILE)
/*
 * Profiling timestamps.  tp is filled by gettimeofday() at the start of
 * co_check(); post_tp is filled at the end of co_check_frestart() and
 * co_check_migrate_restart(), where both values are printed to stderr.
 * tzp is only the timezone argument handed to gettimeofday().
 */
static struct timeval tp;
static struct timezone tzp;
static struct timeval post_tp;
#endif

/*
 * NOTE(review): no function named co_check_restart_f is defined in the
 * visible part of this file; the restart routines defined here are
 * co_check_frestart() and co_check_restart().  Confirm whether this
 * declaration is stale.
 */
int co_check_restart_f();
#endif

/* Packed as the first integer of every CO_CHECK_SYN message; presumably
 * the PVM task id of this resource manager -- confirm at its definition. */
extern int MyTid;
/* Host entry whose hi_tid is used to address the pvmd that receives
 * SM_ADD requests in sm_addhost(). */
extern struct pvmhostinfo *local_host;
/* Maps a task id to its checkpoint file name; co_check() treats a
 * return value of 0 as "no checkpoint file known for this task". */
extern char *get_task_ckpt_file();

/*
 * sm_spawn - handle an SM_SPAWN request issued by task `for_whom'.
 *
 * The request (command name, spawn flags, target architecture, task
 * count, argument list, output/trace sinks and environment) is unpacked
 * from the active receive buffer.  For every requested task a host is
 * picked with select_host() and a separate SM_EXEC message asking for
 * exactly one task is sent to `dest_host | TIDPVMD'.  The SM_EXECACK
 * answer is awaited synchronously, the new tid is appended to the reply
 * and the task is registered with new_task().  When no host is
 * available PvmNoHost is reported for that slot instead.
 *
 * The reply (requested count followed by one tid or error code per
 * slot) is sent to `for_whom' with tag SM_SPAWN and wait id `req_id'.
 *
 * Returns 0.
 */
int
sm_spawn(for_whom, req_id)
int		for_whom;
int		req_id;
{
	char	cmd_buf[1000];
	int		flag;
	char	dest_arch[100];
	int		count;
	int		arg_count;
	char	**argv;
	int		out_tid;
	int 	out_cod;
	int		trc_tid;
	int		trc_cod;
	int		env_count;
	char	**env;
	int		i, j;
	int		dest_host;
	int		sbuf;
	int		one = 1;
	int		new_tid;
	int		ret_count;
	struct 	pvmtaskinfo		tinfo;
#if defined(CO_CHECK)
	char	*new_ckpt_file_name;
	extern  char *gen_ckpt_name();
#endif

	/*
	 * NOTE(review): pvm_upkstr() takes no destination size, so cmd_buf
	 * and dest_arch rely on the sender keeping these strings short.
	 */
	pvm_upkstr( cmd_buf );
	pvm_upkint( &flag, 1, 0 );
	pvm_upkstr( dest_arch );
	pvm_upkint( &count, 1, 0 );
	arg_count = unpack_string_list(&argv);
	pvm_upkint( &out_tid, 1, 1 );
	pvm_upkint( &out_cod, 1, 1 );
	pvm_upkint( &trc_tid, 1, 1 );
	pvm_upkint( &trc_cod, 1, 1 );
	env_count = unpack_string_list(&env);

	/* Start the reply: it stays the "outer" send buffer while the
	 * per-task SM_EXEC messages are built in temporary buffers. */
	pvm_initsend(PvmDataDefault);
	pvm_pkint(&count, 1, 1);
	pvm_setmwid(pvm_getsbuf(), req_id);

	for (i = 0; i < count; i++) {
		dest_host = select_host(flag, dest_arch);
		if (dest_host) {
			/* Switch to a fresh buffer; sbuf remembers the reply. */
			sbuf = pvm_setsbuf(pvm_mkbuf(PvmDataFoo));
			pvm_pkint(&for_whom, 1, 1);
			pvm_pkstr(cmd_buf);
			pvm_pkint(&flag, 1, 1);
			pvm_pkint(&one, 1, 1);	/* Always tell host to create 1 task */
			pvm_pkint(&arg_count, 1, 1);
#if defined(CO_CHECK)
			new_ckpt_file_name = gen_ckpt_name(cmd_buf);
#endif
			for (j = 0; j < arg_count; j++) {
				pvm_pkstr( argv[j] );
			}
			pvm_pkint(&out_tid, 1, 1);
			pvm_pkint(&out_cod, 1, 1);
			pvm_pkint(&trc_tid, 1, 1);
			pvm_pkint(&trc_cod, 1, 1);
			pvm_pkint(&env_count, 1, 1);
			for (j = 0; j < env_count; j++) {
				pvm_pkstr( env[j] );
			}
			pvm_setmwid(pvm_getsbuf(), req_id);
			pvm_send(dest_host | TIDPVMD, SM_EXEC);

			/* Restore the reply buffer and drop the SM_EXEC buffer. */
			pvm_freebuf(pvm_setsbuf(sbuf));

			/* Synchronous, slow but easy */
			pvm_recv(-1, SM_EXECACK);
			pvm_upkint(&ret_count, 1, 1);
			if (ret_count != 1) {
				printf("sm_spawn: unexpected return count: %d\n", ret_count);
			}
			pvm_upkint(&new_tid, 1, 1);
			pvm_pkint(&new_tid, 1, 1);	/* goes into the reply */

			tinfo.ti_tid = new_tid;
			tinfo.ti_ptid = for_whom;
			tinfo.ti_host = dest_host;
			tinfo.ti_flag = T_RUNNING;
			tinfo.ti_a_out = cmd_buf;
			tinfo.ti_pid = 0;	/* process id not supported */
#if !defined(CO_CHECK)
			new_task(&tinfo, "");
#else
			new_task(&tinfo, new_ckpt_file_name);
#endif
		} else {
			new_tid = PvmNoHost;
			pvm_pkint(&new_tid, 1, 1);
		}
	}
	pvm_send(for_whom, SM_SPAWN);

	/*
	 * Release every unpacked string.  The former `arg_count - 2' only
	 * matched a disabled code path that grew arg_count by two; with
	 * that path off it left the last two arguments unreleased and
	 * passed a negative count for short argument lists.
	 * (Assumes free_string_list(list, n) releases n entries.)
	 */
	free_string_list(argv, arg_count);
	free_string_list(env, env_count);
	return 0;
}


/*
 * sm_exec - placeholder handler for SM_EXEC.
 *
 * This file only sends SM_EXEC (see sm_spawn()); receiving one is
 * reported on stdout.  Both arguments are ignored.
 *
 * Returns 0 so that callers never read an indeterminate value.
 */
int
sm_exec(for_whom, req_id)
int		for_whom;
int		req_id;
{
	printf("sm_exec: SHOULD NEVER BE CALLED!\n");
	return 0;
}


/*
 * sm_execack - handle an SM_EXECACK message arriving through the
 * regular dispatch path (sm_spawn() consumes its own acknowledgements
 * directly with pvm_recv()).
 *
 * Unpacks a count and one task id and registers that task with
 * new_task(): no parent, host derived from the tid, state T_RUNNING,
 * binary name "Unknown" and an empty checkpoint file name.  The count
 * is read only to advance the receive buffer.  Both arguments are
 * unused.
 *
 * Returns 0.
 */
int
sm_execack(for_whom, req_id)
int		for_whom;
int		req_id;
{
	int		count;
	struct pvmtaskinfo task;

	pvm_upkint(&count, 1, 1);
	pvm_upkint(&(task.ti_tid), 1, 1);
	task.ti_ptid = PvmNoParent;
	task.ti_host = pvm_tidtohost( task.ti_tid );
	task.ti_flag = T_RUNNING;
	task.ti_a_out = "Unknown";
	task.ti_pid = 0;
	new_task( &task, "" );
	return 0;
}


/*
 * sm_task - answer an SM_TASK query from task `for_whom'.
 *
 * Unpacks the `where' selector from the request, then builds a reply
 * consisting of a leading 0 (apparently an error code) followed by
 * whatever pack_task_list(where) packs, and sends it to `for_whom'
 * with tag SM_TASK and wait id `req_id'.
 *
 * Returns 0.
 */
int
sm_task(for_whom, req_id)
int		for_whom;
int		req_id;
{
	int		where;
	int		zero = 0;

	pvm_upkint(&where, 1, 1);
	pvm_initsend(PvmDataDefault);
	pvm_setmwid(pvm_getsbuf(), req_id);
	pvm_pkint(&zero, 1, 1);		/* Error code? */
	pack_task_list(where);
	pvm_send(for_whom, SM_TASK);
	return 0;
}


/*
 * sm_config - answer an SM_CONFIG query from task `for_whom'.
 *
 * The reply contains whatever pack_host_list() packs and is sent to
 * `for_whom' with tag SM_CONFIG and wait id `req_id'.
 *
 * Returns 0.
 */
int
sm_config(for_whom, req_id)
int		for_whom;
int		req_id;
{
	pvm_initsend(PvmDataDefault);
	pvm_setmwid(pvm_getsbuf(), req_id);
	pack_host_list();
	pvm_send(for_whom, SM_CONFIG);
	return 0;
}


/*
 * sm_addhost - handle an SM_ADDHOST request from task `for_whom'.
 *
 * The requested host names are forwarded in an SM_ADD message to
 * `local_host->hi_tid | TIDPVMD' and the SM_ADDACK answer is awaited
 * synchronously.  The answer (host count, architecture count and, per
 * host, tid / name / architecture / speed) is relayed to `for_whom'
 * with tag SM_ADDHOST and wait id `req_id'; every reported host is
 * also registered locally through new_host().
 *
 * Returns 0.
 */
int
sm_addhost(for_whom, req_id)
int		for_whom;
int		req_id;
{
	int		requested;	/* number of names unpacked from the request */
	int		count;		/* host count reported in SM_ADDACK */
	char	**host_names;
	int		i;
	int		narches;
	struct  pvmhostinfo	newhost;
	char	new_host_name[100];
	char	new_host_arch[100];

	requested = unpack_string_list(&host_names);

	pvm_initsend(PvmDataFoo);
	pvm_setmwid(pvm_getsbuf(), req_id);
	pvm_pkint(&requested, 1, 1);
	for (i = 0; i < requested; i++) {
		pvm_pkstr( host_names[i] );
	}

	pvm_send(local_host->hi_tid | TIDPVMD, SM_ADD);
	pvm_recv(-1, SM_ADDACK);

	pvm_upkint(&count, 1, 1);
	pvm_upkint(&narches, 1, 1);
	pvm_initsend(PvmDataDefault);
	pvm_setmwid(pvm_getsbuf(), req_id);
	pvm_pkint(&count, 1, 1);
	pvm_pkint(&narches, 1, 1);
	if (count > 0) {
		/* newhost borrows the two stack buffers for its strings. */
		newhost.hi_name = new_host_name;
		newhost.hi_arch = new_host_arch;
		for (i = 0; i < count; i++) {
			pvm_upkint(&newhost.hi_tid, 1, 1);
			pvm_upkstr( new_host_name );
			pvm_upkstr( new_host_arch );
			pvm_upkint(&newhost.hi_speed, 1, 1);
			new_host( &newhost );
			pvm_pkint(&newhost.hi_tid, 1, 1);
			pvm_pkstr( new_host_name );
			pvm_pkstr( new_host_arch );
			pvm_pkint(&newhost.hi_speed, 1, 1);
		}
	}

	pvm_send(for_whom, SM_ADDHOST);

	/*
	 * Free with the number of names that were actually unpacked.  The
	 * count from SM_ADDACK may differ (or be negative), so it must not
	 * be used here.
	 */
	free_string_list(host_names, requested);
	return 0;
}


/*
 * sm_delhost - handle an SM_DELHOST request from task `for_whom'.
 *
 * Unpacks the list of host names, removes them with pvm_delhosts() and
 * packs the result code followed by `rc' status values into a fresh
 * send buffer tagged with wait id `req_id'.
 *
 * NOTE(review): the reply buffer is built but no pvm_send() follows,
 * unlike in the other handlers of this file; confirm whether the
 * requester is expected to receive an answer.
 * NOTE(review): the number of status values packed is `rc', not the
 * number of requested hosts -- verify against the receiving side.
 *
 * Returns 0, or -1 if the status array cannot be allocated.
 */
int
sm_delhost(for_whom, req_id)
int		for_whom;
int		req_id;
{
	char	**host_names;
	int		count;
	int		*status;
	int		rc;

	count = unpack_string_list(&host_names);

	/* Ask for at least one slot so a zero or negative count cannot
	 * turn into a bogus allocation size. */
	status = (int *) malloc( (count > 0 ? count : 1) * sizeof(int) );
	if (status == NULL) {
		fprintf(stderr, "sm_delhost: out of memory\n");
		free_string_list(host_names, count);
		return -1;
	}

	rc = pvm_delhosts(host_names, count, status);

	pvm_initsend(PvmDataDefault);
	pvm_setmwid(pvm_getsbuf(), req_id);
	pvm_pkint(&rc, 1, 1);
	/* A negative rc is an error code, not an item count. */
	if (rc > 0) {
		pvm_pkint(status, rc, 1);
	}

	free(status);
	free_string_list(host_names, count);
	return 0;
}


/*
 * sm_add - placeholder handler for SM_ADD.
 *
 * This file only sends SM_ADD (see sm_addhost()); receiving one is
 * reported on stdout.  Both arguments are ignored.
 *
 * Returns 0 so that callers never read an indeterminate value.
 */
int
sm_add(for_whom, req_id)
int		for_whom;
int		req_id;
{
	printf("sm_add: SHOULD NEVER BE CALLED!\n");
	return 0;
}


/*
 * sm_addack - placeholder handler for SM_ADDACK.
 *
 * sm_addhost() consumes SM_ADDACK directly with pvm_recv(), so reaching
 * this handler is reported on stdout.  Both arguments are ignored.
 *
 * Returns 0 so that callers never read an indeterminate value.
 */
int
sm_addack(for_whom, req_id)
int		for_whom;
int		req_id;
{
	printf("sm_addack: SHOULD NEVER BE CALLED!\n");
	return 0;
}


/*
 * sm_notify - handle an SM_NOTIFY request from task `for_whom'.
 *
 * Unpacks the notification kind, the message id to use and a count.
 * For PvmHostAdd a single notification is recorded with the count as
 * its last argument; for every other kind `count' task ids follow in
 * the message and one notification is recorded per id.
 * `req_id' is unused.
 *
 * Returns 0.
 */
int
sm_notify(for_whom, req_id)
int		for_whom;
int		req_id;
{
	int		kind;
	int		msg_id;
	int		count;
	int		notify_tid;

	pvm_upkint(&kind, 1, 1);
	pvm_upkint(&msg_id, 1, 1);
	pvm_upkint(&count, 1, 1);
	if ( kind == PvmHostAdd ) {
		new_notification(kind, for_whom, msg_id, count);
	} else {
		for (; count > 0; count--) {
			pvm_upkint(&notify_tid, 1, 1);
			new_notification(kind, for_whom, msg_id, notify_tid);
		}
	}

	return 0;
}


/*
 * sm_taskx - handle an SM_TASKX (task exit) message.
 *
 * Unpacks the tid of the finished task, removes it with del_task(),
 * then unpacks its exit status and logs both to stderr.  Both
 * arguments are unused.
 *
 * Returns 0.
 */
int
sm_taskx(for_whom, req_id)
int		for_whom;
int		req_id;
{
	int		dead_tid;
	int		es;

	pvm_upkint(&dead_tid, 1, 1);
	del_task(dead_tid);
	pvm_upkint(&es, 1, 1);
	fprintf(stderr, "Task t%x, exit status = 0x%x\n", dead_tid, es);
	return 0;
}


/*
 * sm_hostx - handle an SM_HOSTX (host exit) message.
 *
 * Unpacks the tid of the departed host and removes it with del_host().
 * Both arguments are unused.
 *
 * Returns 0.
 */
int
sm_hostx(for_whom, req_id)
int		for_whom;
int		req_id;
{
	int		dead_host_tid;

	pvm_upkint(&dead_host_tid, 1, 1);
	del_host(dead_host_tid);
	return 0;
}


/*
 * sm_handoff - placeholder handler for SM_HANDOFF.
 *
 * Not supported by this resource manager; a call is reported on
 * stdout.  Both arguments are ignored.
 *
 * Returns 0 so that callers never read an indeterminate value.
 */
int
sm_handoff(for_whom, req_id)
int		for_whom;
int		req_id;
{
	printf("sm_handoff: SHOULD NEVER BE CALLED!\n");
	return 0;
}


/*
 * sm_sched - answer an SM_SCHED request from task `for_whom'.
 *
 * Always replies with the single integer -1, sent to `for_whom' with
 * tag SM_SCHED and wait id `req_id'.
 *
 * Returns 0.
 */
int
sm_sched(for_whom, req_id)
int		for_whom;
int		req_id;
{
	int		ec = -1;

	pvm_initsend(PvmDataDefault);
	pvm_setmwid(pvm_getsbuf(), req_id);
	pvm_pkint(&ec, 1, 1);
	pvm_send(for_whom, SM_SCHED);
	return 0;
}


/*
 * sm_sthost - placeholder handler for SM_STHOST.
 *
 * Not supported by this resource manager; a call is reported on
 * stdout.  Both arguments are ignored.
 *
 * Returns 0 so that callers never read an indeterminate value.
 */
int
sm_sthost(for_whom, req_id)
int		for_whom;
int		req_id;
{
	printf("sm_sthost: SHOULD NEVER BE CALLED!\n");
	return 0;
}


/*
 * sm_sthostack - placeholder handler for SM_STHOSTACK.
 *
 * Not supported by this resource manager; a call is reported on
 * stdout.  Both arguments are ignored.
 *
 * Returns 0 so that callers never read an indeterminate value.
 */
int
sm_sthostack(for_whom, req_id)
int		for_whom;
int		req_id;
{
	printf("sm_sthostack: SHOULD NEVER BE CALLED!\n");
	return 0;
}

#if defined(CO_CHECK)
/*
 * InList - linear membership test.
 *
 * Returns 1 if `me' equals one of the first `n' entries of `list',
 * 0 otherwise (including when `n' is zero or negative).
 */
static int InList(me, n, list)
int me;
int n;
int *list;
{
  int pos = 0;

  while (pos < n)
  {
    if (list[pos] == me)
      return(1);
    pos++;
  }

  return(0);
}

/*
 * CoCheckSendSyn - send a CO_CHECK_SYN message to task `rec' and record
 * its new state.
 *
 * Message layout: MyTid, checkpoint method, the checkpoint file name
 * (only for CO_CHECK_METHOD_FILE), `n', then the `n' task ids in
 * `tids'.  For state T_STAYING the method is forced to
 * CO_CHECK_METHOD_NONE regardless of `ckptMethod'.  After sending, the
 * task's state is updated through set_task_state().
 */
static void CoCheckSendSyn(ckptMethod, taskCkptFile, n, tids, rec, state)
int ckptMethod;
char *taskCkptFile;
int n;
int *tids;
int rec;
int state;
{
	int		method;

	method = (state == T_STAYING) ? CO_CHECK_METHOD_NONE : ckptMethod;

	pvm_initsend(PvmDataDefault);
	pvm_pkint(&MyTid, 1, 1);
	pvm_pkint(&method, 1, 1);
	if (method == CO_CHECK_METHOD_FILE)
		pvm_pkstr(taskCkptFile);	/* file name travels only in FILE mode */

	pvm_pkint(&n, 1, 1);
	pvm_pkint(tids, n, 1);
#ifdef DEBUG
	fprintf(stderr, "Sending CO_CHECK_SYN to t%x\n", rec);
#endif
	pvm_send(rec, CO_CHECK_SYN);

	/* set new task state */
	set_task_state(rec, state);
}

/*
 * CoCheckRecPRT - wait for the next CO_CHECK_PRT message from any task.
 *
 * The message carries the raw bytes of an IP address followed by the
 * raw bytes of a port; they are copied unchanged into sin->sin_addr and
 * sin->sin_port.  No other field of *sin is written.
 *
 * Returns the tid of the sender as reported by pvm_bufinfo().
 */
static int CoCheckRecPRT(sin)
struct sockaddr_in *sin;
{
  int tid;
  int rbuf;

  rbuf = pvm_recv(-1, CO_CHECK_PRT); /* recv address info */

  pvm_bufinfo(rbuf, 0, 0, &tid); /* who sent it */
  pvm_upkbyte((char*)&(sin->sin_addr), sizeof(sin->sin_addr), 1);
  pvm_upkbyte((char*)&(sin->sin_port), sizeof(sin->sin_port), 1);

#ifdef DEBUG
  fprintf(stderr, "Received CO_CHECK_PRT from t%x, ", tid);
  /* ntohl() needs the 32-bit s_addr member, not the enclosing struct;
   * with that fixed the line no longer has to be hidden on Linux. */
  fprintf(stderr, "IP 0x%lx, ", (unsigned long) ntohl(sin->sin_addr.s_addr));
  fprintf(stderr, "port %d\n", ntohs(sin->sin_port));
#endif

  return(tid);
}

/*
 * co_check - handle a checkpoint / migration request.
 *
 * Request layout (active receive buffer): name of the master checkpoint
 * file, checkpoint method, tid of a task to exclude, number and list of
 * tasks to migrate, number and list of hosts to vacate.
 *
 * Phase 1 sends CO_CHECK_SYN to the tasks according to the method:
 *   CO_CHECK_METHOD_FILE  every task gets the name of its own
 *                         checkpoint file; the program exits if a task
 *                         has none.
 *   CO_CHECK_METHOD_PORT  every task is told to checkpoint to a socket.
 *   CO_CHECK_METHOD_MIGR  the listed tasks (translated through
 *                         get_task_ctid()) are told to use a socket and
 *                         marked T_MIGRATING, all others are marked
 *                         T_STAYING.
 * Phase 2 signals every task with CO_CHECK_SIG.
 * Phase 3 collects the results: for PORT the announced addresses are
 * connected to and the data is copied to the per-task checkpoint files
 * in the background, after which the master checkpoint file is written;
 * for MIGR co_check_migrate_restart() takes over.
 *
 * `for_whom' and `req_id' are unused; no reply is sent from here.
 * The vacate list is unpacked but not used further in this function.
 *
 * Returns 0.
 */
int
co_check(for_whom, req_id)
int		for_whom;
int		req_id;
{
	int		i;
	int		AllTidsN;
	int		*AllTids;
	char	ckpt_file_name[1000];
	char	*task_ckpt_file;
	int		ckpt_method;
	int		exclude;
	int		ckpt_fd;
	int		tid;
	int		CkptMigrateN;
	int		CkptVacateN;
	int		*CkptMigrate;
	int		*CkptVacate;
	struct sockaddr_in sin;

#if defined(PROFILE)
	gettimeofday(&tp, &tzp);
#endif
	pvm_upkstr(ckpt_file_name);			/* extract name of master ckpt file */
	pvm_upkint(&ckpt_method, 1, 1);		/* how should the ckpt be created */
	pvm_upkint(&exclude, 1, 1);			/* tid of a task which should be excluded */
	pvm_upkint(&CkptMigrateN, 1, 1);	/* number of tasks to migrate */

	/* Always allocate at least one slot, also for a zero or negative
	 * count, so the pointer is valid and can be freed below. */
	CkptMigrate = (int *) malloc((CkptMigrateN > 0 ? CkptMigrateN : 1) * sizeof(int));
	if (CkptMigrate == 0) {
		fprintf(stderr, "co_check: out of memory\n");
		exit(1);
	}
	if (CkptMigrateN > 0) {
		pvm_upkint(CkptMigrate, CkptMigrateN, 1);
	}

	pvm_upkint(&CkptVacateN, 1, 1);		/* number of hosts to vacate */

	CkptVacate = (int *) malloc((CkptVacateN > 0 ? CkptVacateN : 1) * sizeof(int));
	if (CkptVacate == 0) {
		fprintf(stderr, "co_check: out of memory\n");
		exit(1);
	}
	if (CkptVacateN > 0) {
		/* The vacate list belongs in CkptVacate; unpacking it into
		 * CkptMigrate would clobber the migrate list read above. */
		pvm_upkint(CkptVacate, CkptVacateN, 1);
	}

	AllTidsN = get_task_tid_list(&AllTids, exclude);

	/* Phase 1: tell every task what is about to happen. */
	switch (ckpt_method) {
		case CO_CHECK_METHOD_FILE:    /* write a ckpt directly to a file */
			for (i = 0; i < AllTidsN; i++) {
				/* restart takes care of redistribution */
				if ((task_ckpt_file = get_task_ckpt_file(AllTids[i])) == 0) {
					fprintf(stderr, "No ckpt file for t%x\n", AllTids[i]);
					exit(1);
				}

				CoCheckSendSyn(ckpt_method, task_ckpt_file,
							   AllTidsN, AllTids, AllTids[i], T_CKPTING);
			}
			break;

		case CO_CHECK_METHOD_PORT:    /* write the ckpt to a socket */
			for (i = 0; i < AllTidsN; i++) {
				/* also used for migration based on a ckpt */
				CoCheckSendSyn(ckpt_method, "", AllTidsN,
							   AllTids, AllTids[i], T_CKPTING);
			}
			break;

		case CO_CHECK_METHOD_MIGR:    /* do migration */
			for (i = 0; i < CkptMigrateN; i++) {
				CkptMigrate[i] = get_task_ctid(CkptMigrate[i]);
				CoCheckSendSyn(CO_CHECK_METHOD_PORT, "", AllTidsN,
							   AllTids, CkptMigrate[i], T_MIGRATING);
			}

			for (i = 0; i < AllTidsN; i++) {
				if (!InList(AllTids[i], CkptMigrateN, CkptMigrate)) {
					CoCheckSendSyn(CO_CHECK_METHOD_NONE, "", AllTidsN,
								   AllTids, AllTids[i], T_STAYING);
				}
			}
			break;
	}

	/* Phase 2: signal every task. */
	for (i = 0; i < AllTidsN; i++) {
		pvm_sendsig(AllTids[i], CO_CHECK_SIG);
	}

	/* Phase 3: collect the results. */
	switch (ckpt_method) {
		case CO_CHECK_METHOD_FILE:
			break;

		case CO_CHECK_METHOD_PORT:
			for (i = 0; i < AllTidsN; i++) {
				tid = CoCheckRecPRT(&sin);

				if ((ckpt_fd = connect_to_addr(sin)) >= 0) {
					/* try to connect to adr */
					copy_bytes_to_file_in_background(ckpt_fd,
													 get_task_ckpt_file(tid));
				}
			}

#if 0
			for (i = 0; i < AllTidsN; i++) {
				fprintf(stderr, "Cleaned up for pid %d\n", wait(0));
			}
#endif

			pvm_rm_write_ckpt(ckpt_file_name, AllTidsN, AllTids);
			break;

		case CO_CHECK_METHOD_MIGR:
			co_check_migrate_restart(CkptMigrateN, exclude);
			break;
	}

	free(AllTids);
	free(CkptMigrate);
	free(CkptVacate);
	return 0;
}

/*
 * co_check_frestart - restart all tasks recorded in a master checkpoint
 * file.
 *
 * After pvm_rm_read_from_ckpt() has loaded `ckpt_file_name', every
 * original tid is handed to restart_task().  The function then waits
 * for one CO_CHECK_RJN message per task, each carrying the original and
 * the current tid, records the pair with set_task_tid() and finally
 * multicasts the complete mapping (count, original tids, current tids)
 * to the restarted tasks with tag CO_CHECK_LST.
 *
 * `exclude' is accepted for interface compatibility but not used.
 *
 * Returns 0 on success, -1 if the checkpoint file cannot be read or
 * memory for the tid list cannot be allocated.
 */
int co_check_frestart(ckpt_file_name, exclude)
char *ckpt_file_name;
int exclude;
{
	int tid_count;
	int *tid_list;
	int *orig_tid_list;
	int i;

	if (pvm_rm_read_from_ckpt(ckpt_file_name) < 0) {
		return (-1);
	}

	tid_count = get_orig_tid_list(&orig_tid_list);

	/* Allocate before any task is restarted so that a failure leaves
	 * nothing half-started. */
	tid_list = (int *) malloc((tid_count > 0 ? tid_count : 1) * sizeof(int));
	if (tid_list == NULL) {
		fprintf(stderr, "co_check_frestart: out of memory\n");
		free(orig_tid_list);
		return (-1);
	}

	for (i = 0; i < tid_count; i++) {
		restart_task(orig_tid_list[i]);
	}

	/* Rejoin messages arrive in arbitrary order; each one overwrites
	 * slot i of both lists with the pair it reports. */
	for (i = 0; i < tid_count; i++) {
		pvm_recv(-1, CO_CHECK_RJN);
		pvm_upkint(&(orig_tid_list[i]), 1, 1);
		pvm_upkint(&(tid_list[i]), 1, 1);
		set_task_tid(tid_list[i], orig_tid_list[i]);
#ifdef DEBUG
		printf("RJN: otid 0x%x(%d) ctid 0x%x(%d)\n", orig_tid_list[i],
			   orig_tid_list[i], tid_list[i], tid_list[i]);
#endif
	}

	pvm_initsend(PvmDataDefault);
	pvm_pkint(&tid_count, 1, 1);
	pvm_pkint(orig_tid_list, tid_count, 1);
	pvm_pkint(tid_list, tid_count, 1);
	pvm_mcast(tid_list, tid_count, CO_CHECK_LST);
	free(tid_list);
	free(orig_tid_list);

#if defined(PROFILE)
	gettimeofday(&post_tp, &tzp);
	/* timeval fields are not plain ints, and microseconds must be
	 * zero-padded to six digits to read as a decimal fraction. */
	fprintf(stderr, "Pre checkpoint: %ld.%06ld Post checkpoint: %ld.%06ld\n",
			(long) tp.tv_sec, (long) tp.tv_usec,
			(long) post_tp.tv_sec, (long) post_tp.tv_usec);
#endif

	return(0);
}

/*
 * co_check_restart - message handler for a restart request.
 *
 * Reads the master checkpoint file name and the tid to exclude from
 * the active receive buffer and passes both to co_check_frestart().
 * The result of co_check_frestart() is discarded; `for_whom' and
 * `req_id' are unused.
 */
void
co_check_restart(for_whom, req_id)
int		for_whom;
int		req_id;
{
	char	master_ckpt[1000];
	int		skip_tid;

	/* name of the master ckpt file */
	pvm_upkstr(master_ckpt);
	/* tid of a task which should be excluded */
	pvm_upkint(&skip_tid, 1, 1);

	(void) co_check_frestart(master_ckpt, skip_tid);
}


/*
 * co_check_migrate_restart - restart the tasks that are being migrated.
 *
 * For each of the `tid_count' migrating tasks a CO_CHECK_PRT message is
 * awaited; the announced socket address (as a string) becomes the
 * task's checkpoint "file", the task is marked T_RESTARTING and
 * restart_task() is called for its original tid.  Then one
 * CO_CHECK_RJN message per task is collected, each carrying the
 * original and the current tid, which are recorded with
 * set_task_tid().  Finally the mapping (count, original tids, current
 * tids) is multicast with tag CO_CHECK_LST to every task returned by
 * get_task_tid_list(..., exclude).
 *
 * Returns 0 on success, -1 if the tid lists cannot be allocated (in
 * which case no task has been restarted yet).
 */
int co_check_migrate_restart(tid_count, exclude)
int tid_count;
int exclude;
{
	int *tid_list;
	int *orig_tid_list;
	int i;
	int		tid;
	struct	sockaddr_in	sin;
	int		slots;

	/* Allocate up front: failing after restart_task() has been called
	 * would leave restarted tasks waiting for a list that never comes. */
	slots = (tid_count > 0) ? tid_count : 1;
	tid_list = (int *) malloc(slots * sizeof(int));
	orig_tid_list = (int *) malloc(slots * sizeof(int));
	if (tid_list == NULL || orig_tid_list == NULL) {
		fprintf(stderr, "co_check_migrate_restart: out of memory\n");
		free(tid_list);
		free(orig_tid_list);
		return (-1);
	}

	for (i = 0; i < tid_count; i++) {
		tid = CoCheckRecPRT(&sin);

		set_task_ckpt_file(tid, sin_to_string(&sin));
		set_task_state(tid, T_RESTARTING);
		restart_task(get_task_otid(tid));
	}

	for (i = 0; i < tid_count; i++) {
		pvm_recv(-1, CO_CHECK_RJN);
		pvm_upkint(&orig_tid_list[i], 1, 1);
		pvm_upkint(&tid_list[i], 1, 1);
		set_task_tid(tid_list[i], orig_tid_list[i]);
#ifdef DEBUG
		printf("RJN: otid 0x%x(%d) ctid 0x%x(%d)\n", orig_tid_list[i], orig_tid_list[i], tid_list[i], tid_list[i]);
#endif
	}

	pvm_initsend(PvmDataDefault);
	pvm_pkint(&tid_count, 1, 1);
	pvm_pkint(orig_tid_list, tid_count, 1);
	pvm_pkint(tid_list, tid_count, 1);
	free(tid_list);
	free(orig_tid_list);

	/* The mapping only lists the migrated tasks, but it is delivered
	 * to every task returned by get_task_tid_list(). */
	tid_count = get_task_tid_list(&tid_list, exclude);

	pvm_mcast(tid_list, tid_count, CO_CHECK_LST);

	free(tid_list);

#if defined(PROFILE)
	gettimeofday(&post_tp, &tzp);
	/* timeval fields are not plain ints, and microseconds must be
	 * zero-padded to six digits to read as a decimal fraction. */
	fprintf(stderr, "Pre checkpoint: %ld.%06ld Post checkpoint: %ld.%06ld\n",
			(long) tp.tv_sec, (long) tp.tv_usec,
			(long) post_tp.tv_sec, (long) post_tp.tv_usec);
#endif

	return(0);
}


/*
 * co_check_exec_name - record the executable name reported by a task.
 *
 * Unpacks one string from the active receive buffer and stores it as
 * the binary name of task `for_whom' via set_task_binary_name().
 * `req_id' is unused.
 *
 * NOTE(review): pvm_upkstr() takes no destination size, so exec_name
 * relies on the sender keeping the name below 1000 bytes.
 *
 * Returns 0.
 */
int
co_check_exec_name(for_whom, req_id)
int		for_whom;
int		req_id;
{
	char	exec_name[1000];

	pvm_upkstr(exec_name);

	set_task_binary_name(for_whom, exec_name);
	return 0;
}
#endif
