/******************************************************************************
*
*  Copyright (C) 1995 A. Bode, J. Pruyne and G. Stellner
*
*  This file is part of CoCheck
*
*  CoCheck is free software; you can redistribute it and/or
*  modify it under the terms of the GNU Library General Public
*  License as published by the Free Software Foundation; either
*  version 2 of the License, or (at your option) any later version.
*
*  CoCheck is distributed in the hope that it will be useful,
*  but WITHOUT ANY WARRANTY; without even the implied warranty of
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
*  Library General Public License for more details.
*
*  You should have received a copy of the GNU Library General Public
*  License along with this library; if not, write to the Free
*  Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
*  Contact to the authors:
*
*  electronic mail: {bode,stellner}@informatik.tu-muenchen.de
*
*  paper mail:      Prof. Dr. A. Bode
*                   Lehrstuhl f"ur Rechnertechnik und Rechnerorganisation
*                   Institut f"ur Informatik
*                   Technische Universit"at M"unchen
*                   80290 M"unchen
*                   Germany
*
******************************************************************************/
/******************************************************************************

  m.c,v
  1995/11/07 13:51:59
  1.7
  Exp
  stellner

  Authors: G. Stellner

  Description: Test program for CoCheck: a set of PVM tasks that pass a
               token around a ring

******************************************************************************/
#include <stdio.h>
#include <signal.h>
#include <sys/wait.h>
#include <fcntl.h>

#include "pvm3.h"

#include "error.h"
#include "types.h"
#include "kojakmalloc.h"
#include "CoCheckExport.h"

/* Usage text main() prints when it is not called with exactly two arguments. */
#define USAGE "m proc memory"
/* Initial value of nproc in main(); main() then overwrites it from argv[1]. */
#define NPROC 3
/* Index into gtids at which main() has spawn_tasks() store the first child tid. */
#define SND 1
/* Not referenced anywhere in the code visible in this file. */
#define REC 2

/* Debug-only externals; neither is referenced in the code visible in this file. */
#ifdef DEBUG
extern int _CkptNum;
extern void printMapTable();
#endif

/*
 * Global task id table. main() allocates it with one int per task; slot 0
 * is set to pvm_parent(), or to the task's own tid when there is no parent.
 */
int *gtids;

/*
 * InitMapping - multicast the task id table to the worker tasks.
 *
 * tids   table of nproc task ids; tids[0] is skipped as a destination
 * nproc  number of entries in tids
 *
 * The message carries nproc followed by all nproc entries of tids and is
 * sent with message tag 11 to tids[1] .. tids[nproc-1].
 */
void InitMapping(tids, nproc)
int *tids;
int nproc;
{
	int *workers = tids + 1;	/* destinations start after slot 0 */
	int nworkers = nproc - 1;

	pvm_initsend(PvmDataDefault);

	/* payload: the entry count first, then the entries themselves */
	pvm_pkint(&nproc, 1, 1);
	pvm_pkint(tids, nproc, 1);

	pvm_mcast(workers, nworkers, 11);
}

/*
 * GetMyIndex - receive the task id table and locate the caller in it.
 *
 * tids   on entry tids[0] names the task to receive from; on return the
 *        table holds the *nproc entries unpacked from the message, with
 *        tids[0] overwritten by pvm_parent()
 * nproc  receives the number of entries unpacked from the message
 * mytid  task id to look for
 *
 * The message is expected with tag 11 and in the layout InitMapping() packs
 * (count, then entries). tids must have room for *nproc entries; this
 * function cannot check that.
 *
 * Returns the index i (1 <= i < *nproc) with tids[i] == mytid, or *nproc
 * when no such entry exists.
 */
int GetMyIndex(tids, nproc, mytid)
int *tids;
int *nproc;
int mytid;
{
	int slot = 1;	/* slot 0 is never searched */

	pvm_recv(tids[0], 11);
	pvm_upkint(nproc, 1, 1);
	pvm_upkint(tids, *nproc, 1);

	tids[0] = pvm_parent();

	/* advance until the caller's tid is found or the table is exhausted */
	while (slot < *nproc && tids[slot] != mytid)
		slot++;

	return slot;
}

#if 0
/*
 * CoCheck - checkpoint and restart driver (disabled: this whole block is
 * excluded from compilation by the surrounding #if 0).
 *
 * binary  executable name handed to spawn_tasks() for the restart
 * act     number of tasks; their tids are read from tids[1] .. tids[act]
 * tids    task id table; tids[0] is packed as the "checkpointer tid"
 *
 * Sequence, as written:
 *   1. send signal CO_CHECK_SIG to every task
 *   2. send every task a CO_CHECK_SYN message holding tids[0], act and
 *      the act task ids
 *   3. request PvmTaskExit notifications (tag 123) and wait for act of them
 *   4. prompt on stdout and wait for input
 *   5. respawn the tasks, then collect one CO_CHECK_RJN message per task,
 *      each carrying a pair stored in otids[j] / otids[act+j]
 *   6. send the complete pair table to every otids[act+j] as CO_CHECK_LST
 *
 * NOTE(review): this code would need fixes before being re-enabled, see
 * the notes at gets() and spawn_tasks() below.
 */
void CoCheck(binary, act, tids)
char	*binary;
int act;
int *tids;
{
  int j;
  int tid;
  int *otids;
  int *ctids;

  /* step 1: signal each task */
  for (j=1; j<act+1; j++)
  {
    _Db(0, "CoCheck", _LOC_, "CO_CHECK_SIG --> 0x%x", tids[j]);
    pvm_sendsig(tids[j], CO_CHECK_SIG);
  }

  /* step 2: build one message and send the same buffer to each task */
  pvm_initsend(PvmDataRaw);     /* only homogeneous machines participate */
  pvm_pkint(&(tids[0]), 1, 1);  /* checkpointer tid */
  pvm_pkint(&act, 1, 1);        /* number of tasks */

  for (j=1; j<act+1; j++)
    pvm_pkint(&(tids[j]), 1, 1);

  for (j=1; j<act+1; j++)
  {
    _Db(0, "CoCheck", _LOC_, "CO_CHECK_SYN --> 0x%x", tids[j]);
    pvm_send(tids[j], CO_CHECK_SYN);
  }

  /* step 3: one tag-123 notification is consumed per task */
  pvm_notify(PvmTaskExit, 123, act, &tids[1]);

  for(j = 0; j < act; j++) {
	  pvm_recv(-1, 123);
	  pvm_upkint(&tid, 1, 1);
	  _Db(0, "CoCheck", _LOC_, "Received exit notification for t%x", tid);
  }

  /* room for 2*act ints: first half otids[j], second half otids[act+j] */
  otids = (int *)TMALLOC(2*act, int*, "OTIDS");

  /* NOTE(review): whether _Error(FAIL, ...) returns is not visible here */
  if (otids == (int *)0)
    _Error(FAIL, "CoCheck", _LOC_, "no memory for tid list!");

  printf("Hit return to restart: "); fflush(stdout);
  /* NOTE(review): gets() writes an unbounded line into the int j */
  gets(&j);

  /*
   * NOTE(review): four arguments are passed here, but the spawn_tasks()
   * defined in this file takes five (binary, arg1, arg2, nproc, tidlist).
   */
  spawn_tasks(binary, "-_condor_restart", act, otids);

  /* step 5: collect one pair per restarted task */
  for (j=0; j<act; j++)
  {
    pvm_recv(-1, CO_CHECK_RJN);
    pvm_upkint(&(otids[j]), 1, 1);
    pvm_upkint(&(otids[act+j]), 1, 1);
    _Db(0, "CoCheck", _LOC_, "otid 0x%x ctid 0x%x", otids[j], otids[act+j]);
	_UpdateOrInsert(&(otids[j]), &(otids[act+j]), 1);
  }

  /* step 6: distribute the whole pair table */
  pvm_initsend(PvmDataRaw);
  pvm_pkint(&act, 1, 1);        /* number of tasks */
  pvm_pkint(&(otids[0]), 2*act, 1); /* mapping of original/current tids */

  for (j=0; j<act; j++)
  {
    _Db(0, "CoCheck", _LOC_, "CO_CHECK_LST --> 0x%x", otids[act+j]);
    pvm_send(otids[act+j], CO_CHECK_LST);
  }
  _Db(0, "CoCheck", _LOC_, "Checkpoint Complete!!!!!");
}
#endif

/*
 * spawn_tasks - start nproc instances of binary, one pvm_spawn() call each.
 *
 * binary   executable name handed to pvm_spawn()
 * arg1     first argument given to every spawned task
 * arg2     second argument given to every spawned task
 * nproc    number of tasks to start
 * tidlist  table with room for nproc entries; &tidlist[i] is handed to the
 *          i-th pvm_spawn() call as its tid output
 *
 * Returns the number of tasks that were actually started. A pvm_spawn()
 * result that is zero or negative (an error code) is logged and does not
 * change the count.
 */
int spawn_tasks(binary, arg1, arg2, nproc, tidlist)
char	*binary;
char	*arg1;
char	*arg2;
int		nproc;
int		*tidlist;
{
	char	*av[3];
	int		i;
	int		started;
	int		act = 0;

	/* null-terminated argument vector shared by all spawned tasks */
	av[0] = arg1;
	av[1] = arg2;
	av[2] = 0;

	for (i = 0; i < nproc; i++) {
		/* the target architecture is hard-coded to "SUN4" */
		started = pvm_spawn(binary, av, PvmTaskArch, "SUN4", 1, &(tidlist[i]));
		if (started > 0)
			act += started;
		else
			_Db(0, "spawn_tasks", _LOC_, "pvm_spawn failed for task %d (rc %d)",
				i, started);
	}
	_Db(0, "main", _LOC_, "created %d task(s)", act);
	return act;
}

void ring(tids, me, nproc)
int *tids;
int me;
int nproc;
{
	int i;
	int k;
	int dest = (me+1)%nproc;
	int src = (me > 0)?me-1:nproc-1;
	int frm;

	i = 1;

	while(1)
	{
		if (me == 0)			/* kick off the computation */
		{
			_Db(0, "main", _LOC_, "send token >%d< --> 0x%x", i, tids[dest]);
			pvm_initsend(PvmDataDefault);
			pvm_pkint(&i, 1, 1);
			pvm_pkint(&me, 1, 1);
			pvm_send(tids[dest], 1);

			_Db(0, "main", _LOC_, "recv token >%d< <-- 0x%x", i, tids[src]);
			pvm_recv(tids[src], 1);
			pvm_upkint(&k, 1, 1);
			pvm_upkint(&frm, 1, 1);
			_Db(0, "main", _LOC_, "got >%d< <-- 0x%x(%d)", k, tids[frm], frm);

			if (k != i || frm != src) {
				_Db(0, "main", _LOC_, "ERROR!!! i: %d k: %d src: %d frm: %d\n",
					i, k, src, frm);
			}

			i += 1;
		}
		else
		{
			_Db(0, "main", _LOC_, "recv next token <-- 0x%x", tids[src]);
			pvm_recv(tids[src], 1);
			pvm_upkint(&k, 1, 1);
			pvm_upkint(&frm, 1, 1);

			if (frm != src)
				_Db(0, "main", _LOC_, "ERROR!!! k: %d src: %d frm: %d\n", k, src, frm);

			_Db(0, "main", _LOC_, "send token >%d< --> 0x%x", k, tids[dest]);
			pvm_initsend(PvmDataDefault);
			pvm_pkint(&k, 1, 1);
			pvm_pkint(&me, 1, 1);
			pvm_send(tids[dest], 1);
		}
	}
}

void main(argc, argv)
int argc;
char *argv[];
{
	int tid;
	int me;
	int cc;
	int i, k;
	int n;
	int stid;
	int act = 0;
	int nproc = NPROC;
	int memsz;
	char *mem;
	char	outfile[100];
	int		new_fd;
	char	*av[15];

	if (argc != 3)
	{
		printf("%s\n", USAGE);
		exit(1);
	}

	nproc = atoi(argv[1]);
	memsz = atoi(argv[2]);

	gtids = (int *)malloc(nproc * sizeof(int));
	mem = (char *)malloc(memsz);

	for (i=0; i<memsz; i+=1024)
		mem[i] = (char)(i%255);

	_SetDbLevel(0);

	tid = pvm_mytid();  /* enroll pvm */

	sprintf(outfile, "/tmp/out_t%x", tid);
	new_fd = open(outfile, O_RDWR | O_CREAT | O_TRUNC, 0666);
	if (new_fd < 0) {
		_Db(0, "main", _LOC_, "Failed to open outfile %s", outfile);
		exit(0);
	}
	dprintf_init( new_fd );

	_Db(0, "main", _LOC_, "Pid: %d mytid: 0x%x(%d)", getpid(), tid, tid);

	gtids[0] = pvm_parent();

	_Db(0, "main", _LOC_, "Parent tid: 0x%x(%d)", gtids[0], gtids[0]);

	if ( gtids[0] < 0)
	{
		gtids[0] = tid;
		me = 0;

		/* start the rest of the application */
		_Db(0, "main", _LOC_, "spawning %d tasks", nproc);
		act = spawn_tasks(argv[0], argv[1], argv[2], nproc - 1, &gtids[SND]);

		for (i=0; i< nproc; i++)
			_Db(0, "main", _LOC_, "tasks[%d]: 0x%x", i, gtids[i]);

		/* I am the parent so initialize all the others */
		InitMapping(gtids, nproc);	/* distribute the mapping */
	}
	else
	{
		/*	  sprintf(outfile, "/tmp/out_t%x", tid);
			  unlink(outfile);
			  new_fd = open(outfile, O_WRONLY | O_CREAT, 0666);
			  close( 1 );
			  dup( new_fd );
			  close( 2 );
			  dup( new_fd );
			  close( new_fd ); */

		me = GetMyIndex(gtids, &nproc, tid); /* receive init info from parent */

		fprintf(stderr, "me = %d\n", me);
		fflush(stderr);
		_Db(0, "main", _LOC_, "argv[1] = %s pid = %d", argv[1], getpid());

	}

  ring(gtids, me, nproc);

  pvm_exit();  /* forget about pvm from now on */
  exit(0);
}
