/******************************************************************************
*
*  Copyright (C) 1995 A. Bode, J. Pruyne and G. Stellner
*
*  This file is part of CoCheck
*
*  CoCheck is free software; you can redistribute it and/or
*  modify it under the terms of the GNU Library General Public
*  License as published by the Free Software Foundation; either
*  version 2 of the License, or (at your option) any later version.
*
*  CoCheck is distributed in the hope that it will be useful,
*  but WITHOUT ANY WARRANTY; without even the implied warranty of
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
*  Library General Public License for more details.
*
*  You should have received a copy of the GNU Library General Public
*  License along with this library; if not, write to the Free
*  Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
*  Contact to the authors:
*
*  electronic mail: {bode,stellner}@informatik.tu-muenchen.de
*
*  paper mail:      Prof. Dr. A. Bode
*                   Lehrstuhl f"ur Rechnertechnik und Rechnerorganisation
*                   Institut f"ur Informatik
*                   Technische Universit"at M"unchen
*                   80290 M"unchen
*                   Germany
*
******************************************************************************/
/******************************************************************************

  protocol.c,v
  1995/11/07 13:51:40
  1.8

  Exp

  stellner

  Authors: G. Stellner

  Description: Core routines to capture the state of the network

******************************************************************************/
/* RCS identification string (file name, revision, date, author, state). */
static char rcs_id[] = "protocol.c,v 1.8 1995/11/07 13:51:40 stellner Exp";

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/param.h>
#include <netinet/in.h>
#include <netdb.h>

#include "pvm3.h"

#include "types.h"
#include "error.h"
#include "kojakmalloc.h"
#include "CoCheckExport.h"

/* Tid of the checkpointer task; unpacked from the sync message in
 * ReadyMsgs(), -1 until then. */
static int CkptTid = -1;
/* Checkpoint method (one of CO_CHECK_METHOD_*), unpacked in ReadyMsgs(). */
static int CkptMethod;
/* Listening socket created for CO_CHECK_METHOD_PORT; ReadyMsgs() resets it
 * to -1 on every call before looking at the method. */
static int Ckpt_sock_fd;
/* Set to 1 at the start of ReadyMsgs() and cleared to 0 when the method is
 * CO_CHECK_METHOD_NONE; reported to callers by ReallyDoCkpt(). */
static int DoICkpt;

/* The functions in this module are all called from a signal handler, so we
 * should be careful about what we do here, e.g. calling malloc is avoided.
 */

/*
 * create_listen_port - open a passive TCP socket on a system-chosen port.
 *
 * sock_addr: out parameter.  On success it holds the local host's IPv4
 *            address (as resolved via gethostname()/gethostbyname()) and
 *            the port the kernel assigned to the listening socket, both in
 *            network byte order.  On failure it is left zero-filled.
 *
 * Returns the listening socket descriptor, or -1 on any failure.  Unlike
 * earlier revisions, a failing bind/listen/getsockname/host lookup no longer
 * falls through with a half-initialised socket: the descriptor is closed and
 * -1 is returned, which is the same error value already used when socket()
 * itself fails.
 */
int create_listen_port( sock_addr )
struct sockaddr_in		*sock_addr;
{
	int		sock;
	socklen_t	len;
	char	host[MAXHOSTNAMELEN];
	struct  hostent	*hp;

	memset(sock_addr, 0, sizeof (*sock_addr));

	sock = socket(AF_INET, SOCK_STREAM, 0);
	if (sock < 0) {
		return -1;
	}

	/* The zeroed address and port ask for "any interface" and an
	 * ephemeral port; only the family has to be filled in explicitly. */
	sock_addr->sin_family = AF_INET;

	if (bind(sock, (struct sockaddr *) sock_addr, sizeof (*sock_addr)) < 0) {
		perror("bind");
		goto fail;
	}

	if (listen(sock, 1) < 0) {
		perror("listen");
		goto fail;
	}

	/* Find out which port the kernel picked for us. */
	len = sizeof (*sock_addr);
	if (getsockname(sock, (struct sockaddr *) sock_addr, &len) < 0) {
		perror("getsockname");
		goto fail;
	}

	if (gethostname(host, sizeof (host)) < 0) {
		perror("gethostname");
		goto fail;
	}
	host[sizeof (host) - 1] = '\0';	/* truncated names may lack a NUL */

	/* getsockname() reports the wildcard address, which is useless to a
	 * remote peer, so replace it with this host's resolved address. */
	hp = gethostbyname(host);
	if (hp == NULL || hp->h_addrtype != AF_INET ||
		hp->h_length != (int) sizeof (sock_addr->sin_addr) ||
		hp->h_addr_list[0] == NULL) {
		/* gethostbyname() does not set errno, so perror() would mislead */
		fprintf(stderr, "gethostbyname: cannot resolve local host to an IPv4 address\n");
		goto fail;
	}

	memcpy(&sock_addr->sin_addr, hp->h_addr_list[0],
		   sizeof (sock_addr->sin_addr));

	return sock;

fail:
	close(sock);
	memset(sock_addr, 0, sizeof (*sock_addr));
	return -1;
}


/*
 * ReadyMsgs - digest the sync message and tell every other task we are ready.
 *
 * mid: id of the receive buffer holding the CO_CHECK_SYN message, or a
 *      negative value if that message still has to be received.
 *
 * The sync message is unpacked strictly in this order: checkpointer tid,
 * checkpoint method, method specific data (a file name for
 * CO_CHECK_METHOD_FILE), number of tasks, then one tid per task.  A
 * CO_CHECK_RDY message is sent to every listed task except ourselves.
 *
 * Side effects: sets CkptTid, CkptMethod, Ckpt_sock_fd and DoICkpt.
 *
 * Returns the number of tasks in the list.
 */
static int ReadyMsgs(mid)
int mid;
{
  struct sockaddr_in	listen_addr;
  char	file_name[1000];
  int self_tid;
  int peer_tid;
  int saved_rbuf;
  int task_count;
  int idx;

  /* defaults until the method in the sync message says otherwise */
  Ckpt_sock_fd = -1;
  DoICkpt = 1;

  self_tid = PVM_mytid();       /* my current tid as PVM knows it */

  if (mid < 0)                  /* sync message not fetched yet, do it now */
    mid = PVM_recv(-1, CO_CHECK_SYN);

  /* make the sync message the active receive buffer, remember the old one */
  saved_rbuf = PVM_setrbuf(mid);

  PVM_upkint(&CkptTid, 1, 1);    /* who is the checkpointer */
  PVM_upkint(&CkptMethod, 1, 1); /* how is the checkpoint to be taken */

  if (CkptMethod == CO_CHECK_METHOD_FILE)
  {
    /* the message carries the name of the file to checkpoint to */
    PVM_upkstr(file_name);
    set_checkpoint_file_name( file_name );
  }
  else if (CkptMethod == CO_CHECK_METHOD_PORT)
  {
    /* open a passive port and report its IP address and port number to
       the checkpointer; the connection itself is accepted later */
    Ckpt_sock_fd = create_listen_port( &listen_addr );
    PVM_initsend(PvmDataDefault);
    PVM_pkbyte(&(listen_addr.sin_addr), sizeof(listen_addr.sin_addr), 1);
    PVM_pkbyte(&(listen_addr.sin_port), sizeof(listen_addr.sin_port), 1);
    PVM_send(CkptTid, CO_CHECK_PRT);
  }
  else if (CkptMethod == CO_CHECK_METHOD_NONE)
  {
    /* remember that no checkpoint is to be written later on */
    DoICkpt = 0;
  }
  else
  {
    _Db(0, "ReadyMsgs", _LOC_, " Unknown checkpoint method %d",
			  CkptMethod);
  }

  PVM_upkint(&task_count, 1, 1); /* length of the task list */
  _Db(0, "ReadyMsgs", _LOC_, "%d task in list", task_count);

  /* tids are unpacked one at a time so no buffer has to be allocated */
  for (idx = 0; idx < task_count; idx++)
  {
    PVM_upkint(&peer_tid, 1, 1);
    _Db(0, "ReadyMsgs", _LOC_, "mytid 0x%x, tid 0x%x", self_tid, peer_tid);

    if (self_tid == peer_tid)   /* no ready message to myself */
      continue;

    _Db(0, "ReadyMsgs", _LOC_, "CO_CHECK_RDY -->0x%x", peer_tid);
    PVM_initsend(PvmDataRaw);   /* empty message, nothing to convert */
    PVM_send(peer_tid, CO_CHECK_RDY);
  }

  PVM_setrbuf(saved_rbuf);      /* restore the previous receive buffer */

  return task_count;
}

/*
 * ReallyDoCkpt - tell the caller whether a checkpoint is to be written.
 *
 * Returns the current value of the DoICkpt flag: 0 after ReadyMsgs() saw
 * CO_CHECK_METHOD_NONE, 1 after it saw any other method.
 */
int
ReallyDoCkpt()
{
	int	take_ckpt = DoICkpt;

	return take_ckpt;
}

/*
 * Wait4ReadyMsgs - collect the CO_CHECK_RDY messages of all other tasks.
 *
 * n: number of tasks in the task list, including this one.
 *
 * Receives messages of any tag from any sender until n - 1 ready messages
 * have arrived.  Every other message received meanwhile is handed to
 * _Save2Buf() together with the _Rarp() translation of its sender tid.
 *
 * Returns the number of ready messages received.
 */
static int Wait4ReadyMsgs(n)
int n;
{
  int outstanding;
  int ready_count;
  int buf_id;
  int msg_tag;
  int sender;

  ready_count = 0;
  outstanding = n - 1;          /* I do not send a ready message to myself */

  _Db(0, "Wait4ReadyMsgs", _LOC_, "trying to get %d CO_CHECK_RDY",
      outstanding);

  while (outstanding > 0)
  {
    buf_id = PVM_recv(-1, -1);  /* take whatever arrives next */

    PVM_bufinfo(buf_id, (int *)0, &msg_tag, &sender);
    _Db(0, "Wait4ReadyMsgs", _LOC_, "mid %d tag %d <-- 0x%x",
        buf_id, msg_tag, sender);

    if (msg_tag != CO_CHECK_RDY)
    {
      /* an application message still in transit: keep it so it can be
         received after the checkpoint */
      _Save2Buf(buf_id, _Rarp(sender));
      continue;
    }

    _Db(0, "Wait4ReadyMsgs", _LOC_, "****** CO_CHECK_RDY <-- 0x%x", sender);
    ready_count++;              /* one more ready message seen */
    outstanding--;              /* one less to wait for */
  }

  return ready_count;
}


/*
 * _LeavePVM - run the ready-message protocol and detach from PVM.
 *
 * tidlistmid: id of the receive buffer holding the sync message, or a
 *             negative value if it still has to be received (passed on to
 *             ReadyMsgs()).
 *
 * Sends ready messages to all tasks and waits for theirs.  If no checkpoint
 * is to be written (ReallyDoCkpt() returns 0) the function returns at that
 * point and the task stays in PVM.  Otherwise it leaves all groups, exits
 * PVM and, when a listening socket was set up by ReadyMsgs(), waits for one
 * incoming connection and hands the connected descriptor to
 * set_checkpoint_fd().
 */
void _LeavePVM(tidlistmid)
int tidlistmid;
{
	int		n;
	int		new_sock;
	struct sockaddr	new_addr;
	socklen_t	addr_len;

	_Db(0, "_LeavePVM", _LOC_, "Entering _LeavePVM...(%d)", tidlistmid);
	n = ReadyMsgs(tidlistmid);    /* send ready message to all tasks */
	(void) Wait4ReadyMsgs(n);     /* capture state of the network */

	if (!ReallyDoCkpt()) {
		return;
	}

	_LeaveGroups();               /* quit membership in all groups */
	_Db(0, "_LeavePVM", _LOC_, "Quitting PVM...");
	PVM_exit();                   /* standalone now, quit PVM */

	/* At this point, we've disconnected from PVM, so we can safely wait
	   for someone to connect to us to migrate to */
	if (Ckpt_sock_fd != -1) {
		/* A signal arriving while we block must not abort the wait, so
		   retry as long as accept() was merely interrupted. */
		do {
			addr_len = sizeof(new_addr);
			new_sock = accept(Ckpt_sock_fd, &new_addr, &addr_len);
		} while (new_sock < 0 && errno == EINTR);

		if (new_sock < 0) {
			perror("accept");
		}

		close(Ckpt_sock_fd);
		Ckpt_sock_fd = -1;        /* descriptor is gone, do not reuse it */

		/* NOTE(review): as before, a failed accept() still passes -1 on
		   to set_checkpoint_fd(); confirm that this is what the
		   checkpoint library expects. */
		set_checkpoint_fd( new_sock );
	}
}

/*
 * ReRegisterMe - enroll in PVM again and report the new tid.
 *
 * Exports the process id as PVMEPID, re-enrolls via PVM_mytid() and sends
 * a CO_CHECK_RJN message to the checkpointer containing the original tid
 * followed by the new current tid.
 */
static void ReRegisterMe()
{
  /* static because putenv() keeps referring to the string it is given */
  static char	pid_env[20];
  int current_tid;
  int original_tid;

  sprintf(pid_env, "PVMEPID=%d", getpid());
  putenv(pid_env);

  _Db(0, "ReRegisterMe", _LOC_, "set PVMEPID=%s with (%s)",
	  getenv("PVMEPID"), pid_env);

  current_tid = PVM_mytid();    /* enrolls in PVM again, yields the new tid */
  original_tid = pvm_mytid();   /* the tid I had originally */

  _Db(0, "ReRegisterMe", _LOC_, "0x%x new ctid: 0x%x",
      original_tid, current_tid);

  /* tell the checkpointer: original tid first, then the current one */
  PVM_initsend(PvmDataDefault);
  PVM_pkint(&original_tid, 1, 1);
  PVM_pkint(&current_tid, 1, 1);
  PVM_send(CkptTid, CO_CHECK_RJN);
}

/*
 * InsTidsMap - receive the tid mapping list and store it in the tid table.
 *
 * Receives a CO_CHECK_LST message from the checkpointer.  It holds a count
 * followed by that many original tids and then the same number of current
 * tids.  Both halves are unpacked into one temporary array and passed to
 * _UpdateOrInsert(); the array is released again before returning.
 */
static void InsTidsMap()
{
  int *tids;                    /* both halves, original tids first */
  int *orig_tids;
  int *cur_tids;
  int count;
  int k;

  PVM_recv(CkptTid, CO_CHECK_LST);
  PVM_upkint(&count, 1, 1);     /* number of entries in the mapping */

  tids = (int *)TMALLOC(2*count, int, "TIDSLST");

  if (tids == NULL)
    _Error(FAIL, "InsTidsMap", _LOC_, "no memory for tid list!");

  orig_tids = tids;
  cur_tids = tids + count;

  PVM_upkint(orig_tids, count, 1); /* the original tids come first, */
  PVM_upkint(cur_tids, count, 1);  /* then their current counterparts */

  for (k = 0; k < count; k++)
  {
    _Db(0, "InsTidsMap", _LOC_, "%d o 0x%x c 0x%x",
        k, orig_tids[k], cur_tids[k]);
  }

  _UpdateOrInsert(orig_tids, cur_tids, count); /* enter into mapping table */
  FREE(tids);
}

/*
 * _ReJoinPVM - counterpart of _LeavePVM().
 *
 * A task for which ReallyDoCkpt() is true first re-registers with PVM.
 * Every task then reads the tid mapping list.  Finally, again only if
 * ReallyDoCkpt() is true, notifications and group memberships are
 * re-installed.
 */
void _ReJoinPVM()
{
	if (ReallyDoCkpt())
		ReRegisterMe();

	InsTidsMap();

	if (!ReallyDoCkpt())
		return;

	_ReInsNotifications();
	_ReInsGroups();
}
