/* This program collects timing statistics for node-to-node message passing
 * 
 * To measure performance of native send/receive, compile as
 *		Paragon:	icc -nx -O -DPGON_NODE nntime.c
 * 		iPSC/860:	icc -O -DI860_NODE nntime.c -node
 *		CM5(CMMD):  cc -c -DCM5_NODE nntime.c; cmmd-ld -comp cc -node nntime.o
 */

#include <stdio.h>
#include <memory.h>
#ifdef CM5_NODE
#include <cm/cmmd.h>
#include <cm/timers.h>
#endif
#ifdef PVM
#include "pvm3.h"
#endif

/*
 * Tunable parameters.
 *
 * NPROC   - size of the task-id table: the parent task plus the NPROC-1
 *           tasks it spawns.
 * SAMPLE  - number of send/ack round trips performed per measurement; the
 *           timing routines divide the elapsed time by this value.
 * MAXSIZE - largest message, counted in doubles.  The measurement loops
 *           start at 1 and multiply the size by 10 each step, which is why
 *           it must be a power of 10.
 * ENCODE  - data encoding handed to pvm_initsend().
 */
#define NPROC	4
#define SAMPLE	1			/* round trips per measurement */
#define MAXSIZE	10000		/* must be a power of 10 */
#define ENCODE	PvmDataRaw	


/*
 * Timer abstraction.
 *
 * Exactly one of the three sections below is compiled in, each providing
 * the same four macros:
 *	TIMER_CLEAR   - reset the timer state
 *	TIMER_START   - mark the beginning of a measured region
 *	TIMER_STOP    - mark the end of a measured region
 *	TIMER_ELAPSED - time between START and STOP
 * The timing routines multiply TIMER_ELAPSED by 1000000 to obtain
 * microseconds, i.e. they treat it as a value in seconds.
 */
#ifdef CM5_NODE

/* CMMD node timer number 1 (this header is also included near the top of
 * the file).  NOTE(review): elapsed value presumably in seconds - confirm
 * against the CMMD documentation. */
#include <cm/cmmd.h>
#define TIMER_CLEAR		CMMD_node_timer_clear(1) 
#define TIMER_START		CMMD_node_timer_start(1) 
#define TIMER_STOP		CMMD_node_timer_stop(1) 
#define TIMER_ELAPSED	CMMD_node_timer_elapsed(1) 

#endif /*CM5_NODE*/

#if defined(PGON_NODE) || defined(I860_NODE)

/* Paragon / iPSC-860: two dclock() readings kept in file-scope doubles;
 * dclock is declared here as returning double. */
double tstart, tstop, dclock();
#define TIMER_CLEAR		(tstart = tstop = 0)
#define TIMER_START		(tstart = dclock())
#define TIMER_STOP		(tstop = dclock())
#define TIMER_ELAPSED	(tstop - tstart)

#endif /*PGON_NODE/I860_NODE*/

#if !defined(PGON_NODE) && !defined(I860_NODE) && !defined(CM5_NODE)

/* Fallback when none of the node macros is defined: wall-clock time from
 * gettimeofday(), combined into seconds (whole seconds plus microseconds
 * scaled by 1e-6). */
#include <sys/time.h>
struct timeval tv1, tv2;
#define TIMER_CLEAR     (tv1.tv_sec = tv1.tv_usec = tv2.tv_sec = tv2.tv_usec =0)
#define TIMER_START     gettimeofday(&tv1, (struct timezone*)0)
#define TIMER_STOP      gettimeofday(&tv2, (struct timezone*)0)
#define TIMER_ELAPSED	(tv2.tv_sec-tv1.tv_sec+(tv2.tv_usec-tv1.tv_usec)*1.E-6)

#endif 


/*
 * Program entry point.
 *
 * PVM build: a task whose pvm_parent() is negative acts as the host and
 * runs mom(), which spawns the remaining tasks.  Every spawned task first
 * receives the task-id table from its parent (message tag 0) and then runs
 * child1(), child2() or child3() depending on which slot of the table
 * holds its own task id.
 *
 * Paragon / iPSC-860 build: node 0 runs sender(), every other node runs
 * catcher().
 *
 * CM5 build: node 0 runs sender(), node 1 runs catcher(); all nodes then
 * call CMMD_all_msgs_wait().
 *
 * Returns 0.  (The original called exit() with no argument, which left the
 * process exit status indeterminate.)
 */
int
main()
{
#ifdef PVM

	int mytid;				/* my task id */
	int tids[NPROC];		/* task ids: [0] = parent, [1..NPROC-1] = spawned tasks */

	mytid = pvm_mytid();
	tids[0] = pvm_parent();
/* pvm_setdebug(3);  */

	if (tids[0] < 0)
		mom(mytid, tids);
	else {
		/* the parent multicasts the complete task-id table with tag 0 */
		pvm_recv(tids[0], 0);
		pvm_upkint(tids, NPROC, 1);
		printf("mytid = %d\n", mytid);
		if (mytid == tids[1])
			child1(tids);
		else if (mytid == tids[2])
			child2();
		else
			child3(tids);
	}
	pvm_exit();

#else /*PVM*/

#if defined(PGON_NODE) || defined(I860_NODE)

	if (mynode())
		catcher();
	else
		sender();

#endif /*PGON_NODE || I860_NODE*/

#ifdef CM5_NODE

	CMMD_fset_io_mode(stdout, CMMD_independent);
	if (CMMD_self_address() == 0)
		sender();
	if (CMMD_self_address() == 1)
		catcher();
	CMMD_all_msgs_wait();

#endif /*CM5_NODE*/

#endif /*PVM*/

	return 0;
}


#ifdef PVM

/*
 * Host side of the PVM test.
 *
 * Stores its own task id in tids[0], spawns NPROC-1 copies of "nntime"
 * (their ids land in tids[1..NPROC-1]), multicasts the completed table to
 * them with tag 0, then waits for one tag-0 message from any task and
 * checks its contents with validate().
 *
 * mytid - task id of the calling (host) task
 * tids  - table of NPROC task ids, filled in by this routine
 *
 * Returns 0 on success, -1 if pvm_spawn() could not start all NPROC-1
 * tasks.  On a failed spawn, tasks that did start are killed: without the
 * full set nobody sends the test message and the host would block in
 * pvm_recv() forever.
 */
mom(mytid, tids)
	int mytid;
	int tids[];
{
	int numt;		/* number of tasks pvm_spawn actually started */
	int i;

	tids[0] = mytid;
	numt = pvm_spawn("nntime", (char**)0, 0, "", NPROC-1, &tids[1]);
	if (numt < NPROC-1) {
		fprintf(stderr, "nntime: pvm_spawn started %d of %d tasks\n",
			numt, NPROC-1);
		/* only the first numt entries after tids[0] are valid task ids */
		for (i = 1; i <= numt; i++)
			pvm_kill(tids[i]);
		return -1;
	}

	pvm_initsend(ENCODE);
	pvm_pkint(tids, NPROC, 1);
	pvm_mcast(&tids[1], NPROC-1, 0);

	/* test host-node recv */

	pvm_recv(-1, 0);
	validate();
	return 0;
}


/*
 * Timing driver, run by the task whose id is in tids[1].
 *
 * First receives and validates the tag-0 test message, then exchanges one
 * synchronization message/ack with tids[3], runs time_one() for message
 * sizes 1, 10, 100, ... MAXSIZE doubles, and finally sends a tag-13
 * message to tids[3] telling it to stop.
 *
 * tids - table of NPROC task ids
 */
child1(tids)
	int tids[];
{
	/*
	 * Payload of the synchronization message.  The receiver ignores the
	 * value, but it is initialized so that an indeterminate int is never
	 * packed and sent.
	 */
	int size = 0;

	/* test node-to-node recv */
	pvm_recv(-1, 0);
	validate();

	/* do timing measurements */
	puts("Node-to-node Send/Ack\n");

	/* synchronize */
	pvm_initsend(ENCODE);
	pvm_pkint(&size, 1, 1);
	pvm_send(tids[3], 0);
	pvm_recv(-1, 0);		/* receive ack */

	for (size = 1; size <= MAXSIZE; size *= 10)
		time_one(size, tids[3]);
	/* alternative measurement: time_ind(size, tids[3]); */

	/* inform child3 to exit */
	pvm_initsend(ENCODE);
	pvm_pkstr("death");
	pvm_send(tids[3], 13);
}


/*
 * Data source and echo task (any spawned task that is neither tids[1] nor
 * tids[2]).
 *
 * Multicasts a label string followed by MAXSIZE doubles (the squares
 * 0, 1, 4, ...) with tag 0 to the first NPROC-1 entries of tids.  It then
 * prepares a one-int acknowledgement in the send buffer and answers every
 * incoming message with that acknowledgement until a message with tag 13
 * arrives.
 *
 * tids - table of NPROC task ids
 */
child3(tids)
	int tids[];
{
	double payload[MAXSIZE];
	char label[32];
	int i, src, buf, nbytes, tag;

	/* test node-to-node send */

	i = 0;
	while (i < MAXSIZE) {
		payload[i] = i*i;
		i++;
	}
	sprintf(label, "%d doubles from node zero", MAXSIZE);
	pvm_initsend(ENCODE);
	pvm_pkstr(label);
	pvm_pkdouble(payload, MAXSIZE, 1);
	pvm_mcast(tids, NPROC-1, 0);

	/* build the acknowledgement once; it is re-sent for every message */
	pvm_initsend(ENCODE);
	pvm_pkint(&tids[3], 1, 1);

	/* receive data and ack */

	for (;;) {
		buf = pvm_recv(-1, -1);
		pvm_bufinfo(buf, &nbytes, &tag, &src);
		if (tag == 13)
			return;		/* stop request */
#ifdef KSR1
		pvm_upkdouble(payload, nbytes/sizeof(double), 1); 
#endif
		pvm_send(src, 0);
	}
}


/*
 * Passive receiver, run by the task whose id is in tids[2]: waits for one
 * tag-0 message from any task and checks its contents with validate().
 */
child2()
{
	int any_task = -1;		/* wildcard source for pvm_recv */

	pvm_recv(any_task, 0);
	validate();
}


/*
 * Check the contents of the test message.
 *
 * Callers invoke this right after pvm_recv().  It unpacks a label string
 * and MAXSIZE doubles, prints the label, and verifies that element i
 * equals i*i to within 0.01.  The first mismatch is reported and stops the
 * scan; if every element matches, a success line is printed.
 *
 * The comparison is two-sided: the original tested only
 * (i*i - data[i]) > 0.01, so a value larger than expected went undetected.
 */
validate()
{
	double data[MAXSIZE];
	double diff;
	char str[32];
	int i;

	pvm_upkstr(str);
	printf("%s\n", str);
	pvm_upkdouble(data, MAXSIZE, 1);

	for (i = 0; i < MAXSIZE; i++) {
		diff = i*i - data[i];
		if (diff > 0.01 || diff < -0.01) {
			printf("error: data[%d] = %g\n", i, data[i]);
			break;
		}
	}

	if (i == MAXSIZE)
		printf("%d doubles received correctly\n\n\n", i);
}

	
/*
 * Measure packing, sending and acknowledgement time separately.
 *
 * Fills the first `size` doubles with squares, prints a table header, and
 * for each of SAMPLE iterations prints the microseconds spent in
 * pvm_pkdouble(), in pvm_send(), and in waiting for the ack in pvm_recv().
 *
 * size - message length in doubles (callers pass at most MAXSIZE)
 * dtid - task id of the destination task
 */
time_ind(size, dtid)
int size;
int dtid;
{
	double data[MAXSIZE];
	int pack_us, send_us, ack_us;
	int k;

	for (k = 0; k < size; k++)
		data[k] = k*k;

	printf("Pack (us)   Send (us)   Ack (us)   (Data size = %d)", 8*size);
	puts("\n=======================================================");

	for (k = 0; k < SAMPLE; k++) {
#ifdef TIMER_CLEAR
		pvm_initsend(ENCODE);

		/* packing */
		TIMER_CLEAR;
		TIMER_START;
		pvm_pkdouble(data, size, 1);
		TIMER_STOP;
		pack_us = 1000000*TIMER_ELAPSED;

		/* sending */
		TIMER_CLEAR;
		TIMER_START;
		pvm_send(dtid, 0);
		TIMER_STOP;
		send_us = 1000000*TIMER_ELAPSED;

		/* waiting for the acknowledgement */
		TIMER_CLEAR;
		TIMER_START;
		pvm_recv(-1, 0);
		TIMER_STOP;
		ack_us = 1000000*TIMER_ELAPSED;

		printf("%d           %d            %d\n", pack_us, send_us, ack_us);
#endif
	}
	putchar('\n');
}


/*
 * Measure the PVM send + acknowledgement round-trip time.
 *
 * Packs `size` doubles once, then times SAMPLE iterations of pvm_send()
 * followed by pvm_recv() of the ack.  Prints the average round trip in
 * microseconds, the resulting rate in MB/s, and the message size in bytes
 * (8 per double).
 *
 * size - message length in doubles (callers pass at most MAXSIZE)
 * dtid - task id of the destination task
 */
time_one(size, dtid)
int size;
int dtid;
{
	double data[MAXSIZE];
	double rate;
	int usec;
	int k;

#ifdef TIMER_CLEAR

	k = 0;
	while (k < size) {
		data[k] = k*k;
		k++;
	}

	/* the message is packed outside the timed region */
	pvm_initsend(ENCODE);
	pvm_pkdouble(data, size, 1);

	TIMER_CLEAR;
	TIMER_START;
	for (k = 0; k < SAMPLE; k++) {
		pvm_send(dtid, 0);
		pvm_recv(-1, 0);		/* receive ack */
	}
	TIMER_STOP;

	usec = 1000000*TIMER_ELAPSED/SAMPLE;
	rate = 8.0*(float)size/(float)usec;
	printf("Send+ACK T = %d (us)  (%.4f MB/s)      Data size: %d\n",
		usec, rate, 8*size);

#endif /*TIMER_CLEAR*/
}

#endif /*PVM*/


/* 
 * these routines are used to measure performance of native message-passing
 * primitives
 */
/*
 * Native-build timing driver.
 *
 * Exchanges one synchronization message/ack with node 1, runs time_ack()
 * for message sizes 1, 10, 100, ... MAXSIZE doubles, and finally sends a
 * zero-length message of type 13 to node 1, which makes catcher() exit.
 */
sender()
{
	/*
	 * size doubles as the payload of the synchronization message; the
	 * receiver ignores the value, but it is initialized so that an
	 * indeterminate int is never transmitted.
	 */
	int size = 0, ack = 0;

	/* synchronize */
#if defined(PGON_NODE) || defined(I860_NODE)
	csend(123, &size, sizeof(int), 1, 0);
	crecv(-1, &ack, sizeof(int));
#endif /*defined(PGON_NODE) || defined(I860_NODE)*/

#ifdef CM5_NODE
	CMMD_send_block(1, 123, &size, sizeof(int));
	CMMD_receive_block(CMMD_ANY_NODE, CMMD_ANY_TAG, &ack, sizeof(int));
#endif

	/* do timing measurements */
	puts("Node-to-node Send/Ack\n");
	for (size = 1; size <= MAXSIZE; size *= 10)
		time_ack(size);

	/* tell the catcher to exit */
#if defined(PGON_NODE) || defined(I860_NODE)
	csend(13, (char *)0, 0, 1, 0);
#endif 
#ifdef CM5_NODE
	CMMD_send_block(1, 13, (char *)0, 0);
#endif
}


/*
 * Native-build echo loop: receive a message of up to MAXSIZE doubles from
 * any node and answer node 0 with a one-int acknowledgement (type 456).
 * A message of type 13 terminates the process with exit(0).
 */
catcher()
{
	int reply = 1;
	double buf[MAXSIZE];

	for (;;) {
#ifdef CM5_NODE
		CMMD_receive_block(CMMD_ANY_NODE, CMMD_ANY_TAG, (char *)buf, 
			MAXSIZE*sizeof(double));
		if (CMMD_msg_tag() == 13)
			exit(0);
		CMMD_send_block(0, 456, &reply, sizeof(int));
#endif
#if defined(PGON_NODE) || defined(I860_NODE)
		crecv(-1, (char *)buf, MAXSIZE*sizeof(double));
		if (infotype() == 13)
			exit(0);
		csend(456, &reply, sizeof(int), 0, 0);
#endif
	}
}


/*
 * Measure the native send + acknowledgement round-trip time.
 *
 * Fills `size` doubles with squares, then times SAMPLE iterations of a
 * blocking send to node 1 followed by a blocking receive of the ack.
 * Prints the average round trip in microseconds, the resulting rate in
 * MB/s, and the message size in bytes (8 per double).
 *
 * NOTE: an earlier experiment that timed memcpy() to and from a second
 * buffer at the start and end of the measured region is disabled.
 *
 * size - message length in doubles (callers pass at most MAXSIZE)
 */
time_ack(size)
int size;
{
	double data[MAXSIZE];
	double rate;
	int usec;
	int k;

#ifdef TIMER_CLEAR

	k = 0;
	while (k < size) {
		data[k] = k*k;
		k++;
	}

	TIMER_CLEAR;
	TIMER_START;
	for (k = 0; k < SAMPLE; k++) {
		int ack;

#if defined(PGON_NODE) || defined(I860_NODE)
		csend(123, (char *)data, size*sizeof(double), 1, 0);
		crecv(-1, &ack, sizeof(int));
#endif /*defined(PGON_NODE) || defined(I860_NODE)*/

#ifdef CM5_NODE
		CMMD_send_block(1, 123, (char *)data, size*sizeof(double));
		CMMD_receive_block(CMMD_ANY_NODE, CMMD_ANY_TAG, &ack, sizeof(int));
#endif
	}
	TIMER_STOP;

	usec = 1000000.0*TIMER_ELAPSED/(double)SAMPLE;
	rate = 8.0*(float)size/(float)usec;
	printf("Send+ACK T = %d (us)  (%.4f MB/s)      Data size: %d\n",
		usec, rate, 8*size);

#endif /*TIMER_CLEAR*/
}


