Newsgroups: comp.parallel.pvm
From: e_meier@unibw-hamburg.de (Wilhelm Meier)
Subject: PVM 3.3.9 / pvm_upk... End of buffer on SUNMP
Organization: University of the Federal Armed Forces, Hamburg
Date: 13 Oct 1995 12:38:09 GMT
Message-ID: <E_MEIER.95Oct13133809@trier.unibw-hamburg.de>


Hello,

I have the following problem with the timing.c / timing_slave.c examples of the
PVM distribution. I made a small modification to the two programs so
that timing / timing_slave unpack the data they receive. This
works well if you run on ONE architecture, e.g. only SUNMP or only
SUN4SOL2. If you add an additional host of the other type to the VM, I
get the following messages (starting timing on the SUNMP):

libpvm [t4003f]: pvm_upkint(): End of buffer

and in pvml.<uid> on the SUNMP-machine:

[t80040000] netinput() bogus pkt from 139.11.160.44:33791

Once again: this happens if timing runs on a SUNMP and timing_slave
runs on SUN4SOL2. If both run on ONE type of machine, all works fine.
The modification of the timing / timing_slave example comes at the end
of this mail.
After that follows another, VERY SIMPLE example (test_master.c / test_slave.c)
which shows the same symptom - even if I link on the SUNMP with
-lpvm3s! The slave only sends a block of data to the master, which
unpacks the data. This time you get:

libpvm [t40013]: pvm_upkdouble(): End of buffer

The only way to get this working is to use, even on the SUNMP, the
SUN4SOL2 binaries of the user programs AND of pvm and pvmd3! So I
think there is a bug in the shared-memory part of PVM.

Do you have any comments ?

Wilhelm

The following parts are:

timing.c
timing_slave.c

test_master.c
test_slave.c
heat.h




---------------------------------------------------------------------
timing.c (modified)
---------------------------------------------------------------------
/*
*	Build switch for the poster's modification: when defined, the #ifdef MOD
*	sections below pack a length word ahead of the data and unpack every reply
*	received from the slave.
*/
#define	MOD			/* Modification !!! */

/*
*	timing.c
*
*	Does a few communication timing tests on pvm.
*	Uses `timing_slave' to echo messages.
* ----------------------------------------------------------------------------
*     If this test is run over machines with different data formats,
*     then change 'ENCODING' to PvmDataDefault in timing and timing_slave.
* ----------------------------------------------------------------------------
*
*	9 Dec 1991  Manchek
*  14 Oct 1992  Geist  - revision to pvm3
*   6 Mar 1994  Geist  - synch tasks and add direct route
*/

#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <time.h>

#include <sys/types.h>
#include <fcntl.h>
#include <stdio.h>
#include <math.h>
#include "pvm3.h"


/* Executable name handed to pvm_spawn() to start the echo task. */
#define SLAVENAME "timing_slave"
/*
*	Encoding passed to every pvm_initsend() call in this program.  Per the note
*	in the file header, change it to PvmDataDefault (here and in timing_slave)
*	when the test runs over machines with different data formats.
*/
#define ENCODING  PvmDataRaw

/*
*	elapsed_usec()
*
*	Returns the time from *start to *end, in microseconds, for two
*	samples taken with gettimeofday().
*/

static int
elapsed_usec(const struct timeval *start, const struct timeval *end)
{
	return (int)((end->tv_sec - start->tv_sec) * 1000000L
		+ (end->tv_usec - start->tv_usec));
}

/*
*	main()
*
*	Spawns one SLAVENAME task, waits for its "ready" message, then runs
*	  1. a round-trip test with a minimal message, and
*	  2. pack/send timing for messages of 25 .. 250000 ints.
*	Per-sample and average times are written to stdout, errors to stderr.
*
*	The slave is always killed before leaving.  Exit status is 0 when every
*	test completed and 1 on any failure.
*/

int
main(int argc, char **argv)
{
	int mytid;					/* my task id */
	int stid = 0;				/* slave task id */
	int reps = 20;				/* number of samples per test */
	struct timeval tv1, tv2;	/* for timing */
	int dt1, dt2;				/* time for one iter */
	int at1, at2;				/* accum. time */
	int numint;					/* message length in ints */
	int nbytes;					/* message length in bytes */
	int n;
	int cc;						/* return code of the last pvm call */
	int status = 1;				/* cleared only after all tests ran */
	int *iarray = 0;
#ifdef MOD
	int len;					/* element count echoed by the slave */
	int dummy = 0;				/* payload of the minimal message */
#endif

	/* enroll in pvm */

	if ((mytid = pvm_mytid()) < 0) {
		exit(1);
	}
	printf("i'm t%x\n", mytid);

	/* start up slave task */

	if (pvm_spawn(SLAVENAME, (char**)0, 0, "", 1, &stid) != 1 || stid < 0) {
		fputs("can't initiate slave\n", stderr);
		goto bail;
	}

	/* Wait for slave task to start up */
	pvm_setopt(PvmRoute, PvmRouteDirect);
	if ((cc = pvm_recv(stid, 0)) < 0) {
		fprintf(stderr, "recv error %d\n", cc);
		goto bail;
	}
	printf("slave is task t%x\n", stid);

	/*
	*  round-trip timing test
	*/

	puts("Doing Round Trip test, minimal message size\n");
	at1 = 0;

	/* pack buffer once; the same send buffer is reused for every sample */

	pvm_initsend(ENCODING);
#ifndef MOD
	pvm_pkint(&stid, 1, 1);
#else
	/* the slave expects a count followed by that many ints */
	len = 1;
	pvm_pkint(&len, 1, 1);
	pvm_pkint(&dummy, 1, 1);
#endif

	puts(" N     uSec");
	for (n = 1; n <= reps; n++) {
		gettimeofday(&tv1, (struct timezone*)0);

		if (pvm_send(stid, 1)) {
			fprintf(stderr, "can't send to \"%s\"\n", SLAVENAME);
			goto bail;
		}

		if ((cc = pvm_recv(-1, -1)) < 0) {
			fprintf(stderr, "recv error %d\n", cc);
			goto bail;
		}
#ifdef MOD
		if ((cc = pvm_upkint(&dummy, 1, 1)) < 0) {
			fprintf(stderr, "unpack error %d\n", cc);
			goto bail;
		}
#endif

		gettimeofday(&tv2, (struct timezone*)0);

		dt1 = elapsed_usec(&tv1, &tv2);
		printf("%2d %8d\n", n, dt1);
		at1 += dt1;
	}
	printf("RTT Avg uSec %d\n", at1 / reps);

	/*
	*  bandwidth test for different message lengths
	*/

	puts("\nDoing Bandwidth tests\n");

	for (numint = 25; numint < 1000000; numint *= 10) {
		nbytes = numint * (int)sizeof(int);
		printf("\nMessage size %d\n", nbytes);
		at1 = at2 = 0;

		/* zero-filled so no indeterminate values are packed */
		iarray = (int*)calloc(numint, sizeof(int));
		if (!iarray) {
			fputs("can't allocate message buffer\n", stderr);
			goto bail;
		}

		puts(" N  Pack uSec  Send uSec");
		for (n = 1; n <= reps; n++) {
			gettimeofday(&tv1, (struct timezone*)0);

			pvm_initsend(ENCODING);
#ifdef MOD
			pvm_pkint(&numint, 1, 1);
#endif
			pvm_pkint(iarray, numint, 1);

			gettimeofday(&tv2, (struct timezone*)0);
			dt1 = elapsed_usec(&tv1, &tv2);

			gettimeofday(&tv1, (struct timezone*)0);

			if (pvm_send(stid, 1)) {
				fprintf(stderr, "can't send to \"%s\"\n", SLAVENAME);
				goto bail;
			}

			if ((cc = pvm_recv(-1, -1)) < 0) {
				fprintf(stderr, "recv error %d\n", cc);
				goto bail;
			}
#ifdef MOD
			if ((cc = pvm_upkint(&len, 1, 1)) < 0) {
				fprintf(stderr, "unpack error %d\n", cc);
				goto bail;
			}
			/* iarray holds exactly numint ints; never unpack more */
			if (len != numint) {
				fprintf(stderr, "echoed length %d, expected %d\n",
					len, numint);
				goto bail;
			}
			if ((cc = pvm_upkint(iarray, len, 1)) < 0) {
				fprintf(stderr, "unpack error %d\n", cc);
				goto bail;
			}
#endif

			gettimeofday(&tv2, (struct timezone*)0);
			dt2 = elapsed_usec(&tv1, &tv2);

			printf("%2d   %8d   %8d\n", n, dt1, dt2);
			at1 += dt1;
			at2 += dt2;
		}

		free(iarray);
		iarray = 0;

		/* clamp the averages to 1 so the rates below never divide by zero */
		if (!(at1 /= reps))
			at1 = 1;
		if (!(at2 /= reps))
			at2 = 1;
		puts("Avg uSec");
		printf("     %8d   %8d\n", at1, at2);
		puts("Avg Byte/uSec");
		printf("     %8f   %8f\n",
			nbytes / (double)at1,
			nbytes / (double)at2);
	}

	puts("\ndone");
	status = 0;

bail:
	free(iarray);
	/* the slave loops forever, so it must be killed on every exit path */
	if (stid > 0)
		pvm_kill(stid);
	pvm_exit();
	exit(status);
}

---------------------------------------------------------------------
timing_slave.c (modified)
---------------------------------------------------------------------
/*
*	Build switch for the poster's modification: when defined, the #ifdef MOD
*	sections below unpack each received message (count, then data) and
*	pack it again before echoing it to the sender.
*/
#define MOD


/*
*	timing_slave.c
*
*	See timing.c
*/

#include <sys/types.h>
#include <fcntl.h>
#include <stdio.h>
#include "pvm3.h"

/*
*	Encoding passed to every pvm_initsend() call in this program.  timing.c
*	notes that it should be changed to PvmDataDefault in both programs when
*	the test runs over machines with different data formats.
*/
#define ENCODING  PvmDataRaw

/*
*	main()
*
*	Echo task for timing.c.  Sends an empty "ready" message (tag 0) to the
*	parent, then answers every received message with a message of tag 2
*	sent back to its originator.
*
*	With MOD defined each message is expected to hold an int count followed
*	by that many ints; both are unpacked and packed again for the reply.
*	Without MOD a single pre-packed buffer holding mytid is sent each time.
*
*	The loop only ends on a pvm error or a malformed message, in which case
*	the task leaves pvm and returns 1.
*/

int
main(int argc, char **argv)
{
	int mytid;   /* my task id */
	int dtid;    /* driver task */
	int bufid;
	int cc;      /* return code of the last pvm call */
#ifdef MOD
	enum { MAXINTS = 1000000 };   /* capacity of the echo buffer, in ints */
	int len;                      /* element count announced by the sender */
	int *buffer;
#endif

	/* enroll in pvm */

	if ((mytid = pvm_mytid()) < 0)
		return 1;

#ifdef MOD
	buffer = (int *) calloc(MAXINTS, sizeof(int));
	if (!buffer) {
		fputs("timing_slave: can't allocate echo buffer\n", stderr);
		pvm_exit();
		return 1;
	}
#endif

	/* tell parent I am ready */

	pvm_setopt(PvmRoute, PvmRouteDirect);
	pvm_initsend(ENCODING);
	pvm_send( pvm_parent(), 0 );

	/* pack mytid in buffer */
#ifndef MOD
	pvm_initsend(ENCODING);
	pvm_pkint(&mytid, 1, 1);
#endif
	/* our job is just to echo back to the sender when we get a message */

	for (;;) {
		bufid = pvm_recv(-1, -1);
		if (bufid < 0) {
			/* retrying a failed receive would only spin */
			fprintf(stderr, "timing_slave: recv error %d\n", bufid);
			break;
		}
		pvm_bufinfo(bufid, (int*)0, (int*)0, &dtid);
#ifdef MOD
		if ((cc = pvm_upkint(&len, 1, 1)) < 0) {
			fprintf(stderr, "timing_slave: unpack error %d\n", cc);
			break;
		}
		/* len comes off the wire: never unpack past the end of buffer */
		if (len < 0 || len > MAXINTS) {
			fprintf(stderr, "timing_slave: bad message length %d\n", len);
			break;
		}
		if ((cc = pvm_upkint(buffer, len, 1)) < 0) {
			fprintf(stderr, "timing_slave: unpack error %d\n", cc);
			break;
		}
#endif
		pvm_freebuf(pvm_getrbuf());  /* for shared memory refcount hang */
#ifdef MOD
		pvm_initsend(ENCODING);
		pvm_pkint(&len, 1, 1);
		pvm_pkint(buffer, len, 1);
#endif
		pvm_send(dtid, 2);
	}

	/* only reached on error; buffer is released when the process ends */
	pvm_exit();
	return 1;
}
---------------------------------------------------------------------
(test_master.c)
---------------------------------------------------------------------

#include <heat.h>   
#include "pvm3.h"

/* Executable name passed to pvm_spawn() to start the slave tasks. */
#define SLAVE_PROG_NAME	"test_slave"




/*****
 * Spawns the slaves, then receives one DATA_INIT message from each of
 * them in turn and unpacks its N_SAMPLES doubles.
 *
 * Returns 0 when every block was received and unpacked, 1 when enrolling,
 * spawning, allocating, receiving or unpacking failed.
 *****/
int
main(int argc, char* argv[])
{
	int		this_tid, i;
	int		child_tids[N_TASK];
	int		nslaves = 2;
	int		n_samples = N_SAMPLES;
	int		number_started;
	int		receive_buffer;
	int		info;
	int		status = 0;
	double		*data = NULL;

	this_tid = pvm_mytid();
	if (this_tid < 0) {
		printf("\nErrorcode %d\n", this_tid);
		return 1;
	}

	/* start the slaves */
	number_started = pvm_spawn(SLAVE_PROG_NAME, (char **) 0, PvmTaskDefault,
				   (char *) 0, nslaves, child_tids);

	/* a negative result means the call itself failed; child_tids is then
	 * not looked at at all */
	if (number_started < 0) {
		printf("\nErrorcode %d\n", number_started);
		pvm_exit();
		return 1;
	}

	for (i = 0; i < nslaves; i++) {
		if (child_tids[i] < 0) {
			printf("\nErrorcode %d", child_tids[i]);
		}
		else {
			printf("\nTID: %d", child_tids[i]);
		}
	}

	/* receiving from a slave that never started cannot succeed */
	if (number_started < nslaves) {
		printf("\nonly %d of %d slaves started\n", number_started, nslaves);
		pvm_exit();
		return 1;
	}

	data = (double *) calloc(n_samples, sizeof(double));
	if (data == NULL) {
		printf("\ncan't allocate %d samples\n", n_samples);
		pvm_exit();
		return 1;
	}

	/* slaves are served strictly in spawn order; data is overwritten by
	 * each block */
	for (i = 0; i < nslaves; i++) {
		receive_buffer = pvm_recv(child_tids[i], DATA_INIT);
		if (receive_buffer < 0) {
			printf("\nrecv error %d\n", receive_buffer);
			status = 1;
			break;
		}
		info = pvm_upkdouble(data, n_samples, 1);
		if (info < 0) {
			printf("\nunpack error %d\n", info);
			status = 1;
			break;
		}
	}

	pvm_exit();

	return status;
}

---------------------------------------------------------------------
(test_slave.c)
---------------------------------------------------------------------

#include <heat.h>
#include "pvm3.h"


/*****
 * Sends one block of N_SAMPLES zero-initialised doubles to the parent
 * task under tag DATA_INIT, packed with PvmDataRaw, then leaves pvm.
 *
 * Returns 0 when the block was sent, 1 when enrolling, finding the
 * parent, allocating, packing or sending failed.
 *****/
int
main(int argc, char* argv[])
{
	int		this_tid, parent_tid;
	int		n_samples = N_SAMPLES;
	int		status = 1;
	double		*data;

	this_tid = pvm_mytid();
	if (this_tid < 0)
		return 1;

	/* a negative parent tid cannot be used as a send destination */
	parent_tid = pvm_parent();
	if (parent_tid < 0) {
		pvm_exit();
		return 1;
	}

	data = (double *) calloc(n_samples, sizeof(double));

	/* each step only runs when everything before it succeeded */
	if (data
	    && pvm_initsend(PvmDataRaw) >= 0
	    && pvm_pkdouble(data, n_samples, 1) >= 0
	    && pvm_send(parent_tid, DATA_INIT) >= 0)
		status = 0;

	pvm_exit();
	return status;
}

---------------------------------------------------------------------
(heat.h)
---------------------------------------------------------------------
#ifndef HEAT_H
#define HEAT_H

/* Capacity of the master's task-id array. */
#define	N_TASK		20
/* Number of doubles in the data block each slave sends to the master. */
#define	N_SAMPLES	1000

/* Message Tags */

#define END_TAG		4	/* not referenced by test_master.c / test_slave.c */
#define	DATA_INIT	5	/* tag of the slave's data block */

#endif /* HEAT_H */
---------------------------------------------------------------------
-- 
                             _\\|//_ 
                             ( O-O )
---------------------------o00--(_)--00o------------------------------
Dr.-Ing. Wilhelm Meier		Phone:	(49)(0)40 / 65412524
UniBwH, FB ET			FAX:	(49)(0)40 / 65412822
Holstenhofweg 85		email:	e_meier@unibw-hamburg.de
D-22043	Hamburg, GERMANY	http://www.unibw-hamburg.de/EWEB/ANT/meier.html



