Newsgroups: comp.parallel.pvm
From: manchek@thud.cs.utk.edu (Bob Manchek)
Subject: Are you getting bad data in messages when using PvmDataInPlace
Organization: Computer Science Dept, University of Tennessee, Knoxville
Date: 5 Oct 1995 14:57:10 GMT
Message-ID: <450ro6INNqvg@CS.UTK.EDU>

with two or more hosts?  I thought so.

Here is a patch that will be going into 3.3.10; it fixes a problem in
the pvmd (a regression introduced between 3.3.7 and 3.3.8).

The problem is in the code that refragments messages that are too long
to send in one gulp (usually caused by InPlace packing, but sometimes
by hosts with different MTUs).  The first message fragment gets
clipped by 16 bytes, so data beyond about 4k bytes into the
message ends up off by 16 bytes.

-b

*** ../../netlib/pvm3.3.9/src/pvmd.c	Fri Sep 08 13:58:58 1995
--- pvmd.c	Wed Oct 04 13:12:10 1995
***************
*** 1320,1325 ****
--- 1320,1326 ----
  	char *cp;
  	int len;
  	int cc;
+ 	char dummy[DDFRAGHDR];
  
  /*
  	len = 0;
***************
*** 1375,1380 ****
--- 1376,1387 ----
  		}
  		cp -= DDFRAGHDR;
  		len += DDFRAGHDR;
+ 
+ 	/*
+ 	* save under packet header, because databuf may be shared.
+ 	* we don't worry about message header, because it's only at the head.
+ 	*/
+ 		BCOPY(cp, dummy, sizeof(dummy));
  		if (cp < pp->pk_buf) {
  			pvmlogerror("netoutput() no headroom for packet header\n");
  			return 0;
***************
*** 1424,1429 ****
--- 1431,1438 ----
  			stats.sdok++;
  #endif
  
+ 		BCOPY(dummy, cp, sizeof(dummy));	/* restore under header */
+ 
  	/*
  	* set timer for next retry
  	*/
***************
*** 3074,3079 ****
--- 3083,3091 ----
  *	If data plus header length is greater than host mtu,
  *	refragment into >1 pkts.
  *
+ *	We have to pay special attention to the FFSOM packet - make it
+ *	shorter so there's room to prepend the message header later.
+ *
  *	If send window to host has room, push packet to opq.
  */
  
***************
*** 3082,3088 ****
  	struct hostd *hp;
  	struct pkt *pp;
  {
! 	int mmtu = hp->hd_mtu < ourudpmtu ? hp->hd_mtu : ourudpmtu;
  
  	pp->pk_flag = (pp->pk_flag & (FFSOM|FFEOM)) | FFDAT;
  	if (debugmask & PDMPACKET) {
--- 3094,3101 ----
  	struct hostd *hp;
  	struct pkt *pp;
  {
! 	int maxl = (hp->hd_mtu < ourudpmtu ? hp->hd_mtu : ourudpmtu) - DDFRAGHDR;
! 	int llim = pp->pk_flag & FFSOM ? maxl - TTMSGHDR : maxl;
  
  	pp->pk_flag = (pp->pk_flag & (FFSOM|FFEOM)) | FFDAT;
  	if (debugmask & PDMPACKET) {
***************
*** 3091,3102 ****
  		pvmlogerror(pvmtxt);
  	}
  
! 	if (pp->pk_len + DDFRAGHDR <= mmtu) {
  		LISTPUTBEFORE(hp->hd_txq, pp, pk_link, pk_rlink);
  
  	} else {
  		struct pkt *pp2;
- 		int lim = mmtu - DDFRAGHDR;
  		char *cp = pp->pk_dat;
  		int togo;
  		int n;
--- 3104,3114 ----
  		pvmlogerror(pvmtxt);
  	}
  
! 	if (pp->pk_len <= llim) {
  		LISTPUTBEFORE(hp->hd_txq, pp, pk_link, pk_rlink);
  
  	} else {
  		struct pkt *pp2;
  		char *cp = pp->pk_dat;
  		int togo;
  		int n;
***************
*** 3104,3114 ****
  		int fe = pp->pk_flag & FFEOM;
  
  		for (togo = pp->pk_len; togo > 0; togo -= n) {
! 			n = min(togo, lim);
! /*
! 			sprintf(pvmtxt, "pkt_to_host() refrag len %d\n", n);
! 			pvmlogerror(pvmtxt);
! */
  #ifdef	STATISTICS
  			stats.refrag++;
  #endif
--- 3116,3126 ----
  		int fe = pp->pk_flag & FFEOM;
  
  		for (togo = pp->pk_len; togo > 0; togo -= n) {
! 			n = min(togo, llim);
! 			if ((debugmask & PDMPACKET) && togo != pp->pk_len) {
! 				sprintf(pvmtxt, "pkt_to_host() refrag len %d\n", n);
! 				pvmlogerror(pvmtxt);
! 			}
  #ifdef	STATISTICS
  			stats.refrag++;
  #endif
***************
*** 3119,3124 ****
--- 3131,3137 ----
  				ff |= fe;
  			pp2->pk_flag = ff | FFDAT;
  			ff = 0;
+ 			llim = maxl;
  			pp2->pk_cod = pp->pk_cod;
  			pp2->pk_enc = pp->pk_enc;
  			pp2->pk_wid = pp->pk_wid;
***************
*** 3250,3256 ****
  
  	} else {
  		struct pkt *pp2;
! 		int lim = ourudpmtu - DDFRAGHDR;
  		char *cp = pp->pk_dat;
  		int togo;
  		int n;
--- 3263,3269 ----
  
  	} else {
  		struct pkt *pp2;
! 		int maxl = ourudpmtu - DDFRAGHDR;
  		char *cp = pp->pk_dat;
  		int togo;
  		int n;
***************
*** 3258,3264 ****
  		int fe = pp->pk_flag & FFEOM;
  
  		for (togo = pp->pk_len; togo > 0; togo -= n) {
! 			n = min(togo, lim);
  			sprintf(pvmtxt, "pkt_to_task() refrag len %d\n", n);
  			pvmlogerror(pvmtxt);
  			pp2 = pk_new(0);
--- 3271,3277 ----
  		int fe = pp->pk_flag & FFEOM;
  
  		for (togo = pp->pk_len; togo > 0; togo -= n) {
! 			n = min(togo, maxl);
  			sprintf(pvmtxt, "pkt_to_task() refrag len %d\n", n);
  			pvmlogerror(pvmtxt);
  			pp2 = pk_new(0);


-- 
/ Robert Manchek                University of Tennessee     /
/                               Computer Science Department /
/ (423)974-8295                 Ayres Hall #104             /
/ manchek@CS.UTK.EDU.           Knoxville TN  37996-1301    /
/     http://www.netlib.org/utk/people/BobManchek.html      /


