/* Fork95 code generator, made from lcc1.9's VAX subset code generator */

#include "c.h"

extern int STARTED_PROCS;  /* CWK: compile-time constant, set by the -nprocs option */
int STARTED_PROCS = 4096;  /* CWK: default value: use the whole PRAM alone */
extern int Pcodepar;/* CWK: for fast parameter passing from emitcode() to emit() */
extern int Kflag;   /* CWK: if != 0, generate consistency checking code after closing groups */
extern int mflag;   /* CWK: mflag != 0 -> care about modulo */
/* The three macros below each emit one instruction, and only when mflag is set.
 * They expand to a bare `if` without braces, so use them only as a complete
 * statement of their own (never directly in front of an `else`). */
#define force_modulo_0 if(mflag)print("bmc\t 0       \t /*force next mo=0*/\n")
#define force_modulo_1 if(mflag)print("bms\t 0       \t /*force next mo=1*/\n")
#define modulo_nop     if(mflag)print("nop\t         \t /*advance modulo*/\n")


#ifdef DEBUG
/* debug(x,y) - execute statement y only if x is non-zero (DEBUG builds only) */
#define debug(x,y) if (x) y
/* tracing helpers and their state; only declared in DEBUG builds */
static void lprint(Node, char *);
static void nprint(Node);
static char *rnames(unsigned);
static int id;
static Node lhead;

#else
/* without DEBUG, debug(x,y) expands to nothing */
#define debug(x,y)
#endif

static int rflag;      /* != 0 to trace register allocation */
static int shframesize, prframesize;      /* CWK: size of shared/private activation record */
       int shframedepth;  /* CWK 970902, used in dag.c*/
static int proffset;      /* CWK: formerly `offset`; current private frame offset */
       int shoffset;      /* CWK: current shared frame offset */
       int resetshoffset=0; /* 970903: flag indicating that a new group has just started */
static int argbuildsize;   /* size of argument build area */
static int argoffset;      /* offset from top of stack for next argument */
static int nregs = FREE_REGS;   /* number of allocatable registers */
static unsigned rmask;      /* rmask&(1<<r) == 0 if register r is free */
static unsigned usedmask;   /* usedmask&(1<<r) is non-zero if register r was used */
static int reginfo[] = {   /*      1<<x if op+x is legal; */
   0,         /* 0x1000<<x if op+x needs a register */
#include "reginfo.h"
};
static int sargnr = 0;     /* CWK: counts shared arguments */
static int pargnr = 0;     /* CWK: counts private arguments */
static int mylabelnr = 1;  /* CWK: labels for special purposes */
/* mylabel() - return the current special-purpose label number and advance it */
#define mylabel() (mylabelnr++)

/* Forward declarations of the file-local helpers. */
static void genreloads(Node, Node, Symbol);
static Symbol genspill(Node);
static void getreg(Node);
static Node *linearize(Node, Node *, Node);
static int needsreg(Node);
static void putreg(Node);
static void ralloc(Node);
static void restore(unsigned);
static void save(unsigned);
static int spillee(Node, unsigned);
static void spill(int, unsigned, Node);
static unsigned uses(Node);
static int valid(int);
static void Epilog(int);   /* CWK addition */
static void retflush(Node);  /* CWK addition */

/* typecode(p) - optype of p's operator, with U mapped to I and B mapped to P */
#define typecode(p) (optype(p->op) == U ? I : optype(p->op) == B ? P : optype(p->op))
/* sets(p) - p's x.rmask shifted left by p's register number x.reg */
#define sets(p) ((p)->x.rmask<<(p)->x.reg)

static int besetzt = 0; /* CWK: number of parameter registers handed out so far */

/* firstfreeparreg() - hand out the next free parameter register.  CWK
 *
 * Advances the counter `besetzt` and returns its new value, so the first
 * call after freeallparregs() yields 1.  Asking for a register while all
 * MAX_PARAMS_IN_REGISTER are taken must never happen and trips an assertion.
 */
static int firstfreeparreg()
{
   besetzt = besetzt + 1;
   if (besetzt > MAX_PARAMS_IN_REGISTER) {
      /* no parameter register may ever be requested when all are occupied */
      assert(0);
   }
   return besetzt;
}

/* freeallparregs() - release all parameter registers again.  CWK */
#define freeallparregs() besetzt=0

/* address - initialize q for addressing expression p+n
 *
 * Globals, statics and externs are addressed symbolically as "name+n"
 * (the sign of a negative n comes from the number itself).  Everything else
 * gets the numeric offset p->x.offset + n and a name that depends on where
 * the object lives:
 *   - formal parameter on the stack:   "<offset>(ap)"
 *   - formal parameter in a register:  "par<reg>,<offset>"  (CWK: a parameter
 *     may have been passed in a register; this is dealt with in ADDRxxx)
 *   - local variable:                  "fps,<2+offset>" if shared,
 *                                      "fpp,<offset>" otherwise (CWK 970908)
 * Caution (CWK): asmcode must do without shared locals here.
 */
void address(Symbol q, Symbol p, int n) {
   char *base;
   int displacement;

   if (p->scope == GLOBAL || p->sclass == STATIC || p->sclass == EXTERN) {
      q->x.name = stringf("%s%s%d", p->x.name, n >= 0 ? "+" : "", n);
      return;
   }

   q->x.offset = p->x.offset + n;

   if (p->scope == PARAM) {           /* formal parameter */
      if (p->parreg)
         q->x.name = stringf("par%d,%d", p->parreg, q->x.offset);
      else
         q->x.name = stringf("%d(ap)", q->x.offset);
      return;
   }

   /* local variable: pick frame pointer and displacement by sharing class */
   if (p->shared) {
      base = "fps";
      displacement = 2+q->x.offset;
   } else {
      base = "fpp";
      displacement = q->x.offset;
   }
   q->x.name = stringf("%s,%d/*via address*/", base, displacement);
}

/* asmcode - emit assembly language specified by asm
 *
 * Copies str to the output character by character and terminates it with a
 * newline.  A '%' followed by a byte whose VALUE lies in 0..9 is replaced by
 * the x.name of argv[that value].
 * NOTE(review): the test is against the integers 0..9, not the characters
 * '0'..'9'; presumably the front end encodes the operand index as a raw
 * byte - confirm against the caller before changing it.
 */
void asmcode(char *str, Symbol argv[]) {
   char *s = str;

   while (*s) {
      if (*s == '%' && s[1] >= 0 && s[1] <= 9) {
         s++;                               /* step onto the index byte */
         print("%s", argv[*s]->x.name);
      } else {
         print("%c", *s);
      }
      s++;
   }
   print("\n");
}

/* blockbeg - begin a compound statement
 *
 * Saves the register mask, both frame offsets and the shared frame depth in
 * *e so that blockend() can restore them.  If resetshoffset is set (a new
 * group has just started, 970903), the shared offset is restarted at 0 after
 * a trace comment has been written to the output, and the flag is cleared.
 *
 * History (990129): the original check
 *     assert(rmask == (((~0)<<nregs)|1));
 * was taken out as an experiment.  CAUTION: it may have been important, but
 * with the assert in place the join implementation does not work properly,
 * because no compound statements can be opened afterwards.  The reason is
 * unknown.
 */
void blockbeg(Env *e) {
   assert(shframedepth>=0);

   /* snapshot of the current allocation state */
   e->framedepth = shframedepth;  /* 970902 */
   e->shoffset   = shoffset;      /* CWK */
   e->proffset   = proffset;      /* CWK: replaces the former `offset` */
   e->rmask      = rmask;

   if (!resetshoffset)
      return;

   print("/*RESET shoffset, vorher %d*/\n",shoffset);
   resetshoffset = 0;
   shoffset = 0;
}

/* blockend - end a compound statement
 *
 * First raises the private/shared frame sizes to the offsets reached inside
 * the block (CWK), then restores the state that blockbeg() stored in *e.
 */
void blockend(Env *e) {
   /* keep the high-water marks of both frames */
   prframesize = (proffset > prframesize) ? proffset : prframesize;
   shframesize = (shoffset > shframesize) ? shoffset : shframesize;

   /* back to the enclosing block's state */
   rmask        = e->rmask;
   shframedepth = e->framedepth;  /* 970902 */
   shoffset     = e->shoffset;
   proffset     = e->proffset;
}

/* defconst - define a constant
 *
 * Emits the data directive for value v of type code ty:
 *   C -> .byte, S -> .word, I -> .long (decimal), U and P -> .long (hex).
 * Floating-point values are written as raw words when `vax` is defined;
 * otherwise both F and D are written as a ".float 0e..." line (CWK; for D
 * the value is narrowed to float first, 970716).  The former VAX spellings
 * were ".float 0f%.8e" and ".double 0d%.18e".
 * Any other type code is a programming error.
 */
void defconst(int ty, Value v) {
   if (ty == C)
      print(".byte %d\n",   v.uc);
   else if (ty == S)
      print(".word %d\n",   v.us);
   else if (ty == I)
      print(".long %d\n",   v.i );
   else if (ty == U)
      print(".long 0x%x\n", v.u );
   else if (ty == P)
      print(".long 0x%x\n", v.p );
#ifdef vax
   else if (ty == F)
      print(".long 0x%x\n", ((unsigned *) &v.f)[0]);
   else if (ty == D)
      print(".long 0x%x,0x%x\n", ((unsigned *) &v.d)[0],
         ((unsigned *) &v.d)[1]);
#else
   else if (ty == F || ty == D) {
      char line[MAXLINE];

      if (ty == F)
         sprintf(line, ".float 0e%.8e\n", v.f);            /* CWK */
      else
         sprintf(line, ".float 0e%.18e\n", (float)v.d);    /* CWK 970716 */
      outs(line);
   }
#endif
   else
      assert(0);
}

/* defstring - emit a string constant
 *
 * Writes the len bytes starting at s as one `.ascii "..."` directive (CWK;
 * up to 950331 every byte was emitted as a separate `.byte`).
 * White space other than a blank and all control characters are escaped:
 * \n, \t, \f, \r and \v symbolically, everything else as a backslash followed
 * by the octal value.  All other bytes are copied unchanged.
 *
 * Each byte is read as unsigned char: the <ctype.h> classifiers are only
 * defined for values representable as unsigned char (or EOF), and an octal
 * escape of a negative char would otherwise print a sign-extended value.
 *
 * NOTE(review): '"' and '\' are emitted unescaped, and an octal escape is
 * not padded, so a following digit may be read as part of it - confirm what
 * the target assembler expects.
 */
void defstring(int len, char *s) {
   print(".ascii \"");       /* CWK */
   while (len-- > 0) {
      unsigned char c = (unsigned char)*s++;

      if ((isspace(c) && c != ' ') || iscntrl(c))
         switch (c) {
            case '\n': print("\\n");break;
            case '\t': print("\\t");break;
            case '\f': print("\\f");break;
            case '\r': print("\\r");break;
            case '\v': print("\\v");break;
            default: print("\\%o", c); break;  /* other control characters */
         }
      else
         print("%c", c);
   }
   print("\"\n");       /* CWK */
}

/* defsymbol - initialize p's Xsymbol fields
 *
 * Chooses the assembler-level name x.name for p:
 *   - constants keep their source name,
 *   - statics at LOCAL scope or deeper get a fresh "L<n>" label,
 *   - generated symbols become "L<name>",
 *   - all other symbols become "_<name>".
 */
void defsymbol(Symbol p) {
   if (p->scope == CONSTANTS) {
      p->x.name = p->name;
      return;
   }
   if (p->scope >= LOCAL && p->sclass == STATIC) {
      p->x.name = stringf("L%d", genlabel(1));
      return;
   }
   if (p->generated) {
      p->x.name = stringf("L%s", p->name);
      return;
   }
   p->x.name = stringf("_%s", p->name);
}

/* countload - emit code that adds k to the shared-load statistics counters.
 *
 * Uses r30 and r31 as scratch registers.  The gethi/add pair is the same
 * sequence ADDRGP uses to form the address of a global, here for the
 * symbols __shldgcnt and __myshldgcnt.
 * NOTE(review): syncadd is presumably an atomic add of r31 onto the word
 * addressed by r30 - confirm against the SB-PRAM instruction set.
 */
void countload( int k )
{  /*990119, gen code to increment the shared loads counter */
   print("gethi\t __shldgcnt,r30 \t /*sh.load-stat*/\n");
   print("add\t r30,__shldgcnt&0x1fff,r30 /*sh.load-stat*/\n");
   print("getlo\t %d,r31 \t /*sh.load-stat*/\n", k );
   print("syncadd\t r31,r30,0 \t /*sh.load-stat*/\n");
   /* No round separation is required, because these counters are only
    * ever accessed through syncadd.  They are read later, in printAccStat().
    */
   /* and increment the private shldg counter as well (r31 still holds k): */
   print("gethi\t __myshldgcnt,r30 \t /*sh.load-stat*/\n");
   print("add\t r30,__myshldgcnt&0x1fff,r30 /*sh.load-stat*/\n");
   print("syncadd\t r31,r30,0 \t /*sh.load-stat*/\n");
}

/* ccountload - conditional variant of countload() (990122).
 *
 * Emits a guard that tests bit 31 of register r<regnr> and branches over the
 * counter update when it is set, i.e. when the register holds a private
 * address.  The guarded code is exactly the sequence countload(k) emits, so
 * it is produced by calling countload().
 * NOTE(review): the literal branch distance 8 has to match the number of
 * instructions countload() emits; re-check it whenever countload() changes.
 */
void ccountload( int k, int regnr )
{
   /* guard: r30 = r<regnr> & 0x80000000, skip the update if non-zero */
   print("gethi\t 0x80000000,r30 \t /*ccountload*/\n");
   print("and\t r%d,r30,r30 \t /*ccountload*/\n", regnr );
   print("bne\t 8           \t /*ccountload*/\n");

   /* shared and private load counters += k */
   countload( k );
}

/* countstore - emit code that adds k to the shared-store statistics counters.
 *
 * Uses r30 and r31 as scratch registers; r30 receives the address of
 * __shstgcnt and then of __myshstgcnt (same gethi/add pair as ADDRGP),
 * r31 holds k for both updates.
 * NOTE(review): syncadd is presumably an atomic add of r31 onto the word
 * addressed by r30 - confirm against the SB-PRAM instruction set.
 */
void countstore( int k )
{  /*990119, gen code to increment the shared stores counter */
   print("gethi\t __shstgcnt,r30 \t /*sh.stores-stat*/\n");
   print("add\t r30,__shstgcnt&0x1fff,r30 /*sh.stores-stat*/\n");
   print("getlo\t %d,r31 \t /*sh.stores-stat*/\n", k );
   print("syncadd\t r31,r30,0 \t /*sh.stores-stat*/\n");
   /* No round separation is required, because these counters are only
    * ever accessed through syncadd.  They are read later, in printAccStat().
    */
   /* same update for the private counter __myshstgcnt */
   print("gethi\t __myshstgcnt,r30 \t /*sh.stores-stat*/\n");
   print("add\t r30,__myshstgcnt&0x1fff,r30 /*sh.stores-stat*/\n");
   print("syncadd\t r31,r30,0 \t /*sh.stores-stat*/\n");
}

/* ccountstore - conditional variant of countstore() (990122).
 *
 * Emits a guard that tests bit 31 of register r<regnr> and branches over the
 * counter update when it is set, i.e. when the register holds a private
 * address.  The guarded code is exactly the sequence countstore(k) emits, so
 * it is produced by calling countstore().
 * NOTE(review): the literal branch distance 8 has to match the number of
 * instructions countstore() emits; re-check it whenever countstore() changes.
 */
void ccountstore( int k, int regnr )
{
   /* guard: r30 = r<regnr> & 0x80000000, skip the update if non-zero */
   print("gethi\t 0x80000000,r30 \t /*ccountstore*/\n");
   print("and\t r%d,r30,r30 \t /*ccountstore*/\n", regnr );
   print("bne\t 8           \t /*ccountstore*/\n");

   /* shared and private store counters += k */
   countstore( k );
}

/* countbarrier - emit code that adds k to the barrier statistics counter.
 *
 * Uses r30 (address of __barrcnt, formed like in ADDRGP) and r31 (k) as
 * scratch registers.  Unlike the other count*() routines there is no
 * private counter here.
 */
void countbarrier( int k )
{  /*990119, gen code to increment the barriers counter */
   print("gethi\t __barrcnt,r30 \t /*barrier-stat*/\n");
   print("add\t r30,__barrcnt&0x1fff,r30 /*barrier-stat*/\n");
   print("getlo\t %d,r31 \t /*barrier-stat*/\n", k );
   print("syncadd\t r31,r30,0 \t /*barrier-stat*/\n");
   /* No round separation is required, because these counters are only
    * ever accessed through syncadd.  They are read later, in printAccStat().
    */
}

/* countmpadd - emit code that adds k to the MPADD statistics counters
 * __mpaddcnt and __mympaddcnt.  r30 (counter address) and r31 (k) serve as
 * scratch registers. */
void countmpadd( int k ) 
{   /*990119*/
   print("gethi\t __mpaddcnt,r30 \t /*MPADD-stat*/\n");
   print("add\t r30,__mpaddcnt&0x1fff,r30 /*MPADD-stat*/\n");
   print("getlo\t %d,r31 \t /*MPADD-stat*/\n", k );
   print("syncadd\t r31,r30,0 \t /*MPADD-stat*/\n");
   /* No round separation is required, because these counters are only
    * ever accessed through syncadd.  They are read later, in printAccStat().
    */
   /* same update for the private counter; r31 still holds k */
   print("gethi\t __mympaddcnt,r30 \t /*MPADD-stat*/\n");
   print("add\t r30,__mympaddcnt&0x1fff,r30 /*MPADD-stat*/\n");
   print("syncadd\t r31,r30,0 \t /*MPADD-stat*/\n");
}

/* countmpmax - emit code that adds k to the MPMAX statistics counters
 * __mpmaxcnt and __mympmaxcnt.  r30 (counter address) and r31 (k) serve as
 * scratch registers. */
void countmpmax( int k ) 
{   /*990119*/
   print("gethi\t __mpmaxcnt,r30 \t /*MPMAX-stat*/\n");
   print("add\t r30,__mpmaxcnt&0x1fff,r30 /*MPMAX-stat*/\n");
   print("getlo\t %d,r31 \t /*MPMAX-stat*/\n", k );
   print("syncadd\t r31,r30,0 \t /*MPMAX-stat*/\n");
   /* No round separation is required, because these counters are only
    * ever accessed through syncadd.  They are read later, in printAccStat().
    */
   /* same update for the private counter; r31 still holds k */
   print("gethi\t __mympmaxcnt,r30 \t /*MPMAX-stat*/\n");
   print("add\t r30,__mympmaxcnt&0x1fff,r30 /*MPMAX-stat*/\n");
   print("syncadd\t r31,r30,0 \t /*MPMAX-stat*/\n");
}

/* countmpand - emit code that adds k to the MPAND statistics counters
 * __mpandcnt and __mympandcnt.  r30 (counter address) and r31 (k) serve as
 * scratch registers. */
void countmpand( int k ) 
{   /*990119*/
   print("gethi\t __mpandcnt,r30 \t /*MPAND-stat*/\n");
   print("add\t r30,__mpandcnt&0x1fff,r30 /*MPAND-stat*/\n");
   print("getlo\t %d,r31 \t /*MPAND-stat*/\n", k );
   print("syncadd\t r31,r30,0 \t /*MPAND-stat*/\n");
   /* No round separation is required, because these counters are only
    * ever accessed through syncadd.  They are read later, in printAccStat().
    */
   /* same update for the private counter; r31 still holds k */
   print("gethi\t __mympandcnt,r30 \t /*MPAND-stat*/\n");
   print("add\t r30,__mympandcnt&0x1fff,r30 /*MPAND-stat*/\n");
   print("syncadd\t r31,r30,0 \t /*MPAND-stat*/\n");
}

/* countmpor - emit code that adds k to the MPOR statistics counters
 * __mporcnt and __mymporcnt.  r30 (counter address) and r31 (k) serve as
 * scratch registers. */
void countmpor( int k ) 
{   /*990119*/
   print("gethi\t __mporcnt,r30 \t /*MPOR-stat*/\n");
   print("add\t r30,__mporcnt&0x1fff,r30 /*MPOR-stat*/\n");
   print("getlo\t %d,r31 \t /*MPOR-stat*/\n", k );
   print("syncadd\t r31,r30,0 \t /*MPOR-stat*/\n");
   /* No round separation is required, because these counters are only
    * ever accessed through syncadd.  They are read later, in printAccStat().
    */
   /* same update for the private counter; r31 still holds k */
   print("gethi\t __mymporcnt,r30 \t /*MPOR-stat*/\n");
   print("add\t r30,__mymporcnt&0x1fff,r30 /*MPOR-stat*/\n");
   print("syncadd\t r31,r30,0 \t /*MPOR-stat*/\n");
}

        /* " FDCSIUPVB" */
#define suffix(p)    ".fdbwllll."[optype((p)->op)]
/*#define binary(inst) print("%s%c3 r%d,r%d,r%d\n", inst, suffix(p), \CWK*/
#define binary(inst) print("%s\t r%d,r%d,r%d\n", inst, \
                        b->x.reg+ROFF, a->x.reg+ROFF, r)
/*#define unary(inst)  print("%s%c r%d,r%d\n", inst, suffix(p), a->x.reg, r)CWK*/
#define unary(inst)  print("%s\t r%d,r%d\n", inst, a->x.reg+ROFF, r)
/*#define compare(cp)  print("cmp%c r%d,r%d; j%s %s\n", suffix(p), \CWK*/
                        /*a->x.reg, b->x.reg, cp, p->syms[0]->x.name)*/
#define compare(cp)  print("sub\t r%d,r%d,pc\t /*compare*/\nb%s\t %s\n", \
                        b->x.reg+ROFF, a->x.reg+ROFF, cp, p->syms[0]->x.name)
#define fcompare(fcp)  print("fsub\t r%d,r%d,pc\t /*fcompare*/\nfb%s\t %s\n", \
                        b->x.reg+ROFF, a->x.reg+ROFF, fcp, p->syms[0]->x.name)
/*PRAM: sub b,a,x bedeutet x = a - b. CWK*/

/* emit - emit the dags on list p */
void emit(Node p) {
   int parreg;  /*CWK*/
   int label1, label2;  /*CWK*/
   for (; p; p = p->x.next) {
      Node a = p->kids[0], b = p->kids[1];
      int r = p->x.reg+ROFF;  /*CWK: Registernummer - Offset*/
      switch (p->op) {
      case BANDU:                         binary("and");   break; /*CWK*/
      case BORU:                          binary("or");   break; /*CWK*/
      case BXORU:                         binary("xor");   break; /*CWK*/
      case ADDD:  case ADDF:              binary("fadd");   break; /*CWK*/
      case ADDI:  case ADDP:  case ADDU:  binary("add");   break; /*CWK*/
      case SUBD:  case SUBF:
         print("fsub\t r%d,r%d,r%d\t /*SUBF*/\n",
                       b->x.reg+ROFF, a->x.reg+ROFF, r);  /*CWK*/
         break;
      case SUBI:  case SUBP:  case SUBU:
         print("sub\t r%d,r%d,r%d\t /*SUBI*/\n",
                      b->x.reg+ROFF, a->x.reg+ROFF, r);  /*CWK*/
         break;
      case MULD:  case MULF:              binary("fmul");   break; /*CWK*/
      case MULI:  case MULU:              binary("mul");   break; /*CWK*/
      case DIVD:  case DIVF:
         print("mov\t r%d,par1\t /*DIVF*/\n", a->x.reg+ROFF);    /*CWK*/
         print("mov\t r%d,par2\t /*DIVF*/\n", b->x.reg+ROFF);    /*CWK*/
         print("bsrg\t spp,forklib_fdiv\t /*DIVF*/\n");          /*CWK*/
         print("mov\t Ret,r%d\t /*DIVF*/\n", r);                 /*CWK*/
         /*forklib_fdiv preserves synchronicity.*/
         break;
      case DIVI:  /*binary("div");   break; CWK */
         /*r30, r31 wird benutzt, ist scratch, braucht nicht gesavet werden. */
         /*print("pushl r%d; pushl r%d; calls $2,udiv; movl r0,r%d\n",*/
         print("mov\t r%d,par1\t /*DIVI*/\n", a->x.reg+ROFF);    /*CWK*/
         print("mov\t r%d,par2\t /*DIVI*/\n", b->x.reg+ROFF);    /*CWK*/
         /*force_modulo_1; auskommentiert weil nur priv. Stackzugriff */
         print("bsrg\t spp,forklib_divi\t /*DIVI*/\n");                 /*CWK*/
         print("mov\t Ret,r%d\t /*DIVI*/\n", r);                 /*CWK*/
         if (p->synchron) {
           /*if (b->maybe_priv)*/
             print("bsrg\t spp,forklib_sync\t /*DIVI*/\n");             /*CWK*/
             if (Tflag) countbarrier(1); /*990119*/
         }
         /*restore(p->x.busy&(1<<(FREE_REGS-1)));CWK*/
         break;
      case DIVU:       /* call Divisionsroutine CWK */
         /*r30, r31 wird benutzt, ist scratch, braucht nicht gesavet werden. */
         /*print("pushl r%d; pushl r%d; calls $2,udiv; movl r0,r%d\n",*/
         print("mov\t r%d,par1\t /*DIVU*/\n", a->x.reg+ROFF);    /*CWK*/
         print("mov\t r%d,par2\t /*DIVU*/\n", b->x.reg+ROFF);    /*CWK*/
         /*force_modulo_1; auskommentiert weil nur priv. Stackzugriff */
         print("bsrg\t spp,forklib_divu\t /*DIVU*/\n");                 /*CWK*/
         print("mov\t Ret,r%d\t /*DIVU*/\n", r);                 /*CWK*/
         if (p->synchron) {
           /*if (b->maybe_priv)  950324*/
             print("bsrg\t spp,forklib_sync\t /*DIVU*/\n");                 /*CWK*/
             if (Tflag) countbarrier(1); /*990119*/
         }
         /*restore(p->x.busy&(1<<(FREE_REGS-1)));CWK*/
         break;
      case MODI:  /* a%b == a - (a/b)*b  */
         /*save(p->x.busy&0x3e);CWK*/
         print("mov\t r%d,par1\t /*MODI*/\n", a->x.reg+ROFF);     /*CWK*/
         print("mov\t r%d,par2\t /*MODI*/\n", b->x.reg+ROFF);     /*CWK*/
         /*force_modulo_1; auskommentiert weil nur priv. Stackzugriff */
         print("bsrg\t spp,forklib_divi\t /*MODI*/\n");           /*CWK*/
         /*restore(p->x.busy&0x3e);*/
         print("mul\t r%d,Ret,Ret\t /*MODI*/\n", b->x.reg+ROFF); /*CWK*/
         print("sub\t Ret,r%d,r%d\t /*MODI*/\n", a->x.reg+ROFF,r); /*CWK*/
         if (p->synchron) {
           /*if (b->maybe_priv)   950324 */
             /*modulo_nop; ausk. weil _sync selbst moduliert*/
             print("bsrg\t spp,forklib_sync\t /*MODI*/\n"); /*CWK*/
             if (Tflag) countbarrier(1); /*990119*/
         }
         /*print("divl3 r%d,r%d,r0; mull2 r%d,r0; subl3 r0,r%d,r%d\n",
            b->x.reg, a->x.reg, b->x.reg, a->x.reg, r);*/
         break;
      case MODU:       /* call Moduloroutine CWK */
         /*save(p->x.busy&0x3e);*/
         /*print("pushl r%d; pushl r%d; calls $2,urem; movl r0,r%d\n",*/
         print("mov\t r%d,par1\t /*MODU*/\n", a->x.reg+ROFF); /*CWK*/
         print("mov\t r%d,par2\t /*MODU*/\n", b->x.reg+ROFF); /*CWK*/
         /*force_modulo_1; auskommentiert weil nur priv. Stackzugriff */
         print("bsrg\t spp,forklib_divu\t /*MODU*/\n");           /*CWK*/
         print("mov\t par3,r%d\t /*MODU*/\n", r);         /*CWK*/
         if (p->synchron) {
           /*if (b->maybe_priv)   950324 */
             print("bsrg\t spp,forklib_sync\t /*MODU*/\n"); /*CWK*/
             if (Tflag) countbarrier(1); /*990119*/
         }
         /*restore(p->x.busy&0x3e);*/
         break;
      case ILOG2I:                                                    /*CWK*/
         print("rm \t r%d,r%d  \t /*ILOG2*/\n", a->x.reg+ROFF, r );  /*CWK*/
         break;
      case MPADDI:                                                    /*CWK*/
         force_modulo_1;
         print("mpadd\t r%d,0,r%d\t /*MPADD*/\nnop\t          \t /*MPADD*/\n",
                        a->x.reg+ROFF, b->x.reg+ROFF);            /*CWK*/
         print("mov\t r%d,r%d\t /*MPADD*/\n",
                        b->x.reg+ROFF, r );            /*CWK*/
         if (Tflag) countmpadd( 1 );  /*990119*/
         break;
      case MPANDI:                                                    /*CWK*/
         force_modulo_1;
         print("mpand\t r%d,0,r%d\t /*MPAND*/\nnop\t          \t /*MPAND*/\n",
                        a->x.reg+ROFF, b->x.reg+ROFF);            /*CWK*/
         print("mov\t r%d,r%d\t /*MPAND*/\n",
                        b->x.reg+ROFF, r );            /*CWK*/
         if (Tflag) countmpand( 1 );  /*990119*/
         break;
      case MPMAXI:                                                    /*CWK*/
         force_modulo_1;
         print("mpmax\t r%d,0,r%d\t /*MPMAX*/\nnop\t          \t /*MPMAX*/\n",
                        a->x.reg+ROFF, b->x.reg+ROFF);            /*CWK*/
         print("mov\t r%d,r%d\t /*MPMAX*/\n",
                        b->x.reg+ROFF, r );            /*CWK*/
         if (Tflag) countmpmax( 1 );  /*990119*/
         break;
      case MPORI:                                                     /*CWK*/
         force_modulo_1;
         print("mpor\t r%d,0,r%d\t /*MPOR*/\nnop\t          \t /*MPOR*/\n",
                        a->x.reg+ROFF, b->x.reg+ROFF);            /*CWK*/
         print("mov\t r%d,r%d\t /*MPOR*/\n",
                        b->x.reg+ROFF, r );            /*CWK*/
         if (Tflag) countmpor( 1 );  /*990119*/
         break;
      case RSHU:
         /*print("subl3 r%d,$32,r0; extzv r%d,r0,r%d,r%d\n",
            b->x.reg, b->x.reg, a->x.reg, r); */
         print("lsr\t r%d,r%d,r%d\t /*RSHU*/\n",         /*CWK*/
            a->x.reg+ROFF, b->x.reg+ROFF, r);
         break;
      case RSHI:       /* lsr statt asr CWK 950306 */
         print("lsr\t r%d,r%d,r%d\t /*RSHI*/\n",
                      a->x.reg+ROFF, b->x.reg+ROFF, r);   /*CWK*/
         break;
      case LSHI: case LSHU:       /* lsl statt asl CWK 950306 */
         print("lsl\t r%d,r%d,r%d\t /*LSHx*/\n", a->x.reg+ROFF, b->x.reg+ROFF, r);
         break;
      case INDIRB:
         /*print("moval (r%d),r%d\n", a->x.reg+ROFF, r);*/
         if (Tflag)
            ccountload( 1, a->x.reg+ROFF );  /*990122*/
         force_modulo_0;                                       /*CWK*/
         print("ldgn\t r%d,0,r%d\t /*INDIRB*/\n", a->x.reg+ROFF, r);/*CWK*/
         break;
      case INDIRC: case INDIRD: case INDIRF: case INDIRI:
      case INDIRP: case INDIRS:
         /*print("mov%c (r%d),r%d\n", suffix(p), a->x.reg, r);*/
         if (Tflag)
            ccountload( 1, a->x.reg+ROFF );  /*990122*/
         force_modulo_0;
         print("ldgn\t r%d,0,r%d\t /*INDIRx*/\n", a->x.reg+ROFF, r);/*CWK*/
         break;
      case BCOMU:  /* unary("mcom" );  break; CWK: gibts nur bei VAX*/
         print("getlo\t -1,r31\n");  /* r31 is scratch reg.; gen. ffffffff */
         print("xor\t r%d,r31,r%d\n", a->x.reg+ROFF, r); 
         break;
      case NEGD:  case NEGF:
         print("fsub\t r%d,R0,r%d\t /*NEGF*/\n", a->x.reg+ROFF, r);/*CWK*/
         /*unary("mneg" );*/
         break;
      case NEGI:
         print("sub\t r%d,0,r%d\t /*NEGI*/\n", a->x.reg+ROFF, r);/*CWK*/
         break;
      /* PRAM: D=F und C=S=U=I=P  CWK*/
      case CVDI:  unary("ftoi" );  break;  /*CWK, vormals cvtd*/
      case CVID:  unary("itof");  break;/* vormals cvtl*/
      case CVCI:  /*CWK, vormals cvtb*/
      case CVCU:  /*CWK, vormals movzb*/
      case CVSI:  /*CWK, vormals cvtw*/
      case CVSU:  /*CWK, vormals movzw*/
      case CVDF:  /*CWK, vormals cvtd*/
      case CVFD: 
      case CVUC:  case CVUS:  /*CWK, vormals cvtl*/
      case CVIC:  case CVIS:  /*"*/
      case CVIU:  case CVUI:  /*vormals mov*/
      case CVPU:  case CVUP:                         /*vormals mov*/
           if (a->x.reg+ROFF != r)     /* vermeide mov ri,ri */
              unary("mov");
           break;    /*CWK*/
      case RETD: case RETF: case RETI:
         /*print("mov%c r%d,r0; ret\n", suffix(p), a->x.reg);*/
         retflush(p);    if (!cfunc->sync) assert(p->rframes==0);
         Epilog(cfunc->sync);                            /*endet auf mo==0*/
         print("mov\t r%d,Ret\t\t /*RETx*/\n", a->x.reg+ROFF);     /*CWK,1*/
         if (glevel) print("ibp10\n");     /*950504*/
         print("return\nnop\n");                                   /*CWK,0/1*/
         break;
      case SRETD: case SRETF: case SRETI:                     /*CWK*/
         retflush(p);    assert(cfunc->sync);
         Epilog(1);     /*endet auf mo==?*/
         print("bsrg\t spp,forklib_sync\t /*SRETx: synchr. Blattgr*/\n");
         if (Tflag) countbarrier(1); /*990119*/
         print("mov\t r%d,Ret\t\t /*SRETx*/\n", a->x.reg+ROFF);   /*0*/
#if 0
 testweise 990119:
 dies ist eigentlich ueberfluessig, weil der Rueckgabewert
 per Sprachdefinition privat ist. Der Programmierer ist selbst dafuer
 verantwortlich, in solchen Faellen erst eine solche Zuweisung an eine
 shared Variable zu programmieren.
         print("stg\t Ret,fps,0\t /*write in SM loc*/\n");    /*CWK,1*/
         print("ldgn\t fps,0,Ret\t /*fetch common Ret*/\n");  /*CWK,0*/
#endif
         /*modulo_nop; zum returnen braucht man nicht modulieren*/
         if (glevel) print("nop\nibp10\n");     /*950504*/
         print("return\nnop\n");
         break;
      case RETV:
         retflush(p);
         Epilog(cfunc->sync);     /*endet auf mo==?*/
         /*modulo_nop;*/
         if (glevel) print("nop\nibp10\n");     /*950504*/
         print("return\t\t\t /*RETV*/\nnop\n");  /* vormals ret CWK, 0/1 */
         break;
      case ADDRGP: /* vorher Basisregister setzen! */ /*CWK*/
         print("gethi\t %s,r%d \t /*ADDRGP*/\n", p->syms[0]->x.name, r);
         print("add\t r%d,(%s)&0x1fff,r%d  /*ADDRGP*/\n", r, p->syms[0]->x.name, r);
         break;
      case ADDRLP:
         /*print("moval %s,r%d\n", p->syms[0]->x.name, r);*/
         print("add\t %s,r%d\t /*ADDRLP*/\n", p->syms[0]->x.name, r);
#if 0
         if (isarray(p->syms[0]->u.c.loc->type)) {
           print("getlo\t %s,r31\n", p->syms[0]->u.c.loc->type->size );
           print("sub\t r31,r%d,r%d /*ADDRLP-Korrektur*/\n", r,r); /*950320*/
         }
#endif
         break;
      case ADDRFP:
         print("add\t %s,r%d\t /*ADDRFP*/\n", p->syms[0]->x.name, r);  /*CWK*/
         break; /*weil der lcc fuer Parameter trotzdem Indirektionen einfuegt */
      case CNSTC:
         if (!(p->syms[0]->u.c.v.sc >>19))
            print("getlo\t %s,r%d    \t /*CNSTC*/\n", p->syms[0]->x.name, r);
         else {
            print("gethi\t %s,r%d    \t /*CNSTC*/\n", p->syms[0]->x.name, r);
            print("add\t r%d,(%s)&0x1fff,r%d  /*dto*/\n", r, p->syms[0]->x.name, r);
         }
         break;
      case CNSTP:
         if (!(p->syms[0]->u.c.v.i >>19))     /* da .p nicht akzeptiert wird*/
            print("getlo\t %s,r%d    \t /*CNSTP*/\n", p->syms[0]->x.name, r);
         else {
            print("gethi\t %s,r%d    \t /*CNSTP*/\n", p->syms[0]->x.name, r);
            print("add\t r%d,(%s)&0x1fff,r%d  /*dto*/\n", r, p->syms[0]->x.name, r);
         }
         break;
      case CNSTS:
         if (!(p->syms[0]->u.c.v.ss >>19))
            print("getlo\t %s,r%d    \t /*CNSTS*/\n", p->syms[0]->x.name, r);
         else {
            print("gethi\t %s,r%d    \t /*CNSTS*/\n", p->syms[0]->x.name, r);
            print("add\t r%d,(%s)&0x1fff,r%d  /*dto*/\n", r, p->syms[0]->x.name, r);
         }
         break;
      case CNSTU:
         if (!(p->syms[0]->u.c.v.u >>19))
            print("getlo\t %s,r%d    \t /*CNSTU*/\n", p->syms[0]->x.name, r);
         else {
            print("gethi\t %s,r%d    \t /*CNSTU*/\n", p->syms[0]->x.name, r);
            print("add\t r%d,(%s)&0x1fff,r%d  /*dto*/\n", r, p->syms[0]->x.name, r);
         }
         break;
      case CNSTI:
         if (!(p->syms[0]->u.c.v.i >>19))
            print("getlo\t %s,r%d    \t /*CNSTI*/\n", p->syms[0]->x.name, r);
         else {
            print("gethi\t %s,r%d    \t /*CNSTI*/\n", p->syms[0]->x.name, r);
            print("add\t r%d,(%s)&0x1fff,r%d  /*dto*/\n", r, p->syms[0]->x.name, r);
         }
         break;
      case JUMPV:
         /*print("jmp (r%d)\n", a->x.reg);*/
         print("jra\t r%d,0     \t /*JUMPV*/\n", a->x.reg+ROFF);/*CWK*/
         break;
      case ASGNB:
         /*ggf. save(p->x.busy&...);*/
         /*print("movc3 $%s,(r%d),(r%d)\n", p->syms[0]->x.name,b->x.reg,a->x.reg);*/
         print("gethi\t %s,par3 \t /*ASGNB*/\n", p->syms[0]->x.name);
         print("add\t par3,(%s)&0x1fff,par3 /*ASGNB:size*/\n", p->syms[0]->x.name);
         print("mov\t r%d,par1\t /*ASGNB:src*/\n", b->x.reg+ROFF);
         print("mov\t r%d,par2\t /*ASGNB:dest*/\n", a->x.reg+ROFF);
         if (Tflag)
           if (! a->maybe_priv) {  /*990119*/
             /* ^ nicht ganz korrekt, aber so kommts ungefaehr hin. */
             print("gethi\t __shldgcnt,r30 \t /*sh.load-stat*/\n");
             print("add\t r30,__shldgcnt&0x1fff,r30 /*sh.load-stat*/\n");
             print("syncadd\t par3,r30,0 \t /*sh.load-stat*/\n");
             print("gethi\t __shstgcnt,r30 \t /*sh.store-stat*/\n");
             print("add\t r30,__shstgcnt&0x1fff,r30 /*sh.store-stat*/\n");
             print("syncadd\t par3,r30,0 \t /*sh.store-stat*/\n");
           }
         /*ggf. restore(p->x.busy&...);*/
         force_modulo_1;
         print("bsrg\t spp,forklib_movb\t /*ASGNB*/\n");    /*1*/
         break;
      case ASGNC: case ASGND: case ASGNF: case ASGNI: case ASGNP: case ASGNS:
         /*print("mov%c r%d,(r%d)\n", suffix(p), b->x.reg, a->x.reg);*/
#ifdef ADDRREG_FUNKTIONIERT
         if (a->syms[0] && a->syms[0]->parreg)
           print("mov\t r%d,par%d\t /*ASGNx(parreg)*/\n",
                        b->x.reg+ROFF, a->syms[0]->parreg );
         else
#endif
           if (Tflag)
              ccountstore(1, a->x.reg+ROFF );  /*990122*/
           force_modulo_1;
           print("stg\t r%d,r%d,0\t /*ASGNx*/\n",
                        b->x.reg+ROFF, a->x.reg+ROFF); /*CWK*//*1*/
         break;
      /* case ARGB kann nicht vorkommen! NOARGB eingeschaltet CWK */
      case ARGB: assert(0);
         /*save(p->x.busy&0x3f);*/
         print("movc3 $%s,(r%d),%d(sp)   ARGB  \n", p->syms[0]->x.name,
            a->x.reg+ROFF, p->x.argoffset);
         /*restore(p->x.busy&0x3f);*/
         break;
      case ARGD: case ARGF: case ARGI: case ARGP:
         /*force_modulo_1; auskommentiert, weil auf privatem stack*/
         if (pargnr==0) { /* erstes Argument: rette alten app */
            print("pshg\t app,spp\t /*rette app*/\n");         /*CWK*/ /*1*/
            /* neuer app wird unmittelbar vorm CALL gesetzt, damit man auf den
             * alten app fuer die Argumentberechnungen noch zugreifen kann CWK */
            /*modulo_nop; s.o. */
         }
         /*print("mov%c r%d,%d(sp)\n", suffix(p),*/
         print("pshg\t r%d,spp\t\t /*ARGx Nr. %d*/\n",         /*CWK*/
            a->x.reg+ROFF, /*p->x.argoffset,*/ ++pargnr);
         if (Kflag)
           print("bsrg\t spp,forklib_pstacktest /*ARGx*/\n");         /*CWK*/
         break;
      /* case RARGB kann nicht vorkommen! NOARGB eingeschaltet   CWK*/
      case RARGD: case RARGF: case RARGI: case RARGP:          /*CWK*/
         parreg = firstfreeparreg();   /* liefert 1..4 */
         print("mov\t r%d,par%d\t /*RARGx*/\n", a->x.reg+ROFF, parreg );
         break;
      /* case SARGB kann nicht vorkommen! NOARGB eingeschaltet   CWK*/
      case SARGD: case SARGF: case SARGI: case SARGP:          /*CWK*/
         /*force_modulo_1; HS*/
         if (sargnr==0) { /* erstes Argument: rette alten aps */
            print("pshg\t aps,sps\t /*rette aps*/\n");         /*CWK*/  /*1*/
            /* neuer aps wird unmittelbar vorm CALL gesetzt, damit man auf den
             * alten aps fuer die Argumentberechnungen noch zugreifen kann CWK */
            /*modulo_nop;HS*/
            if (Tflag) countstore(1);  /*990119*/
         }
         print("pshg\t r%d,sps \t /*SARGx Nr. %d*/\n", a->x.reg+ROFF, ++sargnr);/*1*/
         if (Tflag) countstore(1);  /*990119*/
         /*modulo_nop;*/
         if (Kflag)
           print("bsrg\t spp,forklib_sstacktest /*SARGx: overflow?*/\n");
         break;
      case CALLB:   /*961104 bisher assert(0);     /*CWK: unverarzteter Fall */
         error("Function returning a struct would lead to inefficient code.\n\
                Please pass the result via a pointer to work around.\n");
         break;
         /*Der Rest funktioniert noch nicht. CWK 961104*/
         freeallparregs();  /*CWK*/
         if (sargnr)              /*aps neu setzen (schon vom ersten SARG gerettet*/
            print("add\t sps,-%d,aps\t /*neuer aps*/\n", sargnr+1 );   /*CWK*/  /*0*/
         if (pargnr)              /*aps neu setzen (schon vom ersten SARG gerettet*/
            print("add\t spp,-%d,app\t /*neuer app*/\n", pargnr+1 );   /*CWK*/  /*0*/
         save(p->x.busy&0xfff);/*bis zu 12 Register retten*/
         /*save(p->x.busy&0x3e);*/
         print("jsrg\t spp,r%d,0\t /*CALLB*/\n", a->x.reg+ROFF);
#if 0
         if (a->x.reg == 1) {                 /*????????CWK????????*/
            print("movl r1,r0 /*CALLB*/\n");
            a->x.reg = 0;
         }
         if (b->x.reg != 1)
            print("movl r%d,r1 /*CALLB*/\n", b->x.reg);
         print("calls $0,(r%d) /*CALLB*/\n", a->x.reg);
         /*restore(p->x.busy&0x3e);*/
#endif
         if (p->synchron) {
            print("bsrg\t spp,forklib_sync /*CALLx: synchro*/\n");
            if (Tflag) countbarrier(1); /*990119*/
         }
         /*RET+B hinterlaesst Pointer auf Ergebnis in Ret: */
         print("mov\t Ret,r%d\t /*CALLB: ptr to result*/\n", r );
         restore(p->x.busy&0xfff);
         if (pargnr >0) {  /*CWK*/
            print("mov\t app,spp\t /*alten spp setzen*/\n");
            print("ldgn\t spp,0,app\t /*alten app holen*/\n");
         }
         sargnr = pargnr = 0;        /*reset. CWK*/
         break;
      case CALLD: case CALLF: case CALLI: case CALLV:
         freeallparregs();  /*CWK*/
         if (sargnr)              /*aps neu setzen (schon vom ersten SARG gerettet)*/
            print("add\t sps,-%d,aps\t /*neuer aps*/\n", sargnr+1 );   /*CWK*/
         if (pargnr)              /*aps neu setzen (schon vom ersten ARG gerettet)*/
            print("add\t spp,-%d,app\t /*neuer app*/\n", pargnr+1 );   /*CWK*/
         save(p->x.busy&0xfff);/*bis zu 12 Register retten*/
         /*print("calls $0,(r%d) \n", a->x.reg);*/
         /*force_modulo_1; nur priv.Stackoperationen*/
         print("jsrg\t spp,r%d,0\t /*CALLx*/\n", a->x.reg+ROFF); /*1*/
         if (p->synchron) {
            /*modulo_nop;*/
            print("bsrg\t spp,forklib_sync /*CALLx: synchro*/\n"); /*1*/
            if (Tflag) countbarrier(1); /*990119*/
         }
         if (p->op != CALLV)
            /*print("mov%c r0,r%d\n", suffix(p), r);*/
            print("mov\t Ret,r%d\n", r);
         restore(p->x.busy&0xfff);
         if (pargnr >0) {  /*CWK*/
            print("mov\t app,spp\t /*alten spp setzen*/\n");
            /*force_modulo_0;*/
            print("ldgn\t spp,0,app\t /*alten app holen*/\n");      /*0*/
         }
         if (sargnr >0) {  /*CWK*/
            print("mov\t aps,sps\t /*alten sps setzen*/\n");
            /*if (!pargnr)*/
            force_modulo_0;
            print("ldgn\t sps,0,aps\t /*alten aps holen*/\n");    /*0*/
            if (Tflag) countload( 1 );  /*990119*/
         }
         sargnr = pargnr = 0;        /*reset. CWK*/
         break;
      case EQD:   case EQF:   fcompare("eq"); break; /*feql CWK 951128*/
      case EQI:   compare("eq" ); break; /*eql*/
      case GED:   case GEF:   fcompare("oge" ); break; /*overflow or fgeq CWK 951128*/
      case GEI:   compare("ge" ); break; /*geq*/
      case GEU:   compare("ge"); break; /*gequ*/
      case GTD:   case GTF:   fcompare("ogt" ); break; /*overflow or fgtr CWK 951128*/
      case GTI:   compare("gt" ); break; /*gtr*/
      case GTU:   compare("gt"); break; /*gtru*/
      case LED:   case LEF:   fcompare("ule" ); break; /*underflow or fleq CWK 951128*/
      case LEI:   compare("le" ); break; /*leq*/
      case LEU:   compare("le"); break; /*lequ*/
      case LTD:   case LTF:   fcompare("ult" ); break; /*flss CWK 951128*/
      case LTI:   compare("lt" ); break; /*lss*/
      case LTU:   compare("lt"); break; /*lssu*/
      case NED:   case NEF:   fcompare("ne" ); break; /*fneq CWK 951128*/
      case NEI:   compare("ne" ); break; /*neq*/
      case LABELV:
         print("%s:\n", p->syms[0]->x.name);
         break;
      case ADDRREGB: case ADDRREGF: case ADDRREGD: case ADDRREGP:
      case ADDRREGI:        assert(0);   /*CWK: vorerst nicht benutzen!*/
         print("MOV par%d,r%d /*ADDRREGx %s r%d*/\n", p->syms[0]->parreg, r,
                p->syms[0]->x.name, r);
         break;
      case ADR_PROZ_NUMP:                                   /*CWK*/
         print("add\t gpp,2,r%d\t /*load addr of $*/\n", r);
         break;
      case ADR_GRP_NUMP:                                    /*CWK*/
         print("add\t gpp,1,r%d\t /*load addr of @*/\n", r);
         break;
      case ADDRSGP:
         /*print("moval %s,r%d; SHARED\n", p->syms[0]->x.name, r);*/
         print("gethi\t %s,r%d   \t /*ADDRSGP*/\n", p->syms[0]->x.name, r); /*CWK*/
         print("add\t r%d,(%s)&0x1fff,r%d  /*dto*/\n", r, p->syms[0]->x.name, r);
         break;
      case ADDRSLP:       /*CWK*/
#if 0
         print("add\t %s,r%d\t /*ADDRSLP*/\n", p->syms[0]->x.name, r);/*vorher*/
#endif
         /* 970903*/
         if (p->syms[0]->x.framedepth == 0)  /* access at top level of fn. */
           if (!cfunc->sync)  /*980129: start bastelt einen fake-fps!*/
             print("add\t fps,%d,r%d\t /*ADDRSLP a*/\n", 1+p->syms[0]->x.offset, r);
           else
             print("add\t fps,%d,r%d\t /*ADDRSLP b*/\n", 1+p->syms[0]->x.offset, r);
               /*vorher 2 statt 1 ^ da Fn-Grfr. dazw., jetzt retval weg */
         else   /*framedepth > 0:*/
         if (p->shframedepth - p->syms[0]->x.framedepth >= 1) {  /*new 970902*/
           int j;
           print("ldg gps,0,r31  /*ADDRSLP, nested(%d) [%d]*/\n nop\n",
             p->shframedepth - p->syms[0]->x.framedepth,
                               p->syms[0]->x.framedepth ); 
           for ( j=p->shframedepth - p->syms[0]->x.framedepth - 1; j>0; j--)
             print("ldg r31,0,r31  /*ADDRSLP, nested(%d)*/\n nop\n", j);
           print("add\t r31,%d,r%d\t /*ADDRSLP c*/\n", 1 + p->syms[0]->x.offset, r);
         }                                        /* ^ = gps alt + sync cell (spaeter + @) */
         else  /*declared by same group:*/
           print("add\t gps,%d,r%d\t /*ADDRSLP d*/\n", 1 + p->syms[0]->x.offset, r);
         break;
      case ADDRSFP:
         print("add\t %s,r%d\t /*ADDRSFP*/\n", p->syms[0]->x.name, r);  /*CWK*/
         break; /*weil der lcc fuer Parameter trotzdem Indirektionen einfuegt*/

      case SYNCV:                /*CWK*/
         /*falls Moduloausgleich erforderlich, hier einfuegen. Start mit mo==0*/
         /*assert( p->synchron==1 );  /*cfunc->synchron && rausgeworfen 950324*/
         /*force_modulo_1; da sync selbst moduliert*/
         print("bsrg\t spp,forklib_sync\t /*SYNCV*/\n");   /**/
         if (Tflag) countbarrier(1); /*990119*/
         break;

     case SHSPACEV:                       /*970902*/
         /* allocate space for shared block-local variables */
         if (Pcodepar > 0)
            print("add\t sps,%d,sps\t /*alloc. stack space for sh locals*/\n",
                   Pcodepar );
         break;

      case SPLITV:                /*CWK: berechne Trennzelle im SM*/
         force_modulo_0;
         print("sub\t sps,eps,Ret\t /*split*/\n");
         print("lsr\t Ret,1,Ret\t /*split*/\n");
         print("add\t Ret,sps,Ret\t /*split: Trennzelle*/\n");
         print("pshg\t Ret,spp\t /*split: zwischensp.*/\n");                /*1*/
         break;

      case NEXTCASEV:                /*CWK, dummy*/
         /* Die Adresse der Trennzelle liegt oben auf dem pStack. */
         /* Pcodepar==0 -> then,  ==1 -> else. */
         if (glevel) /*950504*/
            if (Pcodepar==0)   print("ibp1_true\n");
            else               print("ibp1_false\n");
         force_modulo_0;
         print("popg\t spp,Ret\t /*nextcase%d*/\n", Pcodepar);            /*0*/
         /* spp-> |          |                 lege privaten Gruppenframe an:
          *       |$         |
          *       |@         |
          * gpp-> |gpp alt   |
          *       |Trennzelle|(bei if;   bei fork: shared Stackgroesse der Grp.)
          *       |# Gruppen |(bei fork;   bei if: undefined, hier 0)
          *       |sps alt   |
          *       |eps alt   |
          *       +----------+*/ 
         print("pshg\t eps,spp\t /*nextcase%d*/\n", Pcodepar);             /*1*/
         /*modulo_nop;   */                                                  /*0*/
         print("pshg\t sps,spp\t /*nextcase%d*/\n", Pcodepar);             /*1*/
         if (Pcodepar == 0)  /*then:  setze neuen eps*/
            print("add\t Ret,-1,eps\t /*nextcase%d*/\n", Pcodepar);        /*0*/
         else                /*else:  setze neuen sps*/
            print("mov\t Ret,sps\t /*nextcase%d*/\n", Pcodepar);           /*"*/
         print("pshgc\t spp    \t /*nextcase%d*/\n", Pcodepar);  /*undefined,1*/
         /*modulo_nop;  */                                                   /*0*/
         print("pshg\t Ret,spp\t /*nextcase%d*/\n", Pcodepar); /*Trennzelle, 1*/
         /*modulo_nop; */                                                    /*0*/
         /* lege shared Gruppenframe an: */
         force_modulo_1;
         print("pshg\t gps,sps\t /*nextcase%d*/\n", Pcodepar);  /*rette alten gps,1*/
         print("add\t sps,-1,gps\t /*nextcase%d*/\n", Pcodepar); /*neuer gps,0*/
         print("pshgc\t sps     \t /*nextcase%d*/\n", Pcodepar); /*init Synczelle,1*/
         print("add\t R0,1,par1\t /*nextcase%d*/\n", Pcodepar); /*'',        0*/
         print("mpadd\t gps,1,par1\t /*nextcase%d*/\n", Pcodepar);         /*1*/
         if (Kflag)
          print("bsrg\t spp,forklib_sstacktest /*nextcase%d*/\n", Pcodepar); /*0*/
         /* lege Rest des privaten Gruppenframe an: */
         print("ldg\t gpp,1,par1\t /*nextcase%d: @old*/\n", Pcodepar);
         print("pshg\t gpp,spp\t /*nextcase%d*/\n", Pcodepar); /*alter gpp*/
         print("ldg\t gpp,2,Ret\t /*nextcase%d: $old*/\n", Pcodepar);
         print("pshg\t par1,spp\t /*nextcase%d: copy@*/\n", Pcodepar);
         print("add\t spp,-2,gpp\t /*nextcase%d*/\n", Pcodepar);
         print("pshg\t Ret,spp\t /*nextcase%d: copy$*/\n", Pcodepar);
         if (Kflag)
          print("bsrg\t spp,forklib_pstacktest /*nextcase%d*/\n", Pcodepar);
         break;

      case LGRPIFV:                /*CWK*/
         force_modulo_0;
         print("ldg\t gps,0,gps\t /*lgrpif: alter gps*/\n");               /*0*/
         /*modulo_nop;*/
         print("ldg\t gpp,-3,sps\t /*lgrpif: free frame*/\n");             /*1*/
         /*vormals add gps,2,sps, kann zu Konflikten mit ProzFrames fuehren*/
         /*Wegen Splittung des SM muss der alte sps vom priv.Frame restored werden*/
         /*modulo_nop;*/
         print("ldg\t gpp,-4,eps\t /*lgrpif: alter eps*/\n");              /*0*/
#if 0
         ersetzt950814print("add\t spp,-7,spp\t /*lgrpif: free frame*/\n");/*1*/
#endif
         print("add\t gpp,-4,spp\t /*lgrpif: free frame*/\n");             /*1*/
         print("ldgn\t gpp,0,gpp\t /*lgrpif: alter gpp*/\n");              /*0*/
         /*vormals add gpp,3,spp, kann zu Konflikten mit ProzFrames fuehren*/
         if (glevel)    /*950504*/
            print("ibp4\n");
#ifdef DEBUG_LGRPIFV
          print("sub\t aps,sps,pc\t /*lgrpif-Konsistenztest*/\n");
          print("ble\t 5         \t /*Konsistenztest*/\n");
          print("sub\t fps,sps,pc\t /*lgrpif-Konsistenztest*/\n");
          print("ble\t 3         \t /*Konsistenztest*/\n");
          print("sub\t gps,sps,pc\t /*lgrpif-Konsistenztest*/\n");
          print("bgt\t 4         \t /*Konsistenztest*/\n");
          /*Fehlermeldung SHARED STACK OVERFLOW ausloesen: */
          print("add\t sps,-1,eps \t /*Konsistenztest*/\n");
          if (Kflag)
           print("bsrg\t spp,forklib_sstacktest /*Konsistenztest*/\n");
          print("sub\t app,spp,pc\t /*lgrpif-Konsistenztest*/\n");
          print("ble\t 5         \t /*Konsistenztest*/\n");
          print("sub\t fpp,spp,pc\t /*lgrpif-Konsistenztest*/\n");
          print("ble\t 3         \t /*Konsistenztest*/\n");
          print("sub\t gpp,spp,pc\t /*lgrpif-Konsistenztest*/\n");
          print("bgt\t 4         \t /*Konsistenztest*/\n");
          /*Fehlermeldung PRIVATE STACK OVERFLOW ausloesen: */
          print("add\t spp,-1,epp \t /*Konsistenztest*/\n");
          if (Kflag)
           print("bsrg\t spp,forklib_pstacktest /*Konsistenztest*/\n");
#endif
         break;

      case MKGRPV:                /*CWK, a->x.reg+ROFF enthaelt Gruppengroesse */
         /* lege neuen privaten Gruppenframe an: */
         /*force_modulo_1; nur private Stack-Zugriffe*/
         print("pshg\t eps,spp\t /*mkgrp: save eps*/\n");                  /*1*/
         print("sub\t sps,eps,par1\t /*mkgrp: freies SM*/\n");             /*0*/
         print("pshg\t sps,spp  \t /*mkgrp: save sps*/\n");                /*1*/
         /* ...berechne Aufteilung des SM:*/
         print("mov\t r%d,par2\t /*mkgrp: divide*/\n", a->x.reg+ROFF );    /*0*/
         print("bsrg\t spp,forklib_divi\t /*mkgrp: am.grps*/\n");          /*1*/
         /* ...Division durch 0 wird divi schon merken. Ergebnis in Ret:*/
         /*modulo_nop;*/
         print("pshg\t r%d,spp\t /*mkgrp: #groups*/\n", a->x.reg+ROFF );
                                                          /*fuer engrp */  /*1*/
         /*modulo_nop;*/
         print("pshg\t Ret,spp\t /*mkgrp: new size*/\n"); /*fuer engrp */  /*1*/
         print("ldg\t gpp,1,par1\t /*mkgrp: old @*/\n");                   /*0*/
         print("pshg\t gpp,spp\t /*mkgrp: rette gpp*/\n");                 /*1*/
         print("ldg\t gpp,2,Ret\t /*mkgrp: old $*/\n");                    /*0*/
         print("pshg\t par1,spp\t /*mkgrp: copy @*/\n");                   /*1*/
         print("add\t spp,-2,gpp\t /*mkgrp: neuer gpp*/\n");               /*0*/
         print("pshg\t Ret,spp\t /*mkgrp: copy $*/\n");                    /*1*/
         if (glevel)    /*950504*/
            print("ibp2\n");
         if (Kflag)
          print("bsrg\t spp,forklib_pstacktest /*mkgrp*/\n");
         /* ...damit anschliessend auf neue Adressen und alte Werte
          *    von $ und @ zugegr. wird*/
         break;

      case ENGRPV:                /*CWK*/
         /*teile shared stack unter neuen Gruppen gleichmaessig auf: */
         /*force_modulo_0; vorerst nur private Zugriffe*/
         /* erst synchronisieren: DIVI/MODI kann zur Asynchronitaet fuehren! */
         print("bsrg\t spp,forklib_sync /*engrp*/\n");    /*CWK950324*/
         if (Tflag) countbarrier(1); /*990119*/
         print("ldg\t gpp,1,par1\t /*engrp: new @*/\n");            /*0*/
         print("ldg\t gpp,-2,par2\t /*engrp: # groups*/\n");        /*0*/
         print("nop\t            \t /*engrp: delay*/\n");           /*1*/
         print("sub\t par1,par2,pc\t /*engrp:*/\n");               /*0*/
         print("ble\t forklib_printwronggroupnr /*@>=#gr: abort*/\n");      /*1*/
         print("mov\t par1,pc\t /*engrp*/\n");      /*0*/
         print("blt\t forklib_printwronggroupnr /*neg. @: abort*/\n");      /*1*/
         print("ldg\t gpp,-1,r30\t /*engrp: new size*/\n");        /*0*/
         print("nop\t           \t /*engrp: delay*/\n");           /*1*/
         print("mul\t par1,r30,Ret\t /*engrp: new offset=@*size*/\n");     /*0*/
         print("add\t sps,Ret,sps\t /*engrp: new sps*/\n");         /*1*/
         print("add\t sps,r30,eps\t /*engrp: compute*/\n");         /*0*/
         print("add\t eps,-1,eps\t /*engrp: new eps*/\n");          /*1*/
         /*shared Gruppenrahmen anlegen:*/
         /*modulo_nop;  */                                            /*0*/
         force_modulo_1;
         print("pshg\t gps,sps\t /*engrp: save gps*/\n");           /*1*/
#ifndef SELTSAM
         print("ldg\t gps,1,par1\n");  /*950323: alte Prozessorzahl*/
         modulo_nop;                  /*950323*/
#endif
         print("add\t sps,-1,gps\t /*engrp: new gps*/\n");             /*0*/
#ifndef SELTSAM
         print("pshgc\t sps    \t /*engrp: init synccell*/\n");       /*1*/
         /*Mist!! das klappt nicht fuer Nichtzweierpotenzen bei Vollmond*/
         print("getlo\t 1,Ret  \t /*engrp*/\n");                      /*0*/
         print("mpadd\t gps,1,Ret\t /*engrp: set synccell*/\n");    /*1*/
#else
         /* par2 enth. noch #groups */
         print("bsrg\t spp,forklib_divi\n");         /*950323*/
         modulo_nop;
         print("pshg\t Ret,sps\n");  /* Synccell*/   /*950323 end*/
#endif
         /*modulo_nop; bsrg spp braucht nicht moduliert zu werden */
         if (Kflag)
          print("bsrg\t spp,forklib_sstacktest /*engrp: overflow?*/\n"); /*1*/
         /*privater Gruppenrahmen wurde schon von mkgrp und @=,$=... angelegt */
         if (Tflag) {
            countload( 1 );   /*990122*/
            countstore( 2 );   /*990122*/
            countmpadd( 1 );   /*990122*/
         }
         break;

      case EXGRPV:                /*CWK*/
         force_modulo_0;
         print("ldg\t gps,0,gps\t /*exgrp: old gps*/\n");         /*0*/
#if 0
         ausk. 950815 CWK: Modulierung nur fuer 1. Befehl erforderlich
         print("mov\t gps,sps\t /*exgrp: free frame*/\n");        /*1*/
#endif

         print("ldg\t gpp,0,gpp\t /*exgrp: old gpp*/\n");
         print("add\t gpp,-2,spp\t /*exgrp: free frame*/\n");
         print("popgn\t spp,sps\t /*exgrp: old sps*/\n");
         print("popgn\t spp,eps\t /*exgrp: old eps*/\n");
         if (glevel) {   /*950504*/
            print("nop\n");  /*delay*/
            print("ibp4\n");
         }
#ifdef DEBUG_EXGRPV
          print("sub\t aps,sps,pc\t /*exgrp-Konsistenztest*/\n");
          print("ble\t 5         \t /*Konsistenztest*/\n");
          print("sub\t fps,sps,pc\t /*Konsistenztest*/\n");
          print("ble\t 3         \t /*Konsistenztest*/\n");
          print("sub\t gps,sps,pc\t /*Konsistenztest*/\n");
          print("bgt\t 4         \t /*Konsistenztest*/\n");
          /*Fehlermeldung SHARED STACK OVERFLOW ausloesen: */
          print("add\t sps,-1,eps \t /*Konsistenztest*/\n");
          print("bsrg\t spp,forklib_sstacktest /*Konsistenztest*/\n");
          print("sub\t app,spp,pc\t /*Konsistenztest*/\n");
          print("ble\t 5         \t /*Konsistenztest*/\n");
          print("sub\t fpp,spp,pc\t /*Konsistenztest*/\n");
          print("ble\t 3         \t /*Konsistenztest*/\n");
          print("sub\t gpp,spp,pc\t /*Konsistenztest*/\n");
          print("bgt\t 4         \t /*Konsistenztest*/\n");
          /*Fehlermeldung PRIVATE STACK OVERFLOW ausloesen: */
          print("add\t spp,-1,epp \t /*Konsistenztest*/\n");
          print("bsrg\t spp,forklib_pstacktest /*generate errmsg*/\n");
#endif
         break;

      case SPROCV:                /*CWK*/
         /* Frueher: Falls ein Ausdruck start(e) spezifiziert wurde, enthaelt */
         /* Register a->x.reg die Anzahl der zu startenden Prozessoren.*/
         /* Ansonsten (a==NULL) wird als default STARTED_PROCS genommen*/
         /* Test, ob die Anzahl der gestarteten Pr. ueberschritten wuerde: */
         if (a) {
          print("bsrg\t spp,forklib_sync /*sproc: synchronize required procs*/\n");
          if (Tflag) countbarrier(1); /*990119*/
          if (glevel)  {  /*950504*/
             print("pshg r%d,spp\n", a->x.reg+ROFF);
             print("nop\n");      /*delay slot*/
             print("ibp3\n");
             print("add spp,-1,spp\n");
          }
          print("getlo\t %d,par1 \t /*sproc: started procs*/\n", STARTED_PROCS);
          print("sub\t r%d,par1,pc\t /*sproc*/\n", a->x.reg+ROFF);
          label1 = mylabel();
          print("blt\t ERROR%d \t /*sproc: Error?*/\n",label1);
          /* Nun rechnet jeder Prozessor $orig aus, ob er sein SHADOW bit */
          /* setzen muss:  ($orig steht in ___PROC_NR__): */
          print("gethi\t ___PROC_NR__,par2 /*sproc: $orig.*/\n");
          print("add\t par2,___PROC_NR__&0x1fff,par2 /*sproc*/\n");
          force_modulo_0;
          print("ldg\t par2,0,par2\t /*sproc: $orig.*/\n");            /*0*/
          print("nop\t            \t /*delay slot*/\n");
          print("sub\t r%d,par2,pc\t /*sproc*/\n", a->x.reg+ROFF);     /*0*/
          print("blt\t CONT%d_0  \t /*sproc*/\n", label1); /*statt bge 950413 *1*/
          /*hier muss jetzt der Sprung nach XPROC eingebaut werden!siehe gen.c.neu*/
          print("getsr\t Ret     \t /*sproc*/\n");                     /*0*/
#if 0
          /* Hier wird das SHADOW-Bit geloescht fuer die neuaktiven Proz. */
          print("and\t Ret,~(SHADOW),Ret  /*sproc*/\n");
#else
          /* Hier wird das SHADOW-Bit gesetzt fuer die zu desaktivierenden Proz. */
          print("or\t Ret,SHADOW,Ret  /*sproc*/\n");      /*950413*/
#endif
          print("putsr\t Ret,Ret \t /*sproc*/\n");                     /*0*/

          print("bra\t CONT%d_1  \t /*sproc*/\n", label1);             /*1*/
          print("ERROR%d:\n", label1);
          print("getsr\t Ret     \t /*sproc: Error*/\n");              /*0*/
          print("and\t Ret,SHADOW,Ret\t /*sproc*/\n");
          print("bne\t 8      \t /*sproc: skip msg if $!=0*/\n");      /*0*/
          print("gethi\t forklib_sprocsexceeded,par2 /*sproc*/\n");    /*1*/
          print("add\t par2,forklib_sprocsexceeded&0x1fff,par2 /*sproc*/\n");
          print("getlo\t 2,par1  \t /*sproc: stderr*/\n");
          print("getlo\t 40,par3  \t /*sproc: msg length*/\n");
          print("getlo\t SYSCALL_WRITE,Ret  \t /*sproc: write*/\n");
          modulo_nop;
          print("sysc\t          \t /*sproc: write message*/\n");      /*1*/
          print("bsrg\t spp,forklib_sync\n");
          if (Tflag) countbarrier(1); /*990119*/
          print("getlo\t 1,Ret    \t /*sproc: error*/\n");
          print("jra\t _exit \t /*sproc: abort prg*/\n");       /*1*/
          print("CONT%d_0:\n", label1);                             /*0*/
          /*die Prozessoren mit $orig>= der angeforderten Anzahl warten
           *5 Schritte, bis ihre Kollegen ihr shadowbit geloescht haben:*/
          print("nop\t /*wait until shadow has been set*/\n");
          print("getlo\t 0,par2  \t /*sproc:shadow processors numbered 0*/\n");
          print("nop\nnop\n");
          print("CONT%d_1:\n", label1);                             /*1*/
         } 
         else {  /* !a, d.h. alle Prozessoren starten: */
            print("bsrg\t spp,forklib_sync /*sproc: synchronize required procs*/\n");
            if (Tflag) countbarrier(1); /*990119*/
            if (glevel)    /*950504*/
                print("ibp_3\n");
#if 0
 CWK 960812
            print("getsr\t Ret     \t /*sproc*/\n");
            print("and\t Ret,~(SHADOW),Ret  /*sproc*/\n");
            print("putsr\t Ret,Ret \t /*sproc*/\n");
#endif
            print("gethi\t ___PROC_NR__,par2 /*sproc: $orig.*/\n");
            print("add\t par2,___PROC_NR__&0x1fff,par2 /*sproc*/\n");
            print("ldg\t par2,0,par2\t /*sproc: $orig.*/\n");       /*0*/
         }
         /* SHADOW wurde behandelt. Nun erzeuge neuen privaten Gruppenframe: */
         print("pshg\t fps,spp\t /*sproc*/\n"); /*980129*/
         print("pshg\t eps,spp\t /*sproc*/\n"); 
         print("pshg\t sps,spp\t /*sproc*/\n");
         print("pshg\t gpp,spp\t /*sproc*/\n");
         print("pshgc\t spp    \t /*sproc: @=0*/\n");
         print("add\t spp,-2,gpp\t /*sproc: neuer gpp*/\n");
         print("pshg\t par2,spp\t /*sproc: $=$orig. oder 0, falls SHADOW*/\n");
         if (Kflag)
          print("bsrg\t spp,forklib_pstacktest /*sproc: overflow?*/\n");
         /* Synchronisation nicht erforderlich, weil die Prozessoren bislang
          * synchron geblieben sind. Erzeuge neuen shared Gruppenframe:*/
         force_modulo_1;
         print("pshg\t gps,sps\t /*sproc: sh.grfr*/\n");             /*1*/
         print("add\t sps,-1,gps\t /*sproc*/\n");
#if 0
         if (a)
           print("stg\t r%d,gps,1\t /*sproc:init synccell*/\n", a->x.reg+ROFF);
         else {
           print("gethi\t ___STARTED_PROCS__,Ret \t /*sproc:load #procs*/\n");
           print("add\t Ret,0x1fff&___STARTED_PROCS__,Ret \t /*sproc:load #procs*/\n");
           print("nop \t \t \t /*sproc:advance modulo*/\n");
           print("ldg\t Ret,0,Ret \t /*sproc:load #procs*/\n");
           print("nop \t \t \t /*sproc:delay*/\n");
           print("nop \t \t \t /*sproc:advance modulo*/\n");
           print("stg\t Ret,gps,1\t /*sproc:init synccell*/\n");
         }
         print("add\t sps,1,sps\t /*sproc*/\n");
#endif
         print("pshgc\t sps    \t /*sproc: init synccell*/\n");
         print("getlo\t 1,r30\n");
         print("mpaddn\t gps,1,r30\t /*sproc: set synccell*/\n");
         print("mov\t gps,fps\t /*sproc: fake fps for ADDRSL*/\n");/*980129*/
         break;

      case XPROCV:                /*CWK*/
         /*!alle Aenderungen hier muessen auch nach retflush() uebertragen werden!*/
         /*Die Gruppenrahmen vor der start-Anweisung werden wiederhergestellt.*/
         force_modulo_1;
         /*print("bsrg\t spp,forklib_sync\n"); weg nach Absprache mit HS */
         print("ldg\t gpp,-3,fps\t /*xproc: restore fps*/\n"); /*1*980129*/
         print("ldg\t gps,0,gps\t /*xproc: restore gps*/\n");       /*0*/
         print("mov\t gpp,spp\t /*xproc: prepare spp*/\n");
         print("ldg\t gpp,0,gpp\t /*xproc: restore gpp*/\n");       /*0*/
#if 0
CWK 960812
         print("gethi\t ___PROC_NR__,par2 /*xproc: $orig.*/\n");
#endif
         print("popg\t spp,sps\t /*xproc: restore sps*/\n");       /*0*/
#if 0
CWK 960812
         print("add\t par2,___PROC_NR__&0x1fff,par2 /*xproc*/\n");       /*1*/
#endif
         print("popg\t spp,eps\t /*xproc: restore eps*/\n");       /*0*/
         if (glevel) {   /*950504*/
            modulo_nop;
            print("ibp4\n");
         }
         /*Alle Prozessoren ausser 0 setzen das SHADOW-Bit:*/
#if 0
    970904: Wieso ??? Dabei kann ich mir den Prozedurframe zerhacken!
         print("add\t gpp,3,spp\t /*xproc: restore spp*/\n");
#endif
#if 0
CWK 960812   SHADOW wird ganz eliminiert.
         print("ldg\t par2,0,par2\t /*xproc: $orig.*/\n");           /*0*/
         print("nop\t            \t /*delay slot*/\n");
         print("add\t par2,0,par2\t /*xproc: cmp w.0*/\n");
         print("beq\t 5          \t /*xproc*/\n");
         print("getsr\t Ret     \t /*xproc*/\n");
#if 0
         print("or\t Ret,SHADOW,Ret\t /*xproc*/\n");
#else
         print("and\t Ret,~(SHADOW),Ret\t /*xproc*/\n");   /*clear SHADOW. 950413*/
#endif
         print("putsr\t Ret,Ret \t /*xproc*/\n");
         print("bra\t 5          \t /*xproc*/\n");
         print("nop\t            \t /*xproc:stay sync*/\n");
         print("nop\t            \t /*xproc:stay sync*/\n");
         print("nop\t            \t /*xproc:stay sync*/\n");
         print("nop\t            \t /*xproc:stay sync*/\n");
         /*Shadow-Bit ist nun verarztet. Synchron. */
#endif
         break;

      case ENLGRPV:                /*CWK*/
         /* Es wird ein neuer shared Gruppenframe angelegt fuer die Gruppe *
          * der Prozessoren, die die Schleife ausfuehren. Der private      *
          * Gruppenframe bleibt unveraendert, ebenso der eps.              *
          * Die Synchronisationszelle wird mit dem alten Wert initialisiert. */
         /* Pcodepar gibt Art der Schleife (while, for, do) an  950504 */
         if (glevel)  /*950504*/
            print("ibp0_%d\n", Pcodepar);
         force_modulo_0;
#if 0
         print("bsrg\t spp,forklib_sync /*enlgrp*/\n");              /*1*/
#endif
         print("ldg\t gps,1,r31\t /*enlgrp: old $@*/\n");
         print("pshg\t gps,sps\t /*enlgrp: old gps*/\n");
         print("add\t sps,-1,gps\t /*enlgrp: new gps*/\n");
         print("pshg\t r31,sps\t /*enlgrp: init s.cell*/\n");
         break;

      case EXLGRPV:                /*CWK, dummy*/
         /* am Ende von privaten Schleifen. Der shared Gruppenframe fuer   *
          * die iterierenden Prozessoren wird abgeraeumt.                  */
         if (glevel)  print("nop\nibp0_false\n");  /*950505*/
         force_modulo_0;
#if 0
         print("getlo -1,r31 \t /*exlgrp*/\n");                         /*0*/
         print("mpadd\t gps,1,r31\t /*exlgrp:check out*/\n");           /*1*/
#endif
         print("ldg\t gps,0,gps\t /*exlgrp: rest.gps*/\n");             /*0*/
/*testweise drinlassen: 970905*/
         print("add\t sps,-2,sps\t /*exlgrp: free frame*/\n");           /*1*/
         /*vormals add gps,2,sps fuehrt bei dazwischenliegendem Prozedurframe
          *zum Chaos. Da der sps von Schleifen nicht zwischengespeichert wird,
          *muss der sps mit dem Holzhammer korrigiert werden: sps-=2.
          *Dies macht nix, weil das SM bei loops nicht aufgesplittet wird. CWK*/
#if 0
 so muesste es eigentlich sein am 970905
         print("mov\t gps,sps\t /*exlgrp: free group frame*/\n");           /*1*/
#endif
         if (glevel)  /*950504*/
            print("ibp4\n");
#ifdef DEBUG_EXLGRPV
          print("sub\t aps,sps,pc\t /*exlgrp-Konsistenztest*/\n");
          print("ble\t 5         \t /*Konsistenztest*/\n");
          print("sub\t fps,sps,pc\t /*Konsistenztest*/\n");
          print("ble\t 3         \t /*Konsistenztest*/\n");
          print("sub\t gps,sps,pc\t /*Konsistenztest*/\n");
          print("bgt\t 4         \t /*Konsistenztest*/\n");
          /*Fehlermeldung SHARED STACK OVERFLOW ausloesen: */
          print("add\t sps,-1,eps \t /*Konsistenztest*/\n");
          print("bsrg\t spp,forklib_sstacktest /*Konsistenztest*/\n");
          print("sub\t app,spp,pc\t /*Konsistenztest*/\n");
          print("ble\t 5         \t /*Konsistenztest*/\n");
          print("sub\t fpp,spp,pc\t /*Konsistenztest*/\n");
          print("ble\t 3         \t /*Konsistenztest*/\n");
          print("sub\t gpp,spp,pc\t /*Konsistenztest*/\n");
          print("bgt\t 4         \t /*Konsistenztest*/\n");
          /*Fehlermeldung PRIVATE STACK OVERFLOW ausloesen: */
          print("add\t spp,-1,epp \t /*Konsistenztest*/\n");
          print("bsrg\t spp,forklib_pstacktest /*Konsistenztest*/\n");
#endif
         break;

      case SIZEV:                  /*CWK*/
         /* bestimme vor jeder Iteration, wieviele Prozessoren diese Schleife
          * noch ausfuehren. Nachdem alle Prozessoren der Schleifengruppe 
          * synchronisiert sind, genuegt es, die Synchronisationszelle zu nullen
          * und parallel eine 1 zu addieren. */
         /* Pcodepar gibt Art der Schleife (while, for, do) an  950504 */
         if (glevel)  /*950504*/
            print("ibp0_true\n");
#if 0
         print("bsrg\t spp,forklib_sync /*size*/\n");
#endif
         force_modulo_0;
         print("getlo\t 0,Ret  \t /*size*/\n");
         print("stg\t Ret,gps,1\t /*size: init s.cell*/\n");              /*1*/
         print("getlo\t 1,Ret  \t /*size: Ret=1 */\n");                   /*0*/
         print("mpaddn\t gps,1,Ret\t /*size:#active pr*/\n");             /*1*/
         break;

      case IBPV:                /*CWK 950504*/
         print("ibp%d\n", Pcodepar);
         break;

      case FLUSHFRAMESV:                /*CWK*/
         force_modulo_0;
         /*zuerst in der obersten Gruppe austragen:*/
#if 0
         print("getlo\t -1,r31   \t /*FLUSHFRAME*/\n");
         print("mpaddn\t gps,1,r31\t /*FLUSHFRAME*/\n");
#endif
         for (label1=Pcodepar; label1 > 0; label1--) {
            print("ldg\t gpp,-4,eps\t /*FLUSHFRAME(%d of %d)*/\n", label1, Pcodepar);
            print("getlo\t -1,r31   \t /*FLUSHFRAME(%d of %d)*/\n", label1, Pcodepar);
            print("ldg\t gpp,0,gpp\t /*FLUSHFRAME(%d of %d)*/\n", label1, Pcodepar);
            print("mov\t gps,sps\t /*FLUSHFRAME(%d of %d)*/\n", label1, Pcodepar);
            print("ldg\t gps,0,gps\t /*FLUSHFRAME(%d of %d)*/\n", label1, Pcodepar);
            print("add\t gpp,3,spp\t /*FLUSHFRAME(%d of %d)*/\n", label1, Pcodepar);
            /* in der naechstunteren Gruppe austragen: */
            print("mpaddn\t gps,1,r31\t /*FLUSHFRAME(%d of %d)*/\n",label1,Pcodepar);
            if (glevel) { /*950504*/
               modulo_nop;
               print("ibp4\n");
            }
            if (Tflag) 
              { countload( Pcodepar ); countmpadd(Pcodepar); }  /*990122*/
         }
         break;
      case BEGSEQV:                /*CWK 970310*/
         force_modulo_0;
         /*synchron. Bestimme leader: Proz. mit kleinster MPADD-priority*/
         print("getlo\t -1,r31   \t /*BEGSEQV%d*/\n",Pcodepar);
         print("syncadd\t r31,gps,1\t /*BEGSEQV*/\n");
         print("getlo\t 1,r31   \t /*BEGSEQV*/\n");
         print("mpadd\t gps,1,r31\t /*BEGSEQV*/\n");
         print("nop\t          \t /*BEGSEQV*/\n");
         print("add\t r31,R0,r31\t /*BEGSEQV*/\n");
         print("bgt\t ENDSEQ%d\t /*BEGSEQV*/\n", Pcodepar);
         /* jetzt koennte noch ein shared Gruppenframe fuer 1 Prozessor
          * aufgebaut werden, um barriers im seq-Rumpf zu erlauben !
          * das macht aber bei 1 Prozessor wenig Sinn.
          * Ein privater Frame waere dann auch noetig, damit flushframes
          * ordentlich arbeitet.*/
         if (Tflag)  countmpadd( 2 );   /*990122*/
         break;
      case ENDSEQV:                /*CWK 970310*/
         /*asynchron. Label und barrier ->weiter synchron in parallel*/
         /* und genau hier muesste dieser Frame wieder abgeraeumt werden. */
         print("ENDSEQ%d: bsrg\t spp,forklib_sync /*ENDSEQV%d*/\n",
                Pcodepar, Pcodepar);
         if (Tflag) countbarrier(1); /*990119*/
         break;
      case JBLOCKaV:          /*990126 */
         print("\n.section \".gsdata\",.data \t /*JBLOCKaV%d*/\n", Pcodepar);
         print(".globl busTicket%d \t /*JBLOCKaV*/\n", Pcodepar);
         print("busTicket%d: .word 0 \t /*JBLOCKaV*/\n", Pcodepar);
         print(".globl busSM%d \t /*JBLOCKaV*/\n", Pcodepar);
         print("busSM%d: .word 0 \t /*JBLOCKaV*/\n", Pcodepar);
         print(".globl busGone%d \t /*JBLOCKaV*/\n", Pcodepar);
         print("busGone%d: .word 0 \t /*JBLOCKaV*/\n", Pcodepar);
         print(".section \".gpdata\",.data \t /*JBLOCKaV%d*/\n", Pcodepar);
         print(".globl busSize%d \t /*JBLOCKaV*/\n", Pcodepar);
         print("busSize%d: .word 0 \t /*JBLOCKaV*/\n", Pcodepar);
         print(".globl busRank%d \t /*JBLOCKaV*/\n", Pcodepar);
         print("busRank%d: .word 0 \t /*JBLOCKaV*/\n", Pcodepar);
         print("\n.section \".text\",.text \t /*JBLOCKaV%d*/\n", Pcodepar);
         print("gethi\t busTicket%d,r31 \t /*JBLOCKaV*/\n", Pcodepar);
         print("add\t r31,busTicket%d&0x1fff,r31 \t /*JBLOCKaV*/\n", Pcodepar);
         print("gethi\t ___TICKETP__,r30 \t /*JBLOCKaV*/\n");
         print("add\t r30,___TICKETP__&0x1fff,r30 \t /*JBLOCKaV*/\n");
         print("stg\t r31,r30,0 \t /*JBLOCKaV*/\n");
         print("gethi\t busRank%d,r31 \t /*JBLOCKaV*/\n", Pcodepar);
         print("add\t r31,busRank%d&0x1fff,r31 \t /*JBLOCKaV*/\n", Pcodepar);
         print("gethi\t ___RANKP__,r30 \t /*JBLOCKaV*/\n");
         print("add\t r30,___RANKP__&0x1fff,r30 \t /*JBLOCKaV*/\n");
         print("stg\t r31,r30,0 \t /*JBLOCKaV*/\n");
         break;

      case JBLOCKbV:          /*990126 */
         if (Tflag)  countload( 1 );   /*990202*/
         print("gethi\t busSize%d,r31\t /*JBLOCKbV*/\n", Pcodepar);
         print("add\t r31,busSize%d&0x1fff,r31\t /*JBLOCKbV*/\n", Pcodepar);
         print("stg\t r%d,r31,0 \t /*JBLOCKbV*/\n", a->x.reg+ROFF );
         print("gethi\t busGone%d,r31\t /*JBLOCKbV*/\n", Pcodepar);
         print("add\t r31,busGone%d&0x1fff,r31\t /*JBLOCKbV*/\n", Pcodepar);
         print("bmc\t 0       \t /*JBLOCKbV*/\n");
         print("ldgn\t r31,0,r31\t /*JBLOCKbV*/\n");
         print("add\t r31,0,r31\t /*JBLOCKbV*/\n");
         print("bne\t LJBh%d \t /*JBLOCKbV: jump if gone*/\n", Pcodepar);
         break;

      case JBLOCKcV:          /*990126 */
         print("gethi\t busTicket%d,r31\t /*JBLOCKcV*/\n", Pcodepar);
         print("add\t r31,busTicket%d&0x1fff,r31\t /*JBLOCKcV*/\n", Pcodepar);
         print("getlo\t 1,r30\t /*JBLOCKcV*/\n");
         print("bmc\t 0        \t /*JBLOCKcV*/\n");
         print("mpadd\t r31,0,r30\t /*JBLOCKcV 0*/\n");
         print("gethi\t busRank%d,r31\t /*JBLOCKcV*/\n", Pcodepar);
         print("add\t r31,busRank%d&0x1fff,r31\t /*JBLOCKcV*/\n", Pcodepar);
         print("stg\t r30,r31,0\t /*JBLOCKcV*/\n");
         print("add\t r30,0,r30\t /*JBLOCKcV*/\n");
         print("bne\t LJBd%d\t /* jump if (!driver)*/\t /*JBLOCKcV*/\n", Pcodepar);

         print(" /*BusSM%d = shmalloc(busSize%d):*/\t /*JBLOCKcV*/\n", Pcodepar, Pcodepar);
         print("gethi\t busSize%d,r30\t /*JBLOCKcV*/\n", Pcodepar);
         print("add\t r30,busSize%d&0x1fff,r30\t /*JBLOCKcV*/\n", Pcodepar);
         print("ldgn\t r30,0,r30\t /*JBLOCKcV*/\n");
         print("pshg\t app,spp\t /*JBLOCKcV*/\n");
         print("pshg\t r30,spp\t /*JBLOCKcV*/\n");
         print("add\t spp,-2,app\t /*JBLOCKcV*/\n");
         print("bsrg\t spp,_shmalloc\t /*JBLOCKcV*/\n");
         print("mov\t app,spp\t /*JBLOCKcV*/\n");
         print("ldgn\t spp,0,app\t /*JBLOCKcV*/\n");
         print("gethi\t busSM%d,r31\t /*JBLOCKcV*/\n", Pcodepar);
         print("add\t r31,busSM%d&0x1fff,r31\t /*JBLOCKcV*/\n", Pcodepar);
         print("bms\t 0        \t /*JBLOCKcV*/\n");
         print("stg\t Ret,r31,0\t /*JBLOCKcV 1*/\n");

         print("pshg\t gps,spp \t /*JBLOCKcV (driver)*/\n");
         print("pshg\t eps,spp \t /*JBLOCKcV (driver)*/\n");
         print("pshg\t sps,spp \t /*JBLOCKcV (driver)*/\n");
         print("pshg\t gpp,spp \t /*JBLOCKcV (driver)*/\n");
         print("add \t spp,-1,gpp \t /*JBLOCKcV (driver)*/\n");
         print("pshgc\t spp \t /*@ JBLOCKcV (driver)*/\n");
         print("pshgc\t spp \t /*$ JBLOCKcV (driver)*/\n");
         print("gethi\t busSize%d,r30\t /*JBLOCKcV*/\n", Pcodepar);
         print("add\t r30,busSize%d&0x1fff,r30\t /*JBLOCKcV*/\n", Pcodepar);
         print("ldgn\t r30,0,r30\t /*JBLOCKcV*/\n");
         print("mov\t Ret,gps \t /*JBLOCKcV (driver)*/\n");
         print("add\t Ret,r30,eps \t /*JBLOCKcV (driver)*/\n");
         print("add\t eps,-1,eps \t /*JBLOCKcV (driver)*/\n");
         print("mov\t Ret,sps \t /*JBLOCKcV (driver)*/\n");
         print("pshgc\t sps \t /*empty JBLOCKcV (driver)*/\n");
         print("pshgc\t sps \t /*sc JBLOCKcV (driver)*/\n");
         if (Tflag)  countmpadd( 2 );   /*990202 including driver part of dV */
         if (Tflag)  countstore( 5 );   /*990202 including driver part of dV*/
         /* delaystmt follows */
         break;

      case JBLOCKdV:          /*990126 */
         print("gethi\t busGone%d,r31\t /*JBLOCKdV*/\n", Pcodepar);
         print("add\t r31,busGone%d&0x1fff,r31\t /*JBLOCKdV*/\n", Pcodepar);
         print("getlo\t 1,r30\t /*JBLOCKdV*/\n");
         print("bms\t 0      \t /*JBLOCKdV*/\n");
         print("stg\t r30,r31,0\t /*JBLOCKdV 1*/\n");
         print("nop\nnop\nnop\nnop\nnop\nnop\nnop\nnop\nnop \t /*JBLOCKdV (driver)*/\n");
         print("gethi\t busTicket%d,r30\t /*JBLOCKdV 1*/\n", Pcodepar);
         print("add\t r30,busTicket%d&0x1fff,r30\t /*JBLOCKdV 0*/\n", Pcodepar);
         print("getlo\t 0,r31\t /*JBLOCKdV 1*/\n");
         print("mpadd\t r30,0,r31\t /*JBLOCKdV 0*/\n");
         print("nop\nnop\t          \t /*JBLOCKdV*/\n");
         print("stg\t r31,gps,1\t /*JBLOCKdV 1*/\n");
         print("jra\t LJBdend%d\t /*JBLOCKdV*/\n", Pcodepar);

         print("LJBd%d: \t /*JBLOCKdV: not driver*/\n", Pcodepar);
         print("pshg\t gps,spp \t /*JBLOCKdV (not driver)*/\n");
         print("pshg\t eps,spp \t /*JBLOCKdV (not driver)*/\n");
         print("pshg\t sps,spp \t /*JBLOCKdV (not driver)*/\n");
         print("pshg\t gpp,spp \t /*JBLOCKdV (not driver)*/\n");
         print("add \t spp,-1,gpp \t /*JBLOCKdV (not driver)*/\n");
         print("gethi\t busRank%d,r31\t /*JBLOCKdV*/\n", Pcodepar);
         print("add\t r31,busRank%d&0x1fff,r31\t /*JBLOCKdV*/\n", Pcodepar);
         print("ldg\t r31,0,r31\t /*JBLOCKdV*/\n");
         print("pshgc\t spp \t /*@ JBLOCKdV (not driver)*/\n");
         print("pshg\t r31,spp \t /*$ JBLOCKdV (not driver)*/\n");
         print("gethi\t busGone%d,r31\t /*JBLOCKdV*/\n", Pcodepar);
         print("add\t r31,busGone%d&0x1fff,r31\t /*JBLOCKdV*/\n", Pcodepar);
         print("bmc\t 0        \t /*JBLOCKdV*/\n");
         print("ldg\t r31,0,r30\t /*JBLOCKdV 0*/\n");
         print("nop\t          \t /*JBLOCKdV 1*/\n");
         print("add\t r30,0,r30\t /*JBLOCKdV 0*/\n");
         print("beq\t -3       \t /*JBLOCKdV 1*/\n");
         print("nop\nnop\nnop\nnop\nnop\nnop\nnop\nnop \t /*JBLOCKdV (not driver)*/\n");
         print("gethi\t busSM%d,r31\t /*JBLOCKdV 0*/\n", Pcodepar);
         print("add\t r31,busSM%d&0x1fff,r31\t /*JBLOCKdV 1*/\n", Pcodepar);
         print("ldg\t r31,0,gps \t /*JBLOCKdV (not driver) 0*/\n");
         print("gethi\t busSize%d,r30\t /*JBLOCKdV 1*/\n", Pcodepar);
         print("add\t r30,busSize%d&0x1fff,r30\t /*JBLOCKdV 0*/\n", Pcodepar);
         print("add\t gps,2,sps\t /*JBLOCKdV 1*/\n");
         print("ldg\t r30,0,r30\t /*JBLOCKdV 0*/\n");
         print("add\t gps,-1,r31\t /*JBLOCKdV 1*/\n");
         print("add\t r31,r30,eps\t /*JBLOCKdV 0*/\n");
         if (Tflag)  countload ( 2 );   /*990202*/
         print("LJBdend%d: \t /*JBLOCKdV (end of not driver)*/\n", Pcodepar);
         break;
      case JBLOCKeV:          /*990126 */
         print("bmc\t 0       \t /*JBLOCKeV*/\n");
         print("getlo\t -1,r31 \t /*JBLOCKeV 0*/\n");
         print("mpadd\t gps,1,r31 \t /*JBLOCKeV 1*/\n");
         print("ldg\t gpp,0,gpp \t /*JBLOCKeV*/\n");
         print("mov\t gpp,spp \t /*JBLOCKeV*/\n");
         print("popg\t spp,sps \t /*JBLOCKeV*/\n");
         print("popg\t spp,eps \t /*JBLOCKeV*/\n");
         print("popg\t spp,gps \t /*JBLOCKeV*/\n");
         if (Tflag)  countmpadd( 1 );   /*990202*/
         break;
      case JBLOCKfV:          /*990126 */
         print("bsrg spp,forklib_sync \t /*JBLOCKfV*/\n");
         /* now exactly synchronous: */
         print("bmc \t 0 \t /*JBLOCKfV*/\n");
         print("getlo\t -1,r31 \t /*JBLOCKfV 0, renumber $*/\n");
         print("mpadd\t gps,1,r31 \t /*JBLOCKfV 1*/\n");
         print("getlo\t 1,r30 \t /*JBLOCKfV 0*/\n");
         print("mpadd\t gps,1,r30 \t /*JBLOCKfV 1*/\n");
         print("nop\t          \t /*JBLOCKfV*/\n");
         print("stg\t r30,gpp,2 \t /*JBLOCKfV set $*/\n");
         if (Tflag)  countmpadd( 2 );   /*990202*/
         if (Tflag)  countbarrier( 1 );   /*990202*/
         break;
      case JBLOCKgV:          /*990126 */
         print("ldgn\t gpp,2,r30\t /*JBLOCKgV*/\n");
         print("add\t r30,0,r30\t /*JBLOCKgV*/\n");
         print("bne\t LJBg%d       \t /*JBLOCKgV*/\n", Pcodepar);
         print("gethi\t busSM%d,r30\t /*JBLOCKgV: free shared bus memory*/\n", Pcodepar);
         print("add\t r30,busSM%d&0x1fff,r30\t /*JBLOCKgV*/\n", Pcodepar);
         print("ldg\t r30,0,r30\t /*JBLOCKgV*/\n");
         print("pshg\t app,spp\t /*JBLOCKgV*/\n");
         print("pshg\t r30,spp\t /*JBLOCKgV*/\n");
         print("add\t spp,-2,app\t /*JBLOCKgV*/\n");
         print("bsrg\t spp,_shfree\t /*JBLOCKgV*/\n");
         print("mov\t app,spp\t /*JBLOCKgV*/\n");
         print("ldgn\t spp,0,app\t /*JBLOCKgV*/\n");
         print("LJBg%d:bsrg spp,forklib_sync /*JBLOCKgV*/\n", Pcodepar);
         print("gethi\t busTicket%d,r30\t /*JBLOCKgV: re-init ticket automaton*/\n", Pcodepar);
         print("add\t r30,busTicket%d&0x1fff,r30\t /*JBLOCKgV*/\n", Pcodepar);
         print("bms\t 0        \t /*JBLOCKgV*/\n");
         print("stgc\t r30      \t /*JBLOCKgV 1*/\n");
         print("gethi\t busGone%d,r30\t /*JBLOCKgV 0: busGone=0*/\n", Pcodepar);
         print("add\t r30,busGone%d&0x1fff,r30\t /*JBLOCKgV 1*/\n", Pcodepar);
         print("nop \t /*JBLOCKgV 0*/\n");
         print("stgc\t r30     \t /*JBLOCKgV 1*/\n");
         print("ldg\t gpp,0,gpp \t /*JBLOCKgV*/\n");
         print("mov\t gpp,spp \t /*JBLOCKgV*/\n");
         print("popg\t spp,sps \t /*JBLOCKgV*/\n");
         print("popg\t spp,eps \t /*JBLOCKgV*/\n");
         print("popg\t spp,gps \t /*JBLOCKgV*/\n");
         if (Tflag)  countbarrier( 1 );   /*990202*/
         if (Tflag)  countstore( 2 );   /*990202*/
         if (Tflag)  countload( 1 );   /*990202*/
         break;
      case JBLOCKhV:          /*990126 */
         print("LJBh%d: /*empty*/\t /*JBLOCKhV*/\n", Pcodepar);
         break;
      default: assert(0);
      }
   }
}

/* function - generate code for a function
 *
 * f       symbol of the function being compiled
 * caller  0-terminated array of parameter symbols as seen by the caller
 * callee  0-terminated array of parameter symbols as seen by the callee
 * ncalls  not used by this back end
 *
 * Steps, in order:
 *  1. give every parameter an offset and an addressing string: shared
 *     parameters are addressed "aps,<offset>", private ones "app,<offset>";
 *  2. reset the per-function frame bookkeeping and run gencode();
 *  3. emit the function label and the prologue (different sequences for
 *     f->sync set and clear);
 *  4. emit the body via emitcode().
 */
void function(Symbol f, Symbol caller[],
   Symbol callee[], int ncalls) {
   int i;

   proffset = 1;        /* private parameter offsets start at 1 */
   shoffset = 1;        /* shared parameter offsets start at 1 */
   resetshoffset = 0;
   shframedepth = 0;
   freeallparregs();
   for (i = 0; caller[i] && callee[i]; i++) {
      if (callee[i]->shared) {
         shoffset = roundup(shoffset, caller[i]->type->align);
         callee[i]->x.offset = caller[i]->x.offset = shoffset;
         callee[i]->x.name = caller[i]->x.name = stringf("aps,%d", shoffset);
         shoffset += caller[i]->type->size;
      }
      else {
         proffset = roundup(proffset, caller[i]->type->align);
         callee[i]->x.offset = caller[i]->x.offset = proffset;
         callee[i]->x.name = caller[i]->x.name = stringf("app,%d", proffset);
         proffset += caller[i]->type->size;
      }
      callee[i]->sclass = AUTO;
   }
   /* parameters are placed; locals start again from offset 0 */
   usedmask = argbuildsize = prframesize = shframesize
            = shoffset = proffset = 0;
   gencode(caller, callee);
   assert(shframedepth==0);

   /* The VAX register-save mask word that lcc emitted here is not used. */
   print("%s:\n", f->x.name);
   /* The call has just saved pc; now save the old fpp. */
   print("pshg\t fpp,spp\t /*save old fpp*/\n");
   if (f->sync) {
      /* 4 (formerly 3): eps is saved again as well (960815) */
      print("add\t spp,4,fpp\t /*set new fpp*/\n");
      print("pshg\t fps,spp\t /*save old fps*/\n");
      print("mov\t sps,fps\t /*set new fps*/\n");
      print("pshg\t gpp,spp\t /*save old gpp*/\n");
      print("pshg\t gps,spp\t /*save old gps*/\n");
      /* eps is saved so that shallfree can work per function */
      print("pshg\t eps,spp\t /*save old eps*/\n");
   }
   else { /* asynchronous function */
      /* History: tentatively disabled 970905, re-enabled 980129 because of
       * an ADDRSL problem. */
      if (shframesize) {   /* shared frame for an asynchronous function */
         print("pshg\t fps,sps\t /*save old fps*/\n");
         print("add\t sps,-1,fps\t /*new fps*/\n");
         if (Tflag) countstore(1);
      }
      print("mov\t spp,fpp\t /*set new fpp*/\n");
   }

   if (f->sync) {
      /* Open the function group; done before space for locals is
       * reserved (moved here 970905). */
      force_modulo_0;
      print("ldg\t gps,1,r31\t /*Funktionsgruppe aufmachen: lade Prozessorzahl*/\n");
      print("pshg\t gps,sps\t /*Funktionsgruppe: rette alten gps*/\n");
      print("add\t sps,-1,gps\t /*Funktionsgruppe: neuer gps*/\n");
      print("pshg\t r31,sps\t /*Funktionsgruppe: init s.cell*/\n");
      if (Tflag) { countload(1); countstore(2); }
   }

   /* Since the stack frame was reorganised, nregs and argbuildsize no longer
    * contribute to the frame size; only the locals do.  The extra +1 is the
    * reserve cell that fpp points to. */
   print("add\t fpp,%d,spp\t /*alloc space f locals*/\n", prframesize + 1);
   if (f->sync) {
      /* shframesize is no longer added here (removed 970908): shared locals
       * are addressed relative to the function group, so sps = fps + 2.
       * Before 980205 this constant was 3 (struct return value). */
      print("add\t fps,2,sps\t /*space for fn group frame */\n");
   }

   if (glevel) {
      if (f->sync)
         print("nop\nibp9_0      /* synchronous fn, frame completed */\n");
      else
         print("ibp9_1      /* asynchronous fn, frame completed */\n");
   }

   if (isstruct(freturn(f->type))) {
      if (f->shared) {
         /* forklib_sync takes care of the modulo flag itself */
         print("bsrg\t spp,forklib_sync\t /*struct-Erg*/\n");
         modulo_nop;
         print("stg\t Ret,fps,0\t /*struct-Erg*/\n");
         if (Tflag) countbarrier(1);
         if (Tflag) countstore(1);
      }
      else {
         /* Fix: the emitted assembler comment was never closed here, unlike
          * every other emitted line. */
         print("stg\t Ret,fpp,0\t /*struct-Erg*/\n");
      }
   }
   emitcode();
   if (glevel > 1) {
      print("return\nnop\n");
   }
}

/* Epilog - emit the code that tears down the current function's frame.
 *
 * sync  0 for an asynchronous function; nonzero for a synchronous one, in
 *       which case the function group is closed first and the saved
 *       gps/gpp/fps are popped again.
 *
 * The emitted sequence is followed by a ret, which pops pc.  Restoring app
 * and aps is left to the caller.
 */
void Epilog(int sync)
{
   if (sync) {  /* close the function group first */
      force_modulo_0;
      print("getlo\t -1,r31   \t /*Fn-Gruppe zumachen:*/\n");
      print("mpaddn\t gps,1,r31\t /*austragen,*/\n");
      print("ldg\t gps,0,gps\t /*restore gps,*/\n");
      /* 970904: the shared group frame is variable in size, so sps is
       * reset from gps instead of being adjusted by a constant. */
      print("mov\t gps,sps\t /*Fn-grp: free frame*/\n");
   }
   print("mov\t fpp,spp\t /*free locals*/\n");
   /* only private stack operations follow, so no modulo forcing here */
#ifdef SAVE_STATUS_REGISTER
   print("popg\t spp,sr \t /*altes sr holen*/\n");
#endif
   if (sync) {
      /* The saved eps is skipped rather than restored (960724/970904): it is
       * only restored explicitly through shallfree(). */
      print("add\t spp,-1,spp\t /*ignore old eps*/\n");
      print("popg\t spp,gps\t /*restore old gps*/\n");
      print("popg\t spp,gpp\t /*restore old gpp*/\n");
      print("mov\t fps,sps\t /*free locals*/\n");
      print("popg\t spp,fps\t /*restore old fps*/\n");
   }
   /* The shared-frame teardown for asynchronous functions was tentatively
    * disabled on 970905 and is not emitted. */
   print("popg\t spp,fpp\t /*alten fpp holen*/\n");
   if (Tflag && sync) {  countload(1);  countmpadd(1);  }
}

/* gen - generate code for the dags on list p
 *
 * Linearizes every dag on the list (linked through ->link) into one node
 * list threaded through x.next, runs register allocation over that list and
 * returns its head.  Returns 0 for an empty list.
 */
Node gen(Node p) {
   Node head = 0, *last;   /* head stays 0 if p is empty (was uninitialized) */

   debug(1,id = 0);
   for (last = &head; p; p = p->link)
      last = linearize(p, last, 0);
   debug(rflag,(lhead = head, lprint(head," before ralloc")));
   for (p = head; p; p = p->x.next) {
      ralloc(p);
      /* a node nobody uses but that occupies a register gives it back at once */
      if (p->count == 0 && sets(p))
         putreg(p);
   }
   debug(rflag,lprint(lhead," after ralloc"));
   return head;
}

/* getreg - allocate 1 or 2 registers for node p
 *
 * Type D nodes take two adjacent registers (mask 3), everything else one.
 * If no suitable register is free, one is chosen by spillee(), spilled, and
 * the search is repeated.
 */
static void getreg(Node p) {
   int width = optype(p->op) == D ? 3 : 1;
   int reg;

   for (;;) {
      for (reg = 0; reg < nregs; reg++) {
         if (rmask&(width<<reg))
            continue;                 /* at least one needed bit is taken */
         p->x.rmask = width;
         p->x.reg = reg;
         if (p->syms[0])
            p->syms[0]->reg = reg;
         rmask |= sets(p);
         usedmask |= sets(p);
         debug(rflag,fprint(2,"allocating %s to node #%d\n", rnames(sets(p)), p->x.id));
         return;
      }
      /* nothing free: evict a register and search again */
      debug(rflag,lprint(lhead, " before spillee"));
      reg = spillee(p, width);
      spill(reg, width, p);
      debug(rflag,lprint(lhead, " after spill"));
      assert((rmask&(width<<reg)) == 0);
   }
}

/* genreloads - make the nodes after dot use reloads of temp instead of p's register
 *
 * Every kid slot that refers to p in a node following dot is replaced by a
 * fresh INDIR(ADDRL temp) node, which is linked into the node list directly
 * in front of its user.  On return p has no remaining uses.
 */
static void genreloads(Node dot, Node p, Symbol temp) {
   Node prev = dot;     /* node currently preceding cur in the list */
   Node cur;
   int k;

   for (cur = dot->x.next; cur; prev = cur, cur = cur->x.next) {
      for (k = 0; k < MAXKIDS; k++) {
         Node reload;

         if (cur->kids[k] != p)
            continue;
         reload = newnode(INDIR + typecode(p),
            newnode(ADDRL+P, 0, 0, temp), 0, 0);
         reload->count = 1;
         cur->kids[k] = reload;
         p->count--;
         linearize(reload, &prev->x.next, prev->x.next);
         prev = reload;
      }
   }
   assert(p->count == 0);
}

/* genspill - generate code to spill p's register and return the temporary used
 *
 * A store of p into a new AUTO temporary is linked in right after p, and the
 * newly inserted nodes are register-allocated.
 */
static Symbol genspill(Node p) {
   Symbol temp = newtemp(AUTO, typecode(p));
   Node stop = p->x.next;      /* first node after the inserted store */
   Node store = newnode(ASGN + typecode(p),
      newnode(ADDRLP, 0, 0, temp), p, 0);
   Node n;

   linearize(store, &p->x.next, stop);
   rmask &= ~1;                /* register 0 counts as free while allocating the store */
   n = p->x.next;
   while (n != stop) {
      ralloc(n);
      n = n->x.next;
   }
   rmask |= 1;
   return temp;
}

/* global - emit the label for global symbol p, preceded by an .align
 * directive when the type's alignment is 2, 4 or 8 */
void global(Symbol p) {
   int align = p->type->align;

   if (align == 2)
      print(".align 1; ");
   else if (align == 4)
      print(".align 2; ");
   else if (align == 8)
      print(".align 3; ");
   print("%s:", p->x.name);
}

/* linearize - linearize node list p
 *
 * Appends p, after its not-yet-visited kids, to the list whose tail link is
 * *last, then terminates the list with next.  Returns the address of the new
 * tail link.  Nodes already marked visited (and a null p) add nothing.
 */
static Node *linearize(Node p, Node *last, Node next) {
   if (p == 0 || p->x.visited) {
      *last = next;
      return last;
   }
   last = linearize(p->kids[0], last, 0);
   last = linearize(p->kids[1], last, 0);
   p->x.visited = 1;
   *last = p;
   last = &p->x.next;
   debug(1,if (p->x.id == 0) p->x.id = ++id);
   debug(rflag,{fprint(2,"listing node "); nprint(p);})
   *last = next;
   return last;
}

/* local - local variable
 *
 * Assigns the next free slot to p and advances the matching frame offset:
 * shared locals are addressed "fps,<n>", private ones "fpp,<n>", with n one
 * above the current offset.  The storage class is set to AUTO in all cases.
 */
void local(Symbol p) {
   p->sclass = AUTO;

   if (p->shared) {
      int slot = shoffset + 1;

      p->x.offset = slot;
      p->x.framedepth = p->shframedepth;  /*970902*/
      p->x.name = stringf("fps,%d", slot);
      /* the offset is advanced only after the slot has been handed out (950814) */
      shoffset = roundup(shoffset + p->type->size, p->type->align);
      shoffset = roundup(shoffset, 1);
      return;
   }

   /* private */
   if (1 || !p->parreg) {     /* for now every private local gets a frame slot */
      int slot = proffset + 1;

      p->x.offset = slot;
      p->x.name = stringf("fpp,%d", slot);
      /* the offset is advanced only after the slot has been handed out (950814) */
      proffset = roundup(proffset + p->type->size, p->type->align);
      proffset = roundup(proffset, 1);   /* unit is 1, not 4 */
   }
   else {                     /* planned: parameters kept in registers, no offset */
      p->x.offset = 0;
      p->x.name = stringf("par%d", p->parreg);
   }
}

/* needsreg - does p need a register?
 *
 * Returns nonzero when the reginfo entry for p's operator has the
 * "needs a register" bit (0x1000<<type) set for p's operand type.
 * Written with a prototype like the other definitions in this file.
 */
static int needsreg(Node p) {
   assert(opindex(p->op) > 0 && opindex(p->op) < sizeof reginfo/sizeof reginfo[0]);
   return reginfo[opindex(p->op)]&(0x1000<<optype(p->op));
}

/* progbeg - beginning of program */
void progbeg(int argc, char *argv[]) {
   extern int atoi(char *);      /* (omit) */
   while (--argc > 0)
      if (**++argv == '-' && argv[0][1] >= '0' && argv[0][1] <= '9')
         nregs = atoi(*argv + 1);
      else if (strcmp(*argv, "-r") == 0)   /* (omit) */
         rflag++;         /* (omit) */
   rmask = ((~0)<<nregs)|1;
   print("#include %cforkaliases%c\n\n", '"', '"');
   print(".section %c.gsdata%c, .data\n", '"','"');
#if 0
   print(".globl ___STARTED_PROCS__\n");
   print("___STARTED_PROCS__:.int %d\n", STARTED_PROCS);
   print(".globl ___ACTIVE_PROCS__\n");
   print("___ACTIVE_PROCS__:.int %d\n", 1);    /*gueltig nach Startupcode*/
   abgeschafft, weil Probleme mit der Mehrfachcompilation 
   __STARTED_PROCS__ wird jetzt in der forklib.asm angelegt und initialisiert.
#endif
   print(".section %c.text%c, .text\n", '"','"');
   print("jra\t forklib_startup\n\n");
}

/* putreg - decrement register usage
 *
 * Drops one use of p; once no uses remain, the registers p occupies are
 * marked free in rmask.  A null p is ignored.
 */
static void putreg(Node p) {
   if (p == 0)
      return;
   if (--p->count > 0)
      return;               /* still referenced elsewhere */
   assert(p->x.rmask);
   rmask &= ~sets(p);
   debug(rflag,fprint(2,"deallocating %s from node #%d\n", rnames(sets(p)), p->x.id));
}

/* ralloc - assign a register for p
 *
 * ARG nodes additionally get their offset in the argument build area, CALL
 * nodes reset that offset, every other operator is checked with valid().
 * Afterwards the kids' registers are released, the current mask is recorded
 * in p->x.busy, and a register is allocated if the operator needs one.
 */
static void ralloc(Node p) {
   int kid, kind;

   assert(p);
   assert(p->x.rmask == 0);
   kind = generic(p->op);
   if (kind == ARG) {
      /* SARG is handled explicitly elsewhere and RARG needs no stack space.
       * argoffset/argbuildsize are not needed for FORK, so shared and private
       * arguments are not distinguished here. */
      argoffset = roundup(argoffset, p->syms[1]->u.c.v.i);
      p->x.argoffset = argoffset;
      argoffset += p->syms[0]->u.c.v.i;
      if (argoffset > argbuildsize)
         argbuildsize = roundup(argoffset, 1);  /* unit is 1, not 4 */
   }
   else if (kind == CALL) {
      argoffset = 0;
   }
   else {
      assert(valid(p->op));
   }
   /* planned for later: release after the node for floating-point ops,
    * because of exception handling */
   for (kid = 0; kid < MAXKIDS; kid++)
      putreg(p->kids[kid]);
   p->x.busy = rmask;
   if (needsreg(p))
      getreg(p);
}

/* restore - restore registers in mask
 *
 * Emits one popgn per selected register, from register nregs-1 down to 1,
 * i.e. in the reverse of the order used by save().  Register 0 is never
 * restored.
 */
static void restore(unsigned mask) {
   int r = nregs;

   while (--r > 0) {
      if ((mask&(1<<r)) == 0)
         continue;
      /* private stack access only, so no modulo forcing */
      print("popgn\t spp,r%d \t /*restore r%d*/\n", r+ROFF, r+ROFF );
   }
}

/* save - save registers in mask
 *
 * Emits one pshg per selected register, from register 1 up to nregs-1.
 * Register 0 is never saved.
 */
static void save(unsigned mask) {
   int r = 0;

   while (++r < nregs) {
      if ((mask&(1<<r)) == 0)
         continue;
      /* private stack access only, so no modulo forcing */
      print("pshg\t r%d,spp\t\t /*save r%d*/\n", r+ROFF, r+ROFF );
   }
}

/* segment - switch to logical segment s
 *
 * Emits  .section "<name>", <type>  where name and type are chosen by s.
 * LIT is emitted into the same section as GSDATA.
 */
void segment(int s) {
   char *name = "";     /* section name, printed between double quotes */
   char *type = "";     /* section type including the trailing newline */

   switch (s) {
   case   CODE: name = ".text";   type = ".text\n"; break;
   case    LIT: /* fall through: constants go where GSDATA goes */
   case GSDATA: name = ".gsdata"; type = ".data\n"; break;
   case  GSBSS: name = ".gsbss";  type = ".bss\n";  break;
   case   DATA: name = ".gpdata"; type = ".data\n"; break;
   case    BSS: name = ".gpbss";  type = ".bss\n";  break;
   default: assert(0);
   }
   print(".section %c",'"');
   print("%s", name);
   print("%c, ",'"');
   print("%s", type);
}

/* spill - spill all registers that overlap (r,m)
 *
 * Looks at the kids of every node after dot; each kid whose registers
 * intersect the mask m shifted to register r is stored to a temporary, its
 * registers are freed, and its later uses are redirected to reloads.
 */
static void spill(int r, unsigned m, Node dot) {
   Node user;
   int k;

   for (user = dot->x.next; user; user = user->x.next) {
      for (k = 0; k < MAXKIDS; k++) {
         Node victim = user->kids[k];
         Symbol temp;

         if (victim == 0 || (sets(victim)&(m<<r)) == 0)
            continue;
         temp = genspill(victim);
         rmask &= ~sets(victim);
         genreloads(dot, victim, temp);
      }
   }
}

/* spillee - identify the most-distantly-used register
 *
 * For every candidate register the number of nodes after dot up to the first
 * node that uses it is counted; the register with the largest count wins.
 * Candidates start at register 1.
 */
static int spillee(Node dot, unsigned m) {
   int bestdist = -1, bestreg = 32-FREE_REGS;
   int r;

   debug(rflag,fprint(2,"spillee: dot is node #%d\n", dot->x.id));
   for (r = 1; r < nregs - (m>>1); r++) {
      Node q = dot->x.next;
      int dist = 0;

      while (q && (uses(q)&(m<<r)) == 0) {
         q = q->x.next;
         dist++;
      }
      assert(q);   /* (omit) */
      debug(rflag,fprint(2,"r%d used in node #%d at distance %d\n", r, q->x.id, dist));
      if (dist > bestdist) {
         bestdist = dist;
         bestreg = r;
      }
   }
   debug(rflag,fprint(2,"spilling %s\n",rnames(m<<bestreg)));
   assert(bestreg);   /* (omit) */
   return bestreg;
}

/* uses - return mask of registers used by node p, i.e. the union of the
 * registers set by its non-null kids */
static unsigned uses(Node p) {
   unsigned mask = 0;
   int k = 0;

   while (k < MAXKIDS) {
      Node kid = p->kids[k++];

      if (kid)
         mask |= sets(kid);
   }
   return mask;
}

/* valid - is operator op a valid operator ?
 *
 * Returns nonzero when op's index lies inside the reginfo table and the
 * table entry has the "legal" bit (1<<type) set for op's operand type;
 * returns 0 otherwise.  The parameter type is now spelled out; it used to
 * rely on implicit int.
 */
static int valid(int op) {
   return opindex(op) > 0 && opindex(op) < sizeof reginfo/sizeof reginfo[0] ?
      reginfo[opindex(op)]&(1<<optype(op)) : 0;
}

#ifdef DEBUG
/* lprint - print the nodelist beginning at p
 *
 * Writes a title containing s, then, unless the list is empty, a column
 * header and one line per node.
 */
static void lprint(Node p, char *s) {
   char header[100];

   fprint(2, "node list%s:\n", s);
   if (p != 0) {
      sprintf(header, "%-4s%-8s%-8s%-8s%-7s%-13s%s",
         " #", "op", "kids", "syms", "count", "uses", "sets");
      fprint(2, "%s\n", header);
   }
   while (p) {
      nprint(p);
      p = p->x.next;
   }
}

/* nprint - print a line describing node p
 *
 * Columns: node id, operator name, ids of the kids, names of the symbols,
 * use count, registers used, registers set.
 *
 * NOTE(review): the fixed-size buffers are filled with unbounded sprintf
 * calls; very long symbol names could overrun them.
 */
static void nprint(Node p) {
   int i;
   char *kids = "", *syms = "", buf[200];

   if (p->kids[0]) {
      static char kidbuf[100];   /* static: kids points into it after the block */
      kidbuf[0] = 0;
      /* " %2d" always begins with a blank, so skipping kidbuf[0] below can
       * never drop a digit.  The former "%3d" lost the leading digit of the
       * first id and ran ids together once they reached 100; for smaller ids
       * the output is unchanged. */
      for (i = 0; i < MAXKIDS && p->kids[i]; i++)
         sprintf(kidbuf + strlen(kidbuf), " %2d", p->kids[i]->x.id);
      kids = &kidbuf[1];
   }
   if (p->syms[0] && p->syms[0]->x.name) {
      static char symbuf[100];   /* static: syms points into it after the block */
      symbuf[0] = 0;
      for (i = 0; i < MAXSYMS && p->syms[i]; i++) {
         if (p->syms[i]->x.name)
            sprintf(symbuf + strlen(symbuf), " %s", p->syms[i]->x.name);
         if (p->syms[i]->u.c.loc)
            sprintf(symbuf + strlen(symbuf), "=%s", p->syms[i]->u.c.loc->name);
      }
      syms = &symbuf[1];         /* skip the leading blank */
   }
   /* rnames() returns a static buffer, so its two results are formatted in
    * separate sprintf calls */
   sprintf(buf, "%2d. %-8s%-8s%-8s %2d    %-13s",
      p->x.id, opname(p->op), kids, syms, p->count, rnames(uses(p)));
   sprintf(buf + strlen(buf), "%s", rnames(sets(p)));
   fprint(2, "%s\n", buf);
}

/* rnames - return names of registers given by mask m
 *
 * The result is a blank-separated list such as "r1 r4", or an empty string
 * if no bit below nregs is set.  It lives in a static buffer that the next
 * call overwrites.
 */
static char *rnames(unsigned m) {
   static char buf[100];
   int r = 0;

   buf[1] = 0;      /* result for an empty mask */
   buf[0] = 0;
   while (r < nregs) {
      if (m&(1<<r))
         sprintf(buf + strlen(buf), " r%d", r);
      r++;
   }
   return &buf[1];  /* skip the leading blank */
}
#endif

#ifndef V9
#include <errno.h>
#ifndef errno
extern int errno;
#endif

/* strtol_digit - value of c as a digit (0-9, a-z/A-Z => 10-35), or -1.
 * Like the rest of this fallback it assumes an ASCII-style character set. */
static int strtol_digit(int c) {
   if (c >= '0' && c <= '9')
      return c - '0';
   if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
      return (c&~040) - 'A' + 10;
   return -1;
}

/* strtol - interpret str as a base b number; if ptr!=0, *ptr gets updated str
 *
 * str  text to convert: optional white space, optional sign, digits
 * ptr  if non-null, receives the address of the first unconverted
 *      character, or str itself when no conversion was performed
 * b    base 2..36, or 0 to infer it from a "0x"/"0" prefix
 *
 * Returns the converted value, 0 when b is invalid or no digits were
 * found, and LONG_MAX/LONG_MIN with errno set to ERANGE on overflow.
 *
 * Fixes over the previous version:
 *  - an optional "0x"/"0X" prefix is accepted for b == 16 as well;
 *  - "0x" not followed by a hex digit converts the leading "0";
 *  - after overflow is detected, accumulation stops, so no signed
 *    overflow is ever evaluated.
 */
long strtol(char *str, char **ptr, int b) {
   long n = 0;          /* accumulated as a NEGATIVE value, so LONG_MIN fits */
   char *start, sign = '+';
   int d, overflow = 0;

   if (ptr)
      *ptr = str;
   if (b < 0 || b == 1 || b > 36)
      return 0;
   while (*str==' '||*str=='\f'||*str=='\n'||*str=='\r'||*str=='\t'||*str=='\v')
      str++;
   if (*str == '-' || *str == '+')
      sign = *str++;
   if ((b == 0 || b == 16)
   && str[0] == '0' && (str[1] == 'x' || str[1] == 'X')) {
      /* consume the prefix only if a hex digit follows it; otherwise
       * the "0" alone is the number and 'x' ends the conversion */
      d = strtol_digit(str[2]);
      if (d >= 0 && d < 16)
         str += 2;
      b = 16;
   } else if (b == 0)
      b = str[0] == '0' ? 8 : 10;
   for (start = str; *str; str++) {
      d = strtol_digit(*str);
      if (d < 0 || d >= b)
         break;
      if (overflow)
         continue;      /* keep consuming digits, but do not accumulate */
      if (n < (LONG_MIN + d)/b)
         overflow = 1;  /* b*n - d would fall below LONG_MIN */
      else
         n = b*n - d;
   }
   if (start == str)
      return 0;         /* no digits: *ptr still holds the original str */
   if (ptr)
      *ptr = str;
   if (overflow || (sign == '+' && n == LONG_MIN)) {
      errno = ERANGE;
      return sign == '+' ? LONG_MAX : LONG_MIN;
   }
   return sign == '+' ? -n : n;
}
#endif


/* === Functions for ex-p-code instructions, new by CWK 940628: =============== */

/* shspace(k): allocate space for shared block-local variables.
 * Appends a Pcode entry holding a SHSPACE node and stores k as its
 * parameter. */
void shspace(int k)      /*970902*/
{
    Tree t;

    t = tree(SHSPACE+V, voidtype, 0, 0);
    code(Pcode);
    codelist->u.node = listnodes( t, 0, 0 );
    codelist->par = k;
}

/* splitSM(): generate code that computes the split cell used to
 * divide the free SM area evenly */
void splitSM()
{
   Tree t;

   t = tree(SPLIT+V, voidtype, 0, 0);
   code(Pcode);
   codelist->u.node = listnodes( t, 0, 0 );
}

/* nextcase(erster): generate code that divides memory according to the
 * split cell and sets up the group frame for the then-group
 * (!erster -> else-group); erster is stored as the entry's parameter */
void nextcase( erster )
   int erster;
{
   Tree t;

   t = tree(NEXTCASE+V, voidtype, 0, 0);
   shframedepth += 1;  /*970902*/
   code(Pcode);
   codelist->u.node = listnodes( t, 0, 0 );
   codelist->par = erster;
}

/* lgrp_if(): generate code that leaves the group frames set up for
 * an if statement */
void lgrp_if()
{
   Tree t;

   t = tree(LGRPIF+V, voidtype, 0, 0);
   shframedepth -= 1;  /*970902*/
   code(Pcode);
   codelist->u.node = listnodes( t, 0, 0 );
}

/* synchronize(): generate code that synchronizes the leaf groups */
void synchronize()
{
   Tree t;

   t = tree(SYNC+V, voidtype, 0, 0);
   code(Pcode);
   codelist->u.node = listnodes( t, 0, 0 );
}

/* engrp(): enter groups at a fork statement */
void engrp()
{
   Tree t;

   t = tree(ENGRP+V, voidtype, 0, 0);
   shframedepth += 1;  /*970902*/
   code(Pcode);
   codelist->u.node = listnodes( t, 0, 0 );
}

/* exgrp(): leave groups after a fork statement */
void exgrp()
{
   Tree t;

   t = tree(EXGRP+V, voidtype, 0, 0);
   shframedepth -= 1;
   code(Pcode);
   codelist->u.node = listnodes( t, 0, 0 );
}

/* xproc(): leave processors after a start statement */
void xproc()
{
   Tree t;

   t = tree(XPROC+V, voidtype, 0, 0);
   code(Pcode);
   codelist->u.node = listnodes( t, 0, 0 );
}

/* enlgrp(looptyp): group creation for loops.
 * Appends a Pcode entry holding an ENLGRP node and stores looptyp as
 * its parameter. The parameter is now declared explicitly; it used to
 * rely on implicit int, which C99 and later no longer accept. */
void enlgrp( looptyp )
   int looptyp;
{
   Tree e = tree(ENLGRP+V, voidtype, 0, 0);
   shframedepth ++;
   code(Pcode);
   codelist->u.node = listnodes( e, 0, 0 );
   codelist->par = looptyp;    /*950504*/
}

/* exlgrp(): leave groups after a loop */
void exlgrp()
{
   Tree t;

   t = tree(EXLGRP+V, voidtype, 0, 0);
   shframedepth -= 1;
   code(Pcode);
   codelist->u.node = listnodes( t, 0, 0 );
}

/* size(): determine the group size before loop entry */
void size()
{
   Tree t;

   t = tree(SIZE+V, voidtype, 0, 0);
   code(Pcode);
   codelist->u.node = listnodes( t, 0, 0 );
}

/* ibp( k ):  generate IBP no. k; k is stored as the entry's parameter */
void ibp( k )
   int k;
{
   Tree t;

   t = tree(IBP+V, voidtype, 0, 0);
   code(Pcode);
   codelist->u.node = listnodes( t, 0, 0 );
   codelist->par = k;
}

/* flushframes( k ):  flush k group frames (private and shared).
 * k must not be negative; for k == 0 nothing is generated. */
void flushframes( k )
   int k;
{
   Tree t;

   assert( k>=0 );
   if (k == 0)
      return;
   t = tree(FLUSHFRAMES+V, voidtype, 0, 0);
   code(Pcode);
   codelist->u.node = listnodes( t, 0, 0 );
   codelist->par = k;
   /* "shframedepth -= k" (970902) had to go, because shframedepth is
    * a static and not a dynamic quantity (980129). */
   shframedepth -= 1;
   /* decremented by one because the compiler is too clever and
    * optimizes away the next lgrpif or the like */
}


/* retflush():  flush some shared group frames, care about starts CWK
 *
 * Prints assembly text directly with print():
 *  - one four-instruction RETFLUSH sequence per frame, p->rframes times;
 *  - if p->started is nonzero, the "retxproc"/"xproc" sequence that
 *    follows.
 * When Tflag is set, countload()/countmpadd() are called with the
 * number of sequences just printed.
 * p->rframes and p->started must both be >= 0.
 */
void retflush( p )  Node p; {
 int i;
 assert( p->rframes >= 0); assert( p->started >=0 );
 /*shframedepth -= p->rframes;   /*970902*/
   /*had to go, because shframedepth is a
     static and not a dynamic quantity! 980129 */
 shframedepth --;
   /*because the compiler is too clever
     and optimizes away the next lgrpif or the like*/
 if (p->rframes > 0)
    force_modulo_0;
 /* one sequence per frame; i only appears in the emitted comment */
 for (i = p->rframes; i > 0; i--) {
    print("getlo\t -1,r31   \t /*RETFLUSH(%d)*/\n", i);
    print("mpaddn\t gps,1,r31\t /*RETFLUSH(%d)*/\n", i);
    print("ldg\t gps,0,gps\t /*RETFLUSH(%d)*/\n", i);
    print("mov\t gps,sps\t /*RETFLUSH(%d)*/\n", i);   /*970904*/
 }
 if (Tflag && p->rframes) {   /*990119*/
    countload( p->rframes);
    countmpadd( p->rframes);
 }
 if (p->started) {     /*issue an xproc(): */
         /*The group frames from before the start statement are restored.*/
         /* force_modulo_0 was not issued above when rframes == 0 */
         if (!p->rframes)  force_modulo_1;
         print("getlo\t -1,r31   \t /*retxproc: aus startGruppe*/\n");
         print("mpadd\t gps,1,r31\t /*retxproc: austragen*/\n");
         /*print("bsrg\t spp,forklib_sync\n"); removed after consultation with HS*/
         print("ldg\t gpp,-3,fps\t /*retxproc: restore fps*/\n"); /*1*980129*/
         print("ldg\t gps,0,gps\t /*retxproc: restore gps*/\n");       /*0*/
         print("mov\t gps,sps\t /*retxproc: restore sps*/\n");
         print("mov\t gpp,spp\t /*retxproc: prepare spp*/\n");
         print("ldg\t gpp,0,gpp\t /*retxproc: restore gpp*/\n");       /*0*/
#if 0
CWK 960812
         print("gethi\t ___PROC_NR__,par2 /*xproc: $orig.*/\n");
#endif
         print("popg\t spp,sps\t /*xproc: restore sps*/\n");       /*0*/
#if 0
CWK 960812
         print("add\t par2,___PROC_NR__&0x1fff,par2 /*xproc*/\n");       /*1*/
#endif
         print("popg\t spp,eps\t /*xproc: restore eps*/\n");       /*0*/
         /*All processors except 0 set the SHADOW bit:*/
#if 0
    970904 Wieso?? Damit kann ich mir den Prozedurframe zerhacken!
         print("add\t gpp,3,spp\t /*xproc: restore spp*/\n");
#endif
#if 0
CWK 960812
         print("ldg\t par2,0,par2\t /*xproc: $orig.*/\n");           /*0*/
         print("nop\t            \t /*delay slot*/\n");
         print("add\t par2,0,par2\t /*xproc: cmp w.0*/\n");
         print("beq\t 5          \t /*xproc*/\n");
         print("getsr\t Ret     \t /*xproc*/\n");
         print("or\t Ret,SHADOW,Ret\t /*xproc*/\n");
         print("putsr\t Ret,Ret \t /*xproc*/\n");
         print("bra\t 5          \t /*xproc*/\n");
         print("nop\t            \t /*xproc:stay sync*/\n");
         print("nop\t            \t /*xproc:stay sync*/\n");
         print("nop\t            \t /*xproc:stay sync*/\n");
         print("nop\t            \t /*xproc:stay sync*/\n");
#endif
         if (Tflag) {   /*990119*/
            countload( 1 );
            countmpadd( 1 );
         }
 }
}


/* begseq(): enter seq body, mask out all but one processor 970310.
 * labelnr is stored as the entry's parameter. */
void begseq( int labelnr )
{
   Tree t;

   t = tree(BEGSEQ+V, voidtype, 0, 0);
   code(Pcode);
   codelist->u.node = listnodes( t, 0, 0 );
   codelist->par = labelnr;
}

/* endseq(): exit seq body: label, barrier   970310.
 * labelnr is stored as the entry's parameter. */
void endseq( int labelnr )
{
   Tree t;

   t = tree(ENDSEQ+V, voidtype, 0, 0);
   code(Pcode);
   codelist->u.node = listnodes( t, 0, 0 );
   codelist->par = labelnr;
}


/* ================= STAB functions    CWK 940921 ================== */
/* adapted from lcc 3.1, files sparc.c and symbolic.c */

#include "stab.h"
static char *currentfile;       /* current file name */
static int ntypes;

static void asgncode (Type, int);
static void dbxout (Type);
static int dbxtype (Type);
static int emittype (Type, int, int);

/* asgncode - assign type code to ty
 *
 * ty   type to visit
 * lev  nesting depth: 0 for the type asgncode was first called on,
 *      incremented for each referenced or member type
 *
 * Qualified types take over the type number of the type they qualify.
 * struct, union and enum types get the next number from ntypes; when
 * reached at lev > 0 and their tag name does not start with a digit,
 * their definition is emitted at once with dbxout().
 * x.marked is set before recursing, so a type that is reached again
 * through its own members returns immediately.
 *
 * lev is now declared explicitly (it used to be implicit int), and the
 * ENUM label no longer sits inside the STRUCT/UNION braces.
 */
static void asgncode(ty, lev) Type ty; int lev; {
	if (ty->x.marked || ty->x.typeno)
		return;
	ty->x.marked = 1;
	switch (ty->op) {
	case VOLATILE: case CONST: case VOLATILE+CONST:
		asgncode(ty->type, lev);
		ty->x.typeno = ty->type->x.typeno;
		break;
	case POINTER: case FUNCTION: case ARRAY:
		asgncode(ty->type, lev + 1);
		/* fall thru */
	case VOID: case CHAR: case SHORT: case INT: case UNSIGNED:
	case FLOAT: case DOUBLE:
		break;
	case STRUCT: case UNION: {
		Field p;
		for (p = fieldlist(ty); p; p = p->link)
			asgncode(p->type, lev + 1);
		}
		/* fall thru */
	case ENUM:
		if (ty->x.typeno == 0)
			ty->x.typeno = ++ntypes;
		if (lev > 0 && (*ty->u.sym->name < '0' || *ty->u.sym->name > '9'))
			dbxout(ty);
		break;
	default:
		assert(0);
	}
}

/* dbxout - output .stabs entry for type ty.
 * Works on the unqualified type and does nothing if that type is
 * already flagged as printed. */
static void dbxout(ty) Type ty; {
	int col;

	ty = unqual(ty);
	if (ty->x.printed)
		return;
	print(".stabs \"");
	col = 8;		/* approximate output column so far */
	if (ty->u.sym && !(isfunc(ty) || isarray(ty) || isptr(ty))) {
		print("%s", ty->u.sym->name);
		col += strlen(ty->u.sym->name);
	}
	/* 'T' for struct/union/enum, 't' for everything else */
	print(":%c", isstruct(ty) || isenum(ty) ? 'T' : 't');
	col += 2;
	emittype(ty, 0, col);
	print("\",%d,0,0,0\n", N_LSYM);
}

/* dbxtype - emit a stabs entry for type ty, return type code.
 * Type codes are assigned first (asgncode), then the entry is
 * written (dbxout). */
static int dbxtype(ty) Type ty; {
	int typeno;

	asgncode(ty, 0);
	dbxout(ty);
	typeno = ty->x.typeno;
	return typeno;
}

/*
 * emittype - emit ty's type number, emitting its definition if necessary.
 * Returns the output column number after emission; col is the approximate
 * output column before emission and is used to emit continuation lines for long
 * struct, union, and enum types. Continuations are not emitted for other types,
 * even if the definition is long. lev is the depth of calls to emittype.
 *
 * A type without a number gets the next one from ntypes. The definition
 * ("=...") is written only the first time a type is emitted; afterwards
 * x.printed is set and only the number is written.
 *
 * lev and col are now declared explicitly; they used to rely on implicit
 * int, which C99 and later no longer accept.
 */
static int emittype(ty, lev, col) Type ty; int lev, col; {
	int tc = ty->x.typeno;

	/* a qualified type is emitted as, and shares the number of, the type it qualifies */
	if (isconst(ty) || isvolatile(ty)) {
		col = emittype(ty->type, lev, col);
		ty->x.typeno = ty->type->x.typeno;
		ty->x.printed = 1;
		return col;
	}
	if (tc == 0) {
		ty->x.typeno = tc = ++ntypes;
/*              fprint(2,"`%t'=%d\n", ty, tc); */
	}
	print("%d", tc), col += 3;	/* 3 = assumed width of a type number */
	if (ty->x.printed)
		return col;
	ty->x.printed = 1;
	switch (ty->op) {
	case VOID:	/* void is defined as itself */
		print("=%d", tc), col += 1+3;
		break;
	case CHAR:	/* unsigned char is a subrange of int */
		if (ty == unsignedchar)
			print("=r1;0;255;"), col += 10;
		else	/* following pcc, char is a subrange of itself */
			print("=r%d;-128;127;", tc), col += 2+3+10;
		break;
	case SHORT:	/* short is a subrange of int */
		if (ty == unsignedshort)
			print("=r1;0;65535;"), col += 12;
		else	/* signed */
			print("=r1;-32768;32767;"), col += 17;
		break;
	case INT:	/* int is a subrange of itself */
		print("=r1;%d;%d;", INT_MIN, INT_MAX), col += 4+11+1+10+1;
		break;
	case UNSIGNED:	/* unsigned is a subrange of int */
		print("=r1;0;-1;"), col += 9;
		break;
	case FLOAT: case DOUBLE:	/* float, double get sizes instead of ranges */
		print("=r1;%d;0;", ty->size), col += 4+1+3;
		break;
	case POINTER:
		print("=*"), col += 2;
		col = emittype(ty->type, lev + 1, col);
		break;
	case FUNCTION:
		print("=f"), col += 2;
		col = emittype(ty->type, lev + 1, col);
		break;
	case ARRAY:	/* array includes subscript as an int range */
		if (ty->size && ty->type->size)
			print("=ar1;0;%d;", ty->size/ty->type->size - 1), col += 7+3+1;
		else	/* size unknown: upper bound -1 */
			print("=ar1;0;-1;"), col += 10;
		col = emittype(ty->type, lev + 1, col);
		break;
	case STRUCT: case UNION: {
		Field p;
		/* tag not defined: emit a cross reference instead of a definition */
		if (!ty->u.sym->defined) {
			print("=x%c%s:", ty->op == STRUCT ? 's' : 'u', ty->u.sym->name);
			col += 2+1+strlen(ty->u.sym->name)+1;
			break;
		}
		/* nested use of a type whose tag does not start with a digit:
		 * emit the number only and leave the type unprinted */
		if (lev > 0 && (*ty->u.sym->name < '0' || *ty->u.sym->name > '9')) {
			ty->x.printed = 0;
			break;
		}
		print("=%c%d", ty->op == STRUCT ? 's' : 'u', ty->size), col += 1+1+3;
		for (p = fieldlist(ty); p; p = p->link) {
			if (p->name)
				print("%s:", p->name), col += strlen(p->name)+1;
			else
				print(":"), col += 1;
			col = emittype(p->type, lev + 1, col);
			/* bit-field variant, disabled by CWK:
			if (p->lsb)
				print(",%d,%d;", 8*p->offset +
					(IR->little_endian ? fieldright(p) : fieldleft(p)),
					fieldsize(p));
			else */
				print(",%d,%d;", 8*p->offset, 8*p->type->size);
			col += 1+3+1+3+1;	/* accounts for ,%d,%d; */
			/* start a continuation line once column 80 is reached */
			if (col >= 80 && p->link) {
				print("\\\\\",%d,0,0,0\n.stabs \"", N_LSYM);
				col = 8;
			}
		}
		print(";"), col += 1;
		break;
		}
	case ENUM: {
		Symbol *p;
		/* same rule as for nested struct/union types above */
		if (lev > 0 && (*ty->u.sym->name < '0' || *ty->u.sym->name > '9')) {
			ty->x.printed = 0;
			break;
		}
		print("=e"), col += 2;
		for (p = ty->u.sym->u.idlist; *p; p++) {
			print("%s:%d,", (*p)->name, (*p)->u.value), col += strlen((*p)->name)+3;
			if (col >= 80 && p[1]) {
				print("\\\\\",%d,0,0,0\n.stabs \"", N_LSYM);
				col = 8;
			}
		}
		print(";"), col += 1;
		break;
		}
	default:
		assert(0);
	}
	return col;
}

/* stabblock - output a stab entry for '{' or '}' at level lev
 *
 * brace  '{' for block entry; any other value is treated as block exit
 * lev    block nesting level written into the .stabd entry
 * p      null-terminated array of symbols; on '{' each one is passed
 *        to stabsym() before the bracket entry is written
 *
 * brace and lev are now declared explicitly; they used to rely on
 * implicit int, which C99 and later no longer accept.
 */
void stabblock(brace, lev, p) int brace, lev; Symbol *p; {
	if (brace == '{')
		for ( ; *p; p++)
			stabsym(*p);
	print(".stabd 0x%x,0,%d\n", brace == '{' ? N_LBRAC : N_RBRAC, lev);
}

/* stabfend - end of function p; intentionally does nothing.
 * line is now declared explicitly (it used to be implicit int). */
void stabfend(p, line) Symbol p; int line; {
}

/* stabinit - initialize stab output
 *
 * file        name of the source file; if non-empty, two N_SO entries
 *             (MYPATH, then file) and the Ltext label are written
 * argc, argv  not used here
 *
 * The predefined types are then emitted in a fixed order, starting with
 * inttype, followed by every entry of the types table at GLOBAL scope
 * via stabtype().
 *
 * argc is now declared explicitly; it used to rely on implicit int,
 * which C99 and later no longer accept.
 */
void stabinit(file, argc, argv) char *file, *argv[]; int argc; {
	typedef void (*Closure)(Symbol, void *);

	if (file && *file) {
		extern char *MYPATH;  /*main.c  CWK*/
		print(".stabs \"%s\",0x%x,0,0,Ltext\n", MYPATH, N_SO); /*CWK*/
		print(".stabs \"%s\",0x%x,0,0,Ltext\n", file, N_SO);
		/*(*IR->segment)(CODE);*/ segment( CODE ); /*CWK*/
		print("Ltext:");
		currentfile = file;
	}
	/* order matters: type codes are handed out in call order */
	dbxtype(inttype);
	dbxtype(chartype);
	dbxtype(doubletype);
	dbxtype(floattype);
	dbxtype(longdouble);
	dbxtype(longtype);
	dbxtype(shorttype);
	dbxtype(signedchar);
	dbxtype(unsignedchar);
	dbxtype(unsignedlong);
	dbxtype(unsignedshort);
	dbxtype(unsignedtype);
	dbxtype(voidtype);
	foreach(types, GLOBAL, (Closure)stabtype, (void *)0 /*CWK, instead of NULL*/);
}

/* stabline - emit stab entry for source coordinate *cp.
 * When cp names a file other than currentfile, an N_SOL entry with a
 * fresh label is written first and currentfile is updated. */
void stabline(cp) Coordinate *cp; {
	int filechanged = cp->file && cp->file != currentfile;

	if (filechanged) {
		int label = genlabel(1);

		print("L%d: .stabs \"%s\",0x%x,0,0,L%d\n", label,
				cp->file, N_SOL, label);
		currentfile = cp->file;
	}
	print(".stabd 0x%x,0,%d\n", N_SLINE, cp->y);
}

/* stabsym - output a stab entry for symbol p */
void stabsym(p) Symbol p; {
	int code, tc, sz = p->type->size;

	if (p->generated || p->computed)
		return;
	if (isfunc(p->type)) {
		print(".stabs \"%s:%c%d\",%d,0,0,%s\n", p->name,
			p->sclass == STATIC ? 'f' : 'F', dbxtype(freturn(p->type)),
			N_FUN, p->x.name);
		return;
	}
        /*CWK auskommentiert:
	if (!IR->wants_argb && p->scope == PARAM && p->structarg) {
		assert(isptr(p->type) && isstruct(p->type->type));
		tc = dbxtype(p->type->type);
		sz = p->type->type->size;
	} else
        weil argb vorlaeufig uninteressant */
		tc = dbxtype(p->type);
	if (p->sclass == AUTO && p->scope == GLOBAL || p->sclass == EXTERN) {
		print(".stabs \"%s:G", p->name);
		code = N_GSYM;
	} else if (p->sclass == STATIC) {
		print(".stabs \"%s:%c%d\",%d,0,0,%s\n", p->name, p->scope == GLOBAL ? 'S' : 'V',
			tc, p->u.seg == BSS ? N_LCSYM : N_STSYM, p->x.name);
		return;
	} else if (p->sclass == REGISTER) {
		if (p->scope > PARAM) {
			int r = p->reg /* statt p->x.regnode->number CWK */;
			/*if (p->x.regnode->set == FREG)
				r += 32;	* floating point */
			print(".stabs \"%s:r%d\",%d,0,", p->name, tc, N_RSYM);
			print("%d,%d\n", sz, r);
		}
		return;
	} else if (p->scope == PARAM) {
		print(".stabs \"%s:p", p->name);
		code = N_PSYM;
	} else if (p->scope >= LOCAL) {
		print(".stabs \"%s:", p->name);
		code = N_LSYM;
	} else
		assert(0);
	print("%d\",%d,0,0,%s\n", tc, code,
		p->scope >= PARAM && p->sclass != EXTERN ? p->x.name : "0");
}

/* stabtype - output a stab entry for type *p.
 * Symbols without a type are ignored. For sclass 0 only the type itself
 * is emitted; for a TYPEDEF an additional "name:t<code>" entry follows. */
void stabtype(p) Symbol p; {
	if (!p->type)
		return;
	if (p->sclass == 0) {
		dbxtype(p->type);
	} else if (p->sclass == TYPEDEF) {
		int tc = dbxtype(p->type);

		print(".stabs \"%s:t%d\",%d,0,0,0\n", p->name, tc, N_LSYM);
	}
}

/* symname - print p's name followed by '@', its declaration source
 * coordinate and '.', src.x; prints "0" when p is null */
static void symname(p) Symbol p; {
        if (!p) {
                print("0");
                return;
        }
        print("%s@%w.%d", p->name, &p->src, p->src.x);
}

/* stabend - finalize stab output.
 * Currently a no-op: the function body is empty and the former body is
 * kept only inside the comment that follows the closing brace. */
void stabend(cp, p, cpp, sp, stab) Coordinate *cp, **cpp; Symbol p, *sp, *stab; {
   } /*CWK: temporarily commented out
        int i;

        symname(p);
        print("\n");
        for (i = 0; cpp[i] && sp[i]; i++) {
                print("%w.%d: ", cpp[i], cpp[i]->x);
                symname(sp[i]);
                print("\n");
        }
} */
