/****************************************************************************
*                                                                           *
*  Author      : Resi Hoever-Klier, GMD, SCAI.LAB                           *
*  Copyright   : GMD St. Augustin, Germany                                  *
*  Date        : Dec 97                                                     *
*  Last Update : Jan 98                                                     *
*                                                                           *
*  This Module is part of the DALIB                                         *
*                                                                           *
*  Module      : timestat.m4                                                *
*                                                                           *
*  Function: Generating timing statistics for all routines called.          *
*                                                                           *
*            Timing statistics will be collected, if the executable is      *
*            called with -time.                                             *
*            Info on brutto time usage will be collected until end of       *
*            program run, whereas info on netto time consumption can be     *
*            switched off and on by calling dalib_timestat_off() and        *
*            dalib_timestat_on() respectively in the FORTRAN program.       *
*            Suggestion at program start: dalib_timestat_on().              *
*                                                                           *
*                                                                           *
*  Export :  internal Interface                                             *
*  ============================                                             *
*                                                                           *
*   void dalib_init_timestat (void)                                         *
*                                                                           *
*      - called if executable was called with -time or with -timestat       *
*      - enables collection of timing statistics info                       *
*                                                                           *
*   void FUNCTION dalib_timestat_on (void)                                  *
*                                                                           *
*      - called in FORTRAN program to restart collection of netto           *
*        timing statistics                                                  *
*                                                                           *
*   void dalib_start_timestat (current_routine_name,len)                    *
*                                                                           *
*      - called at start of every (sub)routine after dalib_timestat_on until*
*        dalib_stop_timestat or end of program run to start the timers for  *
*        the routine: the netto timer if dalib_timestat_on, the brutto timer*
*        anyway.                                                            *
*                                                                           *
*   void dalib_stop_timestat (caller_routine_name,len)                      *
*                                                                           *
*      - called at end of every (sub)routine. Stops the brutto timer for    *
*        the routine and the netto-timer, if dalib_timestat_on.             *
*                                                                           *
*   void FUNCTION dalib_timestat_off (void)                                 *
*                                                                           *
*      - called in FORTRAN program to stop collection of netto              *
*        timing statistics                                                  *
*                                                                           *
*   void dalib_collect_timestat (void)                                      *
*                                                                           *
*      - called at end of program run if executable was called with -time   *
*        or with -timestat                                                  *
*      - collects and prints out timing statistics info                     *
*                                                                           *
*                                                                           *
*   Related dalib-modules:                                                  *
*                                                                           *
*   dalib.h   : definition of pcb.time_flag and time_statistics_collect     *
*   & dalib.m4: if pcb.time_flag:                                           *
*               dalib_start_subroutine: calls dalib_start_timestat          *
*               dalib_stop_subroutine:  calls dalib_stop_timestat           *
*                                                                           *
*   arguments.m4: sets pcb.time_flag if executable called with -time        *
*                                                                           *
*   initall.m4: dalib_init: call of dalib_init_timestat if pcb.time_flag    *
*   & init1.m4:               unset of time_statistics_collect              *
*               dalib_exit: call of dalib_collect_timestat                  *
*                                                                           *
****************************************************************************/

#include <stdio.h>
#include <string.h>
#include "dalib.h"

#undef  DEBUG1
#undef DEBUG


#define MAX(A,B)  ((A) > (B) ? (A) : (B))
#define MIN(A,B)  ((A) < (B) ? (A) : (B))

static char timefilename[100];          /* file for timing info */
static FILE *timefile;

#ifdef DEBUG1
static FILE *testfile;         	/* file for testing info (1 for each proc) */
#endif

#define NUM_OF_ROUTINES 256
#define AVER_ROUTINE_NAME_LEN 20
#define ROUTINE_INCREMENT 64
/*
#define NUM_OF_ROUTINES 2
#define AVER_ROUTINE_NAME_LEN 10
#define ROUTINE_INCREMENT 2
*/

 
/* Timing record of one routine; my_times is an array of these.
   The records additionally form a binary search tree over the routine
   names: lower/higher are indices into my_times, -1 means "no child".
   brutto accumulates the time between start and stop of the routine,
   netto the same time without the time spent in routines called from it.
   All times are differences of dalib_timestamp() values. */
typedef struct
   { int offset;                /* start of the routine name in my_names */
     int len;                   /* length of routine name incl. trailing '\0' */
     int brutto;                /* accumulated brutto (gross) time of routine */
     int netto;                 /* accumulated netto (net) time of routine */
     int brutto_time_start;     /* timestamp of last start of brutto timer */
     int netto_time_start;      /* timestamp of last (re)start of netto timer */
     int lower;                 /* index of lexically lower rout. name or -1 */
     int higher;                /* index of lexically higher rout. name or -1 */
   } timing_info;
 
 
/* Result record of one routine as written by treesort: the same times as in
   timing_info, but without the tree links and with the name offset relative
   to the sorted name buffer. */
typedef struct
   { int offset;                /* of rout.name in array sorted_names */
     int len;                   /* of routine name incl. trailing '\0' */
     int brutto;                /* brutto time used in routine */
     int netto;                 /* netto time used in routine */
   } timing_sorted;
 
 
/* Per-process collection state, set up by dalib_init_timestat. */
timing_info *my_times;          /* timing records of this process, one per
                                   routine, in the order of first call   */
int my_times_size;              /* space allocated for my_times (bytes) */

static int num_of_routines;     /* num of already called routines
                                   (= number of used my_times entries) */

static char *my_names;          /* names of called routines in the
                                   order of their first call, each one
                                   terminated by '\0'                    */
static int my_names_size;       /* space allocated for my_names (bytes) */
static int my_names_len;        /* my_names is filled up to this offset */

int actual_routine;             /* index of routine currently timed;
                                   -1 until the first routine is started */
 
int results[2];			/* to send to p1: results[0]: num_of_routines*/
				/*                results[1]: my_names_len */
timing_sorted *my_sresults;     /* to send to proc. 1 */
int my_sresults_size;           /* space allocated for my_sresults */
int res_ind;                    /* result-index */
char *my_sorted_names;          /* to send to proc. 1 */
                                /* space for my_sorted_names: my_names_len */
int name_ind;                   /* offset in my_sorted_names */
 
timing_sorted *sresults_all;    /* to collect info of all processes */
int sresults_all_size;
timing_sorted **sresults;       /* to reach each processes' info */
int sresults_size;
char *all_sorted_names;         /* to collect names of all processes*/
int all_sorted_names_size;
char **all_names;               /* to reach each processes' names*/
int all_names_size;
 
int *all_results;       /*for p.1 to know the sizes of the results of each p:
                all_results[2*proc+0]: num of called routines of proc
                all_results[2*proc+1]: size of names of called rout of proc */
int all_results_size;

int *act_index;         /* holds the index of the actual rout of each proc */
int act_index_size;
 

/*******************************************************************
*                                                                  *
*  dalib_init_timestat ()                                          *
*                                                                  *
*  for each process: initializes timing:                           *
*                                                                  *
*  Creates initial memory for timing info: for tree of timing_info *
*  and for place for the names of the called routines.             *
*  Initial suggestion:                                             *
*  NUM_OF_ROUTINES routines of length: AVER_ROUTINE_NAME_LEN       *
*                                                                  *
*  Initializes counter of already called routines.                 * 
*                                                                  *
*  Opens timing_statistics file timefile ("<executable>.timestat") *
*                                                                  *
*******************************************************************/

/* Initializes the timing statistics of this process:
     - process 1 opens the statistics file "<program name>.timestat"
     - allocates the initial tables my_times and my_names
     - resets the routine counters and switches collection on            */
void dalib_init_timestat ()

{
  static const char suffix[] = ".timestat";
  size_t max_base;              /* max. chars taken from the program name */

#ifdef DEBUG1
  char nid[6] = "";
  char testfilename[100] = "testfile";

  printf("%d: dalib_init_timestat:\n",pcb.i);
 
  sprintf (nid,"%d",pcb.i);
  strcat (testfilename,nid);

  if ((testfile = fopen(testfilename,"w")) == NULL)
     dalib_internal_error("Unable to open testfile for writing");
 
     printf("opened testfile %s\n",testfilename);

#endif

  if (pcb.i == 1)
    {
     /* Build the file name with a bounded copy: a program name that is too
        long is truncated so that name + suffix + '\0' always fit into
        timefilename (strncpy does not terminate on truncation, so the
        terminator is written explicitly). */

     max_base = sizeof(timefilename) - sizeof(suffix);

     strncpy (timefilename, dalib_program_name, max_base);
     timefilename[max_base] = '\0';
     strcat (timefilename, suffix);

     if ((timefile = fopen(timefilename,"w")) == NULL)
        dalib_internal_error("Unable to open timingfile for writing");

#ifdef DEBUG1
     printf("opened timingfile %s\n",timefilename);
#endif

    }

  my_times_size = NUM_OF_ROUTINES * sizeof(timing_info);
  my_times = (timing_info *) dalib_malloc (my_times_size,"dalib_init_timestat");

  my_names_size	= NUM_OF_ROUTINES * AVER_ROUTINE_NAME_LEN * sizeof(char); 
  my_names	= (char *) dalib_malloc (my_names_size,"init_timestat"); 

  my_names_len = 0;   
  num_of_routines = 0;

/*  my_sresults, my_sorted_names and the arrays for collection
    of results for proc 1
    will be allocated in dalib_collect_timestat when we know,
    how much space we will need for them */

  actual_routine = -1;           /* no routine is being timed yet */

  time_statistics_collect = 1;   /* collect timing info when set */
                                 /* set by dalib_timestat_on in fortran pu */
                                 /* unset by dalib_timestat_off in fortran pu */

  return;
     
} /*dalib_init_timestat */


/***********************************************************************
***********************************************************************/


/*************************************************************************
*   void FUNCTION dalib_timestat_on (void)                               *
*                                                                        *
*   called in FORTRAN program to restart collection of timing statistics *
*   (never called before dalib_start_timestat if pcb.time_flag set)      *
*                                                                        *
*************************************************************************/
/* Switches the collection of netto times on again and restarts the netto
   timer of the routine that is currently timed.  Does nothing if the
   executable runs without -time or if collection is already on.
   The brutto timers are not touched, they run all the time. */
void FUNCTION(dalib_timestat_on) ()

{
 int now;

#ifdef DEBUG1
  printf("%d: dalib_timestat_on:\n",pcb.i);
#endif

  if (pcb.time_flag == 0)     return;   /* executable not called with -time */

  if (time_statistics_collect)   return;   /* already set, ignore */
   
  time_statistics_collect = 1;             /* set */

  /* actual_routine is -1 until the first routine has been started; there is
     no timer to restart then (and my_times[-1] must not be written).
     dalib_start_timestat sets the netto timer when the first routine starts. */

  if (actual_routine < 0)     return;

  now = dalib_timestamp();

  my_times[actual_routine].netto_time_start = now;

  return;

} /* dalib_timestat_on */

 
/*******************************************************************
*                                                                  *
* ROUTINES FOR dalib_start_timestat ()                             *
*                                                                  *
*******************************************************************/


/*******************************************************************
*                                                                  *
*  void addtree(int: testind)                                      *
*                                                                  *
*  - find a lexical neighbour to my_times[num_of_routines].name    *
*                                                                  *
*******************************************************************/
/* Hooks the record my_times[num_of_routines] (the new routine) into the
   name tree.  The search starts at node testind and descends to the lower
   or higher child until a free link is found.  A name that is already in
   the tree is reported as internal error. */
static void addtree(testind)
int testind;
 
{
 int   node;                 /* tree node currently compared with */
 int   order;                /* result of the name comparison */
 char *new_name;             /* name of the routine to be inserted */

  /* the very first routine is the root, nothing to link */

  if (num_of_routines == 0 && testind == 0) return;

  new_name = my_names + my_times[num_of_routines].offset;

  node = testind;

  for (;;)
    {
      order = strcmp(new_name, my_names + my_times[node].offset);

      if (order == 0) break;      /* duplicate name: must not happen */

      if (order < 0)
        {
          if (my_times[node].lower == -1)
            { my_times[node].lower = num_of_routines;   /* free link found */
              return;
            }
          node = my_times[node].lower;
        }
       else
        {
          if (my_times[node].higher == -1)
            { my_times[node].higher = num_of_routines;  /* free link found */
              return;
            }
          node = my_times[node].higher;
        }
    }

  /* equal names may only occur at the first name
     (testind = num_of_routines = 0), which was handled above */

  printf("%d: addtree confused: my_times[%d].name (%s) == my_times[%d].name (%s) ... \n"
         ,pcb.i,num_of_routines,new_name,node,my_names+my_times[node].offset);

  dalib_internal_error("addtree (timing)");
  dalib_stop();

}/* addtree*/

 
/*******************************************************************
*                                                                  *
*  void handle_new_routine_name(subr_name,len)                     *
*                                                                  *
*  - stores the new routine name in array my_names                 *
*    and creates the matching my_times entry (tree node)           *
*                                                                  *
*******************************************************************/
static void handle_new_routine_name(subr_name,len)
char *subr_name;
int len;
 
{
 int i,j;
 int size;
 
#ifdef DEBUG1
 fprintf(testfile,"%d: handle_new_routine_name (nr %d):  %s:\n",pcb.i,num_of_routines,subr_name);
#endif


  /* get enough space for my_times if not sufficient */

  if (num_of_routines >= NUM_OF_ROUTINES)  /*num_of_routines:index*/
    
    { timing_info* new_times;

      /* increase size of my_times */

      size = my_times_size + sizeof (timing_info) * ROUTINE_INCREMENT;


#ifdef DEBUG1

 fprintf(testfile,"%d: increasing my_times; old size: %d; new size: %d\n",pcb.i,my_times_size,size);

 for (i = 0; i < num_of_routines; i++)
    fprintf(testfile,"%d: my_times[%d]: off: %d; len: %d; br: %d; ne: %d; bs: %d; ns: %d; lo: %d; hi: %d; Routine[%d]: %s\n"
                     ,pcb.i,i,my_times[i].offset,my_times[i].len,my_times[i].brutto,my_times[i].netto,my_times[i].brutto_time_start,my_times[i].netto_time_start,my_times[i].lower,my_times[i].higher
                     ,i,my_names+my_times[i].offset);

#endif

      new_times = (timing_info *) dalib_malloc(size,"handle_new_routine_name:new_times");

#ifdef DEBUG1
 fprintf(testfile,"%d: my_times increased\n",pcb.i);
#endif


      /* memcopy by hand ... */

      for (i = 0; i < num_of_routines; i++)
          new_times[i] = my_times[i];

      dalib_free (my_times,my_times_size);

      my_times = new_times;
      my_times_size = size;

#ifdef DEBUG1

 fprintf(testfile,"%d: completed copy of my_times:\n",pcb.i);

 for (i = 0; i < num_of_routines; i++)
    fprintf(testfile,"%d: my_times[%d]: off: %d; len: %d; br: %d; ne: %d; bs: %d; ns: %d; lo: %d; hi: %d; Routine[%d]: %s\n"
                     ,pcb.i,i,my_times[i].offset,my_times[i].len,my_times[i].brutto,my_times[i].netto,my_times[i].brutto_time_start,my_times[i].netto_time_start,my_times[i].lower,my_times[i].higher
                     ,i,my_names+my_times[i].offset);

#endif
 
    }


  /* get enough space for my_names if not sufficient */

  if (my_names_len + len +1 >= my_names_size)

    { char *new_names;

     /* increase size of my_names */

     size = my_names_size + ROUTINE_INCREMENT * AVER_ROUTINE_NAME_LEN * sizeof(char);

#ifdef DEBUG1

 fprintf(testfile,"%d: increasing my_names; old size %d filled up to %d; new size: %d. Contents:\n",pcb.i,my_names_size,my_names_len,size);

 for ( i = 0; i <= my_names_len; i++)
  fprintf(testfile,"%c",my_names[i]);
 fprintf (testfile,"\n");

 for (i = 0; i < num_of_routines; i++)
    fprintf(testfile,"%d: my_times[%d]: off: %d; len: %d; br: %d; ne: %d; bs: %d; ns: %d; lo: %d; hi: %d; Routine[%d]: %s\n"
                     ,pcb.i,i,my_times[i].offset,my_times[i].len,my_times[i].brutto,my_times[i].netto,my_times[i].brutto_time_start,my_times[i].netto_time_start,my_times[i].lower,my_times[i].higher
                     ,i,my_names+my_times[i].offset);

#endif

     new_names = (char *) dalib_malloc (size,"handle_new_routine_name:new_names");

#ifdef DEBUG1
 fprintf(testfile,"%d: my_names increased\n",pcb.i);
#endif


     dalib_memcopy (new_names, my_names,my_names_len);

     if (my_names != (char *) 0)
        dalib_free (my_names, my_names_size);
 
     my_names = new_names;
     my_names_size = size;

#ifdef DEBUG1

 fprintf(testfile,"%d: completed copy of my_names. Contents:\n",pcb.i);

 for ( i = 0; i < my_names_len; i++) fprintf(testfile,"%c",my_names[i]);
 fprintf (testfile,"\n");

 for (i = 0; i < num_of_routines; i++)
    fprintf(testfile,"%d: my_times[%d]: off: %d; len: %d; br: %d; ne: %d; bs: %d; ns: %d; lo: %d; hi: %d; Routine[%d]: %s\n"
                     ,pcb.i,i,my_times[i].offset,my_times[i].len,my_times[i].brutto,my_times[i].netto,my_times[i].brutto_time_start,my_times[i].netto_time_start,my_times[i].lower,my_times[i].higher
                     ,i,my_names+my_times[i].offset);

#endif
   }


  /* --- START OF REAL WORK --- */

  my_times[num_of_routines].offset = my_names_len; /* start of rout.name */

  /* now copy the name itself (we better don't use strcpy ...) */
  for (i = 0; i < len; i++)
      my_names[my_names_len++] = subr_name[i];
  my_names[my_names_len++] = '\0';		/* delimiter */

  my_times[num_of_routines].len    = len + 1;  /* \0 */
  my_times[num_of_routines].brutto = 0;
  my_times[num_of_routines].netto  = 0;

  my_times[num_of_routines].lower   = -1;
  my_times[num_of_routines].higher  = -1;
  
  addtree(0); 

/*
#ifdef DEBUG1

  fprintf(testfile,"%d: inited my_times for routine %d: %s\n",
               pcb.i,num_of_routines,my_names+my_times[num_of_routines].offset);

 for ( i = 0; i < my_names_len; i++) fprintf(testfile,"%c",my_names[i]);
 fprintf (testfile,"\n");

 for (i = 0; i < num_of_routines; i++)
    fprintf(testfile,"%d: my_times[%d]: off: %d; len: %d; br: %d; ne: %d; bs: %d; ns: %d; lo: %d; hi: %d; Routine[%d]: %s\n"
                     ,pcb.i,i,my_times[i].offset,my_times[i].len,my_times[i].brutto,my_times[i].netto,my_times[i].brutto_time_start,my_times[i].netto_time_start,my_times[i].lower,my_times[i].higher
                     ,i,my_names+my_times[i].offset);

#endif
*/ 
  
  num_of_routines++;

  return;

}/*handle_new_routine_name*/


/*******************************************************************
*                                                                  *
*  int get_index_of_routine(subr_name,len)                         *
*                                                                  *
*  - gets the index of subr_name in array my_times                 *
*                                                                  *
*******************************************************************/
/* Returns the index of routine subr_name in my_times; a routine that is
   not yet known is appended by handle_new_routine_name.
     subr_name : name of the routine
     len       : number of chars of subr_name
   The name ends at the first '\0' or after len chars, whatever comes first.
   This is the same view of the name as the stored one (only len chars are
   stored), so a name without terminator or with a longer tail is found
   again instead of being inserted a second time. */
static int get_index_of_routine(subr_name,len)
char *subr_name;
int len;
 
{
 int   i,k;
 char *stored;                  /* '\0'-terminated name of record i */

  for (i = 0; i < num_of_routines; i++)
    {
      stored = my_names + my_times[i].offset;

      /* compare char by char; a shorter stored name stops the loop
         at its '\0', so stored is never read behind its terminator */

      for (k = 0; k < len && subr_name[k] != '\0'; k++)
         if (subr_name[k] != stored[k]) break;

      /* equal if subr_name is exhausted and stored ends here, too */

      if ((k >= len || subr_name[k] == '\0') && stored[k] == '\0')
         return (i);
    }

  handle_new_routine_name(subr_name,len);

  return (num_of_routines - 1);   /* the new routine is the last one */

} /*get_index_of_routine*/


/*******************************************************************
*                                                                  *
*  void dalib_start_timestat (subr_name,len)                       *
*                                                                  *
*  - called at every start of a routine                            *
*                                                                  *
*  - stops the netto timer of the calling routine (if not main)    *
*    and adds the consumed time to its used_netto_time             *
*  - starts the brutto and netto timer of the actual routine       *
*                                                                  *
*******************************************************************/
/* Called at every start of a routine.
     subr_name, len : name of the started routine and its length
   Charges the netto time used so far to the routine that was active until
   now (if there is one and collection is on), makes the started routine
   the actual one and starts its brutto timer and, if collection is on,
   its netto timer. */
void dalib_start_timestat (subr_name,len) 
char *subr_name;
int len;
 
{
 int now;

  now = dalib_timestamp(); 

  /* stop netto timer of the caller; at the 1st call actual_routine = -1 */

  if (actual_routine >= 0 && time_statistics_collect)
     my_times[actual_routine].netto +=
                        now - my_times[actual_routine].netto_time_start;

  /* switch to the started routine (it is registered here if it is new) */

  actual_routine = get_index_of_routine(subr_name,len);

  my_times[actual_routine].brutto_time_start = now;

  if (time_statistics_collect)
     my_times[actual_routine].netto_time_start = now;

  return;

} /* dalib_start_timestat */
 

/*******************************************************************
*                                                                  *
* ROUTINES FOR dalib_stop_timestat ()                              *
*                                                                  *
*******************************************************************/

 
/************************************************************************
*                                                                       *
*  void dalib_stop_timestat (caller_name,len)                           *
*                                                                       *
*  - called at every end of a routine                                   *
*                                                                       *
*  - stops the brutto and netto timer of the actual routine             *
*    and adds the used time to its brutto and netto                     *
*  - restores actual_routine_name (caller_name) and restarts the        *
*    corresponding netto time counter                                   *
*                                                                       *
************************************************************************/
void dalib_stop_timestat (caller_name,len)
char *caller_name;
int len;
 
{
 int i,time;
 
/*
#ifdef DEBUG1
  fprintf(testfile,"%d: dalib_stop_timestat for routine %d (caller: %s)\n",pcb.i,actual_routine,caller_name);
#endif
*/

  time = dalib_timestamp();

/* stop brutto timer of actual routine in any case
   stop netto and brutto timer of actual routine if time_statistics_collect */

  i = time - my_times[actual_routine].brutto_time_start;
  my_times[actual_routine].brutto += i;

  if (time_statistics_collect)
    {
     i = time - my_times[actual_routine].netto_time_start;
     my_times[actual_routine].netto += i;

/* 
#ifdef DEBUG1
     fprintf(testfile,"subr%d %s used brutto: %d, netto: %d\n",
         	actual_routine,my_names+my_times[actual_routine].offset,
           my_times[actual_routine].brutto,my_times[actual_routine].netto);
#endif
*/
    }


  /* reset actual_routine_name to caller_name and restart corresponding
     netto time counter */

  if (caller_name[0] == '\0')
     actual_routine = 0;
   else
     actual_routine = get_index_of_routine(caller_name,len);

/* restart netto timer of calling routine if time_statistics_collect */

  if (time_statistics_collect)
      my_times[actual_routine].netto_time_start = time;

  return;
  
 
} /* dalib_stop_timestat */


/***********************************************************************
***********************************************************************/


/***********************************************************************
*   void FUNCTION dalib_timestat_off (void)                            *
*                                                                      *
*   called in FORTRAN program to stop collection of timing statistics  *
*                                                                      *
***********************************************************************/
/* Switches the collection of netto times off and charges the netto time
   used so far to the routine that is currently timed.  Does nothing if the
   executable runs without -time or if collection is already off.
   The brutto timers are not touched, they run all the time. */
void FUNCTION(dalib_timestat_off) ()

{
 int now;

#ifdef DEBUG1
  printf("%d: dalib_timestat_off:\n",pcb.i);
#endif

  if (pcb.time_flag == 0)   return;   /* executable called without -time */

  if (time_statistics_collect == 0)   return;   /* already unset, ignore */

  time_statistics_collect = 0;             /* unset */

  /* actual_routine is -1 until the first routine has been started; there is
     no netto timer to stop then (and my_times[-1] must not be touched) */

  if (actual_routine < 0)   return;

  now = dalib_timestamp();

  /* stop netto timer of actual routine */

  my_times[actual_routine].netto +=
                     now - my_times[actual_routine].netto_time_start;

  return;

} /* dalib_timestat_off */

 
/*******************************************************************
*                                                                  *
* ROUTINES FOR dalib_collect_timestat()                            *
*                                                                  *
*******************************************************************/


/*********************************************************************
*                                                                    *
*  int   next_routine ()                                             *
*                                                                    *
*  - chooses the lexically smallest name of the actual routines      *
*    names of all processes as next_routine                          *
*  - returns 0 if there is no more name to be processed, otherwise   *
*    the id of the first process whose current name is that name     *
*                                                                    *
*********************************************************************/
/* Selects the routine to be processed next: the lexically smallest of the
   current names (entry act_index[j] of the sorted list) of all processes
   j = 1 .. pcb.p.  Processes whose list is exhausted
   (act_index[j] >= all_results[2*j+0]) are skipped.
   Returns the id of the first process that has this name as its current
   name, or 0 if no process has a name left.
   The names are compared in place; no copy into a buffer of fixed size is
   made, so the length of the routine names is not limited here. */
static int next_routine()

{
 int   j;
 int   found;       /* process with the smallest name so far, 0: none */
 char *name;        /* smallest name so far (valid if found != 0) */
 char *candidate;   /* current name of process j */

#ifdef DEBUG1
 fprintf(testfile,"%d:next_routine:\n",pcb.i);
#endif

  found = 0;      /*no valid routine name*/
  name  = (char *) 0;

  for (j = 1; j <= pcb.p; j++)  /*inspect lists of all processes*/
     {
      if (act_index[j] >= all_results[2*j+0])
         continue;               /* no comparison behind valid names! */

      candidate = all_names[j]+sresults[j][act_index[j]].offset;

#ifdef DEBUG1
      fprintf(testfile,"%d: act_index[%d]: %d; all_results[2*%d+0]: %d\n",
                       pcb.i,j,act_index[j],j,all_results[2*j+0]);
      fprintf(testfile,"%d: sresults[%d][act_index[%d]].offset: %d\n",
                        pcb.i,j,j,sresults[j][act_index[j]].offset);
      fprintf(testfile,"%d: all_names[%d]+sresults[%d][act_index[%d]].offset: %s\n",
               pcb.i,j,j,j,candidate); 
#endif

      /* the first valid name is the first suggestion; a later process
         only wins with a strictly smaller name, so for equal names the
         process with the lowest id is returned */

      if (found == 0 || strcmp(candidate,name) < 0)
        { name  = candidate;
          found = j;
        }
     }

#ifdef DEBUG1
  fprintf (testfile,"%d: found = %d\n",pcb.i,found);
#endif

  if (found == 0) return (0);      /* no more routine name to be processed */

#ifdef DEBUG1
  fprintf (testfile,"%d: returning found = %d (%d): %s\n",
           	pcb.i,found,act_index[found],
                all_names[found]+sresults[found][act_index[found]].offset);
#endif

  return (found);

} /* next_routine */

/*******************************************************************
*                                                                  *
*  static int is_called(smallest_name,act_proc)                    *
*                                                                  *
*  - returns 1 if routine smallest_name was called by act_proc     *
*            0 else                                                *
*                                                                  *
*******************************************************************/
/* Tests whether the current entry of process act_proc is the routine
   smallest_name.
   Returns 1 if the list of act_proc is not exhausted and the name of its
   current entry equals smallest_name, 0 else. */
static int is_called(smallest_name,act_proc)
char *smallest_name;
int act_proc;

{
 int   pos;                     /* current list position of act_proc */
 char *current;                 /* name at that position */

#ifdef DEBUG1
 fprintf(testfile,"%d:is_called(smallest_name:<%s>,act_proc:<%d>)\n",pcb.i,smallest_name,act_proc);
#endif

  pos = act_index[act_proc];

  /* no more uncounted routine of this process */

  if (pos >= all_results[2*act_proc + 0]) return (0);

  current = all_names[act_proc] + sresults[act_proc][pos].offset;

  if (strcmp(smallest_name,current) == 0) return (1);

  return (0);

} /* is_called */


/*******************************************************************
*                                                                  *
*  static void  treesort (index)                                   *
*                                                                  *
*  - writes tree my_times in sorted order into array my_sresults   *
*  - copies the names from my_names sorted to my_sorted_names      *
*                                                                  *
*******************************************************************/
static void treesort (index)
int index;
 
{
  int j,k,l;

/* 
#ifdef DEBUG1
  fprintf(testfile,"%d: treesort (index: %d,res_ind:%d,name_ind:%d)\n",pcb.i,index,res_ind,name_ind);
#endif
*/
 
  if (my_times[index].lower != -1)       /*first store all lefthand nodes*/
    treesort (my_times[index].lower);

/*
#ifdef DEBUG1
  fprintf(testfile,"%d: my_times[%d].name %s;(len:%d)\n",
                   pcb.i,index,my_names+my_times[index].offset,my_times[index].len);
#endif
*/
 
  strcpy (my_sorted_names+name_ind,my_names+my_times[index].offset);
  my_sresults[res_ind].offset = name_ind;
  my_sresults[res_ind].len    = my_times[index].len;
  my_sresults[res_ind].brutto = my_times[index].brutto;
  my_sresults[res_ind].netto  = my_times[index].netto;

  name_ind += my_times[index].len;

/*
#ifdef DEBUG1
  fprintf(testfile,"%d: stored no %d: name: %s (len: %d); brutto: %d; netto: %d\n",
     pcb.i,res_ind,my_sorted_names+my_sresults[res_ind].offset,
     my_sresults[res_ind].len,my_sresults[res_ind].brutto,
     my_sresults[res_ind].netto);
#endif
*/
 
  res_ind++;

  if (my_times[index].higher != -1)       /*after all righthand nodes*/
     treesort (my_times[index].higher);
 
} /*treesort */
 

/*******************************************************************
*                                                                  *
*  static void  dalib_sort_timings(void)                           *
*                                                                  *
*  - writes the times used for each routine called by this         *
*    process lexically sorted by routine names into my_sresults    *
*                                                                  *
*******************************************************************/
static void  dalib_sort_timings()
 
{
  int j,k,l;
  timing_info *ptr;
 
#ifdef DEBUG1
  fprintf(testfile,"%d: dalib_sort_timings:\n",pcb.i);
#endif

  name_ind = 0;  /*points to start of new name*/

  res_ind = 0;   /* number of already sorted routine entries */
 
  treesort (0);

/*
#ifdef DEBUG1
  fprintf(testfile,"%d: sorted routine names and times:\n",pcb.i);
    for (j = 0; j < num_of_routines; j++)
     fprintf(testfile,"%d: subr[%d]: %s; brutto: %d, netto: %d\n",
             pcb.i,j,my_sorted_names+my_sresults[j].offset,
             my_sresults[j].brutto,my_sresults[j].netto);
#endif
*/

} /*dalib_sort_timings */
 

/*******************************************************************
*                                                                  *
*  static void prepare_own_results ()                              *
*                                                                  *
*  Run by every process before the results go to process 1:        *
*  - results[0] = num_of_routines, results[1] = my_names_len       *
*  - allocates my_sresults and my_sorted_names                     *
*  - fills both via dalib_sort_timings()                           *
*                                                                  *
*******************************************************************/

static void prepare_own_results ()

{
#ifdef DEBUG1
  int n;     /* loop index of the dumps below */
#endif

#ifdef DEBUG
  printf ("%d: prepare_own_results, no of routines = %d, names_len = %d:\n",
           pcb.i, num_of_routines, my_names_len);
#endif

  /* the two sizes every receiver needs first */
  results[0] = num_of_routines;
  results[1] = my_names_len;

#ifdef DEBUG1
  fprintf(testfile,"%d: results to send: %d routines; %d nameslen\n",
                    pcb.i,results[0],results[1]);
#endif

  /* one timing_sorted entry per routine ... */
  my_sresults_size = num_of_routines * sizeof(timing_sorted);
  my_sresults = (timing_sorted *) dalib_malloc(my_sresults_size,"dalib_sort_timings:my_sresults");

  /* ... and a name buffer as long as the unsorted one */
  my_sorted_names = (char *) dalib_malloc (my_names_len,"dalib_sort_timings:my_sorted_names");

#ifdef DEBUG1
  /* dump of the unsorted input */
  fprintf(testfile,"%d: GOT %d routines and their timings:\n",
                    pcb.i,results[0]);

  for (n = 0; n < results[0]; n++)
    {
     fprintf(testfile,"%d: subr nr %d: %s (len:%d); brutto-time: %d, netto-time: %d\n"
                      ,pcb.i,n,my_names+my_times[n].offset,my_times[n].len
                      ,my_times[n].brutto,my_times[n].netto);
    }
#endif

  dalib_sort_timings();

#ifdef DEBUG1
  /* dump of the sorted output */
  fprintf(testfile,"%d: GOT %d sorted routines and their timings:\n",
                    pcb.i,results[0]);

  for (n = 0; n < results[0]; n++)
    {
     fprintf(testfile,"%d: subr nr %d: %s (len:%d); brutto-time: %d, netto-time: %d\n"
        ,pcb.i,n,my_sorted_names+my_sresults[n].offset,my_sresults[n].len
                ,my_sresults[n].brutto,my_sresults[n].netto);
    }
#endif

} /* prepare_own_results */


/*******************************************************************
*                                                                  *
*  static void send_own_results ()                                 *
*                                                                  *
*  Sends three messages to process 1, in this order:               *
*  - results[0..1]: number of routines, length of the names        *
*  - my_sresults: results[0] entries of type timing_sorted         *
*  - my_sorted_names: results[1] characters                        *
*                                                                  *
*******************************************************************/

static void send_own_results ()

{
#ifdef DEBUG1
   int i,j;      /* loop indices of the dumps below */
#endif

#ifdef DEBUG
   printf ("%d: send_own_results: num of subs: %d, len of sorted_names: %d\n",
            pcb.i, results[0], results[1]);
#endif

     /* sizes first: they tell the receiver how much follows */
     dalib_send (1, results, 2 * sizeof(int),1);

#ifdef DEBUG1
     fprintf(testfile,"%d: STARTING TO SEND MY_SRESULTS\n",pcb.i);
     for (j = 0; j < results[0]; j++)
      fprintf(testfile,"%d: routine: %s: len: %d, brutto: %d, netto: %d\n",
            pcb.i,my_sorted_names+my_sresults[j].offset,my_sresults[j].len,
            my_sresults[j].brutto,my_sresults[j].netto);
#endif

     dalib_send (1, my_sresults, results[0] * sizeof(timing_sorted), 1);

#ifdef DEBUG1
     fprintf(testfile,"%d: STARTING TO SEND MY_SORTED_NAMES\n",pcb.i);
     /* raw dump of the name buffer; written to testfile like every other
        DEBUG1 line, so that it stays next to its surrounding messages */
     for (i = 0; i < results[1]; i++) fprintf(testfile,"%c",my_sorted_names[i]);
     fprintf(testfile,"\n");
#endif

     dalib_send (1, my_sorted_names, results[1]* sizeof (char), 1);

#ifdef DEBUG1
     fprintf(testfile,"%d: send_own_results: SENDING COMPLETED.\n",pcb.i);
#endif

} /* send_own_results */

/*******************************************************************
*                                                                  *
*  static void collect_all_results_sizes()                         *
*                                                                  *
*  - allocates all_results and act_index                           *
*  - stores the own sizes (results[0..1]) as those of proc 1       *
*  - receives the two sizes of each proc 2..pcb.p                  *
*  - allocates the result and name storage for all procs, every    *
*    proc getting a slice as large as the biggest size seen        *
*  - sresults[p] / all_names[p] point to the slice of proc p       *
*                                                                  *
*******************************************************************/

static void collect_all_results_sizes()

{
  int proc;             /* process number, 1 .. pcb.p */
  int most_routines;    /* largest number of routines of any process */
  int longest_names;    /* largest length of sorted names of any process */

#ifdef DEBUG
  printf ("%d: collect_all_results_sizes:\n", pcb.i);
#endif

  /* Arrays indexed by process number; index 0 is left unused so that
     process numbers can be used directly.

     all_results[2*proc + 0] = num_of_routines of process proc
     all_results[2*proc + 1] = size of my_sorted_names of process proc
     act_index[proc]         = running from 0 to num_of_routines of
                               process proc, to reach each of its results */

  all_results_size = 2 * (pcb.p + 1);
  all_results = dalib_int_malloc (all_results_size,
                                  "dalib_collect_timestat:all_results");

  act_index_size = pcb.p + 1;
  act_index = dalib_int_malloc (act_index_size,
                                "dalib_collect_timestat:act_index");

#ifdef DEBUG1
  fprintf(testfile,"%d: collecting sizes of results of %d processes:\n",pcb.i,pcb.p);
#endif

  /* process 1: its sizes are taken from results[], nothing is received */

  all_results[2 * 1 + 0] = results[0];
  all_results[2 * 1 + 1] = results[1];

  most_routines = all_results[2 * 1 + 0];      /* start values of the maxima */
  longest_names = all_results[2 * 1 + 1];

  /* processes 2 .. pcb.p: one message of two ints each */

  for (proc = 2; proc <= pcb.p; proc++)
    {
      int *sizes = &all_results[2 * proc];

#ifdef DEBUG
      printf("%d: waiting for sizes of results of process no. %d\n",
             pcb.i, proc);
#endif

      dalib_receive (proc, sizes, 2 * sizeof(int));

#ifdef DEBUG
      printf("%d: got sizes from proc %d: num_of_rout:%d, len_of_names: %d\n",
             pcb.i, proc, sizes[0], sizes[1]);
#endif

      if (sizes[0] > most_routines) most_routines = sizes[0];
      if (sizes[1] > longest_names) longest_names = sizes[1];
    }

#ifdef DEBUG
  printf ("%d: max_num_of_subr: %d; max_len_of_subr: %d\n",
          pcb.i, most_routines, longest_names);
#endif

  /* timing results: one contiguous area, pcb.p slices of most_routines
     entries; sresults[proc] addresses the slice of process proc */

  sresults_all_size = (pcb.p) * most_routines * sizeof(timing_sorted);
  sresults_all = (timing_sorted *) dalib_malloc(sresults_all_size,"dalib_collect_timestat:sresults_all");

  sresults_size = sizeof(timing_sorted *) * (pcb.p + 1);
  sresults = (timing_sorted**) dalib_malloc (sresults_size,"dalib_collect_timestat:sresults");

  for (proc = 1; proc <= pcb.p; proc++)
    sresults[proc] = sresults_all + (proc - 1) * most_routines;

  /* names: same layout, pcb.p slices of longest_names characters;
     all_names[proc] addresses the slice of process proc */

  all_sorted_names_size = pcb.p * longest_names * sizeof(char);
  all_sorted_names = dalib_malloc(all_sorted_names_size,"dalib_collect_timestat:all_sorted_names");

  all_names_size = (pcb.p + 1) * sizeof (char *);
  all_names = (char **) dalib_malloc (all_names_size,"dalib_collect_timestat:all_names");

  for (proc = 1; proc <= pcb.p; proc++)
    all_names[proc] = all_sorted_names + (proc - 1) * longest_names;

} /* collect_all_results_sizes */


/*******************************************************************
*                                                                  *
*  static void collect_all_results ()                              *
*                                                                  *
*  - copies the own sorted names and timings into slot 1           *
*  - receives timing results of procs 2..pcb.p                     *
*  - receives sorted list of names of procs 2..pcb.p               *
*                                                                  *
*  Expects the sizes in all_results and the storage set up by      *
*  collect_all_results_sizes().                                    *
*                                                                  *
*******************************************************************/
static void collect_all_results()

{
 int i,j;

#ifdef DEBUG1
 fprintf(testfile,"%d: collect_all_results:\n",pcb.i);
#endif


/* RESULTS OF PROC1: copied from the own buffers, nothing is received */

  for (i = 0; i < all_results[2*1+1]; i++)
     {
      all_names[1][i] = my_sorted_names[i];
     }


  for (i = 0; i < all_results[2*1+0]; i++)
    {
     sresults[1][i].offset = my_sresults[i].offset;
     sresults[1][i].len    = my_sresults[i].len;
     sresults[1][i].brutto = my_sresults[i].brutto;
     sresults[1][i].netto  = my_sresults[i].netto;

#ifdef DEBUG1
     fprintf(testfile,"%d: sresults[1][%d]: .offset: %d; .len: %d; .brutto: %d; .netto: %d (%s)\n",pcb.i,i,sresults[1][i].offset,sresults[1][i].len,sresults[1][i].brutto,sresults[1][i].netto,all_names[1]+sresults[1][i].offset);
#endif
      }


/* RESULTS OF PROC2 TO PROC pcb.p: per process first the timing entries,
   then the names, each with the size announced in all_results */

  for (j = 2; j <= pcb.p; j++)
     {

#ifdef DEBUG1
      fprintf(testfile,"%d:WAITING FOR RESULTS OF PROCESS no. %d.\n",pcb.i,j);
#endif

      dalib_receive (j, sresults[j], all_results[2*j+0] * sizeof(timing_sorted));

#ifdef DEBUG1
      fprintf(testfile,"%d:GOT RESULTS OF PROCESS no. %d.\n",pcb.i,j);
      for (i = 0; i < all_results[2*j+0]; i++)
      fprintf(testfile,"%d: (%d:) offset: %d; len: %d; brutto: %d; netto: %d\n",pcb.i,j,sresults[j][i].offset,sresults[j][i].len,sresults[j][i].brutto,sresults[j][i].netto);
#endif

      dalib_receive (j, all_names[j], all_results[2*j+1] * sizeof(char));

#ifdef DEBUG1
      fprintf(testfile,"%d:GOT ROUTINE_NAMES OF PROCESS no. %d.\n",pcb.i,j);
      /* the bracket shows the entry number i, followed by its name */
      for (i = 0; i < all_results[2*j+0]; i++)
      fprintf(testfile,"%d: (%d:) routine name[%d]: %s\n",pcb.i,j,i,all_names[j]+sresults[j][i].offset);
#endif

     }

#ifdef DEBUG1
  for (j = 1; j<= pcb.p; j++)
     {
      fprintf(testfile,"%d: all_results[2*%d+0]: %d, all_results[2*%d+1]: %d\n",pcb.i,j,all_results[2*j+0],j,all_results[2*j+1]);

      for (i = 0; i < all_results[2*j+0]; i++)
       fprintf(testfile,"%d: routine[%d][%d]: %s, sres[%d][%d].len: %d,.brutto: %d, .netto: %d\n",pcb.i,j,i,all_names[j]+sresults[j][i].offset,j,i,sresults[j][i].len,sresults[j][i].brutto,sresults[j][i].netto);
      }
#endif


  return;


} /* collect_all_results */


/*******************************************************************
*                                                                  *
*  static void compute_timings()                                   *
*                                                                  *
*  - writes to timefile, per routine name: number of calling       *
*    procs, average netto and brutto time and the largest          *
*    deviation of a single proc from that average                  *
*  - then writes the netto and brutto times of every routine       *
*    process by process                                            *
*                                                                  *
*  All times are divided by dalib_timerate() before printing.      *
*                                                                  *
*******************************************************************/
static void compute_timings()

{
  int i,j;

  char *actual_subr;      /* name of routine to summarize times for; points
                             into all_names[], so names of any length work */
  int  act_subr_ind;      /* proc number delivered by next_routine();
                             its pending entry supplies actual_subr */
  double bruttosum;       /* sum of brutto times for actual routine; double,
                             because ticks of many procs overflow an int */
  double nettosum;        /* sum of netto times for actual routine */
  double br,ne;
  double averbr,averne;

  int minbr = 0,maxbr = 0; /* min rsp. max brutto time used for actual_subr */
  int minne = 0,maxne = 0; /* min rsp. max netto time used for actual_subr */
  double minbrd,maxbrd;   /* double of minbr, maxbr */
  double minned,maxned;   /* double of minne, maxne */
  double maxdiffbr,maxdiffne;


  int numcallers;

  int timerate;


#ifdef DEBUG1
 fprintf(testfile,"%d: compute_timings:\n",pcb.i);
#endif


  timerate= dalib_timerate();

  for (j = 1; j <= pcb.p; j++) act_index[j] = 0;
                /* act_index[j] counts from 0 to all_results[2 * j] */

  fprintf (timefile,
    "Timing statistics summary for program %s (%d procs) in seconds:\n\n",
    dalib_program_name,pcb.p);

  fprintf(timefile,"Name of Routine      called   Aver netto +-Max. diff.  Aver brutto +-Max. diff.\n"); /*header*/
  fprintf(timefile,"                     by #p.\n"); /*header*/

/* find next routine to print times for; next_routine() yields 0 when
   no routine is left */

  while ((act_subr_ind = next_routine ()) != 0)

     { /* summarize times for that subr; actualize corresp. act_index's */

       /* the name storage is not modified while summarizing, so the
          pointer stays valid although act_index[] is advanced below */
       actual_subr = all_names[act_subr_ind]+sresults[act_subr_ind][act_index[act_subr_ind]].offset;

#ifdef DEBUG1
       fprintf(testfile,"got actual routine name: %s of list of proc %d\n",actual_subr,act_subr_ind);
#endif

       bruttosum = 0.0;
       nettosum  = 0.0;
       numcallers = 0;

       for (j = 1; j<= pcb.p; j++)
          {
           if ( is_called(actual_subr,j))
             {
              if (numcallers == 0)
                { /* first caller found: start values of min and max */
                  minbr = maxbr = sresults[j][act_index[j]].brutto;
                  minne = maxne = sresults[j][act_index[j]].netto;
                }
               else
                { minbr = MIN(minbr,sresults[j][act_index[j]].brutto);
                  maxbr = MAX(maxbr,sresults[j][act_index[j]].brutto);
                  minne = MIN(minne,sresults[j][act_index[j]].netto);
                  maxne = MAX(maxne,sresults[j][act_index[j]].netto);
                }
              numcallers++;
              bruttosum += sresults[j][act_index[j]].brutto;
              nettosum  += sresults[j][act_index[j]].netto;
              act_index[j]++;      /* this entry of proc j is consumed */
             }
           }

       br = bruttosum / (double) timerate;
       ne = nettosum / (double) timerate;
       averbr = br / (double) numcallers;
       averne = ne / (double) numcallers;

       minbrd = (double) minbr / (double) timerate;
       maxbrd = (double) maxbr / (double) timerate;
       minned = (double) minne / (double) timerate;
       maxned = (double) maxne / (double) timerate;

       /* larger of the distances from the average to min and to max */
       maxdiffbr = MAX(averbr - minbrd, maxbrd - averbr);
       maxdiffne = MAX(averne - minned, maxned - averne);


       fprintf(timefile,"%-20s %4d   %12.3f +-%10.3f %12.3f +-%10.3f\n",actual_subr,numcallers,averne,maxdiffne,averbr,maxdiffbr);

      } /*while*/


  fprintf(timefile,"\n\nTiming statistics of the processes:\n");

  for ( j = 1; j <= pcb.p; j++)
     {
      fprintf(timefile,"\nProcess %d:\n",j);
      fprintf(timefile,"Name of Routine       Used time (netto)  Used time (brutto)\n"); /*header*/

       for (i = 0; i < all_results[2 * j + 0]; i++)
          {
           br = (double) sresults[j][i].brutto/ (double) timerate;
           ne = (double) sresults[j][i].netto/ (double) timerate;

           fprintf(timefile,"%-20s %14.3f sec  %14.3f sec\n", all_names[j]+sresults[j][i].offset,ne,br);

          }
      }

  return;


} /* compute_timings */


/********************************************************************
*                                                                   *
*  void dalib_collect_timestat()         collection of timing info  *
*                                           (called by dalib_exit)  *
*                                                                   *
*  each process:                                                    *
*  sorts its own timings by routine name (prepare_own_results)      *
*  and, if pcb.i > 1, sends them to process 1.                      *
*                                                                   *
*  process1:                                                        *
*  collects results of other processes, computes the timing values  *
*  for each routine, closes timefile and frees the collected data.  *
*                                                                   *
*  finally every process frees its own name and timing buffers.     *
*                                                                   *
********************************************************************/

void dalib_collect_timestat()

{

#ifdef DEBUG
  printf ("%d: dalib_collect_timestat:\n",pcb.i);
#endif

  prepare_own_results();

  if (pcb.i == 1)
    {
     /* collector: gather sizes, then data, then write the statistics */
     collect_all_results_sizes();
     collect_all_results();
     compute_timings();

     fclose (timefile);

     printf("... timing statistics in file %s.\n",timefilename);

     /* storage that exists on the collector only */
     dalib_free (sresults,sresults_size);
     dalib_free (sresults_all,sresults_all_size);
     dalib_free (all_names,all_names_size);
     dalib_free (all_sorted_names,all_sorted_names_size);
     dalib_int_free (all_results,all_results_size);
     dalib_int_free (act_index,act_index_size);
    }
  else if (pcb.i > 1)
    {
     /* every other process only delivers its own results */
     send_own_results();
    }

#ifdef DEBUG1
  fclose (testfile);
#endif

  /* per-process buffers, present on every process */
  dalib_free (my_names,my_names_size);
  dalib_free (my_sorted_names,my_names_len);
  dalib_free (my_times,my_times_size);
  dalib_free (my_sresults,my_sresults_size) ;

} /* dalib_collect_timestat */
